mirror of
https://github.com/redlib-org/redlib.git
synced 2025-04-05 14:07:39 +03:00
* Fix links not being converted when multiple emojis are in one comment * Make (most) emotes embed within comments * Restore the behavior that the "rewrite_urls_removes_backslashes_and_rewrites_url" test looks for * Listen to cargo fmt and cargo clippy's suggestions as well as removing some leftover comments and code --------- Co-authored-by: Matthew Esposito <matt@matthew.science>
This commit is contained in:
parent
403513ac4c
commit
a807002ddf
5 changed files with 165 additions and 8 deletions
105
src/utils.rs
105
src/utils.rs
|
@ -12,6 +12,7 @@ use once_cell::sync::Lazy;
|
|||
use regex::Regex;
|
||||
use rust_embed::RustEmbed;
|
||||
use serde_json::Value;
|
||||
use serde_json_path::{JsonPath, JsonPathExt};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::env;
|
||||
use std::str::FromStr;
|
||||
|
@ -919,12 +920,19 @@ pub fn rewrite_urls(input_text: &str) -> String {
|
|||
// Rewrite Reddit links to Redlib
|
||||
REDDIT_REGEX.replace_all(input_text, r#"href="/"#)
|
||||
.to_string();
|
||||
text1 = REDDIT_EMOJI_REGEX
|
||||
.replace_all(&text1, format_url(REDDIT_EMOJI_REGEX.find(&text1).map(|x| x.as_str()).unwrap_or_default()))
|
||||
.to_string()
|
||||
// Remove (html-encoded) "\" from URLs.
|
||||
.replace("%5C", "")
|
||||
.replace("\\_", "_");
|
||||
|
||||
loop {
|
||||
if REDDIT_EMOJI_REGEX.find(&text1).is_none() {
|
||||
break;
|
||||
} else {
|
||||
text1 = REDDIT_EMOJI_REGEX
|
||||
.replace_all(&text1, format_url(REDDIT_EMOJI_REGEX.find(&text1).map(|x| x.as_str()).unwrap_or_default()))
|
||||
.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
// Remove (html-encoded) "\" from URLs.
|
||||
text1 = text1.replace("%5C", "").replace("\\_", "_");
|
||||
|
||||
// Rewrite external media previews to Redlib
|
||||
loop {
|
||||
|
@ -980,6 +988,83 @@ pub fn rewrite_urls(input_text: &str) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
// These links all follow a pattern of "https://reddit-econ-prod-assets-permanent.s3.amazonaws.com/asset-manager/SUBREDDIT_ID/RANDOM_FILENAME.png"
|
||||
static REDDIT_EMOTE_LINK_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"https://reddit-econ-prod-assets-permanent.s3.amazonaws.com/asset-manager/(.*)"#).unwrap());
|
||||
|
||||
// These all follow a pattern of '"emote|SUBREDDIT_IT|NUMBER"', we want the number
|
||||
static REDDIT_EMOTE_ID_NUMBER_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#""emote\|.*\|(.*)""#).unwrap());
|
||||
|
||||
pub fn rewrite_emotes(media_metadata: &Value, comment: String) -> String {
|
||||
/* Create the paths we'll use to look for our data inside the json.
|
||||
Because we don't know the name of any given emote we use a wildcard to parse them. */
|
||||
let link_path = JsonPath::parse("$[*].s.u").expect("valid JSON Path");
|
||||
let id_path = JsonPath::parse("$[*].id").expect("valid JSON Path");
|
||||
let size_path = JsonPath::parse("$[*].s.y").expect("valid JSON Path");
|
||||
|
||||
// Extract all of the results from those json paths
|
||||
let link_nodes = media_metadata.json_path(&link_path);
|
||||
let id_nodes = media_metadata.json_path(&id_path);
|
||||
|
||||
// Initialize our vectors
|
||||
let mut id_vec = Vec::new();
|
||||
let mut link_vec = Vec::new();
|
||||
|
||||
// Add the relevant data to each of our vectors so we can access it by number later
|
||||
for current_id in id_nodes {
|
||||
id_vec.push(current_id)
|
||||
}
|
||||
for current_link in link_nodes {
|
||||
link_vec.push(current_link)
|
||||
}
|
||||
|
||||
/* Set index to the length of link_vec.
|
||||
This is one larger than we'll actually be looking at, but we correct that later */
|
||||
let mut index = link_vec.len();
|
||||
|
||||
// Comment needs to be in scope for when we call rewrite_urls()
|
||||
let mut comment = comment;
|
||||
|
||||
/* Loop until index hits zero.
|
||||
This also prevents us from trying to do anything on an empty vector */
|
||||
while index != 0 {
|
||||
/* Subtract 1 from index to get the real index we should be looking at.
|
||||
Then continue on each subsequent loop to continue until we hit the last entry in the vector.
|
||||
This is how we get this to deal with multiple emotes in a single message and properly replace each ID with it's link */
|
||||
index -= 1;
|
||||
|
||||
// Convert our current index in id_vec into a string so we can search through it with regex
|
||||
let current_id = id_vec[index].to_string();
|
||||
|
||||
/* The ID number can be multiple lengths, so we capture it with regex.
|
||||
We also want to only attempt anything when we get matches to avoid panicking */
|
||||
if let Some(id_capture) = REDDIT_EMOTE_ID_NUMBER_REGEX.captures(¤t_id) {
|
||||
// Format the ID to include the colons it has in the comment text
|
||||
let id = format!(":{}:", &id_capture[1]);
|
||||
|
||||
// Convert current link to string to search through it with the regex
|
||||
let link = link_vec[index].to_string();
|
||||
|
||||
// Make sure we only do operations when we get matches, otherwise we panic when trying to access the first match
|
||||
if let Some(link_capture) = REDDIT_EMOTE_LINK_REGEX.captures(&link) {
|
||||
/* Reddit sends a size for the image based on whether it's alone or accompanied by text.
|
||||
It's a good idea and makes everything look nicer, so we'll do the same. */
|
||||
let size = media_metadata.json_path(&size_path).first().unwrap().to_string();
|
||||
|
||||
// Replace the ID we found earlier in the comment with the respective image and it's link from the regex capture
|
||||
let to_replace_with = format!(
|
||||
"<img loading=\"lazy\" src=\"/emote/{} width=\"{size}\" height=\"{size}\" style=\"vertical-align:text-bottom\">",
|
||||
&link_capture[1]
|
||||
);
|
||||
|
||||
// Inside the comment replace the ID we found with the string that will embed the image
|
||||
comment = comment.replace(&id, &to_replace_with).to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
// Call rewrite_urls() to transform any other Reddit links
|
||||
rewrite_urls(&comment)
|
||||
}
|
||||
|
||||
// Format vote count to a string that will be displayed.
|
||||
// Append `m` and `k` for millions and thousands respectively, and
|
||||
// round to the nearest tenth.
|
||||
|
@ -1301,3 +1386,11 @@ fn test_url_path_basename() {
|
|||
// empty path
|
||||
assert_eq!(url_path_basename("/"), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_rewriting_emotes() {
|
||||
let json_input = serde_json::from_str(r#"{"emote|t5_31hpy|2028":{"e":"Image","id":"emote|t5_31hpy|2028","m":"image/png","s":{"u":"https://reddit-econ-prod-assets-permanent.s3.amazonaws.com/asset-manager/t5_31hpy/PW6WsOaLcd.png","x":60,"y":60},"status":"valid","t":"sticker"}}"#).expect("Valid JSON");
|
||||
let comment_input = r#"<div class="comment_body "><div class="md"><p>:2028:</p></div></div>"#;
|
||||
let output = r#"<div class="comment_body "><div class="md"><p><img loading="lazy" src="/emote/t5_31hpy/PW6WsOaLcd.png" width="60" height="60" style="vertical-align:text-bottom"></p></div></div>"#;
|
||||
assert_eq!(rewrite_emotes(&json_input, comment_input.to_string()), output);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue