From 32120294585b3f07f20cad582e15e5dcbd00ae36 Mon Sep 17 00:00:00 2001 From: Martin Lindhe Date: Fri, 24 Jan 2025 11:23:22 +0100 Subject: [PATCH] unescape selftext_html from json api, fixes #354 --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + src/utils.rs | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 819d4bc..20d528b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -770,6 +770,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + [[package]] name = "http" version = "0.2.12" @@ -1367,6 +1373,7 @@ dependencies = [ "dotenvy", "fastrand", "futures-lite", + "htmlescape", "hyper", "hyper-rustls", "libflate", diff --git a/Cargo.toml b/Cargo.toml index a1d3ec0..a4d0170 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ common-words-all = { version = "0.0.2", default-features = false, features = ["e hyper-rustls = { version = "0.24.2", features = [ "http2" ] } tegen = "0.1.4" serde_urlencoded = "0.7.1" +htmlescape = "0.3.1" [dev-dependencies] diff --git a/src/utils.rs b/src/utils.rs index c15dcea..6fef007 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -22,6 +22,7 @@ use std::str::FromStr; use std::string::ToString; use time::{macros::format_description, Duration, OffsetDateTime}; use url::Url; +use htmlescape::decode_html; /// Write a message to stderr on debug mode. This function is a no-op on /// release code. @@ -376,7 +377,7 @@ impl Post { let awards = Awards::parse(&data["all_awardings"]); // selftext_html is set for text posts when browsing. 
- let mut body = rewrite_urls(&val(post, "selftext_html")); + let mut body = rewrite_urls(&decode_html(&val(post, "selftext_html")).unwrap_or_else(|_| val(post, "selftext_html"))); if body.is_empty() { body = rewrite_urls(&val(post, "body_html")); }