fix(parse): don't bail out on encountering weird characters

Replace every \x3C (<) with its valid Unicode escape sequence, since \x escapes are not valid JSON and make JSON.parse throw.
This commit is contained in:
zyachel 2024-04-08 00:22:51 +05:30
parent 1073e61530
commit f11d3f2ac6
3 changed files with 18 additions and 8 deletions

View file

@ -5,6 +5,7 @@
import * as cheerio from 'cheerio';
import getAxiosInstance from '../utils/getAxiosInstance.js';
import AppError from '../utils/AppError.js';
import parse from '../utils/parse.js';
////////////////////////////////////////////////////////
// FUNCTION
@ -31,7 +32,7 @@ const answersFetcher = async (resourceStr, lang) => {
if (!matches) return;
// brittle logic, but works
const matchedPart = JSON.parse(JSON.parse(matches[1])).data;
const matchedPart = JSON.parse(parse(matches[1])).data;
// only question block has this word
if (typeof matchedPart.question?.viewerHasAnswered !== 'undefined') {

View file

@ -5,6 +5,7 @@
import * as cheerio from 'cheerio';
import getAxiosInstance from '../utils/getAxiosInstance.js';
import AppError from '../utils/AppError.js';
import parse from '../utils/parse.js';
////////////////////////////////////////////////////////
// FUNCTION
@ -18,19 +19,16 @@ import AppError from '../utils/AppError.js';
* await fetcher('topic/Space-Physics'); // will return 'space physics' topic object
* await fetcher('profile/Charlie-Cheever'); // will return object containing information about charlie cheever
*/
const fetcher = async (
resourceStr,
{ keyword, lang, toEncode = true }
) => {
const fetcher = async (resourceStr, { keyword, lang, toEncode = true }) => {
try {
// as url might contain unescaped chars. so, encoding it right away
const str = toEncode ? encodeURIComponent(resourceStr) : resourceStr;
const axiosInstance = getAxiosInstance(lang);
const res = await axiosInstance.get(str);
const $ = cheerio.load(res.data);
const regex = new RegExp(`"{\\\\"data\\\\":\\{\\\\"${keyword}.*\\}"`); // equivalent to /"\{\\"data\\":\{\\"searchConnection.*\}"/
const regex = new RegExp(String.raw`"{\\"data\\":\{\\"${keyword}.*?\}"`);
let rawData;
$('body script').each((i, el) => {
@ -45,7 +43,7 @@ const fetcher = async (
if (!rawData) throw new AppError("couldn't retrieve data", 500);
return JSON.parse(rawData);
return parse(rawData);
} catch (err) {
const statusCode = err.response?.status;
if (statusCode === 404) throw new AppError('Not found', 404);