mirror of
https://github.com/zyachel/quetre.git
synced 2025-04-04 13:37:39 +03:00
fix(parse): don't bail out on encountering weird characters
replace all \x3C (<) with its valid Unicode escape sequence (\u003C)
This commit is contained in:
parent
1073e61530
commit
f11d3f2ac6
3 changed files with 18 additions and 8 deletions
|
@ -5,6 +5,7 @@
|
||||||
import * as cheerio from 'cheerio';
|
import * as cheerio from 'cheerio';
|
||||||
import getAxiosInstance from '../utils/getAxiosInstance.js';
|
import getAxiosInstance from '../utils/getAxiosInstance.js';
|
||||||
import AppError from '../utils/AppError.js';
|
import AppError from '../utils/AppError.js';
|
||||||
|
import parse from '../utils/parse.js';
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// FUNCTION
|
// FUNCTION
|
||||||
|
@ -31,7 +32,7 @@ const answersFetcher = async (resourceStr, lang) => {
|
||||||
if (!matches) return;
|
if (!matches) return;
|
||||||
|
|
||||||
// brittle logic, but works
|
// brittle logic, but works
|
||||||
const matchedPart = JSON.parse(JSON.parse(matches[1])).data;
|
const matchedPart = JSON.parse(parse(matches[1])).data;
|
||||||
|
|
||||||
// only question block has this word
|
// only question block has this word
|
||||||
if (typeof matchedPart.question?.viewerHasAnswered !== 'undefined') {
|
if (typeof matchedPart.question?.viewerHasAnswered !== 'undefined') {
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
import * as cheerio from 'cheerio';
|
import * as cheerio from 'cheerio';
|
||||||
import getAxiosInstance from '../utils/getAxiosInstance.js';
|
import getAxiosInstance from '../utils/getAxiosInstance.js';
|
||||||
import AppError from '../utils/AppError.js';
|
import AppError from '../utils/AppError.js';
|
||||||
|
import parse from '../utils/parse.js';
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// FUNCTION
|
// FUNCTION
|
||||||
|
@ -18,19 +19,16 @@ import AppError from '../utils/AppError.js';
|
||||||
* await fetcher('topic/Space-Physics'); // will return 'space physics' topic object
|
* await fetcher('topic/Space-Physics'); // will return 'space physics' topic object
|
||||||
* await fetcher('profile/Charlie-Cheever'); // will return object containing information about charlie cheever
|
* await fetcher('profile/Charlie-Cheever'); // will return object containing information about charlie cheever
|
||||||
*/
|
*/
|
||||||
const fetcher = async (
|
const fetcher = async (resourceStr, { keyword, lang, toEncode = true }) => {
|
||||||
resourceStr,
|
|
||||||
{ keyword, lang, toEncode = true }
|
|
||||||
) => {
|
|
||||||
try {
|
try {
|
||||||
// as url might contain unescaped chars. so, encoding it right away
|
// as url might contain unescaped chars. so, encoding it right away
|
||||||
const str = toEncode ? encodeURIComponent(resourceStr) : resourceStr;
|
const str = toEncode ? encodeURIComponent(resourceStr) : resourceStr;
|
||||||
const axiosInstance = getAxiosInstance(lang);
|
const axiosInstance = getAxiosInstance(lang);
|
||||||
const res = await axiosInstance.get(str);
|
const res = await axiosInstance.get(str);
|
||||||
|
|
||||||
const $ = cheerio.load(res.data);
|
const $ = cheerio.load(res.data);
|
||||||
|
|
||||||
const regex = new RegExp(`"{\\\\"data\\\\":\\{\\\\"${keyword}.*\\}"`); // equivalent to /"\{\\"data\\":\{\\"searchConnection.*\}"/
|
const regex = new RegExp(String.raw`"{\\"data\\":\{\\"${keyword}.*?\}"`);
|
||||||
|
|
||||||
let rawData;
|
let rawData;
|
||||||
$('body script').each((i, el) => {
|
$('body script').each((i, el) => {
|
||||||
|
@ -45,7 +43,7 @@ const fetcher = async (
|
||||||
|
|
||||||
if (!rawData) throw new AppError("couldn't retrieve data", 500);
|
if (!rawData) throw new AppError("couldn't retrieve data", 500);
|
||||||
|
|
||||||
return JSON.parse(rawData);
|
return parse(rawData);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const statusCode = err.response?.status;
|
const statusCode = err.response?.status;
|
||||||
if (statusCode === 404) throw new AppError('Not found', 404);
|
if (statusCode === 404) throw new AppError('Not found', 404);
|
||||||
|
|
11
utils/parse.js
Normal file
11
utils/parse.js
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
const invalidLessThan = /\\x3C/g;
|
||||||
|
const validLessThan = '\\u003C';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* parses and corrects invalid escape sequences
|
||||||
|
* @param {string} data
|
||||||
|
* @returns {Record<PropertyKey, any>}
|
||||||
|
*/
|
||||||
|
const parse = data => JSON.parse(data.replace(invalidLessThan, validLessThan));
|
||||||
|
|
||||||
|
export default parse;
|
Loading…
Add table
Add a link
Reference in a new issue