mirror of
https://github.com/zyachel/quetre.git
synced 2025-04-03 04:57:37 +03:00
fix: fix broken answers feed
This commit fixes the issue of answers not being shown: the underlying HTML changed, which broke the old scraping logic. BREAKING CHANGE: older versions will not work with the answer route. Fixes https://github.com/zyachel/quetre/issues/96
This commit is contained in:
parent
9d195c2d33
commit
677b846eb2
2 changed files with 119 additions and 61 deletions
76
fetchers/answersFetcher.js
Normal file
76
fetchers/answersFetcher.js
Normal file
|
@ -0,0 +1,76 @@
|
|||
/* eslint-disable no-useless-catch */
|
||||
////////////////////////////////////////////////////////
|
||||
// IMPORTS
|
||||
////////////////////////////////////////////////////////
|
||||
import * as cheerio from 'cheerio';
|
||||
import getAxiosInstance from '../utils/getAxiosInstance.js';
|
||||
import AppError from '../utils/AppError.js';
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// FUNCTION
|
||||
////////////////////////////////////////////////////////
|
||||
/**
 * extracts the payload embedded in an inline script of the form `someProp.push("<data>");`,
 * where `<data>` is a JSON document that was serialised a second time as a JSON string literal.
 * @param {string | null} scriptText contents of a single script tag
 * @returns the `data` property of the decoded payload, or undefined if the script holds no decodable payload
 */
const extractPushedData = scriptText => {
  // data is contained like: someProp.push("<data>");
  const matches = (scriptText ?? '').match(/\.push\((".*")\);/);

  if (!matches) return undefined;

  try {
    // brittle logic, but works: the inner parse unwraps the string literal, the outer one decodes the payload
    return JSON.parse(JSON.parse(matches[1]))?.data;
  } catch (err) {
    // a script that merely looks like a push call must not abort the whole scrape
    if (err instanceof SyntaxError) return undefined;
    throw err;
  }
};

/**
 * makes a call to quora.com(with the resourceStr appended) and collects the question data that is embedded in the script tags of the returned page.
 * @param {string} resourceStr a string after the baseURL
 * @param {string} lang forwarded to getAxiosInstance (presumably a language code; confirm against that helper)
 * @returns {Promise<{question: object, answers: object[], related: object[], answerCount: number}>} raw data scraped from the page
 * @throws {AppError} 404 when the upstream responds with 404, 503 when it responds with 429 or 403, 500 when no question block could be found. Any other error is rethrown untouched.
 */
const answersFetcher = async (resourceStr, lang) => {
  try {
    const axiosInstance = getAxiosInstance(lang);
    const res = await axiosInstance.get(encodeURIComponent(resourceStr));
    const $ = cheerio.load(res.data);

    const rawData = { question: null, answers: [], related: [], answerCount: 0 };

    // there are about 9-10 script tags containing data we need
    $('body script').each((i, el) => {
      const matchedPart = extractPushedData($(el).html());

      // script without a usable payload: nothing to collect
      if (!matchedPart) return;

      // only question block has this word
      if (typeof matchedPart.question?.viewerHasAnswered !== 'undefined') {
        rawData.question = matchedPart.question;

        // primary answer block
      } else if (matchedPart.question?.answers?.edges) {
        // edges may be empty, and a node may carry no answer: skip instead of crashing/pushing undefined
        const answer = matchedPart.question.answers.edges[0]?.node?.answer;
        if (answer) rawData.answers.push(answer);

        // other answer blocks
      } else if (
        // eslint-disable-next-line no-underscore-dangle
        matchedPart.node?.__typename === 'QuestionRelevantAnswerItem2'
      ) {
        // consumers dereference every entry, so never store a missing answer
        if (matchedPart.node.answer) rawData.answers.push(matchedPart.node.answer);

        // related questions block contains both answer count and related questions
      } else if (matchedPart.bottomRelatedQuestionsInfo) {
        // keep the array/number defaults when the block omits either field
        rawData.related = matchedPart.bottomRelatedQuestionsInfo.relatedQuestions ?? [];
        rawData.answerCount = matchedPart.answerCount ?? 0;
      }
    });

    if (!rawData.question) throw new AppError("couldn't retrieve data", 500);

    return rawData;
  } catch (err) {
    // only HTTP errors carry a response; everything else falls through to the rethrow
    const statusCode = err.response?.status;

    if (statusCode === 404) {
      throw new AppError('Not found', 404);
    } else if (statusCode === 429 || statusCode === 403) {
      throw new AppError(
        'Quora is rate limiting this instance. Try another or host your own.',
        503
      );
    } else {
      throw err;
    }
  }
};
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// EXPORTS
|
||||
////////////////////////////////////////////////////////
|
||||
export default answersFetcher;
|
|
@ -1,87 +1,69 @@
|
|||
////////////////////////////////////////////////////////
|
||||
// IMPORTS
|
||||
////////////////////////////////////////////////////////
|
||||
// import log from '../utils/log.js';
|
||||
import AppError from '../utils/AppError.js';
|
||||
import { quetrefy } from '../utils/urlModifiers.js';
|
||||
import fetcher from './fetcher.js';
|
||||
import answersFetcher from './answersFetcher.js';
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// FUNCTION
|
||||
////////////////////////////////////////////////////////
|
||||
const KEYWORD = 'question';
|
||||
|
||||
const getAnswers = async (slug, lang) => {
|
||||
// getting data and destructuring it in case it exists
|
||||
const res = await fetcher(slug, { keyword: KEYWORD, lang });
|
||||
|
||||
const {
|
||||
data: { [KEYWORD]: rawData },
|
||||
} = JSON.parse(res);
|
||||
|
||||
if (!rawData)
|
||||
throw new AppError(
|
||||
"Answers couldn't be fetched. Recheck the URL, or resend the request if you believe the URL is correct.",
|
||||
404
|
||||
);
|
||||
const rawData = await answersFetcher(slug, lang);
|
||||
|
||||
// array containing all the answers with metadata
|
||||
const ansArr = rawData.pagedListDataConnection.edges
|
||||
.filter(ansObj => ansObj.node.answer !== undefined)
|
||||
.map(ansObj => ({
|
||||
text: JSON.parse(ansObj.node.answer.content).sections,
|
||||
isViewable: !!ansObj.node.answer.viewerHasAccess,
|
||||
creationTime: ansObj.node.answer.creationTime,
|
||||
updatedTime: ansObj.node.answer.updatedTime,
|
||||
numComments: ansObj.node.answer.numDisplayComments,
|
||||
numUpvotes: ansObj.node.answer.numUpvotes,
|
||||
numViews: ansObj.node.answer.numViews,
|
||||
numShares: ansObj.node.answer.numSharers,
|
||||
numAnswerRequests: ansObj.node.answer.numRequesters,
|
||||
aid: ansObj.node.answer.aid,
|
||||
isBusinessAnswer: ansObj.node.answer.businessAnswer,
|
||||
author: {
|
||||
uid: ansObj.node.answer.author.uid,
|
||||
isAnon: ansObj.node.answer.author.isAnon,
|
||||
image: ansObj.node.answer.author.profileImageUrl,
|
||||
isVerified: ansObj.node.answer.author.isVerified,
|
||||
url: quetrefy(ansObj.node.answer.author.profileUrl),
|
||||
name: `${ansObj.node.answer.author.names[0].givenName} ${ansObj.node.answer.author.names[0].familyName}`,
|
||||
credential: ansObj.node.answer.authorCredential?.translatedString,
|
||||
// additionalCredentials: ansObj.node.answer?.credibilityFacts.map(),
|
||||
},
|
||||
originalQuestion: {
|
||||
text: JSON.parse(ansObj.node.answer.question.title).sections,
|
||||
url: quetrefy(ansObj.node.answer.question.url),
|
||||
qid: ansObj.node.answer.question.qid,
|
||||
isDeleted: ansObj.node.answer.question.isDeleted,
|
||||
},
|
||||
}));
|
||||
const ansArr = rawData.answers.map(answer => ({
|
||||
text: JSON.parse(answer.content).sections,
|
||||
isViewable: !!answer.viewerHasAccess,
|
||||
creationTime: answer.creationTime,
|
||||
updatedTime: answer.updatedTime,
|
||||
numComments: answer.numDisplayComments,
|
||||
numUpvotes: answer.numUpvotes,
|
||||
numViews: answer.numViews,
|
||||
numShares: answer.numShares,
|
||||
numAnswerRequests: answer.numRequesters,
|
||||
aid: answer.aid,
|
||||
isBusinessAnswer: answer.businessAnswer,
|
||||
author: {
|
||||
uid: answer.author.uid,
|
||||
isAnon: answer.author.isAnon,
|
||||
image: answer.author.profileImageUrl,
|
||||
isVerified: answer.author.isVerified,
|
||||
url: quetrefy(answer.author.profileUrl),
|
||||
name: `${answer.author.names[0].givenName} ${answer.author.names[0].familyName}`,
|
||||
credential: answer.authorCredential?.translatedString,
|
||||
// additionalCredentials: answer.node.answer?.credibilityFacts.map(),
|
||||
},
|
||||
originalQuestion: {
|
||||
text: JSON.parse(answer.question.title).sections,
|
||||
url: quetrefy(answer.question.url),
|
||||
qid: answer.question.qid,
|
||||
isDeleted: answer.question.isDeleted,
|
||||
},
|
||||
}));
|
||||
|
||||
// main data object to be returned
|
||||
const data = {
|
||||
question: {
|
||||
text: JSON.parse(rawData.title).sections,
|
||||
url: quetrefy(rawData.url),
|
||||
qid: rawData.qid,
|
||||
idDeleted: rawData.isDeleted,
|
||||
isViewable: rawData.isVisibleToViewer,
|
||||
askerUid: rawData.asker.uid,
|
||||
text: JSON.parse(rawData.question.title).sections,
|
||||
url: quetrefy(rawData.question.url),
|
||||
qid: rawData.question.qid,
|
||||
idDeleted: rawData.question.isDeleted,
|
||||
isViewable: rawData.question.isVisibleToViewer,
|
||||
askerUid: rawData.question.asker.uid,
|
||||
},
|
||||
numAnswers: rawData.answerCount,
|
||||
answers: ansArr,
|
||||
topics: rawData.topics.map(topicObj => ({
|
||||
topics: rawData.question.topics.map(topicObj => ({
|
||||
tid: topicObj.tid,
|
||||
name: topicObj.name,
|
||||
url: quetrefy(topicObj.url),
|
||||
})),
|
||||
relatedQuestions: rawData.bottomRelatedQuestionsInfo.relatedQuestions.map(
|
||||
questionObj => ({
|
||||
qid: questionObj.qid,
|
||||
url: quetrefy(questionObj.url),
|
||||
text: JSON.parse(questionObj.title).sections,
|
||||
})
|
||||
),
|
||||
relatedQuestions: rawData.related.map(questionObj => ({
|
||||
qid: questionObj.qid,
|
||||
url: quetrefy(questionObj.url),
|
||||
text: JSON.parse(questionObj.title).sections,
|
||||
})),
|
||||
};
|
||||
|
||||
return data;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue