mirror of
https://github.com/zyachel/quetre.git
synced 2025-04-05 14:07:37 +03:00
fix: fix broken answers feed
This commit fixes the issue of answers not being shown: the underlying HTML changed, which broke the old scraping logic. BREAKING CHANGE: older versions will not work with the answer route. Fixes https://github.com/zyachel/quetre/issues/96
This commit is contained in:
parent
9d195c2d33
commit
677b846eb2
2 changed files with 119 additions and 61 deletions
76
fetchers/answersFetcher.js
Normal file
76
fetchers/answersFetcher.js
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
/* eslint-disable no-useless-catch */
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// IMPORTS
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
import * as cheerio from 'cheerio';
|
||||||
|
import getAxiosInstance from '../utils/getAxiosInstance.js';
|
||||||
|
import AppError from '../utils/AppError.js';
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// FUNCTION
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
/**
 * Requests a question page (resourceStr, URI-encoded, is passed to the axios
 * instance returned by getAxiosInstance) and collects the question, its
 * answers and the related questions from the data embedded in the page's
 * inline `<script>` tags.
 *
 * Script tags that don't carry a parsable payload are skipped instead of
 * aborting the whole request.
 *
 * @param {string} resourceStr a string after the baseURL
 * @param {string} lang language handed to getAxiosInstance
 * @returns {Promise<{question: object, answers: object[], related: object[], answerCount: number}>}
 * raw data picked out of the page
 * @throws {AppError} 404 if the upstream response is 404, 503 if it is 429/403,
 * 500 if no question block could be found. Any other error is rethrown as is.
 */
const answersFetcher = async (resourceStr, lang) => {
  try {
    const axiosInstance = getAxiosInstance(lang);
    const res = await axiosInstance.get(encodeURIComponent(resourceStr));
    const $ = cheerio.load(res.data);

    const rawData = { question: null, answers: [], related: [], answerCount: 0 };

    // there are about 9-10 script tags containing data we need
    $('body script').each((i, el) => {
      const text = $(el).html() ?? '';
      const matches = text.match(/\.push\((".*")\);/); // data is contained like: someProp.push("<data>");

      if (!matches) return;

      // the payload is a JSON document serialised as a JSON string, hence the
      // double parse. Unrelated scripts may also call `.push("...")`, so a
      // parse failure only means "not one of our blocks".
      let matchedPart;
      try {
        matchedPart = JSON.parse(JSON.parse(matches[1]))?.data;
      } catch {
        return;
      }

      if (matchedPart === null || typeof matchedPart !== 'object') return;

      // only question block has this word
      if (typeof matchedPart.question?.viewerHasAnswered !== 'undefined') {
        rawData.question = matchedPart.question;

        // primary answer block
      } else if (matchedPart.question?.answers?.edges) {
        // edges may be empty or lack an answer node; never push undefined
        const primaryAnswer = matchedPart.question.answers.edges[0]?.node?.answer;
        if (primaryAnswer) rawData.answers.push(primaryAnswer);

        // other answer blocks
      } else if (
        // eslint-disable-next-line no-underscore-dangle
        matchedPart.node?.__typename === 'QuestionRelevantAnswerItem2'
      ) {
        if (matchedPart.node.answer) rawData.answers.push(matchedPart.node.answer);

        // related questions block contains both answer count and related questions
      } else if (matchedPart.bottomRelatedQuestionsInfo) {
        // keep the defaults when a field is missing so the result shape is stable
        rawData.related =
          matchedPart.bottomRelatedQuestionsInfo.relatedQuestions ?? rawData.related;
        rawData.answerCount = matchedPart.answerCount ?? rawData.answerCount;
      }
    });

    if (!rawData.question) throw new AppError("couldn't retrieve data", 500);

    return rawData;
  } catch (err) {
    // translate upstream HTTP failures into app-level errors; rethrow the rest
    const statusCode = err.response?.status;
    if (statusCode === 404) throw new AppError('Not found', 404);
    else if (statusCode === 429 || statusCode === 403)
      throw new AppError(
        'Quora is rate limiting this instance. Try another or host your own.',
        503
      );
    else throw err;
  }
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// EXPORTS
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
export default answersFetcher;
|
|
@ -1,87 +1,69 @@
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// IMPORTS
|
// IMPORTS
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// import log from '../utils/log.js';
|
|
||||||
import AppError from '../utils/AppError.js';
|
|
||||||
import { quetrefy } from '../utils/urlModifiers.js';
|
import { quetrefy } from '../utils/urlModifiers.js';
|
||||||
import fetcher from './fetcher.js';
|
import answersFetcher from './answersFetcher.js';
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// FUNCTION
|
// FUNCTION
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
const KEYWORD = 'question';
|
|
||||||
|
|
||||||
const getAnswers = async (slug, lang) => {
|
const getAnswers = async (slug, lang) => {
|
||||||
// getting data and destructuring it in case it exists
|
// getting data and destructuring it in case it exists
|
||||||
const res = await fetcher(slug, { keyword: KEYWORD, lang });
|
const rawData = await answersFetcher(slug, lang);
|
||||||
|
|
||||||
const {
|
|
||||||
data: { [KEYWORD]: rawData },
|
|
||||||
} = JSON.parse(res);
|
|
||||||
|
|
||||||
if (!rawData)
|
|
||||||
throw new AppError(
|
|
||||||
"Answers couldn't be fetched. Recheck the URL, or resend the request if you believe the URL is correct.",
|
|
||||||
404
|
|
||||||
);
|
|
||||||
|
|
||||||
// array containing all the answers with metadata
|
// array containing all the answers with metadata
|
||||||
const ansArr = rawData.pagedListDataConnection.edges
|
const ansArr = rawData.answers.map(answer => ({
|
||||||
.filter(ansObj => ansObj.node.answer !== undefined)
|
text: JSON.parse(answer.content).sections,
|
||||||
.map(ansObj => ({
|
isViewable: !!answer.viewerHasAccess,
|
||||||
text: JSON.parse(ansObj.node.answer.content).sections,
|
creationTime: answer.creationTime,
|
||||||
isViewable: !!ansObj.node.answer.viewerHasAccess,
|
updatedTime: answer.updatedTime,
|
||||||
creationTime: ansObj.node.answer.creationTime,
|
numComments: answer.numDisplayComments,
|
||||||
updatedTime: ansObj.node.answer.updatedTime,
|
numUpvotes: answer.numUpvotes,
|
||||||
numComments: ansObj.node.answer.numDisplayComments,
|
numViews: answer.numViews,
|
||||||
numUpvotes: ansObj.node.answer.numUpvotes,
|
numShares: answer.numShares,
|
||||||
numViews: ansObj.node.answer.numViews,
|
numAnswerRequests: answer.numRequesters,
|
||||||
numShares: ansObj.node.answer.numSharers,
|
aid: answer.aid,
|
||||||
numAnswerRequests: ansObj.node.answer.numRequesters,
|
isBusinessAnswer: answer.businessAnswer,
|
||||||
aid: ansObj.node.answer.aid,
|
|
||||||
isBusinessAnswer: ansObj.node.answer.businessAnswer,
|
|
||||||
author: {
|
author: {
|
||||||
uid: ansObj.node.answer.author.uid,
|
uid: answer.author.uid,
|
||||||
isAnon: ansObj.node.answer.author.isAnon,
|
isAnon: answer.author.isAnon,
|
||||||
image: ansObj.node.answer.author.profileImageUrl,
|
image: answer.author.profileImageUrl,
|
||||||
isVerified: ansObj.node.answer.author.isVerified,
|
isVerified: answer.author.isVerified,
|
||||||
url: quetrefy(ansObj.node.answer.author.profileUrl),
|
url: quetrefy(answer.author.profileUrl),
|
||||||
name: `${ansObj.node.answer.author.names[0].givenName} ${ansObj.node.answer.author.names[0].familyName}`,
|
name: `${answer.author.names[0].givenName} ${answer.author.names[0].familyName}`,
|
||||||
credential: ansObj.node.answer.authorCredential?.translatedString,
|
credential: answer.authorCredential?.translatedString,
|
||||||
// additionalCredentials: ansObj.node.answer?.credibilityFacts.map(),
|
// additionalCredentials: answer.node.answer?.credibilityFacts.map(),
|
||||||
},
|
},
|
||||||
originalQuestion: {
|
originalQuestion: {
|
||||||
text: JSON.parse(ansObj.node.answer.question.title).sections,
|
text: JSON.parse(answer.question.title).sections,
|
||||||
url: quetrefy(ansObj.node.answer.question.url),
|
url: quetrefy(answer.question.url),
|
||||||
qid: ansObj.node.answer.question.qid,
|
qid: answer.question.qid,
|
||||||
isDeleted: ansObj.node.answer.question.isDeleted,
|
isDeleted: answer.question.isDeleted,
|
||||||
},
|
},
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// main data object to be returned
|
// main data object to be returned
|
||||||
const data = {
|
const data = {
|
||||||
question: {
|
question: {
|
||||||
text: JSON.parse(rawData.title).sections,
|
text: JSON.parse(rawData.question.title).sections,
|
||||||
url: quetrefy(rawData.url),
|
url: quetrefy(rawData.question.url),
|
||||||
qid: rawData.qid,
|
qid: rawData.question.qid,
|
||||||
idDeleted: rawData.isDeleted,
|
idDeleted: rawData.question.isDeleted,
|
||||||
isViewable: rawData.isVisibleToViewer,
|
isViewable: rawData.question.isVisibleToViewer,
|
||||||
askerUid: rawData.asker.uid,
|
askerUid: rawData.question.asker.uid,
|
||||||
},
|
},
|
||||||
numAnswers: rawData.answerCount,
|
numAnswers: rawData.answerCount,
|
||||||
answers: ansArr,
|
answers: ansArr,
|
||||||
topics: rawData.topics.map(topicObj => ({
|
topics: rawData.question.topics.map(topicObj => ({
|
||||||
tid: topicObj.tid,
|
tid: topicObj.tid,
|
||||||
name: topicObj.name,
|
name: topicObj.name,
|
||||||
url: quetrefy(topicObj.url),
|
url: quetrefy(topicObj.url),
|
||||||
})),
|
})),
|
||||||
relatedQuestions: rawData.bottomRelatedQuestionsInfo.relatedQuestions.map(
|
relatedQuestions: rawData.related.map(questionObj => ({
|
||||||
questionObj => ({
|
|
||||||
qid: questionObj.qid,
|
qid: questionObj.qid,
|
||||||
url: quetrefy(questionObj.url),
|
url: quetrefy(questionObj.url),
|
||||||
text: JSON.parse(questionObj.title).sections,
|
text: JSON.parse(questionObj.title).sections,
|
||||||
})
|
})),
|
||||||
),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue