This commit is contained in:
Nicolas 2024-08-30 17:32:41 -03:00
parent 522d256b11
commit d347160ff9
3 changed files with 26 additions and 19 deletions

View File

@ -254,13 +254,26 @@ export async function scrapeController(req: Request, res: Response) {
}
}
}
// Strip fields from the scraped document that pageOptions does not ask for.
// NOTE: `doc` refers to the same object as `result.data`, so the deletes
// below mutate `result.data` in place rather than working on a copy.
let doc = result.data;
// Drop rawHtml unless pageOptions.includeRawHtml is set (also when pageOptions is absent).
if (!pageOptions || !pageOptions.includeRawHtml) {
if (doc && doc.rawHtml) {
delete doc.rawHtml;
}
}
// When includeExtract is set, drop markdown unless includeMarkdown is also set.
if(pageOptions && pageOptions.includeExtract) {
if(!pageOptions.includeMarkdown && doc && doc.markdown) {
delete doc.markdown;
}
}
logJob({
job_id: jobId,
success: result.success,
message: result.error,
num_docs: 1,
docs: [result.data],
docs: [doc],
time_taken: timeTakenInSeconds,
team_id: team_id,
mode: "scrape",

View File

@ -115,6 +115,18 @@ export async function scrapeController(
});
}
// Strip fields from `doc` that pageOptions does not ask for. The deletes
// mutate `doc` in place (`doc` is declared outside this excerpt).
// Drop rawHtml unless pageOptions.includeRawHtml is set (also when pageOptions is absent).
if (!pageOptions || !pageOptions.includeRawHtml) {
if (doc && doc.rawHtml) {
delete doc.rawHtml;
}
}
// When includeExtract is set, drop markdown unless includeMarkdown is also set.
if(pageOptions && pageOptions.includeExtract) {
if(!pageOptions.includeMarkdown && doc && doc.markdown) {
delete doc.markdown;
}
}
logJob({
job_id: jobId,
success: true,

View File

@ -202,24 +202,6 @@ async function processJob(job: Job, token: string) {
const rawHtml = docs[0] ? docs[0].rawHtml : "";
if (job.data.crawl_id && (!job.data.pageOptions || !job.data.pageOptions.includeRawHtml)) {
if (docs[0] && docs[0].rawHtml) {
delete docs[0].rawHtml;
}
}
if(job.data.pageOptions && job.data.pageOptions.includeExtract ) {
if(!job.data.pageOptions.includeMarkdown) {
delete docs[0].markdown;
}
// if(!job.data.pageOptions.includeRawHtml) {
// delete docs[0].rawHtml;
// }
// if(!job.data.pageOptions.includeHtml) {
// delete docs[0].html;
// }
}
const data = {
success,
result: {