mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-04 19:00:37 +08:00
fix: aaaaahhh
This commit is contained in:
parent
6798695ee4
commit
d1a3df6d08
@ -11,6 +11,7 @@ import { numTokensFromString } from '../lib/LLM-extraction/helpers';
|
|||||||
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
|
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
|
||||||
import { addWebScraperJob } from '../services/queue-jobs';
|
import { addWebScraperJob } from '../services/queue-jobs';
|
||||||
import { getWebScraperQueue } from '../services/queue-service';
|
import { getWebScraperQueue } from '../services/queue-service';
|
||||||
|
import { supabase_service } from '../services/supabase';
|
||||||
|
|
||||||
export async function scrapeHelper(
|
export async function scrapeHelper(
|
||||||
req: Request,
|
req: Request,
|
||||||
@ -64,10 +65,10 @@ export async function scrapeHelper(
|
|||||||
promiseResolve = resolve;
|
promiseResolve = resolve;
|
||||||
});
|
});
|
||||||
|
|
||||||
const listener = (j: string) => {
|
const listener = (j: string, res: any) => {
|
||||||
console.log("JOB COMPLETED", j, "vs", job.id);
|
console.log("JOB COMPLETED", j, "vs", job.id, res);
|
||||||
if (j === job.id) {
|
if (j === job.id) {
|
||||||
promiseResolve(j);
|
promiseResolve([j, res]);
|
||||||
wsq.removeListener("global:completed", listener);
|
wsq.removeListener("global:completed", listener);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -86,15 +87,23 @@ export async function scrapeHelper(
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
const jobNew = (await wsq.getJob(j));
|
let j1 = typeof j[1] === "string" ? JSON.parse(j[1]) : j[1];
|
||||||
const doc = jobNew.progress().currentDocument;
|
|
||||||
delete doc.index;
|
const doc = j1 !== null ? j1.result.links[0].content : (await supabase_service
|
||||||
|
.from("firecrawl_jobs")
|
||||||
|
.select("docs")
|
||||||
|
.eq("job_id", job.id as string)).data[0]?.docs[0];
|
||||||
|
|
||||||
// make sure doc.content is not empty
|
|
||||||
if (!doc) {
|
if (!doc) {
|
||||||
return { success: true, error: "No page found", returnCode: 200, data: doc };
|
return { success: true, error: "No page found", returnCode: 200, data: doc };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
delete doc.index;
|
||||||
|
delete doc.provider;
|
||||||
|
|
||||||
|
// make sure doc.content is not empty
|
||||||
|
|
||||||
|
|
||||||
// Remove rawHtml if pageOptions.rawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html
|
// Remove rawHtml if pageOptions.rawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html
|
||||||
if (!pageOptions.includeRawHtml && extractorOptions.mode == "llm-extraction-from-raw-html") {
|
if (!pageOptions.includeRawHtml && extractorOptions.mode == "llm-extraction-from-raw-html") {
|
||||||
delete doc.rawHtml;
|
delete doc.rawHtml;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user