fix: aaaaahhh

This commit is contained in:
Gergő Móricz 2024-07-25 00:50:03 +02:00
parent 6798695ee4
commit d1a3df6d08

View File

@ -11,6 +11,7 @@ import { numTokensFromString } from '../lib/LLM-extraction/helpers';
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values'; import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
import { addWebScraperJob } from '../services/queue-jobs'; import { addWebScraperJob } from '../services/queue-jobs';
import { getWebScraperQueue } from '../services/queue-service'; import { getWebScraperQueue } from '../services/queue-service';
import { supabase_service } from '../services/supabase';
export async function scrapeHelper( export async function scrapeHelper(
req: Request, req: Request,
@ -64,10 +65,10 @@ export async function scrapeHelper(
promiseResolve = resolve; promiseResolve = resolve;
}); });
const listener = (j: string) => { const listener = (j: string, res: any) => {
console.log("JOB COMPLETED", j, "vs", job.id); console.log("JOB COMPLETED", j, "vs", job.id, res);
if (j === job.id) { if (j === job.id) {
promiseResolve(j); promiseResolve([j, res]);
wsq.removeListener("global:completed", listener); wsq.removeListener("global:completed", listener);
} }
} }
@ -86,15 +87,23 @@ export async function scrapeHelper(
return error; return error;
} }
const jobNew = (await wsq.getJob(j)); let j1 = typeof j[1] === "string" ? JSON.parse(j[1]) : j[1];
const doc = jobNew.progress().currentDocument;
delete doc.index; const doc = j1 !== null ? j1.result.links[0].content : (await supabase_service
.from("firecrawl_jobs")
.select("docs")
.eq("job_id", job.id as string)).data[0]?.docs[0];
// make sure doc.content is not empty
if (!doc) { if (!doc) {
return { success: true, error: "No page found", returnCode: 200, data: doc }; return { success: true, error: "No page found", returnCode: 200, data: doc };
} }
delete doc.index;
delete doc.provider;
// make sure doc.content is not empty
// Remove rawHtml if pageOptions.rawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html // Remove rawHtml if pageOptions.rawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html
if (!pageOptions.includeRawHtml && extractorOptions.mode == "llm-extraction-from-raw-html") { if (!pageOptions.includeRawHtml && extractorOptions.mode == "llm-extraction-from-raw-html") {
delete doc.rawHtml; delete doc.rawHtml;