mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 17:39:06 +08:00
fix(scrape): poll
This commit is contained in:
parent
dd737f1235
commit
ad82175fb8
@ -9,7 +9,7 @@ import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import
|
||||
import { numTokensFromString } from '../lib/LLM-extraction/helpers';
|
||||
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
|
||||
import { addScrapeJob } from '../services/queue-jobs';
|
||||
import { scrapeQueueEvents } from '../services/queue-service';
|
||||
import { getScrapeQueue, scrapeQueueEvents } from '../services/queue-service';
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { Logger } from '../lib/logger';
|
||||
import * as Sentry from "@sentry/node";
|
||||
@ -52,7 +52,19 @@ export async function scrapeHelper(
|
||||
|
||||
const err = await Sentry.startSpan({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
|
||||
try {
|
||||
doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]
|
||||
doc = (await new Promise((resolve, reject) => {
|
||||
const start = Date.now();
|
||||
const int = setInterval(async () => {
|
||||
if (Date.now() >= start + timeout) {
|
||||
clearInterval(int);
|
||||
reject(new Error("Job wait "));
|
||||
} else if (await job.getState() === "completed") {
|
||||
clearInterval(int);
|
||||
resolve((await getScrapeQueue().getJob(job.id)).returnvalue);
|
||||
}
|
||||
}, 1000);
|
||||
job.waitUntilFinished(scrapeQueueEvents, timeout)
|
||||
}))[0]
|
||||
} catch (e) {
|
||||
if (e instanceof Error && e.message.startsWith("Job wait")) {
|
||||
span.setAttribute("timedOut", true);
|
||||
|
Loading…
x
Reference in New Issue
Block a user