Update document-scraper.ts
parent 2d4f4de0ab
commit 5030fea634
document-scraper.ts

@@ -12,6 +12,7 @@ interface ScrapeDocumentOptions {
   plan: PlanType;
   origin: string;
   timeout: number;
+  isSingleUrl?: boolean;
 }
 
 export async function scrapeDocument(
@@ -24,14 +25,14 @@ export async function scrapeDocument(
     trace.timing.scrapedAt = new Date().toISOString();
   }
 
-  const jobId = crypto.randomUUID();
-  const jobPriority = await getJobPriority({
-    plan: options.plan,
-    team_id: options.teamId,
-    basePriority: 10,
-  });
+  async function attemptScrape(timeout: number) {
+    const jobId = crypto.randomUUID();
+    const jobPriority = await getJobPriority({
+      plan: options.plan,
+      team_id: options.teamId,
+      basePriority: 10,
+    });
 
-  try {
     await addScrapeJob(
       {
         url: options.url,
@@ -50,7 +51,7 @@ export async function scrapeDocument(
       jobPriority,
     );
 
-    const doc = await waitForJob<Document>(jobId, options.timeout);
+    const doc = await waitForJob<Document>(jobId, timeout);
     await getScrapeQueue().remove(jobId);
 
     if (trace) {
@@ -63,6 +64,18 @@ export async function scrapeDocument(
     }
 
     return doc;
+  }
+
+  try {
+    try {
+      return await attemptScrape(options.timeout);
+    } catch (timeoutError) {
+      if (options.isSingleUrl) {
+        // For single URLs, try again with double timeout
+        return await attemptScrape(options.timeout * 2);
+      }
+      throw timeoutError;
+    }
   } catch (error) {
     logger.error(`Error in scrapeDocument: ${error}`);
     if (trace) {
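
Read as a whole, the change wraps the queue round-trip in a local attemptScrape(timeout) helper and, when the new isSingleUrl flag is set, retries once with the timeout doubled before letting the error reach the existing catch block. Below is a minimal, self-contained sketch of that control flow; ScrapeOptions, scrapeOnce, and the simulated timeout are hypothetical stand-ins for the real queue plumbing (addScrapeJob / waitForJob), not firecrawl's actual API.

// Hypothetical stand-ins: the real code enqueues a job (addScrapeJob) and
// waits for it (waitForJob); here one function simulates a single attempt.
interface ScrapeOptions {
  url: string;
  timeout: number;
  isSingleUrl?: boolean; // the flag added in this commit
}

async function scrapeOnce(url: string, timeout: number): Promise<string> {
  // Placeholder for the queue round-trip: "fails" when the pretend job
  // duration exceeds the allowed timeout.
  const jobDuration = 1500; // pretend the scrape takes 1.5 s
  if (jobDuration > timeout) {
    throw new Error(`Timed out after ${timeout}ms scraping ${url}`);
  }
  return `<html>…content of ${url}…</html>`;
}

// Mirrors the commit's shape: one attempt, then a single retry with the
// timeout doubled, but only for single-URL requests.
async function scrapeDocument(options: ScrapeOptions): Promise<string> {
  const attemptScrape = (timeout: number) => scrapeOnce(options.url, timeout);

  try {
    return await attemptScrape(options.timeout);
  } catch (timeoutError) {
    if (options.isSingleUrl) {
      // For single URLs, try again with double timeout
      return await attemptScrape(options.timeout * 2);
    }
    throw timeoutError;
  }
}

// Usage: the first attempt (1000 ms) fails, the retry with 2000 ms succeeds.
scrapeDocument({ url: "https://example.com", timeout: 1000, isSingleUrl: true })
  .then((doc) => console.log(doc))
  .catch((err) => console.error(err));

Note that in both the sketch and the diff, the inner catch retries on any error from the first attempt, not only timeouts, so a hard failure on a single URL costs one extra full attempt before the outer catch logs it.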