mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-04 17:50:35 +08:00
Nick: scrape timeout + warnings
This commit is contained in:
parent
b693c6c23b
commit
30def84c0a
@ -128,7 +128,7 @@ export async function extractController(
|
||||
// Scrape all links in parallel
|
||||
const scrapePromises = links.map(async (url) => {
|
||||
const origin = req.body.origin || "api";
|
||||
const timeout = req.body.timeout ?? 30000;
|
||||
const timeout = Math.floor((req.body.timeout || 40000) * 0.7) || 30000; // Use 70% of total timeout for individual scrapes
|
||||
const jobId = crypto.randomUUID();
|
||||
|
||||
const jobPriority = await getJobPriority({
|
||||
@ -153,10 +153,8 @@ export async function extractController(
|
||||
jobPriority
|
||||
);
|
||||
|
||||
const totalWait = 0;
|
||||
|
||||
try {
|
||||
const doc = await waitForJob<Document>(jobId, timeout + totalWait);
|
||||
const doc = await waitForJob<Document>(jobId, timeout);
|
||||
await getScrapeQueue().remove(jobId);
|
||||
if (earlyReturn) {
|
||||
return null;
|
||||
@ -216,10 +214,13 @@ export async function extractController(
|
||||
// console.log("completions.extract", completions.extract);
|
||||
|
||||
let data: any;
|
||||
let warning = completions.warning ?? "";
|
||||
try {
|
||||
data = JSON.parse(completions.extract);
|
||||
} catch (e) {
|
||||
logger.warn(`ExtractController: Error parsing JSON: ${e}`);
|
||||
data = completions.extract;
|
||||
warning = "JSON could not be parsed correctly. Returning raw LLM output...";
|
||||
}
|
||||
|
||||
logJob({
|
||||
@ -241,5 +242,6 @@ export async function extractController(
|
||||
success: true,
|
||||
data: data,
|
||||
scrape_id: id,
|
||||
warning: warning
|
||||
});
|
||||
}
|
@ -163,7 +163,7 @@ export const extractV1Options = z.object({
|
||||
includeSubdomains: z.boolean().default(true),
|
||||
allowExternalLinks: z.boolean().default(false),
|
||||
origin: z.string().optional().default("api"),
|
||||
timeout: z.number().int().positive().finite().safe().default(60000),
|
||||
timeout: z.number().int().positive().finite().safe().default(60000)
|
||||
}).strict(strictMessage)
|
||||
|
||||
export type ExtractV1Options = z.infer<typeof extractV1Options>;
|
||||
|
Loading…
x
Reference in New Issue
Block a user