mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 10:00:38 +08:00
feat(scrape): add error tallying instead of empty response
This commit is contained in:
parent
712ca31615
commit
01f42b980d
@ -64,22 +64,21 @@ export async function scrapeController(
|
||||
success: false,
|
||||
error: "Request timed out",
|
||||
});
|
||||
} else {
|
||||
} else if (typeof e === "string" && e.startsWith("{\"type\":\"all\",")) {
|
||||
return res.status(500).json({
|
||||
success: false,
|
||||
error: `(Internal server error) - ${e && e?.message ? e.message : e} ${
|
||||
extractorOptions && extractorOptions.mode !== "markdown"
|
||||
? " - Could be due to LLM parsing issues"
|
||||
: ""
|
||||
}`,
|
||||
error: "All scraping methods failed for URL: " + req.body.url,
|
||||
details: JSON.parse(e).errors as string[],
|
||||
});
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
await job.remove();
|
||||
|
||||
if (!doc) {
|
||||
console.error("!!! PANIC DOC IS", doc, job);
|
||||
// console.error("!!! PANIC DOC IS", doc, job);
|
||||
return res.status(200).json({
|
||||
success: true,
|
||||
warning: "No page found",
|
||||
|
@ -351,6 +351,9 @@ export async function scrapSingleUrl(
|
||||
pageStatusCode: 200,
|
||||
pageError: undefined,
|
||||
};
|
||||
|
||||
const errors: Record<string, string> = {};
|
||||
|
||||
try {
|
||||
let urlKey = urlToScrap;
|
||||
try {
|
||||
@ -392,6 +395,12 @@ export async function scrapSingleUrl(
|
||||
pageError = undefined;
|
||||
}
|
||||
|
||||
if (attempt.pageError) {
|
||||
errors[scraper] = attempt.pageError;
|
||||
} else {
|
||||
errors[scraper] = null;
|
||||
}
|
||||
|
||||
if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
|
||||
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
|
||||
break;
|
||||
@ -443,12 +452,17 @@ export async function scrapSingleUrl(
|
||||
|
||||
return document;
|
||||
} catch (error) {
|
||||
Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
|
||||
Logger.error(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
|
||||
ScrapeEvents.insert(jobId, {
|
||||
type: "error",
|
||||
message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error),
|
||||
stack: error.stack,
|
||||
});
|
||||
|
||||
if (error instanceof Error && error.message.startsWith("All scraping methods failed")) {
|
||||
throw new Error(JSON.stringify({"type": "all", "errors": Object.values(errors)}));
|
||||
}
|
||||
|
||||
return {
|
||||
content: "",
|
||||
markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined,
|
||||
|
Loading…
x
Reference in New Issue
Block a user