mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 12:30:36 +08:00
Revert error tallying
This commit is contained in:
parent
a4b128e8b7
commit
a59b5836d5
@ -283,21 +283,13 @@ export async function scrapeController(req: Request, res: Response) {
|
||||
|
||||
return res.status(result.returnCode).json(result);
|
||||
} catch (error) {
|
||||
if (typeof error === "string" && error.startsWith("{\"type\":\"all\",")) {
|
||||
return res.status(500).json({
|
||||
success: false,
|
||||
error: "All scraping methods failed for URL: " + req.body.url,
|
||||
details: JSON.parse(error).errors as string[],
|
||||
});
|
||||
} else {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({
|
||||
error:
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({
|
||||
error:
|
||||
typeof error === "string"
|
||||
? error
|
||||
: error?.message ?? "Internal Server Error",
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -196,9 +196,7 @@ export async function searchController(req: Request, res: Response) {
|
||||
return res.status(408).json({ error: "Request timed out" });
|
||||
}
|
||||
|
||||
if (!(error instanceof Error && error.message.startsWith('{"type":"all",'))) {
|
||||
Sentry.captureException(error);
|
||||
}
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
@ -64,21 +64,22 @@ export async function scrapeController(
|
||||
success: false,
|
||||
error: "Request timed out",
|
||||
});
|
||||
} else if (typeof e === "string" && e.startsWith("{\"type\":\"all\",")) {
|
||||
} else {
|
||||
return res.status(500).json({
|
||||
success: false,
|
||||
error: "All scraping methods failed for URL: " + req.body.url,
|
||||
details: JSON.parse(e).errors as string[],
|
||||
error: `(Internal server error) - ${e && e?.message ? e.message : e} ${
|
||||
extractorOptions && extractorOptions.mode !== "markdown"
|
||||
? " - Could be due to LLM parsing issues"
|
||||
: ""
|
||||
}`,
|
||||
});
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
await job.remove();
|
||||
|
||||
if (!doc) {
|
||||
// console.error("!!! PANIC DOC IS", doc, job);
|
||||
console.error("!!! PANIC DOC IS", doc, job);
|
||||
return res.status(200).json({
|
||||
success: true,
|
||||
warning: "No page found",
|
||||
|
@ -357,9 +357,6 @@ export async function scrapSingleUrl(
|
||||
pageStatusCode: 200,
|
||||
pageError: undefined,
|
||||
};
|
||||
|
||||
const errors: Record<string, string> = {};
|
||||
|
||||
try {
|
||||
let urlKey = urlToScrap;
|
||||
try {
|
||||
@ -401,12 +398,6 @@ export async function scrapSingleUrl(
|
||||
pageError = undefined;
|
||||
}
|
||||
|
||||
if (attempt.pageError) {
|
||||
errors[scraper] = attempt.pageError;
|
||||
} else {
|
||||
errors[scraper] = null;
|
||||
}
|
||||
|
||||
if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
|
||||
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
|
||||
break;
|
||||
@ -421,9 +412,7 @@ export async function scrapSingleUrl(
|
||||
// }
|
||||
}
|
||||
|
||||
// NOTE: This exception for status codes may only work with fire-engine. In lieu of better error management,
|
||||
// it's the best we can do. - mogery
|
||||
if (!text && !Object.values(errors).some(x => x.startsWith("Request failed with status code ") || x === "NOT FOUND")) {
|
||||
if (!text) {
|
||||
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
|
||||
}
|
||||
|
||||
@ -460,17 +449,12 @@ export async function scrapSingleUrl(
|
||||
|
||||
return document;
|
||||
} catch (error) {
|
||||
Logger.error(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
|
||||
Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
|
||||
ScrapeEvents.insert(jobId, {
|
||||
type: "error",
|
||||
message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error),
|
||||
stack: error.stack,
|
||||
});
|
||||
|
||||
if (error instanceof Error && error.message.startsWith("All scraping methods failed")) {
|
||||
throw new Error(JSON.stringify({"type": "all", "errors": Object.values(errors)}));
|
||||
}
|
||||
|
||||
return {
|
||||
content: "",
|
||||
markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined,
|
||||
|
@ -448,7 +448,7 @@ async function processJob(job: Job, token: string) {
|
||||
} catch (error) {
|
||||
Logger.error(`🐂 Job errored ${job.id} - ${error}`);
|
||||
|
||||
if (!(error instanceof Error && (error.message.includes("JSON parsing error(s): ") || error.message.startsWith('{"type":"all",')))) {
|
||||
if (!(error instanceof Error && error.message.includes("JSON parsing error(s): "))) {
|
||||
Sentry.captureException(error, {
|
||||
data: {
|
||||
job: job.id,
|
||||
|
Loading…
x
Reference in New Issue
Block a user