Revert error tallying

This commit is contained in:
Gergő Móricz 2024-09-24 10:27:49 +02:00
parent a4b128e8b7
commit a59b5836d5
5 changed files with 16 additions and 41 deletions

View File

@@ -283,13 +283,6 @@ export async function scrapeController(req: Request, res: Response) {
return res.status(result.returnCode).json(result);
} catch (error) {
if (typeof error === "string" && error.startsWith("{\"type\":\"all\",")) {
return res.status(500).json({
success: false,
error: "All scraping methods failed for URL: " + req.body.url,
details: JSON.parse(error).errors as string[],
});
} else {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({
@@ -299,5 +292,4 @@ export async function scrapeController(req: Request, res: Response) {
: error?.message ?? "Internal Server Error",
});
}
}
}

View File

@@ -196,9 +196,7 @@ export async function searchController(req: Request, res: Response) {
return res.status(408).json({ error: "Request timed out" });
}
if (!(error instanceof Error && error.message.startsWith('{"type":"all",'))) {
Sentry.captureException(error);
}
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@@ -64,21 +64,22 @@ export async function scrapeController(
success: false,
error: "Request timed out",
});
} else if (typeof e === "string" && e.startsWith("{\"type\":\"all\",")) {
} else {
return res.status(500).json({
success: false,
error: "All scraping methods failed for URL: " + req.body.url,
details: JSON.parse(e).errors as string[],
error: `(Internal server error) - ${e && e?.message ? e.message : e} ${
extractorOptions && extractorOptions.mode !== "markdown"
? " - Could be due to LLM parsing issues"
: ""
}`,
});
} else {
throw e;
}
}
await job.remove();
if (!doc) {
// console.error("!!! PANIC DOC IS", doc, job);
console.error("!!! PANIC DOC IS", doc, job);
return res.status(200).json({
success: true,
warning: "No page found",

View File

@@ -357,9 +357,6 @@ export async function scrapSingleUrl(
pageStatusCode: 200,
pageError: undefined,
};
const errors: Record<string, string> = {};
try {
let urlKey = urlToScrap;
try {
@@ -401,12 +398,6 @@ export async function scrapSingleUrl(
pageError = undefined;
}
if (attempt.pageError) {
errors[scraper] = attempt.pageError;
} else {
errors[scraper] = null;
}
if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
break;
@@ -421,9 +412,7 @@ export async function scrapSingleUrl(
// }
}
// NOTE: This exception for status codes may only work with fire-engine. In lieu of better error management,
// it's the best we can do. - mogery
if (!text && !Object.values(errors).some(x => x.startsWith("Request failed with status code ") || x === "NOT FOUND")) {
if (!text) {
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
}
@@ -460,17 +449,12 @@ export async function scrapSingleUrl(
return document;
} catch (error) {
Logger.error(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
ScrapeEvents.insert(jobId, {
type: "error",
message: typeof error === "string" ? error : typeof error.message === "string" ? error.message : JSON.stringify(error),
stack: error.stack,
});
if (error instanceof Error && error.message.startsWith("All scraping methods failed")) {
throw new Error(JSON.stringify({"type": "all", "errors": Object.values(errors)}));
}
return {
content: "",
markdown: pageOptions.includeMarkdown || pageOptions.includeExtract ? "" : undefined,

View File

@@ -448,7 +448,7 @@ async function processJob(job: Job, token: string) {
} catch (error) {
Logger.error(`🐂 Job errored ${job.id} - ${error}`);
if (!(error instanceof Error && (error.message.includes("JSON parsing error(s): ") || error.message.startsWith('{"type":"all",')))) {
if (!(error instanceof Error && error.message.includes("JSON parsing error(s): "))) {
Sentry.captureException(error, {
data: {
job: job.id,