mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 20:29:01 +08:00
fix(scrapeURL/fire-engine): fast fail on chrome error
This commit is contained in:
parent
60ea97c51c
commit
42980c899d
@ -3,7 +3,7 @@ import * as Sentry from "@sentry/node";
|
|||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
|
||||||
import { robustFetch } from "../../lib/fetch";
|
import { robustFetch } from "../../lib/fetch";
|
||||||
import { EngineError } from "../../error";
|
import { EngineError, SiteError } from "../../error";
|
||||||
|
|
||||||
const successSchema = z.object({
|
const successSchema = z.object({
|
||||||
jobId: z.string(),
|
jobId: z.string(),
|
||||||
@ -90,11 +90,15 @@ export async function fireEngineCheckStatus(logger: Logger, jobId: string): Prom
|
|||||||
throw new StillProcessingError(jobId);
|
throw new StillProcessingError(jobId);
|
||||||
} else if (failedParse.success) {
|
} else if (failedParse.success) {
|
||||||
logger.debug("Scrape job failed", { status, jobId });
|
logger.debug("Scrape job failed", { status, jobId });
|
||||||
throw new EngineError("Scrape job failed", {
|
if (typeof status.error === "string" && status.error.includes("Chrome error: ")) {
|
||||||
cause: {
|
throw new SiteError(status.error.split("Chrome error: ")[1]);
|
||||||
status, jobId
|
} else {
|
||||||
}
|
throw new EngineError("Scrape job failed", {
|
||||||
});
|
cause: {
|
||||||
|
status, jobId
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.debug("Check status returned response not matched by any schema", { status, jobId });
|
logger.debug("Check status returned response not matched by any schema", { status, jobId });
|
||||||
throw new Error("Check status returned response not matched by any schema", {
|
throw new Error("Check status returned response not matched by any schema", {
|
||||||
|
@ -3,7 +3,7 @@ import { Meta } from "../..";
|
|||||||
import { fireEngineScrape, FireEngineScrapeRequestChromeCDP, FireEngineScrapeRequestCommon, FireEngineScrapeRequestPlaywright, FireEngineScrapeRequestTLSClient } from "./scrape";
|
import { fireEngineScrape, FireEngineScrapeRequestChromeCDP, FireEngineScrapeRequestCommon, FireEngineScrapeRequestPlaywright, FireEngineScrapeRequestTLSClient } from "./scrape";
|
||||||
import { EngineScrapeResult } from "..";
|
import { EngineScrapeResult } from "..";
|
||||||
import { fireEngineCheckStatus, FireEngineCheckStatusSuccess, StillProcessingError } from "./checkStatus";
|
import { fireEngineCheckStatus, FireEngineCheckStatusSuccess, StillProcessingError } from "./checkStatus";
|
||||||
import { EngineError, TimeoutError } from "../../error";
|
import { EngineError, SiteError, TimeoutError } from "../../error";
|
||||||
import * as Sentry from "@sentry/node";
|
import * as Sentry from "@sentry/node";
|
||||||
import { Action } from "../../../../lib/entities";
|
import { Action } from "../../../../lib/entities";
|
||||||
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
|
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
|
||||||
@ -41,7 +41,7 @@ async function performFireEngineScrape<Engine extends FireEngineScrapeRequestChr
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error instanceof StillProcessingError) {
|
if (error instanceof StillProcessingError) {
|
||||||
// nop
|
// nop
|
||||||
} else if (error instanceof EngineError) {
|
} else if (error instanceof EngineError || error instanceof SiteError) {
|
||||||
logger.debug("Fire-engine scrape job failed.", { error, jobId: scrape.jobId });
|
logger.debug("Fire-engine scrape job failed.", { error, jobId: scrape.jobId });
|
||||||
throw error;
|
throw error;
|
||||||
} else {
|
} else {
|
||||||
|
@ -31,4 +31,12 @@ export class AddFeatureError extends Error {
|
|||||||
super("New feature flags have been discovered: " + featureFlags.join(", "));
|
super("New feature flags have been discovered: " + featureFlags.join(", "));
|
||||||
this.featureFlags = featureFlags;
|
this.featureFlags = featureFlags;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export class SiteError extends Error {
|
||||||
|
public code: string;
|
||||||
|
constructor(code: string) {
|
||||||
|
super("Specified URL is failing to load in the browser. Error code: " + code)
|
||||||
|
this.code = code;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -5,7 +5,7 @@ import { Document, ScrapeOptions } from "../../controllers/v1/types";
|
|||||||
import { logger } from "../../lib/logger";
|
import { logger } from "../../lib/logger";
|
||||||
import { buildFallbackList, Engine, EngineScrapeResult, FeatureFlag, scrapeURLWithEngine } from "./engines";
|
import { buildFallbackList, Engine, EngineScrapeResult, FeatureFlag, scrapeURLWithEngine } from "./engines";
|
||||||
import { parseMarkdown } from "../../lib/html-to-markdown";
|
import { parseMarkdown } from "../../lib/html-to-markdown";
|
||||||
import { AddFeatureError, EngineError, NoEnginesLeftError, TimeoutError } from "./error";
|
import { AddFeatureError, EngineError, NoEnginesLeftError, SiteError, TimeoutError } from "./error";
|
||||||
import { executeTransformers } from "./transformers";
|
import { executeTransformers } from "./transformers";
|
||||||
import { LLMRefusalError } from "./transformers/llmExtract";
|
import { LLMRefusalError } from "./transformers/llmExtract";
|
||||||
import { urlSpecificParams } from "./lib/urlSpecificParams";
|
import { urlSpecificParams } from "./lib/urlSpecificParams";
|
||||||
@ -227,6 +227,8 @@ async function scrapeURLLoop(
|
|||||||
error.results = results;
|
error.results = results;
|
||||||
meta.logger.warn("LLM refusal encountered", { error });
|
meta.logger.warn("LLM refusal encountered", { error });
|
||||||
throw error;
|
throw error;
|
||||||
|
} else if (error instanceof SiteError) {
|
||||||
|
throw error;
|
||||||
} else {
|
} else {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
meta.logger.info("An unexpected error happened while scraping with " + engine + ".", { error });
|
meta.logger.info("An unexpected error happened while scraping with " + engine + ".", { error });
|
||||||
@ -306,6 +308,8 @@ export async function scrapeURL(
|
|||||||
} else if (error instanceof Error && error.message.includes("Invalid schema for response_format")) { // TODO: seperate into custom error
|
} else if (error instanceof Error && error.message.includes("Invalid schema for response_format")) { // TODO: seperate into custom error
|
||||||
meta.logger.warn("scrapeURL: LLM schema error", { error });
|
meta.logger.warn("scrapeURL: LLM schema error", { error });
|
||||||
// TODO: results?
|
// TODO: results?
|
||||||
|
} else if (error instanceof SiteError) {
|
||||||
|
meta.logger.warn("scrapeURL: Site failed to load in browser", { error });
|
||||||
} else {
|
} else {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
meta.logger.error("scrapeURL: Unexpected error happened", { error });
|
meta.logger.error("scrapeURL: Unexpected error happened", { error });
|
||||||
|
Loading…
x
Reference in New Issue
Block a user