mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 06:59:01 +08:00

chore: formatting

parent 4987880b32
commit 588f747ee8
@@ -2,7 +2,6 @@ import { logger } from "../../../lib/logger";
 import * as Sentry from "@sentry/node";
 import { Request, Response } from "express";
-
 
 export async function checkFireEngine(req: Request, res: Response) {
   try {
     if (!process.env.FIRE_ENGINE_BETA_URL) {
@@ -17,17 +16,20 @@ export async function checkFireEngine(req: Request, res: Response) {
   const timeout = setTimeout(() => controller.abort(), 30000);
 
   try {
-    const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/scrape`, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        "X-Disable-Cache": "true",
+    const response = await fetch(
+      `${process.env.FIRE_ENGINE_BETA_URL}/scrape`,
+      {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "X-Disable-Cache": "true",
+        },
+        body: JSON.stringify({
+          url: "https://example.com",
+        }),
+        signal: controller.signal,
       },
-      body: JSON.stringify({
-        url: "https://example.com",
-      }),
-      signal: controller.signal,
-    });
+    );
 
     clearTimeout(timeout);
 
@@ -43,7 +45,7 @@ export async function checkFireEngine(req: Request, res: Response) {
       });
     }
   } catch (error) {
-    if (error.name === 'AbortError') {
+    if (error.name === "AbortError") {
       return res.status(504).json({
         success: false,
         error: "Request timed out after 30 seconds",
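Beyond the reflow, this controller's timeout handling is a reusable pattern: the fetch is raced against a 30-second timer via AbortController, and the catch branch above maps the resulting AbortError to a 504. A minimal sketch of that pattern, assuming only the standard Fetch and AbortController APIs (the helper name is illustrative, not from the diff):

// Sketch: abort a fetch after a deadline so a hung upstream (here, the
// fire-engine /scrape endpoint) cannot hold the request handler forever.
async function fetchWithTimeout(
  url: string,
  init: RequestInit,
  timeoutMs: number,
): Promise<globalThis.Response> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    // The signal makes fetch reject with an error named "AbortError"
    // once abort() fires; the caller can map that to a 504.
    return await fetch(url, { ...init, signal: controller.signal });
  } finally {
    // Clear the timer on both paths so it cannot fire late.
    clearTimeout(timer);
  }
}

With that shape, the error.name === "AbortError" check stays the only place that distinguishes a deadline from a genuine network failure.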
@@ -74,7 +74,16 @@ export async function runWebScraper({
 
   for (let i = 0; i < tries; i++) {
     if (i > 0) {
-      logger.debug("Retrying scrape...", { scrapeId: bull_job_id, jobId: bull_job_id, method: "runWebScraper", module: "runWebScraper", tries, i, previousStatusCode: (response as any)?.document?.metadata?.statusCode, previousError: error });
+      logger.debug("Retrying scrape...", {
+        scrapeId: bull_job_id,
+        jobId: bull_job_id,
+        method: "runWebScraper",
+        module: "runWebScraper",
+        tries,
+        i,
+        previousStatusCode: (response as any)?.document?.metadata?.statusCode,
+        previousError: error,
+      });
     }
 
     response = undefined;
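The expanded logger.debug call keeps the retry context (attempt counter, previous status code, previous error) as structured fields rather than one interpolated string, which is what makes the reformatted version easier to scan. The same idea as a generic helper; this is a hedged sketch, not repository code, and the logger signature is assumed:

// Sketch: a retry loop that logs structured context before each re-attempt,
// mirroring the loop above. All names are illustrative.
async function withRetries<T>(
  tries: number,
  attempt: () => Promise<T>,
  log: (msg: string, meta: Record<string, unknown>) => void,
): Promise<T> {
  let lastError: unknown;
  for (let i = 0; i < tries; i++) {
    if (i > 0) {
      // Structured fields stay queryable in log tooling.
      log("Retrying...", { tries, i, previousError: lastError });
    }
    try {
      return await attempt();
    } catch (error) {
      lastError = error;
    }
  }
  throw lastError;
}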
@@ -100,13 +109,17 @@ export async function runWebScraper({
         );
       }
     }
 
     // This is where the returnvalue from the job is set
    // onSuccess(response.document, mode);
 
     engines = response.engines;
 
-    if ((response.document.metadata.statusCode >= 200 && response.document.metadata.statusCode < 300) || response.document.metadata.statusCode === 304) {
+    if (
+      (response.document.metadata.statusCode >= 200 &&
+        response.document.metadata.statusCode < 300) ||
+      response.document.metadata.statusCode === 304
+    ) {
       // status code is good -- do not attempt retry
       break;
     }
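The rewrapped condition encodes the loop's exit rule: any 2xx is a success, and so is 304, since Not Modified means a usable cached copy that is not worth retrying. Pulled out as a standalone predicate (a hypothetical helper with the same logic):

// Sketch: the retry loop's notion of a "good" outcome.
function isGoodStatusCode(statusCode: number): boolean {
  return (statusCode >= 200 && statusCode < 300) || statusCode === 304;
}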
@@ -121,34 +134,34 @@ export async function runWebScraper({
   }
 
   const engineOrder = Object.entries(engines)
-    .sort((a, b) => a[1].startedAt - b[1].startedAt)
-    .map((x) => x[0]) as Engine[];
+    .sort((a, b) => a[1].startedAt - b[1].startedAt)
+    .map((x) => x[0]) as Engine[];
 
-  for (const engine of engineOrder) {
-    const result = engines[engine] as Exclude<
-      EngineResultsTracker[Engine],
-      undefined
-    >;
-    ScrapeEvents.insert(bull_job_id, {
-      type: "scrape",
-      url,
-      method: engine,
-      result: {
-        success: result.state === "success",
-        response_code:
-          result.state === "success" ? result.result.statusCode : undefined,
-        response_size:
-          result.state === "success" ? result.result.html.length : undefined,
-        error:
-          result.state === "error"
-            ? result.error
-            : result.state === "timeout"
-            ? "Timed out"
-            : undefined,
-        time_taken: result.finishedAt - result.startedAt,
-      },
-    });
-  }
+  for (const engine of engineOrder) {
+    const result = engines[engine] as Exclude<
+      EngineResultsTracker[Engine],
+      undefined
+    >;
+    ScrapeEvents.insert(bull_job_id, {
+      type: "scrape",
+      url,
+      method: engine,
+      result: {
+        success: result.state === "success",
+        response_code:
+          result.state === "success" ? result.result.statusCode : undefined,
+        response_size:
+          result.state === "success" ? result.result.html.length : undefined,
+        error:
+          result.state === "error"
+            ? result.error
+            : result.state === "timeout"
+              ? "Timed out"
+              : undefined,
+        time_taken: result.finishedAt - result.startedAt,
+      },
+    });
+  }
 
   if (error === undefined && response?.success) {
     if (is_scrape === false) {
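This whitespace-only hunk re-indents the block that recovers the order in which scrape engines were actually attempted: it sorts the keyed tracker map on each entry's startedAt, then emits one ScrapeEvents row per engine. The ordering step in isolation, with deliberately simplified types (the real EngineResultsTracker carries more state than this):

// Sketch: turn a keyed map of trackers into engine names in attempt order.
type TrackerSketch = { startedAt: number; finishedAt: number };

function attemptOrder(engines: Record<string, TrackerSketch>): string[] {
  return Object.entries(engines)
    .sort((a, b) => a[1].startedAt - b[1].startedAt)
    .map((x) => x[0]); // keep only the engine names
}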
@@ -10,7 +10,7 @@ function encryptAES(plaintext: string, key: Buffer): string {
   const cipher = crypto.createCipheriv(algorithm, key, null);
   const encrypted = Buffer.concat([
     cipher.update(plaintext, "utf-8"),
-    cipher.final()
+    cipher.final(),
   ]);
   return encrypted.toString("base64");
 }
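encryptAES passes null to crypto.createCipheriv where the IV belongs, which Node only accepts for IV-less modes such as AES-ECB. A round-trip sketch under that assumption; the algorithm constant and key derivation below are illustrative, and only the encryptAES body comes from the file:

import crypto from "crypto";

// Assumed mode: an IV-less AES variant such as aes-256-ecb, matching the
// null IV in the code above. The key derivation is illustrative only.
const algorithm = "aes-256-ecb";

function encryptAES(plaintext: string, key: Buffer): string {
  const cipher = crypto.createCipheriv(algorithm, key, null);
  const encrypted = Buffer.concat([
    cipher.update(plaintext, "utf-8"),
    cipher.final(),
  ]);
  return encrypted.toString("base64");
}

function decryptAES(ciphertext: string, key: Buffer): string {
  const decipher = crypto.createDecipheriv(algorithm, key, null);
  const decrypted = Buffer.concat([
    decipher.update(Buffer.from(ciphertext, "base64")),
    decipher.final(),
  ]);
  return decrypted.toString("utf-8");
}

// Round trip with an illustrative 32-byte key (sha256 digest length):
const key = crypto.createHash("sha256").update("example-secret").digest();
console.log(decryptAES(encryptAES("example.com", key), key)); // "example.com"

Storing the blocklist as ciphertext keeps the plaintext domains out of the source tree; a decryptAES counterpart like the one sketched here recovers them at startup when the key is configured.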
@@ -68,7 +68,10 @@ const urlBlocklist = [
   "l8GDVI8w/ueHnNzdN1ODuQ==",
 ];
 
-const decryptedBlocklist = hashKey.length > 0 ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey)) : [];
+const decryptedBlocklist =
+  hashKey.length > 0
+    ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey))
+    : [];
 
 const allowedKeywords = [
   "pulse",
@@ -128,4 +131,4 @@ export function isUrlBlocked(url: string): boolean {
     logger.error(`Error parsing the following URL: ${url}`);
     return false;
   }
-}
+}
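The closing lines of isUrlBlocked show a fail-open choice: when the URL cannot be parsed at all, the function logs and returns false instead of refusing the scrape. A skeleton of that shape; the matching logic elided by the hunk is replaced with a hypothetical hostname check:

// Sketch: parse defensively and fail open, so a malformed URL is logged
// rather than blocked outright.
function isUrlBlockedSketch(url: string, decryptedBlocklist: string[]): boolean {
  try {
    const hostname = new URL(url).hostname.toLowerCase();
    return decryptedBlocklist.some((blocked) => hostname.includes(blocked));
  } catch {
    console.error(`Error parsing the following URL: ${url}`);
    return false;
  }
}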