chore: formatting

Author: Nicolas
Date: 2024-12-15 02:54:49 -03:00
parent 4987880b32
commit 588f747ee8
3 changed files with 64 additions and 46 deletions

View File

@@ -2,7 +2,6 @@ import { logger } from "../../../lib/logger";
 import * as Sentry from "@sentry/node";
 import { Request, Response } from "express";
 
 export async function checkFireEngine(req: Request, res: Response) {
   try {
     if (!process.env.FIRE_ENGINE_BETA_URL) {
@@ -17,17 +16,20 @@ export async function checkFireEngine(req: Request, res: Response) {
     const timeout = setTimeout(() => controller.abort(), 30000);
 
     try {
-      const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/scrape`, {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/json",
-          "X-Disable-Cache": "true",
-        },
-        body: JSON.stringify({
-          url: "https://example.com",
-        }),
-        signal: controller.signal,
-      });
+      const response = await fetch(
+        `${process.env.FIRE_ENGINE_BETA_URL}/scrape`,
+        {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "X-Disable-Cache": "true",
+          },
+          body: JSON.stringify({
+            url: "https://example.com",
+          }),
+          signal: controller.signal,
+        },
+      );
 
       clearTimeout(timeout);
@@ -43,7 +45,7 @@ export async function checkFireEngine(req: Request, res: Response) {
       });
     }
   } catch (error) {
-    if (error.name === 'AbortError') {
+    if (error.name === "AbortError") {
       return res.status(504).json({
         success: false,
         error: "Request timed out after 30 seconds",

View File

@@ -74,7 +74,16 @@ export async function runWebScraper({
   for (let i = 0; i < tries; i++) {
     if (i > 0) {
-      logger.debug("Retrying scrape...", { scrapeId: bull_job_id, jobId: bull_job_id, method: "runWebScraper", module: "runWebScraper", tries, i, previousStatusCode: (response as any)?.document?.metadata?.statusCode, previousError: error });
+      logger.debug("Retrying scrape...", {
+        scrapeId: bull_job_id,
+        jobId: bull_job_id,
+        method: "runWebScraper",
+        module: "runWebScraper",
+        tries,
+        i,
+        previousStatusCode: (response as any)?.document?.metadata?.statusCode,
+        previousError: error,
+      });
     }
 
     response = undefined;
@@ -106,7 +115,11 @@ export async function runWebScraper({
       engines = response.engines;
 
-      if ((response.document.metadata.statusCode >= 200 && response.document.metadata.statusCode < 300) || response.document.metadata.statusCode === 304) {
+      if (
+        (response.document.metadata.statusCode >= 200 &&
+          response.document.metadata.statusCode < 300) ||
+        response.document.metadata.statusCode === 304
+      ) {
         // status code is good -- do not attempt retry
         break;
       }
@@ -121,34 +134,34 @@ export async function runWebScraper({
   }
 
   const engineOrder = Object.entries(engines)
     .sort((a, b) => a[1].startedAt - b[1].startedAt)
     .map((x) => x[0]) as Engine[];
 
   for (const engine of engineOrder) {
     const result = engines[engine] as Exclude<
       EngineResultsTracker[Engine],
       undefined
     >;
     ScrapeEvents.insert(bull_job_id, {
       type: "scrape",
       url,
       method: engine,
       result: {
         success: result.state === "success",
         response_code:
           result.state === "success" ? result.result.statusCode : undefined,
         response_size:
           result.state === "success" ? result.result.html.length : undefined,
         error:
           result.state === "error"
             ? result.error
             : result.state === "timeout"
               ? "Timed out"
               : undefined,
         time_taken: result.finishedAt - result.startedAt,
       },
     });
   }
 
   if (error === undefined && response?.success) {
     if (is_scrape === false) {
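These hunks reformat an existing retry loop: the scrape attempt is repeated up to `tries` times, and the loop exits early once the returned status code is 2xx or 304. A hedged sketch of that shape, where scrapeOnce and Result are hypothetical stand-ins rather than Firecrawl APIs:

// Sketch of the retry shape visible above: retry unless the status code is good.
// scrapeOnce and Result are hypothetical stand-ins for the real scrape pipeline.
type Result = { success: boolean; statusCode: number };

async function scrapeWithRetries(
  scrapeOnce: () => Promise<Result>,
  tries = 3,
): Promise<Result | undefined> {
  let response: Result | undefined;

  for (let i = 0; i < tries; i++) {
    if (i > 0) {
      console.debug("Retrying scrape...", { tries, i });
    }

    response = await scrapeOnce();

    const code = response.statusCode;
    if ((code >= 200 && code < 300) || code === 304) {
      // status code is good -- do not attempt retry
      break;
    }
  }

  return response;
}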

View File

@@ -10,7 +10,7 @@ function encryptAES(plaintext: string, key: Buffer): string {
   const cipher = crypto.createCipheriv(algorithm, key, null);
   const encrypted = Buffer.concat([
     cipher.update(plaintext, "utf-8"),
-    cipher.final()
+    cipher.final(),
   ]);
   return encrypted.toString("base64");
 }
@@ -68,7 +68,10 @@ const urlBlocklist = [
   "l8GDVI8w/ueHnNzdN1ODuQ==",
 ];
 
-const decryptedBlocklist = hashKey.length > 0 ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey)) : [];
+const decryptedBlocklist =
+  hashKey.length > 0
+    ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey))
+    : [];
 
 const allowedKeywords = [
   "pulse",