diff --git a/apps/api/src/controllers/v0/admin/check-fire-engine.ts b/apps/api/src/controllers/v0/admin/check-fire-engine.ts
index 8e69d106..0671f7a9 100644
--- a/apps/api/src/controllers/v0/admin/check-fire-engine.ts
+++ b/apps/api/src/controllers/v0/admin/check-fire-engine.ts
@@ -2,7 +2,6 @@ import { logger } from "../../../lib/logger";
 import * as Sentry from "@sentry/node";
 import { Request, Response } from "express";
 
-
 export async function checkFireEngine(req: Request, res: Response) {
   try {
     if (!process.env.FIRE_ENGINE_BETA_URL) {
@@ -17,17 +16,20 @@ export async function checkFireEngine(req: Request, res: Response) {
     const timeout = setTimeout(() => controller.abort(), 30000);
 
     try {
-      const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/scrape`, {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/json",
-          "X-Disable-Cache": "true",
+      const response = await fetch(
+        `${process.env.FIRE_ENGINE_BETA_URL}/scrape`,
+        {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "X-Disable-Cache": "true",
+          },
+          body: JSON.stringify({
+            url: "https://example.com",
+          }),
+          signal: controller.signal,
         },
-        body: JSON.stringify({
-          url: "https://example.com",
-        }),
-        signal: controller.signal,
-      });
+      );
 
       clearTimeout(timeout);
 
@@ -43,7 +45,7 @@ export async function checkFireEngine(req: Request, res: Response) {
       });
     }
   } catch (error) {
-    if (error.name === 'AbortError') {
+    if (error.name === "AbortError") {
      return res.status(504).json({
        success: false,
        error: "Request timed out after 30 seconds",
diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts
index 411acfe6..63063576 100644
--- a/apps/api/src/main/runWebScraper.ts
+++ b/apps/api/src/main/runWebScraper.ts
@@ -74,7 +74,16 @@ export async function runWebScraper({
 
   for (let i = 0; i < tries; i++) {
     if (i > 0) {
-      logger.debug("Retrying scrape...", { scrapeId: bull_job_id, jobId: bull_job_id, method: "runWebScraper", module: "runWebScraper", tries, i, previousStatusCode: (response as any)?.document?.metadata?.statusCode, previousError: error });
+      logger.debug("Retrying scrape...", {
+        scrapeId: bull_job_id,
+        jobId: bull_job_id,
+        method: "runWebScraper",
+        module: "runWebScraper",
+        tries,
+        i,
+        previousStatusCode: (response as any)?.document?.metadata?.statusCode,
+        previousError: error,
+      });
     }
 
     response = undefined;
@@ -100,13 +109,17 @@ export async function runWebScraper({
         );
       }
     }
-    
+
     // This is where the returnvalue from the job is set
     // onSuccess(response.document, mode);
-    
+
     engines = response.engines;
 
-    if ((response.document.metadata.statusCode >= 200 && response.document.metadata.statusCode < 300) || response.document.metadata.statusCode === 304) {
+    if (
+      (response.document.metadata.statusCode >= 200 &&
+        response.document.metadata.statusCode < 300) ||
+      response.document.metadata.statusCode === 304
+    ) {
       // status code is good -- do not attempt retry
       break;
     }
@@ -121,34 +134,34 @@ export async function runWebScraper({
   }
 
   const engineOrder = Object.entries(engines)
-      .sort((a, b) => a[1].startedAt - b[1].startedAt)
-      .map((x) => x[0]) as Engine[];
+    .sort((a, b) => a[1].startedAt - b[1].startedAt)
+    .map((x) => x[0]) as Engine[];
 
-    for (const engine of engineOrder) {
-      const result = engines[engine] as Exclude<
-        EngineResultsTracker[Engine],
-        undefined
-      >;
-      ScrapeEvents.insert(bull_job_id, {
-        type: "scrape",
-        url,
-        method: engine,
-        result: {
-          success: result.state === "success",
-          response_code:
-            result.state === "success" ? result.result.statusCode : undefined,
-          response_size:
-            result.state === "success" ? result.result.html.length : undefined,
-          error:
-            result.state === "error"
-              ? result.error
-              : result.state === "timeout"
-                ? "Timed out"
-                : undefined,
-          time_taken: result.finishedAt - result.startedAt,
-        },
-      });
-    }
+  for (const engine of engineOrder) {
+    const result = engines[engine] as Exclude<
+      EngineResultsTracker[Engine],
+      undefined
+    >;
+    ScrapeEvents.insert(bull_job_id, {
+      type: "scrape",
+      url,
+      method: engine,
+      result: {
+        success: result.state === "success",
+        response_code:
+          result.state === "success" ? result.result.statusCode : undefined,
+        response_size:
+          result.state === "success" ? result.result.html.length : undefined,
+        error:
+          result.state === "error"
+            ? result.error
+            : result.state === "timeout"
+              ? "Timed out"
+              : undefined,
+        time_taken: result.finishedAt - result.startedAt,
+      },
+    });
+  }
 
   if (error === undefined && response?.success) {
     if (is_scrape === false) {
diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
index ba382040..16e9e45f 100644
--- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts
+++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
@@ -10,7 +10,7 @@ function encryptAES(plaintext: string, key: Buffer): string {
   const cipher = crypto.createCipheriv(algorithm, key, null);
   const encrypted = Buffer.concat([
     cipher.update(plaintext, "utf-8"),
-    cipher.final()
+    cipher.final(),
   ]);
   return encrypted.toString("base64");
 }
@@ -68,7 +68,10 @@ const urlBlocklist = [
   "l8GDVI8w/ueHnNzdN1ODuQ==",
 ];
 
-const decryptedBlocklist = hashKey.length > 0 ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey)) : [];
+const decryptedBlocklist =
+  hashKey.length > 0
+    ? urlBlocklist.map((ciphertext) => decryptAES(ciphertext, hashKey))
+    : [];
 
 const allowedKeywords = [
   "pulse",
@@ -128,4 +131,4 @@ export function isUrlBlocked(url: string): boolean {
     logger.error(`Error parsing the following URL: ${url}`);
     return false;
   }
-}
\ No newline at end of file
+}