From e326249a571d30b1fde4df961e465ec61d04ba20 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Tue, 20 Aug 2024 14:26:42 -0300
Subject: [PATCH 1/2] added check job and cancel to fire-engine requests

---
 .../scraper/WebScraper/scrapers/fireEngine.ts | 26 ++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
index 7c24fab4..9c52b9e1 100644
--- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
+++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
@@ -86,7 +86,12 @@ export async function scrapWithFireEngine({
       pageOptions.atsv = false;
     }
 
-    const response = await axios.post(
+    const axiosInstance = axios.create({
+      headers: { "Content-Type": "application/json" }
+    });
+
+    const startTime = Date.now();
+    const response = await axiosInstance.post(
       process.env.FIRE_ENGINE_BETA_URL + endpoint,
       {
         url: url,
@@ -98,16 +103,31 @@ export async function scrapWithFireEngine({
         disableJsDom: pageOptions?.disableJsDom ?? false,
         priority,
         engine,
+        instantReturn: true,
         ...fireEngineOptionsParam,
       },
       {
         headers: {
           "Content-Type": "application/json",
-        },
-        timeout: universalTimeout + waitParam,
+        }
       }
     );
 
+    let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${response.data.jobId}`);
+    while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) {
+      await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
+      checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${response.data.jobId}`);
+    }
+
+    if (checkStatusResponse.data.processing) {
+      axiosInstance.delete(
+        process.env.FIRE_ENGINE_BETA_URL + `/scrape/${response.data.jobId}`,
+      );
+      Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`);
+      logParams.error_message = "Request timed out";
+      return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
+    }
+
     if (response.status !== 200) {
       Logger.debug(
         `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`
From 1f27182a139cdd6ccef33980ef265a0d56f98fe3 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Tue, 20 Aug 2024 15:42:39 -0300
Subject: [PATCH 2/2] added try catch

---
Review note: the cancel (DELETE) request is now awaited so that the
surrounding try/catch can actually observe a failed request. The hunk
counts below were adjusted for the added comment line; the blob hash on
the "index" line was not regenerated.

 .../src/scraper/WebScraper/scrapers/fireEngine.ts | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
index 9c52b9e1..10be4a1d 100644
--- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
+++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
@@ -120,9 +120,18 @@ export async function scrapWithFireEngine({
     }
 
     if (checkStatusResponse.data.processing) {
-      axiosInstance.delete(
-        process.env.FIRE_ENGINE_BETA_URL + `/scrape/${response.data.jobId}`,
-      );
+      Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${response.data.jobId}`);
+      try {
+        // Await the cancel call: an un-awaited promise rejects outside this try/catch, so the catch below would never run.
+        await axiosInstance.delete(
+          process.env.FIRE_ENGINE_BETA_URL + `/scrape/${response.data.jobId}`,
+        );
+      } catch (error) {
+        Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${response.data.jobId} | error: ${error}`);
+        logParams.error_message = "Failed to delete request";
+        return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
+      }
+
       Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`);
       logParams.error_message = "Request timed out";
       return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };