From 975f0575b4e031b017444c1d4f82cd533fd5e2a2 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 27 Sep 2024 12:58:57 -0400 Subject: [PATCH] Nick: max retries with axios-retry --- apps/api/package.json | 1 + apps/api/pnpm-lock.yaml | 21 +++++++++++++++---- .../scraper/WebScraper/scrapers/fireEngine.ts | 8 +++++-- apps/api/src/scraper/WebScraper/single_url.ts | 2 +- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/apps/api/package.json b/apps/api/package.json index 770828c1..a0f9cf8e 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -67,6 +67,7 @@ "async": "^3.2.5", "async-mutex": "^0.5.0", "axios": "^1.3.4", + "axios-retry": "^4.5.0", "bottleneck": "^2.19.5", "bullmq": "^5.11.0", "cacheable-lookup": "^6.1.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index b8f876a8..095b507c 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -65,6 +65,9 @@ importers: axios: specifier: ^1.3.4 version: 1.7.2 + axios-retry: + specifier: ^4.5.0 + version: 4.5.0(axios@1.7.2) bottleneck: specifier: ^2.19.5 version: 2.19.5 @@ -1903,6 +1906,11 @@ packages: axios-retry@3.9.1: resolution: {integrity: sha512-8PJDLJv7qTTMMwdnbMvrLYuvB47M81wRtxQmEdV5w4rgbTXTt+vtPkXwajOfOdSyv/wZICJOC+/UhXH4aQ/R+w==} + axios-retry@4.5.0: + resolution: {integrity: sha512-aR99oXhpEDGo0UuAlYcn2iGRds30k366Zfa05XWScR9QaQD4JYiP3/1Qt1u7YlefUOK+cn0CcwoL1oefavQUlQ==} + peerDependencies: + axios: 0.x || 1.x + axios@0.26.1: resolution: {integrity: sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==} @@ -4518,8 +4526,8 @@ packages: engines: {node: '>=14.17'} hasBin: true - typescript@5.5.4: - resolution: {integrity: sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==} + typescript@5.6.2: + resolution: {integrity: sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==} engines: {node: '>=14.17'} hasBin: true @@ -6950,6 +6958,11 @@ snapshots: '@babel/runtime': 7.24.6 is-retry-allowed: 2.2.0 + axios-retry@4.5.0(axios@1.7.2): + dependencies: + axios: 1.7.2 + is-retry-allowed: 2.2.0 + axios@0.26.1: dependencies: follow-redirects: 1.15.6 @@ -9195,7 +9208,7 @@ snapshots: csv-parse: 5.5.6 gpt3-tokenizer: 1.1.5 openai: 3.3.0 - typescript: 5.5.4 + typescript: 5.6.2 uuid: 9.0.1 zod: 3.23.8 transitivePeerDependencies: @@ -9793,7 +9806,7 @@ snapshots: typescript@5.4.5: {} - typescript@5.5.4: {} + typescript@5.6.2: {} typesense@1.8.2(@babel/runtime@7.24.6): dependencies: diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index dd3577b0..f715c427 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -6,7 +6,11 @@ import { fetchAndProcessPdf } from "../utils/pdfProcessor"; import { universalTimeout } from "../global"; import { Logger } from "../../../lib/logger"; import * as Sentry from "@sentry/node"; +import axiosRetry from 'axios-retry'; +axiosRetry(axios, { retries: 3 , onRetry:()=>{ + console.log("Retrying (fire-engine)..."); +}, retryDelay: axiosRetry.exponentialDelay}); /** * Scrapes a URL with Fire-Engine * @param url The URL to scrape @@ -203,10 +207,10 @@ export async function scrapWithFireEngine({ } } catch (error) { if (error.code === "ECONNABORTED") { - Logger.debug(`⛏️ Fire-Engine: Request timed out for ${url}`); + Logger.debug(`⛏️ Fire-Engine (catch block): Request timed out for ${url}`); logParams.error_message = "Request timed out"; } else { - Logger.debug(`⛏️ Fire-Engine: Failed to fetch url: ${url} | Error: ${error}`); + Logger.debug(`⛏️ Fire-Engine(catch block): Failed to fetch url: ${url} | Error: ${error}`); logParams.error_message = error.message || error; } return { html: "", pageStatusCode: null, pageError: logParams.error_message }; diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 27bdc3a8..db8c46b1 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -424,7 +424,7 @@ export async function scrapSingleUrl( Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`); break; } - if (pageStatusCode && (pageStatusCode == 404 || pageStatusCode == 500)) { + if (pageStatusCode && (pageStatusCode == 404)) { Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with status code 404, breaking`); break; }