From 7f57c868be83f2c1c5e92f0eaba81dcdde0f1836 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Mon, 16 Dec 2024 23:08:20 +0100 Subject: [PATCH] Revert "fix(scrapeURL): better timeToRun distribution" This reverts commit 284a6ccedd1baede825571ee933eb7e4f773e2de. --- apps/api/src/scraper/scrapeURL/index.ts | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/apps/api/src/scraper/scrapeURL/index.ts b/apps/api/src/scraper/scrapeURL/index.ts index 31f7e2f2..800457a8 100644 --- a/apps/api/src/scraper/scrapeURL/index.ts +++ b/apps/api/src/scraper/scrapeURL/index.ts @@ -203,20 +203,15 @@ async function scrapeURLLoop(meta: Meta): Promise { const results: EngineResultsTracker = {}; let result: EngineScrapeResultWithContext | null = null; - let ttrInstanceCount = Math.min(fallbackList.length, 3); - let ttrRatios = new Array(ttrInstanceCount).fill(0).map((_, i) => ttrInstanceCount - i); - let ttrRatioSum = ttrRatios.reduce((a, x) => a + x, 0); - const timeToRun = meta.options.timeout !== undefined - ? ttrRatios.map(ratio => Math.round(meta.options.timeout! * ratio / ttrRatioSum)).map(ratio => isNaN(ratio) ? undefined : ratio) - : [undefined] + ? Math.round(meta.options.timeout / Math.min(fallbackList.length, 3)) + : undefined - for (const i in fallbackList) { - const { engine, unsupportedFeatures } = fallbackList[i]; + for (const { engine, unsupportedFeatures } of fallbackList) { const startedAt = Date.now(); try { meta.logger.info("Scraping via " + engine + "..."); - const _engineResult = await scrapeURLWithEngine(meta, engine, timeToRun[i] ?? timeToRun.slice(-1)[0]); + const _engineResult = await scrapeURLWithEngine(meta, engine, timeToRun); if (_engineResult.markdown === undefined) { // Some engines emit Markdown directly. _engineResult.markdown = await parseMarkdown(_engineResult.html);