From 192d056bef828addd811607a85c6852b4c04231a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Mon, 19 May 2025 21:36:15 +0200 Subject: [PATCH] feat(scrapeURL/pdf/mu): add timeout and created_at (#1570) --- apps/api/src/scraper/scrapeURL/engines/pdf/index.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts index 29a07a5d..733b06df 100644 --- a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts @@ -27,6 +27,8 @@ async function scrapePDFWithRunPodMU( tempFilePath, }); + const preCacheCheckStartTime = Date.now(); + try { const cachedResult = await getPdfResultFromCache(base64Content); @@ -54,6 +56,8 @@ async function scrapePDFWithRunPodMU( input: { file_content: base64Content, filename: path.basename(tempFilePath) + ".pdf", + timeout: timeToRun ? timeToRun - (Date.now() - preCacheCheckStartTime) : undefined, + created_at: Date.now(), }, }, logger: meta.logger.child({ @@ -103,6 +107,8 @@ export async function scrapePDF( meta: Meta, timeToRun: number | undefined, ): Promise { + const startTime = Date.now(); + if (!meta.options.parsePDF) { if (meta.pdfPrefetch !== undefined && meta.pdfPrefetch !== null) { const content = (await readFile(meta.pdfPrefetch.filePath)).toString("base64"); @@ -167,7 +173,7 @@ export async function scrapePDF( }), }, tempFilePath, - timeToRun, + timeToRun ? (timeToRun - (Date.now() - startTime)) : undefined, base64Content, ); } catch (error) {