From 512a2b1cd494d64a8139f089116b224cd7790a0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Tue, 15 Apr 2025 22:57:20 -0700
Subject: [PATCH] feat(extract): run on original links if reranker is weird

---
 .../api/src/lib/extract/extraction-service.ts | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/apps/api/src/lib/extract/extraction-service.ts b/apps/api/src/lib/extract/extraction-service.ts
index 22d6f6c7..1ea5651f 100644
--- a/apps/api/src/lib/extract/extraction-service.ts
+++ b/apps/api/src/lib/extract/extraction-service.ts
@@ -273,28 +273,21 @@ export async function performExtraction(
   );
 
   const processedUrls = await Promise.all(urlPromises);
-  const links = processedUrls.flat().filter((url) => url);
+  let links = processedUrls.flat().filter((url) => url);
   logger.debug("Processed URLs.", {
     linkCount: links.length,
   });
 
-  log["links"] = links;
-  log["linksLength"] = links.length;
-
   if (links.length === 0) {
-    logger.error("0 links! Bailing.", {
+    links = urls.map(x => x.replace(/\*$/g, ""));
+    logger.warn("0 links! Doing just the original URLs. (without * wildcard)", {
       linkCount: links.length,
     });
-    return {
-      success: false,
-      error:
-        "No valid URLs found to scrape. Try adjusting your search criteria or including more URLs.",
-      extractId,
-      urlTrace: urlTraces,
-      totalUrlsScraped: 0,
-    };
   }
 
+  log["links"] = links;
+  log["linksLength"] = links.length;
+
   await updateExtract(extractId, {
     status: "processing",
     steps: [