From 512a2b1cd494d64a8139f089116b224cd7790a0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= <mo.geryy@gmail.com>
Date: Tue, 15 Apr 2025 22:57:20 -0700
Subject: [PATCH] feat(extract): fall back to original URLs if reranker yields no links

---
 .../api/src/lib/extract/extraction-service.ts | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/apps/api/src/lib/extract/extraction-service.ts b/apps/api/src/lib/extract/extraction-service.ts
index 22d6f6c7..1ea5651f 100644
--- a/apps/api/src/lib/extract/extraction-service.ts
+++ b/apps/api/src/lib/extract/extraction-service.ts
@@ -273,28 +273,21 @@ export async function performExtraction(
   );
 
   const processedUrls = await Promise.all(urlPromises);
-  const links = processedUrls.flat().filter((url) => url);
+  let links = processedUrls.flat().filter((url) => url);
   logger.debug("Processed URLs.", {
     linkCount: links.length,
   });
 
-  log["links"] = links;
-  log["linksLength"] = links.length;
-
   if (links.length === 0) {
-    logger.error("0 links! Bailing.", {
+    links = urls.map(x => x.replace(/\*$/g, ""));
+    logger.warn("0 links! Doing just the original URLs. (without * wildcard)", {
       linkCount: links.length,
     });
-    return {
-      success: false,
-      error:
-        "No valid URLs found to scrape. Try adjusting your search criteria or including more URLs.",
-      extractId,
-      urlTrace: urlTraces,
-      totalUrlsScraped: 0,
-    };
   }
 
+  log["links"] = links;
+  log["linksLength"] = links.length;
+
   await updateExtract(extractId, {
     status: "processing",
     steps: [