From 8a26f08b1487184eeace21289449d046680736f3 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 20:37:58 -0800 Subject: [PATCH] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 8202a513..d462e651 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -32,7 +32,7 @@ const MAX_EXTRACT_LIMIT = 100; const MAX_RANKING_LIMIT = 10; const INITIAL_SCORE_THRESHOLD = 0.75; const FALLBACK_SCORE_THRESHOLD = 0.5; -const MIN_REQUIRED_LINKS = 3; +const MIN_REQUIRED_LINKS = 1; /** * Extracts data from the provided URLs based on the request parameters. @@ -103,6 +103,7 @@ export async function extractController( // If we don't have enough high-quality links, try with lower threshold if (filteredLinks.length < MIN_REQUIRED_LINKS) { + console.log(`Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`); logger.info(`Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`); filteredLinks = filterAndProcessLinks(mappedLinks, linksAndScores, FALLBACK_SCORE_THRESHOLD); @@ -227,15 +228,8 @@ export async function extractController( // Optionally, you could notify an admin or add to a retry queue here }); - let data: any; - let warning = completions.warning ?? ""; - try { - data = JSON.parse(completions.extract); - } catch (e) { - logger.warn(`ExtractController: Error parsing JSON: ${e}`); - data = completions.extract; - warning = "JSON could not be parsed correctly. Returning raw LLM output..."; - } + let data = completions.extract ?? {}; + let warning = completions.warning; logJob({ job_id: id,