From 943bbae88d672d52895059bd85bebf270152970c Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:29:37 -0300 Subject: [PATCH] fixed nested data inside extract --- apps/api/src/controllers/v1/extract.ts | 4 +++- .../api/src/scraper/scrapeURL/transformers/llmExtract.ts | 9 +++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index e5f2595c..f59b1ff1 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -211,7 +211,9 @@ export async function extractController( prompt: req.body.prompt, schema: req.body.schema, }, - docs.map(x => buildDocument(x)).join('\n') + docs.map(x => buildDocument(x)).join('\n'), + undefined, + true // isExtractEndpoint ); // TODO: change this later diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 3866683a..64073eee 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -58,7 +58,7 @@ function normalizeSchema(x: any): any { } } -export async function generateOpenAICompletions(logger: Logger, options: ExtractOptions, markdown?: string, previousWarning?: string): Promise<{ extract: any, numTokens: number, warning: string | undefined }> { +export async function generateOpenAICompletions(logger: Logger, options: ExtractOptions, markdown?: string, previousWarning?: string, isExtractEndpoint?: boolean): Promise<{ extract: any, numTokens: number, warning: string | undefined }> { let extract: any; let warning: string | undefined; @@ -158,7 +158,12 @@ export async function generateOpenAICompletions(logger: Logger, options: Extract if (extract === null && jsonCompletion.choices[0].message.content !== null) { try { - extract = JSON.parse(jsonCompletion.choices[0].message.content); + if (!isExtractEndpoint) { + extract = JSON.parse(jsonCompletion.choices[0].message.content); + } else { + const extractData = JSON.parse(jsonCompletion.choices[0].message.content); + extract = extractData.data.extract; + } } catch (e) { logger.error("Failed to parse returned JSON, no schema specified.", { error: e }); throw new LLMRefusalError("Failed to parse returned JSON. Please specify a schema in the extract object.");