From e4adbaa88eaa6e56985df8b1e6087ded95c4fc8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 Aug 2024 14:12:52 +0200 Subject: [PATCH] fix(llm-extract): handle llm-extract if scrape failed --- apps/api/src/lib/LLM-extraction/models.ts | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/apps/api/src/lib/LLM-extraction/models.ts b/apps/api/src/lib/LLM-extraction/models.ts index e696a8cd..8ca6bbd4 100644 --- a/apps/api/src/lib/LLM-extraction/models.ts +++ b/apps/api/src/lib/LLM-extraction/models.ts @@ -15,7 +15,7 @@ const defaultPrompt = function prepareOpenAIDoc( document: Document, mode: "markdown" | "raw-html" -): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] { +): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null { let markdown = document.markdown; @@ -27,9 +27,10 @@ function prepareOpenAIDoc( // Check if the markdown content exists in the document if (!extractionTarget) { - throw new Error( - `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai` - ); + return null; + // throw new Error( + // `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai` + // ); } @@ -64,7 +65,16 @@ export async function generateOpenAICompletions({ mode: "markdown" | "raw-html"; }): Promise { const openai = client as OpenAI; - const [content, numTokens] = prepareOpenAIDoc(document, mode); + const preparedDoc = prepareOpenAIDoc(document, mode); + + if (preparedDoc === null) { + return { + ...document, + warning: "LLM extraction was not performed since the document's content is empty or missing.", + }; + } + + const [content, numTokens] = preparedDoc; const completion = await openai.chat.completions.create({ model,