# Patch summary (explanatory preamble; text before the first "diff --git" header is not part of any hunk).
#
# 1. apps/api/src/controllers/v1/types.ts
#    Changes the default value of `extractOptions.systemPrompt`: the phrase "in JSON format" is
#    inserted after "from the schema". No other field of the zod object is touched.
#    NOTE(review): presumably needed so the model is explicitly told to answer in JSON when no
#    schema is supplied -- confirm against the request built in llmExtract.ts.
#
# 2. apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts (inside generateOpenAICompletions)
#    After `document.extract` is assigned from `message.parsed`, adds a fallback: when
#    `document.extract` is strictly `null` and `message.content` is not `null`, the raw content is
#    run through `JSON.parse` and stored in `document.extract`. If parsing throws, the error is
#    logged via `logger.error` and an `LLMRefusalError` is thrown asking the caller to specify a schema.
#    The pre-existing array handling (`document.extract?.items` when `options.schema.type === "array"`)
#    still runs afterwards, on either the parsed or the fallback value.
#    NOTE(review): the guard uses `=== null`, so an `undefined` `parsed` would skip the fallback --
#    confirm `parsed` can never be undefined here.
#    NOTE(review): a JSON parse failure is reported as `LLMRefusalError`; verify that callers do not
#    treat that type as a genuine model refusal.
#    NOTE(review): the result of `JSON.parse` is stored without validation -- confirm downstream
#    consumers tolerate arbitrary JSON values (strings, numbers, arrays).
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index f072ca0b..28dbb48f 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -53,7 +53,7 @@ const strictMessage = "Unrecognized key in body -- please review the v1 API docu export const extractOptions = z.object({ mode: z.enum(["llm"]).default("llm"), schema: z.any().optional(), - systemPrompt: z.string().default("Based on the information on the page, extract all the information from the schema. Try to extract all the fields even those that might not be marked as required."), + systemPrompt: z.string().default("Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required."), prompt: z.string().optional() }).strict(strictMessage); diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 22e2649b..69a92197 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -144,6 +144,16 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt } document.extract = jsonCompletion.choices[0].message.parsed; + + if (document.extract === null && jsonCompletion.choices[0].message.content !== null) { + try { + document.extract = JSON.parse(jsonCompletion.choices[0].message.content); + } catch (e) { + logger.error("Failed to parse returned JSON, no schema specified.", { error: e }); + throw new LLMRefusalError("Failed to parse returned JSON. Please specify a schema in the extract object."); + } + } + if (options.schema && options.schema.type === "array") { document.extract = document.extract?.items; }