From 751c30f139bea9739d6bef32177495888ec0850e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 16 Apr 2025 16:23:12 -0700 Subject: [PATCH] feat(extractSmartScrape): better pagination handling --- apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts | 2 +- apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts b/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts index 0e7ac878..37205343 100644 --- a/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts +++ b/apps/api/src/scraper/scrapeURL/lib/extractSmartScrape.ts @@ -16,7 +16,7 @@ const commonSmartScrapeProperties = { shouldUseSmartscrape: { type: "boolean", description: - "Set to `true` if any of the extractedData is null and you think you can find the information by performing user-like interactions (e.g., clicking buttons/accordions to reveal hidden text, login, inputs etc.). SmartScrape can perform these actions to access the data.", + "Set to `true` if any of the extractedData is null and you think you can find the information by performing user-like interactions (e.g., clicking buttons/accordions to reveal hidden text, login, inputs, pagination etc.). SmartScrape can perform these actions to access the data.", }, // Note: extractedData is added dynamically in prepareSmartScrapeSchema }; diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 21572c9d..73456096 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -386,7 +386,7 @@ export async function generateCompletions({ const repairConfig = { experimental_repairText: async ({ text, error }) => { // AI may output a markdown JSON code block. Remove it - mogery - logger.debug("Repairing text", { textType: typeof text, error }); + logger.debug("Repairing text", { textType: typeof text, textPeek: JSON.stringify(text).slice(0, 100) + "...", error }); if (typeof text === "string" && text.trim().startsWith("```")) { if (text.trim().startsWith("```json")) {