diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 4dcb32fc..ffaf6c19 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -279,6 +279,16 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions { } export function legacyDocumentConverter(doc: any): Document { + if (doc.metadata.screenshot) { + doc.screenshot = doc.metadata.screenshot; + delete doc.metadata.screenshot; + } + + if (doc.metadata.fullPageScreenshot) { + doc.fullPageScreenshot = doc.metadata.fullPageScreenshot; + delete doc.metadata.fullPageScreenshot; + } + return { markdown: doc.markdown, links: doc.linksOnPage, diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index c2bbbc7b..408f9838 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -340,8 +340,8 @@ export async function scrapSingleUrl( pageError = undefined; } - if (text && text.trim().length >= 100) { - Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100, breaking`); + if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) { + Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`); break; } if (pageStatusCode && (pageStatusCode == 404 || pageStatusCode == 500)) {