fix: treat existing screenshot as a scraper success condition

This commit is contained in:
Gergő Móricz 2024-08-20 22:24:18 +02:00
parent 0c48c8a436
commit 1368f9a87f
2 changed files with 12 additions and 2 deletions

View File

@@ -279,6 +279,16 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
}
export function legacyDocumentConverter(doc: any): Document {
if (doc.metadata.screenshot) {
doc.screenshot = doc.metadata.screenshot;
delete doc.metadata.screenshot;
}
if (doc.metadata.fullPageScreenshot) {
doc.fullPageScreenshot = doc.metadata.fullPageScreenshot;
delete doc.metadata.fullPageScreenshot;
}
return {
markdown: doc.markdown,
links: doc.linksOnPage,

View File

@@ -340,8 +340,8 @@ export async function scrapSingleUrl(
pageError = undefined;
}
if (text && text.trim().length >= 100) {
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100, breaking`);
if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
break;
}
if (pageStatusCode && (pageStatusCode == 404 || pageStatusCode == 500)) {