mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 02:35:55 +08:00
fix: treat existing screenshot as a scraper success condition
This commit is contained in:
parent
0c48c8a436
commit
1368f9a87f
@@ -279,6 +279,16 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function legacyDocumentConverter(doc: any): Document {
|
export function legacyDocumentConverter(doc: any): Document {
|
||||||
|
if (doc.metadata.screenshot) {
|
||||||
|
doc.screenshot = doc.metadata.screenshot;
|
||||||
|
delete doc.metadata.screenshot;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (doc.metadata.fullPageScreenshot) {
|
||||||
|
doc.fullPageScreenshot = doc.metadata.fullPageScreenshot;
|
||||||
|
delete doc.metadata.fullPageScreenshot;
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
markdown: doc.markdown,
|
markdown: doc.markdown,
|
||||||
links: doc.linksOnPage,
|
links: doc.linksOnPage,
|
||||||
|
@@ -340,8 +340,8 @@ export async function scrapSingleUrl(
|
|||||||
pageError = undefined;
|
pageError = undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (text && text.trim().length >= 100) {
|
if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
|
||||||
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100, breaking`);
|
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (pageStatusCode && (pageStatusCode == 404 || pageStatusCode == 500)) {
|
if (pageStatusCode && (pageStatusCode == 404 || pageStatusCode == 500)) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user