mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-14 17:36:07 +08:00
fix: reduce frequency of screenshot if possible
This commit is contained in:
parent
ae99af50aa
commit
94a72052f4
@ -309,6 +309,7 @@ ${this.content}
|
|||||||
const crawlOpts: ScrappingOptions = {
|
const crawlOpts: ScrappingOptions = {
|
||||||
proxyUrl: ctx.req.get('x-proxy-url'),
|
proxyUrl: ctx.req.get('x-proxy-url'),
|
||||||
cookies,
|
cookies,
|
||||||
|
favorScreenshot: customMode === 'screenshot'
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
|
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
|
||||||
@ -418,6 +419,7 @@ ${this.content}
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
isFresh: !stale,
|
isFresh: !stale,
|
||||||
|
...cache,
|
||||||
snapshot: {
|
snapshot: {
|
||||||
...r,
|
...r,
|
||||||
screenshot: undefined,
|
screenshot: undefined,
|
||||||
@ -471,7 +473,7 @@ ${this.content}
|
|||||||
cache = await this.queryCache(urlToCrawl);
|
cache = await this.queryCache(urlToCrawl);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cache?.isFresh) {
|
if (cache?.isFresh && (!crawlOpts.favorScreenshot || (crawlOpts.favorScreenshot && cache?.screenshotAvailable))) {
|
||||||
yield cache.snapshot;
|
yield cache.snapshot;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
@ -50,6 +50,7 @@ export interface PageSnapshot {
|
|||||||
export interface ScrappingOptions {
|
export interface ScrappingOptions {
|
||||||
proxyUrl?: string;
|
proxyUrl?: string;
|
||||||
cookies?: CookieParam[];
|
cookies?: CookieParam[];
|
||||||
|
favorScreenshot?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -289,7 +290,7 @@ document.addEventListener('load', handlePageLoad);
|
|||||||
yield { ...snapshot, screenshot } as PageSnapshot;
|
yield { ...snapshot, screenshot } as PageSnapshot;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (snapshot?.title && snapshot?.html !== lastHTML) {
|
if (options.favorScreenshot && snapshot?.title && snapshot?.html !== lastHTML) {
|
||||||
screenshot = await page.screenshot();
|
screenshot = await page.screenshot();
|
||||||
lastHTML = snapshot.html;
|
lastHTML = snapshot.html;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user