fix: tweak the default respondTiming to be more conservative

This commit is contained in:
Yanlong Wang 2025-03-12 18:32:08 +08:00
parent 8121d62324
commit 6027963670
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37

View File

@ -225,7 +225,8 @@ class Viewport extends AutoCastable {
`- html: unrendered HTML is enough to return\n` + `- html: unrendered HTML is enough to return\n` +
`- mutation-idle: wait for DOM mutations to settle and remain unchanged for at least 0.2s\n` + `- mutation-idle: wait for DOM mutations to settle and remain unchanged for at least 0.2s\n` +
`- resource-idle: wait for no additional resources that would affect page logic and content SUCCEEDED loading for at least 0.5s\n` + `- resource-idle: wait for no additional resources that would affect page logic and content SUCCEEDED loading for at least 0.5s\n` +
`- media-idle: wait for no additional resources, including media resources, SUCCEEDED loading for at least 0.5s\n\n`, `- media-idle: wait for no additional resources, including media resources, SUCCEEDED loading for at least 0.5s\n` +
`- network-idle: wait for full load of webpage, as usual.\n\n`,
in: 'header', in: 'header',
schema: { type: 'string' } schema: { type: 'string' }
}, },
@ -583,11 +584,13 @@ export class CrawlerOptions extends AutoCastable {
if (respondTiming) { if (respondTiming) {
instance.respondTiming ??= respondTiming as RESPOND_TIMING; instance.respondTiming ??= respondTiming as RESPOND_TIMING;
} }
instance.respondTiming ??= ( if (instance.timeout) {
instance.timeout || instance.respondTiming ??= RESPOND_TIMING.NETWORK_IDLE;
instance.respondWith.includes('shot') || }
instance.respondWith.includes('vlm') if (instance.respondWith.includes('shot') || instance.respondWith.includes('vlm')) {
) ? RESPOND_TIMING.MEDIA_IDLE : RESPOND_TIMING.MUTATION_IDLE; instance.respondTiming ??= RESPOND_TIMING.MEDIA_IDLE;
}
instance.respondTiming ??= RESPOND_TIMING.RESOURCE_IDLE;
if (instance.cacheTolerance) { if (instance.cacheTolerance) {
instance.cacheTolerance = instance.cacheTolerance * 1000; instance.cacheTolerance = instance.cacheTolerance * 1000;
@ -607,7 +610,7 @@ export class CrawlerOptions extends AutoCastable {
if (this.respondTiming === RESPOND_TIMING.HTML && snapshot.html) { if (this.respondTiming === RESPOND_TIMING.HTML && snapshot.html) {
return true; return true;
} }
if (this.respondTiming === RESPOND_TIMING.MEDIA_IDLE && snapshot.lastMediaResourceLoaded) { if (this.respondTiming === RESPOND_TIMING.MEDIA_IDLE && snapshot.lastMediaResourceLoaded && snapshot.lastMutationIdle) {
const now = Date.now(); const now = Date.now();
if ((Math.max(snapshot.lastMediaResourceLoaded, snapshot.lastContentResourceLoaded || 0) + 500) < now) { if ((Math.max(snapshot.lastMediaResourceLoaded, snapshot.lastContentResourceLoaded || 0) + 500) < now) {
return true; return true;
@ -619,7 +622,7 @@ export class CrawlerOptions extends AutoCastable {
if ((this.respondWith.includes('vlm') || this.respondWith.includes('screenshot')) && !snapshot.screenshot) { if ((this.respondWith.includes('vlm') || this.respondWith.includes('screenshot')) && !snapshot.screenshot) {
return false; return false;
} }
if (this.respondTiming === RESPOND_TIMING.RESOURCE_IDLE && snapshot.lastContentResourceLoaded) { if (this.respondTiming === RESPOND_TIMING.RESOURCE_IDLE && snapshot.lastContentResourceLoaded && snapshot.lastMutationIdle) {
const now = Date.now(); const now = Date.now();
if ((snapshot.lastContentResourceLoaded + 500) < now) { if ((snapshot.lastContentResourceLoaded + 500) < now) {
return true; return true;