mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-14 19:26:01 +08:00
fix
This commit is contained in:
parent
8951578ef1
commit
0a09aeb967
@ -277,9 +277,14 @@ function giveSnapshot(stopActiveSnapshot) {
|
||||
|
||||
const domainSet = new Set<string>();
|
||||
let reqCounter = 0;
|
||||
const t0 = Date.now();
|
||||
let halt = false;
|
||||
|
||||
page.on('request', (req) => {
|
||||
reqCounter++;
|
||||
if (halt) {
|
||||
return req.abort('blockedbyclient', 1000);
|
||||
}
|
||||
const requestUrl = req.url();
|
||||
if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
|
||||
return req.abort('blockedbyclient', 1000);
|
||||
@ -291,7 +296,6 @@ function giveSnapshot(stopActiveSnapshot) {
|
||||
|
||||
if (this.circuitBreakerHosts.has(parsedUrl.hostname.toLowerCase())) {
|
||||
page.emit('abuse', { url: requestUrl, page, sn, reason: `Abusive request: ${requestUrl}` });
|
||||
|
||||
return req.abort('blockedbyclient', 1000);
|
||||
}
|
||||
|
||||
@ -304,14 +308,22 @@ function giveSnapshot(stopActiveSnapshot) {
|
||||
return req.abort('blockedbyclient', 1000);
|
||||
}
|
||||
|
||||
if (reqCounter > 2000) {
|
||||
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
|
||||
const dt = Math.ceil((Date.now() - t0) / 1000);
|
||||
const rps = reqCounter / dt;
|
||||
// console.log(`rps: ${rps}`);
|
||||
|
||||
return req.abort('blockedbyclient', 1000);
|
||||
if (reqCounter > 1000) {
|
||||
if (rps > 60 || reqCounter > 2000) {
|
||||
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests` });
|
||||
halt = true;
|
||||
|
||||
return req.abort('blockedbyclient', 1000);
|
||||
}
|
||||
}
|
||||
|
||||
if (domainSet.size > 200) {
|
||||
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
|
||||
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains` });
|
||||
halt = true;
|
||||
|
||||
return req.abort('blockedbyclient', 1000);
|
||||
}
|
||||
@ -329,7 +341,7 @@ const handlePageLoad = () => {
|
||||
if (window.haltSnapshot) {
|
||||
return;
|
||||
}
|
||||
if (document.readyState !== 'complete') {
|
||||
if (document.readyState === 'loading') {
|
||||
return;
|
||||
}
|
||||
const thisTextLength = (document.body.innerText || '').length;
|
||||
@ -503,7 +515,8 @@ document.addEventListener('load', handlePageLoad);
|
||||
if (options?.minIntervalMs) {
|
||||
ckpt.push(delay(options.minIntervalMs));
|
||||
}
|
||||
await Promise.race(ckpt);
|
||||
let error;
|
||||
await Promise.race(ckpt).catch((err)=> error = err);
|
||||
if (finalized) {
|
||||
yield { ...snapshot, screenshot } as PageSnapshot;
|
||||
break;
|
||||
@ -515,6 +528,9 @@ document.addEventListener('load', handlePageLoad);
|
||||
if (snapshot || screenshot) {
|
||||
yield { ...snapshot, screenshot } as PageSnapshot;
|
||||
}
|
||||
if (error) {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
gotoPromise.finally(() => {
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 7a221b7a8d5fd0244f33dd5dc9d62ce551c5bded
|
||||
Subproject commit 24a942452fa12e622a82f05f817c6102c5e84891
|
Loading…
x
Reference in New Issue
Block a user