mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 10:25:57 +08:00
fix: abuse blocker
This commit is contained in:
parent
249408df6b
commit
d2bebec60f
4
backend/functions/package-lock.json
generated
4
backend/functions/package-lock.json
generated
@ -38,6 +38,7 @@
|
|||||||
"set-cookie-parser": "^2.6.0",
|
"set-cookie-parser": "^2.6.0",
|
||||||
"stripe": "^11.11.0",
|
"stripe": "^11.11.0",
|
||||||
"tiktoken": "^1.0.10",
|
"tiktoken": "^1.0.10",
|
||||||
|
"tld-extract": "^2.1.0",
|
||||||
"turndown": "^7.1.3",
|
"turndown": "^7.1.3",
|
||||||
"turndown-plugin-gfm": "^1.0.2",
|
"turndown-plugin-gfm": "^1.0.2",
|
||||||
"undici": "^5.24.0"
|
"undici": "^5.24.0"
|
||||||
@ -11306,8 +11307,7 @@
|
|||||||
"node_modules/tld-extract": {
|
"node_modules/tld-extract": {
|
||||||
"version": "2.1.0",
|
"version": "2.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz",
|
||||||
"integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw==",
|
"integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw=="
|
||||||
"optional": true
|
|
||||||
},
|
},
|
||||||
"node_modules/tmpl": {
|
"node_modules/tmpl": {
|
||||||
"version": "1.0.5",
|
"version": "1.0.5",
|
||||||
|
@ -58,6 +58,7 @@
|
|||||||
"set-cookie-parser": "^2.6.0",
|
"set-cookie-parser": "^2.6.0",
|
||||||
"stripe": "^11.11.0",
|
"stripe": "^11.11.0",
|
||||||
"tiktoken": "^1.0.10",
|
"tiktoken": "^1.0.10",
|
||||||
|
"tld-extract": "^2.1.0",
|
||||||
"turndown": "^7.1.3",
|
"turndown": "^7.1.3",
|
||||||
"turndown-plugin-gfm": "^1.0.2",
|
"turndown-plugin-gfm": "^1.0.2",
|
||||||
"undici": "^5.24.0"
|
"undici": "^5.24.0"
|
||||||
|
@ -65,7 +65,7 @@ export class CrawlerHost extends RPCHost {
|
|||||||
cacheRetentionMs = 1000 * 3600 * 24 * 7;
|
cacheRetentionMs = 1000 * 3600 * 24 * 7;
|
||||||
cacheValidMs = 1000 * 3600;
|
cacheValidMs = 1000 * 3600;
|
||||||
urlValidMs = 1000 * 3600 * 4;
|
urlValidMs = 1000 * 3600 * 4;
|
||||||
abuseBlockMs = 1000 * 3600 * 24;
|
abuseBlockMs = 1000 * 3600;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: Logger,
|
protected globalLogger: Logger,
|
||||||
|
@ -12,6 +12,7 @@ import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
|||||||
import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
|
import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
|
||||||
import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
|
import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
|
||||||
import { Readability } from '@mozilla/readability';
|
import { Readability } from '@mozilla/readability';
|
||||||
|
const tldExtract = require('tld-extract');
|
||||||
|
|
||||||
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
||||||
|
|
||||||
@ -279,8 +280,10 @@ function giveSnapshot(stopActiveSnapshot) {
|
|||||||
if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
|
if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
|
||||||
return req.abort('blockedbyclient', 1000);
|
return req.abort('blockedbyclient', 1000);
|
||||||
}
|
}
|
||||||
|
const tldParsed = tldExtract(requestUrl);
|
||||||
|
domainSet.add(tldParsed.domain);
|
||||||
|
|
||||||
const parsedUrl = new URL(requestUrl);
|
const parsedUrl = new URL(requestUrl);
|
||||||
domainSet.add(parsedUrl.hostname);
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
parsedUrl.hostname === 'localhost' ||
|
parsedUrl.hostname === 'localhost' ||
|
||||||
@ -291,13 +294,13 @@ function giveSnapshot(stopActiveSnapshot) {
|
|||||||
return req.abort('blockedbyclient', 1000);
|
return req.abort('blockedbyclient', 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reqCounter > 200) {
|
if (reqCounter > 2000) {
|
||||||
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
|
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
|
||||||
|
|
||||||
return req.abort('blockedbyclient', 1000);
|
return req.abort('blockedbyclient', 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (domainSet.size > 51) {
|
if (domainSet.size > 200) {
|
||||||
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
|
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
|
||||||
|
|
||||||
return req.abort('blockedbyclient', 1000);
|
return req.abort('blockedbyclient', 1000);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user