fix: abuse blocker

This commit is contained in:
Yanlong Wang 2024-06-01 02:01:12 +08:00
parent 249408df6b
commit d2bebec60f
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
4 changed files with 10 additions and 6 deletions

View File

@@ -38,6 +38,7 @@
"set-cookie-parser": "^2.6.0", "set-cookie-parser": "^2.6.0",
"stripe": "^11.11.0", "stripe": "^11.11.0",
"tiktoken": "^1.0.10", "tiktoken": "^1.0.10",
"tld-extract": "^2.1.0",
"turndown": "^7.1.3", "turndown": "^7.1.3",
"turndown-plugin-gfm": "^1.0.2", "turndown-plugin-gfm": "^1.0.2",
"undici": "^5.24.0" "undici": "^5.24.0"
@@ -11306,8 +11307,7 @@
"node_modules/tld-extract": { "node_modules/tld-extract": {
"version": "2.1.0", "version": "2.1.0",
"resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz", "resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz",
"integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw==", "integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw=="
"optional": true
}, },
"node_modules/tmpl": { "node_modules/tmpl": {
"version": "1.0.5", "version": "1.0.5",

View File

@@ -58,6 +58,7 @@
"set-cookie-parser": "^2.6.0", "set-cookie-parser": "^2.6.0",
"stripe": "^11.11.0", "stripe": "^11.11.0",
"tiktoken": "^1.0.10", "tiktoken": "^1.0.10",
"tld-extract": "^2.1.0",
"turndown": "^7.1.3", "turndown": "^7.1.3",
"turndown-plugin-gfm": "^1.0.2", "turndown-plugin-gfm": "^1.0.2",
"undici": "^5.24.0" "undici": "^5.24.0"

View File

@@ -65,7 +65,7 @@ export class CrawlerHost extends RPCHost {
cacheRetentionMs = 1000 * 3600 * 24 * 7; cacheRetentionMs = 1000 * 3600 * 24 * 7;
cacheValidMs = 1000 * 3600; cacheValidMs = 1000 * 3600;
urlValidMs = 1000 * 3600 * 4; urlValidMs = 1000 * 3600 * 4;
abuseBlockMs = 1000 * 3600 * 24; abuseBlockMs = 1000 * 3600;
constructor( constructor(
protected globalLogger: Logger, protected globalLogger: Logger,

View File

@@ -12,6 +12,7 @@ import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy'; import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors'; import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
import { Readability } from '@mozilla/readability'; import { Readability } from '@mozilla/readability';
const tldExtract = require('tld-extract');
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8'); const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
@@ -279,8 +280,10 @@ function giveSnapshot(stopActiveSnapshot) {
if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') { if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
return req.abort('blockedbyclient', 1000); return req.abort('blockedbyclient', 1000);
} }
const tldParsed = tldExtract(requestUrl);
domainSet.add(tldParsed.domain);
const parsedUrl = new URL(requestUrl); const parsedUrl = new URL(requestUrl);
domainSet.add(parsedUrl.hostname);
if ( if (
parsedUrl.hostname === 'localhost' || parsedUrl.hostname === 'localhost' ||
@@ -291,13 +294,13 @@ function giveSnapshot(stopActiveSnapshot) {
return req.abort('blockedbyclient', 1000); return req.abort('blockedbyclient', 1000);
} }
if (reqCounter > 200) { if (reqCounter > 2000) {
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` }); page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
return req.abort('blockedbyclient', 1000); return req.abort('blockedbyclient', 1000);
} }
if (domainSet.size > 51) { if (domainSet.size > 200) {
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` }); page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
return req.abort('blockedbyclient', 1000); return req.abort('blockedbyclient', 1000);