From d71c89a79c268e2c71955c8ebec314bc1f451865 Mon Sep 17 00:00:00 2001 From: "yanlong.wang" Date: Mon, 10 Mar 2025 16:46:26 +0800 Subject: [PATCH] fix: block suspicious requests before sideload --- src/api/crawler.ts | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/api/crawler.ts b/src/api/crawler.ts index d3b9a79..a46222e 100644 --- a/src/api/crawler.ts +++ b/src/api/crawler.ts @@ -305,11 +305,10 @@ export class CrawlerHost extends RPCHost { } } + const crawlOpts = await this.configure(crawlerOptions); if (crawlerOptions.robotsTxt) { await this.robotsTxtService.assertAccessAllowed(targetUrl, crawlerOptions.robotsTxt); } - - const crawlOpts = await this.configure(crawlerOptions); if (!ctx.accepts('text/plain') && ctx.accepts('text/event-stream')) { const sseStream = new OutputServerEventStream(); rpcReflect.return(sseStream); @@ -508,7 +507,27 @@ export class CrawlerHost extends RPCHost { }); } - if (!isIP(result.hostname)) { + + if (this.puppeteerControl.circuitBreakerHosts.has(result.hostname.toLowerCase())) { + throw new SecurityCompromiseError({ + message: `Circular hostname: ${result.protocol}`, + path: 'url' + }); + } + + const isIp = isIP(result.hostname); + + if ( + (result.hostname === 'localhost') || + (isIp && result.hostname.startsWith('127.')) + ) { + throw new SecurityCompromiseError({ + message: `Suspicious action: Request to localhost: ${result}`, + path: 'url' + }); + } + + if (!isIp) { await lookup(result.hostname).catch((err) => { if (err.code === 'ENOTFOUND') { return Promise.reject(new ParamValidationError({