mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-15 00:26:06 +08:00
fix: block suspicious requests before sideload
This commit is contained in:
parent
3b3a0265df
commit
d71c89a79c
@@ -305,11 +305,10 @@ export class CrawlerHost extends RPCHost {
|
||||
}
|
||||
}
|
||||
|
||||
const crawlOpts = await this.configure(crawlerOptions);
|
||||
if (crawlerOptions.robotsTxt) {
|
||||
await this.robotsTxtService.assertAccessAllowed(targetUrl, crawlerOptions.robotsTxt);
|
||||
}
|
||||
|
||||
const crawlOpts = await this.configure(crawlerOptions);
|
||||
if (!ctx.accepts('text/plain') && ctx.accepts('text/event-stream')) {
|
||||
const sseStream = new OutputServerEventStream();
|
||||
rpcReflect.return(sseStream);
|
||||
@@ -508,7 +507,27 @@ export class CrawlerHost extends RPCHost {
|
||||
});
|
||||
}
|
||||
|
||||
if (!isIP(result.hostname)) {
|
||||
|
||||
if (this.puppeteerControl.circuitBreakerHosts.has(result.hostname.toLowerCase())) {
|
||||
throw new SecurityCompromiseError({
|
||||
message: `Circular hostname: ${result.protocol}`,
|
||||
path: 'url'
|
||||
});
|
||||
}
|
||||
|
||||
const isIp = isIP(result.hostname);
|
||||
|
||||
if (
|
||||
(result.hostname === 'localhost') ||
|
||||
(isIp && result.hostname.startsWith('127.'))
|
||||
) {
|
||||
throw new SecurityCompromiseError({
|
||||
message: `Suspicious action: Request to localhost: ${result}`,
|
||||
path: 'url'
|
||||
});
|
||||
}
|
||||
|
||||
if (!isIp) {
|
||||
await lookup(result.hostname).catch((err) => {
|
||||
if (err.code === 'ENOTFOUND') {
|
||||
return Promise.reject(new ParamValidationError({
|
||||
|
Loading…
x
Reference in New Issue
Block a user