mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-14 17:15:56 +08:00
chore: dont abuse our service
This commit is contained in:
parent
1e3bae6aad
commit
5789ae1407
@ -571,10 +571,15 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
if (blockade) {
|
if (blockade) {
|
||||||
throw new SecurityCompromiseError(`Domain ${urlToCrawl.hostname} blocked until ${blockade.expireAt || 'Eternally'} due to previous abuse found on ${blockade.triggerUrl || 'site'}: ${blockade.triggerReason}`);
|
throw new SecurityCompromiseError(`Domain ${urlToCrawl.hostname} blocked until ${blockade.expireAt || 'Eternally'} due to previous abuse found on ${blockade.triggerUrl || 'site'}: ${blockade.triggerReason}`);
|
||||||
}
|
}
|
||||||
}
|
if (urlToCrawl.protocol === 'http:' && (!urlToCrawl.pathname || urlToCrawl.pathname === '/') &&
|
||||||
|
crawlerOptions.respondWith === 'html') {
|
||||||
|
throw new SecurityCompromiseError(`Your request is categorized as abuse. Please don't abuse our service. If you are sure you are not abusing, please authenticate yourself with an API key.`);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
const crawlOpts = this.configure(crawlerOptions);
|
const crawlOpts = this.configure(crawlerOptions);
|
||||||
|
|
||||||
|
|
||||||
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
|
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
|
||||||
const sseStream = new OutputServerEventStream();
|
const sseStream = new OutputServerEventStream();
|
||||||
rpcReflect.return(sseStream);
|
rpcReflect.return(sseStream);
|
||||||
@ -767,7 +772,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
async *cachedScrap(urlToCrawl: URL, crawlOpts?: ExtraScrappingOptions, cacheTolerance: number = this.cacheValidMs) {
|
async * cachedScrap(urlToCrawl: URL, crawlOpts?: ExtraScrappingOptions, cacheTolerance: number = this.cacheValidMs) {
|
||||||
let cache;
|
let cache;
|
||||||
if (cacheTolerance && !crawlOpts?.cookies?.length) {
|
if (cacheTolerance && !crawlOpts?.cookies?.length) {
|
||||||
cache = await this.queryCache(urlToCrawl, cacheTolerance);
|
cache = await this.queryCache(urlToCrawl, cacheTolerance);
|
||||||
@ -821,7 +826,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async *scrapMany(urls: URL[], options?: ScrappingOptions, cacheTolerance?: number) {
|
async * scrapMany(urls: URL[], options?: ScrappingOptions, cacheTolerance?: number) {
|
||||||
const iterators = urls.map((url) => this.cachedScrap(url, options, cacheTolerance));
|
const iterators = urls.map((url) => this.cachedScrap(url, options, cacheTolerance));
|
||||||
|
|
||||||
const results: (PageSnapshot | undefined)[] = iterators.map((_x) => undefined);
|
const results: (PageSnapshot | undefined)[] = iterators.map((_x) => undefined);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user