mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-07-30 08:01:58 +08:00
saas: tweaks
This commit is contained in:
parent
efa6d9ed75
commit
b7d32b9cbc
@ -891,9 +891,6 @@ export class CrawlerHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
} else if (crawlOpts?.allocProxy && crawlOpts.allocProxy !== 'none' && !crawlOpts.proxyUrl) {
|
} else if (crawlOpts?.allocProxy && crawlOpts.allocProxy !== 'none' && !crawlOpts.proxyUrl) {
|
||||||
const proxyUrl = await this.proxyProvider.alloc(this.figureOutBestProxyCountry(crawlOpts));
|
const proxyUrl = await this.proxyProvider.alloc(this.figureOutBestProxyCountry(crawlOpts));
|
||||||
if (proxyUrl.protocol === 'socks5h:') {
|
|
||||||
proxyUrl.protocol = 'socks5:';
|
|
||||||
}
|
|
||||||
crawlOpts.proxyUrl = proxyUrl.href;
|
crawlOpts.proxyUrl = proxyUrl.href;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1242,7 +1239,6 @@ export class CrawlerHost extends RPCHost {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
retryDet = new WeakSet<ExtraScrappingOptions>();
|
|
||||||
@retryWith((err) => {
|
@retryWith((err) => {
|
||||||
if (err instanceof ServiceBadApproachError) {
|
if (err instanceof ServiceBadApproachError) {
|
||||||
return false;
|
return false;
|
||||||
@ -1263,12 +1259,7 @@ export class CrawlerHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const proxy = await this.proxyProvider.alloc(this.figureOutBestProxyCountry(opts));
|
const proxy = await this.proxyProvider.alloc(this.figureOutBestProxyCountry(opts));
|
||||||
if (opts) {
|
this.logger.debug(`Proxy allocated`, { proxy: proxy.href });
|
||||||
if (this.retryDet.has(opts) && proxy.protocol === 'socks5h:') {
|
|
||||||
proxy.protocol = 'socks5:';
|
|
||||||
}
|
|
||||||
this.retryDet.add(opts);
|
|
||||||
}
|
|
||||||
const r = await this.curlControl.sideLoad(url, {
|
const r = await this.curlControl.sideLoad(url, {
|
||||||
...opts,
|
...opts,
|
||||||
proxyUrl: proxy.href,
|
proxyUrl: proxy.href,
|
||||||
|
@ -136,7 +136,7 @@ export class SerpHost extends RPCHost {
|
|||||||
@Param('hl', { validate: (v: string) => WORLD_LANGUAGES.some(l => l.code === v) }) hl?: string,
|
@Param('hl', { validate: (v: string) => WORLD_LANGUAGES.some(l => l.code === v) }) hl?: string,
|
||||||
@Param('location') location?: string,
|
@Param('location') location?: string,
|
||||||
@Param('page') page?: number,
|
@Param('page') page?: number,
|
||||||
@Param('fallback', { default: true }) fallback?: boolean,
|
@Param('fallback') fallback?: boolean,
|
||||||
) {
|
) {
|
||||||
const authToken = auth.bearerToken;
|
const authToken = auth.bearerToken;
|
||||||
let highFreqKey: RateLimitCache | undefined;
|
let highFreqKey: RateLimitCache | undefined;
|
||||||
|
@ -16,7 +16,7 @@ import { ProxyProvider } from '../../shared/services/proxy-provider';
|
|||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
export class GoogleSERP extends AsyncService {
|
export class GoogleSERP extends AsyncService {
|
||||||
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
googleDomain = process.env.OVERRIDE_GOOGLE_DOMAIN || 'www.google.com';
|
googleDomain = process.env.OVERRIDE_GOOGLE_DOMAIN || 'www.google.com';
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
@ -36,7 +36,6 @@ export class GoogleSERP extends AsyncService {
|
|||||||
this.emit('ready');
|
this.emit('ready');
|
||||||
}
|
}
|
||||||
|
|
||||||
retryDet = new WeakSet<ScrappingOptions>();
|
|
||||||
@retryWith((err) => {
|
@retryWith((err) => {
|
||||||
if (err instanceof ServiceBadApproachError) {
|
if (err instanceof ServiceBadApproachError) {
|
||||||
return false;
|
return false;
|
||||||
@ -59,12 +58,7 @@ export class GoogleSERP extends AsyncService {
|
|||||||
const proxy = await this.proxyProvider.alloc(
|
const proxy = await this.proxyProvider.alloc(
|
||||||
process.env.PREFERRED_PROXY_COUNTRY || 'auto'
|
process.env.PREFERRED_PROXY_COUNTRY || 'auto'
|
||||||
);
|
);
|
||||||
if (opts) {
|
this.logger.debug(`Proxy allocated`, { proxy: proxy.href });
|
||||||
if (this.retryDet.has(opts) && proxy.protocol === 'socks5h:') {
|
|
||||||
proxy.protocol = 'socks5:';
|
|
||||||
}
|
|
||||||
this.retryDet.add(opts);
|
|
||||||
}
|
|
||||||
const r = await this.curlControl.sideLoad(url, {
|
const r = await this.curlControl.sideLoad(url, {
|
||||||
...opts,
|
...opts,
|
||||||
proxyUrl: proxy.href,
|
proxyUrl: proxy.href,
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit 936ebc4158a3007b59496822ef650e995c189614
|
Subproject commit 424f50ca8b6277d74185e16aa67ff2b366d9f727
|
Loading…
x
Reference in New Issue
Block a user