mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-14 09:25:59 +08:00
fix: ua switch
This commit is contained in:
parent
5ba93067d2
commit
6b1bfdaf1a
@ -153,8 +153,8 @@ export class CrawlerHost extends RPCHost {
|
||||
override async init() {
|
||||
await this.dependencyReady();
|
||||
|
||||
if (this.puppeteerControl.ua) {
|
||||
this.curlControl.impersonateChrome(this.puppeteerControl.ua.replace(/Headless/i, ''));
|
||||
if (this.puppeteerControl.effectiveUA) {
|
||||
this.curlControl.impersonateChrome(this.puppeteerControl.effectiveUA);
|
||||
}
|
||||
|
||||
this.emit('ready');
|
||||
|
@ -510,6 +510,7 @@ export class PuppeteerControl extends AsyncService {
|
||||
pagePhase = new WeakMap<Page, 'idle' | 'active' | 'background'>();
|
||||
lastPageCratedAt: number = 0;
|
||||
ua: string = '';
|
||||
effectiveUA: string = '';
|
||||
|
||||
concurrentRequestsPerPage: number = 32;
|
||||
pageReqCtrl = new WeakMap<Page, PageReqCtrlKit>();
|
||||
@ -582,7 +583,8 @@ export class PuppeteerControl extends AsyncService {
|
||||
});
|
||||
this.ua = await this.browser.userAgent();
|
||||
this.logger.info(`Browser launched: ${this.browser.process()?.pid}, ${this.ua}`);
|
||||
this.curlControl.impersonateChrome(this.ua.replace(/Headless/i, ''));
|
||||
this.effectiveUA = this.ua.replace(/Headless/i, '').replace('Mozilla/5.0 (X11; Linux x86_64)', 'Mozilla/5.0 (Linux; Android 10; K)');
|
||||
this.curlControl.impersonateChrome(this.effectiveUA);
|
||||
|
||||
await this.newPage('beware_deadlock').then((r) => this.__loadedPage.push(r));
|
||||
|
||||
@ -615,7 +617,7 @@ export class PuppeteerControl extends AsyncService {
|
||||
}
|
||||
const preparations = [];
|
||||
|
||||
preparations.push(page.setUserAgent(this.ua.replace(/Headless/i, '')));
|
||||
preparations.push(page.setUserAgent(this.effectiveUA));
|
||||
// preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
|
||||
// preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
|
||||
preparations.push(page.setBypassCSP(true));
|
||||
|
Loading…
x
Reference in New Issue
Block a user