fix: UA switch (use a shared effectiveUA for curl impersonation and Puppeteer pages)

This commit is contained in:
Yanlong Wang 2025-04-14 21:48:09 +08:00
parent 5ba93067d2
commit 6b1bfdaf1a
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 6 additions and 4 deletions

View File

@ -153,8 +153,8 @@ export class CrawlerHost extends RPCHost {
override async init() {
await this.dependencyReady();
if (this.puppeteerControl.ua) {
this.curlControl.impersonateChrome(this.puppeteerControl.ua.replace(/Headless/i, ''));
if (this.puppeteerControl.effectiveUA) {
this.curlControl.impersonateChrome(this.puppeteerControl.effectiveUA);
}
this.emit('ready');

View File

@ -510,6 +510,7 @@ export class PuppeteerControl extends AsyncService {
pagePhase = new WeakMap<Page, 'idle' | 'active' | 'background'>();
lastPageCratedAt: number = 0;
ua: string = '';
effectiveUA: string = '';
concurrentRequestsPerPage: number = 32;
pageReqCtrl = new WeakMap<Page, PageReqCtrlKit>();
@ -582,7 +583,8 @@ export class PuppeteerControl extends AsyncService {
});
this.ua = await this.browser.userAgent();
this.logger.info(`Browser launched: ${this.browser.process()?.pid}, ${this.ua}`);
this.curlControl.impersonateChrome(this.ua.replace(/Headless/i, ''));
this.effectiveUA = this.ua.replace(/Headless/i, '').replace('Mozilla/5.0 (X11; Linux x86_64)', 'Mozilla/5.0 (Linux; Android 10; K)');
this.curlControl.impersonateChrome(this.effectiveUA);
await this.newPage('beware_deadlock').then((r) => this.__loadedPage.push(r));
@ -615,7 +617,7 @@ export class PuppeteerControl extends AsyncService {
}
const preparations = [];
preparations.push(page.setUserAgent(this.ua.replace(/Headless/i, '')));
preparations.push(page.setUserAgent(this.effectiveUA));
// preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
// preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
preparations.push(page.setBypassCSP(true));