diff --git a/src/api/crawler.ts b/src/api/crawler.ts index 6e4fa5b..6e63068 100644 --- a/src/api/crawler.ts +++ b/src/api/crawler.ts @@ -153,8 +153,8 @@ export class CrawlerHost extends RPCHost { override async init() { await this.dependencyReady(); - if (this.puppeteerControl.ua) { - this.curlControl.impersonateChrome(this.puppeteerControl.ua.replace(/Headless/i, '')); + if (this.puppeteerControl.effectiveUA) { + this.curlControl.impersonateChrome(this.puppeteerControl.effectiveUA); } this.emit('ready'); diff --git a/src/services/puppeteer.ts b/src/services/puppeteer.ts index 72f588d..b7388df 100644 --- a/src/services/puppeteer.ts +++ b/src/services/puppeteer.ts @@ -510,6 +510,7 @@ export class PuppeteerControl extends AsyncService { pagePhase = new WeakMap(); lastPageCratedAt: number = 0; ua: string = ''; + effectiveUA: string = ''; concurrentRequestsPerPage: number = 32; pageReqCtrl = new WeakMap(); @@ -582,7 +583,8 @@ export class PuppeteerControl extends AsyncService { }); this.ua = await this.browser.userAgent(); this.logger.info(`Browser launched: ${this.browser.process()?.pid}, ${this.ua}`); - this.curlControl.impersonateChrome(this.ua.replace(/Headless/i, '')); + this.effectiveUA = this.ua.replace(/Headless/i, '').replace('Mozilla/5.0 (X11; Linux x86_64)', 'Mozilla/5.0 (Linux; Android 10; K)'); + this.curlControl.impersonateChrome(this.effectiveUA); await this.newPage('beware_deadlock').then((r) => this.__loadedPage.push(r)); @@ -615,7 +617,7 @@ export class PuppeteerControl extends AsyncService { } const preparations = []; - preparations.push(page.setUserAgent(this.ua.replace(/Headless/i, ''))); + preparations.push(page.setUserAgent(this.effectiveUA)); // preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`)); // preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`)); preparations.push(page.setBypassCSP(true));