From de50c93825e3cc5c3b8ef532a4e4785c5d5ecfc7 Mon Sep 17 00:00:00 2001
From: Zhaofeng Miao <522856232@qq.com>
Date: Tue, 20 Aug 2024 16:14:48 +0800
Subject: [PATCH] feat: expose X-Locale parameter

Read the X-Locale request header into CrawlerOptions.locale and forward it
to the scraper as ScrappingOptions.locale. When a locale is set, the page
sends it as the Accept-Language header and reports it through
navigator.language and navigator.languages.

The navigator override runs inside the page, so the locale is handed to
evaluateOnNewDocument as an argument instead of being captured from the
Node-side `options` object.
---
 .../functions/src/cloud-functions/crawler.ts |  1 +
 .../functions/src/dto/scrapping-options.ts   | 13 +++++++++++
 backend/functions/src/services/puppeteer.ts  | 22 +++++++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts
index 9cc12fc..7a81be4 100644
--- a/backend/functions/src/cloud-functions/crawler.ts
+++ b/backend/functions/src/cloud-functions/crawler.ts
@@ -1106,6 +1106,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
             overrideUserAgent: opts.userAgent,
             timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
             withIframe: opts.withIframe,
+            locale: opts.locale,
         };
 
         return crawlOpts;
diff --git a/backend/functions/src/dto/scrapping-options.ts b/backend/functions/src/dto/scrapping-options.ts
index c4f6ad6..e4615a0 100644
--- a/backend/functions/src/dto/scrapping-options.ts
+++ b/backend/functions/src/dto/scrapping-options.ts
@@ -111,6 +111,11 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
                     in: 'header',
                     schema: { type: 'string' }
                 },
+                'X-Locale': {
+                    description: 'Specify browser locale for the page.',
+                    in: 'header',
+                    schema: { type: 'string' }
+                }
             }
         }
     }
@@ -188,6 +193,9 @@ export class CrawlerOptions extends AutoCastable {
     })
     timeout?: number | null;
 
+    @Prop()
+    locale?: string;
+
     static override from(input: any) {
         const instance = super.from(input) as CrawlerOptions;
         const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
@@ -200,6 +208,11 @@ export class CrawlerOptions extends AutoCastable {
             instance.respondWith = customMode;
         }
 
+        const locale = ctx?.req.get('x-locale');
+        if (locale !== undefined) {
+            instance.locale = locale;
+        }
+
         const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
         if (withGeneratedAlt !== undefined) {
             instance.withGeneratedAlt = Boolean(withGeneratedAlt);
diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts
index c194e35..3ab0dec 100644
--- a/backend/functions/src/services/puppeteer.ts
+++ b/backend/functions/src/services/puppeteer.ts
@@ -68,6 +68,7 @@ export interface ScrappingOptions {
     minIntervalMs?: number;
     overrideUserAgent?: string;
     timeoutMs?: number;
+    locale?: string;
 }
 
 
@@ -472,6 +473,27 @@ document.addEventListener('load', handlePageLoad);
         const page = await this.getNextPage();
         const sn = this.snMap.get(page);
         this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
+
+        this.logger.info(`Locale setting: ${options?.locale}`);
+        if (options?.locale) {
+            await page.setExtraHTTPHeaders({
+                'Accept-Language': options.locale
+            });
+            // Runs inside the page, where `options` does not exist: pass the locale as an argument.
+            await page.evaluateOnNewDocument((locale: string) => {
+                Object.defineProperty(navigator, "language", {
+                    get: function() {
+                        return locale;
+                    }
+                });
+                Object.defineProperty(navigator, "languages", {
+                    get: function() {
+                        return [locale];
+                    }
+                });
+            }, options.locale);
+        }
+
         if (options?.proxyUrl) {
             await page.useProxy(options.proxyUrl);
         }