mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-15 08:25:54 +08:00
feat: expose X-Locale parameter
This commit is contained in:
parent
fb5bd58ee4
commit
de50c93825
@ -1106,6 +1106,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
overrideUserAgent: opts.userAgent,
|
overrideUserAgent: opts.userAgent,
|
||||||
timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
|
timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
|
||||||
withIframe: opts.withIframe,
|
withIframe: opts.withIframe,
|
||||||
|
locale: opts.locale,
|
||||||
};
|
};
|
||||||
|
|
||||||
return crawlOpts;
|
return crawlOpts;
|
||||||
|
@ -111,6 +111,11 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
|||||||
in: 'header',
|
in: 'header',
|
||||||
schema: { type: 'string' }
|
schema: { type: 'string' }
|
||||||
},
|
},
|
||||||
|
'X-Locale': {
|
||||||
|
description: 'Specify browser locale for the page.',
|
||||||
|
in: 'header',
|
||||||
|
schema: { type: 'string' }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -188,6 +193,9 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
})
|
})
|
||||||
timeout?: number | null;
|
timeout?: number | null;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
locale?: string;
|
||||||
|
|
||||||
static override from(input: any) {
|
static override from(input: any) {
|
||||||
const instance = super.from(input) as CrawlerOptions;
|
const instance = super.from(input) as CrawlerOptions;
|
||||||
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
||||||
@ -200,6 +208,11 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
instance.respondWith = customMode;
|
instance.respondWith = customMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const locale = ctx?.req.get('x-locale');
|
||||||
|
if (locale !== undefined) {
|
||||||
|
instance.locale = locale;
|
||||||
|
}
|
||||||
|
|
||||||
const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
|
const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
|
||||||
if (withGeneratedAlt !== undefined) {
|
if (withGeneratedAlt !== undefined) {
|
||||||
instance.withGeneratedAlt = Boolean(withGeneratedAlt);
|
instance.withGeneratedAlt = Boolean(withGeneratedAlt);
|
||||||
|
@ -68,6 +68,7 @@ export interface ScrappingOptions {
|
|||||||
minIntervalMs?: number;
|
minIntervalMs?: number;
|
||||||
overrideUserAgent?: string;
|
overrideUserAgent?: string;
|
||||||
timeoutMs?: number;
|
timeoutMs?: number;
|
||||||
|
locale?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -472,6 +473,27 @@ document.addEventListener('load', handlePageLoad);
|
|||||||
const page = await this.getNextPage();
|
const page = await this.getNextPage();
|
||||||
const sn = this.snMap.get(page);
|
const sn = this.snMap.get(page);
|
||||||
this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
|
this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
|
||||||
|
|
||||||
|
this.logger.info(`Locale setting: ${options?.locale}`);
if (options?.locale) {
    // Advertise the requested locale on requests issued by this page.
    await page.setExtraHTTPHeaders({
        'Accept-Language': options.locale
    });

    // The callback below is serialized and executed inside the page, not in Node.
    // It therefore cannot close over Node-side variables such as `options`;
    // referencing them would throw a ReferenceError whenever a script reads
    // navigator.language(s). The locale is passed as an explicit argument instead.
    await page.evaluateOnNewDocument((pageLocale: string) => {
        Object.defineProperty(navigator, "language", {
            get: function() {
                return pageLocale;
            }
        });
        Object.defineProperty(navigator, "languages", {
            get: function() {
                return [pageLocale];
            }
        });
    }, options.locale);
}
|
||||||
|
|
||||||
if (options?.proxyUrl) {
|
if (options?.proxyUrl) {
|
||||||
await page.useProxy(options.proxyUrl);
|
await page.useProxy(options.proxyUrl);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user