feat(crawl): viewport options

This commit is contained in:
yanlong.wang 2024-12-24 19:07:48 +08:00
parent d8ad1cb6a1
commit 2606c445d9
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
3 changed files with 31 additions and 1 deletions

View File

@ -69,7 +69,7 @@ export class CrawlerHost extends RPCHost {
// Potential privacy issue, don't cache if cookies are used // Potential privacy issue, don't cache if cookies are used
return; return;
} }
if (options.injectFrameScripts?.length || options.injectPageScripts?.length) { if (options.injectFrameScripts?.length || options.injectPageScripts?.length || options.viewport) {
// Potentially mangled content, don't cache if scripts are injected // Potentially mangled content, don't cache if scripts are injected
return; return;
} }
@ -725,6 +725,7 @@ export class CrawlerHost extends RPCHost {
withShadowDom: opts.withShadowDom, withShadowDom: opts.withShadowDom,
locale: opts.locale, locale: opts.locale,
referer: opts.referer, referer: opts.referer,
viewport: opts.viewport,
}; };
if (opts.locale) { if (opts.locale) {

View File

@ -16,6 +16,25 @@ const CONTENT_FORMAT_VALUES = new Set<string>(Object.values(CONTENT_FORMAT));
export const IMAGE_RETENTION_MODES = ['none', 'all', 'alt', 'all_p', 'alt_p'] as const; export const IMAGE_RETENTION_MODES = ['none', 'all', 'alt', 'all_p', 'alt_p'] as const;
const IMAGE_RETENTION_MODE_VALUES = new Set<string>(IMAGE_RETENTION_MODES); const IMAGE_RETENTION_MODE_VALUES = new Set<string>(IMAGE_RETENTION_MODES);
/**
 * Viewport settings a caller may supply through the `viewport` crawler option.
 *
 * Only `width` and `height` declare defaults; every other field is optional
 * and stays undefined unless the caller provides it.
 *
 * NOTE(review): the field names look like they mirror Puppeteer's `Viewport`
 * shape (this object is eventually handed to `page.setViewport`) — confirm
 * before relying on Puppeteer's documented semantics for each field.
 */
class Viewport extends AutoCastable {
    /** Viewport width; the declared default is 1024. */
    @Prop({ default: 1024 })
    width!: number;

    /** Viewport height; the declared default is 1024. */
    @Prop({ default: 1024 })
    height!: number;

    /** Optional device scale factor. NOTE(review): presumably the device pixel ratio — confirm. */
    @Prop()
    deviceScaleFactor?: number;

    /** Optional mobile flag; no default is declared here. */
    @Prop()
    isMobile?: boolean;

    /** Optional landscape-orientation flag; no default is declared here. */
    @Prop()
    isLandscape?: boolean;

    /** Optional touch-support flag; no default is declared here. */
    @Prop()
    hasTouch?: boolean;
}
@Also({ @Also({
openapi: { openapi: {
operation: { operation: {
@ -279,6 +298,9 @@ export class CrawlerOptions extends AutoCastable {
@Prop() @Prop()
tokenBudget?: number; tokenBudget?: number;
@Prop()
viewport?: Viewport;
static override from(input: any) { static override from(input: any) {
const instance = super.from(input) as CrawlerOptions; const instance = super.from(input) as CrawlerOptions;
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as { const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
@ -432,6 +454,9 @@ export class CrawlerOptions extends AutoCastable {
if (this.injectFrameScript?.length || this.injectPageScript?.length) { if (this.injectFrameScript?.length || this.injectPageScript?.length) {
return false; return false;
} }
if (this.viewport) {
return false;
}
return true; return true;
} }

View File

@ -80,6 +80,7 @@ export interface ScrappingOptions {
extraHeaders?: Record<string, string>; extraHeaders?: Record<string, string>;
injectFrameScripts?: string[]; injectFrameScripts?: string[];
injectPageScripts?: string[]; injectPageScripts?: string[];
viewport?: Viewport;
} }
@ -863,6 +864,9 @@ export class PuppeteerControl extends AsyncService {
if (options?.overrideUserAgent) { if (options?.overrideUserAgent) {
await page.setUserAgent(options.overrideUserAgent); await page.setUserAgent(options.overrideUserAgent);
} }
if (options?.viewport) {
await page.setViewport(options.viewport);
}
let nextSnapshotDeferred = Defer(); let nextSnapshotDeferred = Defer();
const crippleListener = () => nextSnapshotDeferred.reject(new ServiceCrashedError({ message: `Browser crashed, try again` })); const crippleListener = () => nextSnapshotDeferred.reject(new ServiceCrashedError({ message: `Browser crashed, try again` }));