mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 01:55:59 +08:00
feat(crawl): viewport options
This commit is contained in:
parent
d8ad1cb6a1
commit
2606c445d9
@ -69,7 +69,7 @@ export class CrawlerHost extends RPCHost {
|
|||||||
// Potential privacy issue, dont cache if cookies are used
|
// Potential privacy issue, dont cache if cookies are used
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (options.injectFrameScripts?.length || options.injectPageScripts?.length) {
|
if (options.injectFrameScripts?.length || options.injectPageScripts?.length || options.viewport) {
|
||||||
// Potentially mangeled content, dont cache if scripts are injected
|
// Potentially mangeled content, dont cache if scripts are injected
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -725,6 +725,7 @@ export class CrawlerHost extends RPCHost {
|
|||||||
withShadowDom: opts.withShadowDom,
|
withShadowDom: opts.withShadowDom,
|
||||||
locale: opts.locale,
|
locale: opts.locale,
|
||||||
referer: opts.referer,
|
referer: opts.referer,
|
||||||
|
viewport: opts.viewport,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (opts.locale) {
|
if (opts.locale) {
|
||||||
|
@ -16,6 +16,25 @@ const CONTENT_FORMAT_VALUES = new Set<string>(Object.values(CONTENT_FORMAT));
|
|||||||
export const IMAGE_RETENTION_MODES = ['none', 'all', 'alt', 'all_p', 'alt_p'] as const;
|
export const IMAGE_RETENTION_MODES = ['none', 'all', 'alt', 'all_p', 'alt_p'] as const;
|
||||||
const IMAGE_RETENTION_MODE_VALUES = new Set<string>(IMAGE_RETENTION_MODES);
|
const IMAGE_RETENTION_MODE_VALUES = new Set<string>(IMAGE_RETENTION_MODES);
|
||||||
|
|
||||||
|
class Viewport extends AutoCastable {
|
||||||
|
@Prop({
|
||||||
|
default: 1024
|
||||||
|
})
|
||||||
|
width!: number;
|
||||||
|
@Prop({
|
||||||
|
default: 1024
|
||||||
|
})
|
||||||
|
height!: number;
|
||||||
|
@Prop()
|
||||||
|
deviceScaleFactor?: number;
|
||||||
|
@Prop()
|
||||||
|
isMobile?: boolean;
|
||||||
|
@Prop()
|
||||||
|
isLandscape?: boolean;
|
||||||
|
@Prop()
|
||||||
|
hasTouch?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
@Also({
|
@Also({
|
||||||
openapi: {
|
openapi: {
|
||||||
operation: {
|
operation: {
|
||||||
@ -279,6 +298,9 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
@Prop()
|
@Prop()
|
||||||
tokenBudget?: number;
|
tokenBudget?: number;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
viewport?: Viewport;
|
||||||
|
|
||||||
static override from(input: any) {
|
static override from(input: any) {
|
||||||
const instance = super.from(input) as CrawlerOptions;
|
const instance = super.from(input) as CrawlerOptions;
|
||||||
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
||||||
@ -432,6 +454,9 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
if (this.injectFrameScript?.length || this.injectPageScript?.length) {
|
if (this.injectFrameScript?.length || this.injectPageScript?.length) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (this.viewport) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -80,6 +80,7 @@ export interface ScrappingOptions {
|
|||||||
extraHeaders?: Record<string, string>;
|
extraHeaders?: Record<string, string>;
|
||||||
injectFrameScripts?: string[];
|
injectFrameScripts?: string[];
|
||||||
injectPageScripts?: string[];
|
injectPageScripts?: string[];
|
||||||
|
viewport?: Viewport;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -863,6 +864,9 @@ export class PuppeteerControl extends AsyncService {
|
|||||||
if (options?.overrideUserAgent) {
|
if (options?.overrideUserAgent) {
|
||||||
await page.setUserAgent(options.overrideUserAgent);
|
await page.setUserAgent(options.overrideUserAgent);
|
||||||
}
|
}
|
||||||
|
if (options?.viewport) {
|
||||||
|
await page.setViewport(options.viewport);
|
||||||
|
}
|
||||||
|
|
||||||
let nextSnapshotDeferred = Defer();
|
let nextSnapshotDeferred = Defer();
|
||||||
const crippleListener = () => nextSnapshotDeferred.reject(new ServiceCrashedError({ message: `Browser crashed, try again` }));
|
const crippleListener = () => nextSnapshotDeferred.reject(new ServiceCrashedError({ message: `Browser crashed, try again` }));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user