mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 12:45:53 +08:00
feat: add referer param
This commit is contained in:
parent
080056e889
commit
7e6c2fcf48
@ -1123,6 +1123,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
|
timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
|
||||||
withIframe: opts.withIframe,
|
withIframe: opts.withIframe,
|
||||||
locale: opts.locale,
|
locale: opts.locale,
|
||||||
|
referer: opts.referer,
|
||||||
};
|
};
|
||||||
|
|
||||||
return crawlOpts;
|
return crawlOpts;
|
||||||
|
@ -115,6 +115,11 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
|||||||
description: 'Specify browser locale for the page.',
|
description: 'Specify browser locale for the page.',
|
||||||
in: 'header',
|
in: 'header',
|
||||||
schema: { type: 'string' }
|
schema: { type: 'string' }
|
||||||
|
},
|
||||||
|
'X-Referer': {
|
||||||
|
description: 'Specify referer for the page.',
|
||||||
|
in: 'header',
|
||||||
|
schema: { type: 'string' }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -201,6 +206,9 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
@Prop()
|
@Prop()
|
||||||
locale?: string;
|
locale?: string;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
referer?: string;
|
||||||
|
|
||||||
static override from(input: any) {
|
static override from(input: any) {
|
||||||
const instance = super.from(input) as CrawlerOptions;
|
const instance = super.from(input) as CrawlerOptions;
|
||||||
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
||||||
@ -218,6 +226,11 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
instance.locale = locale;
|
instance.locale = locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const referer = ctx?.req.get('x-referer');
|
||||||
|
if (referer !== undefined) {
|
||||||
|
instance.referer = referer;
|
||||||
|
}
|
||||||
|
|
||||||
const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
|
const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
|
||||||
if (withGeneratedAlt !== undefined) {
|
if (withGeneratedAlt !== undefined) {
|
||||||
instance.withGeneratedAlt = Boolean(withGeneratedAlt);
|
instance.withGeneratedAlt = Boolean(withGeneratedAlt);
|
||||||
|
@ -4,7 +4,7 @@ import { container, singleton } from 'tsyringe';
|
|||||||
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, maxConcurrency } from 'civkit';
|
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, maxConcurrency } from 'civkit';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { Logger } from '../shared/services/logger';
|
||||||
|
|
||||||
import type { Browser, CookieParam, Page } from 'puppeteer';
|
import type { Browser, CookieParam, GoToOptions, Page } from 'puppeteer';
|
||||||
import puppeteer from 'puppeteer-extra';
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
|
||||||
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
||||||
@ -69,6 +69,7 @@ export interface ScrappingOptions {
|
|||||||
overrideUserAgent?: string;
|
overrideUserAgent?: string;
|
||||||
timeoutMs?: number;
|
timeoutMs?: number;
|
||||||
locale?: string;
|
locale?: string;
|
||||||
|
referer?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -545,11 +546,16 @@ document.addEventListener('load', handlePageLoad);
|
|||||||
});
|
});
|
||||||
|
|
||||||
const timeout = options?.timeoutMs || 30_000;
|
const timeout = options?.timeoutMs || 30_000;
|
||||||
|
const goToOptions: GoToOptions = {
|
||||||
const gotoPromise = page.goto(url, {
|
|
||||||
waitUntil: ['load', 'domcontentloaded', 'networkidle0'],
|
waitUntil: ['load', 'domcontentloaded', 'networkidle0'],
|
||||||
timeout,
|
timeout,
|
||||||
})
|
};
|
||||||
|
|
||||||
|
if (options?.referer) {
|
||||||
|
goToOptions.referer = options.referer;
|
||||||
|
}
|
||||||
|
|
||||||
|
const gotoPromise = page.goto(url, goToOptions)
|
||||||
.catch((err) => {
|
.catch((err) => {
|
||||||
if (err instanceof TimeoutError) {
|
if (err instanceof TimeoutError) {
|
||||||
this.logger.warn(`Page ${sn}: Browsing of ${url} timed out`, { err: marshalErrorLike(err) });
|
this.logger.warn(`Page ${sn}: Browsing of ${url} timed out`, { err: marshalErrorLike(err) });
|
||||||
|
Loading…
x
Reference in New Issue
Block a user