mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 14:19:13 +08:00
fix: expect cookie encoding issue
This commit is contained in:
parent
53bc91c31a
commit
67d4a9f45a
@ -1,7 +1,6 @@
|
|||||||
import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
|
import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
|
||||||
import type { Request, Response } from 'express';
|
import type { Request, Response } from 'express';
|
||||||
import type { CookieParam } from 'puppeteer';
|
import { Cookie, parseString as parseSetCookieString } from 'set-cookie-parser';
|
||||||
import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
|
||||||
|
|
||||||
export enum CONTENT_FORMAT {
|
export enum CONTENT_FORMAT {
|
||||||
CONTENT = 'content',
|
CONTENT = 'content',
|
||||||
@ -218,7 +217,7 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
@Prop({
|
@Prop({
|
||||||
arrayOf: String,
|
arrayOf: String,
|
||||||
})
|
})
|
||||||
setCookies?: CookieParam[];
|
setCookies?: Cookie[];
|
||||||
|
|
||||||
@Prop()
|
@Prop()
|
||||||
proxyUrl?: string;
|
proxyUrl?: string;
|
||||||
@ -331,17 +330,17 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
instance.timeout ??= null;
|
instance.timeout ??= null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const cookies: CookieParam[] = [];
|
const cookies: Cookie[] = [];
|
||||||
const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
|
const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
|
||||||
if (Array.isArray(setCookieHeaders)) {
|
if (Array.isArray(setCookieHeaders)) {
|
||||||
for (const setCookie of setCookieHeaders) {
|
for (const setCookie of setCookieHeaders) {
|
||||||
cookies.push({
|
cookies.push({
|
||||||
...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam,
|
...parseSetCookieString(setCookie, { decodeValues: true }),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
|
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
|
||||||
cookies.push({
|
cookies.push({
|
||||||
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
...parseSetCookieString(setCookieHeaders, { decodeValues: true }),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
instance.setCookies = cookies;
|
instance.setCookies = cookies;
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
import os from 'os';
|
import os from 'os';
|
||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import { container, singleton } from 'tsyringe';
|
import { container, singleton } from 'tsyringe';
|
||||||
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick } from 'civkit';
|
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError } from 'civkit';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { Logger } from '../shared/services/logger';
|
||||||
|
|
||||||
import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page } from 'puppeteer';
|
import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page } from 'puppeteer';
|
||||||
|
import type { Cookie } from 'set-cookie-parser';
|
||||||
import puppeteer from 'puppeteer-extra';
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
|
||||||
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
||||||
@ -67,7 +68,7 @@ export interface ExtendedSnapshot extends PageSnapshot {
|
|||||||
|
|
||||||
export interface ScrappingOptions {
|
export interface ScrappingOptions {
|
||||||
proxyUrl?: string;
|
proxyUrl?: string;
|
||||||
cookies?: CookieParam[];
|
cookies?: Cookie[];
|
||||||
favorScreenshot?: boolean;
|
favorScreenshot?: boolean;
|
||||||
waitForSelector?: string | string[];
|
waitForSelector?: string | string[];
|
||||||
minIntervalMs?: number;
|
minIntervalMs?: number;
|
||||||
@ -817,13 +818,33 @@ export class PuppeteerControl extends AsyncService {
|
|||||||
}
|
}
|
||||||
if (options?.cookies) {
|
if (options?.cookies) {
|
||||||
const mapped = options.cookies.map((x) => {
|
const mapped = options.cookies.map((x) => {
|
||||||
if (x.domain || x.url) {
|
const draft: CookieParam = {
|
||||||
return x;
|
name: x.name,
|
||||||
|
value: encodeURIComponent(x.value),
|
||||||
|
secure: x.secure,
|
||||||
|
domain: x.domain,
|
||||||
|
path: x.path,
|
||||||
|
expires: x.expires ? Math.floor(x.expires.valueOf() / 1000) : undefined,
|
||||||
|
sameSite: x.sameSite as any,
|
||||||
|
};
|
||||||
|
if (!draft.expires && x.maxAge) {
|
||||||
|
draft.expires = Math.floor(Date.now() / 1000) + x.maxAge;
|
||||||
|
}
|
||||||
|
if (!draft.domain) {
|
||||||
|
draft.url = parsedUrl.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
return { ...x, url: parsedUrl.toString() };
|
return draft;
|
||||||
});
|
});
|
||||||
await page.setCookie(...mapped);
|
try {
|
||||||
|
await page.setCookie(...mapped);
|
||||||
|
} catch (err: any) {
|
||||||
|
this.logger.warn(`Page ${sn}: Failed to set cookies`, { err: marshalErrorLike(err) });
|
||||||
|
throw new ParamValidationError({
|
||||||
|
path: 'cookies',
|
||||||
|
message: `Failed to set cookies: ${err?.message}`
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (options?.overrideUserAgent) {
|
if (options?.overrideUserAgent) {
|
||||||
await page.setUserAgent(options.overrideUserAgent);
|
await page.setUserAgent(options.overrideUserAgent);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user