mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 06:45:53 +08:00
fix: expect cookie encoding issue
This commit is contained in:
parent
53bc91c31a
commit
67d4a9f45a
@ -1,7 +1,6 @@
|
||||
import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
|
||||
import type { Request, Response } from 'express';
|
||||
import type { CookieParam } from 'puppeteer';
|
||||
import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
||||
import { Cookie, parseString as parseSetCookieString } from 'set-cookie-parser';
|
||||
|
||||
export enum CONTENT_FORMAT {
|
||||
CONTENT = 'content',
|
||||
@ -218,7 +217,7 @@ export class CrawlerOptions extends AutoCastable {
|
||||
@Prop({
|
||||
arrayOf: String,
|
||||
})
|
||||
setCookies?: CookieParam[];
|
||||
setCookies?: Cookie[];
|
||||
|
||||
@Prop()
|
||||
proxyUrl?: string;
|
||||
@ -331,17 +330,17 @@ export class CrawlerOptions extends AutoCastable {
|
||||
instance.timeout ??= null;
|
||||
}
|
||||
|
||||
const cookies: CookieParam[] = [];
|
||||
const cookies: Cookie[] = [];
|
||||
const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
|
||||
if (Array.isArray(setCookieHeaders)) {
|
||||
for (const setCookie of setCookieHeaders) {
|
||||
cookies.push({
|
||||
...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam,
|
||||
...parseSetCookieString(setCookie, { decodeValues: true }),
|
||||
});
|
||||
}
|
||||
} else if (setCookieHeaders && typeof setCookieHeaders === 'string') {
|
||||
cookies.push({
|
||||
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
||||
...parseSetCookieString(setCookieHeaders, { decodeValues: true }),
|
||||
});
|
||||
}
|
||||
instance.setCookies = cookies;
|
||||
|
@ -1,10 +1,11 @@
|
||||
import os from 'os';
|
||||
import fs from 'fs';
|
||||
import { container, singleton } from 'tsyringe';
|
||||
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick } from 'civkit';
|
||||
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError } from 'civkit';
|
||||
import { Logger } from '../shared/services/logger';
|
||||
|
||||
import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page } from 'puppeteer';
|
||||
import type { Cookie } from 'set-cookie-parser';
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
|
||||
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
||||
@ -67,7 +68,7 @@ export interface ExtendedSnapshot extends PageSnapshot {
|
||||
|
||||
export interface ScrappingOptions {
|
||||
proxyUrl?: string;
|
||||
cookies?: CookieParam[];
|
||||
cookies?: Cookie[];
|
||||
favorScreenshot?: boolean;
|
||||
waitForSelector?: string | string[];
|
||||
minIntervalMs?: number;
|
||||
@ -817,13 +818,33 @@ export class PuppeteerControl extends AsyncService {
|
||||
}
|
||||
if (options?.cookies) {
|
||||
const mapped = options.cookies.map((x) => {
|
||||
if (x.domain || x.url) {
|
||||
return x;
|
||||
const draft: CookieParam = {
|
||||
name: x.name,
|
||||
value: encodeURIComponent(x.value),
|
||||
secure: x.secure,
|
||||
domain: x.domain,
|
||||
path: x.path,
|
||||
expires: x.expires ? Math.floor(x.expires.valueOf() / 1000) : undefined,
|
||||
sameSite: x.sameSite as any,
|
||||
};
|
||||
if (!draft.expires && x.maxAge) {
|
||||
draft.expires = Math.floor(Date.now() / 1000) + x.maxAge;
|
||||
}
|
||||
if (!draft.domain) {
|
||||
draft.url = parsedUrl.toString();
|
||||
}
|
||||
|
||||
return { ...x, url: parsedUrl.toString() };
|
||||
return draft;
|
||||
});
|
||||
await page.setCookie(...mapped);
|
||||
try {
|
||||
await page.setCookie(...mapped);
|
||||
} catch (err: any) {
|
||||
this.logger.warn(`Page ${sn}: Failed to set cookies`, { err: marshalErrorLike(err) });
|
||||
throw new ParamValidationError({
|
||||
path: 'cookies',
|
||||
message: `Failed to set cookies: ${err?.message}`
|
||||
});
|
||||
}
|
||||
}
|
||||
if (options?.overrideUserAgent) {
|
||||
await page.setUserAgent(options.overrideUserAgent);
|
||||
|
Loading…
x
Reference in New Issue
Block a user