diff --git a/backend/functions/src/dto/scrapping-options.ts b/backend/functions/src/dto/scrapping-options.ts index b4b8d3b..5c46e20 100644 --- a/backend/functions/src/dto/scrapping-options.ts +++ b/backend/functions/src/dto/scrapping-options.ts @@ -1,7 +1,6 @@ import { Also, AutoCastable, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined import type { Request, Response } from 'express'; -import type { CookieParam } from 'puppeteer'; -import { parseString as parseSetCookieString } from 'set-cookie-parser'; +import { Cookie, parseString as parseSetCookieString } from 'set-cookie-parser'; export enum CONTENT_FORMAT { CONTENT = 'content', @@ -218,7 +217,7 @@ export class CrawlerOptions extends AutoCastable { @Prop({ arrayOf: String, }) - setCookies?: CookieParam[]; + setCookies?: Cookie[]; @Prop() proxyUrl?: string; @@ -331,17 +330,17 @@ export class CrawlerOptions extends AutoCastable { instance.timeout ??= null; } - const cookies: CookieParam[] = []; + const cookies: Cookie[] = []; const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]); if (Array.isArray(setCookieHeaders)) { for (const setCookie of setCookieHeaders) { cookies.push({ - ...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam, + ...parseSetCookieString(setCookie, { decodeValues: true }), }); } } else if (setCookieHeaders && typeof setCookieHeaders === 'string') { cookies.push({ - ...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam, + ...parseSetCookieString(setCookieHeaders, { decodeValues: true }), }); } instance.setCookies = cookies; diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts index 4aac333..9b19887 100644 --- a/backend/functions/src/services/puppeteer.ts +++ b/backend/functions/src/services/puppeteer.ts @@ -1,10 +1,11 @@ import os from 'os'; import fs from 'fs'; import { container, singleton } from 'tsyringe'; -import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick } from 'civkit'; +import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError } from 'civkit'; import { Logger } from '../shared/services/logger'; import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page } from 'puppeteer'; +import type { Cookie } from 'set-cookie-parser'; import puppeteer from 'puppeteer-extra'; import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources'; @@ -67,7 +68,7 @@ export interface ExtendedSnapshot extends PageSnapshot { export interface ScrappingOptions { proxyUrl?: string; - cookies?: CookieParam[]; + cookies?: Cookie[]; favorScreenshot?: boolean; waitForSelector?: string | string[]; minIntervalMs?: number; @@ -817,13 +818,33 @@ export class PuppeteerControl extends AsyncService { } if (options?.cookies) { const mapped = options.cookies.map((x) => { - if (x.domain || x.url) { - return x; + const draft: CookieParam = { + name: x.name, + value: encodeURIComponent(x.value), + secure: x.secure, + domain: x.domain, + path: x.path, + expires: x.expires ? Math.floor(x.expires.valueOf() / 1000) : undefined, + sameSite: x.sameSite as any, + }; + if (!draft.expires && x.maxAge) { + draft.expires = Math.floor(Date.now() / 1000) + x.maxAge; + } + if (!draft.domain) { + draft.url = parsedUrl.toString(); } - return { ...x, url: parsedUrl.toString() }; + return draft; }); - await page.setCookie(...mapped); + try { + await page.setCookie(...mapped); + } catch (err: any) { + this.logger.warn(`Page ${sn}: Failed to set cookies`, { err: marshalErrorLike(err) }); + throw new ParamValidationError({ + path: 'cookies', + message: `Failed to set cookies: ${err?.message}` + }); + } } if (options?.overrideUserAgent) { await page.setUserAgent(options.overrideUserAgent);