From c7860e615ccef7d84e1d941028021a50011fa5c0 Mon Sep 17 00:00:00 2001 From: Yanlong Wang Date: Fri, 16 Aug 2024 19:37:13 +0800 Subject: [PATCH] fix: set-cookie --- .../functions/src/cloud-functions/searcher.ts | 16 ---------------- backend/functions/src/dto/scrapping-options.ts | 1 + backend/functions/src/services/puppeteer.ts | 9 ++++++++- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/backend/functions/src/cloud-functions/searcher.ts b/backend/functions/src/cloud-functions/searcher.ts index 9a8de51..00dd898 100644 --- a/backend/functions/src/cloud-functions/searcher.ts +++ b/backend/functions/src/cloud-functions/searcher.ts @@ -12,9 +12,6 @@ import { Request, Response } from 'express'; import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth'; import { BraveSearchExplicitOperatorsDto, BraveSearchService } from '../services/brave-search'; import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler'; -import { CookieParam } from 'puppeteer'; - -import { parseString as parseSetCookieString } from 'set-cookie-parser'; import { WebSearchQueryParams } from '../shared/3rd-party/brave-search'; import { SearchResult } from '../db/searched'; import { WebSearchApiResponse, SearchResult as WebSearchResult } from '../shared/3rd-party/brave-types'; @@ -152,19 +149,6 @@ export class SearcherHost extends RPCHost { delete crawlerOptions.html; const crawlOpts = this.crawler.configure(crawlerOptions); - const cookies: CookieParam[] = []; - const setCookieHeaders = ctx.req.headers['x-set-cookie']; - if (Array.isArray(setCookieHeaders)) { - for (const setCookie of setCookieHeaders) { - cookies.push({ - ...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam, - }); - } - } else if (setCookieHeaders) { - cookies.push({ - ...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam, - }); - } const searchQuery = braveSearchExplicitOperators.addTo(ctx.req.path.slice(1)); const r = await this.cachedWebSearch({ q: searchQuery, diff --git a/backend/functions/src/dto/scrapping-options.ts b/backend/functions/src/dto/scrapping-options.ts index 6bb2763..c4f6ad6 100644 --- a/backend/functions/src/dto/scrapping-options.ts +++ b/backend/functions/src/dto/scrapping-options.ts @@ -266,6 +266,7 @@ export class CrawlerOptions extends AutoCastable { ...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam, }); } + instance.setCookies = cookies; const proxyUrl = ctx?.req.get('x-proxy-url'); instance.proxyUrl ??= proxyUrl; diff --git a/backend/functions/src/services/puppeteer.ts b/backend/functions/src/services/puppeteer.ts index b43a92d..c194e35 100644 --- a/backend/functions/src/services/puppeteer.ts +++ b/backend/functions/src/services/puppeteer.ts @@ -476,7 +476,14 @@ document.addEventListener('load', handlePageLoad); await page.useProxy(options.proxyUrl); } if (options?.cookies) { - await page.setCookie(...options.cookies); + const mapped = options.cookies.map((x) => { + if (x.domain || x.url) { + return x; + } + + return { ...x, url: parsedUrl.toString() }; + }); + await page.setCookie(...mapped); } if (options?.overrideUserAgent) { await page.setUserAgent(options.overrideUserAgent);