mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 03:16:00 +08:00
fix: set-cookie
This commit is contained in:
parent
df58fcb3fa
commit
c7860e615c
@ -12,9 +12,6 @@ import { Request, Response } from 'express';
|
|||||||
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
||||||
import { BraveSearchExplicitOperatorsDto, BraveSearchService } from '../services/brave-search';
|
import { BraveSearchExplicitOperatorsDto, BraveSearchService } from '../services/brave-search';
|
||||||
import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
|
import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
|
||||||
import { CookieParam } from 'puppeteer';
|
|
||||||
|
|
||||||
import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
|
||||||
import { WebSearchQueryParams } from '../shared/3rd-party/brave-search';
|
import { WebSearchQueryParams } from '../shared/3rd-party/brave-search';
|
||||||
import { SearchResult } from '../db/searched';
|
import { SearchResult } from '../db/searched';
|
||||||
import { WebSearchApiResponse, SearchResult as WebSearchResult } from '../shared/3rd-party/brave-types';
|
import { WebSearchApiResponse, SearchResult as WebSearchResult } from '../shared/3rd-party/brave-types';
|
||||||
@ -152,19 +149,6 @@ export class SearcherHost extends RPCHost {
|
|||||||
delete crawlerOptions.html;
|
delete crawlerOptions.html;
|
||||||
|
|
||||||
const crawlOpts = this.crawler.configure(crawlerOptions);
|
const crawlOpts = this.crawler.configure(crawlerOptions);
|
||||||
const cookies: CookieParam[] = [];
|
|
||||||
const setCookieHeaders = ctx.req.headers['x-set-cookie'];
|
|
||||||
if (Array.isArray(setCookieHeaders)) {
|
|
||||||
for (const setCookie of setCookieHeaders) {
|
|
||||||
cookies.push({
|
|
||||||
...parseSetCookieString(setCookie, { decodeValues: false }) as CookieParam,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (setCookieHeaders) {
|
|
||||||
cookies.push({
|
|
||||||
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
const searchQuery = braveSearchExplicitOperators.addTo(ctx.req.path.slice(1));
|
const searchQuery = braveSearchExplicitOperators.addTo(ctx.req.path.slice(1));
|
||||||
const r = await this.cachedWebSearch({
|
const r = await this.cachedWebSearch({
|
||||||
q: searchQuery,
|
q: searchQuery,
|
||||||
|
@ -266,6 +266,7 @@ export class CrawlerOptions extends AutoCastable {
|
|||||||
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
instance.setCookies = cookies;
|
||||||
|
|
||||||
const proxyUrl = ctx?.req.get('x-proxy-url');
|
const proxyUrl = ctx?.req.get('x-proxy-url');
|
||||||
instance.proxyUrl ??= proxyUrl;
|
instance.proxyUrl ??= proxyUrl;
|
||||||
|
@ -476,7 +476,14 @@ document.addEventListener('load', handlePageLoad);
|
|||||||
await page.useProxy(options.proxyUrl);
|
await page.useProxy(options.proxyUrl);
|
||||||
}
|
}
|
||||||
if (options?.cookies) {
|
if (options?.cookies) {
|
||||||
await page.setCookie(...options.cookies);
|
const mapped = options.cookies.map((x) => {
|
||||||
|
if (x.domain || x.url) {
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
return { ...x, url: parsedUrl.toString() };
|
||||||
|
});
|
||||||
|
await page.setCookie(...mapped);
|
||||||
}
|
}
|
||||||
if (options?.overrideUserAgent) {
|
if (options?.overrideUserAgent) {
|
||||||
await page.setUserAgent(options.overrideUserAgent);
|
await page.setUserAgent(options.overrideUserAgent);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user