From 8597daa96b0abde02d8363d6641fcee908311a9f Mon Sep 17 00:00:00 2001 From: Yanlong Wang Date: Sat, 8 Mar 2025 16:49:14 +0800 Subject: [PATCH] fix: side load context bridging --- package-lock.json | 8 +-- package.json | 2 +- src/services/puppeteer.ts | 126 +++++++++++++++++++++----------------- thinapps-shared | 2 +- 4 files changed, 76 insertions(+), 62 deletions(-) diff --git a/package-lock.json b/package-lock.json index d630397..e01c9a3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,7 +17,7 @@ "axios": "^1.3.3", "bcrypt": "^5.1.0", "busboy": "^1.6.0", - "civkit": "^0.8.4-c44153f", + "civkit": "^0.8.4-6ed9027", "core-js": "^3.37.1", "cors": "^2.8.5", "dayjs": "^1.11.9", @@ -4095,9 +4095,9 @@ } }, "node_modules/civkit": { - "version": "0.8.4-c44153f", - "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-c44153f.tgz", - "integrity": "sha512-VBElW71aAqqP0G+8F460hZfnDrn4kMCxTCn+FaFqGG2B0TmNkfwjVZL9VuDRNtSzNBbEO9rRKLJG1iw4y8sZxQ==", + "version": "0.8.4-6ed9027", + "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-6ed9027.tgz", + "integrity": "sha512-VU8Ykik1L16Li9/QZfw5wYsmu3jJYH/zIHbM6Vd2ajRI7Mh4fSO3cXadUntM190BersLW9Fts+qunDPabhIWZA==", "license": "AGPL", "dependencies": { "lodash": "^4.17.21", diff --git a/package.json b/package.json index a2a7fe9..3724cc6 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "axios": "^1.3.3", "bcrypt": "^5.1.0", "busboy": "^1.6.0", - "civkit": "^0.8.4-c44153f", + "civkit": "^0.8.4-6ed9027", "core-js": "^3.37.1", "cors": "^2.8.5", "dayjs": "^1.11.9", diff --git a/src/services/puppeteer.ts b/src/services/puppeteer.ts index 6b25f0b..007ae18 100644 --- a/src/services/puppeteer.ts +++ b/src/services/puppeteer.ts @@ -17,6 +17,7 @@ import { isIP } from 'net'; import { CurlControl } from './curl'; import { readFile } from 'fs/promises'; import { BlackHoleDetector } from './blackhole-detector'; +import { AsyncLocalContext } from './async-context'; const tldExtract = require('tld-extract'); const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8'); @@ -468,8 +469,11 @@ export class PuppeteerControl extends AsyncService { circuitBreakerHosts: Set = new Set(); + lifeCycleTrack = new WeakMap(); + constructor( protected globalLogger: Logger, + protected asyncLocalContext: AsyncLocalContext, protected curlControl: CurlControl, protected blackHoleDetector: BlackHoleDetector, ) { @@ -774,6 +778,7 @@ export class PuppeteerControl extends AsyncService { const pdfUrls: string[] = []; let navigationResponse: HTTPResponse | undefined; const page = await this.getNextPage(); + this.lifeCycleTrack.set(page, this.asyncLocalContext.ctx); this.pagePhase.set(page, 'active'); page.on('response', (resp) => { this.blackHoleDetector.itWorked(); @@ -805,6 +810,19 @@ export class PuppeteerControl extends AsyncService { if (!options.proxyResources) { const isDocRequest = ['document', 'xhr', 'fetch', 'websocket', 'prefetch', 'eventsource', 'ping'].includes(typ); if (!isDocRequest) { + if (options.extraHeaders) { + const overrides = req.continueRequestOverrides(); + const continueArgs = [{ + ...overrides, + headers: { + ...req.headers(), + ...overrides?.headers, + ...options.extraHeaders, + } + }, 1] as const; + + return req.continue(continueArgs[0], continueArgs[1]); + } const overrides = req.continueRequestOverrides(); return req.continue(overrides, 0); @@ -830,54 +848,69 @@ export class PuppeteerControl extends AsyncService { } const proxy = options.proxyUrl || sideload?.proxyOrigin?.[reqUrlParsed.origin]; + const ctx = this.lifeCycleTrack.get(page); + if (proxy && ctx) { + return this.asyncLocalContext.bridge(ctx, async () => { + try { + const curled = await this.curlControl.sideLoad(reqUrlParsed, { + ...options, + method: req.method(), + body: req.postData(), + extraHeaders: { + ...req.headers(), + ...options.extraHeaders, + }, + proxyUrl: proxy + }); + if (req.isInterceptResolutionHandled()) { + return; + }; - if (proxy) { - try { - const curled = await this.curlControl.sideLoad(reqUrlParsed, { - ...options, - method: req.method(), - body: req.postData(), - extraHeaders: { - ...req.headers(), - ...options.extraHeaders, - }, - proxyUrl: proxy - }); - if (req.isInterceptResolutionHandled()) { - return; - }; - - if (curled.chain.length === 1) { - if (!curled.file) { + if (curled.chain.length === 1) { + if (!curled.file) { + return req.respond({ + status: curled.status, + headers: _.omit(curled.headers, 'result'), + contentType: curled.contentType, + }, 999); + } + const body = await readFile(await curled.file.filePath); + if (req.isInterceptResolutionHandled()) { + return; + }; return req.respond({ status: curled.status, headers: _.omit(curled.headers, 'result'), contentType: curled.contentType, + body: Uint8Array.from(body), }, 999); } - const body = await readFile(await curled.file.filePath); - if (req.isInterceptResolutionHandled()) { - return; - }; + options.sideLoad ??= curled.sideLoadOpts; + _.merge(options.sideLoad, curled.sideLoadOpts); + const firstReq = curled.chain[0]; + return req.respond({ - status: curled.status, - headers: _.omit(curled.headers, 'result'), - contentType: curled.contentType, - body: Uint8Array.from(body), + status: firstReq.result!.code, + headers: _.omit(firstReq, 'result'), }, 999); + } catch (err: any) { + this.logger.warn(`Failed to sideload ${reqUrlParsed.origin}`, { href: reqUrlParsed.href, err: marshalErrorLike(err) }); } - options.sideLoad ??= curled.sideLoadOpts; - _.merge(options.sideLoad, curled.sideLoadOpts); - const firstReq = curled.chain[0]; + if (req.isInterceptResolutionHandled()) { + return; + }; + const overrides = req.continueRequestOverrides(); + const continueArgs = [{ + ...overrides, + headers: { + ...req.headers(), + ...overrides?.headers, + ...options.extraHeaders, + } + }, 1] as const; - return req.respond({ - status: firstReq.result!.code, - headers: _.omit(firstReq, 'result'), - }, 999); - } catch (err: any) { - this.logger.warn(`Failed to sideload ${reqUrlParsed.origin}`, { href: reqUrlParsed.href, err: marshalErrorLike(err) }); - - } + return req.continue(continueArgs[0], continueArgs[1]); + }); } if (req.isInterceptResolutionHandled()) { @@ -895,25 +928,6 @@ export class PuppeteerControl extends AsyncService { return req.continue(continueArgs[0], continueArgs[1]); }); - if (options.extraHeaders) { - page.on('request', async (req) => { - if (req.isInterceptResolutionHandled()) { - return; - }; - - const overrides = req.continueRequestOverrides(); - const continueArgs = [{ - ...overrides, - headers: { - ...req.headers(), - ...overrides?.headers, - ...options.extraHeaders, - } - }, 1] as const; - - return req.continue(continueArgs[0], continueArgs[1]); - }); - } let pageScriptEvaluations: Promise[] = []; let frameScriptEvaluations: Promise[] = []; if (options.injectPageScripts?.length) { diff --git a/thinapps-shared b/thinapps-shared index 16521fd..20417f5 160000 --- a/thinapps-shared +++ b/thinapps-shared @@ -1 +1 @@ -Subproject commit 16521fd4a55f983c050d4cdd0c24a8ac400901d1 +Subproject commit 20417f5bb7f8c773a835304f0624a180b558ff65