fix: bridge the async local context into side-load request handling

This commit is contained in:
Yanlong Wang 2025-03-08 16:49:14 +08:00
parent e92ff33ad0
commit 8597daa96b
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
4 changed files with 76 additions and 62 deletions

8
package-lock.json generated
View File

@ -17,7 +17,7 @@
"axios": "^1.3.3",
"bcrypt": "^5.1.0",
"busboy": "^1.6.0",
"civkit": "^0.8.4-c44153f",
"civkit": "^0.8.4-6ed9027",
"core-js": "^3.37.1",
"cors": "^2.8.5",
"dayjs": "^1.11.9",
@ -4095,9 +4095,9 @@
}
},
"node_modules/civkit": {
"version": "0.8.4-c44153f",
"resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-c44153f.tgz",
"integrity": "sha512-VBElW71aAqqP0G+8F460hZfnDrn4kMCxTCn+FaFqGG2B0TmNkfwjVZL9VuDRNtSzNBbEO9rRKLJG1iw4y8sZxQ==",
"version": "0.8.4-6ed9027",
"resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-6ed9027.tgz",
"integrity": "sha512-VU8Ykik1L16Li9/QZfw5wYsmu3jJYH/zIHbM6Vd2ajRI7Mh4fSO3cXadUntM190BersLW9Fts+qunDPabhIWZA==",
"license": "AGPL",
"dependencies": {
"lodash": "^4.17.21",

View File

@ -25,7 +25,7 @@
"axios": "^1.3.3",
"bcrypt": "^5.1.0",
"busboy": "^1.6.0",
"civkit": "^0.8.4-c44153f",
"civkit": "^0.8.4-6ed9027",
"core-js": "^3.37.1",
"cors": "^2.8.5",
"dayjs": "^1.11.9",

View File

@ -17,6 +17,7 @@ import { isIP } from 'net';
import { CurlControl } from './curl';
import { readFile } from 'fs/promises';
import { BlackHoleDetector } from './blackhole-detector';
import { AsyncLocalContext } from './async-context';
const tldExtract = require('tld-extract');
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
@ -468,8 +469,11 @@ export class PuppeteerControl extends AsyncService {
circuitBreakerHosts: Set<string> = new Set();
lifeCycleTrack = new WeakMap();
constructor(
protected globalLogger: Logger,
protected asyncLocalContext: AsyncLocalContext,
protected curlControl: CurlControl,
protected blackHoleDetector: BlackHoleDetector,
) {
@ -774,6 +778,7 @@ export class PuppeteerControl extends AsyncService {
const pdfUrls: string[] = [];
let navigationResponse: HTTPResponse | undefined;
const page = await this.getNextPage();
this.lifeCycleTrack.set(page, this.asyncLocalContext.ctx);
this.pagePhase.set(page, 'active');
page.on('response', (resp) => {
this.blackHoleDetector.itWorked();
@ -805,6 +810,19 @@ export class PuppeteerControl extends AsyncService {
if (!options.proxyResources) {
const isDocRequest = ['document', 'xhr', 'fetch', 'websocket', 'prefetch', 'eventsource', 'ping'].includes(typ);
if (!isDocRequest) {
if (options.extraHeaders) {
const overrides = req.continueRequestOverrides();
const continueArgs = [{
...overrides,
headers: {
...req.headers(),
...overrides?.headers,
...options.extraHeaders,
}
}, 1] as const;
return req.continue(continueArgs[0], continueArgs[1]);
}
const overrides = req.continueRequestOverrides();
return req.continue(overrides, 0);
@ -830,54 +848,69 @@ export class PuppeteerControl extends AsyncService {
}
const proxy = options.proxyUrl || sideload?.proxyOrigin?.[reqUrlParsed.origin];
const ctx = this.lifeCycleTrack.get(page);
if (proxy && ctx) {
return this.asyncLocalContext.bridge(ctx, async () => {
try {
const curled = await this.curlControl.sideLoad(reqUrlParsed, {
...options,
method: req.method(),
body: req.postData(),
extraHeaders: {
...req.headers(),
...options.extraHeaders,
},
proxyUrl: proxy
});
if (req.isInterceptResolutionHandled()) {
return;
};
if (proxy) {
try {
const curled = await this.curlControl.sideLoad(reqUrlParsed, {
...options,
method: req.method(),
body: req.postData(),
extraHeaders: {
...req.headers(),
...options.extraHeaders,
},
proxyUrl: proxy
});
if (req.isInterceptResolutionHandled()) {
return;
};
if (curled.chain.length === 1) {
if (!curled.file) {
if (curled.chain.length === 1) {
if (!curled.file) {
return req.respond({
status: curled.status,
headers: _.omit(curled.headers, 'result'),
contentType: curled.contentType,
}, 999);
}
const body = await readFile(await curled.file.filePath);
if (req.isInterceptResolutionHandled()) {
return;
};
return req.respond({
status: curled.status,
headers: _.omit(curled.headers, 'result'),
contentType: curled.contentType,
body: Uint8Array.from(body),
}, 999);
}
const body = await readFile(await curled.file.filePath);
if (req.isInterceptResolutionHandled()) {
return;
};
options.sideLoad ??= curled.sideLoadOpts;
_.merge(options.sideLoad, curled.sideLoadOpts);
const firstReq = curled.chain[0];
return req.respond({
status: curled.status,
headers: _.omit(curled.headers, 'result'),
contentType: curled.contentType,
body: Uint8Array.from(body),
status: firstReq.result!.code,
headers: _.omit(firstReq, 'result'),
}, 999);
} catch (err: any) {
this.logger.warn(`Failed to sideload ${reqUrlParsed.origin}`, { href: reqUrlParsed.href, err: marshalErrorLike(err) });
}
options.sideLoad ??= curled.sideLoadOpts;
_.merge(options.sideLoad, curled.sideLoadOpts);
const firstReq = curled.chain[0];
if (req.isInterceptResolutionHandled()) {
return;
};
const overrides = req.continueRequestOverrides();
const continueArgs = [{
...overrides,
headers: {
...req.headers(),
...overrides?.headers,
...options.extraHeaders,
}
}, 1] as const;
return req.respond({
status: firstReq.result!.code,
headers: _.omit(firstReq, 'result'),
}, 999);
} catch (err: any) {
this.logger.warn(`Failed to sideload ${reqUrlParsed.origin}`, { href: reqUrlParsed.href, err: marshalErrorLike(err) });
}
return req.continue(continueArgs[0], continueArgs[1]);
});
}
if (req.isInterceptResolutionHandled()) {
@ -895,25 +928,6 @@ export class PuppeteerControl extends AsyncService {
return req.continue(continueArgs[0], continueArgs[1]);
});
if (options.extraHeaders) {
page.on('request', async (req) => {
if (req.isInterceptResolutionHandled()) {
return;
};
const overrides = req.continueRequestOverrides();
const continueArgs = [{
...overrides,
headers: {
...req.headers(),
...overrides?.headers,
...options.extraHeaders,
}
}, 1] as const;
return req.continue(continueArgs[0], continueArgs[1]);
});
}
let pageScriptEvaluations: Promise<unknown>[] = [];
let frameScriptEvaluations: Promise<unknown>[] = [];
if (options.injectPageScripts?.length) {

@ -1 +1 @@
Subproject commit 16521fd4a55f983c050d4cdd0c24a8ac400901d1
Subproject commit 20417f5bb7f8c773a835304f0624a180b558ff65