fix: some invalid uriComponent case

This commit is contained in:
yanlong.wang 2025-02-17 12:27:02 +08:00
parent 05df989202
commit 0e8308e627
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 18 additions and 1 deletions

View File

@ -24,6 +24,7 @@ import { JSDomControl } from '../services/jsdom';
import { FormattedPage, md5Hasher, SnapshotFormatter } from '../services/snapshot-formatter';
import { CurlControl } from '../services/curl';
import { LmControl } from '../services/lm';
import { tryDecodeURIComponent } from '../utils/misc';
export interface ExtraScrappingOptions extends ScrappingOptions {
withIframe?: boolean | 'quoted';
@ -169,7 +170,8 @@ export class CrawlerHost extends RPCHost {
let chargeAmount = 0;
const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
const targetUrl = await this.getTargetUrl(decodeURIComponent(ctx.req.url), crawlerOptions);
// Note: req.url in express is actually the unparsed `path`, e.g. `/some-path?abc`, rather than a full URL.
const targetUrl = await this.getTargetUrl(tryDecodeURIComponent(ctx.req.url), crawlerOptions);
if (!targetUrl) {
const latestUser = uid ? await auth.assertUser() : undefined;
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {

View File

@ -1,3 +1,18 @@
import { ParamValidationError } from 'civkit';
/**
 * Collapses every run of newlines (together with any whitespace that follows
 * them) into a single newline character.
 *
 * @param attribute - Raw attribute text, or `null`.
 * @returns The collapsed text, or an empty string when `attribute` is `null`
 * or empty.
 */
export function cleanAttribute(attribute: string | null) {
    if (!attribute) {
        return '';
    }

    const newlineRuns = /(\n+\s*)+/g;

    return attribute.replace(newlineRuns, '\n');
}
/**
 * Percent-decodes `input`, tolerating malformed escape sequences as long as
 * the raw text can still be parsed as a URL.
 *
 * @param input - The possibly percent-encoded text.
 * @returns The decoded text, or `input` unchanged when decoding fails but
 * `input` parses as a URL relative to `http://localhost:3000`.
 * @throws ParamValidationError when decoding fails and `input` cannot be
 * parsed as a URL either.
 */
export function tryDecodeURIComponent(input: string) {
    let decoded: string;

    try {
        decoded = decodeURIComponent(input);
    } catch {
        // Decoding failed; accept the raw text only if it is still URL-parseable.
        const parsesAsUrl = URL.canParse(input, 'http://localhost:3000');
        if (!parsesAsUrl) {
            throw new ParamValidationError(`Invalid URIComponent: ${input}`);
        }

        return input;
    }

    return decoded;
}