mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-06 08:36:02 +08:00
fix: some invalid uriComponent case
This commit is contained in:
parent
05df989202
commit
0e8308e627
@ -24,6 +24,7 @@ import { JSDomControl } from '../services/jsdom';
|
||||
import { FormattedPage, md5Hasher, SnapshotFormatter } from '../services/snapshot-formatter';
|
||||
import { CurlControl } from '../services/curl';
|
||||
import { LmControl } from '../services/lm';
|
||||
import { tryDecodeURIComponent } from '../utils/misc';
|
||||
|
||||
export interface ExtraScrappingOptions extends ScrappingOptions {
|
||||
withIframe?: boolean | 'quoted';
|
||||
@ -169,7 +170,8 @@ export class CrawlerHost extends RPCHost {
|
||||
let chargeAmount = 0;
|
||||
const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
|
||||
|
||||
const targetUrl = await this.getTargetUrl(decodeURIComponent(ctx.req.url), crawlerOptions);
|
||||
// Note: req.url in Express is actually the unparsed `path` (e.g. `/some-path?abc`), not a full URL.
|
||||
const targetUrl = await this.getTargetUrl(tryDecodeURIComponent(ctx.req.url), crawlerOptions);
|
||||
if (!targetUrl) {
|
||||
const latestUser = uid ? await auth.assertUser() : undefined;
|
||||
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
||||
|
@ -1,3 +1,18 @@
|
||||
import { ParamValidationError } from 'civkit';
|
||||
|
||||
/**
 * Collapses line breaks inside an attribute value.
 *
 * Every run that starts at a newline and continues through any following
 * whitespace (further newlines included) is replaced by a single `\n`.
 *
 * @param attribute - Raw attribute text, or `null` when there is none.
 * @returns The collapsed text, or an empty string for `null` / empty input.
 */
export function cleanAttribute(attribute: string | null) {
    // Guard first: null and '' both map to the empty string.
    if (!attribute) {
        return '';
    }

    return attribute.replace(/(\n+\s*)+/g, '\n');
}
|
||||
|
||||
|
||||
/**
 * Lenient wrapper around `decodeURIComponent`.
 *
 * When `input` decodes cleanly, the decoded string is returned. When decoding
 * throws, `input` is returned unchanged as long as it can still be parsed as
 * a URL relative to `http://localhost:3000`.
 *
 * @param input - The possibly percent-encoded string to decode.
 * @returns The decoded string, or the untouched `input` in the fallback case.
 * @throws ParamValidationError when decoding fails and `input` cannot be
 *         parsed as a URL against the placeholder base either.
 */
export function tryDecodeURIComponent(input: string) {
    try {
        return decodeURIComponent(input);
    } catch (decodeFailure) {
        // The base is only used to let relative inputs be checked for parseability.
        const placeholderBase = 'http://localhost:3000';
        const stillParsable = URL.canParse(input, placeholderBase);

        if (!stillParsable) {
            throw new ParamValidationError(`Invalid URIComponent: ${input}`);
        }

        // Decoding failed but the text is usable as a URL: pass it through as-is.
        return input;
    }
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user