mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-07-18 11:24:27 +08:00
chore: clean code
This commit is contained in:
parent
b29a569d39
commit
c1743db305
@@ -26,6 +26,7 @@
|
|||||||
},
|
},
|
||||||
"main": "build/index.js",
|
"main": "build/index.js",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@esm2cjs/normalize-url": "^8.0.0",
|
||||||
"@google-cloud/translate": "^8.2.0",
|
"@google-cloud/translate": "^8.2.0",
|
||||||
"@mozilla/readability": "^0.5.0",
|
"@mozilla/readability": "^0.5.0",
|
||||||
"@napi-rs/canvas": "^0.1.44",
|
"@napi-rs/canvas": "^0.1.44",
|
||||||
|
@@ -5,6 +5,7 @@ import _ from 'lodash';
|
|||||||
import { PageSnapshot, PuppeteerControl } from '../services/puppeteer';
|
import { PageSnapshot, PuppeteerControl } from '../services/puppeteer';
|
||||||
import TurnDownService from 'turndown';
|
import TurnDownService from 'turndown';
|
||||||
import { Request, Response } from 'express';
|
import { Request, Response } from 'express';
|
||||||
|
import normalizeUrl from "@esm2cjs/normalize-url";
|
||||||
|
|
||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
@@ -57,11 +58,8 @@ ${contentText.trim()}
|
|||||||
res: Response,
|
res: Response,
|
||||||
},
|
},
|
||||||
) {
|
) {
|
||||||
const url = new URL(ctx.req.url, `${ctx.req.protocol}://${ctx.req.headers.host}`);
|
const noSlashURL = ctx.req.url.slice(1);
|
||||||
const rawPath = url.pathname.split('/').filter(Boolean);
|
const urlToCrawl = new URL(normalizeUrl(noSlashURL));
|
||||||
const host = rawPath.shift();
|
|
||||||
const urlToCrawl = new URL(`${ctx.req.protocol}://${host}/${rawPath.join('/')}`);
|
|
||||||
urlToCrawl.search = url.search;
|
|
||||||
|
|
||||||
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
|
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
|
||||||
const sseStream = new OutputServerEventStream();
|
const sseStream = new OutputServerEventStream();
|
||||||
@@ -88,7 +86,7 @@ ${contentText.trim()}
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
this.logger.error(`Failed to crawl ${url}`, { err: marshalErrorLike(err) });
|
this.logger.error(`Failed to crawl ${urlToCrawl.toString()}`, { err: marshalErrorLike(err) });
|
||||||
sseStream.write({
|
sseStream.write({
|
||||||
event: 'error',
|
event: 'error',
|
||||||
data: marshalErrorLike(err),
|
data: marshalErrorLike(err),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user