mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 05:05:59 +08:00
fix
This commit is contained in:
parent
664d4b1c9f
commit
629ab270be
@@ -18,8 +18,7 @@
|
||||
"from-preset": "npm run build && npm run emu:reset && npm run emu:start",
|
||||
"start": "npm run shell",
|
||||
"deploy": "firebase deploy --only functions",
|
||||
"logs": "firebase functions:log",
|
||||
"gcp-build": "node node_modules/puppeteer/install.js"
|
||||
"logs": "firebase functions:log"
|
||||
},
|
||||
"engines": {
|
||||
"node": "18"
|
||||
|
@@ -30,7 +30,9 @@ export class CrawlerHost extends RPCHost {
|
||||
formatSnapshot(snapshot: PageSnapshot) {
|
||||
|
||||
const toBeTurnedToMd = snapshot.parsed?.content;
|
||||
const contentText = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd) : snapshot.text;
|
||||
const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
|
||||
|
||||
const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text.trim();
|
||||
|
||||
const formatted = {
|
||||
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
||||
@@ -51,6 +53,16 @@ ${contentText}
|
||||
return formatted;
|
||||
}
|
||||
|
||||
@CloudHTTPv2({
|
||||
name: 'crawl2',
|
||||
runtime: {
|
||||
memory: '4GiB',
|
||||
timeoutSeconds: 540,
|
||||
concurrency: 4,
|
||||
},
|
||||
httpMethod: ['get', 'post'],
|
||||
returnType: [String, OutputServerEventStream],
|
||||
})
|
||||
@CloudHTTPv2({
|
||||
runtime: {
|
||||
memory: '4GiB',
|
||||
|
@@ -71,8 +71,7 @@ export class PuppeteerControl extends AsyncService {
|
||||
}
|
||||
}
|
||||
this.browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
timeout: 60_000
|
||||
headless: true
|
||||
});
|
||||
this.browser.once('disconnected', () => {
|
||||
this.logger.warn(`Browser disconnected`);
|
||||
|
Loading…
x
Reference in New Issue
Block a user