This commit is contained in:
Yanlong Wang 2024-04-12 10:24:56 +08:00
parent 664d4b1c9f
commit 629ab270be
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
3 changed files with 15 additions and 5 deletions

View File

@@ -18,8 +18,7 @@
"from-preset": "npm run build && npm run emu:reset && npm run emu:start",
"start": "npm run shell",
"deploy": "firebase deploy --only functions",
"logs": "firebase functions:log",
"gcp-build": "node node_modules/puppeteer/install.js"
"logs": "firebase functions:log"
},
"engines": {
"node": "18"

View File

@@ -30,7 +30,9 @@ export class CrawlerHost extends RPCHost {
formatSnapshot(snapshot: PageSnapshot) {
const toBeTurnedToMd = snapshot.parsed?.content;
const contentText = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd) : snapshot.text;
const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text.trim();
const formatted = {
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
@@ -51,6 +53,16 @@ ${contentText}
return formatted;
}
@CloudHTTPv2({
name: 'crawl2',
runtime: {
memory: '4GiB',
timeoutSeconds: 540,
concurrency: 4,
},
httpMethod: ['get', 'post'],
returnType: [String, OutputServerEventStream],
})
@CloudHTTPv2({
runtime: {
memory: '4GiB',

View File

@@ -71,8 +71,7 @@ export class PuppeteerControl extends AsyncService {
}
}
this.browser = await puppeteer.launch({
headless: true,
timeout: 60_000
headless: true
});
this.browser.once('disconnected', () => {
this.logger.warn(`Browser disconnected`);