This commit is contained in:
Yanlong Wang 2024-04-13 08:07:55 +08:00
parent 5199b00eeb
commit 950338261a
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 8 additions and 4 deletions

View File

@@ -90,6 +90,10 @@ ${this.content}
try { try {
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) { for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
if (!scrapped) {
continue;
}
const formatted = this.formatSnapshot(scrapped); const formatted = this.formatSnapshot(scrapped);
if (scrapped.screenshot && screenshotEnabled) { if (scrapped.screenshot && screenshotEnabled) {

View File

@@ -15,7 +15,7 @@ export interface PageSnapshot {
href: string; href: string;
html: string; html: string;
text: string; text: string;
parsed: { parsed?: {
title: string; title: string;
content: string; content: string;
textContent: string; textContent: string;
@@ -78,7 +78,7 @@ export class PuppeteerControl extends AsyncService {
timeout: 10_000 timeout: 10_000
}).catch((err) => { }).catch((err) => {
this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err }); this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
process.nextTick(()=> { process.nextTick(() => {
process.exit(1); process.exit(1);
}); });
return Promise.reject(err); return Promise.reject(err);
@@ -153,7 +153,7 @@ function giveSnapshot() {
return page; return page;
} }
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> { async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot | undefined> {
const parsedUrl = new URL(url); const parsedUrl = new URL(url);
// parsedUrl.search = ''; // parsedUrl.search = '';
parsedUrl.hash = ''; parsedUrl.hash = '';
@@ -236,7 +236,7 @@ function giveSnapshot() {
while (true) { while (true) {
await Promise.race([nextSnapshotDeferred.promise, gotoPromise]); await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
if (finalized) { if (finalized) {
yield { ...snapshot, screenshot }; yield { ...snapshot, screenshot } as PageSnapshot;
break; break;
} }
yield snapshot; yield snapshot;