mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-17 03:25:55 +08:00
fix
This commit is contained in:
parent
5199b00eeb
commit
950338261a
@ -90,6 +90,10 @@ ${this.content}
|
||||
|
||||
try {
|
||||
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
||||
if (!scrapped) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const formatted = this.formatSnapshot(scrapped);
|
||||
|
||||
if (scrapped.screenshot && screenshotEnabled) {
|
||||
|
@ -15,7 +15,7 @@ export interface PageSnapshot {
|
||||
href: string;
|
||||
html: string;
|
||||
text: string;
|
||||
parsed: {
|
||||
parsed?: {
|
||||
title: string;
|
||||
content: string;
|
||||
textContent: string;
|
||||
@ -78,7 +78,7 @@ export class PuppeteerControl extends AsyncService {
|
||||
timeout: 10_000
|
||||
}).catch((err) => {
|
||||
this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
|
||||
process.nextTick(()=> {
|
||||
process.nextTick(() => {
|
||||
process.exit(1);
|
||||
});
|
||||
return Promise.reject(err);
|
||||
@ -153,7 +153,7 @@ function giveSnapshot() {
|
||||
return page;
|
||||
}
|
||||
|
||||
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> {
|
||||
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot | undefined> {
|
||||
const parsedUrl = new URL(url);
|
||||
// parsedUrl.search = '';
|
||||
parsedUrl.hash = '';
|
||||
@ -236,7 +236,7 @@ function giveSnapshot() {
|
||||
while (true) {
|
||||
await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
|
||||
if (finalized) {
|
||||
yield { ...snapshot, screenshot };
|
||||
yield { ...snapshot, screenshot } as PageSnapshot;
|
||||
break;
|
||||
}
|
||||
yield snapshot;
|
||||
|
Loading…
x
Reference in New Issue
Block a user