mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-17 07:26:04 +08:00
fix
This commit is contained in:
parent
5199b00eeb
commit
950338261a
@ -90,6 +90,10 @@ ${this.content}
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
||||||
|
if (!scrapped) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const formatted = this.formatSnapshot(scrapped);
|
const formatted = this.formatSnapshot(scrapped);
|
||||||
|
|
||||||
if (scrapped.screenshot && screenshotEnabled) {
|
if (scrapped.screenshot && screenshotEnabled) {
|
||||||
|
@ -15,7 +15,7 @@ export interface PageSnapshot {
|
|||||||
href: string;
|
href: string;
|
||||||
html: string;
|
html: string;
|
||||||
text: string;
|
text: string;
|
||||||
parsed: {
|
parsed?: {
|
||||||
title: string;
|
title: string;
|
||||||
content: string;
|
content: string;
|
||||||
textContent: string;
|
textContent: string;
|
||||||
@ -78,7 +78,7 @@ export class PuppeteerControl extends AsyncService {
|
|||||||
timeout: 10_000
|
timeout: 10_000
|
||||||
}).catch((err) => {
|
}).catch((err) => {
|
||||||
this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
|
this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
|
||||||
process.nextTick(()=> {
|
process.nextTick(() => {
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
});
|
});
|
||||||
return Promise.reject(err);
|
return Promise.reject(err);
|
||||||
@ -153,7 +153,7 @@ function giveSnapshot() {
|
|||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> {
|
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot | undefined> {
|
||||||
const parsedUrl = new URL(url);
|
const parsedUrl = new URL(url);
|
||||||
// parsedUrl.search = '';
|
// parsedUrl.search = '';
|
||||||
parsedUrl.hash = '';
|
parsedUrl.hash = '';
|
||||||
@ -236,7 +236,7 @@ function giveSnapshot() {
|
|||||||
while (true) {
|
while (true) {
|
||||||
await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
|
await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
|
||||||
if (finalized) {
|
if (finalized) {
|
||||||
yield { ...snapshot, screenshot };
|
yield { ...snapshot, screenshot } as PageSnapshot;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
yield snapshot;
|
yield snapshot;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user