mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-20 15:09:14 +08:00
fix
This commit is contained in:
parent
5ed3f90b9c
commit
5199b00eeb
@ -1,4 +1,4 @@
|
||||
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection } from 'civkit';
|
||||
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError } from 'civkit';
|
||||
import { singleton } from 'tsyringe';
|
||||
import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
|
||||
import _ from 'lodash';
|
||||
@ -90,10 +90,6 @@ ${this.content}
|
||||
|
||||
try {
|
||||
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
||||
if (!scrapped) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const formatted = this.formatSnapshot(scrapped);
|
||||
|
||||
if (scrapped.screenshot && screenshotEnabled) {
|
||||
@ -134,6 +130,10 @@ ${this.content}
|
||||
return formatted;
|
||||
}
|
||||
|
||||
if (!lastScrapped) {
|
||||
throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`);
|
||||
}
|
||||
|
||||
return this.formatSnapshot(lastScrapped);
|
||||
}
|
||||
|
||||
@ -148,6 +148,10 @@ ${this.content}
|
||||
return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null });
|
||||
}
|
||||
|
||||
if (!lastScrapped) {
|
||||
throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`);
|
||||
}
|
||||
|
||||
return `${this.formatSnapshot(lastScrapped)}`;
|
||||
}
|
||||
|
||||
|
@ -153,7 +153,7 @@ function giveSnapshot() {
|
||||
return page;
|
||||
}
|
||||
|
||||
async *scrap(url: string, noCache: string | boolean = false) {
|
||||
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> {
|
||||
const parsedUrl = new URL(url);
|
||||
// parsedUrl.search = '';
|
||||
parsedUrl.hash = '';
|
||||
|
Loading…
x
Reference in New Issue
Block a user