This commit is contained in:
Yanlong Wang 2024-04-13 08:04:07 +08:00
parent 5ed3f90b9c
commit 5199b00eeb
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 10 additions and 6 deletions

View File

@@ -1,4 +1,4 @@
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection } from 'civkit';
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError } from 'civkit';
import { singleton } from 'tsyringe';
import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
import _ from 'lodash';
@@ -90,10 +90,6 @@ ${this.content}
try {
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
if (!scrapped) {
continue;
}
const formatted = this.formatSnapshot(scrapped);
if (scrapped.screenshot && screenshotEnabled) {
@@ -134,6 +130,10 @@ ${this.content}
return formatted;
}
if (!lastScrapped) {
throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`);
}
return this.formatSnapshot(lastScrapped);
}
@@ -148,6 +148,10 @@ ${this.content}
return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null });
}
if (!lastScrapped) {
throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`);
}
return `${this.formatSnapshot(lastScrapped)}`;
}

View File

@@ -153,7 +153,7 @@ function giveSnapshot() {
return page;
}
async *scrap(url: string, noCache: string | boolean = false) {
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> {
const parsedUrl = new URL(url);
// parsedUrl.search = '';
parsedUrl.hash = '';