fix: early return for search

This commit is contained in:
Yanlong Wang 2024-05-15 08:47:16 +08:00
parent 1cf8e83857
commit 445624c405
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37

View File

@ -241,53 +241,85 @@ export class SearcherHost extends RPCHost {
return sseStream; return sseStream;
} }
const t0 = Date.now(); let lastScrapped: any[] | undefined;
let earlyReturn = false;
let lastScrapped;
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) { if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
const earlyReturnTimer = setTimeout(() => {
if (!lastScrapped) {
return;
}
chargeAmount = this.getChargeAmount(lastScrapped);
rpcReflect.return(lastScrapped);
earlyReturn = true;
}, this.reasonableDelayMs);
for await (const scrapped of it) { for await (const scrapped of it) {
lastScrapped = scrapped; lastScrapped = scrapped;
if (!this.qualified(scrapped) && ((Date.now() - t0) < this.reasonableDelayMs)) { if (!this.qualified(scrapped)) {
continue; continue;
} }
clearTimeout(earlyReturnTimer);
chargeAmount = this.getChargeAmount(scrapped); chargeAmount = this.getChargeAmount(scrapped);
return scrapped; return scrapped;
} }
clearTimeout(earlyReturnTimer);
if (!lastScrapped) { if (!lastScrapped) {
throw new AssertionFailureError(`No content available for query ${searchQuery}`); throw new AssertionFailureError(`No content available for query ${searchQuery}`);
} }
chargeAmount = this.getChargeAmount(lastScrapped); if (!earlyReturn) {
chargeAmount = this.getChargeAmount(lastScrapped);
}
return lastScrapped; return lastScrapped;
} }
const earlyReturnTimer = setTimeout(() => {
if (!lastScrapped) {
return;
}
chargeAmount = this.getChargeAmount(lastScrapped);
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
earlyReturn = true;
}, this.reasonableDelayMs);
for await (const scrapped of it) { for await (const scrapped of it) {
lastScrapped = scrapped; lastScrapped = scrapped;
if (!this.qualified(scrapped) && ((Date.now() - t0) < this.reasonableDelayMs)) { if (!this.qualified(scrapped)) {
continue; continue;
} }
clearTimeout(earlyReturnTimer);
chargeAmount = this.getChargeAmount(scrapped); chargeAmount = this.getChargeAmount(scrapped);
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null }); return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
} }
clearTimeout(earlyReturnTimer);
if (!lastScrapped) { if (!lastScrapped) {
throw new AssertionFailureError(`No content available for query ${searchQuery}`); throw new AssertionFailureError(`No content available for query ${searchQuery}`);
} }
chargeAmount = this.getChargeAmount(lastScrapped); if (!earlyReturn) {
chargeAmount = this.getChargeAmount(lastScrapped);
}
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }); return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
} }
async *fetchSearchResults(mode: string | 'markdown' | 'html' | 'text' | 'screenshot', async *fetchSearchResults(
searchResults: WebSearchResult[], options?: ScrappingOptions, pageCacheTolerance?: number) { mode: string | 'markdown' | 'html' | 'text' | 'screenshot',
searchResults: WebSearchResult[],
options?: ScrappingOptions,
pageCacheTolerance?: number
) {
const urls = searchResults.map((x) => new URL(x.url)); const urls = searchResults.map((x) => new URL(x.url));
for await (const scrapped of this.crawler.scrapMany(urls, options, pageCacheTolerance)) { for await (const scrapped of this.crawler.scrapMany(urls, options, pageCacheTolerance)) {
const mapped = scrapped.map((x, i) => { const mapped = scrapped.map((x, i) => {
@ -323,10 +355,6 @@ export class SearcherHost extends RPCHost {
mixins.push(`[${i + 1}] Published Time: ${this.publishedTime}`); mixins.push(`[${i + 1}] Published Time: ${this.publishedTime}`);
} }
if (mode === 'markdown') {
return `[${i + 1}]\n${this.content}`;
}
return `[${i + 1}] Title: ${this.title} return `[${i + 1}] Title: ${this.title}
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''} [${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}
[${i + 1}] Markdown Content: [${i + 1}] Markdown Content: