mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-18 04:25:54 +08:00
fix: early return for search
This commit is contained in:
parent
1cf8e83857
commit
445624c405
@ -241,53 +241,85 @@ export class SearcherHost extends RPCHost {
|
|||||||
return sseStream;
|
return sseStream;
|
||||||
}
|
}
|
||||||
|
|
||||||
const t0 = Date.now();
|
let lastScrapped: any[] | undefined;
|
||||||
|
let earlyReturn = false;
|
||||||
let lastScrapped;
|
|
||||||
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
||||||
|
const earlyReturnTimer = setTimeout(() => {
|
||||||
|
if (!lastScrapped) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
chargeAmount = this.getChargeAmount(lastScrapped);
|
||||||
|
rpcReflect.return(lastScrapped);
|
||||||
|
earlyReturn = true;
|
||||||
|
}, this.reasonableDelayMs);
|
||||||
|
|
||||||
for await (const scrapped of it) {
|
for await (const scrapped of it) {
|
||||||
lastScrapped = scrapped;
|
lastScrapped = scrapped;
|
||||||
|
|
||||||
if (!this.qualified(scrapped) && ((Date.now() - t0) < this.reasonableDelayMs)) {
|
if (!this.qualified(scrapped)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
clearTimeout(earlyReturnTimer);
|
||||||
chargeAmount = this.getChargeAmount(scrapped);
|
chargeAmount = this.getChargeAmount(scrapped);
|
||||||
|
|
||||||
return scrapped;
|
return scrapped;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
clearTimeout(earlyReturnTimer);
|
||||||
|
|
||||||
if (!lastScrapped) {
|
if (!lastScrapped) {
|
||||||
throw new AssertionFailureError(`No content available for query ${searchQuery}`);
|
throw new AssertionFailureError(`No content available for query ${searchQuery}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!earlyReturn) {
|
||||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
chargeAmount = this.getChargeAmount(lastScrapped);
|
||||||
|
}
|
||||||
|
|
||||||
return lastScrapped;
|
return lastScrapped;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const earlyReturnTimer = setTimeout(() => {
|
||||||
|
if (!lastScrapped) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
chargeAmount = this.getChargeAmount(lastScrapped);
|
||||||
|
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
|
||||||
|
earlyReturn = true;
|
||||||
|
}, this.reasonableDelayMs);
|
||||||
|
|
||||||
for await (const scrapped of it) {
|
for await (const scrapped of it) {
|
||||||
lastScrapped = scrapped;
|
lastScrapped = scrapped;
|
||||||
|
|
||||||
if (!this.qualified(scrapped) && ((Date.now() - t0) < this.reasonableDelayMs)) {
|
if (!this.qualified(scrapped)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
clearTimeout(earlyReturnTimer);
|
||||||
chargeAmount = this.getChargeAmount(scrapped);
|
chargeAmount = this.getChargeAmount(scrapped);
|
||||||
|
|
||||||
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
|
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
clearTimeout(earlyReturnTimer);
|
||||||
|
|
||||||
if (!lastScrapped) {
|
if (!lastScrapped) {
|
||||||
throw new AssertionFailureError(`No content available for query ${searchQuery}`);
|
throw new AssertionFailureError(`No content available for query ${searchQuery}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!earlyReturn) {
|
||||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
chargeAmount = this.getChargeAmount(lastScrapped);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
|
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
|
||||||
}
|
}
|
||||||
|
|
||||||
async *fetchSearchResults(mode: string | 'markdown' | 'html' | 'text' | 'screenshot',
|
async *fetchSearchResults(
|
||||||
searchResults: WebSearchResult[], options?: ScrappingOptions, pageCacheTolerance?: number) {
|
mode: string | 'markdown' | 'html' | 'text' | 'screenshot',
|
||||||
|
searchResults: WebSearchResult[],
|
||||||
|
options?: ScrappingOptions,
|
||||||
|
pageCacheTolerance?: number
|
||||||
|
) {
|
||||||
const urls = searchResults.map((x) => new URL(x.url));
|
const urls = searchResults.map((x) => new URL(x.url));
|
||||||
for await (const scrapped of this.crawler.scrapMany(urls, options, pageCacheTolerance)) {
|
for await (const scrapped of this.crawler.scrapMany(urls, options, pageCacheTolerance)) {
|
||||||
const mapped = scrapped.map((x, i) => {
|
const mapped = scrapped.map((x, i) => {
|
||||||
@ -323,10 +355,6 @@ export class SearcherHost extends RPCHost {
|
|||||||
mixins.push(`[${i + 1}] Published Time: ${this.publishedTime}`);
|
mixins.push(`[${i + 1}] Published Time: ${this.publishedTime}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mode === 'markdown') {
|
|
||||||
return `[${i + 1}]\n${this.content}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
return `[${i + 1}] Title: ${this.title}
|
return `[${i + 1}] Title: ${this.title}
|
||||||
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}
|
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}
|
||||||
[${i + 1}] Markdown Content:
|
[${i + 1}] Markdown Content:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user