mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-04-18 11:50:00 +08:00
feat: search return with date (#1173)
* feat: search return with date * cleanup: keep changes to searcher --------- Co-authored-by: yanlong.wang <yanlong.wang@naiver.org>
This commit is contained in:
parent
f7f6a98839
commit
b304d5809d
@ -11,7 +11,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
||||
import { CrawlerHost, ExtraScrappingOptions } from './crawler';
|
||||
import { SerperSearchResult } from '../db/searched';
|
||||
import { CrawlerOptions } from '../dto/crawler-options';
|
||||
import { SnapshotFormatter, FormattedPage } from '../services/snapshot-formatter';
|
||||
import { SnapshotFormatter, FormattedPage as RealFormattedPage } from '../services/snapshot-formatter';
|
||||
import { GoogleSearchExplicitOperatorsDto, SerperSearchService } from '../services/serper-search';
|
||||
|
||||
import { GlobalLogger } from '../services/logger';
|
||||
@ -24,6 +24,11 @@ import { SerperSearchQueryParams, SerperSearchResponse, WORLD_COUNTRIES, WORLD_L
|
||||
|
||||
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES);
|
||||
|
||||
/**
 * File-local view of a formatted page used by the searcher.
 *
 * Extends the `FormattedPage` exported by `../services/snapshot-formatter`
 * (imported here under the alias `RealFormattedPage`) with two optional,
 * search-specific fields.
 */
interface FormattedPage extends RealFormattedPage {
|
||||
// Optional favicon string for the result.
// NOTE(review): the accompanying snapshot-formatter hunk appears to drop
// `favicon` from the base interface, which would be why it is declared here — confirm.
favicon?: string;
|
||||
// Optional date for the result; the searcher fills it from the upstream
// search result's `date` field.
date?: string;
|
||||
}
|
||||
|
||||
@singleton()
|
||||
export class SearcherHost extends RPCHost {
|
||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||
@ -367,6 +372,7 @@ export class SearcherHost extends RPCHost {
|
||||
url: upstreamSearchResult.link,
|
||||
title: upstreamSearchResult.title,
|
||||
description: upstreamSearchResult.snippet,
|
||||
date: upstreamSearchResult.date,
|
||||
} as FormattedPage;
|
||||
|
||||
const dataItems = [
|
||||
@ -375,6 +381,10 @@ export class SearcherHost extends RPCHost {
|
||||
{ key: 'description', label: 'Description' },
|
||||
];
|
||||
|
||||
if (upstreamSearchResult.date) {
|
||||
dataItems.push({ key: 'date', label: 'Date' });
|
||||
}
|
||||
|
||||
if (withContent) {
|
||||
result.content = ['html', 'text', 'screenshot'].includes(mode) ? undefined : '';
|
||||
}
|
||||
@ -425,6 +435,7 @@ export class SearcherHost extends RPCHost {
|
||||
url,
|
||||
title: upstreamSearchResult.title,
|
||||
description: upstreamSearchResult.snippet,
|
||||
date: upstreamSearchResult.date,
|
||||
content: ['html', 'text', 'screenshot'].includes(mode) ? undefined : ''
|
||||
};
|
||||
}
|
||||
@ -434,6 +445,7 @@ export class SearcherHost extends RPCHost {
|
||||
return this.crawler.formatSnapshotWithPDFSideLoad(mode, x, urls[i], undefined, options).then((r) => {
|
||||
r.title ??= upstreamSearchResult.title;
|
||||
r.description = upstreamSearchResult.snippet;
|
||||
r.date ??= upstreamSearchResult.date;
|
||||
snapshotMap.set(x, r);
|
||||
|
||||
return r;
|
||||
@ -444,6 +456,7 @@ export class SearcherHost extends RPCHost {
|
||||
url,
|
||||
title: upstreamSearchResult.title,
|
||||
description: upstreamSearchResult.snippet,
|
||||
date: upstreamSearchResult.date,
|
||||
content: x.text,
|
||||
};
|
||||
});
|
||||
@ -484,7 +497,7 @@ export class SearcherHost extends RPCHost {
|
||||
const textRep = x.textRepresentation ? `\n[${i + 1}] Content: \n${x.textRepresentation}` : '';
|
||||
return `[${i + 1}] Title: ${this.title}
|
||||
[${i + 1}] URL Source: ${this.url}
|
||||
[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}
|
||||
[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''}
|
||||
`;
|
||||
}
|
||||
|
||||
@ -522,7 +535,7 @@ export class SearcherHost extends RPCHost {
|
||||
}
|
||||
|
||||
return `[${i + 1}] Title: ${this.title}
|
||||
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}
|
||||
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''}
|
||||
[${i + 1}] Markdown Content:
|
||||
${this.content}
|
||||
${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
|
||||
|
@ -34,7 +34,6 @@ export interface FormattedPage {
|
||||
links?: { [k: string]: string; } | [string, string][];
|
||||
images?: { [k: string]: string; } | [string, string][];
|
||||
warning?: string;
|
||||
favicon?: string;
|
||||
usage?: {
|
||||
total_tokens?: number;
|
||||
totalTokens?: number;
|
||||
|
Loading…
x
Reference in New Issue
Block a user