feat: search return with date (#1173)

* feat: search return with date

* cleanup: keep changes to searcher

---------

Co-authored-by: yanlong.wang <yanlong.wang@naiver.org>
This commit is contained in:
Aaron Ji 2025-03-24 14:46:22 +08:00 committed by GitHub
parent f7f6a98839
commit b304d5809d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 4 deletions

View File

@ -11,7 +11,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
import { CrawlerHost, ExtraScrappingOptions } from './crawler'; import { CrawlerHost, ExtraScrappingOptions } from './crawler';
import { SerperSearchResult } from '../db/searched'; import { SerperSearchResult } from '../db/searched';
import { CrawlerOptions } from '../dto/crawler-options'; import { CrawlerOptions } from '../dto/crawler-options';
import { SnapshotFormatter, FormattedPage } from '../services/snapshot-formatter'; import { SnapshotFormatter, FormattedPage as RealFormattedPage } from '../services/snapshot-formatter';
import { GoogleSearchExplicitOperatorsDto, SerperSearchService } from '../services/serper-search'; import { GoogleSearchExplicitOperatorsDto, SerperSearchService } from '../services/serper-search';
import { GlobalLogger } from '../services/logger'; import { GlobalLogger } from '../services/logger';
@ -24,6 +24,11 @@ import { SerperSearchQueryParams, SerperSearchResponse, WORLD_COUNTRIES, WORLD_L
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES); const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES);
/**
 * Local extension of the snapshot formatter's `FormattedPage` (imported here
 * as `RealFormattedPage`) that adds the optional per-result fields the
 * search result objects in this file carry.
 */
interface FormattedPage extends RealFormattedPage {
/** Optional favicon string; the textual result renderers append a `Favicon:` line whenever this is not `undefined`. */
favicon?: string;
/** Optional date string taken from the upstream search result's `date`; rendered as a `Date:` line only when truthy. */
date?: string;
}
@singleton() @singleton()
export class SearcherHost extends RPCHost { export class SearcherHost extends RPCHost {
logger = this.globalLogger.child({ service: this.constructor.name }); logger = this.globalLogger.child({ service: this.constructor.name });
@ -367,6 +372,7 @@ export class SearcherHost extends RPCHost {
url: upstreamSearchResult.link, url: upstreamSearchResult.link,
title: upstreamSearchResult.title, title: upstreamSearchResult.title,
description: upstreamSearchResult.snippet, description: upstreamSearchResult.snippet,
date: upstreamSearchResult.date,
} as FormattedPage; } as FormattedPage;
const dataItems = [ const dataItems = [
@ -375,6 +381,10 @@ export class SearcherHost extends RPCHost {
{ key: 'description', label: 'Description' }, { key: 'description', label: 'Description' },
]; ];
if (upstreamSearchResult.date) {
dataItems.push({ key: 'date', label: 'Date' });
}
if (withContent) { if (withContent) {
result.content = ['html', 'text', 'screenshot'].includes(mode) ? undefined : ''; result.content = ['html', 'text', 'screenshot'].includes(mode) ? undefined : '';
} }
@ -425,6 +435,7 @@ export class SearcherHost extends RPCHost {
url, url,
title: upstreamSearchResult.title, title: upstreamSearchResult.title,
description: upstreamSearchResult.snippet, description: upstreamSearchResult.snippet,
date: upstreamSearchResult.date,
content: ['html', 'text', 'screenshot'].includes(mode) ? undefined : '' content: ['html', 'text', 'screenshot'].includes(mode) ? undefined : ''
}; };
} }
@ -434,6 +445,7 @@ export class SearcherHost extends RPCHost {
return this.crawler.formatSnapshotWithPDFSideLoad(mode, x, urls[i], undefined, options).then((r) => { return this.crawler.formatSnapshotWithPDFSideLoad(mode, x, urls[i], undefined, options).then((r) => {
r.title ??= upstreamSearchResult.title; r.title ??= upstreamSearchResult.title;
r.description = upstreamSearchResult.snippet; r.description = upstreamSearchResult.snippet;
r.date ??= upstreamSearchResult.date;
snapshotMap.set(x, r); snapshotMap.set(x, r);
return r; return r;
@ -444,6 +456,7 @@ export class SearcherHost extends RPCHost {
url, url,
title: upstreamSearchResult.title, title: upstreamSearchResult.title,
description: upstreamSearchResult.snippet, description: upstreamSearchResult.snippet,
date: upstreamSearchResult.date,
content: x.text, content: x.text,
}; };
}); });
@ -484,7 +497,7 @@ export class SearcherHost extends RPCHost {
const textRep = x.textRepresentation ? `\n[${i + 1}] Content: \n${x.textRepresentation}` : ''; const textRep = x.textRepresentation ? `\n[${i + 1}] Content: \n${x.textRepresentation}` : '';
return `[${i + 1}] Title: ${this.title} return `[${i + 1}] Title: ${this.title}
[${i + 1}] URL Source: ${this.url} [${i + 1}] URL Source: ${this.url}
[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''} [${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''}
`; `;
} }
@ -522,7 +535,7 @@ export class SearcherHost extends RPCHost {
} }
return `[${i + 1}] Title: ${this.title} return `[${i + 1}] Title: ${this.title}
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''} [${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''}
[${i + 1}] Markdown Content: [${i + 1}] Markdown Content:
${this.content} ${this.content}
${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`; ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;

View File

@ -34,7 +34,6 @@ export interface FormattedPage {
links?: { [k: string]: string; } | [string, string][]; links?: { [k: string]: string; } | [string, string][];
images?: { [k: string]: string; } | [string, string][]; images?: { [k: string]: string; } | [string, string][];
warning?: string; warning?: string;
favicon?: string;
usage?: { usage?: {
total_tokens?: number; total_tokens?: number;
totalTokens?: number; totalTokens?: number;