feat: search return with date (#1173)

* feat: search return with date

* cleanup: keep changes to searcher

---------

Co-authored-by: yanlong.wang <yanlong.wang@naiver.org>
This commit is contained in:
Aaron Ji 2025-03-24 14:46:22 +08:00 committed by GitHub
parent f7f6a98839
commit b304d5809d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 4 deletions

View File

@ -11,7 +11,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
import { CrawlerHost, ExtraScrappingOptions } from './crawler';
import { SerperSearchResult } from '../db/searched';
import { CrawlerOptions } from '../dto/crawler-options';
import { SnapshotFormatter, FormattedPage } from '../services/snapshot-formatter';
import { SnapshotFormatter, FormattedPage as RealFormattedPage } from '../services/snapshot-formatter';
import { GoogleSearchExplicitOperatorsDto, SerperSearchService } from '../services/serper-search';
import { GlobalLogger } from '../services/logger';
@ -24,6 +24,11 @@ import { SerperSearchQueryParams, SerperSearchResponse, WORLD_COUNTRIES, WORLD_L
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES);
/**
 * Searcher-local page shape: the snapshot formatter's `FormattedPage`
 * (imported here under the alias `RealFormattedPage`) plus optional
 * per-search-result fields used in this file.
 */
interface FormattedPage extends RealFormattedPage {
// Optional favicon value; the result text templates in this file append a
// "Favicon:" line whenever this is not `undefined`.
favicon?: string;
// Optional date taken from the upstream search result's `date` field; the
// result text templates append a "Date:" line only when it is truthy.
date?: string;
}
@singleton()
export class SearcherHost extends RPCHost {
logger = this.globalLogger.child({ service: this.constructor.name });
@ -367,6 +372,7 @@ export class SearcherHost extends RPCHost {
url: upstreamSearchResult.link,
title: upstreamSearchResult.title,
description: upstreamSearchResult.snippet,
date: upstreamSearchResult.date,
} as FormattedPage;
const dataItems = [
@ -375,6 +381,10 @@ export class SearcherHost extends RPCHost {
{ key: 'description', label: 'Description' },
];
if (upstreamSearchResult.date) {
dataItems.push({ key: 'date', label: 'Date' });
}
if (withContent) {
result.content = ['html', 'text', 'screenshot'].includes(mode) ? undefined : '';
}
@ -425,6 +435,7 @@ export class SearcherHost extends RPCHost {
url,
title: upstreamSearchResult.title,
description: upstreamSearchResult.snippet,
date: upstreamSearchResult.date,
content: ['html', 'text', 'screenshot'].includes(mode) ? undefined : ''
};
}
@ -434,6 +445,7 @@ export class SearcherHost extends RPCHost {
return this.crawler.formatSnapshotWithPDFSideLoad(mode, x, urls[i], undefined, options).then((r) => {
r.title ??= upstreamSearchResult.title;
r.description = upstreamSearchResult.snippet;
r.date ??= upstreamSearchResult.date;
snapshotMap.set(x, r);
return r;
@ -444,6 +456,7 @@ export class SearcherHost extends RPCHost {
url,
title: upstreamSearchResult.title,
description: upstreamSearchResult.snippet,
date: upstreamSearchResult.date,
content: x.text,
};
});
@ -484,7 +497,7 @@ export class SearcherHost extends RPCHost {
const textRep = x.textRepresentation ? `\n[${i + 1}] Content: \n${x.textRepresentation}` : '';
return `[${i + 1}] Title: ${this.title}
[${i + 1}] URL Source: ${this.url}
[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}
[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''}
`;
}
@ -522,7 +535,7 @@ export class SearcherHost extends RPCHost {
}
return `[${i + 1}] Title: ${this.title}
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''}
[${i + 1}] Markdown Content:
${this.content}
${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;

View File

@ -34,7 +34,6 @@ export interface FormattedPage {
links?: { [k: string]: string; } | [string, string][];
images?: { [k: string]: string; } | [string, string][];
warning?: string;
favicon?: string;
usage?: {
total_tokens?: number;
totalTokens?: number;