diff --git a/src/api/searcher-serper.ts b/src/api/searcher-serper.ts index 0bb2d9d..e40e85c 100644 --- a/src/api/searcher-serper.ts +++ b/src/api/searcher-serper.ts @@ -11,7 +11,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit'; import { CrawlerHost, ExtraScrappingOptions } from './crawler'; import { SerperSearchResult } from '../db/searched'; import { CrawlerOptions } from '../dto/crawler-options'; -import { SnapshotFormatter, FormattedPage } from '../services/snapshot-formatter'; +import { SnapshotFormatter, FormattedPage as RealFormattedPage } from '../services/snapshot-formatter'; import { GoogleSearchExplicitOperatorsDto, SerperSearchService } from '../services/serper-search'; import { GlobalLogger } from '../services/logger'; @@ -24,6 +24,11 @@ import { SerperSearchQueryParams, SerperSearchResponse, WORLD_COUNTRIES, WORLD_L const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES); +interface FormattedPage extends RealFormattedPage { + favicon?: string; + date?: string; +} + @singleton() export class SearcherHost extends RPCHost { logger = this.globalLogger.child({ service: this.constructor.name }); @@ -367,6 +372,7 @@ export class SearcherHost extends RPCHost { url: upstreamSearchResult.link, title: upstreamSearchResult.title, description: upstreamSearchResult.snippet, + date: upstreamSearchResult.date, } as FormattedPage; const dataItems = [ @@ -375,6 +381,10 @@ export class SearcherHost extends RPCHost { { key: 'description', label: 'Description' }, ]; + if (upstreamSearchResult.date) { + dataItems.push({ key: 'date', label: 'Date' }); + } + if (withContent) { result.content = ['html', 'text', 'screenshot'].includes(mode) ? undefined : ''; } @@ -425,6 +435,7 @@ export class SearcherHost extends RPCHost { url, title: upstreamSearchResult.title, description: upstreamSearchResult.snippet, + date: upstreamSearchResult.date, content: ['html', 'text', 'screenshot'].includes(mode) ? undefined : '' }; } @@ -434,6 +445,7 @@ export class SearcherHost extends RPCHost { return this.crawler.formatSnapshotWithPDFSideLoad(mode, x, urls[i], undefined, options).then((r) => { r.title ??= upstreamSearchResult.title; r.description = upstreamSearchResult.snippet; + r.date ??= upstreamSearchResult.date; snapshotMap.set(x, r); return r; @@ -444,6 +456,7 @@ export class SearcherHost extends RPCHost { url, title: upstreamSearchResult.title, description: upstreamSearchResult.snippet, + date: upstreamSearchResult.date, content: x.text, }; }); @@ -484,7 +497,7 @@ export class SearcherHost extends RPCHost { const textRep = x.textRepresentation ? `\n[${i + 1}] Content: \n${x.textRepresentation}` : ''; return `[${i + 1}] Title: ${this.title} [${i + 1}] URL Source: ${this.url} -[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''} +[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''} `; } @@ -522,7 +535,7 @@ export class SearcherHost extends RPCHost { } return `[${i + 1}] Title: ${this.title} -[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''} +[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''} [${i + 1}] Markdown Content: ${this.content} ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`; diff --git a/src/services/snapshot-formatter.ts b/src/services/snapshot-formatter.ts index f02fa4b..ffb6792 100644 --- a/src/services/snapshot-formatter.ts +++ b/src/services/snapshot-formatter.ts @@ -34,7 +34,6 @@ export interface FormattedPage { links?: { [k: string]: string; } | [string, string][]; images?: { [k: string]: string; } | [string, string][]; warning?: string; - favicon?: string; usage?: { total_tokens?: number; totalTokens?: number;