mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-11 15:09:01 +08:00
feat: search return with date (#1173)
* feat: search return with date
* cleanup: keep changes to searcher

---------

Co-authored-by: yanlong.wang <yanlong.wang@naiver.org>
This commit is contained in:
parent
f7f6a98839
commit
b304d5809d
@ -11,7 +11,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
|||||||
import { CrawlerHost, ExtraScrappingOptions } from './crawler';
|
import { CrawlerHost, ExtraScrappingOptions } from './crawler';
|
||||||
import { SerperSearchResult } from '../db/searched';
|
import { SerperSearchResult } from '../db/searched';
|
||||||
import { CrawlerOptions } from '../dto/crawler-options';
|
import { CrawlerOptions } from '../dto/crawler-options';
|
||||||
import { SnapshotFormatter, FormattedPage } from '../services/snapshot-formatter';
|
import { SnapshotFormatter, FormattedPage as RealFormattedPage } from '../services/snapshot-formatter';
|
||||||
import { GoogleSearchExplicitOperatorsDto, SerperSearchService } from '../services/serper-search';
|
import { GoogleSearchExplicitOperatorsDto, SerperSearchService } from '../services/serper-search';
|
||||||
|
|
||||||
import { GlobalLogger } from '../services/logger';
|
import { GlobalLogger } from '../services/logger';
|
||||||
@ -24,6 +24,11 @@ import { SerperSearchQueryParams, SerperSearchResponse, WORLD_COUNTRIES, WORLD_L
|
|||||||
|
|
||||||
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES);
|
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES);
|
||||||
|
|
||||||
|
interface FormattedPage extends RealFormattedPage {
|
||||||
|
favicon?: string;
|
||||||
|
date?: string;
|
||||||
|
}
|
||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
export class SearcherHost extends RPCHost {
|
export class SearcherHost extends RPCHost {
|
||||||
logger = this.globalLogger.child({ service: this.constructor.name });
|
logger = this.globalLogger.child({ service: this.constructor.name });
|
||||||
@ -367,6 +372,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
url: upstreamSearchResult.link,
|
url: upstreamSearchResult.link,
|
||||||
title: upstreamSearchResult.title,
|
title: upstreamSearchResult.title,
|
||||||
description: upstreamSearchResult.snippet,
|
description: upstreamSearchResult.snippet,
|
||||||
|
date: upstreamSearchResult.date,
|
||||||
} as FormattedPage;
|
} as FormattedPage;
|
||||||
|
|
||||||
const dataItems = [
|
const dataItems = [
|
||||||
@ -375,6 +381,10 @@ export class SearcherHost extends RPCHost {
|
|||||||
{ key: 'description', label: 'Description' },
|
{ key: 'description', label: 'Description' },
|
||||||
];
|
];
|
||||||
|
|
||||||
|
if (upstreamSearchResult.date) {
|
||||||
|
dataItems.push({ key: 'date', label: 'Date' });
|
||||||
|
}
|
||||||
|
|
||||||
if (withContent) {
|
if (withContent) {
|
||||||
result.content = ['html', 'text', 'screenshot'].includes(mode) ? undefined : '';
|
result.content = ['html', 'text', 'screenshot'].includes(mode) ? undefined : '';
|
||||||
}
|
}
|
||||||
@ -425,6 +435,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
url,
|
url,
|
||||||
title: upstreamSearchResult.title,
|
title: upstreamSearchResult.title,
|
||||||
description: upstreamSearchResult.snippet,
|
description: upstreamSearchResult.snippet,
|
||||||
|
date: upstreamSearchResult.date,
|
||||||
content: ['html', 'text', 'screenshot'].includes(mode) ? undefined : ''
|
content: ['html', 'text', 'screenshot'].includes(mode) ? undefined : ''
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -434,6 +445,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
return this.crawler.formatSnapshotWithPDFSideLoad(mode, x, urls[i], undefined, options).then((r) => {
|
return this.crawler.formatSnapshotWithPDFSideLoad(mode, x, urls[i], undefined, options).then((r) => {
|
||||||
r.title ??= upstreamSearchResult.title;
|
r.title ??= upstreamSearchResult.title;
|
||||||
r.description = upstreamSearchResult.snippet;
|
r.description = upstreamSearchResult.snippet;
|
||||||
|
r.date ??= upstreamSearchResult.date;
|
||||||
snapshotMap.set(x, r);
|
snapshotMap.set(x, r);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
@ -444,6 +456,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
url,
|
url,
|
||||||
title: upstreamSearchResult.title,
|
title: upstreamSearchResult.title,
|
||||||
description: upstreamSearchResult.snippet,
|
description: upstreamSearchResult.snippet,
|
||||||
|
date: upstreamSearchResult.date,
|
||||||
content: x.text,
|
content: x.text,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
@ -484,7 +497,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
const textRep = x.textRepresentation ? `\n[${i + 1}] Content: \n${x.textRepresentation}` : '';
|
const textRep = x.textRepresentation ? `\n[${i + 1}] Content: \n${x.textRepresentation}` : '';
|
||||||
return `[${i + 1}] Title: ${this.title}
|
return `[${i + 1}] Title: ${this.title}
|
||||||
[${i + 1}] URL Source: ${this.url}
|
[${i + 1}] URL Source: ${this.url}
|
||||||
[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}
|
[${i + 1}] Description: ${this.description}${textRep}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''}
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -522,7 +535,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return `[${i + 1}] Title: ${this.title}
|
return `[${i + 1}] Title: ${this.title}
|
||||||
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}
|
[${i + 1}] URL Source: ${this.url}${mixins.length ? `\n${mixins.join('\n')}` : ''}${this.favicon !== undefined ? `\n[${i + 1}] Favicon: ${this.favicon}` : ''}${this.date ? `\n[${i + 1}] Date: ${this.date}` : ''}
|
||||||
[${i + 1}] Markdown Content:
|
[${i + 1}] Markdown Content:
|
||||||
${this.content}
|
${this.content}
|
||||||
${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
|
${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
|
||||||
|
@ -34,7 +34,6 @@ export interface FormattedPage {
|
|||||||
links?: { [k: string]: string; } | [string, string][];
|
links?: { [k: string]: string; } | [string, string][];
|
||||||
images?: { [k: string]: string; } | [string, string][];
|
images?: { [k: string]: string; } | [string, string][];
|
||||||
warning?: string;
|
warning?: string;
|
||||||
favicon?: string;
|
|
||||||
usage?: {
|
usage?: {
|
||||||
total_tokens?: number;
|
total_tokens?: number;
|
||||||
totalTokens?: number;
|
totalTokens?: number;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user