feat: search v2 (#1151)

This commit is contained in:
Sha Zhou 2025-02-25 16:33:23 +08:00 committed by GitHub
parent 29774ac637
commit abd37e5266
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -81,13 +81,17 @@ export class SearcherHost extends RPCHost {
res: Response, res: Response,
}, },
auth: JinaEmbeddingsAuthDTO, auth: JinaEmbeddingsAuthDTO,
@Param('count', { default: 5, validate: (v) => v >= 0 && v <= 10 }) @Param('count', { default: 5, validate: (v) => v >= 0 && v <= 20 })
count: number, count: number,
@Param('version', { default: 1, validate: (v) => v >= 1 })
version: number,
crawlerOptions: CrawlerOptions, crawlerOptions: CrawlerOptions,
searchExplicitOperators: GoogleSearchExplicitOperatorsDto, searchExplicitOperators: GoogleSearchExplicitOperatorsDto,
@Param('q') q?: string, @Param('q') q?: string,
) { ) {
const uid = await auth.solveUID(); const uid = await auth.solveUID();
const isVersion2 = version === 2;
let chargeAmount = 0; let chargeAmount = 0;
const noSlashPath = decodeURIComponent(ctx.req.path).slice(1); const noSlashPath = decodeURIComponent(ctx.req.path).slice(1);
if (!noSlashPath && !q) { if (!noSlashPath && !q) {
@ -143,7 +147,7 @@ export class SearcherHost extends RPCHost {
const searchQuery = searchExplicitOperators.addTo(q || noSlashPath); const searchQuery = searchExplicitOperators.addTo(q || noSlashPath);
const r = await this.cachedWebSearch({ const r = await this.cachedWebSearch({
q: searchQuery, q: searchQuery,
num: count ? Math.floor(count + 2) : 10 num: count ? (isVersion2 ? count : Math.min(Math.floor(count + 2)), 10) : 10
}, crawlerOptions.noCache); }, crawlerOptions.noCache);
if (!r.organic.length) { if (!r.organic.length) {
@ -154,6 +158,30 @@ export class SearcherHost extends RPCHost {
delete crawlOpts.timeoutMs; delete crawlOpts.timeoutMs;
} }
if (isVersion2) {
chargeAmount = 1000;
const result = [];
for (const x of r.organic) {
const url = new URL(x.link);
const favicon = await this.getFavicon(url.origin);
result.push({
url: x.link,
title: x.title,
snippet: x.snippet,
domain: url.origin,
favicon: favicon,
});
}
return {
result,
usage: {
tokens: chargeAmount,
}
};
}
const it = this.fetchSearchResults(crawlerOptions.respondWith, r.organic.slice(0, count + 2), crawlOpts, const it = this.fetchSearchResults(crawlerOptions.respondWith, r.organic.slice(0, count + 2), crawlOpts,
CrawlerOptions.from({ ...crawlerOptions, cacheTolerance: crawlerOptions.cacheTolerance ?? this.pageCacheToleranceMs }), CrawlerOptions.from({ ...crawlerOptions, cacheTolerance: crawlerOptions.cacheTolerance ?? this.pageCacheToleranceMs }),
count, count,
@ -453,6 +481,24 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
return _.every(results, (x) => this.pageQualified(x)) && results.length >= targetResultCount; return _.every(results, (x) => this.pageQualified(x)) && results.length >= targetResultCount;
} }
/**
 * Fetches the 32px favicon for a site through Google's s2 favicon endpoint
 * and returns it inlined as a base64 `data:` URI.
 *
 * This is best-effort: any failure (empty input, non-2xx response, thrown
 * fetch/read error) yields an empty string instead of an exception, so a
 * missing icon never fails the caller.
 *
 * @param domain - Value passed as the `domain_url` query parameter; the
 *   visible caller passes a URL origin such as `https://example.com`.
 * @returns A `data:<mime>;base64,...` URI, or `''` when no icon could be fetched.
 */
async getFavicon(domain: string): Promise<string> {
    // Nothing to look up; skip the network round-trip entirely.
    if (!domain) {
        return '';
    }

    // Encode the value so characters such as `&`, `#` or `?` inside it cannot
    // terminate or extend the query string of the favicon request.
    const url = `https://www.google.com/s2/favicons?sz=32&domain_url=${encodeURIComponent(domain)}`;

    try {
        const response = await fetch(url);
        if (!response.ok) {
            return '';
        }

        // Label the data URI with the media type the server actually reported
        // (parameters stripped); fall back to PNG when the header is missing
        // or does not describe an image.
        const reportedType = (response.headers.get('content-type') ?? '').split(';')[0].trim().toLowerCase();
        const mimeType = reportedType.startsWith('image/') ? reportedType : 'image/png';

        const base64 = Buffer.from(await response.arrayBuffer()).toString('base64');

        return `data:${mimeType};base64,${base64}`;
    } catch (error: any) {
        this.logger.warn(`Failed to get favicon base64 string`, { err: marshalErrorLike(error) });

        return '';
    }
}
async cachedWebSearch(query: SerperSearchQueryParams, noCache: boolean = false) { async cachedWebSearch(query: SerperSearchQueryParams, noCache: boolean = false) {
const queryDigest = objHashMd5B64Of(query); const queryDigest = objHashMd5B64Of(query);
let cache; let cache;