mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 01:45:56 +08:00
feat: search v2 (#1151)
This commit is contained in:
parent
29774ac637
commit
abd37e5266
@ -81,13 +81,17 @@ export class SearcherHost extends RPCHost {
|
|||||||
res: Response,
|
res: Response,
|
||||||
},
|
},
|
||||||
auth: JinaEmbeddingsAuthDTO,
|
auth: JinaEmbeddingsAuthDTO,
|
||||||
@Param('count', { default: 5, validate: (v) => v >= 0 && v <= 10 })
|
@Param('count', { default: 5, validate: (v) => v >= 0 && v <= 20 })
|
||||||
count: number,
|
count: number,
|
||||||
|
@Param('version', { default: 1, validate: (v) => v >= 1 })
|
||||||
|
version: number,
|
||||||
crawlerOptions: CrawlerOptions,
|
crawlerOptions: CrawlerOptions,
|
||||||
searchExplicitOperators: GoogleSearchExplicitOperatorsDto,
|
searchExplicitOperators: GoogleSearchExplicitOperatorsDto,
|
||||||
@Param('q') q?: string,
|
@Param('q') q?: string,
|
||||||
) {
|
) {
|
||||||
const uid = await auth.solveUID();
|
const uid = await auth.solveUID();
|
||||||
|
const isVersion2 = version === 2;
|
||||||
|
|
||||||
let chargeAmount = 0;
|
let chargeAmount = 0;
|
||||||
const noSlashPath = decodeURIComponent(ctx.req.path).slice(1);
|
const noSlashPath = decodeURIComponent(ctx.req.path).slice(1);
|
||||||
if (!noSlashPath && !q) {
|
if (!noSlashPath && !q) {
|
||||||
@ -143,7 +147,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
const searchQuery = searchExplicitOperators.addTo(q || noSlashPath);
|
const searchQuery = searchExplicitOperators.addTo(q || noSlashPath);
|
||||||
const r = await this.cachedWebSearch({
|
const r = await this.cachedWebSearch({
|
||||||
q: searchQuery,
|
q: searchQuery,
|
||||||
num: count ? Math.floor(count + 2) : 10
|
num: count ? (isVersion2 ? count : Math.min(Math.floor(count + 2)), 10) : 10
|
||||||
}, crawlerOptions.noCache);
|
}, crawlerOptions.noCache);
|
||||||
|
|
||||||
if (!r.organic.length) {
|
if (!r.organic.length) {
|
||||||
@ -154,6 +158,30 @@ export class SearcherHost extends RPCHost {
|
|||||||
delete crawlOpts.timeoutMs;
|
delete crawlOpts.timeoutMs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (isVersion2) {
|
||||||
|
chargeAmount = 1000;
|
||||||
|
const result = [];
|
||||||
|
for (const x of r.organic) {
|
||||||
|
const url = new URL(x.link);
|
||||||
|
const favicon = await this.getFavicon(url.origin);
|
||||||
|
|
||||||
|
result.push({
|
||||||
|
url: x.link,
|
||||||
|
title: x.title,
|
||||||
|
snippet: x.snippet,
|
||||||
|
domain: url.origin,
|
||||||
|
favicon: favicon,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
result,
|
||||||
|
usage: {
|
||||||
|
tokens: chargeAmount,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
const it = this.fetchSearchResults(crawlerOptions.respondWith, r.organic.slice(0, count + 2), crawlOpts,
|
const it = this.fetchSearchResults(crawlerOptions.respondWith, r.organic.slice(0, count + 2), crawlOpts,
|
||||||
CrawlerOptions.from({ ...crawlerOptions, cacheTolerance: crawlerOptions.cacheTolerance ?? this.pageCacheToleranceMs }),
|
CrawlerOptions.from({ ...crawlerOptions, cacheTolerance: crawlerOptions.cacheTolerance ?? this.pageCacheToleranceMs }),
|
||||||
count,
|
count,
|
||||||
@ -453,6 +481,24 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
|
|||||||
return _.every(results, (x) => this.pageQualified(x)) && results.length >= targetResultCount;
|
return _.every(results, (x) => this.pageQualified(x)) && results.length >= targetResultCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetches a 32px favicon for `domain` from Google's s2 favicon endpoint and
 * returns it inlined as a base64 `data:` URI.
 *
 * This is best-effort: a non-OK HTTP response or any thrown error (network
 * failure, body read failure) yields an empty string instead of propagating,
 * so a missing favicon never fails the surrounding search request.
 *
 * @param domain - Value passed as the `domain_url` query parameter (the
 *   caller visible in this file passes a URL origin such as `https://example.com`).
 * @returns A `data:<mime>;base64,...` URI, or `''` when the favicon could not be fetched.
 */
async getFavicon(domain: string) {
    // Percent-encode the argument so characters such as `&`, `#` or `?` cannot
    // terminate or split the `domain_url` query parameter.
    const url = `https://www.google.com/s2/favicons?sz=32&domain_url=${encodeURIComponent(domain)}`;

    try {
        const response = await fetch(url);
        if (!response.ok) {
            return '';
        }

        // Prefer the MIME type reported by the server; fall back to PNG when
        // the header is absent or is not an image type.
        const reportedType = (response.headers.get('content-type') ?? '').split(';')[0].trim().toLowerCase();
        const mimeType = reportedType.startsWith('image/') ? reportedType : 'image/png';

        const base64 = Buffer.from(await response.arrayBuffer()).toString('base64');

        return `data:${mimeType};base64,${base64}`;
    } catch (error: any) {
        this.logger.warn(`Failed to get favicon base64 string`, { err: marshalErrorLike(error) });

        return '';
    }
}
|
||||||
|
|
||||||
async cachedWebSearch(query: SerperSearchQueryParams, noCache: boolean = false) {
|
async cachedWebSearch(query: SerperSearchQueryParams, noCache: boolean = false) {
|
||||||
const queryDigest = objHashMd5B64Of(query);
|
const queryDigest = objHashMd5B64Of(query);
|
||||||
let cache;
|
let cache;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user