mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-15 07:45:57 +08:00
feat: expose brave search operators explicitly
This commit is contained in:
parent
336931b5e8
commit
d0e2920163
@ -10,7 +10,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
||||
import _ from 'lodash';
|
||||
import { Request, Response } from 'express';
|
||||
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
||||
import { BraveSearchService } from '../services/brave-search';
|
||||
import { BraveSearchExplicitOperatorsDto, BraveSearchService } from '../services/brave-search';
|
||||
import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
|
||||
import { CookieParam } from 'puppeteer';
|
||||
|
||||
@ -84,6 +84,7 @@ export class SearcherHost extends RPCHost {
|
||||
},
|
||||
auth: JinaEmbeddingsAuthDTO,
|
||||
crawlerOptions: CrawlerOptions,
|
||||
braveSearchExplicitOperators: BraveSearchExplicitOperatorsDto,
|
||||
) {
|
||||
const uid = await auth.solveUID();
|
||||
let chargeAmount = 0;
|
||||
@ -153,7 +154,7 @@ export class SearcherHost extends RPCHost {
|
||||
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
||||
});
|
||||
}
|
||||
const searchQuery = noSlashPath;
|
||||
const searchQuery = braveSearchExplicitOperators.addTo(ctx.req.path.slice(1));
|
||||
const r = await this.cachedWebSearch({
|
||||
q: searchQuery,
|
||||
count: 10
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { AsyncService, DownstreamServiceFailureError, marshalErrorLike } from 'civkit';
|
||||
import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, marshalErrorLike } from 'civkit';
|
||||
import { singleton } from 'tsyringe';
|
||||
import { Logger } from '../shared/services/logger';
|
||||
import { SecretExposer } from '../shared/services/secrets';
|
||||
@ -76,3 +76,74 @@ export class BraveSearchService extends AsyncService {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
 * Operator names understood by `BraveSearchExplicitOperatorsDto.addTo`,
 * listed in the order they are appended to the query.
 */
const BRAVE_SEARCH_OPERATOR_KEYS = [
    'ext',
    'filetype',
    'inbody',
    'intitle',
    'inpage',
    'lang',
    'loc',
    'site',
] as const;

/**
 * Normalizes a single operator value for inclusion in a query string.
 *
 * Surrounding whitespace is removed. A value that still contains whitespace is
 * wrapped in double quotes so the operator applies to the whole phrase rather
 * than only to its first word. Values that already contain a double quote are
 * left untouched, on the assumption that the caller quoted them deliberately.
 *
 * @returns the normalized value, or an empty string when nothing is left.
 */
function formatBraveOperatorValue(value: string): string {
    const trimmed = String(value).trim();
    if (!trimmed) {
        return '';
    }
    if (/\s/.test(trimmed) && !trimmed.includes('"')) {
        return `"${trimmed}"`;
    }

    return trimmed;
}

/**
 * Request DTO exposing Brave search operators as explicit parameters.
 *
 * Every property accepts either a single value or a list of values. `addTo`
 * renders the populated properties as `operator:value` terms and appends them
 * to a free-text search term.
 */
export class BraveSearchExplicitOperatorsDto extends AutoCastable {
    @Prop({
        arrayOf: String,
        desc: `Returns web pages with a specific file extension. Example: to find the Honda GX120 Owner’s manual in PDF, type “Honda GX120 ownners manual ext:pdf”.`
    })
    ext?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages created in the specified file type. Example: to find a web page created in PDF format about the evaluation of age-related cognitive changes, type “evaluation of age cognitive changes filetype:pdf”.`
    })
    filetype?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages containing the specified term in the body of the page. Example: to find information about the Nvidia GeForce GTX 1080 Ti, making sure the page contains the keywords “founders edition” in the body, type “nvidia 1080 ti inbody:“founders edition””.`
    })
    inbody?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term in the title of the page. Example: to find pages about SEO conferences making sure the results contain 2023 in the title, type “seo conference intitle:2023”.`
    })
    intitle?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term either in the title or in the body of the page. Example: to find pages about the 2024 Oscars containing the keywords “best costume design” in the page, type “oscars 2024 inpage:“best costume design””.`
    })
    inpage?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages written in the specified language. The language code must be in the ISO 639-1 two-letter code format. Example: to find information on visas only in Spanish, type “visas lang:es”.`
    })
    lang?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages from the specified country or region. The country code must be in the ISO 3166-1 alpha-2 format. Example: to find web pages from Canada about the Niagara Falls, type “niagara falls loc:ca”.`
    })
    loc?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages coming only from a specific web site. Example: to find information about Goggles only on Brave pages, type “goggles site:brave.com”.`
    })
    site?: string | string[];

    /**
     * Appends the populated operators to a search term.
     *
     * Values of the same operator are joined with ` OR `. When more than one
     * operator is populated, each operator group is parenthesized and the
     * groups are joined with ` AND `. Blank values are ignored, and multi-word
     * values are quoted so they stay attached to their operator.
     *
     * @param searchTerm free-text part of the query.
     * @returns `searchTerm` followed by the operator expression, or
     * `searchTerm` unchanged when no operator carries a usable value.
     */
    addTo(searchTerm: string): string {
        const groups: string[] = [];

        // Only the declared operators are rendered; unrelated own properties
        // on the instance must never leak into the query.
        for (const key of BRAVE_SEARCH_OPERATOR_KEYS) {
            const raw = this[key];
            if (!raw) {
                continue;
            }

            const terms = (Array.isArray(raw) ? raw : [raw])
                .map((v) => formatBraveOperatorValue(v))
                .filter((v) => v.length > 0)
                .map((v) => `${key}:${v}`);

            if (terms.length) {
                groups.push(terms.join(' OR '));
            }
        }

        if (!groups.length) {
            return searchTerm;
        }

        // A lone group is appended as-is; several groups are parenthesized so
        // the OR inside a group cannot be confused with the AND between them.
        const rendered = groups.length > 1 ? groups.map((g) => `(${g})`) : groups;
        const opPart = rendered.join(' AND ');

        return searchTerm ? `${searchTerm} ${opPart}` : opPart;
    }
}
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit f4becc28564c90a571c655804c4f3910747f657a
|
||||
Subproject commit f166680848c5700030389cb69181e5de1535acff
|
Loading…
x
Reference in New Issue
Block a user