feat: expose brave search operators explicitly

This commit is contained in:
yanlong.wang 2024-07-15 18:11:58 +08:00
parent 336931b5e8
commit d0e2920163
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
3 changed files with 76 additions and 4 deletions

View File

@ -10,7 +10,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
import _ from 'lodash'; import _ from 'lodash';
import { Request, Response } from 'express'; import { Request, Response } from 'express';
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth'; import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
import { BraveSearchService } from '../services/brave-search'; import { BraveSearchExplicitOperatorsDto, BraveSearchService } from '../services/brave-search';
import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler'; import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
import { CookieParam } from 'puppeteer'; import { CookieParam } from 'puppeteer';
@ -84,6 +84,7 @@ export class SearcherHost extends RPCHost {
}, },
auth: JinaEmbeddingsAuthDTO, auth: JinaEmbeddingsAuthDTO,
crawlerOptions: CrawlerOptions, crawlerOptions: CrawlerOptions,
braveSearchExplicitOperators: BraveSearchExplicitOperatorsDto,
) { ) {
const uid = await auth.solveUID(); const uid = await auth.solveUID();
let chargeAmount = 0; let chargeAmount = 0;
@ -153,7 +154,7 @@ export class SearcherHost extends RPCHost {
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam, ...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
}); });
} }
const searchQuery = noSlashPath; const searchQuery = braveSearchExplicitOperators.addTo(ctx.req.path.slice(1));
const r = await this.cachedWebSearch({ const r = await this.cachedWebSearch({
q: searchQuery, q: searchQuery,
count: 10 count: 10

View File

@ -1,4 +1,4 @@
import { AsyncService, DownstreamServiceFailureError, marshalErrorLike } from 'civkit'; import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, marshalErrorLike } from 'civkit';
import { singleton } from 'tsyringe'; import { singleton } from 'tsyringe';
import { Logger } from '../shared/services/logger'; import { Logger } from '../shared/services/logger';
import { SecretExposer } from '../shared/services/secrets'; import { SecretExposer } from '../shared/services/secrets';
@ -76,3 +76,74 @@ export class BraveSearchService extends AsyncService {
} }
} }
/**
 * Request DTO exposing Brave Search operators as explicit, individually
 * documented parameters. Each property accepts one value or a list of values
 * and is rendered into the query string as `<operator>:<value>` by `addTo`.
 */
export class BraveSearchExplicitOperatorsDto extends AutoCastable {
    @Prop({
        arrayOf: String,
        desc: `Returns web pages with a specific file extension. Example: to find the Honda GX120 Owners manual in PDF, type “Honda GX120 owners manual ext:pdf”.`
    })
    ext?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages created in the specified file type. Example: to find a web page created in PDF format about the evaluation of age-related cognitive changes, type “evaluation of age cognitive changes filetype:pdf”.`
    })
    filetype?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages containing the specified term in the body of the page. Example: to find information about the Nvidia GeForce GTX 1080 Ti, making sure the page contains the keywords “founders edition” in the body, type “nvidia 1080 ti inbody:“founders edition””.`
    })
    inbody?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term in the title of the page. Example: to find pages about SEO conferences making sure the results contain 2023 in the title, type “seo conference intitle:2023”.`
    })
    intitle?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term either in the title or in the body of the page. Example: to find pages about the 2024 Oscars containing the keywords “best costume design” in the page, type “oscars 2024 inpage:“best costume design””.`
    })
    inpage?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages written in the specified language. The language code must be in the ISO 639-1 two-letter code format. Example: to find information on visas only in Spanish, type “visas lang:es”.`
    })
    lang?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages from a specified country or region. The country code must be in the ISO 3166-1 alpha-2 format. Example: to find Canadian web pages about the Niagara Falls, type “niagara falls loc:ca”.`
    })
    loc?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages coming only from a specific web site. Example: to find information about Goggles only on Brave pages, type “goggles site:brave.com”.`
    })
    site?: string | string[];

    /**
     * Appends the operators set on this instance to a search term.
     *
     * Every own enumerable property holding one or more non-blank strings
     * becomes an operator group: its values are rendered as `key:value` and
     * joined with ` OR `. Several groups are each parenthesized and joined
     * with ` AND `. A lone group is parenthesized only when it has more than
     * one value, so its `OR` cannot absorb the search term.
     *
     * @param searchTerm The free-text query to extend.
     * @returns `searchTerm` followed by a space and the operator expression,
     *          or `searchTerm` unchanged when no operator has a usable value.
     */
    addTo(searchTerm: string): string {
        const groups: { text: string; compound: boolean }[] = [];

        for (const [key, value] of Object.entries(this)) {
            const candidates: unknown[] = Array.isArray(value) ? value : [value];
            const terms = candidates
                // Ignore anything that is not a string, then drop blanks so
                // that no dangling `key:` token is ever emitted.
                .filter((v): v is string => typeof v === 'string')
                .map((v) => v.trim())
                .filter((v) => v.length > 0)
                .map((v) => {
                    // A multi-word value must be quoted or only its first
                    // word binds to the operator. Values that already carry
                    // quotes (straight or curly) are left untouched.
                    const needsQuotes = /\s/.test(v) && !/["“”]/.test(v);
                    return `${key}:${needsQuotes ? `"${v}"` : v}`;
                });

            if (terms.length > 0) {
                groups.push({ text: terms.join(' OR '), compound: terms.length > 1 });
            }
        }

        if (groups.length === 0) {
            return searchTerm;
        }

        const wrapEvery = groups.length > 1;
        const opPart = groups
            .map((g) => (wrapEvery || g.compound ? `(${g.text})` : g.text))
            .join(' AND ');

        return `${searchTerm} ${opPart}`;
    }
}

@ -1 +1 @@
Subproject commit f4becc28564c90a571c655804c4f3910747f657a Subproject commit f166680848c5700030389cb69181e5de1535acff