mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-15 19:55:53 +08:00
feat: expose brave search operators explicitly
This commit is contained in:
parent
336931b5e8
commit
d0e2920163
@ -10,7 +10,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
|||||||
import _ from 'lodash';
|
import _ from 'lodash';
|
||||||
import { Request, Response } from 'express';
|
import { Request, Response } from 'express';
|
||||||
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
||||||
import { BraveSearchService } from '../services/brave-search';
|
import { BraveSearchExplicitOperatorsDto, BraveSearchService } from '../services/brave-search';
|
||||||
import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
|
import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
|
||||||
import { CookieParam } from 'puppeteer';
|
import { CookieParam } from 'puppeteer';
|
||||||
|
|
||||||
@ -84,6 +84,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
},
|
},
|
||||||
auth: JinaEmbeddingsAuthDTO,
|
auth: JinaEmbeddingsAuthDTO,
|
||||||
crawlerOptions: CrawlerOptions,
|
crawlerOptions: CrawlerOptions,
|
||||||
|
braveSearchExplicitOperators: BraveSearchExplicitOperatorsDto,
|
||||||
) {
|
) {
|
||||||
const uid = await auth.solveUID();
|
const uid = await auth.solveUID();
|
||||||
let chargeAmount = 0;
|
let chargeAmount = 0;
|
||||||
@ -153,7 +154,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const searchQuery = noSlashPath;
|
const searchQuery = braveSearchExplicitOperators.addTo(ctx.req.path.slice(1));
|
||||||
const r = await this.cachedWebSearch({
|
const r = await this.cachedWebSearch({
|
||||||
q: searchQuery,
|
q: searchQuery,
|
||||||
count: 10
|
count: 10
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import { AsyncService, DownstreamServiceFailureError, marshalErrorLike } from 'civkit';
|
import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, marshalErrorLike } from 'civkit';
|
||||||
import { singleton } from 'tsyringe';
|
import { singleton } from 'tsyringe';
|
||||||
import { Logger } from '../shared/services/logger';
|
import { Logger } from '../shared/services/logger';
|
||||||
import { SecretExposer } from '../shared/services/secrets';
|
import { SecretExposer } from '../shared/services/secrets';
|
||||||
@ -76,3 +76,74 @@ export class BraveSearchService extends AsyncService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Request DTO that exposes Brave Search operators as explicit, individually
 * documented parameters.
 *
 * Each property corresponds to one Brave search operator of the same name and
 * accepts either a single value or a list of alternatives. Use
 * {@link BraveSearchExplicitOperatorsDto.addTo} to append the operators that
 * are set to a free-text search term.
 */
export class BraveSearchExplicitOperatorsDto extends AutoCastable {
    @Prop({
        arrayOf: String,
        desc: `Returns web pages with a specific file extension. Example: to find the Honda GX120 Owner’s manual in PDF, type “Honda GX120 ownners manual ext:pdf”.`
    })
    ext?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages created in the specified file type. Example: to find a web page created in PDF format about the evaluation of age-related cognitive changes, type “evaluation of age cognitive changes filetype:pdf”.`
    })
    filetype?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages containing the specified term in the body of the page. Example: to find information about the Nvidia GeForce GTX 1080 Ti, making sure the page contains the keywords “founders edition” in the body, type “nvidia 1080 ti inbody:“founders edition””.`
    })
    inbody?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term in the title of the page. Example: to find pages about SEO conferences making sure the results contain 2023 in the title, type “seo conference intitle:2023”.`
    })
    intitle?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term either in the title or in the body of the page. Example: to find pages about the 2024 Oscars containing the keywords “best costume design” in the page, type “oscars 2024 inpage:“best costume design””.`
    })
    inpage?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages written in the specified language. The language code must be in the ISO 639-1 two-letter code format. Example: to find information on visas only in Spanish, type “visas lang:es”.`
    })
    lang?: string | string[];

    // The description below previously duplicated the `lang` text; `loc`
    // filters by country/region, not by language.
    @Prop({
        arrayOf: String,
        desc: `Returns web pages from the specified country or region. The country code must be in the ISO 3166-1 alpha-2 format. Example: to find web pages from Canada about the Niagara Falls, type “niagara falls loc:ca”.`
    })
    loc?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages coming only from a specific web site. Example: to find information about Goggles only on Brave pages, type “goggles site:brave.com”.`
    })
    site?: string | string[];

    /**
     * Appends every operator set on this instance to `searchTerm`.
     *
     * Every own enumerable property with a truthy value is treated as an
     * operator named after the property. Alternatives for the same operator
     * are joined with ` OR `; when more than one operator is present each
     * group is parenthesised and the groups are joined with ` AND `.
     *
     * Blank or nullish alternatives are dropped, values are trimmed, and a
     * value containing whitespace is wrapped in double quotes so the operator
     * applies to the whole phrase. Values that already contain a double quote
     * are passed through untouched, leaving quoting to the caller.
     *
     * @param searchTerm - The free-text part of the query.
     * @returns `searchTerm` followed by a space and the operator expression,
     * or `searchTerm` unchanged when no operator yields any text.
     */
    addTo(searchTerm: string): string {
        const chunks: string[] = [];

        for (const [key, value] of Object.entries(this) as [string, unknown][]) {
            if (!value) {
                continue;
            }

            const candidates: unknown[] = Array.isArray(value) ? value : [value];
            const textValue = candidates
                .filter((v) => v !== null && v !== undefined)
                .map((v) => String(v).trim())
                // Skip blanks so we never emit a dangling `key:` operator.
                .filter((v) => v.length > 0)
                .map((v) => `${key}:${/\s/.test(v) && !v.includes('"') ? `"${v}"` : v}`)
                .join(' OR ');

            if (textValue) {
                chunks.push(textValue);
            }
        }

        if (chunks.length === 0) {
            return searchTerm;
        }

        // Always a string: a lone group is emitted as-is, several groups are
        // parenthesised and AND-ed together.
        const opPart = chunks.length > 1 ? chunks.map((x) => `(${x})`).join(' AND ') : chunks[0];

        return [searchTerm, opPart].join(' ');
    }
}
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit f4becc28564c90a571c655804c4f3910747f657a
|
Subproject commit f166680848c5700030389cb69181e5de1535acff
|
Loading…
x
Reference in New Issue
Block a user