mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-17 05:45:59 +08:00
chore: created WechatSearchService
This commit is contained in:
parent
9d51458350
commit
79b8bde9cc
@ -22,6 +22,7 @@ import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
|
|||||||
import { InsufficientBalanceError } from '../services/errors';
|
import { InsufficientBalanceError } from '../services/errors';
|
||||||
import { SerperImageSearchResponse, SerperNewsSearchResponse, SerperSearchQueryParams, SerperSearchResponse, SerperWebSearchResponse, WORLD_COUNTRIES, WORLD_LANGUAGES } from '../shared/3rd-party/serper-search';
|
import { SerperImageSearchResponse, SerperNewsSearchResponse, SerperSearchQueryParams, SerperSearchResponse, SerperWebSearchResponse, WORLD_COUNTRIES, WORLD_LANGUAGES } from '../shared/3rd-party/serper-search';
|
||||||
import { toAsyncGenerator } from '../utils/misc';
|
import { toAsyncGenerator } from '../utils/misc';
|
||||||
|
import { WechatSearchService } from '../services/wechat-search';
|
||||||
|
|
||||||
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES).map((x) => x.toLowerCase());
|
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES).map((x) => x.toLowerCase());
|
||||||
|
|
||||||
@ -49,6 +50,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
protected serperSearchService: SerperSearchService,
|
protected serperSearchService: SerperSearchService,
|
||||||
protected crawler: CrawlerHost,
|
protected crawler: CrawlerHost,
|
||||||
protected snapshotFormatter: SnapshotFormatter,
|
protected snapshotFormatter: SnapshotFormatter,
|
||||||
|
protected wechatSearchService: WechatSearchService,
|
||||||
) {
|
) {
|
||||||
super(...arguments);
|
super(...arguments);
|
||||||
}
|
}
|
||||||
@ -88,10 +90,10 @@ export class SearcherHost extends RPCHost {
|
|||||||
searchExplicitOperators: GoogleSearchExplicitOperatorsDto,
|
searchExplicitOperators: GoogleSearchExplicitOperatorsDto,
|
||||||
@Param('count', { validate: (v: number) => v >= 0 && v <= 20 })
|
@Param('count', { validate: (v: number) => v >= 0 && v <= 20 })
|
||||||
count: number,
|
count: number,
|
||||||
@Param('type', { type: new Set(['web', 'images', 'news']), default: 'web' })
|
@Param('type', { type: new Set(['web', 'images', 'news', 'wechat']), default: 'web' })
|
||||||
variant: 'web' | 'images' | 'news',
|
variant: 'web' | 'images' | 'news' | 'wechat',
|
||||||
@Param('provider', { type: new Set(['google', 'bing', 'wechat']), default: 'google' })
|
@Param('provider', { type: new Set(['google', 'bing']), default: 'google' })
|
||||||
searchEngine: 'google' | 'bing' | 'wechat',
|
searchEngine: 'google' | 'bing',
|
||||||
@Param('num', { validate: (v: number) => v >= 0 && v <= 20 })
|
@Param('num', { validate: (v: number) => v >= 0 && v <= 20 })
|
||||||
num?: number,
|
num?: number,
|
||||||
@Param('gl', { validate: (v: string) => WORLD_COUNTRY_CODES.includes(v?.toLowerCase()) }) gl?: string,
|
@Param('gl', { validate: (v: string) => WORLD_COUNTRY_CODES.includes(v?.toLowerCase()) }) gl?: string,
|
||||||
@ -171,9 +173,6 @@ export class SearcherHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let chargeAmountScaler = 1;
|
let chargeAmountScaler = 1;
|
||||||
if (searchEngine === 'wechat') {
|
|
||||||
this.threadLocal.set('wechat-preferred', true);
|
|
||||||
}
|
|
||||||
if (searchEngine === 'bing') {
|
if (searchEngine === 'bing') {
|
||||||
this.threadLocal.set('bing-preferred', true);
|
this.threadLocal.set('bing-preferred', true);
|
||||||
chargeAmountScaler = 3;
|
chargeAmountScaler = 3;
|
||||||
@ -203,6 +202,10 @@ export class SearcherHost extends RPCHost {
|
|||||||
results = (r as SerperNewsSearchResponse).news;
|
results = (r as SerperNewsSearchResponse).news;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case 'wechat': {
|
||||||
|
results = (r as unknown as SerperWebSearchResponse['organic']);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case 'web':
|
case 'web':
|
||||||
default: {
|
default: {
|
||||||
results = (r as SerperWebSearchResponse).organic;
|
results = (r as SerperWebSearchResponse).organic;
|
||||||
@ -510,7 +513,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async cachedSearch(query: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; }, noCache: boolean = false) {
|
async cachedSearch(query: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news' | 'wechat'; provider?: string; }, noCache: boolean = false) {
|
||||||
const queryDigest = objHashMd5B64Of(query);
|
const queryDigest = objHashMd5B64Of(query);
|
||||||
Reflect.deleteProperty(query, 'provider');
|
Reflect.deleteProperty(query, 'provider');
|
||||||
let cache;
|
let cache;
|
||||||
@ -546,6 +549,13 @@ export class SearcherHost extends RPCHost {
|
|||||||
r = await this.serperSearchService.newsSearch(query);
|
r = await this.serperSearchService.newsSearch(query);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case 'wechat':{
|
||||||
|
r = await this.wechatSearchService.search({
|
||||||
|
kw: query.q,
|
||||||
|
page: query.page,
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
case 'web':
|
case 'web':
|
||||||
default: {
|
default: {
|
||||||
r = await this.serperSearchService.webSearch(query);
|
r = await this.serperSearchService.webSearch(query);
|
||||||
|
@ -7,7 +7,6 @@ import { SerperBingHTTP, SerperGoogleHTTP, SerperImageSearchResponse, SerperNews
|
|||||||
import { BlackHoleDetector } from './blackhole-detector';
|
import { BlackHoleDetector } from './blackhole-detector';
|
||||||
import { Context } from './registry';
|
import { Context } from './registry';
|
||||||
import { ServiceBadAttemptError } from '../shared';
|
import { ServiceBadAttemptError } from '../shared';
|
||||||
import { WechatSearchHTTP } from '../shared/3rd-party/wechat-search';
|
|
||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
export class SerperSearchService extends AsyncService {
|
export class SerperSearchService extends AsyncService {
|
||||||
@ -16,7 +15,6 @@ export class SerperSearchService extends AsyncService {
|
|||||||
|
|
||||||
serperGoogleSearchHTTP!: SerperGoogleHTTP;
|
serperGoogleSearchHTTP!: SerperGoogleHTTP;
|
||||||
serperBingSearchHTTP!: SerperBingHTTP;
|
serperBingSearchHTTP!: SerperBingHTTP;
|
||||||
wechatSearchHTTP!: WechatSearchHTTP;
|
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
protected globalLogger: GlobalLogger,
|
protected globalLogger: GlobalLogger,
|
||||||
@ -33,18 +31,13 @@ export class SerperSearchService extends AsyncService {
|
|||||||
|
|
||||||
this.serperGoogleSearchHTTP = new SerperGoogleHTTP(this.secretExposer.SERPER_SEARCH_API_KEY);
|
this.serperGoogleSearchHTTP = new SerperGoogleHTTP(this.secretExposer.SERPER_SEARCH_API_KEY);
|
||||||
this.serperBingSearchHTTP = new SerperBingHTTP(this.secretExposer.SERPER_SEARCH_API_KEY);
|
this.serperBingSearchHTTP = new SerperBingHTTP(this.secretExposer.SERPER_SEARCH_API_KEY);
|
||||||
this.wechatSearchHTTP = new WechatSearchHTTP(this.secretExposer.WECHAT_SEARCH_API_KEY);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
*iterClient() {
|
*iterClient() {
|
||||||
const preferBingSearch = this.threadLocal.get('bing-preferred');
|
const preferBingSearch = this.threadLocal.get('bing-preferred');
|
||||||
const preferWechatSearch = this.threadLocal.get('wechat-preferred');
|
|
||||||
if (preferBingSearch) {
|
if (preferBingSearch) {
|
||||||
yield this.serperBingSearchHTTP;
|
yield this.serperBingSearchHTTP;
|
||||||
}
|
}
|
||||||
if (preferWechatSearch) {
|
|
||||||
yield this.wechatSearchHTTP;
|
|
||||||
}
|
|
||||||
while (true) {
|
while (true) {
|
||||||
yield this.serperGoogleSearchHTTP;
|
yield this.serperGoogleSearchHTTP;
|
||||||
}
|
}
|
||||||
@ -66,17 +59,6 @@ export class SerperSearchService extends AsyncService {
|
|||||||
try {
|
try {
|
||||||
this.logger.debug(`Doing external search`, query);
|
this.logger.debug(`Doing external search`, query);
|
||||||
let r;
|
let r;
|
||||||
|
|
||||||
if (client instanceof WechatSearchHTTP) {
|
|
||||||
r = await client.blogSearch({
|
|
||||||
kw: query.q,
|
|
||||||
page: query.page
|
|
||||||
});
|
|
||||||
|
|
||||||
this.blackHoleDetector.itWorked();
|
|
||||||
|
|
||||||
return r.parsed;
|
|
||||||
}
|
|
||||||
switch (variant) {
|
switch (variant) {
|
||||||
case 'images': {
|
case 'images': {
|
||||||
r = await client.imageSearch(query);
|
r = await client.imageSearch(query);
|
||||||
|
56
src/services/wechat-search.ts
Normal file
56
src/services/wechat-search.ts
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import { singleton } from 'tsyringe';
|
||||||
|
import { GlobalLogger } from './logger';
|
||||||
|
import { AsyncService, DownstreamServiceFailureError, marshalErrorLike } from 'civkit';
|
||||||
|
import { SecretExposer } from '../shared/services/secrets';
|
||||||
|
import { WechatBlogQueryParams, WechatSearchHTTP } from '../shared/3rd-party/wechat-search';
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Service wrapper around {@link WechatSearchHTTP} for searching WeChat
 * official-account blog posts.
 *
 * Results are reshaped into plain objects carrying `title`, `link`,
 * `content`, `snippet`, `publishedTime` and `date`.
 */
@singleton()
export class WechatSearchService extends AsyncService {

    logger = this.globalLogger.child({ service: this.constructor.name });

    /** HTTP client; assigned in {@link init} before `'ready'` is emitted. */
    wechatSearchHTTP!: WechatSearchHTTP;

    constructor(
        protected globalLogger: GlobalLogger,
        protected secretExposer: SecretExposer,
    ) {
        super(...arguments);
    }

    /**
     * Waits for dependencies, builds the HTTP client from
     * `WECHAT_SEARCH_API_KEY`, then signals readiness.
     */
    override async init() {
        await this.dependencyReady();

        // Build the client BEFORE announcing readiness, so that a listener
        // reacting synchronously to 'ready' never sees an unset client.
        this.wechatSearchHTTP = new WechatSearchHTTP(this.secretExposer.WECHAT_SEARCH_API_KEY);

        this.emit('ready');
    }

    /**
     * Runs a blog search and converts the response entries.
     *
     * @param query Parameters forwarded unchanged to `blogSearch`.
     * @returns The converted entries; an empty array when the response
     *          carries no `data`.
     * @throws DownstreamServiceFailureError when the request fails, or when
     *         the response `code` is strictly between 100 and 200.
     */
    async search(query: WechatBlogQueryParams) {
        this.logger.info('searching for official account blogs', query);

        try {
            // get wechat blog search results and convert format
            const r = await this.wechatSearchHTTP.blogSearch(query);

            // NOTE(review): codes 101..199 are treated as failures here; the
            // upstream meaning of these codes is not visible in this file.
            if (r.parsed.code > 100 && r.parsed.code < 200) {
                throw new DownstreamServiceFailureError({ message: `Search(wechat) failed` });
            }

            // A response without `data` yields an empty list rather than
            // `undefined`, so callers can always treat the result as an array.
            const pages = r.parsed.data ?? [];

            return pages.map((page: any) => {
                return {
                    title: page.title,
                    link: page.url,
                    content: page.content,
                    snippet: '',
                    publishedTime: page.publish_time,
                    date: page.publish_time_str,
                };
            });

        } catch (err: any) {
            // Every failure (including the one thrown above) is logged and
            // surfaced to callers as the same downstream error.
            this.logger.error(`Wechat search failed: ${err?.message}`, { err: marshalErrorLike(err) });
            throw new DownstreamServiceFailureError({ message: `Search(wechat) failed` });
        }
    }
}
|
@ -1 +1 @@
|
|||||||
Subproject commit 00717c3b08eeaf425d3a0d4880b05714051c01bd
|
Subproject commit ba27f5425a0fce93bac00690a867dc97a19dccf3
|
Loading…
x
Reference in New Issue
Block a user