mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 18:55:55 +08:00
chore: created WechatSearchService
This commit is contained in:
parent
9d51458350
commit
79b8bde9cc
@ -22,6 +22,7 @@ import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
|
||||
import { InsufficientBalanceError } from '../services/errors';
|
||||
import { SerperImageSearchResponse, SerperNewsSearchResponse, SerperSearchQueryParams, SerperSearchResponse, SerperWebSearchResponse, WORLD_COUNTRIES, WORLD_LANGUAGES } from '../shared/3rd-party/serper-search';
|
||||
import { toAsyncGenerator } from '../utils/misc';
|
||||
import { WechatSearchService } from '../services/wechat-search';
|
||||
|
||||
const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES).map((x) => x.toLowerCase());
|
||||
|
||||
@ -49,6 +50,7 @@ export class SearcherHost extends RPCHost {
|
||||
protected serperSearchService: SerperSearchService,
|
||||
protected crawler: CrawlerHost,
|
||||
protected snapshotFormatter: SnapshotFormatter,
|
||||
protected wechatSearchService: WechatSearchService,
|
||||
) {
|
||||
super(...arguments);
|
||||
}
|
||||
@ -88,10 +90,10 @@ export class SearcherHost extends RPCHost {
|
||||
searchExplicitOperators: GoogleSearchExplicitOperatorsDto,
|
||||
@Param('count', { validate: (v: number) => v >= 0 && v <= 20 })
|
||||
count: number,
|
||||
@Param('type', { type: new Set(['web', 'images', 'news']), default: 'web' })
|
||||
variant: 'web' | 'images' | 'news',
|
||||
@Param('provider', { type: new Set(['google', 'bing', 'wechat']), default: 'google' })
|
||||
searchEngine: 'google' | 'bing' | 'wechat',
|
||||
@Param('type', { type: new Set(['web', 'images', 'news', 'wechat']), default: 'web' })
|
||||
variant: 'web' | 'images' | 'news' | 'wechat',
|
||||
@Param('provider', { type: new Set(['google', 'bing']), default: 'google' })
|
||||
searchEngine: 'google' | 'bing',
|
||||
@Param('num', { validate: (v: number) => v >= 0 && v <= 20 })
|
||||
num?: number,
|
||||
@Param('gl', { validate: (v: string) => WORLD_COUNTRY_CODES.includes(v?.toLowerCase()) }) gl?: string,
|
||||
@ -171,9 +173,6 @@ export class SearcherHost extends RPCHost {
|
||||
}
|
||||
|
||||
let chargeAmountScaler = 1;
|
||||
if (searchEngine === 'wechat') {
|
||||
this.threadLocal.set('wechat-preferred', true);
|
||||
}
|
||||
if (searchEngine === 'bing') {
|
||||
this.threadLocal.set('bing-preferred', true);
|
||||
chargeAmountScaler = 3;
|
||||
@ -203,6 +202,10 @@ export class SearcherHost extends RPCHost {
|
||||
results = (r as SerperNewsSearchResponse).news;
|
||||
break;
|
||||
}
|
||||
case 'wechat': {
|
||||
results = (r as unknown as SerperWebSearchResponse['organic']);
|
||||
break;
|
||||
}
|
||||
case 'web':
|
||||
default: {
|
||||
results = (r as SerperWebSearchResponse).organic;
|
||||
@ -510,7 +513,7 @@ export class SearcherHost extends RPCHost {
|
||||
}
|
||||
}
|
||||
|
||||
async cachedSearch(query: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; }, noCache: boolean = false) {
|
||||
async cachedSearch(query: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news' | 'wechat'; provider?: string; }, noCache: boolean = false) {
|
||||
const queryDigest = objHashMd5B64Of(query);
|
||||
Reflect.deleteProperty(query, 'provider');
|
||||
let cache;
|
||||
@ -546,6 +549,13 @@ export class SearcherHost extends RPCHost {
|
||||
r = await this.serperSearchService.newsSearch(query);
|
||||
break;
|
||||
}
|
||||
case 'wechat':{
|
||||
r = await this.wechatSearchService.search({
|
||||
kw: query.q,
|
||||
page: query.page,
|
||||
});
|
||||
break;
|
||||
}
|
||||
case 'web':
|
||||
default: {
|
||||
r = await this.serperSearchService.webSearch(query);
|
||||
|
@ -7,7 +7,6 @@ import { SerperBingHTTP, SerperGoogleHTTP, SerperImageSearchResponse, SerperNews
|
||||
import { BlackHoleDetector } from './blackhole-detector';
|
||||
import { Context } from './registry';
|
||||
import { ServiceBadAttemptError } from '../shared';
|
||||
import { WechatSearchHTTP } from '../shared/3rd-party/wechat-search';
|
||||
|
||||
@singleton()
|
||||
export class SerperSearchService extends AsyncService {
|
||||
@ -16,7 +15,6 @@ export class SerperSearchService extends AsyncService {
|
||||
|
||||
serperGoogleSearchHTTP!: SerperGoogleHTTP;
|
||||
serperBingSearchHTTP!: SerperBingHTTP;
|
||||
wechatSearchHTTP!: WechatSearchHTTP;
|
||||
|
||||
constructor(
|
||||
protected globalLogger: GlobalLogger,
|
||||
@ -33,18 +31,13 @@ export class SerperSearchService extends AsyncService {
|
||||
|
||||
this.serperGoogleSearchHTTP = new SerperGoogleHTTP(this.secretExposer.SERPER_SEARCH_API_KEY);
|
||||
this.serperBingSearchHTTP = new SerperBingHTTP(this.secretExposer.SERPER_SEARCH_API_KEY);
|
||||
this.wechatSearchHTTP = new WechatSearchHTTP(this.secretExposer.WECHAT_SEARCH_API_KEY);
|
||||
}
|
||||
|
||||
*iterClient() {
|
||||
const preferBingSearch = this.threadLocal.get('bing-preferred');
|
||||
const preferWechatSearch = this.threadLocal.get('wechat-preferred');
|
||||
if (preferBingSearch) {
|
||||
yield this.serperBingSearchHTTP;
|
||||
}
|
||||
if (preferWechatSearch) {
|
||||
yield this.wechatSearchHTTP;
|
||||
}
|
||||
while (true) {
|
||||
yield this.serperGoogleSearchHTTP;
|
||||
}
|
||||
@ -66,17 +59,6 @@ export class SerperSearchService extends AsyncService {
|
||||
try {
|
||||
this.logger.debug(`Doing external search`, query);
|
||||
let r;
|
||||
|
||||
if (client instanceof WechatSearchHTTP) {
|
||||
r = await client.blogSearch({
|
||||
kw: query.q,
|
||||
page: query.page
|
||||
});
|
||||
|
||||
this.blackHoleDetector.itWorked();
|
||||
|
||||
return r.parsed;
|
||||
}
|
||||
switch (variant) {
|
||||
case 'images': {
|
||||
r = await client.imageSearch(query);
|
||||
|
56
src/services/wechat-search.ts
Normal file
56
src/services/wechat-search.ts
Normal file
@ -0,0 +1,56 @@
|
||||
import { singleton } from 'tsyringe';
|
||||
import { GlobalLogger } from './logger';
|
||||
import { AsyncService, DownstreamServiceFailureError, marshalErrorLike } from 'civkit';
|
||||
import { SecretExposer } from '../shared/services/secrets';
|
||||
import { WechatBlogQueryParams, WechatSearchHTTP } from '../shared/3rd-party/wechat-search';
|
||||
|
||||
|
||||
/**
 * Thin service around {@link WechatSearchHTTP} that searches WeChat official
 * account articles ("blogs") and maps each hit into a flat record with
 * `title`, `link`, `content`, `snippet`, `publishedTime` and `date` fields.
 */
@singleton()
export class WechatSearchService extends AsyncService {

    logger = this.globalLogger.child({ service: this.constructor.name });

    /** HTTP client for the upstream search API; assigned in {@link init}. */
    wechatSearchHTTP!: WechatSearchHTTP;

    constructor(
        protected globalLogger: GlobalLogger,
        protected secretExposer: SecretExposer,
    ) {
        super(...arguments);
    }

    /**
     * Waits for dependencies, builds the HTTP client from the configured
     * API key, and only then signals readiness.
     */
    override async init() {
        await this.dependencyReady();

        // Build the client BEFORE emitting 'ready' so that any listener that
        // runs synchronously on the event never observes an unset client.
        this.wechatSearchHTTP = new WechatSearchHTTP(this.secretExposer.WECHAT_SEARCH_API_KEY);

        this.emit('ready');
    }

    /**
     * Runs a blog search against the upstream API and converts the hits.
     *
     * @param query - Parameters forwarded unchanged to `blogSearch`.
     * @returns One record per upstream hit; an empty array when the response
     *          carries no `data`.
     * @throws DownstreamServiceFailureError when the request fails, or when
     *         the response `code` lies strictly between 100 and 200.
     */
    async search(query: WechatBlogQueryParams) {
        this.logger.info('searching for official account blogs', query);

        try {
            // get wechat blog search results and convert format
            const r = await this.wechatSearchHTTP.blogSearch(query);

            // NOTE(review): codes in (100, 200) are treated as upstream
            // failures; the exact meaning of each code is defined by the
            // upstream API and is not visible here - confirm.
            if (r.parsed.code > 100 && r.parsed.code < 200) {
                throw new DownstreamServiceFailureError({ message: `Search(wechat) failed` });
            }

            const pages = r.parsed.data ?? [];

            return pages.map((page: any) => {
                return {
                    title: page.title,
                    link: page.url,
                    content: page.content,
                    // The mapping always emits an empty snippet.
                    snippet: '',
                    publishedTime: page.publish_time,
                    date: page.publish_time_str,
                };
            });
        } catch (err: any) {
            // Every failure (transport error or the code check above) is
            // logged and surfaced to callers as the same downstream error.
            this.logger.error(`Wechat search failed: ${err?.message}`, { err: marshalErrorLike(err) });
            throw new DownstreamServiceFailureError({ message: `Search(wechat) failed` });
        }
    }
}
|
@ -1 +1 @@
|
||||
Subproject commit 00717c3b08eeaf425d3a0d4880b05714051c01bd
|
||||
Subproject commit ba27f5425a0fce93bac00690a867dc97a19dccf3
|
Loading…
x
Reference in New Issue
Block a user