From 79b8bde9cce139ac92310fe707da850d5cbac664 Mon Sep 17 00:00:00 2001 From: Aaron Ji Date: Thu, 27 Mar 2025 16:40:11 +0800 Subject: [PATCH] chore: created WechatSearchService --- src/api/searcher-serper.ts | 26 +++++++++++----- src/services/serper-search.ts | 18 ----------- src/services/wechat-search.ts | 56 +++++++++++++++++++++++++++++++++++ thinapps-shared | 2 +- 4 files changed, 75 insertions(+), 27 deletions(-) create mode 100644 src/services/wechat-search.ts diff --git a/src/api/searcher-serper.ts b/src/api/searcher-serper.ts index c73da84..142fd40 100644 --- a/src/api/searcher-serper.ts +++ b/src/api/searcher-serper.ts @@ -22,6 +22,7 @@ import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth'; import { InsufficientBalanceError } from '../services/errors'; import { SerperImageSearchResponse, SerperNewsSearchResponse, SerperSearchQueryParams, SerperSearchResponse, SerperWebSearchResponse, WORLD_COUNTRIES, WORLD_LANGUAGES } from '../shared/3rd-party/serper-search'; import { toAsyncGenerator } from '../utils/misc'; +import { WechatSearchService } from '../services/wechat-search'; const WORLD_COUNTRY_CODES = Object.keys(WORLD_COUNTRIES).map((x) => x.toLowerCase()); @@ -49,6 +50,7 @@ export class SearcherHost extends RPCHost { protected serperSearchService: SerperSearchService, protected crawler: CrawlerHost, protected snapshotFormatter: SnapshotFormatter, + protected wechatSearchService: WechatSearchService, ) { super(...arguments); } @@ -88,10 +90,10 @@ export class SearcherHost extends RPCHost { searchExplicitOperators: GoogleSearchExplicitOperatorsDto, @Param('count', { validate: (v: number) => v >= 0 && v <= 20 }) count: number, - @Param('type', { type: new Set(['web', 'images', 'news']), default: 'web' }) - variant: 'web' | 'images' | 'news', - @Param('provider', { type: new Set(['google', 'bing', 'wechat']), default: 'google' }) - searchEngine: 'google' | 'bing' | 'wechat', + @Param('type', { type: new Set(['web', 'images', 'news', 'wechat']), default: 'web' }) + variant: 'web' | 'images' | 'news' | 'wechat', + @Param('provider', { type: new Set(['google', 'bing']), default: 'google' }) + searchEngine: 'google' | 'bing', @Param('num', { validate: (v: number) => v >= 0 && v <= 20 }) num?: number, @Param('gl', { validate: (v: string) => WORLD_COUNTRY_CODES.includes(v?.toLowerCase()) }) gl?: string, @@ -171,9 +173,6 @@ export class SearcherHost extends RPCHost { } let chargeAmountScaler = 1; - if (searchEngine === 'wechat') { - this.threadLocal.set('wechat-preferred', true); - } if (searchEngine === 'bing') { this.threadLocal.set('bing-preferred', true); chargeAmountScaler = 3; @@ -203,6 +202,10 @@ export class SearcherHost extends RPCHost { results = (r as SerperNewsSearchResponse).news; break; } + case 'wechat': { + results = (r as unknown as SerperWebSearchResponse['organic']); + break; + } case 'web': default: { results = (r as SerperWebSearchResponse).organic; @@ -510,7 +513,7 @@ export class SearcherHost extends RPCHost { } } - async cachedSearch(query: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; }, noCache: boolean = false) { + async cachedSearch(query: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news' | 'wechat'; provider?: string; }, noCache: boolean = false) { const queryDigest = objHashMd5B64Of(query); Reflect.deleteProperty(query, 'provider'); let cache; @@ -546,6 +549,13 @@ export class SearcherHost extends RPCHost { r = await this.serperSearchService.newsSearch(query); break; } + case 'wechat':{ + r = await this.wechatSearchService.search({ + kw: query.q, + page: query.page, + }); + break; + } case 'web': default: { r = await this.serperSearchService.webSearch(query); diff --git a/src/services/serper-search.ts b/src/services/serper-search.ts index a1408e8..4a6c41c 100644 --- a/src/services/serper-search.ts +++ b/src/services/serper-search.ts @@ -7,7 +7,6 @@ import { SerperBingHTTP, SerperGoogleHTTP, SerperImageSearchResponse, SerperNews import { BlackHoleDetector } from './blackhole-detector'; import { Context } from './registry'; import { ServiceBadAttemptError } from '../shared'; -import { WechatSearchHTTP } from '../shared/3rd-party/wechat-search'; @singleton() export class SerperSearchService extends AsyncService { @@ -16,7 +15,6 @@ export class SerperSearchService extends AsyncService { serperGoogleSearchHTTP!: SerperGoogleHTTP; serperBingSearchHTTP!: SerperBingHTTP; - wechatSearchHTTP!: WechatSearchHTTP; constructor( protected globalLogger: GlobalLogger, @@ -33,18 +31,13 @@ export class SerperSearchService extends AsyncService { this.serperGoogleSearchHTTP = new SerperGoogleHTTP(this.secretExposer.SERPER_SEARCH_API_KEY); this.serperBingSearchHTTP = new SerperBingHTTP(this.secretExposer.SERPER_SEARCH_API_KEY); - this.wechatSearchHTTP = new WechatSearchHTTP(this.secretExposer.WECHAT_SEARCH_API_KEY); } *iterClient() { const preferBingSearch = this.threadLocal.get('bing-preferred'); - const preferWechatSearch = this.threadLocal.get('wechat-preferred'); if (preferBingSearch) { yield this.serperBingSearchHTTP; } - if (preferWechatSearch) { - yield this.wechatSearchHTTP; - } while (true) { yield this.serperGoogleSearchHTTP; } @@ -66,17 +59,6 @@ export class SerperSearchService extends AsyncService { try { this.logger.debug(`Doing external search`, query); let r; - - if (client instanceof WechatSearchHTTP) { - r = await client.blogSearch({ - kw: query.q, - page: query.page - }); - - this.blackHoleDetector.itWorked(); - - return r.parsed; - } switch (variant) { case 'images': { r = await client.imageSearch(query); diff --git a/src/services/wechat-search.ts b/src/services/wechat-search.ts new file mode 100644 index 0000000..a1b05d2 --- /dev/null +++ b/src/services/wechat-search.ts @@ -0,0 +1,56 @@ +import { singleton } from 'tsyringe'; +import { GlobalLogger } from './logger'; +import { AsyncService, DownstreamServiceFailureError, marshalErrorLike } from 'civkit'; +import { SecretExposer } from '../shared/services/secrets'; +import { WechatBlogQueryParams, WechatSearchHTTP } from '../shared/3rd-party/wechat-search'; + + +@singleton() +export class WechatSearchService extends AsyncService { + + logger = this.globalLogger.child({ service: this.constructor.name }); + + wechatSearchHTTP!: WechatSearchHTTP; + + constructor( + protected globalLogger: GlobalLogger, + protected secretExposer: SecretExposer, + ) { + super(...arguments); + } + + override async init() { + await this.dependencyReady(); + this.emit('ready'); + + this.wechatSearchHTTP = new WechatSearchHTTP(this.secretExposer.WECHAT_SEARCH_API_KEY); + } + + async search(query: WechatBlogQueryParams) { + this.logger.info('searching for official account blogs', query); + + try { + // get wechat blog search results and convert format + const r = await this.wechatSearchHTTP.blogSearch(query); + + if (r.parsed.code > 100 && r.parsed.code < 200) { + throw new DownstreamServiceFailureError({ message: `Search(wechat) failed` }); + } + + return r.parsed.data?.map((page: any) => { + return { + title: page.title, + link: page.url, + content: page.content, + snippet: '', + publishedTime: page.publish_time, + date: page.publish_time_str, + }; + }); + + } catch (err: any) { + this.logger.error(`Wechat search failed: ${err?.message}`, { err: marshalErrorLike(err) }); + throw new DownstreamServiceFailureError({ message: `Search(wechat) failed` }); + } + } +} \ No newline at end of file diff --git a/thinapps-shared b/thinapps-shared index 00717c3..ba27f54 160000 --- a/thinapps-shared +++ b/thinapps-shared @@ -1 +1 @@ -Subproject commit 00717c3b08eeaf425d3a0d4880b05714051c01bd +Subproject commit ba27f5425a0fce93bac00690a867dc97a19dccf3