fix: fallback logic (#1182)

* fix: fallback logic

* fix: fallback logic

* fix: fallback logic

* chore: cleanup

* chore: cleanup

* chore: adjust fallback logic

* chore: adjust fallback logic

* chore: cleanup

* chore: cleanup

* tweak: fallback mech

---------

Co-authored-by: Yanlong Wang <yanlong.wang@naiver.org>
This commit is contained in:
Aaron Ji 2025-04-03 16:11:24 +08:00 committed by GitHub
parent 2c0d0209f0
commit 60e67dbafa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 88 additions and 62 deletions

View File

@ -286,30 +286,13 @@ export class SearcherHost extends RPCHost {
page, page,
}; };
const { response: r, query: successQuery, tryTimes } = await this.searchWithFallback( const { results, query: successQuery, tryTimes } = await this.searchWithFallback(
searchParams, fallback, crawlerOptions.noCache searchParams, fallback, crawlerOptions.noCache
); );
chargeAmountScaler *= tryTimes; chargeAmountScaler *= tryTimes;
fallbackQuery = successQuery !== searchQuery ? successQuery : undefined; fallbackQuery = successQuery !== searchQuery ? successQuery : undefined;
let results;
switch (variant) {
case 'images': {
results = (r as SerperImageSearchResponse).images;
break;
}
case 'news': {
results = (r as SerperNewsSearchResponse).news;
break;
}
case 'web':
default: {
results = (r as SerperWebSearchResponse).organic;
break;
}
}
if (!results.length) { if (!results.length) {
throw new AssertionFailureError(`No search results available for query ${searchQuery}`); throw new AssertionFailureError(`No search results available for query ${searchQuery}`);
} }
@ -515,52 +498,72 @@ export class SearcherHost extends RPCHost {
params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; }, params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; },
useFallback: boolean = false, useFallback: boolean = false,
noCache: boolean = false noCache: boolean = false
): Promise<{ response: SerperSearchResponse; query: string; tryTimes: number }> { ) {
// Try original query first // Try original query first
const originalQuery = params.q; const originalQuery = params.q;
const response = await this.cachedSearch(params, noCache); const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(originalQuery);
// Extract results based on variant // Extract results based on variant
let results: any[] = [];
let tryTimes = 1; let tryTimes = 1;
// Run the query exactly as given. Stop here when it produced results, or when
// the caller did not ask for fallback queries.
const results = await this.doSearch(params, noCache);
if (results.length || !useFallback) {
    return { results, query: params.q, tryTimes };
}

// Drop empty strings so leading/trailing whitespace does not count as a term.
const queryTerms = originalQuery.split(/\s+/).filter(Boolean);
// Shortest query we are willing to try: two terms taken from the end of the
// term list for RTL queries, from the start otherwise.
const lastResort = containsRTL ? queryTerms.slice(-2) : queryTerms.slice(0, 2);

this.logger.info(`No results for "${originalQuery}", trying fallback queries`);

// Term list of the most recent query that was actually sent.
let lastTried = queryTerms;

// Shorten the query in at most n steps, removing `step` more terms each time.
const n = 4;
const step = Math.ceil(queryTerms.length / n);
for (let attempt = 1; attempt <= n; attempt++) {
    const delta = step * attempt;
    if (delta >= queryTerms.length) {
        // Nothing would be left to search for. Stopping here also keeps the
        // slice end below from going negative, which would count back from
        // the end of the array and make the query longer again.
        break;
    }
    const terms = containsRTL
        ? queryTerms.slice(0, queryTerms.length - delta)
        : queryTerms.slice(delta);
    const query = terms.join(' ');

    // tryTimes counts every search issued; it is returned to the caller.
    tryTimes += 1;
    lastTried = terms;
    this.logger.info(`Retrying search with fallback query: "${query}"`);
    const fallbackParams = { ...params, q: query };
    const fallbackResults = await this.doSearch(fallbackParams, noCache);
    if (fallbackResults.length > 0) {
        return { results: fallbackResults, query: fallbackParams.q, tryTimes };
    }
}

// Final attempt with the two-term query, only when the last query sent was
// still longer than that.
if (lastTried.length > lastResort.length) {
    const query = lastResort.join(' ');
    this.logger.info(`Retrying search with fallback query: "${query}"`);
    const fallbackParams = { ...params, q: query };
    tryTimes += 1;
    const fallbackResults = await this.doSearch(fallbackParams, noCache);
    if (fallbackResults.length > 0) {
        return { results: fallbackResults, query, tryTimes };
    }
}

// Every fallback came back empty: report the original (empty) result set.
return { results, query: originalQuery, tryTimes };
}
async doSearch(
params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; },
noCache: boolean = false,
) {
const response = await this.cachedSearch(params, noCache);
let results = [];
switch (params.variant) { switch (params.variant) {
case 'images': results = (response as SerperImageSearchResponse).images; break; case 'images': results = (response as SerperImageSearchResponse).images; break;
case 'news': results = (response as SerperNewsSearchResponse).news; break; case 'news': results = (response as SerperNewsSearchResponse).news; break;
case 'web': default: results = (response as SerperWebSearchResponse).organic; break; case 'web': default: results = (response as SerperWebSearchResponse).organic; break;
} }
// Return early if we got results or fallback is disabled return results;
if (results.length > 0 || !useFallback) {
return { response, query: originalQuery, tryTimes };
}
// Try with progressively shorter queries
const terms = originalQuery.trim().split(/\s+/);
this.logger.info(`No results for "${originalQuery}", trying fallback queries`);
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(originalQuery);
while (terms.length > 1) {
containsRTL ? terms.shift() : terms.pop(); // Remove last term
const shortenedQuery = terms.join(' ');
const fallbackParams = { ...params, q: shortenedQuery };
const fallbackResponse = await this.cachedSearch(fallbackParams, noCache);
let fallbackResults: any[] = [];
switch (params.variant) {
case 'images': fallbackResults = (fallbackResponse as SerperImageSearchResponse).images; break;
case 'news': fallbackResults = (fallbackResponse as SerperNewsSearchResponse).news; break;
case 'web': default: fallbackResults = (fallbackResponse as SerperWebSearchResponse).organic; break;
}
tryTimes++;
if (fallbackResults.length > 0) {
return { response: fallbackResponse, query: shortenedQuery, tryTimes };
}
}
return { response, query: originalQuery, tryTimes };
} }
async *fetchSearchResults( async *fetchSearchResults(

View File

@ -275,6 +275,8 @@ export class SerpHost extends RPCHost {
} }
let realQuery = q; let realQuery = q;
let queryTerms = q.split(/\s+/g).filter((x) => !!x);
let results = await this.cachedSearch(variant, { let results = await this.cachedSearch(variant, {
provider: searchEngine, provider: searchEngine,
q, q,
@ -289,14 +291,21 @@ export class SerpHost extends RPCHost {
if (fallback && !results?.length && (!page || page === 1)) { if (fallback && !results?.length && (!page || page === 1)) {
let tryTimes = 1; let tryTimes = 1;
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(q); const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(q);
let terms = q.split(/\s+/g).filter((x) => !!x); const lastResort = (containsRTL ? queryTerms.slice(queryTerms.length - 2) : queryTerms.slice(0, 2)).join(' ');
terms = containsRTL ? terms.slice(10) : terms.slice(0, 10); // don't try to fallback on more than 10 terms const n = 4;
while (terms.length > 1) { let terms: string[] = [];
containsRTL ? terms.shift() : terms.pop(); // reduce the query by one term at a time while (tryTimes <= n) {
realQuery = terms.join(' ').trim(); const delta = Math.ceil(queryTerms.length / n) * tryTimes;
if (!realQuery) { terms = containsRTL ? queryTerms.slice(0, queryTerms.length - delta) : queryTerms.slice(delta);
const query = terms.join(' ');
if (!query) {
break; break;
} }
if (realQuery === query) {
continue;
}
tryTimes += 1;
realQuery = query;
this.logger.info(`Retrying search with fallback query: "${realQuery}"`); this.logger.info(`Retrying search with fallback query: "${realQuery}"`);
results = await this.cachedSearch(variant, { results = await this.cachedSearch(variant, {
provider: searchEngine, provider: searchEngine,
@ -306,11 +315,25 @@ export class SerpHost extends RPCHost {
hl, hl,
location, location,
}, crawlerOptions); }, crawlerOptions);
tryTimes += 1;
if (results?.length) { if (results?.length) {
break; break;
} }
} }
if (!results?.length && realQuery.length > lastResort.length) {
realQuery = lastResort;
this.logger.info(`Retrying search with fallback query: "${realQuery}"`);
tryTimes += 1;
results = await this.cachedSearch(variant, {
provider: searchEngine,
q: realQuery,
num,
gl,
hl,
location,
}, crawlerOptions);
}
chargeAmountScaler *= tryTimes; chargeAmountScaler *= tryTimes;
} }