fix: fallback logic (#1182)

* fix: fallback logic

* fix: fallback logic

* fix: fallback logic

* chore: cleanup

* chore: cleanup

* chore: adjust fallback logic

* chore: adjust fallback logic

* chore: cleanup

* chore: cleanup

* tweak: fallback mech

---------

Co-authored-by: Yanlong Wang <yanlong.wang@naiver.org>
This commit is contained in:
Aaron Ji 2025-04-03 16:11:24 +08:00 committed by GitHub
parent 2c0d0209f0
commit 60e67dbafa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 88 additions and 62 deletions

View File

@ -286,30 +286,13 @@ export class SearcherHost extends RPCHost {
page,
};
const { response: r, query: successQuery, tryTimes } = await this.searchWithFallback(
const { results, query: successQuery, tryTimes } = await this.searchWithFallback(
searchParams, fallback, crawlerOptions.noCache
);
chargeAmountScaler *= tryTimes;
fallbackQuery = successQuery !== searchQuery ? successQuery : undefined;
let results;
switch (variant) {
case 'images': {
results = (r as SerperImageSearchResponse).images;
break;
}
case 'news': {
results = (r as SerperNewsSearchResponse).news;
break;
}
case 'web':
default: {
results = (r as SerperWebSearchResponse).organic;
break;
}
}
if (!results.length) {
throw new AssertionFailureError(`No search results available for query ${searchQuery}`);
}
@ -323,7 +306,7 @@ export class SearcherHost extends RPCHost {
const targetResultCount = crawlWithoutContent ? count : count + 2;
const trimmedResults = results.filter((x) => Boolean(x.link)).slice(0, targetResultCount).map((x) => this.mapToFinalResults(x));
trimmedResults.toString = function () {
let r = this.map((x, i) => x ? Reflect.apply(x.toString, x, [i]) : '').join('\n\n').trimEnd() + '\n';
let r = this.map((x, i) => x ? Reflect.apply(x.toString, x, [i]) : '').join('\n\n').trimEnd() + '\n';
if (fallbackQuery) {
r = `Fallback query: ${fallbackQuery}\n\n${r}`;
}
@ -515,52 +498,72 @@ export class SearcherHost extends RPCHost {
params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; },
useFallback: boolean = false,
noCache: boolean = false
): Promise<{ response: SerperSearchResponse; query: string; tryTimes: number }> {
) {
// Try original query first
const originalQuery = params.q;
const response = await this.cachedSearch(params, noCache);
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(originalQuery);
// Extract results based on variant
let results: any[] = [];
let tryTimes = 1;
const results = await this.doSearch(params, noCache);
if (results.length && !useFallback) {
return { results, query: params.q, tryTimes };
}
let queryTerms = originalQuery.split(/\s+/);
const lastResort = containsRTL ? queryTerms.slice(queryTerms.length - 2) : queryTerms.slice(0, 2);
this.logger.info(`No results for "${originalQuery}", trying fallback queries`);
let terms: string[] = [];
// fallback n times
const n = 4;
while (tryTimes <= n) {
const delta = Math.ceil(queryTerms.length / n) * tryTimes;
terms = containsRTL ? queryTerms.slice(0, queryTerms.length - delta) : queryTerms.slice(delta);
const query = terms.join(' ');
if (!query) {
break;
}
tryTimes += 1;
this.logger.info(`Retrying search with fallback query: "${query}"`);
const fallbackParams = { ...params, q: query };
const fallbackResults = await this.doSearch(fallbackParams, noCache);
if (fallbackResults.length > 0) {
return { results: fallbackResults, query: fallbackParams.q, tryTimes };
}
}
if (terms.length > lastResort.length) {
const query = lastResort.join(' ');
this.logger.info(`Retrying search with fallback query: "${query}"`);
const fallbackParams = { ...params, q: query };
tryTimes += 1;
const fallbackResults = await this.doSearch(fallbackParams, noCache);
if (fallbackResults.length > 0) {
return { results: fallbackResults, query, tryTimes };
}
}
return { results, query: originalQuery, tryTimes };
}
async doSearch(
params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; },
noCache: boolean = false,
) {
const response = await this.cachedSearch(params, noCache);
let results = [];
switch (params.variant) {
case 'images': results = (response as SerperImageSearchResponse).images; break;
case 'news': results = (response as SerperNewsSearchResponse).news; break;
case 'web': default: results = (response as SerperWebSearchResponse).organic; break;
}
// Return early if we got results or fallback is disabled
if (results.length > 0 || !useFallback) {
return { response, query: originalQuery, tryTimes };
}
// Try with progressively shorter queries
const terms = originalQuery.trim().split(/\s+/);
this.logger.info(`No results for "${originalQuery}", trying fallback queries`);
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(originalQuery);
while (terms.length > 1) {
containsRTL ? terms.shift() : terms.pop(); // Remove last term
const shortenedQuery = terms.join(' ');
const fallbackParams = { ...params, q: shortenedQuery };
const fallbackResponse = await this.cachedSearch(fallbackParams, noCache);
let fallbackResults: any[] = [];
switch (params.variant) {
case 'images': fallbackResults = (fallbackResponse as SerperImageSearchResponse).images; break;
case 'news': fallbackResults = (fallbackResponse as SerperNewsSearchResponse).news; break;
case 'web': default: fallbackResults = (fallbackResponse as SerperWebSearchResponse).organic; break;
}
tryTimes++;
if (fallbackResults.length > 0) {
return { response: fallbackResponse, query: shortenedQuery, tryTimes };
}
}
return { response, query: originalQuery, tryTimes };
return results;
}
async *fetchSearchResults(
@ -651,7 +654,7 @@ export class SearcherHost extends RPCHost {
metadata.fallback = fallbackQuery;
}
assignMeta(formatted, metadata);
assignMeta(formatted, metadata);
return final;
}

View File

@ -275,6 +275,8 @@ export class SerpHost extends RPCHost {
}
let realQuery = q;
let queryTerms = q.split(/\s+/g).filter((x) => !!x);
let results = await this.cachedSearch(variant, {
provider: searchEngine,
q,
@ -289,14 +291,21 @@ export class SerpHost extends RPCHost {
if (fallback && !results?.length && (!page || page === 1)) {
let tryTimes = 1;
const containsRTL = /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\u0590-\u05FF\uFB1D-\uFB4F\u0700-\u074F\u0780-\u07BF\u07C0-\u07FF]/.test(q);
let terms = q.split(/\s+/g).filter((x) => !!x);
terms = containsRTL ? terms.slice(10) : terms.slice(0, 10); // don't try to fallback on more than 10 terms
while (terms.length > 1) {
containsRTL ? terms.shift() : terms.pop(); // reduce the query by one term at a time
realQuery = terms.join(' ').trim();
if (!realQuery) {
const lastResort = (containsRTL ? queryTerms.slice(queryTerms.length - 2) : queryTerms.slice(0, 2)).join(' ');
const n = 4;
let terms: string[] = [];
while (tryTimes <= n) {
const delta = Math.ceil(queryTerms.length / n) * tryTimes;
terms = containsRTL ? queryTerms.slice(0, queryTerms.length - delta) : queryTerms.slice(delta);
const query = terms.join(' ');
if (!query) {
break;
}
if (realQuery === query) {
continue;
}
tryTimes += 1;
realQuery = query;
this.logger.info(`Retrying search with fallback query: "${realQuery}"`);
results = await this.cachedSearch(variant, {
provider: searchEngine,
@ -306,11 +315,25 @@ export class SerpHost extends RPCHost {
hl,
location,
}, crawlerOptions);
tryTimes += 1;
if (results?.length) {
break;
}
}
if (!results?.length && realQuery.length > lastResort.length) {
realQuery = lastResort;
this.logger.info(`Retrying search with fallback query: "${realQuery}"`);
tryTimes += 1;
results = await this.cachedSearch(variant, {
provider: searchEngine,
q: realQuery,
num,
gl,
hl,
location,
}, crawlerOptions);
}
chargeAmountScaler *= tryTimes;
}