fix(adaptive): url hash

This commit is contained in:
Zhaofeng Miao 2024-09-23 16:21:46 +08:00
parent 39e49cac63
commit 3f88f8d2f7

View File

@@ -21,9 +21,13 @@ import { Timestamp } from 'firebase-admin/firestore';
const md5Hasher = new HashManager('md5', 'hex'); const md5Hasher = new HashManager('md5', 'hex');
const removeURLHash = (url: string) => { const removeURLHash = (url: string) => {
const o = new URL(url); try {
o.hash = ''; const o = new URL(url);
return o.toString(); o.hash = '';
return o.toString();
} catch (e) {
return url;
}
} }
@singleton() @singleton()
@@ -440,7 +444,7 @@ export class AdaptiveCrawlerHost extends RPCHost {
}[]; }[];
}; };
return json.results.filter(r => r.relevance_score > 0.3).map(r => r.document.text); return json.results.filter(r => r.relevance_score > 0.3).map(r => removeURLHash(r.document.text));
} }
getIndex(user?: JinaEmbeddingsTokenAccount) { getIndex(user?: JinaEmbeddingsTokenAccount) {