mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 04:05:59 +08:00
fix(adaptive): url hash
This commit is contained in:
parent
39e49cac63
commit
3f88f8d2f7
@ -21,9 +21,13 @@ import { Timestamp } from 'firebase-admin/firestore';
|
|||||||
|
|
||||||
const md5Hasher = new HashManager('md5', 'hex');
|
const md5Hasher = new HashManager('md5', 'hex');
|
||||||
const removeURLHash = (url: string) => {
|
const removeURLHash = (url: string) => {
|
||||||
|
try {
|
||||||
const o = new URL(url);
|
const o = new URL(url);
|
||||||
o.hash = '';
|
o.hash = '';
|
||||||
return o.toString();
|
return o.toString();
|
||||||
|
} catch (e) {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@singleton()
|
@singleton()
|
||||||
@ -440,7 +444,7 @@ export class AdaptiveCrawlerHost extends RPCHost {
|
|||||||
}[];
|
}[];
|
||||||
};
|
};
|
||||||
|
|
||||||
return json.results.filter(r => r.relevance_score > 0.3).map(r => r.document.text);
|
return json.results.filter(r => r.relevance_score > 0.3).map(r => removeURLHash(r.document.text));
|
||||||
}
|
}
|
||||||
|
|
||||||
getIndex(user?: JinaEmbeddingsTokenAccount) {
|
getIndex(user?: JinaEmbeddingsTokenAccount) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user