feat(adaptive-crawl): filter out invalid links before reranking

This commit is contained in:
Zhaofeng Miao 2024-09-25 14:18:28 +08:00
parent 3f88f8d2f7
commit 8008e53d57

View File

@@ -418,11 +418,22 @@ export class AdaptiveCrawlerHost extends RPCHost {
query: string; query: string;
links: Record<string, string>; links: Record<string, string>;
}) { }) {
const invalidSuffix = [
'.zip',
'.docx',
'.pptx',
'.xlsx',
];
const validLinks = Object.entries(links)
.map(([title, link]) => link)
.filter(link => link.startsWith('http') && !invalidSuffix.some(suffix => link.endsWith(suffix)));
const data = { const data = {
model: 'jina-reranker-v2-base-multilingual', model: 'jina-reranker-v2-base-multilingual',
query, query,
top_n: 15, top_n: 15,
documents: Object.entries(links).map(([title, link]) => link) documents: validLinks,
}; };
const response = await fetch('https://api.jina.ai/v1/rerank', { const response = await fetch('https://api.jina.ai/v1/rerank', {