From 8008e53d5720f24552ec50f2adc145c0dcef3c90 Mon Sep 17 00:00:00 2001 From: Zhaofeng Miao <522856232@qq.com> Date: Wed, 25 Sep 2024 14:18:28 +0800 Subject: [PATCH] feat(adaptive-crawl): disable invalid link --- .../src/cloud-functions/adaptive-crawler.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/backend/functions/src/cloud-functions/adaptive-crawler.ts b/backend/functions/src/cloud-functions/adaptive-crawler.ts index 30cab87..ebf11fe 100644 --- a/backend/functions/src/cloud-functions/adaptive-crawler.ts +++ b/backend/functions/src/cloud-functions/adaptive-crawler.ts @@ -418,11 +418,22 @@ export class AdaptiveCrawlerHost extends RPCHost { query: string; links: Record; }) { + const invalidSuffix = [ + '.zip', + '.docx', + '.pptx', + '.xlsx', + ]; + + const validLinks = Object.entries(links) + .map(([title, link]) => link) + .filter(link => link.startsWith('http') && !invalidSuffix.some(suffix => link.endsWith(suffix))); + const data = { model: 'jina-reranker-v2-base-multilingual', query, top_n: 15, - documents: Object.entries(links).map(([title, link]) => link) + documents: validLinks, }; const response = await fetch('https://api.jina.ai/v1/rerank', {