mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-19 01:55:59 +08:00
feat(adaptive-crawl): disable invalid link
This commit is contained in:
parent
3f88f8d2f7
commit
8008e53d57
@ -418,11 +418,22 @@ export class AdaptiveCrawlerHost extends RPCHost {
|
|||||||
query: string;
|
query: string;
|
||||||
links: Record<string, string>;
|
links: Record<string, string>;
|
||||||
}) {
|
}) {
|
||||||
|
const invalidSuffix = [
|
||||||
|
'.zip',
|
||||||
|
'.docx',
|
||||||
|
'.pptx',
|
||||||
|
'.xlsx',
|
||||||
|
];
|
||||||
|
|
||||||
|
const validLinks = Object.entries(links)
|
||||||
|
.map(([title, link]) => link)
|
||||||
|
.filter(link => link.startsWith('http') && !invalidSuffix.some(suffix => link.endsWith(suffix)));
|
||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
model: 'jina-reranker-v2-base-multilingual',
|
model: 'jina-reranker-v2-base-multilingual',
|
||||||
query,
|
query,
|
||||||
top_n: 15,
|
top_n: 15,
|
||||||
documents: Object.entries(links).map(([title, link]) => link)
|
documents: validLinks,
|
||||||
};
|
};
|
||||||
|
|
||||||
const response = await fetch('https://api.jina.ai/v1/rerank', {
|
const response = await fetch('https://api.jina.ai/v1/rerank', {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user