mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-06 02:06:05 +08:00
Nick: re-ranker safety + unit tests
This commit is contained in:
parent
aa26dbe74e
commit
95bea6a391
68
apps/api/src/lib/ranker.test.ts
Normal file
68
apps/api/src/lib/ranker.test.ts
Normal file
@ -0,0 +1,68 @@
|
||||
import { performRanking } from './ranker';
|
||||
|
||||
/**
 * Unit tests for `performRanking`.
 *
 * Each test passes parallel arrays (`linksWithContext[i]` describes `links[i]`)
 * plus a search query, and checks the shape and ordering of the ranked result.
 */
describe('performRanking', () => {
  it('should rank links based on similarity to search query', async () => {
    const linksWithContext = [
      'url: https://example.com/dogs, title: All about dogs, description: Learn about different dog breeds',
      'url: https://example.com/cats, title: Cat care guide, description: Everything about cats',
      'url: https://example.com/pets, title: General pet care, description: Care for all types of pets'
    ];

    const links = [
      'https://example.com/dogs',
      'https://example.com/cats',
      'https://example.com/pets'
    ];

    const searchQuery = 'cats training';

    const result = await performRanking(linksWithContext, links, searchQuery);

    // Should return an array of objects with link, linkWithContext, score, originalIndex
    expect(result).toBeInstanceOf(Array);
    expect(result.length).toBe(3);

    // First result should be the cats page since the query is about cats
    expect(result[0].link).toBe('https://example.com/cats');

    // Each result should have the required properties and a score within [0, 1]
    result.forEach(item => {
      expect(item).toHaveProperty('link');
      expect(item).toHaveProperty('linkWithContext');
      expect(item).toHaveProperty('score');
      expect(item).toHaveProperty('originalIndex');
      expect(typeof item.score).toBe('number');
      expect(item.score).toBeGreaterThanOrEqual(0);
      expect(item.score).toBeLessThanOrEqual(1);
    });

    // Scores should be in descending order
    for (let i = 1; i < result.length; i++) {
      expect(result[i].score).toBeLessThanOrEqual(result[i - 1].score);
    }
  });

  it('should handle empty inputs', async () => {
    const result = await performRanking([], [], '');
    expect(result).toEqual([]);
  });

  it('should maintain original order for equal scores', async () => {
    const linksWithContext = [
      'url: https://example.com/1, title: Similar content A, description: test',
      'url: https://example.com/2, title: Similar content B, description: test'
    ];

    const links = [
      'https://example.com/1',
      'https://example.com/2'
    ];

    const searchQuery = 'test';

    const result = await performRanking(linksWithContext, links, searchQuery);

    expect(result.length).toBe(2);

    // The two contexts differ, so their scores are not guaranteed to tie.
    // Check the ordering invariant pairwise instead of assuming a tie:
    // scores never increase, and whenever two neighbours tie, the one that
    // came first in the input stays first.
    for (let i = 1; i < result.length; i++) {
      const previous = result[i - 1];
      const current = result[i];

      expect(current.score).toBeLessThanOrEqual(previous.score);

      if (current.score === previous.score) {
        expect(previous.originalIndex).toBeLessThan(current.originalIndex);
      }
    }
  });
});
|
@ -42,29 +42,43 @@ const textToVector = (searchQuery: string, text: string): number[] => {
|
||||
|
||||
async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) {
|
||||
try {
|
||||
// Handle invalid inputs
|
||||
if (!searchQuery || !linksWithContext.length || !links.length) {
|
||||
return [];
|
||||
}
|
||||
|
||||
// Sanitize search query by removing null characters
|
||||
const sanitizedQuery = searchQuery;
|
||||
|
||||
// Generate embeddings for the search query
|
||||
const queryEmbedding = await getEmbedding(searchQuery);
|
||||
const queryEmbedding = await getEmbedding(sanitizedQuery);
|
||||
|
||||
// Generate embeddings for each link and calculate similarity
|
||||
const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => {
|
||||
const linkEmbedding = await getEmbedding(linkWithContext);
|
||||
|
||||
// console.log("linkEmbedding", linkEmbedding);
|
||||
// const linkVector = textToVector(searchQuery, linkWithContext);
|
||||
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
|
||||
// console.log("score", score);
|
||||
return {
|
||||
link: links[index], // Use corresponding link from links array
|
||||
linkWithContext,
|
||||
score,
|
||||
originalIndex: index // Store original position
|
||||
};
|
||||
try {
|
||||
const linkEmbedding = await getEmbedding(linkWithContext);
|
||||
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
|
||||
|
||||
return {
|
||||
link: links[index],
|
||||
linkWithContext,
|
||||
score,
|
||||
originalIndex: index
|
||||
};
|
||||
} catch (err) {
|
||||
// If embedding fails for a link, return with score 0
|
||||
return {
|
||||
link: links[index],
|
||||
linkWithContext,
|
||||
score: 0,
|
||||
originalIndex: index
|
||||
};
|
||||
}
|
||||
}));
|
||||
|
||||
// Sort links based on similarity scores while preserving original order for equal scores
|
||||
linksAndScores.sort((a, b) => {
|
||||
const scoreDiff = b.score - a.score;
|
||||
// If scores are equal, maintain original order
|
||||
return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff;
|
||||
});
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user