Nick: re-ranker safety + unit tests

This commit is contained in:
Nicolas 2024-11-24 19:34:56 -08:00
parent aa26dbe74e
commit 95bea6a391
2 changed files with 96 additions and 14 deletions

View File

@ -0,0 +1,68 @@
import { performRanking } from './ranker';
/**
 * Unit tests for `performRanking(linksWithContext, links, searchQuery)`.
 *
 * The tests cover three behaviors: results are ordered by descending similarity
 * score, falsy/empty inputs produce an empty array, and ties are broken by the
 * position each link had in the input.
 *
 * NOTE(review): these tests call the real `performRanking`; if its embedding
 * lookup reaches an external service, consider mocking it — confirm.
 */
describe('performRanking', () => {
  it('should rank links based on similarity to search query', async () => {
    const linksWithContext = [
      'url: https://example.com/dogs, title: All about dogs, description: Learn about different dog breeds',
      'url: https://example.com/cats, title: Cat care guide, description: Everything about cats',
      'url: https://example.com/pets, title: General pet care, description: Care for all types of pets'
    ];
    const links = [
      'https://example.com/dogs',
      'https://example.com/cats',
      'https://example.com/pets'
    ];
    const searchQuery = 'cats training';

    const result = await performRanking(linksWithContext, links, searchQuery);

    // Should return one entry per input link.
    expect(result).toBeInstanceOf(Array);
    expect(result.length).toBe(3);

    // The query is about cats, so the cats page should rank first.
    expect(result[0].link).toBe('https://example.com/cats');

    // Each result should expose link, linkWithContext, score and originalIndex,
    // with the score being a number in the range [0, 1].
    result.forEach(item => {
      expect(item).toHaveProperty('link');
      expect(item).toHaveProperty('linkWithContext');
      expect(item).toHaveProperty('score');
      expect(item).toHaveProperty('originalIndex');
      expect(typeof item.score).toBe('number');
      expect(item.score).toBeGreaterThanOrEqual(0);
      expect(item.score).toBeLessThanOrEqual(1);
    });

    // Scores should be in descending order.
    for (let i = 1; i < result.length; i++) {
      expect(result[i].score).toBeLessThanOrEqual(result[i - 1].score);
    }
  });

  it('should handle empty inputs', async () => {
    const result = await performRanking([], [], '');
    expect(result).toEqual([]);
  });

  it('should maintain original order for equal scores', async () => {
    const linksWithContext = [
      'url: https://example.com/1, title: Similar content A, description: test',
      'url: https://example.com/2, title: Similar content B, description: test'
    ];
    const links = [
      'https://example.com/1',
      'https://example.com/2'
    ];
    const searchQuery = 'test';

    const result = await performRanking(linksWithContext, links, searchQuery);

    expect(result.length).toBe(2);

    // The two contexts are similar but not identical, so their scores are not
    // guaranteed to tie. Rather than assuming a tie, assert the ordering
    // invariant: each entry either scores strictly lower than its predecessor,
    // or ties with it and came later in the original input.
    for (let i = 1; i < result.length; i++) {
      const previous = result[i - 1];
      const current = result[i];
      const correctlyOrdered =
        previous.score > current.score ||
        (previous.score === current.score &&
          previous.originalIndex < current.originalIndex);
      expect(correctlyOrdered).toBe(true);
    }
  });
});

View File

@ -42,29 +42,43 @@ const textToVector = (searchQuery: string, text: string): number[] => {
async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) {
try {
// Handle invalid inputs
if (!searchQuery || !linksWithContext.length || !links.length) {
return [];
}
// Sanitize search query by removing null characters
const sanitizedQuery = searchQuery;
// Generate embeddings for the search query
const queryEmbedding = await getEmbedding(searchQuery);
const queryEmbedding = await getEmbedding(sanitizedQuery);
// Generate embeddings for each link and calculate similarity
const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => {
const linkEmbedding = await getEmbedding(linkWithContext);
// console.log("linkEmbedding", linkEmbedding);
// const linkVector = textToVector(searchQuery, linkWithContext);
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
// console.log("score", score);
return {
link: links[index], // Use corresponding link from links array
linkWithContext,
score,
originalIndex: index // Store original position
};
try {
const linkEmbedding = await getEmbedding(linkWithContext);
const score = cosineSimilarity(queryEmbedding, linkEmbedding);
return {
link: links[index],
linkWithContext,
score,
originalIndex: index
};
} catch (err) {
// If embedding fails for a link, return with score 0
return {
link: links[index],
linkWithContext,
score: 0,
originalIndex: index
};
}
}));
// Sort links based on similarity scores while preserving original order for equal scores
linksAndScores.sort((a, b) => {
const scoreDiff = b.score - a.score;
// If scores are equal, maintain original order
return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff;
});