mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-15 00:35:56 +08:00
Nick:
This commit is contained in:
parent
d547192f37
commit
2c391b0105
@ -31,9 +31,15 @@ Return only a concise sentece or 2 focused on the essential data points that the
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function buildRerankerSystemPrompt(): string {
|
export function buildRerankerSystemPrompt(): string {
|
||||||
return "You are a relevance expert. Analyze the provided URLs and their content to determine their relevance to the user's query and intent. For each URL, assign a relevance score between 0 and 1, where 1 means highly relevant and 0 means not relevant at all. Only include URLs that are actually relevant to the query.";
|
return `You are a relevance expert scoring links from a website the user is trying to
|
||||||
|
extract information from. Analyze the provided URLs and their content
|
||||||
|
to determine their relevance to the user's query and intent.
|
||||||
|
For each URL, assign a relevance score between 0 and 1, where 1
|
||||||
|
means highly relevant and we should extract the content from it and 0 means not relevant at all, we should not extract the content from it.
|
||||||
|
Always return all the links scored that you are giving. Do not omit links.
|
||||||
|
Always return the links in the same order they were provided. If the user wants the content from all the links, all links should be scored 1.`;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function buildRerankerUserPrompt(searchQuery: string): string {
|
export function buildRerankerUserPrompt(searchQuery: string): string {
|
||||||
return `Given these URLs and their content, identify which ones are relevant to the user's extraction request: "${searchQuery}". Return an array of relevant links with their relevance scores (0-1). Higher scores should be given to URLs that directly address the user's extraction request. Be very mindful with the links you select, as if they are not that relevant it may affect the quality of the extraction.`;
|
return `Given these URLs, rank which ones are relevant to the user's extraction intent: "${searchQuery}".`;
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@ import { searchSimilarPages } from "./index/pinecone";
|
|||||||
import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
|
import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
|
||||||
import { buildRerankerUserPrompt } from "./build-prompts";
|
import { buildRerankerUserPrompt } from "./build-prompts";
|
||||||
import { buildRerankerSystemPrompt } from "./build-prompts";
|
import { buildRerankerSystemPrompt } from "./build-prompts";
|
||||||
|
import { dumpToFile } from "./helpers/dump-to-file";
|
||||||
|
|
||||||
const cohere = new CohereClient({
|
const cohere = new CohereClient({
|
||||||
token: process.env.COHERE_API_KEY,
|
token: process.env.COHERE_API_KEY,
|
||||||
@ -158,7 +159,7 @@ function filterAndProcessLinks(
|
|||||||
}
|
}
|
||||||
|
|
||||||
export type RerankerResult = {
|
export type RerankerResult = {
|
||||||
mapDocument: (MapDocument & { relevanceScore?: number })[];
|
mapDocument: (MapDocument & { relevanceScore?: number; reason?: string })[];
|
||||||
tokensUsed: number;
|
tokensUsed: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -170,7 +171,7 @@ export type RerankerOptions = {
|
|||||||
|
|
||||||
export async function rerankLinksWithLLM(options: RerankerOptions): Promise<RerankerResult> {
|
export async function rerankLinksWithLLM(options: RerankerOptions): Promise<RerankerResult> {
|
||||||
const { links, searchQuery, urlTraces } = options;
|
const { links, searchQuery, urlTraces } = options;
|
||||||
const chunkSize = 100;
|
const chunkSize = 20;
|
||||||
const chunks: MapDocument[][] = [];
|
const chunks: MapDocument[][] = [];
|
||||||
const TIMEOUT_MS = 20000;
|
const TIMEOUT_MS = 20000;
|
||||||
const MAX_RETRIES = 2;
|
const MAX_RETRIES = 2;
|
||||||
@ -193,7 +194,7 @@ export async function rerankLinksWithLLM(options: RerankerOptions): Promise<Rera
|
|||||||
properties: {
|
properties: {
|
||||||
url: { type: "string" },
|
url: { type: "string" },
|
||||||
relevanceScore: { type: "number" },
|
relevanceScore: { type: "number" },
|
||||||
reason: { type: "string" },
|
reason: { type: "string", description: "The reason why you chose the score for this link given the intent." },
|
||||||
},
|
},
|
||||||
required: ["url", "relevanceScore", "reason"],
|
required: ["url", "relevanceScore", "reason"],
|
||||||
},
|
},
|
||||||
@ -219,6 +220,7 @@ export async function rerankLinksWithLLM(options: RerankerOptions): Promise<Rera
|
|||||||
setTimeout(() => resolve(null), TIMEOUT_MS);
|
setTimeout(() => resolve(null), TIMEOUT_MS);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// dumpToFile(new Date().toISOString(),[buildRerankerSystemPrompt(), buildRerankerUserPrompt(searchQuery), schema, linksContent])
|
||||||
const completionPromise = generateOpenAICompletions(
|
const completionPromise = generateOpenAICompletions(
|
||||||
logger.child({
|
logger.child({
|
||||||
method: "rerankLinksWithLLM",
|
method: "rerankLinksWithLLM",
|
||||||
@ -233,7 +235,7 @@ export async function rerankLinksWithLLM(options: RerankerOptions): Promise<Rera
|
|||||||
},
|
},
|
||||||
linksContent,
|
linksContent,
|
||||||
undefined,
|
undefined,
|
||||||
true,
|
true
|
||||||
);
|
);
|
||||||
|
|
||||||
const completion = await Promise.race([
|
const completion = await Promise.race([
|
||||||
@ -282,7 +284,7 @@ export async function rerankLinksWithLLM(options: RerankerOptions): Promise<Rera
|
|||||||
.map((result) => {
|
.map((result) => {
|
||||||
const link = links.find((link) => link.url === result.url);
|
const link = links.find((link) => link.url === result.url);
|
||||||
if (link) {
|
if (link) {
|
||||||
return { ...link, relevanceScore: result.relevanceScore ? parseFloat(result.relevanceScore) : 0 };
|
return { ...link, relevanceScore: result.relevanceScore ? parseFloat(result.relevanceScore) : 0, reason: result.reason };
|
||||||
}
|
}
|
||||||
return undefined;
|
return undefined;
|
||||||
})
|
})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user