mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 13:00:37 +08:00
Nick:
This commit is contained in:
parent
d430cfcbfb
commit
a4f15260a7
@ -10,8 +10,6 @@ import {
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { getJobPriority } from "../../lib/job-priority";
|
||||
import { PlanType } from "../../types";
|
||||
import { rerankDocuments } from "../../lib/extract/reranker";
|
||||
import { generateBasicCompletion } from "../../lib/extract/completions";
|
||||
import { getMapResults } from "./map";
|
||||
|
||||
|
||||
@ -40,7 +38,7 @@ export async function extractController(
|
||||
const mappedDocuments: MapDocument[] = [];
|
||||
|
||||
const prompt = req.body.prompt;
|
||||
const keywords = await generateBasicCompletion(`If the user's prompt is: "${prompt}", what are the most important keywords besides the extraction task? Output only the keywords, separated by commas.`);
|
||||
// const keywords = await generateBasicCompletion(`If the user's prompt is: "${prompt}", what are the most important keywords besides the extraction task? Output only the keywords, separated by commas.`);
|
||||
|
||||
for (const url of urls) {
|
||||
if (url.endsWith("/*")) {
|
||||
@ -57,15 +55,15 @@ export async function extractController(
|
||||
subId: req.acuc?.sub_id,
|
||||
includeMetadata: true
|
||||
});
|
||||
// top 3 links
|
||||
const top3Links = (mapResults.links as MapDocument[]).slice(0, 3);
|
||||
console.log(top3Links);
|
||||
// console.log(top3Links);
|
||||
mappedDocuments.push(...(mapResults.links as MapDocument[]));
|
||||
// transform mappedUrls to just documents
|
||||
// we quickly rerank
|
||||
const rerank = await rerankDocuments(mappedDocuments.map(x => `URL: ${x.url}\nTITLE: ${x.title}\nDESCRIPTION: ${x.description}`), "What URLs are most relevant to the following prompt: " + (req.body.prompt || '').toLocaleLowerCase().replace("extract", " ").replace("extract ", " "));
|
||||
console.log(rerank);
|
||||
// // top 3 links
|
||||
// const top3Links = (mapResults.links as MapDocument[]).slice(0, 3);
|
||||
// console.log(top3Links);
|
||||
// // console.log(top3Links);
|
||||
// mappedDocuments.push(...(mapResults.links as MapDocument[]));
|
||||
// // transform mappedUrls to just documents
|
||||
// // we quickly rerank
|
||||
// const rerank = await rerankDocuments(mappedDocuments.map(x => `URL: ${x.url}\nTITLE: ${x.title}\nDESCRIPTION: ${x.description}`), "What URLs are most relevant to the following prompt: " + (req.body.prompt || '').toLocaleLowerCase().replace("extract", " ").replace("extract ", " "));
|
||||
// console.log(rerank);
|
||||
} else {
|
||||
mappedDocuments.push({ url });
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ export async function generateFinalExtraction({
|
||||
const jsonCompletion = await openai.beta.chat.completions.parse({
|
||||
model,
|
||||
messages: [
|
||||
{ role: "system", content: systemPrompt },
|
||||
{ role: "system", content: systemPrompt ?? "" },
|
||||
{ role: "user", content: [{ type: "text", text: extractionContent }] },
|
||||
{
|
||||
role: "user",
|
||||
@ -108,9 +108,8 @@ export async function generateFinalExtraction({
|
||||
}
|
||||
|
||||
const extraction = jsonCompletion.choices[0].message.parsed;
|
||||
|
||||
return {
|
||||
content: extraction,
|
||||
content: extraction ?? "",
|
||||
metadata: {
|
||||
numTokens,
|
||||
warning,
|
||||
|
@ -18,6 +18,7 @@ import { logger } from "../lib/logger";
|
||||
import { scrapeStatusController } from "../controllers/v1/scrape-status";
|
||||
import { concurrencyCheckController } from "../controllers/v1/concurrency-check";
|
||||
import { batchScrapeController } from "../controllers/v1/batch-scrape";
|
||||
import { extractController } from "../controllers/v1/extract";
|
||||
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
|
||||
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
|
||||
// import { searchController } from "../../src/controllers/v1/search";
|
||||
@ -178,6 +179,14 @@ v1Router.ws(
|
||||
crawlStatusWSController
|
||||
);
|
||||
|
||||
v1Router.post(
|
||||
"/extract",
|
||||
authMiddleware(RateLimiterMode.Scrape),
|
||||
checkCreditsMiddleware(1),
|
||||
blocklistMiddleware,
|
||||
wrap(extractController)
|
||||
);
|
||||
|
||||
|
||||
|
||||
// v1Router.post("/crawlWebsitePreview", crawlPreviewController);
|
||||
@ -199,3 +208,4 @@ v1Router.delete(
|
||||
// Health/Probe routes
|
||||
// v1Router.get("/health/liveness", livenessController);
|
||||
// v1Router.get("/health/readiness", readinessController);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user