mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-04 08:10:38 +08:00
search port
This commit is contained in:
parent
86326f34e9
commit
fc08ff450d
@ -9,6 +9,7 @@ import { search } from "../search";
|
|||||||
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
|
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
|
||||||
import { v4 as uuidv4 } from "uuid";
|
import { v4 as uuidv4 } from "uuid";
|
||||||
import { Logger } from "../lib/logger";
|
import { Logger } from "../lib/logger";
|
||||||
|
import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service";
|
||||||
|
|
||||||
export async function searchHelper(
|
export async function searchHelper(
|
||||||
jobId: string,
|
jobId: string,
|
||||||
@ -75,26 +76,28 @@ export async function searchHelper(
|
|||||||
|
|
||||||
// filter out social media links
|
// filter out social media links
|
||||||
|
|
||||||
|
const jobDatas = res.map(x => {
|
||||||
|
const url = x.url;
|
||||||
|
const uuid = uuidv4();
|
||||||
|
return {
|
||||||
|
name: uuid,
|
||||||
|
data: {
|
||||||
|
url,
|
||||||
|
mode: "single_urls",
|
||||||
|
crawlerOptions: crawlerOptions,
|
||||||
|
team_id: team_id,
|
||||||
|
pageOptions: pageOptions,
|
||||||
|
},
|
||||||
|
opts: {
|
||||||
|
jobId: uuid,
|
||||||
|
priority: 10,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
})
|
||||||
|
|
||||||
|
const jobs = await getScrapeQueue().addBulk(jobDatas);
|
||||||
|
|
||||||
const a = new WebScraperDataProvider();
|
const docs = (await Promise.all(jobs.map(x => x.waitUntilFinished(scrapeQueueEvents, 60000)))).map(x => x[0]);
|
||||||
await a.setOptions({
|
|
||||||
jobId,
|
|
||||||
mode: "single_urls",
|
|
||||||
urls: res.map((r) => r.url).slice(0, Math.min(searchOptions.limit ?? 5, 5)),
|
|
||||||
crawlerOptions: {
|
|
||||||
...crawlerOptions,
|
|
||||||
},
|
|
||||||
pageOptions: {
|
|
||||||
...pageOptions,
|
|
||||||
onlyMainContent: pageOptions?.onlyMainContent ?? true,
|
|
||||||
fetchPageContent: pageOptions?.fetchPageContent ?? true,
|
|
||||||
includeHtml: pageOptions?.includeHtml ?? false,
|
|
||||||
removeTags: pageOptions?.removeTags ?? [],
|
|
||||||
fallback: false,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const docs = await a.getDocuments(false);
|
|
||||||
|
|
||||||
if (docs.length === 0) {
|
if (docs.length === 0) {
|
||||||
return { success: true, error: "No search results found", returnCode: 200 };
|
return { success: true, error: "No search results found", returnCode: 200 };
|
||||||
@ -109,19 +112,6 @@ export async function searchHelper(
|
|||||||
return { success: true, error: "No page found", returnCode: 200, data: docs };
|
return { success: true, error: "No page found", returnCode: 200, data: docs };
|
||||||
}
|
}
|
||||||
|
|
||||||
const billingResult = await billTeam(
|
|
||||||
team_id,
|
|
||||||
filteredDocs.length
|
|
||||||
);
|
|
||||||
if (!billingResult.success) {
|
|
||||||
return {
|
|
||||||
success: false,
|
|
||||||
error:
|
|
||||||
"Failed to bill team. Insufficient credits or subscription not found.",
|
|
||||||
returnCode: 402,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
data: filteredDocs,
|
data: filteredDocs,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user