mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 05:05:55 +08:00
Nick: fixed extract issues
This commit is contained in:
parent
6d77879d68
commit
79e335636a
@ -61,7 +61,7 @@ export async function extractController(
|
||||
const baseUrl = url.replace("/*", "");
|
||||
// const pathPrefix = baseUrl.split('/').slice(3).join('/'); // Get path after domain if any
|
||||
|
||||
const allowExternalLinks = req.body.allowExternalLinks ?? true;
|
||||
const allowExternalLinks = req.body.allowExternalLinks;
|
||||
let urlWithoutWww = baseUrl.replace("www.", "");
|
||||
let mapUrl =
|
||||
req.body.prompt && allowExternalLinks
|
||||
@ -84,6 +84,8 @@ export async function extractController(
|
||||
includeSubdomains: req.body.includeSubdomains,
|
||||
});
|
||||
|
||||
// console.log("mapResults", mapResults);
|
||||
|
||||
let mappedLinks = mapResults.links as MapDocument[];
|
||||
// Limit number of links to MAX_EXTRACT_LIMIT
|
||||
mappedLinks = mappedLinks.slice(0, MAX_EXTRACT_LIMIT);
|
||||
@ -92,6 +94,7 @@ export async function extractController(
|
||||
(x) =>
|
||||
`url: ${x.url}, title: ${x.title}, description: ${x.description}`,
|
||||
);
|
||||
// console.log("mappedLinksRerank", mappedLinksRerank);
|
||||
|
||||
// Filter by path prefix if present
|
||||
// wrong
|
||||
@ -150,15 +153,20 @@ export async function extractController(
|
||||
} else {
|
||||
// Handle direct URLs without glob pattern
|
||||
if (!isUrlBlocked(url)) {
|
||||
// console.log("url", url);
|
||||
return [url];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
});
|
||||
|
||||
// console.log("urlPromises", urlPromises.length);
|
||||
|
||||
// Wait for all URL processing to complete and flatten results
|
||||
const processedUrls = await Promise.all(urlPromises);
|
||||
links.push(...processedUrls.flat());
|
||||
const flattenedUrls = processedUrls.flat().filter(url => url); // Filter out any null/undefined values
|
||||
links.push(...flattenedUrls);
|
||||
// console.log("links", links.length, "flattenedUrls", flattenedUrls.length);
|
||||
|
||||
if (links.length === 0) {
|
||||
return res.status(400).json({
|
||||
|
@ -198,7 +198,7 @@ export const extractV1Options = z
|
||||
limit: z.number().int().positive().finite().safe().optional(),
|
||||
ignoreSitemap: z.boolean().default(false),
|
||||
includeSubdomains: z.boolean().default(true),
|
||||
allowExternalLinks: z.boolean().default(true),
|
||||
allowExternalLinks: z.boolean().default(false),
|
||||
origin: z.string().optional().default("api"),
|
||||
timeout: z.number().int().positive().finite().safe().default(60000),
|
||||
})
|
||||
|
Loading…
x
Reference in New Issue
Block a user