Nick: fixed extract issues

This commit is contained in:
Nicolas 2024-12-17 16:40:45 -03:00
parent 6d77879d68
commit 79e335636a
2 changed files with 11 additions and 3 deletions

View File

@@ -61,7 +61,7 @@ export async function extractController(
const baseUrl = url.replace("/*", ""); const baseUrl = url.replace("/*", "");
// const pathPrefix = baseUrl.split('/').slice(3).join('/'); // Get path after domain if any // const pathPrefix = baseUrl.split('/').slice(3).join('/'); // Get path after domain if any
const allowExternalLinks = req.body.allowExternalLinks ?? true; const allowExternalLinks = req.body.allowExternalLinks;
let urlWithoutWww = baseUrl.replace("www.", ""); let urlWithoutWww = baseUrl.replace("www.", "");
let mapUrl = let mapUrl =
req.body.prompt && allowExternalLinks req.body.prompt && allowExternalLinks
@@ -84,6 +84,8 @@ export async function extractController(
includeSubdomains: req.body.includeSubdomains, includeSubdomains: req.body.includeSubdomains,
}); });
// console.log("mapResults", mapResults);
let mappedLinks = mapResults.links as MapDocument[]; let mappedLinks = mapResults.links as MapDocument[];
// Limit number of links to MAX_EXTRACT_LIMIT // Limit number of links to MAX_EXTRACT_LIMIT
mappedLinks = mappedLinks.slice(0, MAX_EXTRACT_LIMIT); mappedLinks = mappedLinks.slice(0, MAX_EXTRACT_LIMIT);
@@ -92,6 +94,7 @@ export async function extractController(
(x) => (x) =>
`url: ${x.url}, title: ${x.title}, description: ${x.description}`, `url: ${x.url}, title: ${x.title}, description: ${x.description}`,
); );
// console.log("mappedLinksRerank", mappedLinksRerank);
// Filter by path prefix if present // Filter by path prefix if present
// wrong // wrong
@@ -150,15 +153,20 @@ export async function extractController(
} else { } else {
// Handle direct URLs without glob pattern // Handle direct URLs without glob pattern
if (!isUrlBlocked(url)) { if (!isUrlBlocked(url)) {
// console.log("url", url);
return [url]; return [url];
} }
return []; return [];
} }
}); });
// console.log("urlPromises", urlPromises.length);
// Wait for all URL processing to complete and flatten results // Wait for all URL processing to complete and flatten results
const processedUrls = await Promise.all(urlPromises); const processedUrls = await Promise.all(urlPromises);
links.push(...processedUrls.flat()); const flattenedUrls = processedUrls.flat().filter(url => url); // Filter out any null/undefined values
links.push(...flattenedUrls);
// console.log("links", links.length, "flattenedUrls", flattenedUrls.length);
if (links.length === 0) { if (links.length === 0) {
return res.status(400).json({ return res.status(400).json({

View File

@@ -198,7 +198,7 @@ export const extractV1Options = z
limit: z.number().int().positive().finite().safe().optional(), limit: z.number().int().positive().finite().safe().optional(),
ignoreSitemap: z.boolean().default(false), ignoreSitemap: z.boolean().default(false),
includeSubdomains: z.boolean().default(true), includeSubdomains: z.boolean().default(true),
allowExternalLinks: z.boolean().default(true), allowExternalLinks: z.boolean().default(false),
origin: z.string().optional().default("api"), origin: z.string().optional().default("api"),
timeout: z.number().int().positive().finite().safe().default(60000), timeout: z.number().int().positive().finite().safe().default(60000),
}) })