mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 20:48:58 +08:00
(fix/map) Map failed to filter by path if indexed (#1333)
* Nick: * Update map.ts * Update map.ts
This commit is contained in:
parent
f87e11712c
commit
134de67a3b
@ -56,6 +56,7 @@ export async function getMapResults({
|
||||
allowExternalLinks,
|
||||
abort = new AbortController().signal, // noop
|
||||
mock,
|
||||
filterByPath = true,
|
||||
}: {
|
||||
url: string;
|
||||
search?: string;
|
||||
@ -70,6 +71,7 @@ export async function getMapResults({
|
||||
allowExternalLinks?: boolean;
|
||||
abort?: AbortSignal;
|
||||
mock?: string;
|
||||
filterByPath?: boolean;
|
||||
}): Promise<MapResult> {
|
||||
const id = uuidv4();
|
||||
let links: string[] = [url];
|
||||
@ -247,6 +249,29 @@ export async function getMapResults({
|
||||
links = links.filter((x) => isSameSubdomain(x, url));
|
||||
}
|
||||
|
||||
// Filter by path if enabled
|
||||
if (filterByPath && !allowExternalLinks) {
|
||||
try {
|
||||
const urlObj = new URL(url);
|
||||
const urlPath = urlObj.pathname;
|
||||
// Only apply path filtering if the URL has a significant path (not just '/' or empty)
|
||||
// This means we only filter by path if the user has not selected a root domain
|
||||
if (urlPath && urlPath !== '/' && urlPath.length > 1) {
|
||||
links = links.filter(link => {
|
||||
try {
|
||||
const linkObj = new URL(link);
|
||||
return linkObj.pathname.startsWith(urlPath);
|
||||
} catch (e) {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
}
|
||||
} catch (e) {
|
||||
// If URL parsing fails, continue without path filtering
|
||||
logger.warn(`Failed to parse URL for path filtering: ${url}`, { error: e });
|
||||
}
|
||||
}
|
||||
|
||||
// remove duplicates that could be due to http/https or www
|
||||
links = removeDuplicateUrls(links);
|
||||
}
|
||||
@ -300,6 +325,7 @@ export async function mapController(
|
||||
plan: req.auth.plan,
|
||||
abort: abort.signal,
|
||||
mock: req.body.useMock,
|
||||
filterByPath: req.body.filterByPath !== false,
|
||||
}),
|
||||
...(req.body.timeout !== undefined ? [
|
||||
new Promise((resolve, reject) => setTimeout(() => {
|
||||
|
@ -506,6 +506,7 @@ export const mapRequestSchema = crawlerOptions
|
||||
limit: z.number().min(1).max(30000).default(5000),
|
||||
timeout: z.number().positive().finite().optional(),
|
||||
useMock: z.string().optional(),
|
||||
filterByPath: z.boolean().default(true),
|
||||
})
|
||||
.strict(strictMessage);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user