diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index 30245aa5..ebb0b324 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -56,6 +56,7 @@ export async function getMapResults({ allowExternalLinks, abort = new AbortController().signal, // noop mock, + filterByPath = true, }: { url: string; search?: string; @@ -70,6 +71,7 @@ export async function getMapResults({ allowExternalLinks?: boolean; abort?: AbortSignal; mock?: string; + filterByPath?: boolean; }): Promise { const id = uuidv4(); let links: string[] = [url]; @@ -247,6 +249,29 @@ export async function getMapResults({ links = links.filter((x) => isSameSubdomain(x, url)); } + // Filter by path if enabled + if (filterByPath && !allowExternalLinks) { + try { + const urlObj = new URL(url); + const urlPath = urlObj.pathname; + // Only apply path filtering if the URL has a significant path (not just '/' or empty) + // This means we only filter by path if the user has not selected a root domain + if (urlPath && urlPath !== '/' && urlPath.length > 1) { + links = links.filter(link => { + try { + const linkObj = new URL(link); + return linkObj.pathname.startsWith(urlPath); + } catch (e) { + return false; + } + }); + } + } catch (e) { + // If URL parsing fails, continue without path filtering + logger.warn(`Failed to parse URL for path filtering: ${url}`, { error: e }); + } + } + // remove duplicates that could be due to http/https or www links = removeDuplicateUrls(links); } @@ -300,6 +325,7 @@ export async function mapController( plan: req.auth.plan, abort: abort.signal, mock: req.body.useMock, + filterByPath: req.body.filterByPath !== false, }), ...(req.body.timeout !== undefined ? [ new Promise((resolve, reject) => setTimeout(() => { diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 5a71da9c..1e462549 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -506,6 +506,7 @@ export const mapRequestSchema = crawlerOptions limit: z.number().min(1).max(30000).default(5000), timeout: z.number().positive().finite().optional(), useMock: z.string().optional(), + filterByPath: z.boolean().default(true), }) .strict(strictMessage);