diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts index af094442..b58e52ab 100644 --- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts @@ -469,9 +469,7 @@ describe("POST /v1/map", () => { it.concurrent("should return a successful response with a valid API key", async () => { const mapRequest = { - url: "https://roastmywebsite.ai", - includeSubdomains: true, - search: "test", + url: "https://roastmywebsite.ai" }; const response: ScrapeResponseRequestTest = await request(TEST_URL) @@ -491,6 +489,30 @@ describe("POST /v1/map", () => { expect(links.length).toBeGreaterThan(0); }); + it.concurrent("should return a successful response with a valid API key and search", async () => { + const mapRequest = { + url: "https://usemotion.com", + search: "pricing" + }; + + const response: ScrapeResponseRequestTest = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send(mapRequest); + + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("success", true); + expect(response.body).toHaveProperty("links"); + if (!("links" in response.body)) { + throw new Error("Expected response body to have 'links' property"); + } + const links = response.body.links as unknown[]; + expect(Array.isArray(links)).toBe(true); + expect(links.length).toBeGreaterThan(0); + expect(links[0]).toContain("usemotion.com/pricing"); + }); + it.concurrent("should return an error for invalid URL", async () => { const mapRequest = { url: "invalid-url", diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index 78cfda04..a50b7615 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -26,11 +26,10 @@ export async function mapController( const id = uuidv4(); let links: string[] = [req.body.url]; - const crawlerOptions = legacyCrawlerOptions(req.body); const sc: StoredCrawl = { originUrl: req.body.url, - crawlerOptions, + crawlerOptions: legacyCrawlerOptions(req.body), pageOptions: {}, team_id: req.auth.team_id, createdAt: Date.now(), @@ -39,7 +38,7 @@ export async function mapController( const crawler = crawlToCrawler(id, sc); const sitemap = - sc.crawlerOptions.ignoreSitemap || req.body.search + req.body.ignoreSitemap ? null : await crawler.tryGetSitemap(); @@ -58,16 +57,19 @@ export async function mapController( }); if (mapResults.length > 0) { - mapResults.map((x) => { - if (req.body.search) { - links.unshift(x.url); - } else { + if (req.body.search) { + // Ensure all map results are first, maintaining their order + links = [mapResults[0].url, ...mapResults.slice(1).map(x => x.url), ...links]; + } else { + mapResults.map((x) => { links.push(x.url); - } - }); + }); + } } - links = links.map((x) => checkAndUpdateURLForMap(x).url); + links = links.map((x) => checkAndUpdateURLForMap(x).url.trim()); + + // allows for subdomains to be included links = links.filter((x) => isSameDomain(x, req.body.url)); diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 77a9f2dd..333f582e 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -118,6 +118,7 @@ export const mapRequestSchema = crawlerOptions.extend({ origin: z.string().optional().default("api"), includeSubdomains: z.boolean().default(false), search: z.string().optional(), + ignoreSitemap: z.boolean().default(false), }); // export type MapRequest = { diff --git a/apps/api/src/lib/validateUrl.ts b/apps/api/src/lib/validateUrl.ts index c4f002ef..fa2698e7 100644 --- a/apps/api/src/lib/validateUrl.ts +++ b/apps/api/src/lib/validateUrl.ts @@ -113,7 +113,7 @@ export const checkAndUpdateURLForMap = (url: string) => { } // remove any query params - url = url.split("?")[0]; + url = url.split("?")[0].trim(); return { urlObj: typedUrlObj, url: url }; };