diff --git a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
index c8281edd..144661bb 100644
--- a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
@@ -804,6 +804,54 @@ describe("E2E Tests for API Routes", () => {
       expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
     }, 180000);
 
+    // Starts a crawl of mendable.ai with `allowExternalContentLinks` enabled and
+    // checks that the result contains off-domain URLs as well as an on-site page.
+    // NOTE(review): the expected URLs depend on the live mendable.ai content.
+    it.concurrent("should crawl external content links when allowed", async () => {
+      const crawlInitResponse = await request(TEST_URL)
+        .post("/v0/crawl")
+        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+        .set("Content-Type", "application/json")
+        .send({
+          url: "https://mendable.ai",
+          crawlerOptions: {
+            allowExternalContentLinks: true,
+            ignoreSitemap: true,
+            returnOnlyUrls: true,
+            limit: 50
+          }
+        });
+
+      expect(crawlInitResponse.statusCode).toBe(200);
+      expect(crawlInitResponse.body).toHaveProperty("jobId");
+
+      // Stop polling shortly before the 3-minute Jest timeout so the loop cannot
+      // keep hitting the API after the test has already been reported as failed.
+      const pollDeadline = Date.now() + 170000;
+      let crawlStatus: string | undefined;
+      let crawlData: unknown[] = [];
+      while (crawlStatus !== "completed") {
+        if (Date.now() > pollDeadline) {
+          throw new Error(`Crawl did not complete in time (last status: ${crawlStatus})`);
+        }
+        const statusResponse = await request(TEST_URL)
+          .get(`/v0/crawl/status/${crawlInitResponse.body.jobId}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+        crawlStatus = statusResponse.body.status;
+        if (statusResponse.body.data) {
+          crawlData = statusResponse.body.data;
+        }
+        if (crawlStatus !== "completed") {
+          await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+        }
+      }
+      expect(crawlData.length).toBeGreaterThan(0);
+      expect(crawlData).toEqual(expect.arrayContaining([
+        expect.objectContaining({ url: expect.stringContaining("https://firecrawl.dev/?ref=mendable+banner") }),
+        expect.objectContaining({ url: expect.stringContaining("https://mendable.ai/pricing") }),
+        expect.objectContaining({ url: expect.stringContaining("https://x.com/CalebPeffer") })
+      ]));
+    }, 180000); // 3 minutes timeout
   });
 
   describe("POST /v0/crawlWebsitePreview", 
() => {