From 009df6c930062c8e6901ed8948f647c2c99e95ea Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Wed, 26 Jun 2024 09:54:25 -0300
Subject: [PATCH] Added crawl limit unit test

I think this test over-relies on mocks, but I have no idea how to fix
this without changing the code architecture.
---
 .../WebScraper/__tests__/crawler.test.ts      | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts b/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
index 6d383708..32c8b0a0 100644
--- a/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
+++ b/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
@@ -188,5 +188,38 @@ describe('WebCrawler', () => {
 
     // Check that the backward link is included if allowBackwardCrawling is true
     expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
   });
+
+  it('should respect the limit parameter by not returning more links than specified', async () => {
+    const initialUrl = 'http://example.com';
+    const limit = 2; // Set a limit for the number of links
+
+    crawler = new WebCrawler({
+      initialUrl: initialUrl,
+      includes: [],
+      excludes: [],
+      limit: limit, // Apply the limit
+      maxCrawledDepth: 10
+    });
+
+    // Mock the sitemap fetching function to return more links than the limit
+    crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
+      initialUrl,
+      initialUrl + '/page1',
+      initialUrl + '/page2',
+      initialUrl + '/page3'
+    ]);
+
+    const filteredLinks = crawler['filterLinks'](
+      [initialUrl, initialUrl + '/page1', initialUrl + '/page2', initialUrl + '/page3'],
+      limit,
+      10
+    );
+
+    expect(filteredLinks.length).toBe(limit); // Check that the number of results respects the limit
+    expect(filteredLinks).toEqual([
+      initialUrl,
+      initialUrl + '/page1'
+    ]);
+  });
 });
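
On the mock concern in the commit message: one possible direction, sketched under
the assumption that WebCrawler could accept its sitemap fetcher as a constructor
option. The `sitemapFetcher` parameter below is hypothetical and does not exist in
the current code; with something like it, the test could pass a stub instead of
overwriting the private `tryFetchSitemapLinks` member.

// Hypothetical refactor sketch -- `sitemapFetcher` is NOT part of the current
// WebCrawler options; it illustrates one way to reduce reliance on jest mocks.

type SitemapFetcher = (url: string) => Promise<string[]>;

interface WebCrawlerOptions {
  initialUrl: string;
  includes: string[];
  excludes: string[];
  limit: number;
  maxCrawledDepth: number;
  // Injected dependency; production code would default this to the real
  // HTTP-based sitemap fetch so callers outside tests are unaffected.
  sitemapFetcher?: SitemapFetcher;
}

// In the test, a plain stub replaces jest.fn() patching of a private member:
const stubFetcher: SitemapFetcher = async (_url) => [
  'http://example.com',
  'http://example.com/page1',
  'http://example.com/page2',
  'http://example.com/page3',
];

// const crawler = new WebCrawler({ ...baseOptions, sitemapFetcher: stubFetcher });

With the fetcher injected, the limit assertion exercises the real filtering logic
and the test no longer needs to know the crawler's private method names.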