From 009df6c930062c8e6901ed8948f647c2c99e95ea Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Wed, 26 Jun 2024 09:54:25 -0300
Subject: [PATCH] Added crawl limit unit test

I think this test over-relies on mocks, but I have no idea how to fix
this without changing the code architecture.
---
 .../WebScraper/__tests__/crawler.test.ts      | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts b/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
index 6d383708..32c8b0a0 100644
--- a/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
+++ b/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
@@ -188,5 +188,38 @@ describe('WebCrawler', () => {
 
     // Check that the backward link is included if allowBackwardCrawling is true
     expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
   });
+
+  it('should respect the limit parameter by not returning more links than specified', async () => {
+    const initialUrl = 'http://example.com';
+    const limit = 2; // Set a limit for the number of links
+
+    crawler = new WebCrawler({
+      initialUrl: initialUrl,
+      includes: [],
+      excludes: [],
+      limit: limit, // Apply the limit
+      maxCrawledDepth: 10
+    });
+
+    // Mock the sitemap fetching function to return more links than the limit
+    crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
+      initialUrl,
+      initialUrl + '/page1',
+      initialUrl + '/page2',
+      initialUrl + '/page3'
+    ]);
+
+    const filteredLinks = crawler['filterLinks'](
+      [initialUrl, initialUrl + '/page1', initialUrl + '/page2', initialUrl + '/page3'],
+      limit,
+      10
+    );
+
+    expect(filteredLinks.length).toBe(limit); // Check that the number of results respects the limit
+    expect(filteredLinks).toEqual([
+      initialUrl,
+      initialUrl + '/page1'
+    ]);
+  });
 });
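
On the mock concern in the commit message: one possible direction, sketched under
the assumption that WebCrawler could accept its sitemap fetcher as a constructor
option. The `sitemapFetcher` parameter below is hypothetical and does not exist in
the current code; with something like it, the test could pass a stub instead of
overwriting the private `tryFetchSitemapLinks` member.

// Hypothetical refactor sketch -- `sitemapFetcher` is NOT part of the current
// WebCrawler options; it illustrates one way to reduce reliance on jest mocks.

type SitemapFetcher = (url: string) => Promise<string[]>;

interface WebCrawlerOptions {
  initialUrl: string;
  includes: string[];
  excludes: string[];
  limit: number;
  maxCrawledDepth: number;
  // Injected dependency; production code would default this to the real
  // HTTP-based sitemap fetch so callers outside tests are unaffected.
  sitemapFetcher?: SitemapFetcher;
}

// In the test, a plain stub replaces jest.fn() patching of a private member:
const stubFetcher: SitemapFetcher = async (_url) => [
  'http://example.com',
  'http://example.com/page1',
  'http://example.com/page2',
  'http://example.com/page3',
];

// const crawler = new WebCrawler({ ...baseOptions, sitemapFetcher: stubFetcher });

With the fetcher injected, the limit assertion exercises the real filtering logic
and the test no longer needs to know the crawler's private method names.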