mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 07:35:54 +08:00
Merge pull request #323 from mendableai/tests/crawl-limit-unit-tests
[Tests] Added crawl limit unit test
This commit is contained in:
commit
6d0c7a9ccd
@ -188,5 +188,38 @@ describe('WebCrawler', () => {
|
|||||||
// Check that the backward link is included if allowBackwardCrawling is true
|
// Check that the backward link is included if allowBackwardCrawling is true
|
||||||
expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
|
expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should respect the limit parameter by not returning more links than specified', async () => {
  // Verifies that filterLinks returns exactly `limit` links, in input order,
  // when it is handed more candidate links than the limit allows.
  const initialUrl = 'http://example.com';
  const limit = 2; // Maximum number of links expected back

  // Four candidates, i.e. deliberately more than `limit`.
  const candidateLinks = [
    initialUrl,
    `${initialUrl}/page1`,
    `${initialUrl}/page2`,
    `${initialUrl}/page3`,
  ];

  crawler = new WebCrawler({
    initialUrl,
    includes: [],
    excludes: [],
    limit, // Apply the limit
    maxCrawledDepth: 10,
  });

  // Stub the sitemap lookup so it would resolve to more links than the limit.
  // NOTE(review): filterLinks is invoked directly below; whether it ever
  // consults this stub is not visible from this test — confirm.
  const sitemapStub = jest.fn().mockResolvedValue([...candidateLinks]);
  crawler['tryFetchSitemapLinks'] = sitemapStub;

  // Bracket access is kept as in the original call sites.
  const filteredLinks = crawler['filterLinks'](candidateLinks, limit, 10);

  // The result must be capped at `limit` entries...
  expect(filteredLinks.length).toBe(limit);
  // ...and must be the first two candidates, in their original order.
  expect(filteredLinks).toEqual([
    initialUrl,
    `${initialUrl}/page1`,
  ]);
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user