diff --git a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts index 30a836ba..0ee3493b 100644 --- a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts +++ b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts @@ -1,3 +1,7 @@ +import { scrapSingleUrl } from '../single_url'; +import { PageOptions } from '../../../lib/entities'; + + jest.mock('../single_url', () => { const originalModule = jest.requireActual('../single_url'); originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('Test

Roast

'); @@ -5,9 +9,6 @@ jest.mock('../single_url', () => { return originalModule; }); -import { scrapSingleUrl } from '../single_url'; -import { PageOptions } from '../../../lib/entities'; - describe('scrapSingleUrl', () => { it('should handle includeHtml option correctly', async () => { const url = 'https://roastmywebsite.ai'; @@ -22,82 +23,6 @@ describe('scrapSingleUrl', () => { }, 10000); }); -import { scrapSingleUrl } from '../single_url'; -import { PageOptions } from '../../../lib/entities'; - -// Mock the fetchHtmlContent function -jest.mock('../single_url', () => { - const originalModule = jest.requireActual('../single_url'); - originalModule.fetchHtmlContent = jest.fn().mockResolvedValue(` - - Test Page - - Absolute Link - Relative Link - Page Link - Fragment Link - Email Link - - - `); - return originalModule; -}); - -describe('scrapSingleUrl with linksOnPage', () => { - const baseUrl = 'https://test.com'; - - it('should not include linksOnPage when option is false', async () => { - const pageOptions: PageOptions = {}; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toBeUndefined(); - }); - - it('should include linksOnPage when option is true', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toBeDefined(); - expect(Array.isArray(result.linksOnPage)).toBe(true); - }); - - it('should correctly handle absolute URLs', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toContain('https://example.com'); - }); - - it('should correctly handle relative URLs', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toContain('https://test.com/relative'); - }); - - it('should correctly handle page URLs', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toContain('https://test.com/page'); - }); - - it('should not include fragment-only links', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).not.toContain('#fragment'); - expect(result.linksOnPage).not.toContain('https://test.com/#fragment'); - }); - - it('should include mailto links', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toContain('mailto:test@example.com'); - }); - - it('should return unique links', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - const uniqueLinks = new Set(result.linksOnPage); - expect(result.linksOnPage?.length).toBe(uniqueLinks.size); - }); -}); - it('should return a list of links on the mendable.ai page', async () => { const url = 'https://mendable.ai'; const pageOptions: PageOptions = { includeHtml: true }; @@ -109,7 +34,3 @@ it('should return a list of links on the mendable.ai page', async () => { expect(Array.isArray(result.linksOnPage)).toBe(true); expect(result.linksOnPage.length).toBeGreaterThan(0); }, 10000); - - - -