From da3c6bca374c9d51a21ede7812730b04465b315a Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Tue, 16 Jul 2024 21:23:22 -0700 Subject: [PATCH] Caleb: added a simple test --- .../WebScraper/__tests__/single_url.test.ts | 87 +------------------ 1 file changed, 4 insertions(+), 83 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts index 30a836ba..0ee3493b 100644 --- a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts +++ b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts @@ -1,3 +1,7 @@ +import { scrapSingleUrl } from '../single_url'; +import { PageOptions } from '../../../lib/entities'; + + jest.mock('../single_url', () => { const originalModule = jest.requireActual('../single_url'); originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('Test

Roast

'); @@ -5,9 +9,6 @@ jest.mock('../single_url', () => { return originalModule; }); -import { scrapSingleUrl } from '../single_url'; -import { PageOptions } from '../../../lib/entities'; - describe('scrapSingleUrl', () => { it('should handle includeHtml option correctly', async () => { const url = 'https://roastmywebsite.ai'; @@ -22,82 +23,6 @@ describe('scrapSingleUrl', () => { }, 10000); }); -import { scrapSingleUrl } from '../single_url'; -import { PageOptions } from '../../../lib/entities'; - -// Mock the fetchHtmlContent function -jest.mock('../single_url', () => { - const originalModule = jest.requireActual('../single_url'); - originalModule.fetchHtmlContent = jest.fn().mockResolvedValue(` - - Test Page - - Absolute Link - Relative Link - Page Link - Fragment Link - Email Link - - - `); - return originalModule; -}); - -describe('scrapSingleUrl with linksOnPage', () => { - const baseUrl = 'https://test.com'; - - it('should not include linksOnPage when option is false', async () => { - const pageOptions: PageOptions = {}; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toBeUndefined(); - }); - - it('should include linksOnPage when option is true', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toBeDefined(); - expect(Array.isArray(result.linksOnPage)).toBe(true); - }); - - it('should correctly handle absolute URLs', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toContain('https://example.com'); - }); - - it('should correctly handle relative URLs', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toContain('https://test.com/relative'); - }); - - it('should correctly handle page URLs', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toContain('https://test.com/page'); - }); - - it('should not include fragment-only links', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).not.toContain('#fragment'); - expect(result.linksOnPage).not.toContain('https://test.com/#fragment'); - }); - - it('should include mailto links', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - expect(result.linksOnPage).toContain('mailto:test@example.com'); - }); - - it('should return unique links', async () => { - const pageOptions: PageOptions = { }; - const result = await scrapSingleUrl(baseUrl, pageOptions); - const uniqueLinks = new Set(result.linksOnPage); - expect(result.linksOnPage?.length).toBe(uniqueLinks.size); - }); -}); - it('should return a list of links on the mendable.ai page', async () => { const url = 'https://mendable.ai'; const pageOptions: PageOptions = { includeHtml: true }; @@ -109,7 +34,3 @@ it('should return a list of links on the mendable.ai page', async () => { expect(Array.isArray(result.linksOnPage)).toBe(true); expect(result.linksOnPage.length).toBeGreaterThan(0); }, 10000); - - - -