diff --git a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
index 30a836ba..0ee3493b 100644
--- a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
+++ b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
@@ -1,3 +1,7 @@
+import { scrapSingleUrl } from '../single_url';
+import { PageOptions } from '../../../lib/entities';
+
+
jest.mock('../single_url', () => {
const originalModule = jest.requireActual('../single_url');
originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('
TestRoast
');
@@ -5,9 +9,6 @@ jest.mock('../single_url', () => {
return originalModule;
});
-import { scrapSingleUrl } from '../single_url';
-import { PageOptions } from '../../../lib/entities';
-
describe('scrapSingleUrl', () => {
it('should handle includeHtml option correctly', async () => {
const url = 'https://roastmywebsite.ai';
@@ -22,82 +23,6 @@ describe('scrapSingleUrl', () => {
}, 10000);
});
-import { scrapSingleUrl } from '../single_url';
-import { PageOptions } from '../../../lib/entities';
-
-// Mock the fetchHtmlContent function
-jest.mock('../single_url', () => {
- const originalModule = jest.requireActual('../single_url');
- originalModule.fetchHtmlContent = jest.fn().mockResolvedValue(`
-
- Test Page
-
- Absolute Link
- Relative Link
- Page Link
- Fragment Link
- Email Link
-
-
- `);
- return originalModule;
-});
-
-describe('scrapSingleUrl with linksOnPage', () => {
- const baseUrl = 'https://test.com';
-
- it('should not include linksOnPage when option is false', async () => {
- const pageOptions: PageOptions = {};
- const result = await scrapSingleUrl(baseUrl, pageOptions);
- expect(result.linksOnPage).toBeUndefined();
- });
-
- it('should include linksOnPage when option is true', async () => {
- const pageOptions: PageOptions = { };
- const result = await scrapSingleUrl(baseUrl, pageOptions);
- expect(result.linksOnPage).toBeDefined();
- expect(Array.isArray(result.linksOnPage)).toBe(true);
- });
-
- it('should correctly handle absolute URLs', async () => {
- const pageOptions: PageOptions = { };
- const result = await scrapSingleUrl(baseUrl, pageOptions);
- expect(result.linksOnPage).toContain('https://example.com');
- });
-
- it('should correctly handle relative URLs', async () => {
- const pageOptions: PageOptions = { };
- const result = await scrapSingleUrl(baseUrl, pageOptions);
- expect(result.linksOnPage).toContain('https://test.com/relative');
- });
-
- it('should correctly handle page URLs', async () => {
- const pageOptions: PageOptions = { };
- const result = await scrapSingleUrl(baseUrl, pageOptions);
- expect(result.linksOnPage).toContain('https://test.com/page');
- });
-
- it('should not include fragment-only links', async () => {
- const pageOptions: PageOptions = { };
- const result = await scrapSingleUrl(baseUrl, pageOptions);
- expect(result.linksOnPage).not.toContain('#fragment');
- expect(result.linksOnPage).not.toContain('https://test.com/#fragment');
- });
-
- it('should include mailto links', async () => {
- const pageOptions: PageOptions = { };
- const result = await scrapSingleUrl(baseUrl, pageOptions);
- expect(result.linksOnPage).toContain('mailto:test@example.com');
- });
-
- it('should return unique links', async () => {
- const pageOptions: PageOptions = { };
- const result = await scrapSingleUrl(baseUrl, pageOptions);
- const uniqueLinks = new Set(result.linksOnPage);
- expect(result.linksOnPage?.length).toBe(uniqueLinks.size);
- });
-});
-
it('should return a list of links on the mendable.ai page', async () => {
const url = 'https://mendable.ai';
const pageOptions: PageOptions = { includeHtml: true };
@@ -109,7 +34,3 @@ it('should return a list of links on the mendable.ai page', async () => {
expect(Array.isArray(result.linksOnPage)).toBe(true);
expect(result.linksOnPage.length).toBeGreaterThan(0);
}, 10000);
-
-
-
-