diff --git a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
index 63408eaf..30a836ba 100644
--- a/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
+++ b/apps/api/src/scraper/WebScraper/__tests__/single_url.test.ts
@@ -22,6 +22,81 @@ describe('scrapSingleUrl', () => {
}, 10000);
});
+import { scrapSingleUrl } from '../single_url';
+import { PageOptions } from '../../../lib/entities';
+
+// Mock fetchHtmlContent so it resolves to a fixed HTML fixture with one anchor per link kind asserted below.
+jest.mock('../single_url', () => {
+  const originalModule = jest.requireActual('../single_url'); // keep the real module; only fetchHtmlContent is stubbed
+  originalModule.fetchHtmlContent = jest.fn().mockResolvedValue(`
+    <html>
+      <head><title>Test Page</title></head>
+      <body>
+        <a href="https://example.com">Absolute Link</a>
+        <a href="/relative">Relative Link</a>
+        <a href="page">Page Link</a>
+        <a href="#fragment">Fragment Link</a>
+        <a href="mailto:test@example.com">Email Link</a>
+      </body>
+    </html>
+  `);
+  return originalModule; // NOTE(review): the stub is only seen by calls made through the module's exports — confirm scrapSingleUrl picks it up
+});
+// Exercises the linksOnPage field returned by scrapSingleUrl, one expectation per link kind.
+describe('scrapSingleUrl with linksOnPage', () => {
+ const baseUrl = 'https://test.com';
+
+ it('should not include linksOnPage when option is false', async () => {
+ const pageOptions: PageOptions = {};
+ const result = await scrapSingleUrl(baseUrl, pageOptions);
+ expect(result.linksOnPage).toBeUndefined();
+ });
+ // NOTE(review): the options below are the same empty object as in the test above, yet the opposite outcome is expected — presumably an enabling option is missing; verify against PageOptions.
+ it('should include linksOnPage when option is true', async () => {
+ const pageOptions: PageOptions = { };
+ const result = await scrapSingleUrl(baseUrl, pageOptions);
+ expect(result.linksOnPage).toBeDefined();
+ expect(Array.isArray(result.linksOnPage)).toBe(true);
+ });
+
+ it('should correctly handle absolute URLs', async () => {
+ const pageOptions: PageOptions = { };
+ const result = await scrapSingleUrl(baseUrl, pageOptions);
+ expect(result.linksOnPage).toContain('https://example.com');
+ });
+ // The next two tests expect links resolved to absolute URLs under baseUrl.
+ it('should correctly handle relative URLs', async () => {
+ const pageOptions: PageOptions = { };
+ const result = await scrapSingleUrl(baseUrl, pageOptions);
+ expect(result.linksOnPage).toContain('https://test.com/relative');
+ });
+
+ it('should correctly handle page URLs', async () => {
+ const pageOptions: PageOptions = { };
+ const result = await scrapSingleUrl(baseUrl, pageOptions);
+ expect(result.linksOnPage).toContain('https://test.com/page');
+ });
+ // Rejects both the raw fragment and its form resolved against baseUrl.
+ it('should not include fragment-only links', async () => {
+ const pageOptions: PageOptions = { };
+ const result = await scrapSingleUrl(baseUrl, pageOptions);
+ expect(result.linksOnPage).not.toContain('#fragment');
+ expect(result.linksOnPage).not.toContain('https://test.com/#fragment');
+ });
+
+ it('should include mailto links', async () => {
+ const pageOptions: PageOptions = { };
+ const result = await scrapSingleUrl(baseUrl, pageOptions);
+ expect(result.linksOnPage).toContain('mailto:test@example.com');
+ });
+ // Uniqueness check: the array length must equal the size of a Set built from the same array.
+ it('should return unique links', async () => {
+ const pageOptions: PageOptions = { };
+ const result = await scrapSingleUrl(baseUrl, pageOptions);
+ const uniqueLinks = new Set(result.linksOnPage);
+ expect(result.linksOnPage?.length).toBe(uniqueLinks.size);
+ });
+});
it('should return a list of links on the mendable.ai page', async () => {
const url = 'https://mendable.ai';
@@ -36,3 +111,5 @@ it('should return a list of links on the mendable.ai page', async () => {
}, 10000);
+
+