mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-15 16:05:56 +08:00
Caleb: added a simple test
This commit is contained in:
parent
0b3c0ede49
commit
da3c6bca37
@ -1,3 +1,7 @@
|
|||||||
|
import { scrapSingleUrl } from '../single_url';
|
||||||
|
import { PageOptions } from '../../../lib/entities';
|
||||||
|
|
||||||
|
|
||||||
jest.mock('../single_url', () => {
|
jest.mock('../single_url', () => {
|
||||||
const originalModule = jest.requireActual('../single_url');
|
const originalModule = jest.requireActual('../single_url');
|
||||||
originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('<html><head><title>Test</title></head><body><h1>Roast</h1></body></html>');
|
originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('<html><head><title>Test</title></head><body><h1>Roast</h1></body></html>');
|
||||||
@ -5,9 +9,6 @@ jest.mock('../single_url', () => {
|
|||||||
return originalModule;
|
return originalModule;
|
||||||
});
|
});
|
||||||
|
|
||||||
import { scrapSingleUrl } from '../single_url';
|
|
||||||
import { PageOptions } from '../../../lib/entities';
|
|
||||||
|
|
||||||
describe('scrapSingleUrl', () => {
|
describe('scrapSingleUrl', () => {
|
||||||
it('should handle includeHtml option correctly', async () => {
|
it('should handle includeHtml option correctly', async () => {
|
||||||
const url = 'https://roastmywebsite.ai';
|
const url = 'https://roastmywebsite.ai';
|
||||||
@ -22,82 +23,6 @@ describe('scrapSingleUrl', () => {
|
|||||||
}, 10000);
|
}, 10000);
|
||||||
});
|
});
|
||||||
|
|
||||||
import { scrapSingleUrl } from '../single_url';
|
|
||||||
import { PageOptions } from '../../../lib/entities';
|
|
||||||
|
|
||||||
// Mock the fetchHtmlContent function
|
|
||||||
jest.mock('../single_url', () => {
|
|
||||||
const originalModule = jest.requireActual('../single_url');
|
|
||||||
originalModule.fetchHtmlContent = jest.fn().mockResolvedValue(`
|
|
||||||
<html>
|
|
||||||
<head><title>Test Page</title></head>
|
|
||||||
<body>
|
|
||||||
<a href="https://example.com">Absolute Link</a>
|
|
||||||
<a href="/relative">Relative Link</a>
|
|
||||||
<a href="page">Page Link</a>
|
|
||||||
<a href="#fragment">Fragment Link</a>
|
|
||||||
<a href="mailto:test@example.com">Email Link</a>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
`);
|
|
||||||
return originalModule;
|
|
||||||
});
|
|
||||||
|
|
||||||
describe('scrapSingleUrl with linksOnPage', () => {
|
|
||||||
const baseUrl = 'https://test.com';
|
|
||||||
|
|
||||||
it('should not include linksOnPage when option is false', async () => {
|
|
||||||
const pageOptions: PageOptions = {};
|
|
||||||
const result = await scrapSingleUrl(baseUrl, pageOptions);
|
|
||||||
expect(result.linksOnPage).toBeUndefined();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should include linksOnPage when option is true', async () => {
|
|
||||||
const pageOptions: PageOptions = { };
|
|
||||||
const result = await scrapSingleUrl(baseUrl, pageOptions);
|
|
||||||
expect(result.linksOnPage).toBeDefined();
|
|
||||||
expect(Array.isArray(result.linksOnPage)).toBe(true);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should correctly handle absolute URLs', async () => {
|
|
||||||
const pageOptions: PageOptions = { };
|
|
||||||
const result = await scrapSingleUrl(baseUrl, pageOptions);
|
|
||||||
expect(result.linksOnPage).toContain('https://example.com');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should correctly handle relative URLs', async () => {
|
|
||||||
const pageOptions: PageOptions = { };
|
|
||||||
const result = await scrapSingleUrl(baseUrl, pageOptions);
|
|
||||||
expect(result.linksOnPage).toContain('https://test.com/relative');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should correctly handle page URLs', async () => {
|
|
||||||
const pageOptions: PageOptions = { };
|
|
||||||
const result = await scrapSingleUrl(baseUrl, pageOptions);
|
|
||||||
expect(result.linksOnPage).toContain('https://test.com/page');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should not include fragment-only links', async () => {
|
|
||||||
const pageOptions: PageOptions = { };
|
|
||||||
const result = await scrapSingleUrl(baseUrl, pageOptions);
|
|
||||||
expect(result.linksOnPage).not.toContain('#fragment');
|
|
||||||
expect(result.linksOnPage).not.toContain('https://test.com/#fragment');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should include mailto links', async () => {
|
|
||||||
const pageOptions: PageOptions = { };
|
|
||||||
const result = await scrapSingleUrl(baseUrl, pageOptions);
|
|
||||||
expect(result.linksOnPage).toContain('mailto:test@example.com');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return unique links', async () => {
|
|
||||||
const pageOptions: PageOptions = { };
|
|
||||||
const result = await scrapSingleUrl(baseUrl, pageOptions);
|
|
||||||
const uniqueLinks = new Set(result.linksOnPage);
|
|
||||||
expect(result.linksOnPage?.length).toBe(uniqueLinks.size);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return a list of links on the mendable.ai page', async () => {
|
it('should return a list of links on the mendable.ai page', async () => {
|
||||||
const url = 'https://mendable.ai';
|
const url = 'https://mendable.ai';
|
||||||
const pageOptions: PageOptions = { includeHtml: true };
|
const pageOptions: PageOptions = { includeHtml: true };
|
||||||
@ -109,7 +34,3 @@ it('should return a list of links on the mendable.ai page', async () => {
|
|||||||
expect(Array.isArray(result.linksOnPage)).toBe(true);
|
expect(Array.isArray(result.linksOnPage)).toBe(true);
|
||||||
expect(result.linksOnPage.length).toBeGreaterThan(0);
|
expect(result.linksOnPage.length).toBeGreaterThan(0);
|
||||||
}, 10000);
|
}, 10000);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user