mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-08 16:19:02 +08:00
Nick: added canonical tests
This commit is contained in:
parent
aef040b41e
commit
f25c0c6d21
39
apps/api/src/lib/canonical-url.test.ts
Normal file
39
apps/api/src/lib/canonical-url.test.ts
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import { normalizeUrl } from './canonical-url';
|
||||||
|
|
||||||
|
// Table-driven suite for normalizeUrl: each row is one scenario, registered
// as its own test so failures are still reported per case.
describe('normalizeUrl', () => {
  const scenarios = [
    {
      title: 'should remove protocol and www from URL',
      input: 'https://www.example.com',
      expected: 'example.com',
    },
    {
      title: 'should remove only protocol if www is not present',
      input: 'https://example.com',
      expected: 'example.com',
    },
    {
      title: 'should handle URLs without protocol',
      input: 'www.example.com',
      expected: 'example.com',
    },
    {
      title: 'should handle URLs without protocol and www',
      input: 'example.com',
      expected: 'example.com',
    },
    {
      title: 'should handle URLs with paths',
      input: 'https://www.example.com/path/to/resource',
      expected: 'example.com',
    },
    {
      // Unparseable input is expected to come back unchanged.
      title: 'should handle invalid URLs gracefully',
      input: 'not a valid url',
      expected: 'not a valid url',
    },
  ];

  for (const { title, input, expected } of scenarios) {
    it(title, () => {
      expect(normalizeUrl(input)).toBe(expected);
    });
  }
});
|
@ -1,7 +1,8 @@
|
|||||||
/**
 * Reduces a URL-like string to its bare host name, without scheme, a leading
 * `www.`, port, path, query or fragment.
 *
 * Resolution order:
 * 1. Parse the input as-is; use its hostname when it has one.
 * 2. If the input has no `://`, parse it again as `http://<input>`. This
 *    covers scheme-less input such as `www.example.com/path`, and also
 *    `host:port` input (`example.com:8080`), which the URL parser otherwise
 *    accepts as a custom scheme with an empty hostname.
 * 3. If the input parsed but is genuinely host-less (e.g. `file:///x`),
 *    return the empty hostname.
 * 4. Otherwise fall back to plain string stripping, so text that is not a
 *    URL at all is returned essentially unchanged instead of throwing.
 *
 * @param url - Absolute URL, scheme-less host/path, or arbitrary text.
 * @returns The host name without a leading `www.`; for unparseable input,
 *   the text before the first `/` with any `http(s)://` and `www.` prefix
 *   removed.
 */
export function normalizeUrl(url: string): string {
  const stripWww = (host: string): string => host.replace(/^www\./, "");

  // Hostname of `candidate`, or undefined when it is not a parseable URL.
  const hostnameOf = (candidate: string): string | undefined => {
    try {
      return new URL(candidate).hostname;
    } catch {
      return undefined;
    }
  };

  const direct = hostnameOf(url);
  if (direct) {
    return stripWww(direct);
  }

  // No usable host yet: either parsing failed, or "host:port" was read as
  // "scheme:path". Only retry when the caller did not supply a scheme.
  if (!url.includes("://")) {
    const assumedHttp = hostnameOf(`http://${url}`);
    if (assumedHttp) {
      return stripWww(assumedHttp);
    }
  }

  // Parsed successfully but has no host (e.g. file:///x): keep returning "".
  if (direct !== undefined) {
    return direct;
  }

  // Last resort for text that is not a URL: best-effort prefix stripping.
  const [head = ""] = url
    .replace(/^https?:\/\//, "")
    .replace(/^www\./, "")
    .split("/");
  return head;
}
|
Loading…
x
Reference in New Issue
Block a user