mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-08 16:19:02 +08:00
Nick: added canonical tests
This commit is contained in:
parent
aef040b41e
commit
f25c0c6d21
39
apps/api/src/lib/canonical-url.test.ts
Normal file
39
apps/api/src/lib/canonical-url.test.ts
Normal file
@ -0,0 +1,39 @@
|
||||
import { normalizeUrl } from './canonical-url';
|
||||
|
||||
// Unit tests for normalizeUrl: each case feeds one input form and checks the
// reduced host string that comes back.
describe('normalizeUrl', () => {
  it('should remove protocol and www from URL', () => {
    const url = 'https://www.example.com';
    const expected = 'example.com';
    expect(normalizeUrl(url)).toBe(expected);
  });

  it('should remove only protocol if www is not present', () => {
    const url = 'https://example.com';
    const expected = 'example.com';
    expect(normalizeUrl(url)).toBe(expected);
  });

  it('should handle URLs without protocol', () => {
    const url = 'www.example.com';
    const expected = 'example.com';
    expect(normalizeUrl(url)).toBe(expected);
  });

  it('should handle URLs without protocol and www', () => {
    const url = 'example.com';
    const expected = 'example.com';
    expect(normalizeUrl(url)).toBe(expected);
  });

  // The path is expected to be dropped entirely; only the host remains.
  it('should handle URLs with paths', () => {
    const url = 'https://www.example.com/path/to/resource';
    const expected = 'example.com';
    expect(normalizeUrl(url)).toBe(expected);
  });

  // Unparseable input is expected to come back unchanged rather than throw.
  it('should handle invalid URLs gracefully', () => {
    const url = 'not a valid url';
    const expected = 'not a valid url';
    expect(normalizeUrl(url)).toBe(expected);
  });
});
|
@ -1,7 +1,8 @@
|
||||
/**
 * Reduces a URL-like string to its bare host, without protocol, leading
 * `www.`, or path.
 *
 * Absolute URLs are parsed with the `URL` class and their hostname is used.
 * Input that cannot be parsed (for example a bare `www.example.com`, or free
 * text) is handled by stripping a leading `http://` / `https://` and `www.`
 * and keeping everything before the first `/`.
 *
 * @param url - The URL or host-like string to normalize.
 * @returns The host portion without a leading `www.`; input that is not
 * URL-like is returned with only the prefix/path stripping applied.
 */
export function normalizeUrl(url: string): string {
  try {
    const hostname = new URL(url).hostname;
    // A schemeless "host:port" string parses successfully (the host is read
    // as a scheme) but yields an empty hostname; use the string fallback
    // instead of returning "".
    if (hostname !== "") {
      return hostname.replace(/^www\./, "");
    }
  } catch {
    // Not an absolute URL; fall through to plain string stripping.
  }

  const withoutPrefix = url.replace(/^https?:\/\//, "").replace(/^www\./, "");
  return withoutPrefix.split("/")[0];
}
|
Loading…
x
Reference in New Issue
Block a user