/**
 * Reduces a URL-like string to its bare host name so that different spellings
 * of the same site compare equal.
 *
 * The scheme, a leading `www.`, any port, path, query string and fragment are
 * dropped. Input without an explicit `scheme://` prefix is parsed as if it
 * were an `http://` URL, so `example.com:8080/path` and `www.example.com`
 * both yield `example.com`. Host names that parse successfully are returned
 * in the lower-cased form produced by the `URL` parser.
 *
 * @param url - Absolute URL or scheme-less host (optionally with port/path).
 * @returns The normalized host name. Input that cannot be parsed as a URL is
 * returned on a best-effort basis: an `http(s)://` prefix and a leading
 * `www.` are removed and everything from the first `/`, `?` or `#` is cut.
 */
export function normalizeUrl(url: string): string {
  const stripWww = (host: string): string => host.replace(/^www\./i, "");

  // Without an explicit "scheme://" the URL parser would either throw or
  // mistake "host:port" for "scheme:path" (yielding an empty hostname), so
  // give scheme-less input a default scheme before parsing.
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(url);
  const candidate = hasScheme ? url : `http://${url}`;

  try {
    return stripWww(new URL(candidate).hostname);
  } catch {
    // Not parseable as a URL; fall through to plain string clean-up below.
  }

  // Best-effort fallback: keep only what precedes the first path, query or
  // fragment delimiter. The default covers the (theoretical) empty split.
  const [authority = ""] = url.replace(/^https?:\/\//i, "").split(/[/?#]/);
  return stripWww(authority);
}