/**
 * Result of attempting to parse a string with the WHATWG `URL` constructor.
 * `error` is the discriminant: when it is `false`, `urlObj` is a real `URL`;
 * when it is `true`, `urlObj` is an empty placeholder object.
 */
type UrlParseResult =
  | { error: false; urlObj: URL }
  | { error: true; urlObj: Record<string, never> };

/**
 * Matches a string that starts with two consecutive schemes, e.g.
 * `http://http://example.com` or `http:http://example.com`.
 * Neither scheme-like part may contain `.`, `:` or `/`, so ports
 * (`localhost:3000`), credentials (`user:pw@host`) and IPv6 literals
 * are not mistaken for a second scheme.
 */
const REPEATED_SCHEME_PREFIX = /^[^.:/]+:\/*[^.:/]+:\/\//;

/**
 * Tells whether `url` starts with an explicit `scheme://` prefix.
 *
 * The scheme part may not contain `.` or `:`, so `://` only counts when it
 * appears before any dot (i.e. `example.com/x://y` is reported as having no
 * protocol).
 *
 * @param url - Raw user-supplied URL string.
 * @returns `true` when the string begins with `<scheme>://`.
 */
export const protocolIncluded = (url: string): boolean => {
  return /^([^.:]+:\/\/)/.test(url);
};

/**
 * Parses `s` with `new URL` without throwing.
 *
 * `new URL` throws when the protocol is missing (e.g. `google.com`), so the
 * failure is reported through the `error` flag instead.
 *
 * @param s - String to parse.
 * @returns `{ error: false, urlObj }` on success, `{ error: true, urlObj: {} }` otherwise.
 */
const getURLobj = (s: string): UrlParseResult => {
  try {
    return { error: false, urlObj: new URL(s) };
  } catch (err) {
    return { error: true, urlObj: {} };
  }
};

/**
 * Parses `url` and ensures it uses the `http:` or `https:` protocol.
 *
 * @param url - Absolute URL string.
 * @returns The parsed `URL`.
 * @throws Error("Invalid URL") when the string cannot be parsed or uses another protocol.
 */
const parseHttpUrl = (url: string): URL => {
  const parsed = getURLobj(url);
  if (parsed.error) {
    throw new Error("Invalid URL");
  }
  const { protocol } = parsed.urlObj;
  if (protocol !== "http:" && protocol !== "https:") {
    throw new Error("Invalid URL");
  }
  return parsed.urlObj;
};

/**
 * Splits a hostname into its "domain" (last two labels) and "subdomain"
 * (everything before those), after dropping a leading `www.`.
 *
 * Note: the domain is purely the last two dot-separated labels; multi-label
 * public suffixes such as `co.uk` are not special-cased.
 */
const splitHostname = (hostname: string): { domain: string; subdomain: string } => {
  const withoutWww = hostname.startsWith("www.") ? hostname.slice(4) : hostname;
  const labels = withoutWww.split(".");
  return {
    domain: labels.slice(-2).join("."),
    subdomain: labels.slice(0, -2).join("."),
  };
};

/**
 * Normalizes a user-supplied URL: prepends `http://` when no protocol is
 * present, then validates that the result is a parseable http(s) URL.
 *
 * @param url - Raw URL string, with or without protocol.
 * @returns The parsed `URL` object and the (possibly prefixed) URL string.
 * @throws Error("Invalid URL") when the URL cannot be parsed or is not http(s).
 */
export const checkAndUpdateURL = (url: string): { urlObj: URL; url: string } => {
  if (!protocolIncluded(url)) {
    url = `http://${url}`;
  }
  const urlObj = parseHttpUrl(url);
  return { urlObj, url };
};

/**
 * Validates that `url` is an absolute http(s) URL and does not start with a
 * duplicated scheme such as `http://http://example.com`.
 *
 * Unlike a plain colon count, the duplicated-scheme check does not reject
 * URLs with a port on a dotless host (`http://localhost:3000`), embedded
 * credentials, or IPv6 literals.
 *
 * @param url - Absolute URL string (the protocol is required).
 * @returns The input `url`, unchanged.
 * @throws Error("Invalid URL") when the URL cannot be parsed or is not http(s).
 * @throws Error("Invalid URL. Invalid protocol.") when the scheme is repeated.
 */
export const checkUrl = (url: string): string => {
  parseHttpUrl(url);
  if (REPEATED_SCHEME_PREFIX.test(url)) {
    // e.g. http://http://example.com parses (host "http"), but is clearly a typo
    throw new Error("Invalid URL. Invalid protocol.");
  }
  return url;
};

/**
 * Same-domain check.
 *
 * Compares the last two hostname labels of both URLs after stripping a
 * leading `www.`, so any subdomain of the same domain (with or without
 * `www.`) counts as the same domain.
 *
 * @param url - URL to test.
 * @param baseUrl - URL to compare against.
 * @returns `true` when both URLs parse and share the same domain; `false`
 *          otherwise (including when either URL fails to parse).
 */
export function isSameDomain(url: string, baseUrl: string): boolean {
  const candidate = getURLobj(url);
  const base = getURLobj(baseUrl);
  if (candidate.error || base.error) {
    return false;
  }
  return (
    splitHostname(candidate.urlObj.hostname).domain ===
    splitHostname(base.urlObj.hostname).domain
  );
}

/**
 * Same-subdomain check.
 *
 * After stripping a leading `www.` from each hostname, both the domain (last
 * two labels) and the subdomain (all preceding labels) must match.
 *
 * @param url - URL to test.
 * @param baseUrl - URL to compare against.
 * @returns `true` when both URLs parse and share domain and subdomain;
 *          `false` otherwise (including when either URL fails to parse).
 */
export function isSameSubdomain(url: string, baseUrl: string): boolean {
  const candidate = getURLobj(url);
  const base = getURLobj(baseUrl);
  if (candidate.error || base.error) {
    return false;
  }
  const candidateHost = splitHostname(candidate.urlObj.hostname);
  const baseHost = splitHostname(base.urlObj.hostname);
  return (
    candidateHost.domain === baseHost.domain &&
    candidateHost.subdomain === baseHost.subdomain
  );
}

/**
 * Variant of {@link checkAndUpdateURL} that additionally strips a single
 * trailing `/` from the URL string before validating it.
 *
 * Query parameters are intentionally kept.
 *
 * @param url - Raw URL string, with or without protocol.
 * @returns The parsed `URL` object and the normalized URL string.
 * @throws Error("Invalid URL") when the URL cannot be parsed or is not http(s).
 */
export const checkAndUpdateURLForMap = (url: string): { urlObj: URL; url: string } => {
  if (!protocolIncluded(url)) {
    url = `http://${url}`;
  }
  // remove last slash if present
  if (url.endsWith("/")) {
    url = url.slice(0, -1);
  }
  const urlObj = parseHttpUrl(url);
  return { urlObj, url };
};

/**
 * Removes URLs that differ only by protocol and/or a leading `www.`.
 *
 * URLs are grouped by hostname (without leading `www.`) plus path, query and
 * hash. Within a group one representative is kept, in the position of the
 * first occurrence:
 *  - an `https:` URL replaces a previously kept `http:` one;
 *  - with equal protocols, a non-`www.` URL replaces a previously kept `www.` one.
 *
 * @param urls - Absolute URL strings.
 * @returns The de-duplicated URL strings.
 * @throws TypeError when any entry cannot be parsed by `new URL`.
 */
export function removeDuplicateUrls(urls: string[]): string[] {
  const urlMap = new Map<string, string>();

  for (const url of urls) {
    const parsedUrl = new URL(url);
    const hostname = parsedUrl.hostname.replace(/^www\./, "");
    const path = parsedUrl.pathname + parsedUrl.search + parsedUrl.hash;
    const key = `${hostname}${path}`;

    const existing = urlMap.get(key);
    if (existing === undefined) {
      urlMap.set(key, url);
      continue;
    }

    const existingUrl = new URL(existing);
    const upgradesToHttps =
      parsedUrl.protocol === "https:" && existingUrl.protocol === "http:";
    const dropsWww =
      parsedUrl.protocol === existingUrl.protocol &&
      !parsedUrl.hostname.startsWith("www.") &&
      existingUrl.hostname.startsWith("www.");

    if (upgradesToHttps || dropsWww) {
      urlMap.set(key, url);
    }
  }

  // Each stored URL maps to exactly one key, so the values are already unique.
  return Array.from(urlMap.values());
}