From a4cf814f70ab56b986645372647980e89252f60b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adem=C3=ADlson=20F=2E=20Tonato?= Date: Fri, 27 Dec 2024 19:18:53 +0000 Subject: [PATCH] feat: return favicon url when scraping --- apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts b/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts index 66cf30cc..1f494893 100644 --- a/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts +++ b/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts @@ -8,6 +8,7 @@ export function extractMetadata( ): Partial { let title: string | undefined = undefined; let description: string | undefined = undefined; + let favicon: string | undefined = undefined; let language: string | undefined = undefined; let keywords: string | undefined = undefined; let robots: string | undefined = undefined; @@ -42,6 +43,12 @@ export function extractMetadata( try { title = soup("title").first().text().trim() || undefined; description = soup('meta[name="description"]').attr("content") || undefined; + + const faviconLink = soup('link[rel="icon"]').attr("href") || soup('link[rel*="icon"]').first().attr("href") || undefined; + if (faviconLink) { + const baseUrl = new URL(meta.url).origin; + favicon = faviconLink.startsWith('http') ? faviconLink : `${baseUrl}${faviconLink}`; + } // Assuming the language is part of the URL as per the regex pattern language = soup("html").attr("lang") || undefined; @@ -121,6 +128,7 @@ export function extractMetadata( return { title, description, + favicon, language, keywords, robots,