From 8d22fe9d97fd5dda2be5fa8ac4bdfc629202b262 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adem=C3=ADlson=20Tonato?= Date: Tue, 3 Jun 2025 17:09:17 +0100 Subject: [PATCH] feat: enhance metadata extraction by including 'itemprop' attribute in HTML --- apps/api/sharedLibs/html-transformer/src/lib.rs | 2 +- apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/sharedLibs/html-transformer/src/lib.rs b/apps/api/sharedLibs/html-transformer/src/lib.rs index 1e4f8302..4018746b 100644 --- a/apps/api/sharedLibs/html-transformer/src/lib.rs +++ b/apps/api/sharedLibs/html-transformer/src/lib.rs @@ -124,7 +124,7 @@ pub unsafe extern "C" fn extract_metadata(html: *const libc::c_char) -> *mut lib let meta = meta.as_node().as_element().unwrap(); let attrs = meta.attributes.borrow(); - if let Some(name) = attrs.get("name").or_else(|| attrs.get("property")) { + if let Some(name) = attrs.get("name").or_else(|| attrs.get("property")).or_else(|| attrs.get("itemprop")) { if let Some(content) = attrs.get("content") { if let Some(v) = out.get(name) { match v { diff --git a/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts b/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts index 61d5ab04..20a6dd7b 100644 --- a/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts +++ b/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts @@ -133,7 +133,7 @@ export async function extractMetadata( // Extract all meta tags for custom metadata soup("meta").each((i, elem) => { try { - const name = soup(elem).attr("name") || soup(elem).attr("property"); + const name = soup(elem).attr("name") || soup(elem).attr("property") || soup(elem).attr("itemprop"); const content = soup(elem).attr("content"); if (name && content) {