mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 01:15:52 +08:00
feat: enhance metadata extraction by including 'itemprop' attribute in HTML
This commit is contained in:
parent
e108ff3525
commit
8d22fe9d97
@ -124,7 +124,7 @@ pub unsafe extern "C" fn extract_metadata(html: *const libc::c_char) -> *mut lib
|
||||
let meta = meta.as_node().as_element().unwrap();
|
||||
let attrs = meta.attributes.borrow();
|
||||
|
||||
if let Some(name) = attrs.get("name").or_else(|| attrs.get("property")) {
|
||||
if let Some(name) = attrs.get("name").or_else(|| attrs.get("property")).or_else(|| attrs.get("itemprop")) {
|
||||
if let Some(content) = attrs.get("content") {
|
||||
if let Some(v) = out.get(name) {
|
||||
match v {
|
||||
|
@ -133,7 +133,7 @@ export async function extractMetadata(
|
||||
// Extract all meta tags for custom metadata
|
||||
soup("meta").each((i, elem) => {
|
||||
try {
|
||||
const name = soup(elem).attr("name") || soup(elem).attr("property");
|
||||
const name = soup(elem).attr("name") || soup(elem).attr("property") || soup(elem).attr("itemprop");
|
||||
const content = soup(elem).attr("content");
|
||||
|
||||
if (name && content) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user