mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-20 05:39:05 +08:00
feat(html): always pick largest image from srcset
This commit is contained in:
parent
655753cd27
commit
e824303d87
@ -114,6 +114,30 @@ export const removeUnwantedElements = (
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Always expose the largest srcset candidate as the image's `src`.
//
// For every <img> that carries a `srcset` attribute, the attribute is parsed
// into { url, size, isX } candidates and `src` is overwritten with the URL of
// the candidate whose descriptor value is highest. When every candidate uses
// a pixel-density ("x") descriptor, the existing `src` competes as an
// implicit 1x candidate. If `srcset` holds no usable candidate, `src` is left
// untouched.
//
// NOTE(review): candidates are still separated by splitting on ",", so image
// URLs that themselves contain commas are split apart, as before.
soup("img[srcset]").each((_, el) => {
  const candidates = el.attribs.srcset
    .split(",")
    .map((entry) => entry.trim())
    // Empty entries come from an empty attribute or trailing/doubled commas;
    // keeping them could replace a valid `src` with an empty URL.
    .filter((entry) => entry.length > 0)
    .map((entry) => {
      // Tokens may be separated by any run of whitespace (spaces, tabs,
      // newlines), not just a single space.
      const [url, descriptor = "1x"] = entry.split(/\s+/);
      // parseFloat keeps fractional densities such as "1.5x"; parseInt would
      // truncate them and make 1.5x tie with 1x.
      const value = Number.parseFloat(descriptor.slice(0, -1));
      return {
        url,
        // An unparsable descriptor ranks last instead of poisoning the sort
        // comparator with NaN.
        size: Number.isFinite(value) ? value : 0,
        isX: descriptor.endsWith("x"),
      };
    });

  if (candidates.length === 0) {
    // Nothing usable in srcset: keep whatever `src` the element already has.
    return;
  }

  if (el.attribs.src && candidates.every((candidate) => candidate.isX)) {
    candidates.push({
      url: el.attribs.src,
      size: 1,
      isX: true,
    });
  }

  // Largest first; the sort is stable, so on ties the earliest candidate wins.
  candidates.sort((a, b) => b.size - a.size);

  el.attribs.src = candidates[0].url;
});
|
||||||
|
|
||||||
const cleanedHtml = soup.html();
|
const cleanedHtml = soup.html();
|
||||||
return cleanedHtml;
|
return cleanedHtml;
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user