mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-04-19 12:39:42 +08:00
fix(scraper): improve charset detection regex to accurately parse meta tags (#1265)
This commit is contained in:
parent
bf1a79588e
commit
7bf04d409a
@@ -74,7 +74,7 @@ export async function scrapeURLWithFetch(
|
|||||||
|
|
||||||
const buf = Buffer.from(await x.arrayBuffer());
|
const buf = Buffer.from(await x.arrayBuffer());
|
||||||
let text = buf.toString("utf8");
|
let text = buf.toString("utf8");
|
||||||
const charset = (text.match(/charset=["']?(.+?)["']?>/) ?? [])[1]
|
const charset = (text.match(/<meta\b[^>]*charset\s*=\s*["']?([^"'\s\/>]+)/i) ?? [])[1]
|
||||||
try {
|
try {
|
||||||
if (charset) {
|
if (charset) {
|
||||||
text = new TextDecoder(charset.trim()).decode(buf);
|
text = new TextDecoder(charset.trim()).decode(buf);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user