mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-04-18 12:09:42 +08:00
fix(scraper): improve charset detection regex to accurately parse meta tags (#1265)
This commit is contained in:
parent
bf1a79588e
commit
7bf04d409a
@@ -74,7 +74,7 @@ export async function scrapeURLWithFetch(
|
||||
|
||||
const buf = Buffer.from(await x.arrayBuffer());
|
||||
let text = buf.toString("utf8");
|
||||
- const charset = (text.match(/charset=["']?(.+?)["']?>/) ?? [])[1]
|
||||
+ const charset = (text.match(/<meta\b[^>]*charset\s*=\s*["']?([^"'\s\/>]+)/i) ?? [])[1]
|
||||
try {
|
||||
if (charset) {
|
||||
text = new TextDecoder(charset.trim()).decode(buf);
|
||||
|
Loading…
x
Reference in New Issue
Block a user