fix(scraper): improve charset detection regex to accurately parse meta tags (#1265)

This commit is contained in:
Grass Huang 2025-02-27 00:31:06 +08:00 committed by GitHub
parent bf1a79588e
commit 7bf04d409a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -74,7 +74,7 @@ export async function scrapeURLWithFetch(
const buf = Buffer.from(await x.arrayBuffer());
let text = buf.toString("utf8");
- const charset = (text.match(/charset=["']?(.+?)["']?>/) ?? [])[1]
+ const charset = (text.match(/<meta\b[^>]*charset\s*=\s*["']?([^"'\s\/>]+)/i) ?? [])[1]
try {
if (charset) {
text = new TextDecoder(charset.trim()).decode(buf);