mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-07-30 08:12:00 +08:00
fix: meta charset hint
This commit is contained in:
parent
5f83d862dd
commit
75a4dbdd79
@ -827,6 +827,10 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||
throw new AssertionFailureError(`Failed to access ${url}: file too large`);
|
||||
}
|
||||
snapshot.html = await readFile(await file.filePath, encoding);
|
||||
const innerCharset = snapshot.html.slice(0, 1024).match(/<meta[^>]+text\/html;\s*?charset=([^>"]+)\"/i)?.[1]?.toLowerCase();
|
||||
if (innerCharset && innerCharset !== encoding) {
|
||||
snapshot.html = await readFile(await file.filePath, innerCharset);
|
||||
}
|
||||
|
||||
return snapshot;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user