mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-07-31 09:02:04 +08:00
fix: meta charset hint
This commit is contained in:
parent
5f83d862dd
commit
75a4dbdd79
@ -827,6 +827,10 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
throw new AssertionFailureError(`Failed to access ${url}: file too large`);
|
throw new AssertionFailureError(`Failed to access ${url}: file too large`);
|
||||||
}
|
}
|
||||||
snapshot.html = await readFile(await file.filePath, encoding);
|
snapshot.html = await readFile(await file.filePath, encoding);
|
||||||
|
const innerCharset = snapshot.html.slice(0, 1024).match(/<meta[^>]+text\/html;\s*?charset=([^>"]+)\"/i)?.[1]?.toLowerCase();
|
||||||
|
if (innerCharset && innerCharset !== encoding) {
|
||||||
|
snapshot.html = await readFile(await file.filePath, innerCharset);
|
||||||
|
}
|
||||||
|
|
||||||
return snapshot;
|
return snapshot;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user