fix: detect poorly transformed contents

This commit is contained in:
yanlong.wang 2024-10-28 14:52:13 +08:00
parent a8793114bb
commit 9242bb393a
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 51 additions and 3 deletions

View File

@ -295,9 +295,10 @@ export class SnapshotFormatter extends AsyncService {
}
if (
!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))
this.isPoorlyTransformed(contentText, toBeTurnedToMd)
&& toBeTurnedToMd !== jsDomElementOfHTML
) {
toBeTurnedToMd = jsDomElementOfHTML;
try {
contentText = this.jsdomControl.runTurndown(turnDownService, jsDomElementOfHTML);
} catch (err) {
@ -310,7 +311,7 @@ export class SnapshotFormatter extends AsyncService {
}
}
}
if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
if (this.isPoorlyTransformed(contentText, toBeTurnedToMd)) {
contentText = snapshot.text;
}
} while (false);
@ -551,11 +552,58 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
return delimiter + extraSpace + content + (delimiter === '```' && !content.endsWith(extraSpace) ? extraSpace : '') + delimiter;
}
});
turnDownService.addRule('flattened-tables', {
filter: (node) => {
if (node.tagName !== 'TABLE') {
return false;
}
let parentHasTable = false;
let ptr = node.parentElement;
while (ptr) {
if (ptr.tagName === 'TABLE') {
parentHasTable = true;
break;
}
ptr = ptr.parentElement;
}
return parentHasTable;
},
replacement: (innerText) => {
return innerText.trim();
}
});
return turnDownService;
}
isPoorlyTransformed(content?: string, node?: Element) {
if (!content) {
return true;
}
if (content.startsWith('<') && content.endsWith('>')) {
return true;
}
if (content.includes('<table') && content.includes('</table>')) {
const tableElms = node?.querySelectorAll('table') || [];
const deepTableElms = node?.querySelectorAll('table table');
if ((deepTableElms?.length || 0) / tableElms.length > 0.6) {
return true;
}
const tbodyElms = node?.querySelectorAll('tbody') || [];
const deepTbodyElms = node?.querySelectorAll('tbody tbody');
if ((deepTbodyElms?.length || 0) / tbodyElms.length > 0.6) {
return true;
}
}
return false;
}
}
const snapshotFormatter = container.resolve(SnapshotFormatter);

@ -1 +1 @@
Subproject commit 2b78c2af94ead181a1f9d864531132e30405fa7e
Subproject commit 7bdc246a49a3a30f785a98fef46569131505b99a