diff --git a/backend/functions/src/services/html-to-md.ts b/backend/functions/src/services/html-to-md.ts
index 208f9c3..e9e723d 100644
--- a/backend/functions/src/services/html-to-md.ts
+++ b/backend/functions/src/services/html-to-md.ts
@@ -368,7 +368,7 @@ export class MDBlockQuote extends MarkdownASTParentNode {
+// Node classes grouped as flow content; used below as the allowed children of MDBlockQuote.
export const flowContents = [MDBlockQuote, MDCode, MDHeading, MDHTML, MDList, MDThematicBreak, MDParagraph, MDMath, MDTable];
+// Node classes grouped as phrasing content; used below as the allowed children of MDHeading.
export const phrasingContent = [MDLineBreak, MDEmphasis, MDStrong, MDHTML, MDImage, MDInlineCode, MDInlineMath, MDLink, MDLiteral, MDDelete];
-export const childrenAllowedNodes = new Map([
+export const childrenAllowedNodes = new Map([
[MDBlockQuote, flowContents],
[MDHeading, phrasingContent],
[MDList, [MDListItem]],
@@ -377,7 +377,6 @@ export const childrenAllowedNodes = new Map([
[MDTable, [MDTableHeader, MDTableRow]],
[MDTableHeader, [MDTableHeading]],
[MDTableRow, [MDTableCell]],
-
]);
export class HTMLToMarkdownJob {
@@ -389,10 +388,213 @@ export class HTMLToMarkdownJob {
metadata: Record = {};
+    /**
+     * @param dom Document kept on the job as the public `dom` property.
+     */
constructor(public dom: Document) {
+
}
+    /**
+     * Resets the insertion cursor: points `ptr` back at `root` and rebuilds
+     * `stack` so that it contains `root` as its only entry.
+     */
restFlow() {
this.ptr = this.root;
+ this.stack.length = 0;
+ this.stack.push(this.root);
+ }
+
+    /**
+     * Reports whether the node under `ptr` may receive a child of class `cls`.
+     *
+     * The constructor of `ptr` is looked up in `childrenAllowedNodes`; a class
+     * without an entry there accepts no children.
+     *
+     * @param cls Node class of the prospective child.
+     * @returns `true` only when `cls` is listed for the current node's class.
+     */
+    checkIfAllowedToHaveChild(cls: typeof MarkdownASTNode) {
+        const parentClass = this.ptr.constructor as typeof MarkdownASTNode;
+        const permitted = childrenAllowedNodes.get(parentClass);
+
+        return permitted !== undefined && permitted.includes(cls);
+    }
+
+    /**
+     * Moves `ptr` up the stack of open nodes until it rests on a node that
+     * accepts a child of class `cls`.
+     *
+     * Each rejected node is popped from `stack`. When fewer than two entries
+     * remain and the current node still rejects `cls`, the cursor is reset
+     * with `restFlow()`.
+     *
+     * @param cls Node class about to be inserted.
+     */
+    seekToInsert(cls: typeof MarkdownASTNode) {
+        while (!this.checkIfAllowedToHaveChild(cls)) {
+            if (this.stack.length < 2) {
+                // Nothing left to pop: fall back to a freshly reset cursor.
+                this.restFlow();
+
+                return;
+            }
+
+            this.stack.pop();
+            this.ptr = this.stack[this.stack.length - 1];
+        }
+    }
+
+    /**
+     * Starts a block quote and makes it the current node.
+     *
+     * The cursor is reset with `restFlow()` first, so the quote is appended to
+     * the children of the node `ptr` points at after the reset.
+     *
+     * @returns The newly created MDBlockQuote.
+     */
+    newBlockquote() {
+        this.restFlow();
+        const quote = new MDBlockQuote();
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(quote);
+        this.ptr = quote;
+        this.stack.push(quote);
+
+        return quote;
+    }
+
+    /**
+     * Starts a heading and makes it the current node.
+     *
+     * The cursor is reset with `restFlow()` first, so the heading is appended
+     * to the children of the node `ptr` points at after the reset.
+     *
+     * @param n Heading level, stored on the node as `level`.
+     * @returns The newly created MDHeading.
+     */
+    newHeading(n: 1 | 2 | 3 | 4 | 5 | 6) {
+        this.restFlow();
+        const heading = new MDHeading();
+        heading.level = n;
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(heading);
+        this.ptr = heading;
+        this.stack.push(heading);
+
+        return heading;
+    }
+
+    /**
+     * Starts a paragraph and makes it the current node.
+     *
+     * The cursor is reset with `restFlow()` first, so the paragraph is appended
+     * to the children of the node `ptr` points at after the reset.
+     *
+     * @returns The newly created MDParagraph.
+     */
+    newParagraph() {
+        this.restFlow();
+        const paragraph = new MDParagraph();
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(paragraph);
+        this.ptr = paragraph;
+        this.stack.push(paragraph);
+
+        return paragraph;
+    }
+
+    /**
+     * Opens a list under the nearest open node that accepts an MDList and
+     * makes the list the current node.
+     *
+     * @param ordered Stored on the node as `ordered`; defaults to `false`.
+     * @returns The newly created MDList.
+     */
+    newList(ordered: boolean = false) {
+        this.seekToInsert(MDList);
+        const list = new MDList();
+        list.ordered = ordered;
+        const parent = this.ptr as MarkdownASTParentNode;
+        parent.children.push(list);
+        this.ptr = list;
+        this.stack.push(list);
+
+        return list;
+    }
+
+    /**
+     * Opens a list item and makes it the current node.
+     *
+     * The nearest open node that accepts an MDListItem is used as the parent.
+     * If the search ends at the root, a list is opened first via `newList()`.
+     *
+     * @param ordered Forwarded to `newList()` when a list has to be opened;
+     *                defaults to `false`.
+     * @returns The newly created MDListItem.
+     */
+    newListItem(ordered: boolean = false) {
+        this.seekToInsert(MDListItem);
+        const landedOnRoot = this.ptr === this.root;
+        if (landedOnRoot) {
+            // No open node accepted the item: open a list for it.
+            this.newList(ordered);
+        }
+
+        const item = new MDListItem();
+        const parent = this.ptr as MarkdownASTParentNode;
+        parent.children.push(item);
+        this.ptr = item;
+        this.stack.push(item);
+
+        return item;
+    }
+
+    /**
+     * Opens a table under the nearest open node that accepts an MDTable and
+     * makes the table the current node.
+     *
+     * @returns The newly created MDTable.
+     */
+    newTable() {
+        this.seekToInsert(MDTable);
+        const table = new MDTable();
+        const parent = this.ptr as MarkdownASTParentNode;
+        parent.children.push(table);
+        this.ptr = table;
+        this.stack.push(table);
+
+        return table;
+    }
+
+    /**
+     * Opens a table header and makes it the current node.
+     *
+     * The nearest open node that accepts an MDTableHeader is used as the
+     * parent. If the search ends at the root, a table is opened first via
+     * `newTable()`.
+     *
+     * @returns The newly created MDTableHeader.
+     */
+    newTableHeader() {
+        this.seekToInsert(MDTableHeader);
+        const landedOnRoot = this.ptr === this.root;
+        if (landedOnRoot) {
+            // No open table accepted the header: open a table for it.
+            this.newTable();
+        }
+
+        const header = new MDTableHeader();
+        const parent = this.ptr as MarkdownASTParentNode;
+        parent.children.push(header);
+        this.ptr = header;
+        this.stack.push(header);
+
+        return header;
+    }
+
+    /**
+     * Opens a table row and makes it the current node.
+     *
+     * The nearest open node that accepts an MDTableRow is used as the parent.
+     * If the search ends at the root, a table is opened first via `newTable()`
+     * so the row is attached to a table.
+     *
+     * This method previously searched for and created an MDTableCell, which
+     * attached a cell directly to the table and never produced a row;
+     * `childrenAllowedNodes` lists MDTableRow, not MDTableCell, as a child of
+     * MDTable.
+     *
+     * @returns The newly created MDTableRow.
+     */
+    newTableRow() {
+        this.seekToInsert(MDTableRow);
+        if (this.ptr === this.root) {
+            // No open table accepted the row: open a table for it.
+            this.newTable();
+        }
+        const node = new MDTableRow();
+        (this.ptr as MarkdownASTParentNode).children.push(node);
+        this.stack.push(node);
+        this.ptr = node;
+
+        return node;
+    }
+
+    /**
+     * Appends a code node under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @param inline When truthy an MDInlineCode is created, otherwise an MDCode.
+     * @returns The newly created node.
+     */
+    newCode(inline?: boolean) {
+        const codeNode = inline ? new MDInlineCode() : new MDCode();
+        const codeClass = codeNode.constructor as typeof MarkdownASTNode;
+        this.seekToInsert(codeClass);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(codeNode);
+
+        return codeNode;
+    }
+
+    /**
+     * Appends an MDHTML node under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @returns The newly created MDHTML.
+     */
+    newHTML() {
+        const html = new MDHTML();
+        this.seekToInsert(MDHTML);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(html);
+
+        return html;
+    }
+
+    /**
+     * Appends a math node under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @param inline When truthy an MDInlineMath is created, otherwise an MDMath.
+     * @returns The newly created node.
+     */
+    newMath(inline?: boolean) {
+        const mathNode = inline ? new MDInlineMath() : new MDMath();
+        const mathClass = mathNode.constructor as typeof MarkdownASTNode;
+        this.seekToInsert(mathClass);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(mathNode);
+
+        return mathNode;
+    }
+
+    /**
+     * Appends an MDLineBreak under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @returns The newly created MDLineBreak.
+     */
+    newLineBreak() {
+        const lineBreak = new MDLineBreak();
+        this.seekToInsert(MDLineBreak);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(lineBreak);
+
+        return lineBreak;
+    }
+
+    /**
+     * Appends an MDEmphasis under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @returns The newly created MDEmphasis.
+     */
+    newEmphasis() {
+        const emphasis = new MDEmphasis();
+        this.seekToInsert(MDEmphasis);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(emphasis);
+
+        return emphasis;
+    }
+
+    /**
+     * Appends an MDLiteral under the nearest open node that accepts it.
+     *
+     * Performs the same steps as `newLiteral()` and therefore delegates to it.
+     *
+     * @returns The newly created MDLiteral.
+     */
+    newString() {
+        return this.newLiteral();
+    }
+
+    /**
+     * Appends an MDImage under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @returns The newly created MDImage.
+     */
+    newImage() {
+        const image = new MDImage();
+        this.seekToInsert(MDImage);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(image);
+
+        return image;
+    }
+
+    /**
+     * Appends an MDLiteral under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @returns The newly created MDLiteral.
+     */
+    newLiteral() {
+        const literal = new MDLiteral();
+        this.seekToInsert(MDLiteral);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(literal);
+
+        return literal;
+    }
+
+    /**
+     * Appends an MDDelete under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @returns The newly created MDDelete.
+     */
+    newDelete() {
+        const deletion = new MDDelete();
+        this.seekToInsert(MDDelete);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(deletion);
+
+        return deletion;
+    }
+
+    /**
+     * Appends an MDLink under the nearest open node that accepts it.
+     *
+     * The new node is only appended: it is not pushed onto `stack`, and `ptr`
+     * is not moved onto it.
+     *
+     * @returns The newly created MDLink.
+     */
+    newLink() {
+        const link = new MDLink();
+        this.seekToInsert(MDLink);
+        const parent = this.ptr as MarkdownASTRoot;
+        parent.children.push(link);
+
+        return link;
}
walk() {
@@ -411,6 +613,8 @@ export class HTMLToMarkdownJob {
}
);
+ tw.nextSibling();
+ tw.firstChild();
}