Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions packages/markdown-html/src/HtmlTransformer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

const fs = require('fs');
const CiceroMarkTransformer = require('@accordproject/markdown-cicero').CiceroMarkTransformer;
const { CommonMarkModel } = require('@accordproject/markdown-common');
const HtmlTransformer = require('./HtmlTransformer');

let htmlTransformer = null;
Expand Down Expand Up @@ -78,6 +79,139 @@ describe('markdown <-> html', () => {
});
});

describe('html table deserialization', () => {
    const commonmarkNamespace = CommonMarkModel.NAMESPACE;

    // Fully-qualified CommonMark class name for a short node name.
    const classOf = shortName => `${commonmarkNamespace}.${shortName}`;

    // Expected shape of a CommonMark Text node carrying the given text.
    const textNode = text => ({ $class: classOf('Text'), text });

    // Joins the markup lines with line breaks, adding one leading and one
    // trailing line break, and converts the result to CiceroMark.
    const parseLines = (...lines) => htmlTransformer.toCiceroMark(['', ...lines, ''].join('\n'));

    it('deserializes a table caption before the table', () => {
        const { nodes } = parseLines(
            '<table>',
            '<caption>Employee Details</caption>',
            '<thead>',
            '<tr><th>Name</th><th>Role</th></tr>',
            '</thead>',
            '<tbody>',
            '<tr><td>Ada</td><td>Engineer</td></tr>',
            '</tbody>',
            '</table>'
        );

        // The caption becomes a bold paragraph placed ahead of the table.
        expect(nodes).toHaveLength(2);
        expect(nodes[0]).toEqual({
            $class: classOf('Paragraph'),
            nodes: [
                {
                    $class: classOf('Strong'),
                    nodes: [textNode('Employee Details')]
                }
            ]
        });
        expect(nodes[1].$class).toBe(classOf('Table'));
    });

    it('flattens block content in a caption to inline-only Strong children', () => {
        const { nodes } = parseLines(
            '<table>',
            '<caption><p>See <em>also</em></p></caption>',
            '<thead>',
            '<tr><th>Name</th></tr>',
            '</thead>',
            '<tbody>',
            '<tr><td>Ada</td></tr>',
            '</tbody>',
            '</table>'
        );

        expect(nodes).toHaveLength(2);
        const boldCaption = nodes[0].nodes[0];
        expect(boldCaption.$class).toBe(classOf('Strong'));
        // The <p> wrapper must have been unwrapped: no Paragraph may sit
        // directly under the Strong node.
        boldCaption.nodes.forEach(inlineChild => {
            expect(inlineChild.$class).not.toBe(classOf('Paragraph'));
        });
        expect(boldCaption.nodes).toEqual([
            textNode('See '),
            {
                $class: classOf('Emph'),
                nodes: [textNode('also')]
            }
        ]);
        expect(nodes[1].$class).toBe(classOf('Table'));
    });

    it('does not leak a caption node when it appears outside a table', () => {
        const { nodes } = htmlTransformer.toCiceroMark('<caption>Orphan caption</caption>');

        // Every emitted node needs a string $class, and none may be a raw
        // caption node.
        nodes.forEach(node => expect(typeof node.$class).toBe('string'));
        expect(nodes.some(node => node.type === 'caption')).toBe(false);
    });

    it('normalizes whitespace in table cells', () => {
        const { nodes } = parseLines(
            '<table>',
            '<tbody>',
            '<tr>',
            '<td>',
            'First',
            'Second',
            '</td>',
            '</tr>',
            '</tbody>',
            '</table>'
        );

        // Walk down table -> first section -> first row -> first cell.
        const table = nodes[0];
        const section = table.nodes[0];
        const row = section.nodes[0];
        const cell = row.nodes[0];
        expect(cell.nodes).toEqual([textNode('First Second')]);
    });

    it('promotes tbody rows with header cells to table head when thead is missing', () => {
        const { nodes } = parseLines(
            '<table>',
            '<tbody>',
            '<tr><th>Name</th><th>Role</th></tr>',
            '<tr><td>Ada</td><td>Engineer</td></tr>',
            '</tbody>',
            '</table>'
        );

        const table = nodes[0];
        expect(table.nodes).toHaveLength(2);

        const head = table.nodes[0];
        expect(head.$class).toBe(classOf('TableHead'));
        expect(head.nodes[0].nodes.map(cell => cell.$class)).toEqual([
            classOf('HeaderCell'),
            classOf('HeaderCell')
        ]);

        // Only the data row is left in the body once the header row moved.
        const body = table.nodes[1];
        expect(body.$class).toBe(classOf('TableBody'));
        expect(body.nodes).toHaveLength(1);
    });

    it('keeps tables without captions as a single table node', () => {
        const { nodes } = parseLines(
            '<table>',
            '<thead>',
            '<tr><th>Name</th><th>Role</th></tr>',
            '</thead>',
            '<tbody>',
            '<tr><td>Ada</td><td>Engineer</td></tr>',
            '</tbody>',
            '</table>'
        );

        expect(nodes).toHaveLength(1);
        expect(nodes[0].$class).toBe(classOf('Table'));
    });
});

/**
* Get the name and contents of all ciceromark test files
* @returns {*} an array of name/contents tuples
Expand Down
163 changes: 155 additions & 8 deletions packages/markdown-html/src/rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -498,42 +498,189 @@ const HTML_BLOCK_RULE = {
}
};

// Fully-qualified names of the CommonMark node classes treated as inline.
// HTML allows flow content (paragraphs, lists, ...) inside a <caption>, whereas
// a Strong node may only hold inline children; caption content is therefore
// reduced to nodes of these classes before being wrapped in Strong.
const INLINE_CLASSES = [
    'Text',
    'Emph',
    'Strong',
    'Code',
    'Link',
    'Image',
    'Softbreak',
    'Linebreak',
    'HtmlInline'
].map(shortName => `${CommonMarkModel.NAMESPACE}.${shortName}`);

/**
 * Reduce a node list to the inline nodes it contains, in document order, so
 * the result can be placed inside a Strong node. A node whose $class is listed
 * in INLINE_CLASSES is kept as-is; any other node is replaced by the inline
 * nodes found (recursively) under its `nodes` property, and is discarded when
 * it has no `nodes`.
 * @param {Array|object} nodes - a list of nodes, a single node, or a falsy value
 * @returns {Array} - a new flat array containing only inline nodes
 */
function toInlineNodes(nodes) {
    if (!nodes) {
        return [];
    }

    const inlineNodes = [];
    const candidates = Array.isArray(nodes) ? nodes : [nodes];

    candidates.forEach(candidate => {
        if (!candidate) {
            return;
        }
        if (INLINE_CLASSES.includes(candidate.$class)) {
            // Already inline: keep the node itself, children untouched.
            inlineNodes.push(candidate);
            return;
        }
        if (candidate.nodes) {
            // Non-inline wrapper: hoist whatever inline content it holds.
            inlineNodes.push(...toInlineNodes(candidate.nodes));
        }
    });

    return inlineNodes;
}

/**
 * Tidy a list of nodes taken from a table cell. Works on shallow copies of the
 * given nodes and, recursively, of their `nodes` children:
 *  - every Softbreak is turned into a Text node holding a single space;
 *  - neighbouring Text nodes are fused into one;
 *  - each run of whitespace inside a Text node collapses to one space;
 *  - whitespace is trimmed from the start of a leading Text node and from the
 *    end of a trailing Text node;
 *  - Text nodes left empty are removed.
 * @param {Array|object} nodes - a list of nodes, a single node, or a falsy value
 * @returns {Array} - a new list of cleaned nodes
 */
function cleanTableNodes(nodes) {
    const namespace = CommonMarkModel.NAMESPACE;
    const textClass = `${namespace}.Text`;
    const softbreakClass = `${namespace}.Softbreak`;
    const isText = candidate => candidate.$class === textClass;

    if (!nodes) {
        return [];
    }

    const cleaned = [];
    const source = Array.isArray(nodes) ? nodes : [nodes];

    source.forEach(original => {
        if (!original) {
            return;
        }

        // Shallow copy, so the text edits below never touch the input nodes.
        let current = { ...original };
        if (current.nodes) {
            current.nodes = cleanTableNodes(current.nodes);
        }

        // A soft line break inside a cell is rendered as a plain space.
        if (current.$class === softbreakClass) {
            current = { $class: textClass, text: ' ' };
        }

        const previous = cleaned[cleaned.length - 1];
        if (previous && isText(previous) && isText(current)) {
            // Fuse into the preceding Text node instead of adding a new one.
            previous.text += current.text;
            return;
        }
        cleaned.push(current);
    });

    // Collapse every whitespace run inside Text nodes to a single space.
    cleaned.filter(isText).forEach(entry => {
        entry.text = entry.text.replace(/\s+/g, ' ');
    });

    // Only the outer edges of the list are trimmed; spaces next to sibling
    // nodes in the middle of the list are kept.
    const first = cleaned[0];
    if (first && isText(first)) {
        first.text = first.text.replace(/^\s+/, '');
    }
    const last = cleaned[cleaned.length - 1];
    if (last && isText(last)) {
        last.text = last.text.replace(/\s+$/, '');
    }

    return cleaned.filter(entry => !isText(entry) || entry.text.length > 0);
}
Comment thread
mttrbrts marked this conversation as resolved.


const TABLE_RULE = {
deserialize(el, next, ignoreSpace) {
if (el.tagName && el.tagName.toLowerCase() === 'table') {
return {
const children = next(el.childNodes, ignoreSpace);
let tableNodes = children.filter(node =>
node.$class === `${CommonMarkModel.NAMESPACE}.TableHead` ||
node.$class === `${CommonMarkModel.NAMESPACE}.TableBody`
);

let head = tableNodes.find(n => n.$class === `${CommonMarkModel.NAMESPACE}.TableHead`);
const body = tableNodes.find(n => n.$class === `${CommonMarkModel.NAMESPACE}.TableBody`);

if (!head && body && body.nodes && body.nodes.length > 0) {
const firstRow = body.nodes[0];
const hasHeaderCells = firstRow.nodes && firstRow.nodes.some(n => n.$class === `${CommonMarkModel.NAMESPACE}.HeaderCell`);

if (hasHeaderCells) {
head = {
$class: `${CommonMarkModel.NAMESPACE}.TableHead`,
nodes: [firstRow]
};
const newBody = {
$class: `${CommonMarkModel.NAMESPACE}.TableBody`,
nodes: body.nodes.slice(1)
};
tableNodes = [head, newBody];
}
}

const table = {
$class: `${CommonMarkModel.NAMESPACE}.Table`,
nodes: next(el.childNodes),
nodes: tableNodes,
};

// A <caption> is handled here (rather than via its own rule) so the
// caption node never leaks into the output when it appears outside
// of a table. Its content is flattened to inline-only nodes so the
// Strong wrapper stays valid CommonMark, and the bolded caption is
// emitted as its own Paragraph block before the table - block-level
// spacing is left to the Markdown serializer.
const captionElement = Array.from(el.childNodes).find(
child => child.tagName && child.tagName.toLowerCase() === 'caption'
);
if (captionElement) {
const captionNodes = cleanTableNodes(toInlineNodes(next(captionElement.childNodes, ignoreSpace)));
if (captionNodes.length > 0) {
const captionParagraph = {
$class: `${CommonMarkModel.NAMESPACE}.Paragraph`,
nodes: [
{
$class: `${CommonMarkModel.NAMESPACE}.Strong`,
nodes: captionNodes
}
]
};
return [captionParagraph, table];
}
}

return table;
Comment thread
mttrbrts marked this conversation as resolved.
}
if (el.tagName && el.tagName.toLowerCase() === 'thead') {
const nodes = next(el.childNodes);
return {
$class: `${CommonMarkModel.NAMESPACE}.TableHead`,
nodes: next(el.childNodes),
nodes: nodes.filter(n => n.$class === `${CommonMarkModel.NAMESPACE}.TableRow`),
};
}
if (el.tagName && el.tagName.toLowerCase() === 'tbody') {
const nodes = next(el.childNodes);
return {
$class: `${CommonMarkModel.NAMESPACE}.TableBody`,
nodes: next(el.childNodes),
nodes: nodes.filter(n => n.$class === `${CommonMarkModel.NAMESPACE}.TableRow`),
};
}
if (el.tagName && el.tagName.toLowerCase() === 'tr') {
const nodes = next(el.childNodes);
return {
$class: `${CommonMarkModel.NAMESPACE}.TableRow`,
nodes: next(el.childNodes),
nodes: nodes.filter(n => n.$class === `${CommonMarkModel.NAMESPACE}.HeaderCell` || n.$class === `${CommonMarkModel.NAMESPACE}.TableCell`),
};
}
if (el.tagName && el.tagName.toLowerCase() === 'th') {
return {
$class: `${CommonMarkModel.NAMESPACE}.HeaderCell`,
nodes: next(el.childNodes),
nodes: cleanTableNodes(next(el.childNodes)),
};
}
if (el.tagName && el.tagName.toLowerCase() === 'td') {
return {
$class: `${CommonMarkModel.NAMESPACE}.TableCell`,
nodes: next(el.childNodes),
nodes: cleanTableNodes(next(el.childNodes)),
};
}
},
Expand Down Expand Up @@ -564,4 +711,4 @@ const rules = [
];


module.exports = rules;
module.exports = rules;
Loading