Tune mfmToHtml (#7841)

* Tune mfmToHtml

* typo

* add
This commit is contained in:
MeiMei 2021-09-26 01:57:38 +09:00 committed by GitHub
parent 42092812ea
commit 3d99b1f7b9
2 changed files with 136 additions and 12 deletions

View file

@ -5,7 +5,9 @@ import { URL } from 'url';
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/; const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/; const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
export function fromHtml(html: string, hashtagNames?: string[]): string { export function fromHtml(html: string, hashtagNames?: string[]): string | null {
if (html == null) return null;
const dom = parse5.parseFragment(html); const dom = parse5.parseFragment(html);
let text = ''; let text = '';
@ -19,6 +21,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
function getText(node: parse5.Node): string { function getText(node: parse5.Node): string {
if (treeAdapter.isTextNode(node)) return node.value; if (treeAdapter.isTextNode(node)) return node.value;
if (!treeAdapter.isElementNode(node)) return ''; if (!treeAdapter.isElementNode(node)) return '';
if (node.nodeName === 'br') return '\n';
if (node.childNodes) { if (node.childNodes) {
return node.childNodes.map(n => getText(n)).join(''); return node.childNodes.map(n => getText(n)).join('');
@ -27,6 +30,14 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
return ''; return '';
} }
function appendChildren(childNodes: parse5.ChildNode[]): void {
if (childNodes) {
for (const n of childNodes) {
analyze(n);
}
}
}
function analyze(node: parse5.Node) { function analyze(node: parse5.Node) {
if (treeAdapter.isTextNode(node)) { if (treeAdapter.isTextNode(node)) {
text += node.value; text += node.value;
@ -42,6 +53,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
break; break;
case 'a': case 'a':
{
const txt = getText(node); const txt = getText(node);
const rel = node.attrs.find(x => x.name === 'rel'); const rel = node.attrs.find(x => x.name === 'rel');
const href = node.attrs.find(x => x.name === 'href'); const href = node.attrs.find(x => x.name === 'href');
@ -87,23 +99,111 @@ export function fromHtml(html: string, hashtagNames?: string[]): string {
text += generateLink(); text += generateLink();
} }
break; break;
}
case 'h1':
{
text += '【';
appendChildren(node.childNodes);
text += '】\n';
break;
}
case 'b':
case 'strong':
{
text += '**';
appendChildren(node.childNodes);
text += '**';
break;
}
case 'small':
{
text += '<small>';
appendChildren(node.childNodes);
text += '</small>';
break;
}
case 's':
case 'del':
{
text += '~~';
appendChildren(node.childNodes);
text += '~~';
break;
}
case 'i':
case 'em':
{
text += '<i>';
appendChildren(node.childNodes);
text += '</i>';
break;
}
// block code (<pre><code>)
case 'pre': {
if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
text += '```\n';
text += getText(node.childNodes[0]);
text += '\n```\n';
} else {
appendChildren(node.childNodes);
}
break;
}
// inline code (<code>)
case 'code': {
text += '`';
appendChildren(node.childNodes);
text += '`';
break;
}
case 'blockquote': {
const t = getText(node);
if (t) {
text += '> ';
text += t.split('\n').join(`\n> `);
}
break;
}
case 'p': case 'p':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
{
text += '\n\n'; text += '\n\n';
if (node.childNodes) { appendChildren(node.childNodes);
for (const n of node.childNodes) {
analyze(n);
}
}
break; break;
}
default: // other block elements
if (node.childNodes) { case 'div':
for (const n of node.childNodes) { case 'header':
analyze(n); case 'footer':
} case 'article':
} case 'li':
case 'dt':
case 'dd':
{
text += '\n';
appendChildren(node.childNodes);
break; break;
}
default: // includes inline elements
{
appendChildren(node.childNodes);
break;
}
} }
} }
} }

View file

@ -19,6 +19,30 @@ describe('toHtml', () => {
}); });
describe('fromHtml', () => { describe('fromHtml', () => {
it('p', () => {
assert.deepStrictEqual(fromHtml('<p>a</p><p>b</p>'), 'a\n\nb');
});
it('block element', () => {
assert.deepStrictEqual(fromHtml('<div>a</div><div>b</div>'), 'a\nb');
});
it('inline element', () => {
assert.deepStrictEqual(fromHtml('<ul><li>a</li><li>b</li></ul>'), 'a\nb');
});
it('block code', () => {
assert.deepStrictEqual(fromHtml('<pre><code>a\nb</code></pre>'), '```\na\nb\n```');
});
it('inline code', () => {
assert.deepStrictEqual(fromHtml('<code>a</code>'), '`a`');
});
it('quote', () => {
assert.deepStrictEqual(fromHtml('<blockquote>a\nb</blockquote>'), '> a\n> b');
});
it('br', () => { it('br', () => {
assert.deepStrictEqual(fromHtml('<p>abc<br><br/>d</p>'), 'abc\n\nd'); assert.deepStrictEqual(fromHtml('<p>abc<br><br/>d</p>'), 'abc\n\nd');
}); });