Using markdown_it

This document can be opened to execute with Jupytext!

markdown-it-py may be used as an API via the markdown_it package.

The raw text is first parsed to syntax ‘tokens’, then these are converted to other formats using ‘renderers’.

Quick-Start

The simplest way to understand how text will be parsed is using:

from markdown_it import MarkdownIt
md = MarkdownIt()
md.render("some *text*")
'<p>some <em>text</em></p>\n'
for token in md.parse("some *text*"):
    print(token)
    print()
Token(type='paragraph_open', tag='p', nesting=1, attrs=None, map=[0, 1], level=0, children=None, content='', markup='', info='', meta={}, block=True, hidden=False)

Token(type='inline', tag='', nesting=0, attrs=None, map=[0, 1], level=1, children=[Token(type='text', tag='', nesting=0, attrs=None, map=None, level=0, children=None, content='some ', markup='', info='', meta={}, block=False, hidden=False), Token(type='em_open', tag='em', nesting=1, attrs=None, map=None, level=0, children=None, content='', markup='*', info='', meta={}, block=False, hidden=False), Token(type='text', tag='', nesting=0, attrs=None, map=None, level=1, children=None, content='text', markup='', info='', meta={}, block=False, hidden=False), Token(type='em_close', tag='em', nesting=-1, attrs=None, map=None, level=0, children=None, content='', markup='*', info='', meta={}, block=False, hidden=False)], content='some *text*', markup='', info='', meta={}, block=True, hidden=False)

Token(type='paragraph_close', tag='p', nesting=-1, attrs=None, map=None, level=0, children=None, content='', markup='', info='', meta={}, block=True, hidden=False)

The Parser

The MarkdownIt class is instantiated with parsing configuration options, dictating the syntax rules and additional options for the parser and renderer. You can define this configuration via a preset name ('zero', 'commonmark' or 'default'), or by directly supplying a dictionary.

from markdown_it.presets import zero
zero.make()
{'options': {'maxNesting': 20,
  'html': False,
  'linkify': False,
  'typographer': False,
  'quotes': '“”‘’',
  'xhtmlOut': False,
  'breaks': False,
  'langPrefix': 'language-',
  'highlight': None},
 'components': {'core': {'rules': ['normalize', 'block', 'inline']},
  'block': {'rules': ['paragraph']},
  'inline': {'rules': ['text'], 'rules2': ['balance_pairs', 'text_collapse']}}}
md = MarkdownIt("zero")
md.options
{'maxNesting': 20,
 'html': False,
 'linkify': False,
 'typographer': False,
 'quotes': '“”‘’',
 'xhtmlOut': False,
 'breaks': False,
 'langPrefix': 'language-',
 'highlight': None}
print(md.get_active_rules())
{'core': ['normalize', 'block', 'inline'], 'block': ['paragraph'], 'inline': ['text'], 'inline2': ['balance_pairs', 'text_collapse']}
print(md.get_all_rules())
{'core': ['normalize', 'block', 'inline', 'linkify', 'replacements', 'smartquotes'], 'block': ['table', 'code', 'fence', 'blockquote', 'hr', 'list', 'reference', 'heading', 'lheading', 'html_block', 'paragraph'], 'inline': ['text', 'newline', 'escape', 'backticks', 'strikethrough', 'emphasis', 'link', 'image', 'autolink', 'html_inline', 'entity'], 'inline2': ['balance_pairs', 'strikethrough', 'emphasis', 'text_collapse']}

You can find all the parsing rules in the source code: parser_core.py, parser_block.py, parser_inline.py. Any of the parsing rules can be enabled/disabled, and these methods are chainable:

md.render("- __*emphasise this*__")
'<p>- __*emphasise this*__</p>\n'
md.enable(["list", "emphasis"]).render("- __*emphasise this*__")
'<ul>\n<li><strong><em>emphasise this</em></strong></li>\n</ul>\n'

You can temporarily modify rules with the reset_rules context manager.

with md.reset_rules():
    md.disable("emphasis")
    print(md.render("__*emphasise this*__"))
md.render("__*emphasise this*__")
<p>__*emphasise this*__</p>
'<p><strong><em>emphasise this</em></strong></p>\n'

Additionally renderInline runs the parser with all block syntax rules disabled.

md.renderInline("__*emphasise this*__")
'<strong><em>emphasise this</em></strong>'

Plugins load

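Plugins load collections of additional syntax rules and render methods into the parser. (Note that the markdown_it.extensions import path used in the example below is deprecated; as the warnings in its output indicate, plugins should be imported from mdit_py_plugins instead.)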

from markdown_it import MarkdownIt
from markdown_it.extensions.front_matter import front_matter_plugin
from markdown_it.extensions.footnote import footnote_plugin

md = (
    MarkdownIt()
    .use(front_matter_plugin)
    .use(footnote_plugin)
    .enable('table')
)
text = ("""
---
a: 1
---

a | b
- | -
1 | 2

A footnote [^1]

[^1]: some details
""")
md.render(text)
/home/docs/checkouts/readthedocs.org/user_builds/markdown-it-py/envs/latest/lib/python3.7/site-packages/ipykernel_launcher.py:2: DeprecationWarning: `markdown_it.extensions` is deprecated, import from `mdit_py_plugins` instead
  
/home/docs/checkouts/readthedocs.org/user_builds/markdown-it-py/envs/latest/lib/python3.7/site-packages/ipykernel_launcher.py:3: DeprecationWarning: `markdown_it.extensions` is deprecated, import from `mdit_py_plugins` instead
  This is separate from the ipykernel package so we can avoid doing imports until
'<hr />\n<h2>a: 1</h2>\n<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>1</td>\n<td>2</td>\n</tr>\n</tbody>\n</table>\n<p>A footnote <sup class="footnote-ref"><a href="#fn1" id="fnref1">[1]</a></sup></p>\n<hr class="footnotes-sep" />\n<section class="footnotes">\n<ol class="footnotes-list">\n<li id="fn1" class="footnote-item"><p>some details <a href="#fnref1" class="footnote-backref">↩︎</a></p>\n</li>\n</ol>\n</section>\n'

The Token Stream

Before rendering, the text is parsed to a flat token stream of block level syntax elements, with nesting defined by opening (1) and closing (-1) attributes:

md = MarkdownIt("commonmark")
tokens = md.parse("""
Here's some *text*

1. a list

> a *quote*""")
[(t.type, t.nesting) for t in tokens]
[('paragraph_open', 1),
 ('inline', 0),
 ('paragraph_close', -1),
 ('ordered_list_open', 1),
 ('list_item_open', 1),
 ('paragraph_open', 1),
 ('inline', 0),
 ('paragraph_close', -1),
 ('list_item_close', -1),
 ('ordered_list_close', -1),
 ('blockquote_open', 1),
 ('paragraph_open', 1),
 ('inline', 0),
 ('paragraph_close', -1),
 ('blockquote_close', -1)]

Naturally all openings should eventually be closed, such that:

sum([t.nesting for t in tokens]) == 0
True

All tokens are the same class, which can also be created outside the parser:

tokens[0]
Token(type='paragraph_open', tag='p', nesting=1, attrs=None, map=[1, 2], level=0, children=None, content='', markup='', info='', meta={}, block=True, hidden=False)
from markdown_it.token import Token
token = Token("paragraph_open", "p", 1, block=True, map=[1, 2])
token == tokens[0]
True

The 'inline' type token contains the inline tokens as children:

tokens[1]
Token(type='inline', tag='', nesting=0, attrs=None, map=[1, 2], level=1, children=[Token(type='text', tag='', nesting=0, attrs=None, map=None, level=0, children=None, content="Here's some ", markup='', info='', meta={}, block=False, hidden=False), Token(type='em_open', tag='em', nesting=1, attrs=None, map=None, level=0, children=None, content='', markup='*', info='', meta={}, block=False, hidden=False), Token(type='text', tag='', nesting=0, attrs=None, map=None, level=1, children=None, content='text', markup='', info='', meta={}, block=False, hidden=False), Token(type='em_close', tag='em', nesting=-1, attrs=None, map=None, level=0, children=None, content='', markup='*', info='', meta={}, block=False, hidden=False)], content="Here's some *text*", markup='', info='', meta={}, block=True, hidden=False)

You can serialize a token (and its children) to a JSONable dictionary using:

print(tokens[1].as_dict())
{'type': 'inline', 'tag': '', 'nesting': 0, 'attrs': None, 'map': [1, 2], 'level': 1, 'children': [{'type': 'text', 'tag': '', 'nesting': 0, 'attrs': None, 'map': None, 'level': 0, 'children': None, 'content': "Here's some ", 'markup': '', 'info': '', 'meta': {}, 'block': False, 'hidden': False}, {'type': 'em_open', 'tag': 'em', 'nesting': 1, 'attrs': None, 'map': None, 'level': 0, 'children': None, 'content': '', 'markup': '*', 'info': '', 'meta': {}, 'block': False, 'hidden': False}, {'type': 'text', 'tag': '', 'nesting': 0, 'attrs': None, 'map': None, 'level': 1, 'children': None, 'content': 'text', 'markup': '', 'info': '', 'meta': {}, 'block': False, 'hidden': False}, {'type': 'em_close', 'tag': 'em', 'nesting': -1, 'attrs': None, 'map': None, 'level': 0, 'children': None, 'content': '', 'markup': '*', 'info': '', 'meta': {}, 'block': False, 'hidden': False}], 'content': "Here's some *text*", 'markup': '', 'info': '', 'meta': {}, 'block': True, 'hidden': False}

This dictionary can also be deserialized:

Token.from_dict(tokens[1].as_dict())
Token(type='inline', tag='', nesting=0, attrs=None, map=[1, 2], level=1, children=[Token(type='text', tag='', nesting=0, attrs=None, map=None, level=0, children=None, content="Here's some ", markup='', info='', meta={}, block=False, hidden=False), Token(type='em_open', tag='em', nesting=1, attrs=None, map=None, level=0, children=None, content='', markup='*', info='', meta={}, block=False, hidden=False), Token(type='text', tag='', nesting=0, attrs=None, map=None, level=1, children=None, content='text', markup='', info='', meta={}, block=False, hidden=False), Token(type='em_close', tag='em', nesting=-1, attrs=None, map=None, level=0, children=None, content='', markup='*', info='', meta={}, block=False, hidden=False)], content="Here's some *text*", markup='', info='', meta={}, block=True, hidden=False)

In some use cases nest_tokens may be useful, to collapse the opening/closing tokens into single tokens:

from markdown_it.token import nest_tokens
nested_tokens = nest_tokens(tokens)
[t.type for t in nested_tokens]
['paragraph_open', 'ordered_list_open', 'blockquote_open']

This introduces a single additional class NestedTokens, containing an opening, closing and children, which can be a list of mixed Token and NestedTokens.

nested_tokens[0]
NestedTokens(opening=Token(type='paragraph_open', tag='p', nesting=1, attrs=None, map=[1, 2], level=0, children=None, content='', markup='', info='', meta={}, block=True, hidden=False), closing=Token(type='paragraph_close', tag='p', nesting=-1, attrs=None, map=None, level=0, children=None, content='', markup='', info='', meta={}, block=True, hidden=False), children=[Token(type='inline', tag='', nesting=0, attrs=None, map=[1, 2], level=1, children=[Token(type='text', tag='', nesting=0, attrs=None, map=None, level=0, children=None, content="Here's some ", markup='', info='', meta={}, block=False, hidden=False), NestedTokens(opening=Token(type='em_open', tag='em', nesting=1, attrs=None, map=None, level=0, children=None, content='', markup='*', info='', meta={}, block=False, hidden=False), closing=Token(type='em_close', tag='em', nesting=-1, attrs=None, map=None, level=0, children=None, content='', markup='*', info='', meta={}, block=False, hidden=False), children=[Token(type='text', tag='', nesting=0, attrs=None, map=None, level=1, children=None, content='text', markup='', info='', meta={}, block=False, hidden=False)])], content="Here's some *text*", markup='', info='', meta={}, block=True, hidden=False)])

Renderers

After the token stream is generated, it’s passed to a renderer. It then plays all the tokens, passing each to a rule with the same name as the token type.

Renderer rules are located in md.renderer.rules and are simple functions with the same signature:

def function(renderer, tokens, idx, options, env):
  # Signature template shared by renderer rules. `htmlResult` is a placeholder
  # that is not defined here — presumably the rendered output string.
  return htmlResult

You can inject render methods into the instantiated render class.

md = MarkdownIt("commonmark")

def render_em_open(self, tokens, idx, options, env):
    """Render rule for ``em_open`` tokens: emit an ``<em>`` tag with a custom class.

    The arguments are accepted to match the renderer-rule signature but are
    not consulted; the same opening tag is returned for every token.
    """
    opening_tag = '<em class="myclass">'
    return opening_tag

md.add_render_rule("em_open", render_em_open)
md.render("*a*")
'<p><em class="myclass">a</em></p>\n'

This is a slight change from the JS version, where the renderer argument is at the end. Also, the add_render_rule method is specific to Python: rather than adding the function directly to md.renderer.rules, it ensures the method is bound to the renderer.

You can also subclass a renderer and add the method there:

from markdown_it.renderer import RendererHTML

class MyRenderer(RendererHTML):
    """HTML renderer whose ``em_open`` method emits a custom-classed ``<em>`` tag."""

    def em_open(self, tokens, idx, options, env):
        """Return the opening tag for an ``em_open`` token; arguments are unused."""
        opening_tag = '<em class="myclass">'
        return opening_tag

md = MarkdownIt("commonmark", renderer_cls=MyRenderer)
md.render("*a*")
'<p><em class="myclass">a</em></p>\n'

Plugins can support multiple render types, using the __output__ attribute (this is currently a Python-only feature).

from markdown_it.renderer import RendererHTML

class MyRenderer1(RendererHTML):
    """HTML renderer subclass that declares its output format as "html1"."""

    # Output-format label for this renderer class.
    __output__ = "html1"

class MyRenderer2(RendererHTML):
    """HTML renderer subclass that declares its output format as "html2"."""

    # Output-format label for this renderer class.
    __output__ = "html2"

def plugin(md):
    """Register one ``em_open`` render rule per output format on ``md``.

    Two rules are added through ``md.add_render_rule``: the first under
    ``fmt="html1"`` and the second under ``fmt="html2"``. Each rule returns an
    ``<em>`` opening tag carrying a format-specific class.
    """
    def em_open_for_html1(self, tokens, idx, options, env):
        opening_tag = '<em class="myclass1">'
        return opening_tag

    def em_open_for_html2(self, tokens, idx, options, env):
        opening_tag = '<em class="myclass2">'
        return opening_tag

    # Registration order matches the format numbering: html1 first, then html2.
    rules_by_format = (
        ("html1", em_open_for_html1),
        ("html2", em_open_for_html2),
    )
    for output_format, rule in rules_by_format:
        md.add_render_rule("em_open", rule, fmt=output_format)

md = MarkdownIt("commonmark", renderer_cls=MyRenderer1).use(plugin)
print(md.render("*a*"))

md = MarkdownIt("commonmark", renderer_cls=MyRenderer2).use(plugin)
print(md.render("*a*"))
<p><em class="myclass1">a</em></p>

<p><em class="myclass2">a</em></p>

Here’s a more concrete example; let’s replace images that link to Vimeo with the Vimeo player’s iframe:

import re
from markdown_it import MarkdownIt

# Matches http(s) Vimeo video URLs; group 2 captures the numeric video id.
# The dot in "vimeo.com" is escaped so that look-alike hosts are not accepted.
vimeoRE = re.compile(r'^https?:\/\/(www\.)?vimeo\.com\/(\d+)($|\/)')

def render_vimeo(self, tokens, idx, options, env):
    """Render an image token as an embedded Vimeo player when its ``src`` is a Vimeo URL.

    Args:
        self: The renderer the rule is bound to; its ``image`` method is used
            as the fallback.
        tokens: The token list being rendered.
        idx: Index of the image token within ``tokens``.
        options: Renderer options, forwarded unchanged to the fallback.
        env: Rendering environment, forwarded unchanged to the fallback.

    Returns:
        The iframe embed markup when the token's ``src`` matches ``vimeoRE``;
        otherwise whatever ``self.image`` returns for the same arguments.
    """
    token = tokens[idx]
    aIndex = token.attrIndex('src')

    # A negative index means the attribute is absent; indexing attrs with it
    # would silently read the wrong attribute, so fall back instead.
    match = vimeoRE.match(token.attrs[aIndex][1]) if aIndex >= 0 else None
    if match is None:
        return self.image(tokens, idx, options, env)

    # Match once and reuse the result rather than re-running the regex.
    ident = match[2]
    return ('<div class="embed-responsive embed-responsive-16by9">\n'
            '  <iframe class="embed-responsive-item" src="//player.vimeo.com/video/'
            + ident + '"></iframe>\n'
            '</div>\n')

md = MarkdownIt("commonmark")
md.add_render_rule("image", render_vimeo)
print(md.render("![](https://www.vimeo.com/123)"))
<p><div class="embed-responsive embed-responsive-16by9">
  <iframe class="embed-responsive-item" src="//player.vimeo.com/video/123"></iframe>
</div>
</p>

Here is another example, showing how to add target="_blank" to all links:

from markdown_it import MarkdownIt

def render_blank_link(self, tokens, idx, options, env):
    """Force ``target="_blank"`` on a link token, then render it normally.

    An existing ``target`` attribute has its value overwritten in place; when
    there is none, a new ``['target', '_blank']`` pair is pushed onto the
    token. The token is then handed to ``self.renderToken`` with the original
    arguments, and that result is returned.
    """
    link_token = tokens[idx]
    target_pos = link_token.attrIndex('target')

    if target_pos >= 0:
        # Attribute already present: replace only its value.
        link_token.attrs[target_pos][1] = '_blank'
    else:
        # Attribute missing: append it.
        link_token.attrPush(['target', '_blank'])

    # Delegate the actual markup generation to the default token renderer.
    return self.renderToken(tokens, idx, options, env)

md = MarkdownIt("commonmark")
md.add_render_rule("link_open", render_blank_link)
print(md.render("[a]\n\n[a]: b"))
<p><a href="b" target="_blank">a</a></p>