Source code for markdown_it.rules_inline.state_inline

from __future__ import annotations

from collections import namedtuple
from collections.abc import MutableMapping
from dataclasses import dataclass
from typing import TYPE_CHECKING

from .._compat import DATACLASS_KWARGS
from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..ruler import StateBase
from ..token import Token

if TYPE_CHECKING:
    from markdown_it import MarkdownIt


[docs]@dataclass(**DATACLASS_KWARGS)
class Delimiter:
    # Char code of the starting marker (number).
    marker: int

    # Total length of these series of delimiters.
    length: int

    # An amount of characters before this one that's equivalent to
    # current one. In plain English: if this delimiter does not open
    # an emphasis, neither do previous `jump` characters.
    #
    # Used to skip sequences like "*****" in one step, for 1st asterisk
    # value will be 0, for 2nd it's 1 and so on.
    jump: int

    # A position of the token this delimiter corresponds to.
    token: int

    # If this delimiter is matched as a valid opener, `end` will be
    # equal to its position, otherwise it's `-1`.
    end: int

    # Boolean flags that determine if this delimiter could open or close
    # an emphasis.
    open: bool
    close: bool

    level: bool | None = None


Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"])


[docs]class StateInline(StateBase):
    def __init__(
        self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token]
    ):
        self.src = src
        self.env = env
        self.md = md
        self.tokens = outTokens
        self.tokens_meta: list[dict | None] = [None] * len(outTokens)

        self.pos = 0
        self.posMax = len(self.src)
        self.level = 0
        self.pending = ""
        self.pendingLevel = 0

        # Stores { start: end } pairs. Useful for backtrack
        # optimization of pairs parse (emphasis, strikes).
        self.cache: dict[int, int] = {}

        # List of emphasis-like delimiters for current tag
        self.delimiters: list[Delimiter] = []

        # Stack of delimiter lists for upper level tags
        self._prev_delimiters: list[list[Delimiter]] = []

        # backticklength => last seen position
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
        )

[docs]    def pushPending(self):
        token = Token("text", "", 0)
        token.content = self.pending
        token.level = self.pendingLevel
        self.tokens.append(token)
        self.pending = ""
        return token

[docs]    def push(self, ttype, tag, nesting):
        """Push new token to "stream".
        If pending text exists - flush it as text token
        """
        if self.pending:
            self.pushPending()

        token = Token(ttype, tag, nesting)
        token_meta = None

        if nesting < 0:
            # closing tag
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        token.level = self.level

        if nesting > 0:
            # opening tag
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            self.delimiters = []
            token_meta = {"delimiters": self.delimiters}

        self.pendingLevel = self.level
        self.tokens.append(token)
        self.tokens_meta.append(token_meta)
        return token

[docs]    def scanDelims(self, start, canSplitWord):
        """
        Scan a sequence of emphasis-like markers, and determine whether
        it can start an emphasis sequence or end an emphasis sequence.

         - start - position to scan from (it should point at a valid marker);
         - canSplitWord - determine if these markers can be found inside a word

        """
        pos = start
        left_flanking = True
        right_flanking = True
        maximum = self.posMax
        marker = self.srcCharCode[start]

        # treat beginning of the line as a whitespace
        lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20

        while pos < maximum and self.srcCharCode[pos] == marker:
            pos += 1

        count = pos - start

        # treat end of the line as a whitespace
        nextChar = self.srcCharCode[pos] if pos < maximum else 0x20

        isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
        isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))

        isLastWhiteSpace = isWhiteSpace(lastChar)
        isNextWhiteSpace = isWhiteSpace(nextChar)

        if isNextWhiteSpace:
            left_flanking = False
        elif isNextPunctChar:
            if not (isLastWhiteSpace or isLastPunctChar):
                left_flanking = False

        if isLastWhiteSpace:
            right_flanking = False
        elif isLastPunctChar:
            if not (isNextWhiteSpace or isNextPunctChar):
                right_flanking = False

        if not canSplitWord:
            can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
            can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
        else:
            can_open = left_flanking
            can_close = right_flanking

        return Scanned(can_open, can_close, count)
markdown-it-py

Source code for markdown_it.rules_inline.state_inline