- Added support for inline links emphasis detection in headings.

- Improved memory and speed.
- Improved constants.
- Added tests.
This commit is contained in:
Franco Masotti 2022-04-18 13:02:51 +02:00
parent efdeee7ecd
commit 83f3fc657d
Signed by: frnmst
GPG Key ID: 24116ED85666780A
20 changed files with 2037 additions and 437 deletions

View File

@ -50,9 +50,9 @@ Status
======================= ===================== ============ ======================================================================================================== =============================================
Parser Status Alias of Supported parser version Source
======================= ===================== ============ ======================================================================================================== =============================================
``cmark`` |most| Version 0.30 (2021-06-19) https://github.com/commonmark/cmark
``cmark`` |most| Version 0.30 (2021-06-19) (a.k.a 0.30.0) https://github.com/commonmark/cmark
``commonmarker`` |good| ``github`` https://github.com/gjtorikian/commonmarker
``github`` |good| Version 0.29-gfm (2019-04-06) https://github.com/github/cmark-gfm
``github`` |good| Version 0.29-gfm (2019-04-06) (a.k.a 0.29.gfm.0) https://github.com/github/cmark-gfm
``goldmark`` |most| ``cmark`` https://github.com/yuin/goldmark
``gitlab`` |partial| Latest unknown version https://docs.gitlab.com/ee/user/markdown.html
``redcarpet`` |low| `Redcarpet v3.5.0 <https://github.com/vmg/redcarpet/tree/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae>`_ https://github.com/vmg/redcarpet
@ -81,59 +81,61 @@ G GitLab modified Redcarpet
Status history
^^^^^^^^^^^^^^
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| md-toc | ``standard`` | ``cmark`` | ``commonmarker`` | ``github`` | ``gitlab`` | ``goldmark`` | ``redcarpet`` |
+=================+==================+==================+==================+=================+=================+==================+==================+
| 0.0.1 | \- | \- | \- | unknown version | \- | \- | \- |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 1.0.0 | unknown version | \- | \- | lastest version | G | \- | |r1| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 2.0.0 | \- | lastest version | lastest version | C | G | \- | |r2| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 2.0.1 | \- | lastest version | lastest version | C | G | \- | |r2| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 3.0.0 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 3.1.0 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 4.0.0 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 5.0.0 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 5.0.1 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 6.0.0 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 6.0.1 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 6.0.2 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 7.0.0 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 7.0.1 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 7.0.2 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 7.0.3 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 7.0.4 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 7.0.5 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 7.1.0 | \- | ``github`` | ``github`` | 0.28-gfm | ``github`` | \- | v3.5.0 |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 7.2.0 | \- | 0.28 [#f1]_ | 0.28-gfm | 0.28-gfm | ``github`` | \- | v3.5.0 |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 8.0.0 | \- | 0.29 | ``github`` | 0.29-gfm | lastest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 8.0.1 | \- | 0.29 | ``github`` | 0.29-gfm | lastest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 8.1.0 | \- | 0.29 | ``github`` | 0.29-gfm | lastest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 8.1.1 | \- | 0.30 | ``github`` | 0.29-gfm [#f2]_ | lastest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
| 8.1.2 | \- | 0.30 | ``github`` | 0.29-gfm [#f2]_ | lastest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+-----------------+-----------------+------------------+------------------+
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| md-toc | ``standard`` | ``cmark`` | ``commonmarker`` | ``github`` | ``gitlab`` | ``goldmark`` | ``redcarpet`` |
+=================+==================+==================+==================+=====================+=================+==================+==================+
| 0.0.1 | \- | \- | \- | unknown version | \- | \- | \- |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 1.0.0 | unknown version | \- | \- | latest version | G | \- | |r1| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 2.0.0 | \- | latest version | latest version | C | G | \- | |r2| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 2.0.1 | \- | latest version | latest version | C | G | \- | |r2| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 3.0.0 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 3.1.0 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 4.0.0 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 5.0.0 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 5.0.1 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 6.0.0 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 6.0.1 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 6.0.2 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 7.0.0 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 7.0.1 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 7.0.2 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 7.0.3 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 7.0.4 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 7.0.5 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | |r3| |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 7.1.0 | \- | ``github`` | ``github`` | 0.28.gfm.? | ``github`` | \- | v3.5.0 |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 7.2.0 | \- | 0.28.? [#f1]_ | 0.28.gfm.? | 0.28.gfm.? | ``github`` | \- | v3.5.0 |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 8.0.0 | \- | 0.29.? | ``github`` | 0.29.gfm.? | latest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 8.0.1 | \- | 0.29.? | ``github`` | 0.29.gfm.? | latest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 8.1.0 | \- | 0.29.? | ``github`` | 0.29.gfm.? | latest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 8.1.1 | \- | 0.30.? | ``github`` | 0.29.gfm.? [#f2]_ | latest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 8.1.2 | \- | 0.30.? | ``github`` | 0.29.gfm.? [#f2]_ | latest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
| 8.1.3 | \- | 0.30.0 | ``github`` | 0.29.gfm.0 [#f2]_ | latest version | ``cmark`` | v3.5.0 |
+-----------------+------------------+------------------+------------------+---------------------+-----------------+------------------+------------------+
.. [#f1] used alias ``github``
.. [#f2] when this version was released GFM still needed to catch up with cmark

View File

@ -28,11 +28,10 @@ import sys
import fpyutils
from . import generic
from .cmark import inlines_c, node_h
from .cmark import inlines_c, node_h, references_h
from .constants import common_defaults
from .constants import parser as md_parser
from .exceptions import (CannotTreatUnicodeString, GithubEmptyLinkLabel,
GithubOverflowCharsLinkLabel,
from .exceptions import (GithubEmptyLinkLabel, GithubOverflowCharsLinkLabel,
GithubOverflowOrderedListMarker,
StdinIsNotAFileToBeWritten,
StringCannotContainNewlines,
@ -295,7 +294,7 @@ def build_toc(filename: str,
# Save the TOC line with the indentation.
toc.append(build_toc_line(toc_line_no_indent,
no_of_indentation_spaces_curr) + newline_string)
no_of_indentation_spaces_curr) + newline_string)
header_type_prev = header_type_curr
@ -712,7 +711,7 @@ def remove_emphasis(line: str, parser: str = 'github') -> list:
"""
if parser in ['github', 'cmark', 'gitlab', 'commonmarker', 'goldmark', 'redcarpet']:
mem = None
refmap = None
refmap = references_h._cmarkCmarkReferenceMap()
parent = node_h._cmarkCmarkNode()
parent.data = line

View File

@ -23,7 +23,9 @@ r"""A cmark implementation file."""
import copy
import sys
from ..constants import parser as md_parser
from .buffer_h import _cmark_CMARK_BUF_INIT, _cmarkCmarkStrbuf
from .cmark_ctype_c import _cmark_cmark_ispunct, _cmark_cmark_isspace
from .cmark_h import _cmarkCmarkMem
# License E applies to this file except for non derivative code:
@ -42,6 +44,11 @@ def _cmark_cmark_strbuf_init(mem: _cmarkCmarkMem, buf: _cmarkCmarkStrbuf, initia
_cmark_cmark_strbuf_grow(buf, initial_size)
# 0.30
def _cmark_S_strbuf_grow_by(buf: _cmarkCmarkStrbuf, add: int):
    r"""Request that the buffer grows to its current size plus ``add``.

    :parameter buf: the buffer to grow.
    :parameter add: the number of extra characters to make room for.
    """
    wanted_size: int = buf.size + add
    _cmark_cmark_strbuf_grow(buf, wanted_size)
# 0.29, 0.30
def _cmark_cmark_strbuf_grow(buf: _cmarkCmarkStrbuf, target_size: int):
# Instead of using assert just raise a ValueError
@ -57,7 +64,7 @@ def _cmark_cmark_strbuf_grow(buf: _cmarkCmarkStrbuf, target_size: int):
# Truncate number to a length of 30 bits.
target_size &= INT32_MAX
if target_size > INT32_MAX / 2:
if target_size > int(INT32_MAX / 2):
print("[cmark] _cmark_cmark_strbuf_grow requests buffer with size > " + str(INT32_MAX / 2) + ", aborting")
sys.exit(1)
@ -84,6 +91,7 @@ def _cmark_cmark_strbuf_clear(buf: _cmarkCmarkStrbuf):
buf.size = 0
if buf.asize > 0:
del buf.ptr
buf.ptr = str()
@ -98,13 +106,47 @@ def _cmark_cmark_strbuf_set(buf: _cmarkCmarkStrbuf, data: str, length: int):
# alternative to
# memmove(buf->ptr, data, len)
buf.ptr = copy.deepcopy(data[0:length])
buf.ptr = copy.deepcopy(data[0:length - 0])
buf.size = length
# No need to set termination character
# buf.ptr[buf.size] = '\0'
# 0.30
# Add a single character to a buffer.
def _cmark_cmark_strbuf_putc(buf: _cmarkCmarkStrbuf, c: int):
    r"""Append a single character to the buffer.

    :parameter buf: the buffer receiving the character.
    :parameter c: the code of the character; only its lowest 8 bits are used.
    """
    _cmark_S_strbuf_grow_by(buf, 1)

    # Alternative to
    #   buf->ptr[buf->size++] = (unsigned char)(c & 0xFF);
    # Everything stored before index ``size`` is kept and the new character
    # is written exactly at index ``size``.
    buf.ptr = buf.ptr[:buf.size] + chr(c & 0xFF)
    buf.size += 1

    # No need for the terminator character.
    # buf->ptr[buf->size] = '\0';
# 0.30
def _cmark_cmark_strbuf_put(buf: _cmarkCmarkStrbuf, data: str,
                            len: int):
    r"""Append the first ``len`` characters of ``data`` to the buffer.

    :parameter buf: the buffer receiving the data.
    :parameter data: the string to copy from.
    :parameter len: the number of characters to copy; nothing is done
         when it is zero or negative.
    """
    if len <= 0:
        return

    _cmark_S_strbuf_grow_by(buf, len)

    # Alternative to
    #   memmove(buf.ptr + buf.size, data, len)
    # The write starts at index ``size`` so that nothing already stored is
    # lost. Strings are immutable: slicing is enough, no deep copy is needed.
    buf.ptr = buf.ptr[:buf.size] + data[:len]
    buf.size += len

    # No need for line terminator.
    # buf.ptr[buf.size] = '\0';
# 0.30
def _cmark_cmark_strbuf_puts(buf: _cmarkCmarkStrbuf, string: str):
    r"""Append a whole string to the buffer.

    :parameter buf: the buffer receiving the string.
    :parameter string: the string to append in full.
    """
    string_length: int = len(string)
    _cmark_cmark_strbuf_put(buf, string, string_length)
# 0.29, 0.30
def _cmark_cmark_strbuf_detach(buf: _cmarkCmarkStrbuf) -> str:
data: str = buf.ptr
@ -119,12 +161,12 @@ def _cmark_cmark_strbuf_detach(buf: _cmarkCmarkStrbuf) -> str:
# 0.29, 0.30
def _cmark_cmark_strbuf_truncate(buf: _cmarkCmarkStrbuf, len: int):
if len < 0:
len = 0
def _cmark_cmark_strbuf_truncate(buf: _cmarkCmarkStrbuf, length: int):
if length < 0:
length = 0
if len < buf.size:
buf.size = len
if length < buf.size:
buf.size = length
# No need for the terminator character.
# buf.ptr[buf.size] = '\0'
@ -139,11 +181,82 @@ def _cmark_cmark_strbuf_drop(buf: _cmarkCmarkStrbuf, n: int):
if buf.size:
# Alternative to
# memmove(buf->ptr, buf->ptr + n, buf->size);
buf.ptr = copy.deepcopy(buf.ptr[n:buf.size])
buf.ptr = copy.deepcopy(buf.ptr[n:buf.size - n])
# No need for the terminator character.
# buf->ptr[buf->size] = '\0';
def _cmark_cmark_strbuf_rtrim(buf: _cmarkCmarkStrbuf):
    r"""Drop trailing whitespace by shrinking the buffer size.

    Only ``buf.size`` is changed; ``buf.ptr`` is left as it is.

    :parameter buf: the buffer to trim on the right.
    """
    if not buf.size:
        return

    while buf.size > 0:
        # _cmark_cmark_isspace expects a character code, not a string.
        if not _cmark_cmark_isspace(ord(buf.ptr[buf.size - 1])):
            break

        buf.size -= 1

    # No need for the terminator character.
    # buf->ptr[buf->size] = '\0';
# 0.30
def _cmark_cmark_strbuf_trim(buf: _cmarkCmarkStrbuf):
    r"""Remove leading and trailing whitespace from the buffer.

    Leading whitespace is counted and handed to ``_cmark_cmark_strbuf_drop``,
    then ``_cmark_cmark_strbuf_rtrim`` takes care of the right side.

    :parameter buf: the buffer to trim.
    """
    i: int = 0

    if not buf.size:
        return

    # _cmark_cmark_isspace expects a character code, not a string.
    while i < buf.size and _cmark_cmark_isspace(ord(buf.ptr[i])):
        i += 1

    _cmark_cmark_strbuf_drop(buf, i)

    _cmark_cmark_strbuf_rtrim(buf)
# Destructively modify string, collapsing consecutive
# space and newline characters into a single space.
# 0.30
def _cmark_cmark_strbuf_normalize_whitespace(s: _cmarkCmarkStrbuf):
    r"""Collapse every run of whitespace characters into a single space.

    The first ``s.size`` characters are scanned; the normalized text is
    written at the start of ``s.ptr`` and the buffer is truncated to the
    normalized length.

    :parameter s: the buffer to normalize in place.
    """
    last_char_was_space: bool = False
    kept: list = []

    for r in range(0, s.size):
        # _cmark_cmark_isspace expects a character code, not a string.
        if _cmark_cmark_isspace(ord(s.ptr[r])):
            if not last_char_was_space:
                # s->ptr[w++] = ' ';
                kept.append(' ')
                last_char_was_space = True
        else:
            # s->ptr[w++] = s->ptr[r];
            kept.append(s.ptr[r])
            last_char_was_space = False

    # ``w`` is the write index reached by the C loop. Characters after it
    # are left untouched, as an in-place overwrite would do.
    w: int = len(kept)
    s.ptr = ''.join(kept) + s.ptr[w:]

    _cmark_cmark_strbuf_truncate(s, w)
# 0.30
# Destructively unescape a string: remove backslashes before punctuation chars.
def _cmark_cmark_strbuf_unescape(buf: _cmarkCmarkStrbuf):
    r"""Remove the backslashes that come before punctuation characters.

    The first ``buf.size`` characters are scanned; the unescaped text is
    written at the start of ``buf.ptr`` and the buffer is truncated to the
    unescaped length.

    :parameter buf: the buffer to unescape in place.
    """
    r: int = 0
    kept: list = []

    while r < buf.size:
        # The C code relies on the terminator character to stop at the end
        # of the buffer: here the bound is checked explicitly so that a
        # trailing backslash is kept instead of raising an IndexError.
        if (buf.ptr[r] == '\\'
                and r + 1 < buf.size
                and _cmark_cmark_ispunct(ord(buf.ptr[r + 1]))):
            r += 1

        # buf->ptr[w++] = buf->ptr[r];
        kept.append(buf.ptr[r])
        r += 1

    # ``w`` is the write index reached by the C loop. Characters after it
    # are left untouched, as an in-place overwrite would do.
    w: int = len(kept)
    buf.ptr = ''.join(kept) + buf.ptr[w:]

    _cmark_cmark_strbuf_truncate(buf, w)
if __name__ == '__main__':
pass

View File

@ -29,6 +29,13 @@ from .cmark_h import _cmarkCmarkMem
# 0.29, 0.30
class _cmarkCmarkStrbuf:
__slots__ = [
'mem',
'ptr',
'asize',
'size',
]
def __init__(self):
self.mem: _cmarkCmarkMem = None
self.ptr: str = str()
@ -40,7 +47,7 @@ class _cmarkCmarkStrbuf:
# #define CMARK_BUF_INIT(mem) \
# { mem, cmark_strbuf__initbuf, 0, 0 }
# 0.29, 0.30
def _cmark_CMARK_BUF_INIT(mem):
def _cmark_CMARK_BUF_INIT(mem: _cmarkCmarkMem):
b = _cmarkCmarkStrbuf()
b.mem = mem

View File

@ -23,34 +23,57 @@ r"""The cmark implementation file."""
import copy
from ..constants import parser as md_parser
from .cmark_ctype_c import _cmark_cmark_isspace
# License E applies to this file except for non derivative code:
# in that case the license header at the top of the file applies.
# See docs/copyright_license.rst
# Returns 1 if c is a "whitespace" character as defined by the spec.
# int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; }
# The only defined whitespaces in the spec are Unicode whitespaces.
# 0.30
class _cmarkCmarkChunk:
r"""See chunk.h file."""
__slots__ = [
'data',
'length',
]
def __init__(self, data: str = None, length: int = 0, alloc: int = 0):
def __init__(self, data: str = None, length: int = 0):
self.data: str = data
self.length: int = length
# Returns 1 if c is a "whitespace" character as defined by the spec.
# int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; }
# The only defined whitespaces in the spec are Unicode whitespaces.
# 0.30
def _cmark_cmark_chunk_free(c: _cmarkCmarkChunk):
    r"""Reset a chunk: its data becomes ``None`` and its length ``0``.

    :parameter c: the chunk to reset.
    """
    c.data, c.length = None, 0
# 0.30
def _cmark_cmark_chunk_ltrim(c: _cmarkCmarkChunk):
    r"""Remove leading whitespace from a chunk.

    :parameter c: the chunk to trim on the left.
    """
    while c.length > 0 and _cmark_cmark_isspace(ord(c.data[0])):
        # Alternative to the pointer arithmetic
        #   c->data++;
        #   c->len--;
        # A string cannot be advanced by adding an integer: drop its first
        # character instead. The length attribute is called ``length``.
        c.data = c.data[1:]
        c.length -= 1
# 0.30
def _cmark_cmark_chunk_rtrim(c: _cmarkCmarkChunk):
while c.length > 0:
# if (!cmark_isspace(c->data[c->len - 1]))
if not c.data[c.length - 1] in md_parser['cmark']['pseudo-re']['UWC']:
if not _cmark_cmark_isspace(ord(c.data[c.length - 1])):
break
c.length -= 1
# 0.30
def _cmark_cmark_chunk_trim(c: _cmarkCmarkChunk):
    r"""Trim a chunk on the left side first, then on the right side.

    :parameter c: the chunk to trim.
    """
    for trim_side in (_cmark_cmark_chunk_ltrim, _cmark_cmark_chunk_rtrim):
        trim_side(c)
# 0.30
def _cmark_cmark_chunk_literal(data: str) -> _cmarkCmarkChunk:
length: int
@ -66,8 +89,8 @@ def _cmark_cmark_chunk_literal(data: str) -> _cmarkCmarkChunk:
# 0.29, 0.30
def _cmark_cmark_chunk_dup(ch: _cmarkCmarkChunk, pos: int, length: int) -> str:
c = _cmarkCmarkChunk(copy.deepcopy(ch.data[pos: pos + length]), length)
def _cmark_cmark_chunk_dup(ch: _cmarkCmarkChunk, pos: int, length: int) -> _cmarkCmarkChunk:
c = _cmarkCmarkChunk(copy.deepcopy(ch.data[pos:]), length)
return c

View File

@ -27,11 +27,21 @@ from ..constants import parser as md_parser
# See docs/copyright_license.rst
# Return True if c is a "whitespace" character as defined by the spec.
# 0.30
def _cmark_cmark_isspace(char: int) -> bool:
    r"""Tell whether a character code is listed in the cmark ``UWC`` set.

    :parameter char: the code of the character to check.
    :returns: ``True`` if ``chr(char)`` is part of
         ``md_parser['cmark']['pseudo-re']['UWC']``, ``False`` otherwise.
    """
    return chr(char) in md_parser['cmark']['pseudo-re']['UWC']
# Return True if c is an ascii punctuation character.
# 0.29, 0.30
def _cmark_cmark_ispunct(char: int, parser: str = 'github') -> bool:
def _cmark_cmark_ispunct(char: int) -> bool:
value = False
if chr(char) in md_parser[parser]['pseudo-re']['APC']:
if chr(char) in md_parser['cmark']['pseudo-re']['APC']:
value = True
return value

29
md_toc/cmark/houdini_h.py Normal file
View File

@ -0,0 +1,29 @@
#
# houdini_h.py
#
# Copyright (C) 2017-2022 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com)
#
# This file is part of md-toc.
#
# md-toc is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# md-toc is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with md-toc. If not, see <http://www.gnu.org/licenses/>.
#
r"""A cmark implementation file."""
def _cmark_HOUDINI_ESCAPED_SIZE(x: int) -> int:
return (x * 12) / 10
def _cmark_HOUDINI_UNESCAPED_SIZE(x: int) -> int:
return x

View File

@ -0,0 +1,173 @@
#
# houdini_html_u_c.py
#
# Copyright (C) 2017-2022 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com)
#
# This file is part of md-toc.
#
# md-toc is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# md-toc is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with md-toc. If not, see <http://www.gnu.org/licenses/>.
#
r"""A cmark implementation file."""
import re
from ..constants import parser as md_parser
from ..generic import _strncmp
from .buffer_c import (_cmark_cmark_strbuf_grow, _cmark_cmark_strbuf_put,
_cmark_cmark_strbuf_putc, _cmark_cmark_strbuf_puts)
from .buffer_h import _cmarkCmarkStrbuf
from .houdini_h import _cmark_HOUDINI_UNESCAPED_SIZE
from .utf8_c import _cmark_cmark_utf8proc_encode_char
# Recursive function of a binary search.
# 0.30.
def _cmark_S_lookup(i: int, low: int, hi: int, s: str, length: int) -> str:
    r"""Search an entity by name with a recursive binary search.

    :parameter i: the index of the entry to compare with.
    :parameter low: the lowest index still to be searched.
    :parameter hi: the highest index still to be searched.
    :parameter s: the string starting with the entity name.
    :parameter length: the number of characters of ``s`` making up the name.
    :returns: the ``bytes`` field of the matching entry or ``None`` if no
         entry matches.
    """
    j: int
    entry = md_parser['cmark']['re']['ENTITIES']['entities'][i]
    name = entry['entity']
    cmp: int = _strncmp(s, name, length)

    # Alternative to
    #   cmark_entities[i].entity[len] == 0
    # Python strings have no terminator character: the name ends here if it
    # is exactly ``length`` characters long. An explicit terminator, if the
    # table stores one, is still recognised.
    name_ends_here: bool = (
        len(name) == length
        or (len(name) > length and name[length] in (0, '\0'))
    )

    if cmp == 0 and name_ends_here:
        return entry['bytes']
    elif cmp <= 0 and i > low:
        # ``cmp == 0`` lands here when ``s`` is only a prefix of the entry
        # name: the wanted entry, if any, sorts before this one.
        # Floor division keeps the index an integer.
        j = i - ((i - low) // 2)
        if j == i:
            j -= 1
        return _cmark_S_lookup(j, low, i - 1, s, length)
    elif cmp > 0 and i < hi:
        j = i + ((hi - i) // 2)
        if j == i:
            j += 1
        return _cmark_S_lookup(j, i + 1, hi, s, length)
    else:
        return None
# 0.30.
def _cmark_S_lookup_entity(s: str, length: int):
    r"""Search an entity by name in the whole entities table.

    :parameter s: the string starting with the entity name.
    :parameter length: the number of characters of ``s`` making up the name.
    :returns: what ``_cmark_S_lookup`` returns when started from the middle
         of the table.
    """
    entities_total: int = md_parser['cmark']['re']['ENTITIES']['CMARK_NUM_ENTITIES']

    # Floor division: the starting point is used as a list index.
    return _cmark_S_lookup(entities_total // 2, 0, entities_total - 1, s, length)
# 0.30.
def _cmark_houdini_unescape_ent(ob: _cmarkCmarkStrbuf, src: str, size: int) -> int:
    r"""Unescape one character reference placed right after an ``&``.

    Numeric references (``#123;`` and ``#x7B;``) are decoded and written with
    ``_cmark_cmark_utf8proc_encode_char``. Any other input is looked up as
    a named entity with ``_cmark_S_lookup_entity``.

    :parameter ob: the output buffer.
    :parameter src: the text following the ``&`` character.
    :parameter size: the number of characters of ``src`` to consider.
    :returns: the number of characters consumed, terminating ``;`` included,
         or ``0`` if ``src`` does not start with a valid reference.
    """
    i: int = 0

    if size >= 3 and src[0] == '#':
        codepoint: int = 0
        num_digits: int = 0
        max_digits: int = 7

        # Only ASCII digits are valid here: ``\d`` would also accept other
        # Unicode digits, which cannot be converted by the arithmetic below.
        if '0' <= src[1] <= '9':
            i = 1
            while i < size and '0' <= src[i] <= '9':
                codepoint = (codepoint * 10) + (ord(src[i]) - ord('0'))

                if codepoint >= 0x110000:
                    # Keep counting digits but
                    # avoid integer overflow.
                    codepoint = 0x110000

                i += 1

            num_digits = i - 1
            max_digits = 7

        elif src[1] == 'x' or src[1] == 'X':
            i = 2
            while i < size and src[i] in '0123456789abcdefABCDEF':
                # Alternative to
                #   (src[i] | 32) % 39 - 9
                # which cannot be applied to a Python string.
                codepoint = (codepoint * 16) + int(src[i], 16)

                if codepoint >= 0x110000:
                    # Keep counting digits but
                    # avoid integer overflow.
                    codepoint = 0x110000

                i += 1

            num_digits = i - 2
            max_digits = 6

        if (num_digits >= 1
           and num_digits <= max_digits
           and i < size
           and src[i] == ';'):
            # Zero, the surrogates range and values past the last code
            # point are replaced with U+FFFD.
            if (codepoint == 0
               or (codepoint >= 0xD800 and codepoint < 0xE000)
               or codepoint >= 0x110000):
                codepoint = 0xFFFD

            _cmark_cmark_utf8proc_encode_char(codepoint, ob)
            return i + 1

    else:
        if size > md_parser['cmark']['re']['ENTITIES']['CMARK_ENTITY_MAX_LENGTH']:
            size = md_parser['cmark']['re']['ENTITIES']['CMARK_ENTITY_MAX_LENGTH']

        for i in range(md_parser['cmark']['re']['ENTITIES']['CMARK_ENTITY_MIN_LENGTH'], size):
            if src[i] == ' ':
                break

            if src[i] == ';':
                entity: str = _cmark_S_lookup_entity(src, i)

                if entity is not None:
                    _cmark_cmark_strbuf_puts(ob, entity)
                    return i + 1

                break

    return 0
# 0.30.
def _cmark_houdini_unescape_html(ob: _cmarkCmarkStrbuf, src: str,
                                 size: int) -> int:
    r"""Copy ``src`` to ``ob`` while unescaping its character references.

    :parameter ob: the output buffer.
    :parameter src: the text to unescape.
    :parameter size: the number of characters of ``src`` to consider.
    :returns: ``0`` if no ``&`` was found before ``size`` after a non-empty
         scan from the start, in which case nothing is written to ``ob``,
         ``1`` otherwise.
    """
    pos: int = 0
    run_start: int
    consumed: int

    while pos < size:
        # Skip the run of plain text up to the next '&'.
        run_start = pos
        while pos < size and src[pos] != '&':
            pos += 1

        if pos > run_start:
            if run_start == 0 and pos >= size:
                # Nothing to unescape in the whole input.
                return 0

            if run_start == 0:
                _cmark_cmark_strbuf_grow(ob, _cmark_HOUDINI_UNESCAPED_SIZE(size))

            _cmark_cmark_strbuf_put(ob, src[run_start:], pos - run_start)

        # escaping
        if pos >= size:
            break

        # Step over the '&'.
        pos += 1

        consumed = _cmark_houdini_unescape_ent(ob, src[pos:], size - pos)
        pos += consumed

        # not really an entity
        if consumed == 0:
            _cmark_cmark_strbuf_putc(ob, ord('&'))

    return 1
def _cmark_houdini_unescape_html_f(ob: _cmarkCmarkStrbuf,
                                   src: str,
                                   size: int):
    r"""Unescape ``src`` into ``ob``, copying it verbatim as a fallback.

    :parameter ob: the output buffer.
    :parameter src: the text to unescape.
    :parameter size: the number of characters of ``src`` to consider.
    """
    unescaped = _cmark_houdini_unescape_html(ob, src, size)
    if unescaped:
        return

    # The unescape function reported a false value: put the input as it is.
    _cmark_cmark_strbuf_put(ob, src, size)

View File

@ -25,15 +25,22 @@ import copy
from ..constants import parser as md_parser
from ..generic import _noop, _replace_substring
from .buffer_c import (_cmark_cmark_strbuf_detach, _cmark_cmark_strbuf_drop,
_cmark_cmark_strbuf_set, _cmark_cmark_strbuf_truncate)
_cmark_cmark_strbuf_set, _cmark_cmark_strbuf_truncate,
_cmark_cmark_strbuf_unescape)
from .buffer_h import _cmark_CMARK_BUF_INIT, _cmarkCmarkStrbuf
from .chunk_h import (_cmark_cmark_chunk_dup, _cmark_cmark_chunk_literal,
_cmark_cmark_chunk_rtrim, _cmarkCmarkChunk)
from .cmark_ctype_c import _cmark_cmark_ispunct
from .chunk_h import (_cmark_cmark_chunk_dup, _cmark_cmark_chunk_free,
_cmark_cmark_chunk_literal, _cmark_cmark_chunk_rtrim,
_cmark_cmark_chunk_trim, _cmarkCmarkChunk)
from .cmark_ctype_c import _cmark_cmark_ispunct, _cmark_cmark_isspace
from .cmark_h import _cmarkCmarkMem
from .node_c import _cmark_cmark_node_free, _cmark_cmark_node_set_literal
from .houdini_html_u_c import _cmark_houdini_unescape_html_f
from .node_c import (_cmark_cmark_node_free, _cmark_cmark_node_insert_after,
_cmark_cmark_node_insert_before,
_cmark_cmark_node_set_literal, _cmark_cmark_node_unlink)
from .node_h import _cmarkCmarkNode
from .reference_h import _cmarkCmarkReferenceMap
from .references_c import _cmark_cmark_reference_lookup
from .references_h import _cmarkCmarkReference, _cmarkCmarkReferenceMap
from .scanners_h import _cmark_scan_link_title, _cmark_scan_spacechars
from .utf8_c import (_cmark_cmark_utf8proc_is_punctuation,
_cmark_cmark_utf8proc_is_space,
_cmark_cmark_utf8proc_iterate)
@ -44,29 +51,43 @@ from .utf8_c import (_cmark_cmark_utf8proc_is_punctuation,
# 0.29, 0.30
def _cmark_make_linebreak(mem):
_cmark_make_simple(mem, md_parser['cmark']['cmark_node_type']['CMARK_NODE_LINEBREAK'])
def _cmark_make_linebreak(mem: _cmarkCmarkMem):
return _cmark_make_simple(mem, md_parser['cmark']['cmark_node_type']['CMARK_NODE_LINEBREAK'])
def _cmark_make_emph(mem: _cmarkCmarkMem):
    r"""Return what ``_cmark_make_simple`` builds for ``CMARK_NODE_EMPH``.

    :parameter mem: passed through to ``_cmark_make_simple``.
    """
    emph_type = md_parser['cmark']['cmark_node_type']['CMARK_NODE_EMPH']
    return _cmark_make_simple(mem, emph_type)
def _cmark_make_strong(mem: _cmarkCmarkMem):
    r"""Return what ``_cmark_make_simple`` builds for ``CMARK_NODE_STRONG``.

    :parameter mem: passed through to ``_cmark_make_simple``.
    """
    strong_type = md_parser['cmark']['cmark_node_type']['CMARK_NODE_STRONG']
    return _cmark_make_simple(mem, strong_type)
class _cmarkDelimiter:
r"""A list node with attributes useful for processing emphasis."""
def __init__(self, delim_char: str, length: int):
self.previous = None
self.next = None
__slots__ = [
'previous',
'next',
'inl_text',
'length',
'delim_char',
'can_open',
'can_close',
'offset',
]
# _cmarkCmarkNode
self.inl_text = None
def __init__(self):
self.previous: _cmarkDelimiter = None
self.next: _cmarkDelimiter = None
self.inl_text: _cmarkCmarkNode = None
self.length: int = 0
self.delim_char = str()
self.can_open: bool = False
self.can_close: bool = False
self.literal = None
self.delim_char = delim_char
self.length = length
self.offset = 0
self.original_length = length
self.active = True
self.can_open = False
self.can_close = False
# Extra attribute.
self.offset: int = 0
def __str__(self):
if self.delim_char is not None:
@ -81,111 +102,73 @@ class _cmarkDelimiter:
el = '== element ' + hex(id(self)) + " =="
it = 'inl_text = ' + str(self.inl_text)
li = 'literal = ' + str(self.literal)
de = 'delim_char = ' + self.delim_char
le = 'length = ' + str(self.length)
of = 'offset = ' + str(self.offset)
oi = 'original_length = ' + str(self.original_length)
ac = 'active = ' + str(self.active)
pr = 'previous = ' + previous
ne = 'next = ' + next
co = 'can_open = ' + str(self.can_open)
cc = 'can_close = ' + str(self.can_close)
return (el + '\n' + it + '\n' + li + '\n' + de + '\n' + le + '\n' + of + '\n' + oi + '\n'
return (el + '\n' + it + '\n' + '\n' + de + '\n' + le + '\n' + of + '\n'
+ ac + '\n' + pr + '\n' + ne + '\n' + co + '\n'
+ cc + '\n')
class _cmarkBracket:
    r"""An entry of the bracket list reachable from ``_cmarkSubject.last_bracket``.

    Entries are chained backwards through ``previous``. ``__slots__`` is
    used so that instances carry no per-instance ``__dict__``.
    """

    __slots__ = [
        'previous',
        'previous_delimiter',
        'inl_text',
        'position',
        'image',
        'active',
        'bracket_after',
    ]

    def __init__(self):
        r"""Create a detached bracket entry with neutral defaults."""
        # Each attribute is initialised exactly once; the values are the
        # ones the previous (duplicated) initialisation ended up with.
        self.previous: _cmarkBracket = None
        self.previous_delimiter: _cmarkDelimiter = None
        self.inl_text: _cmarkCmarkNode = None
        # ``position`` holds an integer (it starts at 0), not a boolean.
        self.position: int = 0
        self.image: bool = False
        self.active: bool = True
        self.bracket_after: bool = False
class _cmarkSubject:
    r"""Parser state for one subject plus the double linked delimiter list.

    The delimiter list is used for processing emphasis: ``start`` is its
    first node and ``last_delim`` its last one. ``last_bracket`` is the
    head of the separate bracket list.
    """

    __slots__ = [
        'mem',
        'input',
        'flags',
        'line',
        'pos',
        'block_offset',
        'column_offset',
        'refmap',
        # 'start' must be declared here: __init__, push, pop and scroll all
        # use it, and a slotted class rejects undeclared attributes.
        'start',
        'last_delim',
        'last_bracket',
        'backticks',
        'scanned_for_backticks',
    ]

    def __init__(self):
        r"""Initialise every field to its neutral value."""
        self.mem: _cmarkCmarkMem = None
        # This corresponds to the line (cmark_chunk input).
        self.input: _cmarkCmarkChunk = None
        self.flags: int = 0
        self.line: int = 0
        self.pos: int = 0
        self.block_offset: int = 0
        self.column_offset: int = 0
        self.refmap: _cmarkCmarkReferenceMap = None
        self.start: _cmarkDelimiter = None
        self.last_delim: _cmarkDelimiter = None
        self.last_bracket: _cmarkBracket = None
        # MAXBACKTICKS + 1 slots, so index MAXBACKTICKS itself is valid.
        self.backticks: list = list(range(0, md_parser['cmark']['generic']['MAXBACKTICKS'] + 1))
        self.scanned_for_backticks: bool = False

    def push(self, node: _cmarkDelimiter):
        r"""Add a new node at the end of the delimiter list.

        :parameter node: the delimiter to append. Its ``next`` link is
            always reset to ``None``.
        """
        if self.start is None and self.last_delim is None:
            # Empty list.
            self.start = self.last_delim = node
        else:
            # Connect the last existing node to the new node.
            node.previous = self.last_delim
            self.last_delim.next = node
            self.last_delim = node
        node.next = None

    def pop(self) -> _cmarkDelimiter:
        r"""Remove the last node of the delimiter list.

        :returns: the removed node, or ``None`` when the list is empty.
        """
        if self.start is None and self.last_delim is None:
            return None

        node = self.last_delim
        self.last_delim = node.previous
        if self.last_delim is None:
            # The list is now empty.
            self.start = None
        return node

    def extract(self, delim: _cmarkDelimiter):
        r"""Remove a specific node.

        This method is equivalent to the remove_delimiter
        function in inlines.c

        :parameter delim: the node to unlink; ``None`` is ignored.
        :raises: ValueError when ``delim`` has no successor but is not
            the recorded last node of the list.
        """
        if delim is None:
            return
        if delim.next is None:
            # end of list:
            if delim is not self.last_delim:
                raise ValueError
            self.last_delim = delim.previous
        else:
            delim.next.previous = delim.previous
        if delim.previous is not None:
            delim.previous.next = delim.next
        # subj.mem.free(delim): nothing to free explicitly in Python.

    def scroll(self):
        r"""Print the first node of the list."""
        print(self.start)
@ -209,14 +192,14 @@ def _cmark_make_literal(subj: _cmarkSubject, t: int, start_column: int, end_colu
e.mem = copy.deepcopy(subj.mem)
e.type = t
e.start_line = e.end_line = subj.line
# columns are NOT 1 based.
# columns are (NOT) 1 based. FIXME
e.start_column: int = start_column + subj.column_offset + subj.block_offset
e.end_column: int = end_column + subj.column_offset + subj.block_offset
return e
# 0.30
def _cmark_make_simple(mem, t: int) -> _cmarkCmarkNode:
def _cmark_make_simple(mem: _cmarkCmarkMem, t: int) -> _cmarkCmarkNode:
e = _cmarkCmarkNode()
e.mem = copy.deepcopy(mem)
e.type = t
@ -230,21 +213,50 @@ def _cmark_make_str(subj: _cmarkSubject, sc: int, ec: int, s: _cmarkCmarkChunk)
# ec = end column
e = _cmark_make_literal(subj, md_parser['cmark']['cmark_node_type']['CMARK_NODE_TEXT'], sc, ec)
# Realloc with NULL ptr is equal to malloc, so no need to translate
# this operation:
# e->data = (unsigned char *)subj->mem->realloc(NULL, s.len + 1);
if s.data is not None:
e.data = copy.deepcopy(s.data)
e.data = copy.deepcopy(s.data[:s.length])
# No need to add line terminator (\0).
# e->data[s.len] = 0;
e.length = s.length
return e
# Like cmark_node_append_child but without costly sanity checks.
# Assumes that child was newly created.
def _cmark_append_child(node: _cmarkCmarkNode, child: _cmarkCmarkNode):
    r"""Append ``child`` as the new last child of ``node``.

    No sanity checks are performed: ``child`` is assumed to be newly
    created.

    :parameter node: the parent; its ``last_child`` (and ``first_child``
        when it had no children) is updated.
    :parameter child: the node to append; its ``next``, ``prev`` and
        ``parent`` links are overwritten.
    """
    old_last_child: _cmarkCmarkNode = node.last_child

    child.next = None
    child.prev = old_last_child
    child.parent = node
    node.last_child = child

    # Compare against None explicitly so that the branch does not depend
    # on the truth value of the node object itself.
    if old_last_child is not None:
        old_last_child.next = child
    else:
        # Also set first_child if node previously had no children.
        node.first_child = child
# Duplicate a chunk by creating a copy of the buffer not by reusing the
# buffer like cmark_chunk_dup does.
# 0.30
def _cmark_subject_from_buf(mem, line_number: int,
def _cmark_cmark_strdup(mem: _cmarkCmarkMem, src: str) -> str:
    r"""Return a duplicate of ``src``.

    :parameter mem: unused; kept so the signature matches the other
        ``mem``-taking helpers.
    :parameter src: the string to duplicate.
    :returns: a string equal to ``src``, or ``None`` when ``src`` is
        ``None``.
    """
    if src is None:
        return None

    # Python strings are immutable and carry no terminator, so neither a
    # deep copy nor an extra "+ 1" slot for a trailing NUL is needed.
    return src[:]
# 0.30
def _cmark_subject_from_buf(mem: _cmarkCmarkMem, line_number: int,
block_offset: int, e: _cmarkSubject, chunk: _cmarkCmarkChunk,
refmap: _cmarkCmarkReferenceMap):
i: int
@ -258,18 +270,16 @@ def _cmark_subject_from_buf(mem, line_number: int,
e.refmap = refmap
e.last_delim = None
e.last_bracket = None
for i in range(0, md_parser['cmark']['generic']['MAXBACKTICKS']):
e.backticks[i] = 0
e.scanned_for_backticks = False
# 0.30
def _cmark_isbacktick(c: int) -> int:
backtick: int = 0
def _cmark_isbacktick(c: int) -> bool:
backtick: bool = False
if chr(c) == '`':
backtick = 1
backtick = True
return backtick
@ -291,15 +301,15 @@ def _cmark_peek_at(subj: _cmarkSubject, pos: int) -> int:
return ord(subj.input.data[pos])
# Return true if there are no more characters in the subject.
# 0.30
def _cmark_is_eof(subj: _cmarkSubject) -> bool:
    r"""Tell whether the end of the subject has been reached.

    :parameter subj: the subject; ``subj.pos`` and ``subj.input.length``
        are read.
    :returns: ``True`` when ``subj.pos`` is at or beyond
        ``subj.input.length``, i.e. there are no more characters to read.
    """
    return subj.pos >= subj.input.length
# Advance the subject. Doesn't check for eof.
# 0.29, 0.30
def _cmark_advance(subj: _cmarkSubject):
    r"""Move the subject position forward by one.

    No end-of-input check is made here.

    :parameter subj: the subject whose ``pos`` is incremented in place.
    """
    subj.pos = subj.pos + 1
@ -316,45 +326,54 @@ def _cmark_skip_line_end(subj: _cmarkSubject) -> bool:
return seen_line_end_char or _cmark_is_eof(subj)
# Custom function.
def _cmark_take_while_loop_condition(subj: _cmarkSubject, function_name: str) -> bool:
    r"""Evaluate the predicate named ``function_name`` on the current character.

    :parameter subj: the subject; its current character is obtained
        through ``_cmark_peek_char``.
    :parameter function_name: name of the predicate to apply. Only
        ``'_cmark_isbacktick'`` is recognised.
    :returns: the result of the predicate, or ``False`` for any other
        name.
    """
    # The character is peeked before the name is checked, as before.
    current = _cmark_peek_char(subj)
    if function_name == '_cmark_isbacktick':
        return _cmark_isbacktick(current)
    return False
# Take characters while a predicate holds, and return a string.
# Get backtick spanning.
# 0.29, 0.30
def _cmark_take_while(subj: _cmarkSubject, function_name: str = '_cmark_isbacktick') -> _cmarkCmarkChunk:
    r"""Take characters while a predicate holds.

    :parameter subj: the subject; its position is advanced past every
        character for which the predicate holds.
    :parameter function_name: name of the predicate, resolved by
        ``_cmark_take_while_loop_condition``. The default keeps
        one-argument calls working as a backtick scan.
    :returns: the value of ``_cmark_cmark_chunk_dup`` for the consumed
        span (start position and number of characters taken).
    """
    startpos: int = subj.pos
    length: int = 0

    # Forward the caller's predicate name instead of a hard-coded one, so
    # the parameter actually selects the predicate.
    while _cmark_take_while_loop_condition(subj, function_name):
        _cmark_advance(subj)
        length += 1

    return _cmark_cmark_chunk_dup(subj.input, startpos, length)
# Return the number of newlines in a given span of text in a subject. If
# the number is greater than zero, also return the number of characters
# between the last newline and the end of the span in `since_newline`.
# 0.29, 0.30
def _cmark_count_newlines(subj: _cmarkSubject, sfrom: int, length: int) -> tuple:
    r"""Count the newlines in a span of the subject's input.

    :parameter subj: the subject; ``subj.input.data`` is indexed.
    :parameter sfrom: index of the first character of the span.
    :parameter length: number of characters in the span.
    :returns: ``(newlines, since_newline)`` when at least one newline is
        found, where ``since_newline`` is the number of characters between
        the last newline and the end of the span. The bare integer ``0``
        is returned when the span contains no newline.
    """
    nls: int = 0
    since_nl: int = 0

    for index in range(sfrom, sfrom + length):
        if subj.input.data[index] == '\n':
            nls += 1
            since_nl = 0
        else:
            since_nl += 1

    if not nls:
        # NOTE(review): this is an int, not the annotated tuple; kept
        # as-is because the caller is not visible here. Confirm whether the
        # caller unpacks the result.
        return 0

    # Integers are immutable: no copy is needed before returning.
    return nls, since_nl
@ -363,7 +382,7 @@ def _cmark_count_newlines(subj: _cmarkSubject, start: int, length: int) -> tuple
# of text in `subj`.
# 0.29, 0.30
def _cmark_adjust_subj_node_newlines(subj: _cmarkSubject, node: _cmarkCmarkNode, matchlen: int, extra: int, options: int):
if not options & md_parser['cmark']['generic']['CMARK_OPT_SOURCEPOS']:
if not (options & md_parser['cmark']['generic']['CMARK_OPT_SOURCEPOS']):
return
newlines: int
@ -400,7 +419,7 @@ def _cmark_scan_to_closing_backticks(subj: _cmarkSubject, openticklength: int) -
c: int
c = _cmark_peek_char(subj)
while not _cmark_isbacktick(c):
while c and not _cmark_isbacktick(c):
_cmark_advance(subj)
c = _cmark_peek_char(subj)
if _cmark_is_eof(subj):
@ -464,14 +483,12 @@ def _cmark_S_normalize_code(s: _cmarkCmarkStrbuf):
# Assumes that the subject has a backtick at the current position.
# 0.30
def _cmark_handle_backticks(subj: _cmarkSubject, options: int) -> _cmarkCmarkNode:
openticks: _cmarkCmarkChunk = _cmark_take_while(subj)
openticks: _cmarkCmarkChunk = _cmark_take_while(subj, '_cmark_isbacktick')
startpos: int = subj.pos
endpos: int = _cmark_scan_to_closing_backticks(subj, openticks.length)
# not found
if endpos == 0:
# rewind
subj.pos = startpos
if endpos == 0: # not found
subj.pos = startpos # rewind
return _cmark_make_str(subj, subj.pos, subj.pos, openticks)
else:
buf = _cmark_CMARK_BUF_INIT(subj.mem)
@ -479,7 +496,7 @@ def _cmark_handle_backticks(subj: _cmarkSubject, options: int) -> _cmarkCmarkNod
_cmark_S_normalize_code(buf)
node: _cmarkCmarkNode = _cmark_make_literal(subj, md_parser['cmark']['cmark_node_type']['CMARK_NODE_CODE'], startpos, endpos - openticks.length - 1)
node.len = buf.size
node.length = buf.size
node.data = _cmark_cmark_strbuf_detach(buf)
_cmark_adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.length, options)
return node
@ -505,7 +522,7 @@ def _cmark_scan_delims(subj: _cmarkSubject, c: str) -> tuple:
while _cmark_peek_at(subj, before_char_pos) >> 6 == 2 and before_char_pos > 0:
before_char_pos -= 1
length, before_char = _cmark_cmark_utf8proc_iterate(subj.input.data[before_char_pos:before_char_pos + 1],
length, before_char = _cmark_cmark_utf8proc_iterate(subj.input.data[before_char_pos:],
subj.pos - before_char_pos)
if length == -1:
@ -513,13 +530,13 @@ def _cmark_scan_delims(subj: _cmarkSubject, c: str) -> tuple:
if c == '\'' or c == '"':
numdelims += 1
_cmark_advance(subj)
_cmark_advance(subj) # limit to 1 delim for quotes
else:
while chr(_cmark_peek_char(subj)) == c:
numdelims += 1
_cmark_advance(subj)
length, after_char = _cmark_cmark_utf8proc_iterate(subj.input.data[subj.pos:subj.pos + 1], subj.input.length - subj.pos)
length, after_char = _cmark_cmark_utf8proc_iterate(subj.input.data[subj.pos:], subj.input.length - subj.pos)
if length == -1:
after_char = 10
@ -548,7 +565,8 @@ def _cmark_scan_delims(subj: _cmarkSubject, c: str) -> tuple:
elif c == '\'' or c == '"':
if (left_flanking and
(not right_flanking or before_char == '(' or before_char == '[')
and before_char != ']' and before_char != ')'):
and before_char != ']'
and before_char != ')'):
can_open = True
can_close = right_flanking
else:
@ -558,6 +576,22 @@ def _cmark_scan_delims(subj: _cmarkSubject, c: str) -> tuple:
return numdelims, can_open, can_close
# 0.30
def _cmark_remove_delimiter(subj: _cmarkSubject, delim: _cmarkDelimiter):
    r"""Unlink ``delim`` from the delimiter list of ``subj``.

    :parameter subj: the subject; ``subj.last_delim`` is moved back when
        ``delim`` is the last node.
    :parameter delim: the node to unlink; ``None`` is ignored.
    :raises: ValueError when ``delim`` has no successor but is not
        ``subj.last_delim``.
    """
    if delim is None:
        return

    if delim.next is None:
        # End of list: delim must be the very node recorded as last, so
        # compare by identity rather than by value.
        if delim is not subj.last_delim:
            raise ValueError
        subj.last_delim = delim.previous
    else:
        delim.next.previous = delim.previous

    if delim.previous is not None:
        delim.previous.next = delim.next
    # Nothing to free: dropping the local name would not release the node.
# 0.30
def _cmark_pop_bracket(subj: _cmarkSubject):
b: _cmarkBracket
@ -565,20 +599,24 @@ def _cmark_pop_bracket(subj: _cmarkSubject):
return
b = subj.last_bracket
subj.last_bracket = subj.last_bracket.previous
# No need to free.
# subj->mem->free(b);
_noop(b)
# subj->mem->free(b);
del b
# 0.29, 0.30
def _cmark_push_delimiter(subj: _cmarkSubject, c: str, can_open: bool,
                          can_close: bool, inl_text: _cmarkCmarkNode):
    r"""Create a delimiter and append it to the delimiter list of ``subj``.

    :parameter subj: the subject; the new delimiter becomes
        ``subj.last_delim``.
    :parameter c: the delimiter character.
    :parameter can_open: whether the delimiter can open emphasis.
    :parameter can_close: whether the delimiter can close emphasis.
    :parameter inl_text: the inline text node of the delimiter run; its
        ``length`` becomes the delimiter length.
    """
    # The constructor takes no arguments: every field is set explicitly.
    delim = _cmarkDelimiter()
    delim.delim_char = c
    delim.can_open = can_open
    delim.can_close = can_close
    delim.inl_text = inl_text
    delim.length = inl_text.length

    # Link the node exactly once, at the tail of the list.
    delim.previous = subj.last_delim
    delim.next = None
    if delim.previous is not None:
        delim.previous.next = delim
    subj.last_delim = delim
# 0.29, 0.30
@ -600,10 +638,10 @@ def _cmark_push_bracket(subj: _cmarkSubject, image: bool, inl_text: _cmarkCmarkN
# 0.29, 0.30
def _cmark_handle_delim(subj: _cmarkSubject, c: str, smart: bool = False) -> _cmarkCmarkNode:
numdelims: int
inl_text: _cmarkCmarkNode
can_open: bool
can_close: bool
inl_text: _cmarkCmarkNode
contents: str
contents: _cmarkCmarkChunk
numdelims, can_open, can_close = _cmark_scan_delims(subj, c)
@ -619,7 +657,7 @@ def _cmark_handle_delim(subj: _cmarkSubject, c: str, smart: bool = False) -> _cm
inl_text = _cmark_make_str(subj, subj.pos - numdelims, subj.pos - 1, contents)
if (can_open or can_close) and (not (c == '\'' or c == '"')):
if (can_open or can_close) and (not (c == '\'' or c == '"') or smart):
_cmark_push_delimiter(subj, c, can_open, can_close, inl_text)
return inl_text
@ -632,7 +670,9 @@ def _cmark_process_emphasis(subj: _cmarkSubject, stack_bottom: _cmarkDelimiter,
openers_bottom_index: int = 0
opener_found: bool
openers_bottom_index: int = 0
openers_bottom: list = [stack_bottom, stack_bottom, stack_bottom, stack_bottom, stack_bottom, stack_bottom, stack_bottom, stack_bottom, stack_bottom]
openers_bottom: list = [stack_bottom, stack_bottom, stack_bottom,
stack_bottom, stack_bottom, stack_bottom,
stack_bottom, stack_bottom, stack_bottom]
# move back to first relevant delim.
while closer is not None and closer.previous is not stack_bottom:
@ -696,13 +736,13 @@ def _cmark_process_emphasis(subj: _cmarkSubject, stack_bottom: _cmarkDelimiter,
# we can remove a closer that can't be an
# opener, once we've seen there's no
# matching opener:
subj.extract(old_closer)
_cmark_remove_delimiter(subj, old_closer)
else:
closer = closer.next
# free all delimiters in list until stack_bottom:
while subj.last_delim is not None and subj.last_delim != stack_bottom:
subj.extract(subj.last_delim)
_cmark_remove_delimiter(subj, subj.last_delim)
# 0.29, 0.30
@ -729,32 +769,43 @@ def _cmark_remove_emph(subj: _cmarkSubject, opener: _cmarkDelimiter, closer: _cm
opener_num_chars -= use_delims
closer_num_chars -= use_delims
opener_inl.length = opener_num_chars
closer_inl.length = closer_num_chars
# opener_inl->data[opener_num_chars] = 0;
# No need to add string terminators.
closer_inl.length = closer_num_chars
# closer_inl->data[closer_num_chars] = 0;
# No need to add string terminators.
# opener_inl->data[opener_num_chars] = 0;
# closer_inl->data[closer_num_chars] = 0;
# free delimiters between opener and closer
delim = closer.previous
while delim is not None and delim != opener:
tmp_delim = delim.previous
subj.extract(delim)
_cmark_remove_delimiter(subj, delim)
delim = tmp_delim
# IGNORE
############
# Not useful for emphasis detection but we keep it as reference.
#
# create new emph or strong, and splice it in to our inlines
# between the opener and closer
# emph = use_delims == 1 ? make_emph(subj->mem) : make_strong(subj->mem);
# tmp = opener_inl->next;
# while (tmp && tmp != closer_inl) {
# tmpnext = tmp->next;
# cmark_node_unlink(tmp);
# append_child(emph, tmp);
# tmp = tmpnext;
# }
# cmark_node_insert_after(opener_inl, emph);
#
# between the opener and closer #
if use_delims == 1: #
emph = _cmark_make_emph(subj.mem) #
else: #
emph = _cmark_make_strong(subj.mem) #
tmp = opener_inl.next #
while tmp and tmp != closer_inl: #
tmpnext = tmp.next #
_cmark_cmark_node_unlink(tmp) #
_cmark_append_child(emph, tmp) #
tmp = tmpnext #
_cmark_cmark_node_insert_after(opener_inl, emph) #
emph.start_line = opener_inl.start_line #
emph.end_line = closer_inl.end_line #
emph.start_column = opener_inl.start_column #
emph.end_column = closer_inl.end_column #
#############
# Custom variables and computations.
opener_relative_start = opener_inl.end_column - use_delims + 1 - opener.offset
@ -771,7 +822,7 @@ def _cmark_remove_emph(subj: _cmarkSubject, opener: _cmarkDelimiter, closer: _cm
# if opener has 0 characters, remove it and its associated inline
if opener_num_chars == 0:
_cmark_cmark_node_free(opener_inl)
subj.extract(opener)
_cmark_remove_delimiter(subj, opener)
# if closer has 0 characters, remove it and its associated inline
if closer_num_chars == 0:
@ -779,7 +830,7 @@ def _cmark_remove_emph(subj: _cmarkSubject, opener: _cmarkDelimiter, closer: _cm
_cmark_cmark_node_free(closer_inl)
# remove closer from list
tmp_delim = closer.next
subj.extract(closer)
_cmark_remove_delimiter(subj, closer)
closer = tmp_delim
return closer
@ -791,8 +842,7 @@ def _cmark_handle_backslash(subj: _cmarkSubject):
_cmark_advance(subj)
nextchar: str = _cmark_peek_char(subj)
# only ascii symbols and newline can be escaped
if _cmark_cmark_ispunct(nextchar):
if _cmark_cmark_ispunct(nextchar): # only ascii symbols and newline can be escaped
_cmark_advance(subj)
return _cmark_make_str(subj, subj.pos - 2, subj.pos - 1, _cmark_cmark_chunk_dup(subj.input, subj.pos - 1, 1))
elif (not _cmark_is_eof(subj)) and _cmark_skip_line_end(subj):
@ -801,6 +851,315 @@ def _cmark_handle_backslash(subj: _cmarkSubject):
return _cmark_make_str(subj, subj.pos - 1, subj.pos - 1, _cmark_cmark_chunk_literal('\\'))