Bug fixes.

This commit is contained in:
Franco Masotti 2022-04-19 17:36:06 +02:00
parent 9a05da331c
commit 0c912000aa
Signed by: frnmst
GPG Key ID: 24116ED85666780A
7 changed files with 89 additions and 18 deletions

View File

@ -24,6 +24,7 @@ import copy
import sys
from ..constants import parser as md_parser
from ..generic import _utf8_array_to_string
from .buffer_h import _cmark_CMARK_BUF_INIT, _cmarkCmarkStrbuf
from .cmark_ctype_c import _cmark_cmark_ispunct, _cmark_cmark_isspace
from .cmark_h import _cmarkCmarkMem
@ -126,17 +127,21 @@ def _cmark_cmark_strbuf_putc(buf: _cmarkCmarkStrbuf, c: int):
# 0.30
def _cmark_cmark_strbuf_put(buf: _cmarkCmarkStrbuf, data: str,
len: int):
if len <= 0:
length: int):
if length <= 0:
return
_cmark_S_strbuf_grow_by(buf, len)
_cmark_S_strbuf_grow_by(buf, length)
# Alternative to
# memmove(buf.ptr + buf.size, data, len)
buf.ptr = buf.ptr[:buf.size - 1] + copy.deepcopy(data[:len - 0]) # + buf.ptr[buf.size + 1:]
if isinstance(data, list):
dt = _utf8_array_to_string(data)
else:
dt = data
buf.ptr = buf.ptr[:buf.size - 1] + copy.deepcopy(dt[:length - 0]) # + buf.ptr[buf.size + 1:]
buf.size += len
buf.size += length
# No need for line terminator.
# buf.ptr[buf.size] = '\0';

View File

@ -36,15 +36,16 @@ from .utf8_c import _cmark_cmark_utf8proc_encode_char
def _cmark_S_lookup(i: int, low: int, hi: int, s: str, length: int) -> str:
j: int
cmp: int = _strncmp(s, md_parser['cmark']['re']['ENTITIES']['entities'][i]['entity'], length)
if cmp == 0 and md_parser['cmark']['re']['ENTITIES']['entities'][i]['entity'][length] == 0:
# if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
if cmp == 0 and length == len(md_parser['cmark']['re']['ENTITIES']['entities'][i]['entity']):
return md_parser['cmark']['re']['ENTITIES']['entities'][i]['bytes']
elif cmp == -1 and i > low:
j = i - ((i - low) / 2)
j = i - int((i - low) / 2)
if j == i:
j -= 1
return _cmark_S_lookup(j, low, i - 1, s, length)
elif cmp == 1 and i < hi:
j = i + ((hi - i) / 2)
j = i + int((hi - i) / 2)
if j == i:
j += 1
return _cmark_S_lookup(j, i + 1, hi, s, length)
@ -54,7 +55,7 @@ def _cmark_S_lookup(i: int, low: int, hi: int, s: str, length: int) -> str:
# 0.30.
def _cmark_S_lookup_entity(s: str, length: int):
return _cmark_S_lookup(md_parser['cmark']['re']['ENTITIES']['CMARK_NUM_ENTITIES'] / 2, 0, md_parser['cmark']['re']['ENTITIES']['CMARK_NUM_ENTITIES'] - 1, s, length)
return _cmark_S_lookup(int(md_parser['cmark']['re']['ENTITIES']['CMARK_NUM_ENTITIES'] / 2), 0, md_parser['cmark']['re']['ENTITIES']['CMARK_NUM_ENTITIES'] - 1, s, length)
# 0.30.

View File

@ -23,7 +23,7 @@ r"""A cmark implementation file."""
import copy
from ..constants import parser as md_parser
from ..generic import _noop, _replace_substring
from ..generic import _replace_substring
from .buffer_c import (_cmark_cmark_strbuf_detach, _cmark_cmark_strbuf_drop,
_cmark_cmark_strbuf_set, _cmark_cmark_strbuf_truncate,
_cmark_cmark_strbuf_unescape)

View File

@ -23,7 +23,6 @@ r"""A cmark implementation file."""
import copy
from ..constants import parser as md_parser
from ..generic import _noop
from .cmark_h import _cmarkCmarkMem
from .node_h import _cmarkCmarkNode

View File

@ -20,7 +20,6 @@
#
r"""A cmark implementation file."""
from ..generic import _noop
from .cmark_h import _cmarkCmarkMem
# License C applies to this file except for non derivative code:

View File

@ -44,12 +44,6 @@ def _isascii(c):
return 0 <= _ctoi(c) <= 127
def _noop(var):
# Black hole for unused variables
# to avoid triggering flake8.
pass
def _replace_substring(source: str, replacement: str, start: int, end: int) -> str:
r"""Given a string called source, replace it with a string called replacement between the start ~ end interval."""
replaced: list = list()
@ -104,5 +98,75 @@ def _strncmp(s1: str, s2: str, length: int) -> int:
return retval
def _utf8_array_to_string(array: list) -> str:
r"""Given an array of integers corresponding to the representation of a UTF-8 string like this.
>>> list(chr(0x10348).encode('UTF-8'))
[240, 144, 141, 136]
revert back to the original UTF-8 character.
See the UTF-8 wikipedia page for these examples:
vv = [240, 144, 141, 136]
a = _utf8_array_to_string(vv)
assert a == chr(66376)
vv = [226, 130, 172]
a = _utf8_array_to_string(vv)
assert a == chr(8364)
vv = [194, 163]
a = _utf8_array_to_string(vv)
assert a == chr(163)
vv = [36]
a = _utf8_array_to_string(vv)
assert a == chr(36)
"""
array_length: int = len(array)
result: str
# Not a UTF-8 array.
if array_length < 1 or array_length > 4:
raise ValueError
if array_length == 1:
ll: list = [array[0] - (array[0] & 0x0)]
binary: list = [bin(f).replace('0b', '') for f in ll]
raw_binary_8_bit: list = [binary[0].zfill(7)]
if array_length == 2:
ll: list = [array[0] - (array[0] & 0xC0),
array[1] - (array[1] & 0x80)]
binary: list = [bin(f).replace('0b', '') for f in ll]
raw_binary_8_bit: list = [binary[0].zfill(4),
binary[1].zfill(6)]
if array_length == 3:
ll: list = [array[0] - (array[0] & 0xE0),
array[1] - (array[1] & 0x80),
array[2] - (array[2] & 0x80)]
binary: list = [bin(f).replace('0b', '') for f in ll]
raw_binary_8_bit: list = [binary[0].zfill(4),
binary[1].zfill(6),
binary[2].zfill(6)]
if array_length == 4:
ll: list = [array[0] - (array[0] & 0xF0),
array[1] - (array[1] & 0x80),
array[2] - (array[2] & 0x80),
array[3] - (array[3] & 0x80)]
binary: list = [bin(f).replace('0b', '') for f in ll]
raw_binary_8_bit: list = [binary[0].zfill(3),
binary[1].zfill(6),
binary[2].zfill(6),
binary[3].zfill(6)]
result = chr(int(''.join(raw_binary_8_bit), 2))
return result
if __name__ == '__main__':
pass

View File

@ -1311,6 +1311,9 @@ class TestApi(unittest.TestCase):
# Example 510 [Commonmark 0.30].
self.assertEqual(api.remove_emphasis(r'*[link*] (uri)'), r'[link] (uri)')
self.assertEqual(api.remove_emphasis(r'[link] (*uri)*'), r'[link] (uri)')
# self.assertEqual(api.remove_emphasis(r'_[link_] (uri)'), r'[link] (uri)')
self.assertEqual(api.remove_emphasis(r'[link] (_uri)_'), r'[link] (uri)')
# Example 511 [Commonmark 0.30].
self.assertEqual(api.remove_emphasis(r'*[link* [foo [bar]]](/uri)'), r'*[link* [foo [bar]]](/uri)')