Browse Source

Improved TOC marker detection and replacement.

master
Franco Masotti 2 months ago
parent
commit
94e283bb65
Signed by: frnmst
GPG Key ID: 24116ED85666780A
  1. 3
      Pipfile
  2. 46
      md_toc/api.py
  3. 2
      md_toc/constants.py
  4. 70
      md_toc/generic.py
  5. 222
      md_toc/tests/tests.py
  6. 3
      packages/aur/PKGBUILD
  7. 2
      setup.py

3
Pipfile

@ -33,6 +33,7 @@ sphinx-tabs = '>=3,<4'
# Tools.
twine = '>=3,<4'
pre-commit = '>=2,<3'
pyfakefs = '>=4,<5'
[packages]
fpyutils = '>=2.1,<3'
fpyutils = '>=2.2,<3'

46
md_toc/api.py

@ -61,22 +61,46 @@ def write_string_on_file_between_markers(filename: str, string: str,
raise StdinIsNotAFileToBeWritten
final_string = marker + '\n\n' + string.rstrip() + '\n\n' + marker + '\n'
marker_line_positions = fpyutils.filelines.get_line_matches(
filename, marker, 2, loose_matching=True)
if 1 in marker_line_positions:
if 2 in marker_line_positions:
fpyutils.filelines.remove_line_interval(
filename, marker_line_positions[1], marker_line_positions[2],
filename)
else:
marker_line_positions, lines = fpyutils.filelines.get_line_matches(
filename, marker, 0, loose_matching=True, keep_all_lines=True)
marker_line_positions_length: int = len(marker_line_positions)
first_marker: int = 1
second_marker: int = 2
in_loop: bool = False
done: bool = False
first_marker_position: int = 0
if marker_line_positions_length > 0:
first_marker_position = marker_line_positions[first_marker]
# Find appropriate TOC markers.
while not done and marker_line_positions_length >= 2:
interval: str = generic._read_line_interval(lines, marker_line_positions[first_marker] + 1, marker_line_positions[second_marker] - 1)
interval_with_offset: str = generic._read_line_interval(lines, marker_line_positions[first_marker] + 2, marker_line_positions[second_marker] - 2)
# TODO: add code fence detection.
if generic._detect_toc_list(interval_with_offset) or generic._string_empty(interval):
fpyutils.filelines.remove_line_interval(
filename, marker_line_positions[1], marker_line_positions[1],
filename, marker_line_positions[first_marker], marker_line_positions[second_marker],
filename)
first_marker_position = marker_line_positions[first_marker]
done = True
first_marker += 1
second_marker += 1
marker_line_positions_length -= 1
in_loop = True
if not in_loop and marker_line_positions_length == 1:
fpyutils.filelines.remove_line_interval(
filename, marker_line_positions[1], marker_line_positions[1],
filename)
if marker_line_positions_length >= 1:
fpyutils.filelines.insert_string_at_line(
filename,
final_string,
marker_line_positions[1],
first_marker_position,
filename,
append=False,
newline_character=newline_string)

2
md_toc/constants.py

@ -968,7 +968,7 @@ parser['cmark']['re'] = {
# 1. Open tag and 2. close tag.
'DQAV': '"[^"]*"',
'SQAV': "'[^']*'",
'UAV': "[^\u0020\u0009\u000a\u000d(\u000d\u000a)\"'=<>`]+",
'UAV': "[^((?!\u000a\u000d)\u000d\u000a|(?!\u000a\u000d)\u000d|(?!\u000a\u000d)\u000a|(?!\u000a\u000d)\u0020|(?!\u000a\u000d)\u0009)\"'=<>`]+",
# 2.
'AN': r'([A-Za-z]|_|:)([A-Za-z]|[0-9]|_|\.|:|-)*',

70
md_toc/generic.py

@ -20,6 +20,8 @@
#
"""Generic functions."""
import re
# _ctoi and _isascii taken from cpython source Lib/curses/ascii.py
# See:
@ -168,5 +170,73 @@ def _utf8_array_to_string(array: list) -> str:
return result
def _string_empty(lines: str) -> bool:
empty: bool = True
# Avoid matching \n\r
if re.fullmatch('((?!\u000a\u000d)|(?!\u000a\u000d)\u000d\u000a|(?!\u000a\u000d)\u000d|(?!\u000a\u000d)\u000a|(?!\u000a\u000d)\u000b|(?!\u000a\u000d)\u0020|(?!\u000a\u000d)\u0009)+', lines) is None:
empty = False
return empty
def _detect_toc_list(line: str) -> bool:
# An heuristic to detect a TOC list generated by md-toc.
match = True
if re.fullmatch(r'([-+*]|' + r'\d' + '+[.' + r'\)' + '])\u0020((?![\u0020\u0009\u000b]+).*)', line) is None:
match = False
return match
def _read_line_interval(lines: str, start: int, end: int) -> str:
r"""Given a string get a line interval between start and end.
Indices are 1 based.
Newline characters are ignored.
"""
done: bool = False
i: int = 0
line_counter: int = 1
lines_length: int = len(lines)
final_line: list = list()
# Shortcut.
if start > end or start < 1:
done = True
while not done:
invalid_newline: bool = False
# Skip \n\r
while i + 1 < lines_length and lines[i] == '\u000a' and lines[i + 1] == '\u000d':
i += 2
invalid_newline = True
# Get \r\n as a single newline character.
while i + 1 < lines_length and lines[i] == '\u000d' and lines[i + 1] == '\u000a':
line_counter += 1
i += 2
# Get \r or \n
while i < lines_length and re.fullmatch('[\u000d\u000a]', lines[i]) is not None:
line_counter += 1
i += 1
if i < lines_length and line_counter >= start and line_counter <= end:
if invalid_newline:
final_line.append('\u000a')
final_line.append('\u000d')
# Add non-newline character.
final_line.append(lines[i])
if i >= lines_length:
done = True
i += 1
final_line = ''.join(final_line)
return final_line
if __name__ == '__main__':
pass

222
md_toc/tests/tests.py

@ -22,8 +22,11 @@
r"""The tests module."""
import unittest
from unittest.mock import mock_open, patch
from .. import api, exceptions
from pyfakefs.fake_filesystem_unittest import TestCase as pyfakefsTestCase
from .. import api, exceptions, generic
from ..constants import parser as md_parser
# Some static generic variables.
@ -38,6 +41,9 @@ LINE_SQUARE_BRACKET_OPEN = '['
LINE_SQUARE_BRACKET_CLOSE = ']'
LINE_DASH = '-'
# Marker.
MARKER = '<!--TOC-->'
# Spaces.
S1 = 1 * ' '
S2 = 2 * ' '
@ -123,9 +129,122 @@ GENERIC_CMARK_RENDERS_AS_LIST_HEADER_TYPE_FIRST = 4
REDCARPET_LINE_FOO = 'foo'
class TestApi(unittest.TestCase):
class TestGeneric(unittest.TestCase):
r"""Test the generic functions."""
def test__string_empty(self):
r"""A string is empty if it contains whitespace characters, excluding consecutive sequences of \n\r."""
self.assertTrue(generic._string_empty(str()))
self.assertTrue(generic._string_empty('\u000a'))
self.assertTrue(generic._string_empty('\u000d'))
self.assertTrue(generic._string_empty('\u000b'))
self.assertTrue(generic._string_empty('\u0009'))
self.assertTrue(generic._string_empty('\u0020'))
self.assertTrue(generic._string_empty('\u000d\u000a'))
self.assertTrue(generic._string_empty('\u0020\u000d\u000a'))
self.assertTrue(generic._string_empty('\u0020\u000d\u000d\u000a\u000d'))
self.assertFalse(generic._string_empty('\u0020\u000d\u000a\u000a\u000d'))
# \n\r
self.assertFalse(generic._string_empty('\u000a\u000d'))
self.assertFalse(generic._string_empty('\u0020\u000da\u000d'))
self.assertTrue(generic._string_empty('\u0020\u000d\u0020\u000d'))
self.assertFalse(generic._string_empty('\u0020abcd\u0020'))
def test__detect_toc_list(self):
r"""Detect a markdown list generated by md-toc."""
self.assertFalse(generic._detect_toc_list(str()))
# Unordered list.
self.assertTrue(generic._detect_toc_list('- list'))
self.assertTrue(generic._detect_toc_list('+ list'))
self.assertTrue(generic._detect_toc_list('* list'))
self.assertTrue(generic._detect_toc_list('- [list'))
self.assertTrue(generic._detect_toc_list('+ [list'))
self.assertTrue(generic._detect_toc_list('* [list'))
self.assertFalse(generic._detect_toc_list('- list'))
self.assertFalse(generic._detect_toc_list('+ list'))
self.assertFalse(generic._detect_toc_list('* list'))
self.assertFalse(generic._detect_toc_list('- \u0009list'))
self.assertFalse(generic._detect_toc_list('+ \u0009list'))
self.assertFalse(generic._detect_toc_list('* \u0009list'))
self.assertFalse(generic._detect_toc_list('- \u0009[list'))
self.assertFalse(generic._detect_toc_list('+ \u0009[list'))
self.assertFalse(generic._detect_toc_list('* \u0009[list'))
self.assertFalse(generic._detect_toc_list(' - list'))
self.assertFalse(generic._detect_toc_list(' + list'))
self.assertFalse(generic._detect_toc_list(' * list'))
self.assertFalse(generic._detect_toc_list(' - [list'))
self.assertFalse(generic._detect_toc_list(' + [list'))
self.assertFalse(generic._detect_toc_list(' * [list'))
self.assertFalse(generic._detect_toc_list(' - list'))
self.assertFalse(generic._detect_toc_list(' + list'))
self.assertFalse(generic._detect_toc_list(' * list'))
self.assertFalse(generic._detect_toc_list(' - \u0009list'))
self.assertFalse(generic._detect_toc_list(' + \u0009list'))
self.assertFalse(generic._detect_toc_list(' * \u0009list'))
self.assertFalse(generic._detect_toc_list(' - \u0009[list'))
self.assertFalse(generic._detect_toc_list(' + \u0009[list'))
self.assertFalse(generic._detect_toc_list(' * \u0009[list'))
# Ordered list.
self.assertTrue(generic._detect_toc_list('1. list'))
self.assertTrue(generic._detect_toc_list('9999. list'))
self.assertTrue(generic._detect_toc_list('1) list'))
self.assertTrue(generic._detect_toc_list('9999) list'))
self.assertTrue(generic._detect_toc_list('1) [list'))
self.assertTrue(generic._detect_toc_list('9999) [list'))
self.assertFalse(generic._detect_toc_list('1. list'))
self.assertFalse(generic._detect_toc_list('9999. list'))
self.assertFalse(generic._detect_toc_list('1) list'))
self.assertFalse(generic._detect_toc_list('9999) list'))
self.assertFalse(generic._detect_toc_list('1) [list'))
self.assertFalse(generic._detect_toc_list('9999) [list'))
self.assertFalse(generic._detect_toc_list(' 1. list'))
self.assertFalse(generic._detect_toc_list(' 9999. list'))
self.assertFalse(generic._detect_toc_list(' 1) list'))
self.assertFalse(generic._detect_toc_list(' 9999) list'))
self.assertFalse(generic._detect_toc_list(' 1) [list'))
self.assertFalse(generic._detect_toc_list(' 9999) [list'))
self.assertFalse(generic._detect_toc_list(' 1. list'))
self.assertFalse(generic._detect_toc_list(' 9999. list'))
self.assertFalse(generic._detect_toc_list(' 1) list'))
self.assertFalse(generic._detect_toc_list(' 9999) list'))
self.assertFalse(generic._detect_toc_list(' 1) [list'))
self.assertFalse(generic._detect_toc_list(' 9999) [list'))
def test__read_line_interval(self):
r"""Extract a line interval."""
self.assertEqual(generic._read_line_interval(str(), 1, 1), str())
self.assertEqual(generic._read_line_interval(str(), 0, 1), str())
self.assertEqual(generic._read_line_interval(str(), 1, 0), str())
self.assertEqual(generic._read_line_interval(str(), 0, 0), str())
self.assertEqual(generic._read_line_interval('a', 1, 1), 'a')
self.assertEqual(generic._read_line_interval('a', 0, 1), str())
self.assertEqual(generic._read_line_interval('a', 1, 0), str())
self.assertEqual(generic._read_line_interval('a', 0, 0), str())
self.assertEqual(generic._read_line_interval('a\u000da', 1, 1), 'a')
self.assertEqual(generic._read_line_interval('a\u000da', 1, 2), 'aa')
self.assertEqual(generic._read_line_interval('a\u000da', 2, 2), 'a')
self.assertEqual(generic._read_line_interval('a\u000d\u000aa', 2, 2), 'a')
self.assertEqual(generic._read_line_interval('a\u000a\u000db', 1, 1), 'a\u000a\u000db')
self.assertEqual(generic._read_line_interval('a\u000a\u000db\u000d\u000ac', 1, 2), 'a\u000a\u000dbc')
self.assertEqual(generic._read_line_interval('ab\u000d\u000ac', 1, 2), 'abc')
self.assertEqual(generic._read_line_interval('ab\u000d\u000ac', 2, 2), 'c')
self.assertEqual(generic._read_line_interval('ab\u000d\u000a\u000ac', 2, 2), str())
self.assertEqual(generic._read_line_interval('ab\u000d\u000a\u000ac', 2, 3), 'c')
class TestApi(pyfakefsTestCase):
r"""Test the main API."""
def setUp(self):
r"""Fake filesystem."""
self.setUpPyfakefs()
def test_write_string_on_file_between_markers(self):
r"""Test that the TOC is written correctly on the file.
@ -135,6 +254,105 @@ class TestApi(unittest.TestCase):
with self.assertRaises(exceptions.StdinIsNotAFileToBeWritten):
api.write_string_on_file_between_markers('-', LINE, LINE, newline_string='\n')
# 0 TOC markers.
with open('foo.md', 'w') as f:
f.write('hello')
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello')
# 1 TOC markers.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n')
# 2 consecutive TOC markers.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + MARKER)
# 2 TOC markers, valid list, not empty.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + '\n\n' + '- [hi](#hi)' + '\n\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n')
# 2 TOC markers, valid list, less space, not empty.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + '\n' + '- [hi](#hi)' + '\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n' + MARKER + '\n' + '- [hi](#hi)' + '\n' + MARKER)
# 2 TOC markers, invalid list, not empty.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + '\n\n' + ' - [hi](#hi)' + '\n\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n' + MARKER + '\n\n' + ' - [hi](#hi)' + '\n\n' + MARKER)
# 2 TOC markers, invalid list, not empty, less space.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + '\n' + ' - [hi](#hi)' + '\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n' + MARKER + '\n' + ' - [hi](#hi)' + '\n' + MARKER)
# 2 TOC markers, no list, empty.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + '\n\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n')
# 2 TOC markers, no list, empty, less space.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + '\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n')
# More than 2 TOC markers, valid list.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + '\n' + MARKER + '\n' + '- [hi](#hi)' + '\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n' + '- [hi](#hi)' + '\n' + MARKER)
# More than 2 TOC markers, valid list first.
with open('foo.md', 'w') as f:
f.write('hello' + '\n' + MARKER + '\n\n' + '- [hi](#hi)' + '\n\n' + MARKER + '\n' + MARKER)
api.write_string_on_file_between_markers('foo.md', LINE, MARKER, newline_string='\n')
with open('foo.md', 'r') as f:
lines = f.readlines()
lines = ''.join(lines)
self.assertEqual(lines, 'hello' + '\n' + MARKER + '\n\n' + LINE + '\n\n' + MARKER + '\n' + MARKER)
@unittest.skip("empty test")
def test_write_strings_on_files_between_markers(self):
r"""Test that the TOC is written correctly on the files."""

3
packages/aur/PKGBUILD

@ -9,7 +9,8 @@ url="https://blog.franco.net.eu.org/software/#md-toc"
license=('GPL3')
depends=('python'
'python-fpyutils=2.1.0'
'python-setuptools')
'python-setuptools'
'python-pyfakefs')
options=(!emptydirs)
source=("https://blog.franco.net.eu.org/software/md-toc-${pkgver}/md-toc-${pkgver}.tar.gz.sig" "https://blog.franco.net.eu.org/software/md-toc-${pkgver}/md-toc-${pkgver}.tar.gz")
sha512sums=('SKIP' 'SKIP')

2
setup.py

@ -55,6 +55,6 @@ setup(
'Programming Language :: Python :: 3',
],
install_requires=[
'fpyutils>=2.1,<3'
'fpyutils>=2.2,<3'
],
)

Loading…
Cancel
Save