From 9d053e611ff000d29fd72986611702109187397c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 26 Apr 2023 23:49:38 +0100 Subject: [PATCH 1/4] Fixes test_bug_148. Currently test_bug_148 will fail as Bug 148 was never fixed. Note, this test is expected to fail against the current toml, until the next PR I submit, which will create another version that will pass it. That's the whole point of this PR (put your TDD hat on). This PR changes the expected outputs on the LHSs to the same outputs from tomli-w and tomlkit. This test function in its factory form has been tested against tomli-w, tomlkit and against the patched version I will shortly submit in my next PR. It has also been tested in its inverse form (toml.loads on the LHS instead of toml.dumps on the RHS) against tomli, tomli-w, tomlkit, my patched version (toml_tools), against the Python 3.11 native tomllib, and against this library's own toml.loads. All pass. Also changes normal string literals to raw string literals, to make it easier to count the number of backslashes in the input to actual, and in the expected result. This test could be more extensive, e.g. to make sure r'\'*n + '\x64' -> r'\\'*n +'d'. Great! Let's write more tests! Just let's do one thing and one thing only here - further tests deserve their own PRs. 
--- tests/test_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 1acc26f..c1a7c71 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -18,9 +18,9 @@ def test_bug_148(): - assert 'a = "\\u0064"\n' == toml.dumps({'a': '\\x64'}) - assert 'a = "\\\\x64"\n' == toml.dumps({'a': '\\\\x64'}) - assert 'a = "\\\\\\u0064"\n' == toml.dumps({'a': '\\\\\\x64'}) + assert r'a = "\\x64"' + '\n' == toml.dumps({'a': r'\x64'}) + assert r'a = "\\\\x64"' + '\n' == toml.dumps({'a': r'\\x64'}) + assert r'a = "\\\\\\x64"' + '\n' == toml.dumps({'a': r'\\\x64'}) def test_bug_144(): From e3eb77dcad821926fdc599f81c47a0c4fa8b3c86 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 27 Apr 2023 00:12:38 +0100 Subject: [PATCH 2/4] Replaces buggy deep magic with a regex. Fixes #148 (5 years too late) Also adds some sorely needed comments. Apologies if I got carried away with those, but they're only comments. --- toml/encoder.py | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/toml/encoder.py b/toml/encoder.py index bf17a72..e2c5cd0 100644 --- a/toml/encoder.py +++ b/toml/encoder.py @@ -83,11 +83,26 @@ def dumps(o, encoder=None): return retval +_Python_escaped_hex = re.compile(r"""(?<!\\)(?P<literal_backslashes>(\\\\)*) # pairs only of literal backslashes + # (in a normal string literal 4 are + # needed to match 1; in a raw string + # literal 4 will match 2 backslashes) + (\\x) # Matches a Python Hex escape prefix in a repr string + # (used for extended-ASCII + # chars, e.g. 
repr('\xad')) + """, + flags = re.VERBOSE) + +def _Python_escaped_hex_to_escaped_toml(m): + return m.group('literal_backslashes') + '\\u00' + + def _dump_str(v): if sys.version_info < (3,) and hasattr(v, 'decode') and isinstance(v, str): v = v.decode('utf-8') - v = "%r" % v - if v[0] == 'u': + v = "%r" % v # basically v = repr(v) + if v[0] == 'u': # ditch any Python 2 unicode-literal-string's u prefix v = v[1:] singlequote = v.startswith("'") if singlequote or v.startswith('"'): @@ -95,23 +110,13 @@ def _dump_str(v): if singlequote: v = v.replace("\\'", "'") v = v.replace('"', '\\"') - v = v.split("\\x") - while len(v) > 1: - i = -1 - if not v[0]: - v = v[1:] - v[0] = v[0].replace("\\\\", "\\") - # No, I don't know why != works and == breaks - joinx = v[0][i] != "\\" - while v[0][:i] and v[0][i] == "\\": - joinx = not joinx - i -= 1 - if joinx: - joiner = "x" - else: - joiner = "u00" - v = [v[0] + joiner + v[1]] + v[2:] - return unicode('"' + v[0] + '"') + + v = re.sub(_Python_escaped_hex, _Python_escaped_hex_to_escaped_toml, v) + + return '"%s"' % v # They're not as popular as double quoted strings, but + # TOML also supports single quoted literal strings. + # However these cannot contain escapes, and the repr + # above might have introduced escapes. def _dump_float(v): From 562a8972f79f442831782e56b0e9ff24c3b37a7e Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 27 Apr 2023 00:26:03 +0100 Subject: [PATCH 3/4] Support running tests on windows. --- tests/test_api.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_api.py b/tests/test_api.py index c1a7c71..fd2b8d1 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -232,6 +232,14 @@ def test_commutativity(): assert o == toml.loads(toml.dumps(o)) +@pytest.mark.skipif(sys.platform == 'win32', reason = '''This test's expected result + is a hardcoded POSIX file + path. 
str(pathlib.Path(..)) + returns a path formatted + according to the local + platform. So this test + will always fail on + Windows''') def test_pathlib(): if (3, 4) <= sys.version_info: import pathlib From 4af6b9a152834c371b8661c02b05f48b20176ce6 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 27 Apr 2023 00:52:29 +0100 Subject: [PATCH 4/4] Batch file wrappers to use with the toml-test go binary in Windows --- tests/decoding_test.bat | 8 ++++++++ tests/decoding_test2.bat | 8 ++++++++ tests/decoding_test3.bat | 8 ++++++++ 3 files changed, 24 insertions(+) create mode 100644 tests/decoding_test.bat create mode 100644 tests/decoding_test2.bat create mode 100644 tests/decoding_test3.bat diff --git a/tests/decoding_test.bat b/tests/decoding_test.bat new file mode 100644 index 0000000..9dad40b --- /dev/null +++ b/tests/decoding_test.bat @@ -0,0 +1,8 @@ +@ECHO OFF +REM https://stackoverflow.com/a/4580120/20785734 +setlocal +set PYTHONPATH=%~dp0 +python tests/decoding_test.py +endlocal + + diff --git a/tests/decoding_test2.bat b/tests/decoding_test2.bat new file mode 100644 index 0000000..3716560 --- /dev/null +++ b/tests/decoding_test2.bat @@ -0,0 +1,8 @@ +@ECHO OFF +REM https://stackoverflow.com/a/4580120/20785734 +setlocal +set PYTHONPATH=%~dp0 +py -2 tests/decoding_test.py +endlocal + + diff --git a/tests/decoding_test3.bat b/tests/decoding_test3.bat new file mode 100644 index 0000000..d67a880 --- /dev/null +++ b/tests/decoding_test3.bat @@ -0,0 +1,8 @@ +@ECHO OFF +REM https://stackoverflow.com/a/4580120/20785734 +setlocal +set PYTHONPATH=%~dp0 +py -3 tests/decoding_test.py +endlocal + +