diff --git a/tests/decoding_test.bat b/tests/decoding_test.bat
new file mode 100644
index 0000000..9dad40b
--- /dev/null
+++ b/tests/decoding_test.bat
@@ -0,0 +1,8 @@
+@ECHO OFF
+REM https://stackoverflow.com/a/4580120/20785734
+setlocal
+set PYTHONPATH=%~dp0
+python tests/decoding_test.py
+endlocal
+
+
diff --git a/tests/decoding_test2.bat b/tests/decoding_test2.bat
new file mode 100644
index 0000000..3716560
--- /dev/null
+++ b/tests/decoding_test2.bat
@@ -0,0 +1,8 @@
+@ECHO OFF
+REM https://stackoverflow.com/a/4580120/20785734
+setlocal
+set PYTHONPATH=%~dp0
+py -2 tests/decoding_test.py
+endlocal
+
+
diff --git a/tests/decoding_test3.bat b/tests/decoding_test3.bat
new file mode 100644
index 0000000..d67a880
--- /dev/null
+++ b/tests/decoding_test3.bat
@@ -0,0 +1,8 @@
+@ECHO OFF
+REM https://stackoverflow.com/a/4580120/20785734
+setlocal
+set PYTHONPATH=%~dp0
+py -3 tests/decoding_test.py
+endlocal
+
+
diff --git a/tests/test_api.py b/tests/test_api.py
index 1acc26f..fd2b8d1 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -18,9 +18,9 @@
 
 
 def test_bug_148():
-    assert 'a = "\\u0064"\n' == toml.dumps({'a': '\\x64'})
-    assert 'a = "\\\\x64"\n' == toml.dumps({'a': '\\\\x64'})
-    assert 'a = "\\\\\\u0064"\n' == toml.dumps({'a': '\\\\\\x64'})
+    assert r'a = "\\x64"' + '\n' == toml.dumps({'a': r'\x64'})
+    assert r'a = "\\\\x64"' + '\n' == toml.dumps({'a': r'\\x64'})
+    assert r'a = "\\\\\\x64"' + '\n' == toml.dumps({'a': r'\\\x64'})
 
 
 def test_bug_144():
@@ -232,6 +232,14 @@ def test_commutativity():
     assert o == toml.loads(toml.dumps(o))
 
 
+@pytest.mark.skipif(sys.platform == 'win32', reason = '''This test's expected result
+                                                         is a hardcoded POSIX file
+                                                         path. str(pathlib.Path(..))
+                                                         returns a path formatted
+                                                         according to the local
+                                                         platform.  So this test
+                                                         will always fail on
+                                                         Windows''')
 def test_pathlib():
     if (3, 4) <= sys.version_info:
         import pathlib
diff --git a/toml/encoder.py b/toml/encoder.py
index bf17a72..e2c5cd0 100644
--- a/toml/encoder.py
+++ b/toml/encoder.py
@@ -83,11 +83,26 @@ def dumps(o, encoder=None):
     return retval
 
 
+_Python_escaped_hex = re.compile(r"""(?<!\\) # no backslash just before, so pairs are counted from the start of the run
+                                     (?P<literal_backslashes>(\\\\)*) # pairs only of literal backslashes
+                                                 # (in a normal string literal 4 are
+                                                 # needed to match 1; in a raw string
+                                                 # literal 4 will match 2 backslashes)
+                                     (\\x)       # Matches a Python Hex escape prefix in a repr string
+                                                 # (used for extended-ASCII
+                                                 # chars, e.g. repr('\xad'))
+                                  """,
+                                  flags = re.VERBOSE)
+
+def _Python_escaped_hex_to_escaped_toml(m):
+    return m.group('literal_backslashes') + '\\u00'
+
+
 def _dump_str(v):
     if sys.version_info < (3,) and hasattr(v, 'decode') and isinstance(v, str):
         v = v.decode('utf-8')
-    v = "%r" % v
-    if v[0] == 'u':
+    v = "%r" % v  # basically v = repr(v)
+    if v[0] == 'u':  # ditch any Python 2 unicode-literal-string's u prefix
         v = v[1:]
     singlequote = v.startswith("'")
     if singlequote or v.startswith('"'):
@@ -95,23 +110,13 @@ def _dump_str(v):
     if singlequote:
         v = v.replace("\\'", "'")
         v = v.replace('"', '\\"')
-    v = v.split("\\x")
-    while len(v) > 1:
-        i = -1
-        if not v[0]:
-            v = v[1:]
-        v[0] = v[0].replace("\\\\", "\\")
-        # No, I don't know why != works and == breaks
-        joinx = v[0][i] != "\\"
-        while v[0][:i] and v[0][i] == "\\":
-            joinx = not joinx
-            i -= 1
-        if joinx:
-            joiner = "x"
-        else:
-            joiner = "u00"
-        v = [v[0] + joiner + v[1]] + v[2:]
-    return unicode('"' + v[0] + '"')
+
+    v = re.sub(_Python_escaped_hex, _Python_escaped_hex_to_escaped_toml, v)
+
+    return '"%s"' % v  # They're not as popular as double quoted strings, but
+                       # TOML also supports single quoted literal strings.
+                       # However these cannot contain escapes, and the repr
+                       # above might have introduced escapes.
 
 
 def _dump_float(v):