From f90ee8001693bfff05ffce01ccf1ecec09b4963a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@thalheim.io>
Date: Mon, 9 Sep 2024 10:18:50 +0200
Subject: [PATCH] remove six

---
 debug-info.py                       |  2 +-
 html5lib/_inputstream.py            |  8 ++++----
 html5lib/_tokenizer.py              |  2 --
 html5lib/_trie/py.py                |  4 +---
 html5lib/_utils.py                  | 14 +++-----------
 html5lib/filters/lint.py            | 30 ++++++++++++++---------------
 html5lib/filters/sanitizer.py       |  5 ++---
 html5lib/html5parser.py             |  4 +---
 html5lib/serializer.py              |  6 ++----
 html5lib/tests/test_meta.py         |  8 +-------
 html5lib/tests/test_parser2.py      |  8 --------
 html5lib/tests/test_stream.py       | 11 ++++-------
 html5lib/tests/test_tokenizer2.py   |  6 ++----
 html5lib/tests/test_treewalkers.py  |  3 +--
 html5lib/tests/tokenizer.py         |  9 ++++-----
 html5lib/treebuilders/base.py       |  6 ++----
 html5lib/treebuilders/etree.py      |  4 +---
 html5lib/treebuilders/etree_lxml.py |  3 ---
 html5lib/treewalkers/etree.py       |  4 +---
 html5lib/treewalkers/etree_lxml.py  |  4 +---
 requirements-oldest.txt             |  3 +--
 requirements-test.txt               |  1 +
 requirements.txt                    |  1 -
 setup.py                            |  1 -
 24 files changed, 47 insertions(+), 100 deletions(-)

diff --git a/debug-info.py b/debug-info.py
index 7e1b6fd0..5523067c 100644
--- a/debug-info.py
+++ b/debug-info.py
@@ -11,7 +11,7 @@
     "maxsize": sys.maxsize
 }
 
-search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"]
+search_modules = ["chardet", "genshi", "html5lib", "lxml"]
 found_modules = []
 
 for m in search_modules:
diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py
index 54c5c498..57a220a4 100644
--- a/html5lib/_inputstream.py
+++ b/html5lib/_inputstream.py
@@ -1,6 +1,7 @@
 
-from six import text_type
-from six.moves import http_client, urllib
+import http.client
+import urllib.response
+from builtins import str as text_type  # alias still referenced by HTMLInputStream
 
 import codecs
 import re
@@ -124,10 +125,10 @@ def _readFromBuffer(self, bytes):
 def HTMLInputStream(source, **kwargs):
     # Work around Python bug #20007: read(0) closes the connection.
     # http://bugs.python.org/issue20007
-    if (isinstance(source, http_client.HTTPResponse) or
+    if (isinstance(source, http.client.HTTPResponse) or
         # Also check for addinfourl wrapping HTTPResponse
         (isinstance(source, urllib.response.addbase) and
-         isinstance(source.fp, http_client.HTTPResponse))):
+         isinstance(source.fp, http.client.HTTPResponse))):
         isUnicode = False
     elif hasattr(source, "read"):
         isUnicode = isinstance(source.read(0), text_type)
diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
index 782310ec..75dab441 100644
--- a/html5lib/_tokenizer.py
+++ b/html5lib/_tokenizer.py
@@ -1,6 +1,4 @@
 
-from six import unichr as chr
-
 from collections import deque, OrderedDict
 from sys import version_info
 
diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py
index 92f6f861..bc6363c4 100644
--- a/html5lib/_trie/py.py
+++ b/html5lib/_trie/py.py
@@ -1,5 +1,3 @@
-from six import text_type
-
 from bisect import bisect_left
 
 from ._base import Trie as ABCTrie
@@ -7,7 +5,7 @@
 
 class Trie(ABCTrie):
     def __init__(self, data):
-        if not all(isinstance(x, text_type) for x in data.keys()):
+        if not all(isinstance(x, str) for x in data.keys()):
             raise TypeError("All keys must be strings")
 
         self._data = data
diff --git a/html5lib/_utils.py b/html5lib/_utils.py
index 2e74c07f..5853e81d 100644
--- a/html5lib/_utils.py
+++ b/html5lib/_utils.py
@@ -3,15 +3,7 @@
 
 from collections.abc import Mapping
 
-from six import text_type, PY3
-
-if PY3:
-    import xml.etree.ElementTree as default_etree
-else:
-    try:
-        import xml.etree.ElementTree as default_etree
-    except ImportError:
-        import xml.etree.ElementTree as default_etree
+import xml.etree.ElementTree as default_etree
 
 
 __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
@@ -27,10 +19,10 @@
 # escapes.
 try:
     _x = eval('"\\uD800"')  # pylint:disable=eval-used
-    if not isinstance(_x, text_type):
+    if not isinstance(_x, str):
         # We need this with u"" because of http://bugs.jython.org/issue2039
         _x = eval('u"\\uD800"')  # pylint:disable=eval-used
-        assert isinstance(_x, text_type)
+        assert isinstance(_x, str)
 except Exception:
     supports_lone_surrogates = False
 else:
diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py
index cd7a6a43..0d47f921 100644
--- a/html5lib/filters/lint.py
+++ b/html5lib/filters/lint.py
@@ -1,6 +1,4 @@
 
-from six import text_type
-
 from . import base
 from ..constants import namespaces, voidElements
 
@@ -32,9 +30,9 @@ def __iter__(self):
             if type in ("StartTag", "EmptyTag"):
                 namespace = token["namespace"]
                 name = token["name"]
-                assert namespace is None or isinstance(namespace, text_type)
+                assert namespace is None or isinstance(namespace, str)
                 assert namespace != ""
-                assert isinstance(name, text_type)
+                assert isinstance(name, str)
                 assert name != ""
                 assert isinstance(token["data"], dict)
                 if (not namespace or namespace == namespaces["html"]) and name in voidElements:
@@ -44,18 +42,18 @@ def __iter__(self):
                 if type == "StartTag" and self.require_matching_tags:
                     open_elements.append((namespace, name))
                 for (namespace, name), value in token["data"].items():
-                    assert namespace is None or isinstance(namespace, text_type)
+                    assert namespace is None or isinstance(namespace, str)
                     assert namespace != ""
-                    assert isinstance(name, text_type)
+                    assert isinstance(name, str)
                     assert name != ""
-                    assert isinstance(value, text_type)
+                    assert isinstance(value, str)
 
             elif type == "EndTag":
                 namespace = token["namespace"]
                 name = token["name"]
-                assert namespace is None or isinstance(namespace, text_type)
+                assert namespace is None or isinstance(namespace, str)
                 assert namespace != ""
-                assert isinstance(name, text_type)
+                assert isinstance(name, str)
                 assert name != ""
                 if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                     assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
@@ -65,26 +63,26 @@ def __iter__(self):
 
             elif type == "Comment":
                 data = token["data"]
-                assert isinstance(data, text_type)
+                assert isinstance(data, str)
 
             elif type in ("Characters", "SpaceCharacters"):
                 data = token["data"]
-                assert isinstance(data, text_type)
+                assert isinstance(data, str)
                 assert data != ""
                 if type == "SpaceCharacters":
                     assert data.strip(spaceCharacters) == ""
 
             elif type == "Doctype":
                 name = token["name"]
-                assert name is None or isinstance(name, text_type)
-                assert token["publicId"] is None or isinstance(name, text_type)
-                assert token["systemId"] is None or isinstance(name, text_type)
+                assert name is None or isinstance(name, str)
+                assert token["publicId"] is None or isinstance(token["publicId"], str)
+                assert token["systemId"] is None or isinstance(token["systemId"], str)
 
             elif type == "Entity":
-                assert isinstance(token["name"], text_type)
+                assert isinstance(token["name"], str)
 
             elif type == "SerializerError":
-                assert isinstance(token["data"], text_type)
+                assert isinstance(token["data"], str)
 
             else:
                 assert False, "Unknown token type: %(type)s" % {"type": type}
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
index 2dc4583d..94c8602c 100644
--- a/html5lib/filters/sanitizer.py
+++ b/html5lib/filters/sanitizer.py
@@ -9,10 +9,9 @@
 
 import re
 import warnings
+from urllib.parse import urlparse
 from xml.sax.saxutils import escape, unescape
 
-from six.moves import urllib_parse as urlparse
-
 from . import base
 from ..constants import namespaces, prefixes
 
@@ -845,7 +844,7 @@ def allowed_token(self, token):
                 # remove replacement characters from unescaped characters
                 val_unescaped = val_unescaped.replace("\ufffd", "")
                 try:
-                    uri = urlparse.urlparse(val_unescaped)
+                    uri = urlparse(val_unescaped)
                 except ValueError:
                     uri = None
                     del attrs[attr]
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 3fe78b6b..91d71a88 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -1,5 +1,3 @@
-from six import viewkeys
-
 from . import _inputstream
 from . import _tokenizer
 
@@ -2773,7 +2771,7 @@ def processEndTag(self, token):
 
 
 def adjust_attributes(token, replacements):
-    needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
+    needs_adjustment = token['data'].keys() & replacements.keys()
     if needs_adjustment:
         token['data'] = type(token['data'])((replacements.get(k, k), v)
                                             for k, v in token['data'].items())
diff --git a/html5lib/serializer.py b/html5lib/serializer.py
index 34f1b7e3..ed52593f 100644
--- a/html5lib/serializer.py
+++ b/html5lib/serializer.py
@@ -1,5 +1,3 @@
-from six import text_type
-
 import re
 
 from codecs import register_error, xmlcharrefreplace_errors
@@ -221,14 +219,14 @@ def __init__(self, **kwargs):
         self.strict = False
 
     def encode(self, string):
-        assert isinstance(string, text_type)
+        assert isinstance(string, str)
         if self.encoding:
             return string.encode(self.encoding, "htmlentityreplace")
         else:
             return string
 
     def encodeStrict(self, string):
-        assert isinstance(string, text_type)
+        assert isinstance(string, str)
         if self.encoding:
             return string.encode(self.encoding, "strict")
         else:
diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py
index aa7e35e2..2fc6140d 100644
--- a/html5lib/tests/test_meta.py
+++ b/html5lib/tests/test_meta.py
@@ -1,5 +1,3 @@
-
-import six
 from unittest.mock import Mock
 
 from . import support
@@ -26,11 +24,7 @@ def test_errorMessage():
     r = support.errorMessage(input, expected, actual)
 
     # Assertions!
-    if six.PY2:
-        assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
-    else:
-        assert six.PY3
-        assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
+    assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
 
     assert input.__repr__.call_count == 1
     assert expected.__repr__.call_count == 1
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index f30595b4..da76cd41 100644
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -1,6 +1,3 @@
-
-from six import PY2, text_type
-
 import io
 
 from . import support  # noqa
@@ -73,11 +70,6 @@ def test_debug_log():
                 ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}),
                 ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})]
 
-    if PY2:
-        for i, log in enumerate(expected):
-            log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log]
-            expected[i] = tuple(log)
-
     assert parser.log == expected
 
 
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index 7dce2b1d..0512419c 100644
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -7,8 +7,8 @@
 
 import pytest
 
-import six
-from six.moves import http_client, urllib
+import http.client
+import urllib.response
 
 from html5lib._inputstream import (BufferedStream, HTMLInputStream,
                                    HTMLUnicodeInputStream, HTMLBinaryInputStream)
@@ -190,7 +190,7 @@ def makefile(self, _mode, _bufsize=None):
             # pylint:disable=unused-argument
             return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
 
-    source = http_client.HTTPResponse(FakeSocket())
+    source = http.client.HTTPResponse(FakeSocket())
     source.begin()
     stream = HTMLInputStream(source)
     assert stream.charsUntil(" ") == "Text"
@@ -201,15 +201,12 @@ def test_python_issue_20007_b():
     Make sure we have a work-around for Python bug #20007
     http://bugs.python.org/issue20007
     """
-    if six.PY2:
-        return
-
     class FakeSocket:
         def makefile(self, _mode, _bufsize=None):
             # pylint:disable=unused-argument
             return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
 
-    source = http_client.HTTPResponse(FakeSocket())
+    source = http.client.HTTPResponse(FakeSocket())
     source.begin()
     wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
     stream = HTMLInputStream(wrapped)
diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py
index f8a74eee..4e993571 100644
--- a/html5lib/tests/test_tokenizer2.py
+++ b/html5lib/tests/test_tokenizer2.py
@@ -1,8 +1,6 @@
 
 import io
 
-from six import unichr, text_type
-
 from html5lib._tokenizer import HTMLTokenizer
 from html5lib.constants import tokenTypes
 
@@ -15,7 +13,7 @@ def ignore_parse_errors(toks):
 
 def test_maintain_attribute_order():
     # generate loads to maximize the chance a hash-based mutation will occur
-    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
     stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
 
     toks = HTMLTokenizer(stream)
@@ -48,7 +46,7 @@ def test_duplicate_attribute():
 
 def test_maintain_duplicate_attribute_order():
     # generate loads to maximize the chance a hash-based mutation will occur
-    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
     stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
 
     toks = HTMLTokenizer(stream)
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 89e20dab..22ee0cb7 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -2,7 +2,6 @@
 import itertools
 import sys
 
-from six import unichr, text_type
 import pytest
 
 try:
@@ -150,7 +149,7 @@ def test_maintain_attribute_order(treeName):
         pytest.skip("Treebuilder not loaded")
 
     # generate loads to maximize the chance a hash-based mutation will occur
-    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
     data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
 
     parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
index 9ba19b16..d2605a12 100644
--- a/html5lib/tests/tokenizer.py
+++ b/html5lib/tests/tokenizer.py
@@ -5,7 +5,6 @@
 import re
 
 import pytest
-from six import unichr
 
 from html5lib._tokenizer import HTMLTokenizer
 from html5lib import constants, _utils
@@ -145,15 +144,15 @@ def repl(m):
                 low = int(m.group(2), 16)
                 if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
                     cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000
-                    return unichr(cp)
+                    return chr(cp)
                 else:
-                    return unichr(high) + unichr(low)
+                    return chr(high) + chr(low)
             else:
-                return unichr(int(m.group(1), 16))
+                return chr(int(m.group(1), 16))
         try:
             return _surrogateRe.sub(repl, inp)
         except ValueError:
-            # This occurs when unichr throws ValueError, which should
+            # This occurs when chr throws ValueError, which should
             # only be for a lone-surrogate.
             if _utils.supports_lone_surrogates:
                 raise
diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
index 125ed82c..3fec12c4 100644
--- a/html5lib/treebuilders/base.py
+++ b/html5lib/treebuilders/base.py
@@ -1,5 +1,3 @@
-from six import text_type
-
 from ..constants import scopingElements, tableInsertModeElements, namespaces
 
 # The scope markers are inserted when entering object elements,
@@ -199,7 +197,7 @@ def elementInScope(self, target, variant=None):
         # match any node with that name
         exactNode = hasattr(target, "nameTuple")
         if not exactNode:
-            if isinstance(target, text_type):
+            if isinstance(target, str):
                 target = (namespaces["html"], target)
             assert isinstance(target, tuple)
 
@@ -322,7 +320,7 @@ def _setInsertFromTable(self, value):
 
     def insertElementNormal(self, token):
         name = token["name"]
-        assert isinstance(name, text_type), "Element %s not unicode" % name
+        assert isinstance(name, str), "Element %s not unicode" % name
         namespace = token.get("namespace", self.defaultNamespace)
         element = self.elementClass(name, namespace)
         element.attributes = token["data"]
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index bd20b957..f9564fe0 100644
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -1,7 +1,5 @@
 # pylint:disable=protected-access
 
-from six import text_type
-
 import re
 
 from copy import copy
@@ -221,7 +219,7 @@ def serializeElement(element, indent=0):
             elif element.tag == ElementTreeCommentType:
                 rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
             else:
-                assert isinstance(element.tag, text_type), \
+                assert isinstance(element.tag, str), \
                     "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
                 nsmatch = tag_regexp.match(element.tag)
 
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 3e88d76e..b0be4617 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -24,7 +24,6 @@
 from .. import _ihatexml
 
 import lxml.etree as etree
-from six import PY3, binary_type
 
 
 fullTree = True
@@ -204,8 +203,6 @@ def _coerceKey(self, key):
 
             def __getitem__(self, key):
                 value = self._element._element.attrib[self._coerceKey(key)]
-                if not PY3 and isinstance(value, binary_type):
-                    value = value.decode("ascii")
                 return value
 
             def __setitem__(self, key, value):
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index ef5e914c..41607f52 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -2,8 +2,6 @@
 from collections import OrderedDict
 import re
 
-from six import string_types
-
 from . import base
 from .._utils import moduleFactoryFactory
 
@@ -50,7 +48,7 @@ def getNodeDetails(self, node):
                 return base.COMMENT, node.text
 
             else:
-                assert isinstance(node.tag, string_types), type(node.tag)
+                assert isinstance(node.tag, str), type(node.tag)
                 # This is assumed to be an ordinary element
                 match = tag_regexp.match(node.tag)
                 if match:
diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py
index af6c260d..0ec633ac 100644
--- a/html5lib/treewalkers/etree_lxml.py
+++ b/html5lib/treewalkers/etree_lxml.py
@@ -1,5 +1,3 @@
-from six import text_type
-
 from collections import OrderedDict
 
 from lxml import etree
@@ -13,7 +11,7 @@
 def ensure_str(s):
     if s is None:
         return None
-    elif isinstance(s, text_type):
+    elif isinstance(s, str):
         return s
     else:
         return s.decode("ascii", "strict")
diff --git a/requirements-oldest.txt b/requirements-oldest.txt
index 68d0f13d..07b659a5 100644
--- a/requirements-oldest.txt
+++ b/requirements-oldest.txt
@@ -1,7 +1,6 @@
 # This allows us to install the actually oldest supported dependencies and test whether that works.
 
 # requirements.txt
-six==1.9
 webencodings==0.5.1
 
 # requirements-optional.txt
@@ -26,4 +25,5 @@ pytest==5.4.2 ; python_version >= '3'
 coverage==5.1
 pytest-expect==1.1.0
+six==1.9  # required by pytest-expect
 mock==3.0.5 ; python_version < '3.6'
-mock==4.0.2 ; python_version >= '3.6'
\ No newline at end of file
+mock==4.0.2 ; python_version >= '3.6'
diff --git a/requirements-test.txt b/requirements-test.txt
index aca31f5e..1415d163 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -6,5 +6,6 @@ pytest>=4.6.10,<5 ; python_version < '3'
 pytest>=5.4.2,<8 ; python_version >= '3'
 coverage>=5.1,<6
 pytest-expect>=1.1.0,<2
+six>=1.9 # required by pytest-expect
 mock>=3.0.5,<4 ; python_version < '3.3'
 setuptools; python_version >= '3.12'
diff --git a/requirements.txt b/requirements.txt
index ae7ec3d0..be8fcb77 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1 @@
-six>=1.9
 webencodings
diff --git a/setup.py b/setup.py
index afab2904..9fbcc24f 100644
--- a/setup.py
+++ b/setup.py
@@ -102,7 +102,6 @@ def default_environment():
       maintainer_email='james@hoppipolla.co.uk',
       packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
       install_requires=[
-          'six>=1.9',
           'webencodings>=0.5.1',
       ],
       python_requires=">=3.8",