ashleysommer · ashleysommer · Sep 25, 2024 · Sep 9, 2024
diff --git a/debug-info.py b/debug-info.py
@@ -11,7 +11,7 @@
     "maxsize": sys.maxsize
 }
 
-search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"]
+search_modules = ["chardet", "genshi", "html5lib", "lxml"]
 found_modules = []
 
 for m in search_modules:

diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py
@@ -1,6 +1,6 @@
 
-from six import text_type
-from six.moves import http_client, urllib
+import http.client
+import urllib.response
 
 import codecs
 import re
@@ -124,10 +124,10 @@ def _readFromBuffer(self, bytes):
 def HTMLInputStream(source, **kwargs):
     # Work around Python bug #20007: read(0) closes the connection.
     # http://bugs.python.org/issue20007
-    if (isinstance(source, http_client.HTTPResponse) or
+    if (isinstance(source, http.client.HTTPResponse) or
         # Also check for addinfourl wrapping HTTPResponse
         (isinstance(source, urllib.response.addbase) and
-         isinstance(source.fp, http_client.HTTPResponse))):
+         isinstance(source.fp, http.client.HTTPResponse))):
         isUnicode = False
     elif hasattr(source, "read"):
-        isUnicode = isinstance(source.read(0), text_type)
+        isUnicode = isinstance(source.read(0), str)

diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
@@ -1,6 +1,4 @@
 
-from six import unichr as chr
-
 from collections import deque, OrderedDict
 from sys import version_info
 

diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py
@@ -1,13 +1,11 @@
-from six import text_type
-
 from bisect import bisect_left
 
 from ._base import Trie as ABCTrie
 
 
 class Trie(ABCTrie):
     def __init__(self, data):
-        if not all(isinstance(x, text_type) for x in data.keys()):
+        if not all(isinstance(x, str) for x in data.keys()):
             raise TypeError("All keys must be strings")
 
         self._data = data

diff --git a/html5lib/_utils.py b/html5lib/_utils.py
@@ -3,15 +3,7 @@
 
 from collections.abc import Mapping
 
-from six import text_type, PY3
-
-if PY3:
-    import xml.etree.ElementTree as default_etree
-else:
-    try:
-        import xml.etree.ElementTree as default_etree
-    except ImportError:
-        import xml.etree.ElementTree as default_etree
+import xml.etree.ElementTree as default_etree
 
 
 __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
@@ -27,10 +19,10 @@
 # escapes.
 try:
     _x = eval('"\\uD800"')  # pylint:disable=eval-used
-    if not isinstance(_x, text_type):
+    if not isinstance(_x, str):
         # We need this with u"" because of http://bugs.jython.org/issue2039
         _x = eval('u"\\uD800"')  # pylint:disable=eval-used
-        assert isinstance(_x, text_type)
+        assert isinstance(_x, str)
 except Exception:
     supports_lone_surrogates = False
 else:

diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py
@@ -1,6 +1,4 @@
 
-from six import text_type
-
 from . import base
 from ..constants import namespaces, voidElements
 
@@ -32,9 +30,9 @@ def __iter__(self):
             if type in ("StartTag", "EmptyTag"):
                 namespace = token["namespace"]
                 name = token["name"]
-                assert namespace is None or isinstance(namespace, text_type)
+                assert namespace is None or isinstance(namespace, str)
                 assert namespace != ""
-                assert isinstance(name, text_type)
+                assert isinstance(name, str)
                 assert name != ""
                 assert isinstance(token["data"], dict)
                 if (not namespace or namespace == namespaces["html"]) and name in voidElements:
@@ -44,18 +42,18 @@ def __iter__(self):
                 if type == "StartTag" and self.require_matching_tags:
                     open_elements.append((namespace, name))
                 for (namespace, name), value in token["data"].items():
-                    assert namespace is None or isinstance(namespace, text_type)
+                    assert namespace is None or isinstance(namespace, str)
                     assert namespace != ""
-                    assert isinstance(name, text_type)
+                    assert isinstance(name, str)
                     assert name != ""
-                    assert isinstance(value, text_type)
+                    assert isinstance(value, str)
 
             elif type == "EndTag":
                 namespace = token["namespace"]
                 name = token["name"]
-                assert namespace is None or isinstance(namespace, text_type)
+                assert namespace is None or isinstance(namespace, str)
                 assert namespace != ""
-                assert isinstance(name, text_type)
+                assert isinstance(name, str)
                 assert name != ""
                 if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                     assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
@@ -65,26 +63,26 @@ def __iter__(self):
 
             elif type == "Comment":
                 data = token["data"]
-                assert isinstance(data, text_type)
+                assert isinstance(data, str)
 
             elif type in ("Characters", "SpaceCharacters"):
                 data = token["data"]
-                assert isinstance(data, text_type)
+                assert isinstance(data, str)
                 assert data != ""
                 if type == "SpaceCharacters":
                     assert data.strip(spaceCharacters) == ""
 
             elif type == "Doctype":
                 name = token["name"]
-                assert name is None or isinstance(name, text_type)
-                assert token["publicId"] is None or isinstance(name, text_type)
-                assert token["systemId"] is None or isinstance(name, text_type)
+                assert name is None or isinstance(name, str)
+                assert token["publicId"] is None or isinstance(token["publicId"], str)
+                assert token["systemId"] is None or isinstance(token["systemId"], str)
 
             elif type == "Entity":
-                assert isinstance(token["name"], text_type)
+                assert isinstance(token["name"], str)
 
             elif type == "SerializerError":
-                assert isinstance(token["data"], text_type)
+                assert isinstance(token["data"], str)
 
             else:
                 assert False, "Unknown token type: %(type)s" % {"type": type}

diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
@@ -9,10 +9,9 @@
 
 import re
 import warnings
+from urllib.parse import urlparse
 from xml.sax.saxutils import escape, unescape
 
-from six.moves import urllib_parse as urlparse
-
 from . import base
 from ..constants import namespaces, prefixes
 
@@ -845,7 +844,7 @@ def allowed_token(self, token):
                 # remove replacement characters from unescaped characters
                 val_unescaped = val_unescaped.replace("\ufffd", "")
                 try:
-                    uri = urlparse.urlparse(val_unescaped)
+                    uri = urlparse(val_unescaped)
                 except ValueError:
                     uri = None
                     del attrs[attr]

diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
@@ -1,5 +1,3 @@
-from six import viewkeys
-
 from . import _inputstream
 from . import _tokenizer
 
@@ -2773,7 +2771,7 @@ def processEndTag(self, token):
 
 
 def adjust_attributes(token, replacements):
-    needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
+    needs_adjustment = token['data'].keys() & replacements.keys()
     if needs_adjustment:
         token['data'] = type(token['data'])((replacements.get(k, k), v)
                                             for k, v in token['data'].items())

diff --git a/html5lib/serializer.py b/html5lib/serializer.py
@@ -1,5 +1,3 @@
-from six import text_type
-
 import re
 
 from codecs import register_error, xmlcharrefreplace_errors
@@ -221,14 +219,14 @@ def __init__(self, **kwargs):
         self.strict = False
 
     def encode(self, string):
-        assert isinstance(string, text_type)
+        assert isinstance(string, str)
         if self.encoding:
             return string.encode(self.encoding, "htmlentityreplace")
         else:
             return string
 
     def encodeStrict(self, string):
-        assert isinstance(string, text_type)
+        assert isinstance(string, str)
         if self.encoding:
             return string.encode(self.encoding, "strict")
         else:

diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py
@@ -1,5 +1,3 @@
-
-import six
 from unittest.mock import Mock
 
 from . import support
@@ -26,11 +24,7 @@ def test_errorMessage():
     r = support.errorMessage(input, expected, actual)
 
     # Assertions!
-    if six.PY2:
-        assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
-    else:
-        assert six.PY3
-        assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
+    assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
 
     assert input.__repr__.call_count == 1
     assert expected.__repr__.call_count == 1

diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
@@ -1,6 +1,3 @@
-
-from six import PY2, text_type
-
 import io
 
 from . import support  # noqa
@@ -73,11 +70,6 @@ def test_debug_log():
                 ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}),
                 ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})]
 
-    if PY2:
-        for i, log in enumerate(expected):
-            log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log]
-            expected[i] = tuple(log)
-
     assert parser.log == expected
 
 

diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
@@ -7,8 +7,8 @@
 
 import pytest
 
-import six
-from six.moves import http_client, urllib
+import http.client
+import urllib.response
 
 from html5lib._inputstream import (BufferedStream, HTMLInputStream,
                                    HTMLUnicodeInputStream, HTMLBinaryInputStream)
@@ -190,7 +190,7 @@ def makefile(self, _mode, _bufsize=None):
             # pylint:disable=unused-argument
             return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
 
-    source = http_client.HTTPResponse(FakeSocket())
+    source = http.client.HTTPResponse(FakeSocket())
     source.begin()
     stream = HTMLInputStream(source)
     assert stream.charsUntil(" ") == "Text"
@@ -201,15 +201,12 @@ def test_python_issue_20007_b():
     Make sure we have a work-around for Python bug #20007
     http://bugs.python.org/issue20007
     """
-    if six.PY2:
-        return
-
     class FakeSocket:
         def makefile(self, _mode, _bufsize=None):
             # pylint:disable=unused-argument
             return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
 
-    source = http_client.HTTPResponse(FakeSocket())
+    source = http.client.HTTPResponse(FakeSocket())
     source.begin()
     wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
     stream = HTMLInputStream(wrapped)

diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py
@@ -1,8 +1,6 @@
 
 import io
 
-from six import unichr, text_type
-
 from html5lib._tokenizer import HTMLTokenizer
 from html5lib.constants import tokenTypes
 
@@ -15,7 +13,7 @@ def ignore_parse_errors(toks):
 
 def test_maintain_attribute_order():
     # generate loads to maximize the chance a hash-based mutation will occur
-    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
     stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")
 
     toks = HTMLTokenizer(stream)
@@ -48,7 +46,7 @@ def test_duplicate_attribute():
 
 def test_maintain_duplicate_attribute_order():
     # generate loads to maximize the chance a hash-based mutation will occur
-    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
     stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")
 
     toks = HTMLTokenizer(stream)

diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
@@ -2,7 +2,6 @@
 import itertools
 import sys
 
-from six import unichr, text_type
 import pytest
 
 try:
@@ -150,7 +149,7 @@ def test_maintain_attribute_order(treeName):
         pytest.skip("Treebuilder not loaded")
 
     # generate loads to maximize the chance a hash-based mutation will occur
-    attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+    attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
     data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"
 
     parser = html5parser.HTMLParser(tree=treeAPIs["builder"])

diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
@@ -5,7 +5,6 @@
 import re
 
 import pytest
-from six import unichr
 
 from html5lib._tokenizer import HTMLTokenizer
 from html5lib import constants, _utils
@@ -145,15 +144,15 @@ def repl(m):
                 low = int(m.group(2), 16)
                 if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
                     cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000
-                    return unichr(cp)
+                    return chr(cp)
                 else:
-                    return unichr(high) + unichr(low)
+                    return chr(high) + chr(low)
             else:
-                return unichr(int(m.group(1), 16))
+                return chr(int(m.group(1), 16))
         try:
             return _surrogateRe.sub(repl, inp)
         except ValueError:
-            # This occurs when unichr throws ValueError, which should
+            # This occurs when chr throws ValueError, which should
             # only be for a lone-surrogate.
             if _utils.supports_lone_surrogates:
                 raise

diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
@@ -1,5 +1,3 @@
-from six import text_type
-
 from ..constants import scopingElements, tableInsertModeElements, namespaces
 
 # The scope markers are inserted when entering object elements,
@@ -199,7 +197,7 @@ def elementInScope(self, target, variant=None):
         # match any node with that name
         exactNode = hasattr(target, "nameTuple")
         if not exactNode:
-            if isinstance(target, text_type):
+            if isinstance(target, str):
                 target = (namespaces["html"], target)
             assert isinstance(target, tuple)
 
@@ -322,7 +320,7 @@ def _setInsertFromTable(self, value):
 
     def insertElementNormal(self, token):
         name = token["name"]
-        assert isinstance(name, text_type), "Element %s not unicode" % name
+        assert isinstance(name, str), "Element %s not unicode" % name
         namespace = token.get("namespace", self.defaultNamespace)
         element = self.elementClass(name, namespace)
         element.attributes = token["data"]