From f90ee8001693bfff05ffce01ccf1ecec09b4963a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 9 Sep 2024 10:18:50 +0200 Subject: [PATCH] remove six --- debug-info.py | 2 +- html5lib/_inputstream.py | 8 ++++---- html5lib/_tokenizer.py | 2 -- html5lib/_trie/py.py | 4 +--- html5lib/_utils.py | 14 +++----------- html5lib/filters/lint.py | 30 ++++++++++++++--------------- html5lib/filters/sanitizer.py | 5 ++--- html5lib/html5parser.py | 4 +--- html5lib/serializer.py | 6 ++---- html5lib/tests/test_meta.py | 8 +------- html5lib/tests/test_parser2.py | 8 -------- html5lib/tests/test_stream.py | 11 ++++------- html5lib/tests/test_tokenizer2.py | 6 ++---- html5lib/tests/test_treewalkers.py | 3 +-- html5lib/tests/tokenizer.py | 9 ++++----- html5lib/treebuilders/base.py | 6 ++---- html5lib/treebuilders/etree.py | 4 +--- html5lib/treebuilders/etree_lxml.py | 3 --- html5lib/treewalkers/etree.py | 4 +--- html5lib/treewalkers/etree_lxml.py | 4 +--- requirements-oldest.txt | 3 +-- requirements-test.txt | 1 + requirements.txt | 1 - setup.py | 1 - 24 files changed, 47 insertions(+), 100 deletions(-) diff --git a/debug-info.py b/debug-info.py index 7e1b6fd0..5523067c 100644 --- a/debug-info.py +++ b/debug-info.py @@ -11,7 +11,7 @@ "maxsize": sys.maxsize } -search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"] +search_modules = ["chardet", "genshi", "html5lib", "lxml"] found_modules = [] for m in search_modules: diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 54c5c498..57a220a4 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -1,6 +1,6 @@ -from six import text_type -from six.moves import http_client, urllib +import http.client +import urllib.response import codecs import re @@ -124,10 +124,10 @@ def _readFromBuffer(self, bytes): def HTMLInputStream(source, **kwargs): # Work around Python bug #20007: read(0) closes the connection. # http://bugs.python.org/issue20007 - if (isinstance(source, http_client.HTTPResponse) or + if (isinstance(source, http.client.HTTPResponse) or # Also check for addinfourl wrapping HTTPResponse (isinstance(source, urllib.response.addbase) and - isinstance(source.fp, http_client.HTTPResponse))): + isinstance(source.fp, http.client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), text_type) diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index 782310ec..75dab441 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,6 +1,4 @@ -from six import unichr as chr - from collections import deque, OrderedDict from sys import version_info diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index 92f6f861..bc6363c4 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,5 +1,3 @@ -from six import text_type - from bisect import bisect_left from ._base import Trie as ABCTrie @@ -7,7 +5,7 @@ class Trie(ABCTrie): def __init__(self, data): - if not all(isinstance(x, text_type) for x in data.keys()): + if not all(isinstance(x, str) for x in data.keys()): raise TypeError("All keys must be strings") self._data = data diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 2e74c07f..5853e81d 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -3,15 +3,7 @@ from collections.abc import Mapping -from six import text_type, PY3 - -if PY3: - import xml.etree.ElementTree as default_etree -else: - try: - import xml.etree.ElementTree as default_etree - except ImportError: - import xml.etree.ElementTree as default_etree +import xml.etree.ElementTree as default_etree __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", @@ -27,10 +19,10 @@ # escapes. try: _x = eval('"\\uD800"') # pylint:disable=eval-used - if not isinstance(_x, text_type): + if not isinstance(_x, str): # We need this with u"" because of http://bugs.jython.org/issue2039 _x = eval('u"\\uD800"') # pylint:disable=eval-used - assert isinstance(_x, text_type) + assert isinstance(_x, str) except Exception: supports_lone_surrogates = False else: diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index cd7a6a43..0d47f921 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,6 +1,4 @@ -from six import text_type - from . import base from ..constants import namespaces, voidElements @@ -32,9 +30,9 @@ def __iter__(self): if type in ("StartTag", "EmptyTag"): namespace = token["namespace"] name = token["name"] - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" assert isinstance(token["data"], dict) if (not namespace or namespace == namespaces["html"]) and name in voidElements: @@ -44,18 +42,18 @@ def __iter__(self): if type == "StartTag" and self.require_matching_tags: open_elements.append((namespace, name)) for (namespace, name), value in token["data"].items(): - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" - assert isinstance(value, text_type) + assert isinstance(value, str) elif type == "EndTag": namespace = token["namespace"] name = token["name"] - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} @@ -65,26 +63,26 @@ def __iter__(self): elif type == "Comment": data = token["data"] - assert isinstance(data, text_type) + assert isinstance(data, str) elif type in ("Characters", "SpaceCharacters"): data = token["data"] - assert isinstance(data, text_type) + assert isinstance(data, str) assert data != "" if type == "SpaceCharacters": assert data.strip(spaceCharacters) == "" elif type == "Doctype": name = token["name"] - assert name is None or isinstance(name, text_type) - assert token["publicId"] is None or isinstance(name, text_type) - assert token["systemId"] is None or isinstance(name, text_type) + assert name is None or isinstance(name, str) + assert token["publicId"] is None or isinstance(name, str) + assert token["systemId"] is None or isinstance(name, str) elif type == "Entity": - assert isinstance(token["name"], text_type) + assert isinstance(token["name"], str) elif type == "SerializerError": - assert isinstance(token["data"], text_type) + assert isinstance(token["data"], str) else: assert False, "Unknown token type: %(type)s" % {"type": type} diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 2dc4583d..94c8602c 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -9,10 +9,9 @@ import re import warnings +from urllib.parse import urlparse from xml.sax.saxutils import escape, unescape -from six.moves import urllib_parse as urlparse - from . import base from ..constants import namespaces, prefixes @@ -845,7 +844,7 @@ def allowed_token(self, token): # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") try: - uri = urlparse.urlparse(val_unescaped) + uri = urlparse(val_unescaped) except ValueError: uri = None del attrs[attr] diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 3fe78b6b..91d71a88 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,5 +1,3 @@ -from six import viewkeys - from . import _inputstream from . import _tokenizer @@ -2773,7 +2771,7 @@ def processEndTag(self, token): def adjust_attributes(token, replacements): - needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) + needs_adjustment = token['data'].keys() & replacements.keys() if needs_adjustment: token['data'] = type(token['data'])((replacements.get(k, k), v) for k, v in token['data'].items()) diff --git a/html5lib/serializer.py b/html5lib/serializer.py index 34f1b7e3..ed52593f 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,5 +1,3 @@ -from six import text_type - import re from codecs import register_error, xmlcharrefreplace_errors @@ -221,14 +219,14 @@ def __init__(self, **kwargs): self.strict = False def encode(self, string): - assert isinstance(string, text_type) + assert isinstance(string, str) if self.encoding: return string.encode(self.encoding, "htmlentityreplace") else: return string def encodeStrict(self, string): - assert isinstance(string, text_type) + assert isinstance(string, str) if self.encoding: return string.encode(self.encoding, "strict") else: diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py index aa7e35e2..2fc6140d 100644 --- a/html5lib/tests/test_meta.py +++ b/html5lib/tests/test_meta.py @@ -1,5 +1,3 @@ - -import six from unittest.mock import Mock from . import support @@ -26,11 +24,7 @@ def test_errorMessage(): r = support.errorMessage(input, expected, actual) # Assertions! - if six.PY2: - assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r - else: - assert six.PY3 - assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r + assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r assert input.__repr__.call_count == 1 assert expected.__repr__.call_count == 1 diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index f30595b4..da76cd41 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -1,6 +1,3 @@ - -from six import PY2, text_type - import io from . import support # noqa @@ -73,11 +70,6 @@ def test_debug_log(): ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}), ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})] - if PY2: - for i, log in enumerate(expected): - log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log] - expected[i] = tuple(log) - assert parser.log == expected diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index 7dce2b1d..0512419c 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -7,8 +7,8 @@ import pytest -import six -from six.moves import http_client, urllib +import http.client +import urllib.response from html5lib._inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) @@ -190,7 +190,7 @@ def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - source = http_client.HTTPResponse(FakeSocket()) + source = http.client.HTTPResponse(FakeSocket()) source.begin() stream = HTMLInputStream(source) assert stream.charsUntil(" ") == "Text" @@ -201,15 +201,12 @@ def test_python_issue_20007_b(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - if six.PY2: - return - class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - source = http_client.HTTPResponse(FakeSocket()) + source = http.client.HTTPResponse(FakeSocket()) source.begin() wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com") stream = HTMLInputStream(wrapped) diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index f8a74eee..4e993571 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -1,8 +1,6 @@ import io -from six import unichr, text_type - from html5lib._tokenizer import HTMLTokenizer from html5lib.constants import tokenTypes @@ -15,7 +13,7 @@ def ignore_parse_errors(toks): def test_maintain_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] stream = io.StringIO("") toks = HTMLTokenizer(stream) @@ -48,7 +46,7 @@ def test_duplicate_attribute(): def test_maintain_duplicate_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] stream = io.StringIO("") toks = HTMLTokenizer(stream) diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 89e20dab..22ee0cb7 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -2,7 +2,6 @@ import itertools import sys -from six import unichr, text_type import pytest try: @@ -150,7 +149,7 @@ def test_maintain_attribute_order(treeName): pytest.skip("Treebuilder not loaded") # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] data = "" parser = html5parser.HTMLParser(tree=treeAPIs["builder"]) diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index 9ba19b16..d2605a12 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -5,7 +5,6 @@ import re import pytest -from six import unichr from html5lib._tokenizer import HTMLTokenizer from html5lib import constants, _utils @@ -145,15 +144,15 @@ def repl(m): low = int(m.group(2), 16) if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF: cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000 - return unichr(cp) + return chr(cp) else: - return unichr(high) + unichr(low) + return chr(high) + chr(low) else: - return unichr(int(m.group(1), 16)) + return chr(int(m.group(1), 16)) try: return _surrogateRe.sub(repl, inp) except ValueError: - # This occurs when unichr throws ValueError, which should + # This occurs when chr throws ValueError, which should # only be for a lone-surrogate. if _utils.supports_lone_surrogates: raise diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index 125ed82c..3fec12c4 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -1,5 +1,3 @@ -from six import text_type - from ..constants import scopingElements, tableInsertModeElements, namespaces # The scope markers are inserted when entering object elements, @@ -199,7 +197,7 @@ def elementInScope(self, target, variant=None): # match any node with that name exactNode = hasattr(target, "nameTuple") if not exactNode: - if isinstance(target, text_type): + if isinstance(target, str): target = (namespaces["html"], target) assert isinstance(target, tuple) @@ -322,7 +320,7 @@ def _setInsertFromTable(self, value): def insertElementNormal(self, token): name = token["name"] - assert isinstance(name, text_type), "Element %s not unicode" % name + assert isinstance(name, str), "Element %s not unicode" % name namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index bd20b957..f9564fe0 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -1,7 +1,5 @@ # pylint:disable=protected-access -from six import text_type - import re from copy import copy @@ -221,7 +219,7 @@ def serializeElement(element, indent=0): elif element.tag == ElementTreeCommentType: rv.append("|%s" % (' ' * indent, element.text)) else: - assert isinstance(element.tag, text_type), \ + assert isinstance(element.tag, str), \ "Expected unicode, got %s, %s" % (type(element.tag), element.tag) nsmatch = tag_regexp.match(element.tag) diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 3e88d76e..b0be4617 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -24,7 +24,6 @@ from .. import _ihatexml import lxml.etree as etree -from six import PY3, binary_type fullTree = True @@ -204,8 +203,6 @@ def _coerceKey(self, key): def __getitem__(self, key): value = self._element._element.attrib[self._coerceKey(key)] - if not PY3 and isinstance(value, binary_type): - value = value.decode("ascii") return value def __setitem__(self, key, value): diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index ef5e914c..41607f52 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -2,8 +2,6 @@ from collections import OrderedDict import re -from six import string_types - from . import base from .._utils import moduleFactoryFactory @@ -50,7 +48,7 @@ def getNodeDetails(self, node): return base.COMMENT, node.text else: - assert isinstance(node.tag, string_types), type(node.tag) + assert isinstance(node.tag, str), type(node.tag) # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py index af6c260d..0ec633ac 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -1,5 +1,3 @@ -from six import text_type - from collections import OrderedDict from lxml import etree @@ -13,7 +11,7 @@ def ensure_str(s): if s is None: return None - elif isinstance(s, text_type): + elif isinstance(s, str): return s else: return s.decode("ascii", "strict") diff --git a/requirements-oldest.txt b/requirements-oldest.txt index 68d0f13d..07b659a5 100644 --- a/requirements-oldest.txt +++ b/requirements-oldest.txt @@ -1,7 +1,6 @@ # This allows us to install the actually oldest supported dependencies and test whether that works. # requirements.txt -six==1.9 webencodings==0.5.1 # requirements-optional.txt @@ -26,4 +25,4 @@ pytest==5.4.2 ; python_version >= '3' coverage==5.1 pytest-expect==1.1.0 mock==3.0.5 ; python_version < '3.6' -mock==4.0.2 ; python_version >= '3.6' \ No newline at end of file +mock==4.0.2 ; python_version >= '3.6' diff --git a/requirements-test.txt b/requirements-test.txt index aca31f5e..1415d163 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -6,5 +6,6 @@ pytest>=4.6.10,<5 ; python_version < '3' pytest>=5.4.2,<8 ; python_version >= '3' coverage>=5.1,<6 pytest-expect>=1.1.0,<2 +six>=1.9 # required by pytest-expect mock>=3.0.5,<4 ; python_version < '3.3' setuptools; python_version >= '3.12' diff --git a/requirements.txt b/requirements.txt index ae7ec3d0..be8fcb77 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -six>=1.9 webencodings diff --git a/setup.py b/setup.py index afab2904..9fbcc24f 100644 --- a/setup.py +++ b/setup.py @@ -102,7 +102,6 @@ def default_environment(): maintainer_email='james@hoppipolla.co.uk', packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), install_requires=[ - 'six>=1.9', 'webencodings>=0.5.1', ], python_requires=">=3.8",