Skip to content

remove six #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion debug-info.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"maxsize": sys.maxsize
}

search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"]
search_modules = ["chardet", "genshi", "html5lib", "lxml"]
found_modules = []

for m in search_modules:
Expand Down
8 changes: 4 additions & 4 deletions html5lib/_inputstream.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

from six import text_type
from six.moves import http_client, urllib
import http.client
import urllib.response

import codecs
import re
Expand Down Expand Up @@ -124,10 +124,10 @@ def _readFromBuffer(self, bytes):
def HTMLInputStream(source, **kwargs):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
if (isinstance(source, http_client.HTTPResponse) or
if (isinstance(source, http.client.HTTPResponse) or
# Also check for addinfourl wrapping HTTPResponse
(isinstance(source, urllib.response.addbase) and
isinstance(source.fp, http_client.HTTPResponse))):
isinstance(source.fp, http.client.HTTPResponse))):
isUnicode = False
elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type)
Expand Down
2 changes: 0 additions & 2 deletions html5lib/_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@

from six import unichr as chr

from collections import deque, OrderedDict
from sys import version_info

Expand Down
4 changes: 1 addition & 3 deletions html5lib/_trie/py.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
from six import text_type

from bisect import bisect_left

from ._base import Trie as ABCTrie


class Trie(ABCTrie):
def __init__(self, data):
if not all(isinstance(x, text_type) for x in data.keys()):
if not all(isinstance(x, str) for x in data.keys()):
raise TypeError("All keys must be strings")

self._data = data
Expand Down
14 changes: 3 additions & 11 deletions html5lib/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,7 @@

from collections.abc import Mapping

from six import text_type, PY3

if PY3:
import xml.etree.ElementTree as default_etree
else:
try:
import xml.etree.ElementTree as default_etree
except ImportError:
import xml.etree.ElementTree as default_etree
import xml.etree.ElementTree as default_etree


__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
Expand All @@ -27,10 +19,10 @@
# escapes.
try:
_x = eval('"\\uD800"') # pylint:disable=eval-used
if not isinstance(_x, text_type):
if not isinstance(_x, str):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"') # pylint:disable=eval-used
assert isinstance(_x, text_type)
assert isinstance(_x, str)
except Exception:
supports_lone_surrogates = False
else:
Expand Down
30 changes: 14 additions & 16 deletions html5lib/filters/lint.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@

from six import text_type

from . import base
from ..constants import namespaces, voidElements

Expand Down Expand Up @@ -32,9 +30,9 @@ def __iter__(self):
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace is None or isinstance(namespace, str)
assert namespace != ""
assert isinstance(name, text_type)
assert isinstance(name, str)
assert name != ""
assert isinstance(token["data"], dict)
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
Expand All @@ -44,18 +42,18 @@ def __iter__(self):
if type == "StartTag" and self.require_matching_tags:
open_elements.append((namespace, name))
for (namespace, name), value in token["data"].items():
assert namespace is None or isinstance(namespace, text_type)
assert namespace is None or isinstance(namespace, str)
assert namespace != ""
assert isinstance(name, text_type)
assert isinstance(name, str)
assert name != ""
assert isinstance(value, text_type)
assert isinstance(value, str)

elif type == "EndTag":
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace is None or isinstance(namespace, str)
assert namespace != ""
assert isinstance(name, text_type)
assert isinstance(name, str)
assert name != ""
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
Expand All @@ -65,26 +63,26 @@ def __iter__(self):

elif type == "Comment":
data = token["data"]
assert isinstance(data, text_type)
assert isinstance(data, str)

elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
assert isinstance(data, text_type)
assert isinstance(data, str)
assert data != ""
if type == "SpaceCharacters":
assert data.strip(spaceCharacters) == ""

elif type == "Doctype":
name = token["name"]
assert name is None or isinstance(name, text_type)
assert token["publicId"] is None or isinstance(name, text_type)
assert token["systemId"] is None or isinstance(name, text_type)
assert name is None or isinstance(name, str)
assert token["publicId"] is None or isinstance(name, str)
assert token["systemId"] is None or isinstance(name, str)

elif type == "Entity":
assert isinstance(token["name"], text_type)
assert isinstance(token["name"], str)

elif type == "SerializerError":
assert isinstance(token["data"], text_type)
assert isinstance(token["data"], str)

else:
assert False, "Unknown token type: %(type)s" % {"type": type}
Expand Down
5 changes: 2 additions & 3 deletions html5lib/filters/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@

import re
import warnings
from urllib.parse import urlparse
from xml.sax.saxutils import escape, unescape

from six.moves import urllib_parse as urlparse

from . import base
from ..constants import namespaces, prefixes

Expand Down Expand Up @@ -845,7 +844,7 @@ def allowed_token(self, token):
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
try:
uri = urlparse.urlparse(val_unescaped)
uri = urlparse(val_unescaped)
except ValueError:
uri = None
del attrs[attr]
Expand Down
4 changes: 1 addition & 3 deletions html5lib/html5parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from six import viewkeys

from . import _inputstream
from . import _tokenizer

Expand Down Expand Up @@ -2773,7 +2771,7 @@ def processEndTag(self, token):


def adjust_attributes(token, replacements):
needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
needs_adjustment = token['data'].keys() & replacements.keys()
if needs_adjustment:
token['data'] = type(token['data'])((replacements.get(k, k), v)
for k, v in token['data'].items())
Expand Down
6 changes: 2 additions & 4 deletions html5lib/serializer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from six import text_type

import re

from codecs import register_error, xmlcharrefreplace_errors
Expand Down Expand Up @@ -221,14 +219,14 @@ def __init__(self, **kwargs):
self.strict = False

def encode(self, string):
assert isinstance(string, text_type)
assert isinstance(string, str)
if self.encoding:
return string.encode(self.encoding, "htmlentityreplace")
else:
return string

def encodeStrict(self, string):
assert isinstance(string, text_type)
assert isinstance(string, str)
if self.encoding:
return string.encode(self.encoding, "strict")
else:
Expand Down
8 changes: 1 addition & 7 deletions html5lib/tests/test_meta.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@

import six
from unittest.mock import Mock

from . import support
Expand All @@ -26,11 +24,7 @@ def test_errorMessage():
r = support.errorMessage(input, expected, actual)

# Assertions!
if six.PY2:
assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
else:
assert six.PY3
assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r

assert input.__repr__.call_count == 1
assert expected.__repr__.call_count == 1
Expand Down
8 changes: 0 additions & 8 deletions html5lib/tests/test_parser2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@

from six import PY2, text_type

import io

from . import support # noqa
Expand Down Expand Up @@ -73,11 +70,6 @@ def test_debug_log():
('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})]

if PY2:
for i, log in enumerate(expected):
log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log]
expected[i] = tuple(log)

assert parser.log == expected


Expand Down
11 changes: 4 additions & 7 deletions html5lib/tests/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

import pytest

import six
from six.moves import http_client, urllib
import http.client
import urllib.response

from html5lib._inputstream import (BufferedStream, HTMLInputStream,
HTMLUnicodeInputStream, HTMLBinaryInputStream)
Expand Down Expand Up @@ -190,7 +190,7 @@ def makefile(self, _mode, _bufsize=None):
# pylint:disable=unused-argument
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")

source = http_client.HTTPResponse(FakeSocket())
source = http.client.HTTPResponse(FakeSocket())
source.begin()
stream = HTMLInputStream(source)
assert stream.charsUntil(" ") == "Text"
Expand All @@ -201,15 +201,12 @@ def test_python_issue_20007_b():
Make sure we have a work-around for Python bug #20007
http://bugs.python.org/issue20007
"""
if six.PY2:
return

class FakeSocket:
def makefile(self, _mode, _bufsize=None):
# pylint:disable=unused-argument
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")

source = http_client.HTTPResponse(FakeSocket())
source = http.client.HTTPResponse(FakeSocket())
source.begin()
wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
stream = HTMLInputStream(wrapped)
Expand Down
6 changes: 2 additions & 4 deletions html5lib/tests/test_tokenizer2.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@

import io

from six import unichr, text_type

from html5lib._tokenizer import HTMLTokenizer
from html5lib.constants import tokenTypes

Expand All @@ -15,7 +13,7 @@ def ignore_parse_errors(toks):

def test_maintain_attribute_order():
# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">")

toks = HTMLTokenizer(stream)
Expand Down Expand Up @@ -48,7 +46,7 @@ def test_duplicate_attribute():

def test_maintain_duplicate_attribute_order():
# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
stream = io.StringIO("<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + " a=100>")

toks = HTMLTokenizer(stream)
Expand Down
3 changes: 1 addition & 2 deletions html5lib/tests/test_treewalkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import itertools
import sys

from six import unichr, text_type
import pytest

try:
Expand Down Expand Up @@ -150,7 +149,7 @@ def test_maintain_attribute_order(treeName):
pytest.skip("Treebuilder not loaded")

# generate loads to maximize the chance a hash-based mutation will occur
attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
data = "<span " + " ".join("%s='%s'" % (x, i) for x, i in attrs) + ">"

parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
Expand Down
9 changes: 4 additions & 5 deletions html5lib/tests/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import re

import pytest
from six import unichr

from html5lib._tokenizer import HTMLTokenizer
from html5lib import constants, _utils
Expand Down Expand Up @@ -145,15 +144,15 @@ def repl(m):
low = int(m.group(2), 16)
if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000
return unichr(cp)
return chr(cp)
else:
return unichr(high) + unichr(low)
return chr(high) + chr(low)
else:
return unichr(int(m.group(1), 16))
return chr(int(m.group(1), 16))
try:
return _surrogateRe.sub(repl, inp)
except ValueError:
# This occurs when unichr throws ValueError, which should
# This occurs when chr throws ValueError, which should
# only be for a lone-surrogate.
if _utils.supports_lone_surrogates:
raise
Expand Down
6 changes: 2 additions & 4 deletions html5lib/treebuilders/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from six import text_type

from ..constants import scopingElements, tableInsertModeElements, namespaces

# The scope markers are inserted when entering object elements,
Expand Down Expand Up @@ -199,7 +197,7 @@ def elementInScope(self, target, variant=None):
# match any node with that name
exactNode = hasattr(target, "nameTuple")
if not exactNode:
if isinstance(target, text_type):
if isinstance(target, str):
target = (namespaces["html"], target)
assert isinstance(target, tuple)

Expand Down Expand Up @@ -322,7 +320,7 @@ def _setInsertFromTable(self, value):

def insertElementNormal(self, token):
name = token["name"]
assert isinstance(name, text_type), "Element %s not unicode" % name
assert isinstance(name, str), "Element %s not unicode" % name
namespace = token.get("namespace", self.defaultNamespace)
element = self.elementClass(name, namespace)
element.attributes = token["data"]
Expand Down
Loading