Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove six #581

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 0 additions & 29 deletions .appveyor.yml

This file was deleted.

3 changes: 0 additions & 3 deletions .github/workflows/python-tox.yml
Original file line number Diff line number Diff line change
@@ -12,9 +12,6 @@ jobs:
os: [ubuntu-latest, windows-latest]
deps: [base, optional]
include:
- python: "pypy-2.7"
os: ubuntu-latest
deps: base
- python: "pypy-3.10"
os: ubuntu-latest
deps: base
16 changes: 2 additions & 14 deletions README.rst
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ or:

By default, the ``document`` will be an ``xml.etree`` element instance.
Whenever possible, html5lib chooses the accelerated ``ElementTree``
implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x).
implementation.

Two other tree types are supported: ``xml.dom.minidom`` and
``lxml.etree``. To use an alternative format, specify the name of
@@ -41,18 +41,6 @@ a treebuilder:
with open("mydocument.html", "rb") as f:
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")

When using with ``urllib2`` (Python 2), the charset from HTTP should be
passed into html5lib as follows:

.. code-block:: python

from contextlib import closing
from urllib2 import urlopen
import html5lib

with closing(urlopen("http://example.com/")) as f:
document = html5lib.parse(f, transport_encoding=f.info().getparam("charset"))

When using with ``urllib.request`` (Python 3), the charset from HTTP
should be passed into html5lib as follows:

@@ -90,7 +78,7 @@ More documentation is available at https://html5lib.readthedocs.io/.
Installation
------------

html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install:
html5lib works on CPython 3.8+ and PyPy. To install:

.. code-block:: bash

3 changes: 1 addition & 2 deletions debug-info.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import print_function, unicode_literals

import platform
import sys
@@ -12,7 +11,7 @@
"maxsize": sys.maxsize
}

search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"]
search_modules = ["chardet", "genshi", "html5lib", "lxml"]
found_modules = []

for m in search_modules:
3 changes: 1 addition & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# html5lib documentation build configuration file, created by
# sphinx-quickstart on Wed May 8 00:04:49 2013.
@@ -100,7 +99,7 @@
}


class CExtMock(object):
class CExtMock:
"""Required for autodoc on readthedocs.org where you cannot build C extensions."""
def __init__(self, *args, **kwargs):
pass
1 change: 0 additions & 1 deletion html5lib/__init__.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,6 @@
* :func:`~.serializer.serialize`
"""

from __future__ import absolute_import, division, unicode_literals

from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
3 changes: 1 addition & 2 deletions html5lib/_ihatexml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import re
import warnings
@@ -181,7 +180,7 @@ def escapeRegexp(string):
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")


class InfosetFilter(object):
class InfosetFilter:
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")

def __init__(self,
17 changes: 8 additions & 9 deletions html5lib/_inputstream.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type
from six.moves import http_client, urllib
import http.client
import urllib.response

import codecs
import re
@@ -48,7 +47,7 @@
charsUntilRegEx = {}


class BufferedStream(object):
class BufferedStream:
"""Buffering for streams that do not have buffering of their own

The buffer is implemented as a list of chunks on the assumption that
@@ -125,10 +124,10 @@ def _readFromBuffer(self, bytes):
def HTMLInputStream(source, **kwargs):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
if (isinstance(source, http_client.HTTPResponse) or
if (isinstance(source, http.client.HTTPResponse) or
# Also check for addinfourl wrapping HTTPResponse
(isinstance(source, urllib.response.addbase) and
isinstance(source.fp, http_client.HTTPResponse))):
isinstance(source.fp, http.client.HTTPResponse))):
isUnicode = False
elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type)
@@ -145,7 +144,7 @@ def HTMLInputStream(source, **kwargs):
return HTMLBinaryInputStream(source, **kwargs)


class HTMLUnicodeInputStream(object):
class HTMLUnicodeInputStream:
"""Provides a unicode stream of characters to the HTMLTokenizer.

This class takes care of character encoding and removing or replacing
@@ -673,7 +672,7 @@ def jumpTo(self, bytes):
return True


class EncodingParser(object):
class EncodingParser:
"""Mini parser for detecting character encoding from meta elements"""

def __init__(self, data):
@@ -861,7 +860,7 @@ def getAttribute(self):
attrValue.append(c)


class ContentAttrParser(object):
class ContentAttrParser:
def __init__(self, data):
assert isinstance(data, bytes)
self.data = data
5 changes: 1 addition & 4 deletions html5lib/_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from six import unichr as chr

from collections import deque, OrderedDict
from sys import version_info
@@ -24,7 +21,7 @@
attributeMap = OrderedDict


class HTMLTokenizer(object):
class HTMLTokenizer:
""" This class takes care of tokenizing HTML.

* self.currentToken
1 change: 0 additions & 1 deletion html5lib/_trie/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from .py import Trie

6 changes: 1 addition & 5 deletions html5lib/_trie/_base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
from __future__ import absolute_import, division, unicode_literals

try:
from collections.abc import Mapping
except ImportError: # Python 2.7
from collections import Mapping
from collections.abc import Mapping


class Trie(Mapping):
5 changes: 1 addition & 4 deletions html5lib/_trie/py.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type

from bisect import bisect_left

from ._base import Trie as ABCTrie


class Trie(ABCTrie):
def __init__(self, data):
if not all(isinstance(x, text_type) for x in data.keys()):
if not all(isinstance(x, str) for x in data.keys()):
raise TypeError("All keys must be strings")

self._data = data
22 changes: 5 additions & 17 deletions html5lib/_utils.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,9 @@
from __future__ import absolute_import, division, unicode_literals

from types import ModuleType

try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping

from six import text_type, PY3
from collections.abc import Mapping

if PY3:
import xml.etree.ElementTree as default_etree
else:
try:
import xml.etree.cElementTree as default_etree
except ImportError:
import xml.etree.ElementTree as default_etree
import xml.etree.ElementTree as default_etree


__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
@@ -31,10 +19,10 @@
# escapes.
try:
_x = eval('"\\uD800"') # pylint:disable=eval-used
if not isinstance(_x, text_type):
if not isinstance(_x, str):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"') # pylint:disable=eval-used
assert isinstance(_x, text_type)
assert isinstance(_x, str)
except Exception:
supports_lone_surrogates = False
else:
@@ -122,7 +110,7 @@ def moduleFactoryFactory(factory):
moduleCache = {}

def moduleFactory(baseModule, *args, **kwargs):
if isinstance(ModuleType.__name__, type("")):
if isinstance(ModuleType.__name__, str):
name = "_%s_factory" % baseModule.__name__
else:
name = b"_%s_factory" % baseModule.__name__
1 change: 0 additions & 1 deletion html5lib/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

import string

1 change: 0 additions & 1 deletion html5lib/filters/alphabeticalattributes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from . import base

3 changes: 1 addition & 2 deletions html5lib/filters/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import absolute_import, division, unicode_literals


class Filter(object):
class Filter:
def __init__(self, source):
self.source = source

1 change: 0 additions & 1 deletion html5lib/filters/inject_meta_charset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from . import base

31 changes: 14 additions & 17 deletions html5lib/filters/lint.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from six import text_type

from . import base
from ..constants import namespaces, voidElements
@@ -33,9 +30,9 @@ def __iter__(self):
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace is None or isinstance(namespace, str)
assert namespace != ""
assert isinstance(name, text_type)
assert isinstance(name, str)
assert name != ""
assert isinstance(token["data"], dict)
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
@@ -45,18 +42,18 @@ def __iter__(self):
if type == "StartTag" and self.require_matching_tags:
open_elements.append((namespace, name))
for (namespace, name), value in token["data"].items():
assert namespace is None or isinstance(namespace, text_type)
assert namespace is None or isinstance(namespace, str)
assert namespace != ""
assert isinstance(name, text_type)
assert isinstance(name, str)
assert name != ""
assert isinstance(value, text_type)
assert isinstance(value, str)

elif type == "EndTag":
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace is None or isinstance(namespace, str)
assert namespace != ""
assert isinstance(name, text_type)
assert isinstance(name, str)
assert name != ""
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
@@ -66,26 +63,26 @@ def __iter__(self):

elif type == "Comment":
data = token["data"]
assert isinstance(data, text_type)
assert isinstance(data, str)

elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
assert isinstance(data, text_type)
assert isinstance(data, str)
assert data != ""
if type == "SpaceCharacters":
assert data.strip(spaceCharacters) == ""

elif type == "Doctype":
name = token["name"]
assert name is None or isinstance(name, text_type)
assert token["publicId"] is None or isinstance(name, text_type)
assert token["systemId"] is None or isinstance(name, text_type)
assert name is None or isinstance(name, str)
assert token["publicId"] is None or isinstance(name, str)
assert token["systemId"] is None or isinstance(name, str)

elif type == "Entity":
assert isinstance(token["name"], text_type)
assert isinstance(token["name"], str)

elif type == "SerializerError":
assert isinstance(token["data"], text_type)
assert isinstance(token["data"], str)

else:
assert False, "Unknown token type: %(type)s" % {"type": type}
1 change: 0 additions & 1 deletion html5lib/filters/optionaltags.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from __future__ import absolute_import, division, unicode_literals

from . import base

Loading