diff --git a/CHANGES.rst b/CHANGES.rst index fd50e9b6..a92802a5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -32,6 +32,11 @@ dev (master) * Fix edge case where Retry-After header was still respected even when explicitly opted out of. (Pull #1607) +* Remove dependency on ``rfc3986`` for URL parsing. + +* Fix issue where URLs containing invalid characters within ``Url.auth`` would + raise an exception instead of percent-encoding those characters. + 1.25.3 (2019-05-23) ------------------- diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 6f0362c3..0855f00a 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -278,5 +278,9 @@ In chronological order: * James Meickle * Improve handling of Retry-After header +* Chris Jerdonek + * Remove a spurious TypeError from the exception chain inside + HTTPConnectionPool._make_request(), also for BaseExceptions. + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/Makefile b/Makefile deleted file mode 100644 index e62952bd..00000000 --- a/Makefile +++ /dev/null @@ -1,70 +0,0 @@ -REQUIREMENTS_FILE=dev-requirements.txt -REQUIREMENTS_OUT=dev-requirements.txt.log -SETUP_OUT=*.egg-info - -.PHONY: all -all: setup requirements - -.PHONY: virtualenv -virtualenv: -ifndef VIRTUAL_ENV - $(error Must be run inside of a virtualenv) -endif - -.PHONY: setup -setup: virtualenv $(SETUP_OUT) - -.PHONY: $(SETUP_OUT) -$(SETUP_OUT): setup.py setup.cfg - python setup.py develop - touch $(SETUP_OUT) - -.PHONY: requirements -requirements: setup $(REQUIREMENTS_OUT) - -.PHONY: piprot -piprot: setup - pip install piprot - piprot -x $(REQUIREMENTS_FILE) - -.PHONY: $(REQUIREMENTS_OUT) -$(REQUIREMENTS_OUT): $(REQUIREMENTS_FILE) - pip install -r $(REQUIREMENTS_FILE) | tee -a $(REQUIREMENTS_OUT) - python setup.py develop - -.PHONY: clean -clean: - find . -name "*.py[oc]" -delete - find . -name "__pycache__" -delete - rm -f $(REQUIREMENTS_OUT) - rm -rf docs/_build build/ dist/ - -.PHONY: test -test: requirements - tox - -.PHONY: test-quick -test-quick: requirements - tox -e py36 - tox -e py27 - -.PHONY: test-all -test-all: requirements - tox - -.PHONY: test-gae -test-gae: requirements -ifndef GAE_PYTHONPATH - $(error GAE_PYTHONPATH must be set) -endif - tox -e gae - -.PHONY: docs -docs: - tox -e docs - -.PHONY: release -release: - ./release.sh - - diff --git a/README.rst b/README.rst index ea8b930d..50361ca7 100644 --- a/README.rst +++ b/README.rst @@ -96,8 +96,8 @@ Tidelift will coordinate the fix and disclosure with maintainers. Maintainers ----------- -- `@theacodes `_ (Thea Flowers) - `@sethmlarson `_ (Seth M. Larson) +- `@theacodes `_ (Thea Flowers) - `@haikuginger `_ (Jess Shapiro) - `@lukasa `_ (Cory Benfield) - `@sigmavirus24 `_ (Ian Cordasco) @@ -130,7 +130,7 @@ development `_'s work on an ongoing basis -- Abbott (2018-present), sponsors `@sethmlarson `_'s work on an ongoing basis +- Abbott (2018-2019), sponsored `@sethmlarson `_'s work on urllib3. +- Google Cloud Platform (2018-2019), sponsored `@theacodes `_'s work on urllib3. - Akamai (2017-2018), sponsored `@haikuginger `_'s work on urllib3 -- Hewlett Packard Enterprise (2016-2017), sponsored `@Lukasa’s `_ work on urllib3 +- Hewlett Packard Enterprise (2016-2017), sponsored `@Lukasa’s `_ work on urllib3. diff --git a/_travis/upload_coverage.sh b/_travis/upload_coverage.sh index e86f5904..9e803d25 100755 --- a/_travis/upload_coverage.sh +++ b/_travis/upload_coverage.sh @@ -4,5 +4,5 @@ set -exo pipefail if [[ -e .coverage ]]; then python -m pip install codecov - codecov --env TRAVIS_OS_NAME,NOX_SESSION + python -m codecov --env TRAVIS_OS_NAME,NOX_SESSION fi diff --git a/dev-requirements.txt b/dev-requirements.txt index fb5b4c30..754ae3b1 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -6,12 +6,11 @@ tornado==5.1.1 PySocks==1.6.8 pkginfo==1.4.2 pytest-random-order==1.0.4;python_version>="3.5" -pytest-timeout==1.3.1 -pytest==4.0.1 -pytest-cov==2.6.0 +pytest-timeout==1.3.3 +pytest==4.6.4 +pytest-cov==2.7.1 h11==0.8.0 cryptography==2.6.1 -pluggy==0.11.0 # https://github.com/ionelmc/python-lazy-object-proxy/issues/30 lazy-object-proxy==1.4.0 diff --git a/docs/contributing.rst b/docs/contributing.rst index c28bc479..c66b28ad 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -13,6 +13,8 @@ If you wish to add a new feature or fix a bug: to start making your changes. #. Write a test which shows that the bug was fixed or that the feature works as expected. +#. Format your changes with black using command `$ nox -s blacken` and lint your + changes using command `nox -s lint`. #. Send a pull request and bug the maintainer until it gets merged and published. :) Make sure to add yourself to ``CONTRIBUTORS.txt``. @@ -20,47 +22,53 @@ If you wish to add a new feature or fix a bug: Setting up your development environment --------------------------------------- -It is recommended, and even enforced by the make file, that you use a -`virtualenv -`_:: +In order to setup the development environment all that you need is +`nox `_ installed in your machine:: - $ python3 -m venv venv3 - $ source venv3/bin/activate - $ pip install -r dev-requirements.txt + $ pip install --user --upgrade nox Running the tests ----------------- We use some external dependencies, multiple interpreters and code coverage -analysis while running test suite. Our ``Makefile`` handles much of this for -you as long as you're running it `inside of a virtualenv -`_:: - - $ make test-quick - [... magically installs dependencies and runs tests on your virtualenv] - Ran 182 tests in 1.633s - - OK (SKIP=6) - -There is also a make target for running all of our tests and multiple python +analysis while running test suite. Our ``noxfile.py`` handles much of this for +you:: + + $ nox --sessions test-2.7 test-3.7 + [ Nox will create virtualenv, install the specified dependencies, and run the commands in order.] + nox > Running session test-2.7 + ....... + ....... + nox > Session test-2.7 was successful. + ....... + ....... + nox > Running session test-3.7 + ....... + ....... + nox > Session test-3.7 was successful. + +There is also a nox command for running all of our tests and multiple python versions. - $ make test-all + $ nox --sessions test Note that code coverage less than 100% is regarded as a failing run. Some platform-specific tests are skipped unless run in that platform. To make sure the code works in all of urllib3's supported platforms, you can run our ``tox`` suite:: - $ make test-all - [... tox creates a virtualenv for every platform and runs tests inside of each] - py27: commands succeeded - py34: commands succeeded - py35: commands succeeded - py36: commands succeeded - py37: commands succeeded - pypy: commands succeeded + $ nox --sessions test + [ Nox will create virtualenv, install the specified dependencies, and run the commands in order.] + ....... + ....... + nox > Session test-2.7 was successful. + nox > Session test-3.4 was successful. + nox > Session test-3.5 was successful. + nox > Session test-3.6 was successful. + nox > Session test-3.7 was successful. + nox > Session test-3.8 was successful. + nox > Session test-pypy was successful. Our test suite `runs continuously on Travis CI `_ with every pull request. diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index de4ccc44..afa8ac4c 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -1,5 +1,4 @@ import threading -import unittest import pytest from tornado import ioloop, web @@ -20,7 +19,7 @@ def consume_socket(sock, chunks=65536): pass -class SocketDummyServerTestCase(unittest.TestCase): +class SocketDummyServerTestCase(object): """ A simple socket-based server is created for this class that is good for exactly one request. @@ -67,7 +66,7 @@ def start_basic_handler(cls, **kw): ) @classmethod - def tearDownClass(cls): + def teardown_class(cls): if hasattr(cls, "server_thread"): cls.server_thread.join(0.1) @@ -102,10 +101,10 @@ def _start_server(cls, socket_handler): cls.port = cls.server_thread.port -class HTTPDummyServerTestCase(unittest.TestCase): +class HTTPDummyServerTestCase(object): """ A simple HTTP server that runs when your test class runs - Have your unittest class inherit from this one, and then a simple server + Have your test class inherit from this one, and then a simple server will start when your tests run, and automatically shut down when they complete. For examples of what test requests you can send to the server, see the TestingApp in dummyserver/handlers.py. @@ -132,11 +131,11 @@ def _stop_server(cls): cls.server_thread.join() @classmethod - def setUpClass(cls): + def setup_class(cls): cls._start_server() @classmethod - def tearDownClass(cls): + def teardown_class(cls): cls._stop_server() @@ -151,7 +150,7 @@ class IPV6HTTPSDummyServerTestCase(HTTPSDummyServerTestCase): host = "::1" -class HTTPDummyProxyTestCase(unittest.TestCase): +class HTTPDummyProxyTestCase(object): http_host = "localhost" http_host_alt = "127.0.0.1" @@ -164,7 +163,7 @@ class HTTPDummyProxyTestCase(unittest.TestCase): proxy_host_alt = "127.0.0.1" @classmethod - def setUpClass(cls): + def setup_class(cls): cls.io_loop = ioloop.IOLoop.current() app = web.Application([(r".*", TestingApp)]) @@ -185,7 +184,7 @@ def setUpClass(cls): cls.server_thread = run_loop_in_thread(cls.io_loop) @classmethod - def tearDownClass(cls): + def teardown_class(cls): cls.io_loop.add_callback(cls.http_server.stop) cls.io_loop.add_callback(cls.https_server.stop) cls.io_loop.add_callback(cls.proxy_server.stop) diff --git a/noxfile.py b/noxfile.py index e1e2d1cf..fbe99ea3 100644 --- a/noxfile.py +++ b/noxfile.py @@ -69,6 +69,8 @@ def blacken(session): session.install("black") session.run("black", "src", "dummyserver", "test", "noxfile.py", "setup.py") + lint(session) + @nox.session def lint(session): diff --git a/setup.cfg b/setup.cfg index 0c1a1c24..48bc352d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,4 +23,5 @@ requires-dist = brotlipy>=0.6.0; extra == 'brotli' [tool:pytest] -xfail_strict=true +xfail_strict = true +python_classes = Test *TestCase diff --git a/setup.py b/setup.py index 187e5a12..c0e97532 100755 --- a/setup.py +++ b/setup.py @@ -56,7 +56,6 @@ "urllib3.packages", "urllib3.packages.ssl_match_hostname", "urllib3.packages.backports", - "urllib3.packages.rfc3986", "urllib3.contrib", "urllib3.contrib._securetransport", "urllib3.util", diff --git a/src/urllib3/_async/connectionpool.py b/src/urllib3/_async/connectionpool.py index d1e7fcaa..03a821fe 100644 --- a/src/urllib3/_async/connectionpool.py +++ b/src/urllib3/_async/connectionpool.py @@ -27,7 +27,6 @@ from urllib3.packages.ssl_match_hostname import CertificateError from urllib3.packages import six from urllib3.packages.six.moves import queue -from urllib3.packages.rfc3986.normalizers import normalize_host from .request import RequestMethods from .response import HTTPResponse from .connection import HTTP1Connection @@ -43,7 +42,7 @@ BaseSSLError, ) from ..util.timeout import Timeout -from ..util.url import get_host, Url, NORMALIZABLE_SCHEMES +from ..util.url import get_host, Url, _normalize_host as normalize_host from ..util.queue import LifoQueue try: @@ -908,6 +907,8 @@ def _normalize_host(host, scheme): Normalize hosts for comparisons and use with sockets. """ + host = normalize_host(host, scheme) + # httplib doesn't like it when we include brackets in IPv6 addresses # Specifically, if we include brackets but also pass the port then # httplib crazily doubles up the square brackets on the Host header. @@ -915,7 +916,5 @@ def _normalize_host(host, scheme): # However, for backward compatibility reasons we can't actually # *assert* that. See http://bugs.python.org/issue28539 if host.startswith("[") and host.endswith("]"): - host = host.strip("[]") - if scheme in NORMALIZABLE_SCHEMES: - host = normalize_host(host) + host = host[1:-1] return host diff --git a/src/urllib3/packages/rfc3986/__init__.py b/src/urllib3/packages/rfc3986/__init__.py deleted file mode 100644 index d953d2b6..00000000 --- a/src/urllib3/packages/rfc3986/__init__.py +++ /dev/null @@ -1,56 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -An implementation of semantics and validations described in RFC 3986. - -See http://rfc3986.readthedocs.io/ for detailed documentation. - -:copyright: (c) 2014 Rackspace -:license: Apache v2.0, see LICENSE for details -""" - -from .api import iri_reference -from .api import IRIReference -from .api import is_valid_uri -from .api import normalize_uri -from .api import uri_reference -from .api import URIReference -from .api import urlparse -from .parseresult import ParseResult - -__title__ = "rfc3986" -__author__ = "Ian Stapleton Cordasco" -__author_email__ = "graffatcolmingov@gmail.com" -__license__ = "Apache v2.0" -__copyright__ = "Copyright 2014 Rackspace" -__version__ = "1.3.2" - -__all__ = ( - "ParseResult", - "URIReference", - "IRIReference", - "is_valid_uri", - "normalize_uri", - "uri_reference", - "iri_reference", - "urlparse", - "__title__", - "__author__", - "__author_email__", - "__license__", - "__copyright__", - "__version__", -) diff --git a/src/urllib3/packages/rfc3986/_mixin.py b/src/urllib3/packages/rfc3986/_mixin.py deleted file mode 100644 index 4ddcb2df..00000000 --- a/src/urllib3/packages/rfc3986/_mixin.py +++ /dev/null @@ -1,371 +0,0 @@ -"""Module containing the implementation of the URIMixin class.""" -import warnings - -from . import exceptions as exc -from . import misc -from . import normalizers -from . import validators - - -class URIMixin(object): - """Mixin with all shared methods for URIs and IRIs.""" - - __hash__ = tuple.__hash__ - - def authority_info(self): - """Return a dictionary with the ``userinfo``, ``host``, and ``port``. - - If the authority is not valid, it will raise a - :class:`~rfc3986.exceptions.InvalidAuthority` Exception. - - :returns: - ``{'userinfo': 'username:password', 'host': 'www.example.com', - 'port': '80'}`` - :rtype: dict - :raises rfc3986.exceptions.InvalidAuthority: - If the authority is not ``None`` and can not be parsed. - """ - if not self.authority: - return {"userinfo": None, "host": None, "port": None} - - match = self._match_subauthority() - - if match is None: - # In this case, we have an authority that was parsed from the URI - # Reference, but it cannot be further parsed by our - # misc.SUBAUTHORITY_MATCHER. In this case it must not be a valid - # authority. - raise exc.InvalidAuthority(self.authority.encode(self.encoding)) - - # We had a match, now let's ensure that it is actually a valid host - # address if it is IPv4 - matches = match.groupdict() - host = matches.get("host") - - if ( - host - and misc.IPv4_MATCHER.match(host) - and not validators.valid_ipv4_host_address(host) - ): - # If we have a host, it appears to be IPv4 and it does not have - # valid bytes, it is an InvalidAuthority. - raise exc.InvalidAuthority(self.authority.encode(self.encoding)) - - return matches - - def _match_subauthority(self): - return misc.SUBAUTHORITY_MATCHER.match(self.authority) - - @property - def host(self): - """If present, a string representing the host.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority["host"] - - @property - def port(self): - """If present, the port extracted from the authority.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority["port"] - - @property - def userinfo(self): - """If present, the userinfo extracted from the authority.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority["userinfo"] - - def is_absolute(self): - """Determine if this URI Reference is an absolute URI. - - See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation. - - :returns: ``True`` if it is an absolute URI, ``False`` otherwise. - :rtype: bool - """ - return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit())) - - def is_valid(self, **kwargs): - """Determine if the URI is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param bool require_scheme: Set to ``True`` if you wish to require the - presence of the scheme component. - :param bool require_authority: Set to ``True`` if you wish to require - the presence of the authority component. - :param bool require_path: Set to ``True`` if you wish to require the - presence of the path component. - :param bool require_query: Set to ``True`` if you wish to require the - presence of the query component. - :param bool require_fragment: Set to ``True`` if you wish to require - the presence of the fragment component. - :returns: ``True`` if the URI is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - validators = [ - (self.scheme_is_valid, kwargs.get("require_scheme", False)), - (self.authority_is_valid, kwargs.get("require_authority", False)), - (self.path_is_valid, kwargs.get("require_path", False)), - (self.query_is_valid, kwargs.get("require_query", False)), - (self.fragment_is_valid, kwargs.get("require_fragment", False)), - ] - return all(v(r) for v, r in validators) - - def authority_is_valid(self, require=False): - """Determine if the authority component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param bool require: - Set to ``True`` to require the presence of this component. - :returns: - ``True`` if the authority is valid. ``False`` otherwise. - :rtype: - bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - try: - self.authority_info() - except exc.InvalidAuthority: - return False - - return validators.authority_is_valid( - self.authority, host=self.host, require=require - ) - - def scheme_is_valid(self, require=False): - """Determine if the scheme component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the scheme is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - return validators.scheme_is_valid(self.scheme, require) - - def path_is_valid(self, require=False): - """Determine if the path component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the path is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - return validators.path_is_valid(self.path, require) - - def query_is_valid(self, require=False): - """Determine if the query component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the query is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - return validators.query_is_valid(self.query, require) - - def fragment_is_valid(self, require=False): - """Determine if the fragment component is valid. - - .. deprecated:: 1.1.0 - - Use the Validator object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the fragment is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - return validators.fragment_is_valid(self.fragment, require) - - def normalized_equality(self, other_ref): - """Compare this URIReference to another URIReference. - - :param URIReference other_ref: (required), The reference with which - we're comparing. - :returns: ``True`` if the references are equal, ``False`` otherwise. - :rtype: bool - """ - return tuple(self.normalize()) == tuple(other_ref.normalize()) - - def resolve_with(self, base_uri, strict=False): - """Use an absolute URI Reference to resolve this relative reference. - - Assuming this is a relative reference that you would like to resolve, - use the provided base URI to resolve it. - - See http://tools.ietf.org/html/rfc3986#section-5 for more information. - - :param base_uri: Either a string or URIReference. It must be an - absolute URI or it will raise an exception. - :returns: A new URIReference which is the result of resolving this - reference using ``base_uri``. - :rtype: :class:`URIReference` - :raises rfc3986.exceptions.ResolutionError: - If the ``base_uri`` is not an absolute URI. - """ - if not isinstance(base_uri, URIMixin): - base_uri = type(self).from_string(base_uri) - - if not base_uri.is_absolute(): - raise exc.ResolutionError(base_uri) - - # This is optional per - # http://tools.ietf.org/html/rfc3986#section-5.2.1 - base_uri = base_uri.normalize() - - # The reference we're resolving - resolving = self - - if not strict and resolving.scheme == base_uri.scheme: - resolving = resolving.copy_with(scheme=None) - - # http://tools.ietf.org/html/rfc3986#page-32 - if resolving.scheme is not None: - target = resolving.copy_with( - path=normalizers.normalize_path(resolving.path) - ) - else: - if resolving.authority is not None: - target = resolving.copy_with( - scheme=base_uri.scheme, - path=normalizers.normalize_path(resolving.path), - ) - else: - if resolving.path is None: - if resolving.query is not None: - query = resolving.query - else: - query = base_uri.query - target = resolving.copy_with( - scheme=base_uri.scheme, - authority=base_uri.authority, - path=base_uri.path, - query=query, - ) - else: - if resolving.path.startswith("/"): - path = normalizers.normalize_path(resolving.path) - else: - path = normalizers.normalize_path( - misc.merge_paths(base_uri, resolving.path) - ) - target = resolving.copy_with( - scheme=base_uri.scheme, - authority=base_uri.authority, - path=path, - query=resolving.query, - ) - return target - - def unsplit(self): - """Create a URI string from the components. - - :returns: The URI Reference reconstituted as a string. - :rtype: str - """ - # See http://tools.ietf.org/html/rfc3986#section-5.3 - result_list = [] - if self.scheme: - result_list.extend([self.scheme, ":"]) - if self.authority: - result_list.extend(["//", self.authority]) - if self.path: - result_list.append(self.path) - if self.query is not None: - result_list.extend(["?", self.query]) - if self.fragment is not None: - result_list.extend(["#", self.fragment]) - return "".join(result_list) - - def copy_with( - self, - scheme=misc.UseExisting, - authority=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, - ): - """Create a copy of this reference with the new components. - - :param str scheme: - (optional) The scheme to use for the new reference. - :param str authority: - (optional) The authority to use for the new reference. - :param str path: - (optional) The path to use for the new reference. - :param str query: - (optional) The query to use for the new reference. - :param str fragment: - (optional) The fragment to use for the new reference. - :returns: - New URIReference with provided components. - :rtype: - URIReference - """ - attributes = { - "scheme": scheme, - "authority": authority, - "path": path, - "query": query, - "fragment": fragment, - } - for key, value in list(attributes.items()): - if value is misc.UseExisting: - del attributes[key] - uri = self._replace(**attributes) - uri.encoding = self.encoding - return uri diff --git a/src/urllib3/packages/rfc3986/abnf_regexp.py b/src/urllib3/packages/rfc3986/abnf_regexp.py deleted file mode 100644 index c461443d..00000000 --- a/src/urllib3/packages/rfc3986/abnf_regexp.py +++ /dev/null @@ -1,262 +0,0 @@ -# -*- coding: utf-8 -*- -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module for the regular expressions crafted from ABNF.""" - -import sys - -# https://tools.ietf.org/html/rfc3986#page-13 -GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@" -GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS) -# https://tools.ietf.org/html/rfc3986#page-13 -SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;=" -SUB_DELIMITERS_SET = set(SUB_DELIMITERS) -# Escape the '*' for use in regular expressions -SUB_DELIMITERS_RE = r"!$&'()\*+,;=" -RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET) -ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" -DIGIT = "0123456789" -# https://tools.ietf.org/html/rfc3986#section-2.3 -UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + r"._!-" -UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS) -NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET) -# We need to escape the '-' in this case: -UNRESERVED_RE = r"A-Za-z0-9._~\-" - -# Percent encoded character values -PERCENT_ENCODED = PCT_ENCODED = "%[A-Fa-f0-9]{2}" -PCHAR = "([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":@]|%s)" % PCT_ENCODED - -# NOTE(sigmavirus24): We're going to use more strict regular expressions -# than appear in Appendix B for scheme. This will prevent over-eager -# consuming of items that aren't schemes. -SCHEME_RE = "[a-zA-Z][a-zA-Z0-9+.-]*" -_AUTHORITY_RE = "[^/?#]*" -_PATH_RE = "[^?#]*" -_QUERY_RE = "[^#]*" -_FRAGMENT_RE = ".*" - -# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B -COMPONENT_PATTERN_DICT = { - "scheme": SCHEME_RE, - "authority": _AUTHORITY_RE, - "path": _PATH_RE, - "query": _QUERY_RE, - "fragment": _FRAGMENT_RE, -} - -# See http://tools.ietf.org/html/rfc3986#appendix-B -# In this case, we name each of the important matches so we can use -# SRE_Match#groupdict to parse the values out if we so choose. This is also -# modified to ignore other matches that are not important to the parsing of -# the reference so we can also simply use SRE_Match#groups. -URL_PARSING_RE = ( - r"(?:(?P{scheme}):)?(?://(?P{authority}))?" - r"(?P{path})(?:\?(?P{query}))?" - r"(?:#(?P{fragment}))?" -).format(**COMPONENT_PATTERN_DICT) - - -# ######################### -# Authority Matcher Section -# ######################### - -# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 -# The pattern for a regular name, e.g., www.google.com, api.github.com -REGULAR_NAME_RE = REG_NAME = "((?:{0}|[{1}])*)".format( - "%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + UNRESERVED_RE -) -# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, -IPv4_RE = r"([0-9]{1,3}\.){3}[0-9]{1,3}" -# Hexadecimal characters used in each piece of an IPv6 address -HEXDIG_RE = "[0-9A-Fa-f]{1,4}" -# Least-significant 32 bits of an IPv6 address -LS32_RE = "({hex}:{hex}|{ipv4})".format(hex=HEXDIG_RE, ipv4=IPv4_RE) -# Substitutions into the following patterns for IPv6 patterns defined -# http://tools.ietf.org/html/rfc3986#page-20 -_subs = {"hex": HEXDIG_RE, "ls32": LS32_RE} - -# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details -# about ABNF (Augmented Backus-Naur Form) use in the comments -variations = [ - # 6( h16 ":" ) ls32 - "(%(hex)s:){6}%(ls32)s" % _subs, - # "::" 5( h16 ":" ) ls32 - "::(%(hex)s:){5}%(ls32)s" % _subs, - # [ h16 ] "::" 4( h16 ":" ) ls32 - "(%(hex)s)?::(%(hex)s:){4}%(ls32)s" % _subs, - # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 - "((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s" % _subs, - # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 - "((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s" % _subs, - # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 - "((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s" % _subs, - # [ *4( h16 ":" ) h16 ] "::" ls32 - "((%(hex)s:){0,4}%(hex)s)?::%(ls32)s" % _subs, - # [ *5( h16 ":" ) h16 ] "::" h16 - "((%(hex)s:){0,5}%(hex)s)?::%(hex)s" % _subs, - # [ *6( h16 ":" ) h16 ] "::" - "((%(hex)s:){0,6}%(hex)s)?::" % _subs, -] - -IPv6_RE = "(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7})|({8}))".format(*variations) - -IPv_FUTURE_RE = r"v[0-9A-Fa-f]+\.[%s]+" % (UNRESERVED_RE + SUB_DELIMITERS_RE + ":") - -# RFC 6874 Zone ID ABNF -ZONE_ID = "(?:[" + UNRESERVED_RE + "]|" + PCT_ENCODED + ")+" - -IPv6_ADDRZ_RFC4007_RE = IPv6_RE + "(?:(?:%25|%)" + ZONE_ID + ")?" -IPv6_ADDRZ_RE = IPv6_RE + "(?:%25" + ZONE_ID + ")?" - -IP_LITERAL_RE = r"\[({0}|{1})\]".format(IPv6_ADDRZ_RFC4007_RE, IPv_FUTURE_RE) - -# Pattern for matching the host piece of the authority -HOST_RE = HOST_PATTERN = "({0}|{1}|{2})".format(REG_NAME, IPv4_RE, IP_LITERAL_RE) -USERINFO_RE = "^([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":]|%s)+" % (PCT_ENCODED) -PORT_RE = "[0-9]{1,5}" - -# #################### -# Path Matcher Section -# #################### - -# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information -# about the path patterns defined below. -segments = { - "segment": PCHAR + "*", - # Non-zero length segment - "segment-nz": PCHAR + "+", - # Non-zero length segment without ":" - "segment-nz-nc": PCHAR.replace(":", "") + "+", -} - -# Path types taken from Section 3.3 (linked above) -PATH_EMPTY = "^$" -PATH_ROOTLESS = "%(segment-nz)s(/%(segment)s)*" % segments -PATH_NOSCHEME = "%(segment-nz-nc)s(/%(segment)s)*" % segments -PATH_ABSOLUTE = "/(%s)?" % PATH_ROOTLESS -PATH_ABEMPTY = "(/%(segment)s)*" % segments -PATH_RE = "^(%s|%s|%s|%s|%s)$" % ( - PATH_ABEMPTY, - PATH_ABSOLUTE, - PATH_NOSCHEME, - PATH_ROOTLESS, - PATH_EMPTY, -) - -FRAGMENT_RE = QUERY_RE = ( - "^([/?:@" + UNRESERVED_RE + SUB_DELIMITERS_RE + "]|%s)*$" % PCT_ENCODED -) - -# ########################## -# Relative reference matcher -# ########################## - -# See http://tools.ietf.org/html/rfc3986#section-4.2 for details -RELATIVE_PART_RE = "(//%s%s|%s|%s|%s)" % ( - COMPONENT_PATTERN_DICT["authority"], - PATH_ABEMPTY, - PATH_ABSOLUTE, - PATH_NOSCHEME, - PATH_EMPTY, -) - -# See http://tools.ietf.org/html/rfc3986#section-3 for definition -HIER_PART_RE = "(//%s%s|%s|%s|%s)" % ( - COMPONENT_PATTERN_DICT["authority"], - PATH_ABEMPTY, - PATH_ABSOLUTE, - PATH_ROOTLESS, - PATH_EMPTY, -) - -# ############### -# IRIs / RFC 3987 -# ############### - -# Only wide-unicode gets the high-ranges of UCSCHAR -if sys.maxunicode > 0xFFFF: # pragma: no cover - IPRIVATE = u"\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD" - UCSCHAR_RE = ( - u"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF" - u"\U00010000-\U0001FFFD\U00020000-\U0002FFFD" - u"\U00030000-\U0003FFFD\U00040000-\U0004FFFD" - u"\U00050000-\U0005FFFD\U00060000-\U0006FFFD" - u"\U00070000-\U0007FFFD\U00080000-\U0008FFFD" - u"\U00090000-\U0009FFFD\U000A0000-\U000AFFFD" - u"\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD" - u"\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD" - ) -else: # pragma: no cover - IPRIVATE = u"\uE000-\uF8FF" - UCSCHAR_RE = u"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF" - -IUNRESERVED_RE = u"A-Za-z0-9\\._~\\-" + UCSCHAR_RE -IPCHAR = u"([" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u":@]|%s)" % PCT_ENCODED - -isegments = { - "isegment": IPCHAR + u"*", - # Non-zero length segment - "isegment-nz": IPCHAR + u"+", - # Non-zero length segment without ":" - "isegment-nz-nc": IPCHAR.replace(":", "") + u"+", -} - -IPATH_ROOTLESS = u"%(isegment-nz)s(/%(isegment)s)*" % isegments -IPATH_NOSCHEME = u"%(isegment-nz-nc)s(/%(isegment)s)*" % isegments -IPATH_ABSOLUTE = u"/(?:%s)?" % IPATH_ROOTLESS -IPATH_ABEMPTY = u"(?:/%(isegment)s)*" % isegments -IPATH_RE = u"^(?:%s|%s|%s|%s|%s)$" % ( - IPATH_ABEMPTY, - IPATH_ABSOLUTE, - IPATH_NOSCHEME, - IPATH_ROOTLESS, - PATH_EMPTY, -) - -IREGULAR_NAME_RE = IREG_NAME = u"(?:{0}|[{1}])*".format( - u"%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + IUNRESERVED_RE -) - -IHOST_RE = IHOST_PATTERN = u"({0}|{1}|{2})".format(IREG_NAME, IPv4_RE, IP_LITERAL_RE) - -IUSERINFO_RE = ( - u"^(?:[" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u":]|%s)+" % (PCT_ENCODED) -) - -IFRAGMENT_RE = ( - u"^(?:[/?:@" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u"]|%s)*$" % PCT_ENCODED -) -IQUERY_RE = ( - u"^(?:[/?:@" - + IUNRESERVED_RE - + SUB_DELIMITERS_RE - + IPRIVATE - + u"]|%s)*$" % PCT_ENCODED -) - -IRELATIVE_PART_RE = u"(//%s%s|%s|%s|%s)" % ( - COMPONENT_PATTERN_DICT["authority"], - IPATH_ABEMPTY, - IPATH_ABSOLUTE, - IPATH_NOSCHEME, - PATH_EMPTY, -) - -IHIER_PART_RE = u"(//%s%s|%s|%s|%s)" % ( - COMPONENT_PATTERN_DICT["authority"], - IPATH_ABEMPTY, - IPATH_ABSOLUTE, - IPATH_ROOTLESS, - PATH_EMPTY, -) diff --git a/src/urllib3/packages/rfc3986/api.py b/src/urllib3/packages/rfc3986/api.py deleted file mode 100644 index 1e098b34..00000000 --- a/src/urllib3/packages/rfc3986/api.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Module containing the simple and functional API for rfc3986. - -This module defines functions and provides access to the public attributes -and classes of rfc3986. -""" - -from .iri import IRIReference -from .parseresult import ParseResult -from .uri import URIReference - - -def uri_reference(uri, encoding="utf-8"): - """Parse a URI string into a URIReference. - - This is a convenience function. You could achieve the same end by using - ``URIReference.from_string(uri)``. - - :param str uri: The URI which needs to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: A parsed URI - :rtype: :class:`URIReference` - """ - return URIReference.from_string(uri, encoding) - - -def iri_reference(iri, encoding="utf-8"): - """Parse a IRI string into an IRIReference. - - This is a convenience function. You could achieve the same end by using - ``IRIReference.from_string(iri)``. - - :param str iri: The IRI which needs to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: A parsed IRI - :rtype: :class:`IRIReference` - """ - return IRIReference.from_string(iri, encoding) - - -def is_valid_uri(uri, encoding="utf-8", **kwargs): - """Determine if the URI given is valid. - - This is a convenience function. You could use either - ``uri_reference(uri).is_valid()`` or - ``URIReference.from_string(uri).is_valid()`` to achieve the same result. - - :param str uri: The URI to be validated. - :param str encoding: The encoding of the string provided - :param bool require_scheme: Set to ``True`` if you wish to require the - presence of the scheme component. - :param bool require_authority: Set to ``True`` if you wish to require the - presence of the authority component. - :param bool require_path: Set to ``True`` if you wish to require the - presence of the path component. - :param bool require_query: Set to ``True`` if you wish to require the - presence of the query component. - :param bool require_fragment: Set to ``True`` if you wish to require the - presence of the fragment component. - :returns: ``True`` if the URI is valid, ``False`` otherwise. - :rtype: bool - """ - return URIReference.from_string(uri, encoding).is_valid(**kwargs) - - -def normalize_uri(uri, encoding="utf-8"): - """Normalize the given URI. - - This is a convenience function. You could use either - ``uri_reference(uri).normalize().unsplit()`` or - ``URIReference.from_string(uri).normalize().unsplit()`` instead. - - :param str uri: The URI to be normalized. - :param str encoding: The encoding of the string provided - :returns: The normalized URI. - :rtype: str - """ - normalized_reference = URIReference.from_string(uri, encoding).normalize() - return normalized_reference.unsplit() - - -def urlparse(uri, encoding="utf-8"): - """Parse a given URI and return a ParseResult. - - This is a partial replacement of the standard library's urlparse function. - - :param str uri: The URI to be parsed. - :param str encoding: The encoding of the string provided. - :returns: A parsed URI - :rtype: :class:`~rfc3986.parseresult.ParseResult` - """ - return ParseResult.from_string(uri, encoding, strict=False) diff --git a/src/urllib3/packages/rfc3986/builder.py b/src/urllib3/packages/rfc3986/builder.py deleted file mode 100644 index bbabfaf2..00000000 --- a/src/urllib3/packages/rfc3986/builder.py +++ /dev/null @@ -1,301 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2017 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module containing the logic for the URIBuilder object.""" -from . import compat -from . import normalizers -from . import uri - - -class URIBuilder(object): - """Object to aid in building up a URI Reference from parts. - - .. note:: - - This object should be instantiated by the user, but it's recommended - that it is not provided with arguments. Instead, use the available - method to populate the fields. - - """ - - def __init__( - self, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, - ): - """Initialize our URI builder. - - :param str scheme: - (optional) - :param str userinfo: - (optional) - :param str host: - (optional) - :param int port: - (optional) - :param str path: - (optional) - :param str query: - (optional) - :param str fragment: - (optional) - """ - self.scheme = scheme - self.userinfo = userinfo - self.host = host - self.port = port - self.path = path - self.query = query - self.fragment = fragment - - def __repr__(self): - """Provide a convenient view of our builder object.""" - formatstr = ( - "URIBuilder(scheme={b.scheme}, userinfo={b.userinfo}, " - "host={b.host}, port={b.port}, path={b.path}, " - "query={b.query}, fragment={b.fragment})" - ) - return formatstr.format(b=self) - - def add_scheme(self, scheme): - """Add a scheme to our builder object. - - After normalizing, this will generate a new URIBuilder instance with - the specified scheme and all other attributes the same. - - .. code-block:: python - - >>> URIBuilder().add_scheme('HTTPS') - URIBuilder(scheme='https', userinfo=None, host=None, port=None, - path=None, query=None, fragment=None) - - """ - scheme = normalizers.normalize_scheme(scheme) - return URIBuilder( - scheme=scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=self.path, - query=self.query, - fragment=self.fragment, - ) - - def add_credentials(self, username, password): - """Add credentials as the userinfo portion of the URI. - - .. code-block:: python - - >>> URIBuilder().add_credentials('root', 's3crete') - URIBuilder(scheme=None, userinfo='root:s3crete', host=None, - port=None, path=None, query=None, fragment=None) - - >>> URIBuilder().add_credentials('root', None) - URIBuilder(scheme=None, userinfo='root', host=None, - port=None, path=None, query=None, fragment=None) - """ - if username is None: - raise ValueError("Username cannot be None") - userinfo = normalizers.normalize_username(username) - - if password is not None: - userinfo = "{}:{}".format( - userinfo, normalizers.normalize_password(password) - ) - - return URIBuilder( - scheme=self.scheme, - userinfo=userinfo, - host=self.host, - port=self.port, - path=self.path, - query=self.query, - fragment=self.fragment, - ) - - def add_host(self, host): - """Add hostname to the URI. - - .. code-block:: python - - >>> URIBuilder().add_host('google.com') - URIBuilder(scheme=None, userinfo=None, host='google.com', - port=None, path=None, query=None, fragment=None) - - """ - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=normalizers.normalize_host(host), - port=self.port, - path=self.path, - query=self.query, - fragment=self.fragment, - ) - - def add_port(self, port): - """Add port to the URI. - - .. code-block:: python - - >>> URIBuilder().add_port(80) - URIBuilder(scheme=None, userinfo=None, host=None, port='80', - path=None, query=None, fragment=None) - - >>> URIBuilder().add_port(443) - URIBuilder(scheme=None, userinfo=None, host=None, port='443', - path=None, query=None, fragment=None) - - """ - port_int = int(port) - if port_int < 0: - raise ValueError( - "ports are not allowed to be negative. You provided {}".format(port_int) - ) - if port_int > 65535: - raise ValueError( - "ports are not allowed to be larger than 65535. " - "You provided {}".format(port_int) - ) - - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port="{}".format(port_int), - path=self.path, - query=self.query, - fragment=self.fragment, - ) - - def add_path(self, path): - """Add a path to the URI. - - .. code-block:: python - - >>> URIBuilder().add_path('sigmavirus24/rfc3985') - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path='/sigmavirus24/rfc3986', query=None, fragment=None) - - >>> URIBuilder().add_path('/checkout.php') - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path='/checkout.php', query=None, fragment=None) - - """ - if not path.startswith("/"): - path = "/{}".format(path) - - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=normalizers.normalize_path(path), - query=self.query, - fragment=self.fragment, - ) - - def add_query_from(self, query_items): - """Generate and add a query a dictionary or list of tuples. - - .. code-block:: python - - >>> URIBuilder().add_query_from({'a': 'b c'}) - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path=None, query='a=b+c', fragment=None) - - >>> URIBuilder().add_query_from([('a', 'b c')]) - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path=None, query='a=b+c', fragment=None) - - """ - query = normalizers.normalize_query(compat.urlencode(query_items)) - - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=self.path, - query=query, - fragment=self.fragment, - ) - - def add_query(self, query): - """Add a pre-formated query string to the URI. - - .. code-block:: python - - >>> URIBuilder().add_query('a=b&c=d') - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path=None, query='a=b&c=d', fragment=None) - - """ - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=self.path, - query=normalizers.normalize_query(query), - fragment=self.fragment, - ) - - def add_fragment(self, fragment): - """Add a fragment to the URI. - - .. code-block:: python - - >>> URIBuilder().add_fragment('section-2.6.1') - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path=None, query=None, fragment='section-2.6.1') - - """ - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=self.path, - query=self.query, - fragment=normalizers.normalize_fragment(fragment), - ) - - def finalize(self): - """Create a URIReference from our builder. - - .. code-block:: python - - >>> URIBuilder().add_scheme('https').add_host('github.com' - ... ).add_path('sigmavirus24/rfc3986').finalize().unsplit() - 'https://github.com/sigmavirus24/rfc3986' - - >>> URIBuilder().add_scheme('https').add_host('github.com' - ... ).add_path('sigmavirus24/rfc3986').add_credentials( - ... 'sigmavirus24', 'not-re@l').finalize().unsplit() - 'https://sigmavirus24:not-re%40l@github.com/sigmavirus24/rfc3986' - - """ - return uri.URIReference( - self.scheme, - normalizers.normalize_authority((self.userinfo, self.host, self.port)), - self.path, - self.query, - self.fragment, - ) diff --git a/src/urllib3/packages/rfc3986/compat.py b/src/urllib3/packages/rfc3986/compat.py deleted file mode 100644 index 36e490ab..00000000 --- a/src/urllib3/packages/rfc3986/compat.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Compatibility module for Python 2 and 3 support.""" -import sys - -try: - from urllib.parse import quote as urlquote -except ImportError: # Python 2.x - from urllib import quote as urlquote - -try: - from urllib.parse import urlencode -except ImportError: # Python 2.x - from urllib import urlencode - -__all__ = ("to_bytes", "to_str", "urlquote", "urlencode") - -PY3 = (3, 0) <= sys.version_info < (4, 0) -PY2 = (2, 6) <= sys.version_info < (2, 8) - - -if PY3: - unicode = str # Python 3.x - - -def to_str(b, encoding="utf-8"): - """Ensure that b is text in the specified encoding.""" - if hasattr(b, "decode") and not isinstance(b, unicode): - b = b.decode(encoding) - return b - - -def to_bytes(s, encoding="utf-8"): - """Ensure that s is converted to bytes from the encoding.""" - if hasattr(s, "encode") and not isinstance(s, bytes): - s = s.encode(encoding) - return s diff --git a/src/urllib3/packages/rfc3986/exceptions.py b/src/urllib3/packages/rfc3986/exceptions.py deleted file mode 100644 index 865f9bd7..00000000 --- a/src/urllib3/packages/rfc3986/exceptions.py +++ /dev/null @@ -1,112 +0,0 @@ -# -*- coding: utf-8 -*- -"""Exceptions module for rfc3986.""" - -from . import compat - - -class RFC3986Exception(Exception): - """Base class for all rfc3986 exception classes.""" - - pass - - -class InvalidAuthority(RFC3986Exception): - """Exception when the authority string is invalid.""" - - def __init__(self, authority): - """Initialize the exception with the invalid authority.""" - super(InvalidAuthority, self).__init__( - u"The authority ({0}) is not valid.".format(compat.to_str(authority)) - ) - - -class InvalidPort(RFC3986Exception): - """Exception when the port is invalid.""" - - def __init__(self, port): - """Initialize the exception with the invalid port.""" - super(InvalidPort, self).__init__('The port ("{0}") is not valid.'.format(port)) - - -class ResolutionError(RFC3986Exception): - """Exception to indicate a failure to resolve a URI.""" - - def __init__(self, uri): - """Initialize the error with the failed URI.""" - super(ResolutionError, self).__init__( - "{0} is not an absolute URI.".format(uri.unsplit()) - ) - - -class ValidationError(RFC3986Exception): - """Exception raised during Validation of a URI.""" - - pass - - -class MissingComponentError(ValidationError): - """Exception raised when a required component is missing.""" - - def __init__(self, uri, *component_names): - """Initialize the error with the missing component name.""" - verb = "was" - if len(component_names) > 1: - verb = "were" - - self.uri = uri - self.components = sorted(component_names) - components = ", ".join(self.components) - super(MissingComponentError, self).__init__( - "{} {} required but missing".format(components, verb), uri, self.components - ) - - -class UnpermittedComponentError(ValidationError): - """Exception raised when a component has an unpermitted value.""" - - def __init__(self, component_name, component_value, allowed_values): - """Initialize the error with the unpermitted component.""" - super(UnpermittedComponentError, self).__init__( - "{} was required to be one of {!r} but was {!r}".format( - component_name, list(sorted(allowed_values)), component_value - ), - component_name, - component_value, - allowed_values, - ) - self.component_name = component_name - self.component_value = component_value - self.allowed_values = allowed_values - - -class PasswordForbidden(ValidationError): - """Exception raised when a URL has a password in the userinfo section.""" - - def __init__(self, uri): - """Initialize the error with the URI that failed validation.""" - unsplit = getattr(uri, "unsplit", lambda: uri) - super(PasswordForbidden, self).__init__( - '"{}" contained a password when validation forbade it'.format(unsplit()) - ) - self.uri = uri - - -class InvalidComponentsError(ValidationError): - """Exception raised when one or more components are invalid.""" - - def __init__(self, uri, *component_names): - """Initialize the error with the invalid component name(s).""" - verb = "was" - if len(component_names) > 1: - verb = "were" - - self.uri = uri - self.components = sorted(component_names) - components = ", ".join(self.components) - super(InvalidComponentsError, self).__init__( - "{} {} found to be invalid".format(components, verb), uri, self.components - ) - - -class MissingDependencyError(RFC3986Exception): - """Exception raised when an IRI is encoded without the 'idna' module.""" diff --git a/src/urllib3/packages/rfc3986/iri.py b/src/urllib3/packages/rfc3986/iri.py deleted file mode 100644 index a15e8386..00000000 --- a/src/urllib3/packages/rfc3986/iri.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Module containing the implementation of the IRIReference class.""" -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Copyright (c) 2015 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from collections import namedtuple - -from . import compat -from . import exceptions -from . import misc -from . import normalizers -from . import uri - - -try: - import idna -except ImportError: # pragma: no cover - idna = None - - -class IRIReference(namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin): - """Immutable object representing a parsed IRI Reference. - - Can be encoded into an URIReference object via the procedure - specified in RFC 3987 Section 3.1 - - .. note:: - The IRI submodule is a new interface and may possibly change in - the future. Check for changes to the interface when upgrading. - """ - - slots = () - - def __new__(cls, scheme, authority, path, query, fragment, encoding="utf-8"): - """Create a new IRIReference.""" - ref = super(IRIReference, cls).__new__( - cls, scheme or None, authority or None, path or None, query, fragment - ) - ref.encoding = encoding - return ref - - def __eq__(self, other): - """Compare this reference to another.""" - other_ref = other - if isinstance(other, tuple): - other_ref = self.__class__(*other) - elif not isinstance(other, IRIReference): - try: - other_ref = self.__class__.from_string(other) - except TypeError: - raise TypeError( - "Unable to compare {0}() to {1}()".format( - type(self).__name__, type(other).__name__ - ) - ) - - # See http://tools.ietf.org/html/rfc3986#section-6.2 - return tuple(self) == tuple(other_ref) - - def _match_subauthority(self): - return misc.ISUBAUTHORITY_MATCHER.match(self.authority) - - @classmethod - def from_string(cls, iri_string, encoding="utf-8"): - """Parse a IRI reference from the given unicode IRI string. - - :param str iri_string: Unicode IRI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: :class:`IRIReference` or subclass thereof - """ - iri_string = compat.to_str(iri_string, encoding) - - split_iri = misc.IRI_MATCHER.match(iri_string).groupdict() - return cls( - split_iri["scheme"], - split_iri["authority"], - normalizers.encode_component(split_iri["path"], encoding), - normalizers.encode_component(split_iri["query"], encoding), - normalizers.encode_component(split_iri["fragment"], encoding), - encoding, - ) - - def encode(self, idna_encoder=None): # noqa: C901 - """Encode an IRIReference into a URIReference instance. - - If the ``idna`` module is installed or the ``rfc3986[idna]`` - extra is used then unicode characters in the IRI host - component will be encoded with IDNA2008. - - :param idna_encoder: - Function that encodes each part of the host component - If not given will raise an exception if the IRI - contains a host component. - :rtype: uri.URIReference - :returns: A URI reference - """ - authority = self.authority - if authority: - if idna_encoder is None: - if idna is None: # pragma: no cover - raise exceptions.MissingDependencyError( - "Could not import the 'idna' module " - "and the IRI hostname requires encoding" - ) - - def idna_encoder(name): - if any(ord(c) > 128 for c in name): - try: - return idna.encode( - name.lower(), strict=True, std3_rules=True - ) - except idna.IDNAError: - raise exceptions.InvalidAuthority(self.authority) - return name - - authority = "" - if self.host: - authority = ".".join( - [compat.to_str(idna_encoder(part)) for part in self.host.split(".")] - ) - - if self.userinfo is not None: - authority = ( - normalizers.encode_component(self.userinfo, self.encoding) - + "@" - + authority - ) - - if self.port is not None: - authority += ":" + str(self.port) - - return uri.URIReference( - self.scheme, - authority, - path=self.path, - query=self.query, - fragment=self.fragment, - encoding=self.encoding, - ) diff --git a/src/urllib3/packages/rfc3986/misc.py b/src/urllib3/packages/rfc3986/misc.py deleted file mode 100644 index 353a6292..00000000 --- a/src/urllib3/packages/rfc3986/misc.py +++ /dev/null @@ -1,125 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Module containing compiled regular expressions and constants. - -This module contains important constants, patterns, and compiled regular -expressions for parsing and validating URIs and their components. -""" - -import re - -from . import abnf_regexp - -# These are enumerated for the named tuple used as a superclass of -# URIReference -URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"] - -important_characters = { - "generic_delimiters": abnf_regexp.GENERIC_DELIMITERS, - "sub_delimiters": abnf_regexp.SUB_DELIMITERS, - # We need to escape the '*' in this case - "re_sub_delimiters": abnf_regexp.SUB_DELIMITERS_RE, - "unreserved_chars": abnf_regexp.UNRESERVED_CHARS, - # We need to escape the '-' in this case: - "re_unreserved": abnf_regexp.UNRESERVED_RE, -} - -# For details about delimiters and reserved characters, see: -# http://tools.ietf.org/html/rfc3986#section-2.2 -GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET -SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET -RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET -# For details about unreserved characters, see: -# http://tools.ietf.org/html/rfc3986#section-2.3 -UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET -NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET - -URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE) - -SUBAUTHORITY_MATCHER = re.compile( - ( - "^(?:(?P{0})@)?" # userinfo - "(?P{1})" # host - ":?(?P{2})?$" # port - ).format(abnf_regexp.USERINFO_RE, abnf_regexp.HOST_PATTERN, abnf_regexp.PORT_RE) -) - - -HOST_MATCHER = re.compile("^" + abnf_regexp.HOST_RE + "$") -IPv4_MATCHER = re.compile("^" + abnf_regexp.IPv4_RE + "$") -IPv6_MATCHER = re.compile(r"^\[" + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r"\]$") - -# Used by host validator -IPv6_NO_RFC4007_MATCHER = re.compile(r"^\[%s\]$" % (abnf_regexp.IPv6_ADDRZ_RE)) - -# Matcher used to validate path components -PATH_MATCHER = re.compile(abnf_regexp.PATH_RE) - - -# ################################## -# Query and Fragment Matcher Section -# ################################## - -QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE) - -FRAGMENT_MATCHER = QUERY_MATCHER - -# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1 -SCHEME_MATCHER = re.compile("^{0}$".format(abnf_regexp.SCHEME_RE)) - -RELATIVE_REF_MATCHER = re.compile( - r"^%s(\?%s)?(#%s)?$" - % (abnf_regexp.RELATIVE_PART_RE, abnf_regexp.QUERY_RE, abnf_regexp.FRAGMENT_RE) -) - -# See http://tools.ietf.org/html/rfc3986#section-4.3 -ABSOLUTE_URI_MATCHER = re.compile( - r"^%s:%s(\?%s)?$" - % ( - abnf_regexp.COMPONENT_PATTERN_DICT["scheme"], - abnf_regexp.HIER_PART_RE, - abnf_regexp.QUERY_RE[1:-1], - ) -) - -# ############### -# IRIs / RFC 3987 -# ############### - -IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE) - -ISUBAUTHORITY_MATCHER = re.compile( - ( - u"^(?:(?P{0})@)?" # iuserinfo - u"(?P{1})" # ihost - u":?(?P{2})?$" # port - ).format(abnf_regexp.IUSERINFO_RE, abnf_regexp.IHOST_RE, abnf_regexp.PORT_RE), - re.UNICODE, -) - - -# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 -def merge_paths(base_uri, relative_path): - """Merge a base URI's path with a relative URI's path.""" - if base_uri.path is None and base_uri.authority is not None: - return "/" + relative_path - else: - path = base_uri.path or "" - index = path.rfind("/") - return path[:index] + "/" + relative_path - - -UseExisting = object() diff --git a/src/urllib3/packages/rfc3986/normalizers.py b/src/urllib3/packages/rfc3986/normalizers.py deleted file mode 100644 index 0d702b6d..00000000 --- a/src/urllib3/packages/rfc3986/normalizers.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module with functions to normalize components.""" -import re - -from . import compat -from . import misc - - -def normalize_scheme(scheme): - """Normalize the scheme component.""" - return scheme.lower() - - -def normalize_authority(authority): - """Normalize an authority tuple to a string.""" - userinfo, host, port = authority - result = "" - if userinfo: - result += normalize_percent_characters(userinfo) + "@" - if host: - result += normalize_host(host) - if port: - result += ":" + port - return result - - -def normalize_username(username): - """Normalize a username to make it safe to include in userinfo.""" - return compat.urlquote(username) - - -def normalize_password(password): - """Normalize a password to make safe for userinfo.""" - return compat.urlquote(password) - - -def normalize_host(host): - """Normalize a host string.""" - if misc.IPv6_MATCHER.match(host): - percent = host.find("%") - if percent != -1: - percent_25 = host.find("%25") - - # Replace RFC 4007 IPv6 Zone ID delimiter '%' with '%25' - # from RFC 6874. If the host is '[%25]' then we - # assume RFC 4007 and normalize to '[%2525]' - if ( - percent_25 == -1 - or percent < percent_25 - or (percent == percent_25 and percent_25 == len(host) - 4) - ): - host = host.replace("%", "%25", 1) - - # Don't normalize the casing of the Zone ID - return host[:percent].lower() + host[percent:] - - return host.lower() - - -def normalize_path(path): - """Normalize the path string.""" - if not path: - return path - - path = normalize_percent_characters(path) - return remove_dot_segments(path) - - -def normalize_query(query): - """Normalize the query string.""" - if not query: - return query - return normalize_percent_characters(query) - - -def normalize_fragment(fragment): - """Normalize the fragment string.""" - if not fragment: - return fragment - return normalize_percent_characters(fragment) - - -PERCENT_MATCHER = re.compile("%[A-Fa-f0-9]{2}") - - -def normalize_percent_characters(s): - """All percent characters should be upper-cased. - - For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``. - """ - matches = set(PERCENT_MATCHER.findall(s)) - for m in matches: - if not m.isupper(): - s = s.replace(m, m.upper()) - return s - - -def remove_dot_segments(s): - """Remove dot segments from the string. - - See also Section 5.2.4 of :rfc:`3986`. - """ - # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code - segments = s.split("/") # Turn the path into a list of segments - output = [] # Initialize the variable to use to store output - - for segment in segments: - # '.' is the current directory, so ignore it, it is superfluous - if segment == ".": - continue - # Anything other than '..', should be appended to the output - elif segment != "..": - output.append(segment) - # In this case segment == '..', if we can, we should pop the last - # element - elif output: - output.pop() - - # If the path starts with '/' and the output is empty or the first string - # is non-empty - if s.startswith("/") and (not output or output[0]): - output.insert(0, "") - - # If the path starts with '/.' or '/..' ensure we add one more empty - # string to add a trailing '/' - if s.endswith(("/.", "/..")): - output.append("") - - return "/".join(output) - - -def encode_component(uri_component, encoding): - """Encode the specific component in the provided encoding.""" - if uri_component is None: - return uri_component - - # Try to see if the component we're encoding is already percent-encoded - # so we can skip all '%' characters but still encode all others. - percent_encodings = len( - PERCENT_MATCHER.findall(compat.to_str(uri_component, encoding)) - ) - - uri_bytes = compat.to_bytes(uri_component, encoding) - is_percent_encoded = percent_encodings == uri_bytes.count(b"%") - - encoded_uri = bytearray() - - for i in range(0, len(uri_bytes)): - # Will return a single character bytestring on both Python 2 & 3 - byte = uri_bytes[i : i + 1] - byte_ord = ord(byte) - if (is_percent_encoded and byte == b"%") or ( - byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED - ): - encoded_uri.extend(byte) - continue - encoded_uri.extend("%{0:02x}".format(byte_ord).encode().upper()) - - return encoded_uri.decode(encoding) diff --git a/src/urllib3/packages/rfc3986/parseresult.py b/src/urllib3/packages/rfc3986/parseresult.py deleted file mode 100644 index 74d12c25..00000000 --- a/src/urllib3/packages/rfc3986/parseresult.py +++ /dev/null @@ -1,457 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2015 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module containing the urlparse compatibility logic.""" -from collections import namedtuple - -from . import compat -from . import exceptions -from . import misc -from . import normalizers -from . import uri - -__all__ = ("ParseResult", "ParseResultBytes") - -PARSED_COMPONENTS = ("scheme", "userinfo", "host", "port", "path", "query", "fragment") - - -class ParseResultMixin(object): - def _generate_authority(self, attributes): - # I swear I did not align the comparisons below. That's just how they - # happened to align based on pep8 and attribute lengths. - userinfo, host, port = (attributes[p] for p in ("userinfo", "host", "port")) - if self.userinfo != userinfo or self.host != host or self.port != port: - if port: - port = "{0}".format(port) - return normalizers.normalize_authority( - ( - compat.to_str(userinfo, self.encoding), - compat.to_str(host, self.encoding), - port, - ) - ) - return self.authority - - def geturl(self): - """Shim to match the standard library method.""" - return self.unsplit() - - @property - def hostname(self): - """Shim to match the standard library.""" - return self.host - - @property - def netloc(self): - """Shim to match the standard library.""" - return self.authority - - @property - def params(self): - """Shim to match the standard library.""" - return self.query - - -class ParseResult(namedtuple("ParseResult", PARSED_COMPONENTS), ParseResultMixin): - """Implementation of urlparse compatibility class. - - This uses the URIReference logic to handle compatibility with the - urlparse.ParseResult class. - """ - - slots = () - - def __new__( - cls, - scheme, - userinfo, - host, - port, - path, - query, - fragment, - uri_ref, - encoding="utf-8", - ): - """Create a new ParseResult.""" - parse_result = super(ParseResult, cls).__new__( - cls, - scheme or None, - userinfo or None, - host, - port or None, - path or None, - query, - fragment, - ) - parse_result.encoding = encoding - parse_result.reference = uri_ref - return parse_result - - @classmethod - def from_parts( - cls, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, - encoding="utf-8", - ): - """Create a ParseResult instance from its parts.""" - authority = "" - if userinfo is not None: - authority += userinfo + "@" - if host is not None: - authority += host - if port is not None: - authority += ":{0}".format(port) - uri_ref = uri.URIReference( - scheme=scheme, - authority=authority, - path=path, - query=query, - fragment=fragment, - encoding=encoding, - ).normalize() - userinfo, host, port = authority_from(uri_ref, strict=True) - return cls( - scheme=uri_ref.scheme, - userinfo=userinfo, - host=host, - port=port, - path=uri_ref.path, - query=uri_ref.query, - fragment=uri_ref.fragment, - uri_ref=uri_ref, - encoding=encoding, - ) - - @classmethod - def from_string( - cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True - ): - """Parse a URI from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :param bool strict: Parse strictly according to :rfc:`3986` if True. - If False, parse similarly to the standard library's urlparse - function. - :returns: :class:`ParseResult` or subclass thereof - """ - reference = uri.URIReference.from_string(uri_string, encoding) - if not lazy_normalize: - reference = reference.normalize() - userinfo, host, port = authority_from(reference, strict) - - return cls( - scheme=reference.scheme, - userinfo=userinfo, - host=host, - port=port, - path=reference.path, - query=reference.query, - fragment=reference.fragment, - uri_ref=reference, - encoding=encoding, - ) - - @property - def authority(self): - """Return the normalized authority.""" - return self.reference.authority - - def copy_with( - self, - scheme=misc.UseExisting, - userinfo=misc.UseExisting, - host=misc.UseExisting, - port=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, - ): - """Create a copy of this instance replacing with specified parts.""" - attributes = zip( - PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment) - ) - attrs_dict = {} - for name, value in attributes: - if value is misc.UseExisting: - value = getattr(self, name) - attrs_dict[name] = value - authority = self._generate_authority(attrs_dict) - ref = self.reference.copy_with( - scheme=attrs_dict["scheme"], - authority=authority, - path=attrs_dict["path"], - query=attrs_dict["query"], - fragment=attrs_dict["fragment"], - ) - return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict) - - def encode(self, encoding=None): - """Convert to an instance of ParseResultBytes.""" - encoding = encoding or self.encoding - attrs = dict( - zip( - PARSED_COMPONENTS, - ( - attr.encode(encoding) if hasattr(attr, "encode") else attr - for attr in self - ), - ) - ) - return ParseResultBytes(uri_ref=self.reference, encoding=encoding, **attrs) - - def unsplit(self, use_idna=False): - """Create a URI string from the components. - - :returns: The parsed URI reconstituted as a string. - :rtype: str - """ - parse_result = self - if use_idna and self.host: - hostbytes = self.host.encode("idna") - host = hostbytes.decode(self.encoding) - parse_result = self.copy_with(host=host) - return parse_result.reference.unsplit() - - -class ParseResultBytes( - namedtuple("ParseResultBytes", PARSED_COMPONENTS), ParseResultMixin -): - """Compatibility shim for the urlparse.ParseResultBytes object.""" - - def __new__( - cls, - scheme, - userinfo, - host, - port, - path, - query, - fragment, - uri_ref, - encoding="utf-8", - lazy_normalize=True, - ): - """Create a new ParseResultBytes instance.""" - parse_result = super(ParseResultBytes, cls).__new__( - cls, - scheme or None, - userinfo or None, - host, - port or None, - path or None, - query or None, - fragment or None, - ) - parse_result.encoding = encoding - parse_result.reference = uri_ref - parse_result.lazy_normalize = lazy_normalize - return parse_result - - @classmethod - def from_parts( - cls, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, - encoding="utf-8", - lazy_normalize=True, - ): - """Create a ParseResult instance from its parts.""" - authority = "" - if userinfo is not None: - authority += userinfo + "@" - if host is not None: - authority += host - if port is not None: - authority += ":{0}".format(int(port)) - uri_ref = uri.URIReference( - scheme=scheme, - authority=authority, - path=path, - query=query, - fragment=fragment, - encoding=encoding, - ) - if not lazy_normalize: - uri_ref = uri_ref.normalize() - to_bytes = compat.to_bytes - userinfo, host, port = authority_from(uri_ref, strict=True) - return cls( - scheme=to_bytes(scheme, encoding), - userinfo=to_bytes(userinfo, encoding), - host=to_bytes(host, encoding), - port=port, - path=to_bytes(path, encoding), - query=to_bytes(query, encoding), - fragment=to_bytes(fragment, encoding), - uri_ref=uri_ref, - encoding=encoding, - lazy_normalize=lazy_normalize, - ) - - @classmethod - def from_string( - cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True - ): - """Parse a URI from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :param bool strict: Parse strictly according to :rfc:`3986` if True. - If False, parse similarly to the standard library's urlparse - function. - :returns: :class:`ParseResultBytes` or subclass thereof - """ - reference = uri.URIReference.from_string(uri_string, encoding) - if not lazy_normalize: - reference = reference.normalize() - userinfo, host, port = authority_from(reference, strict) - - to_bytes = compat.to_bytes - return cls( - scheme=to_bytes(reference.scheme, encoding), - userinfo=to_bytes(userinfo, encoding), - host=to_bytes(host, encoding), - port=port, - path=to_bytes(reference.path, encoding), - query=to_bytes(reference.query, encoding), - fragment=to_bytes(reference.fragment, encoding), - uri_ref=reference, - encoding=encoding, - lazy_normalize=lazy_normalize, - ) - - @property - def authority(self): - """Return the normalized authority.""" - return self.reference.authority.encode(self.encoding) - - def copy_with( - self, - scheme=misc.UseExisting, - userinfo=misc.UseExisting, - host=misc.UseExisting, - port=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, - lazy_normalize=True, - ): - """Create a copy of this instance replacing with specified parts.""" - attributes = zip( - PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment) - ) - attrs_dict = {} - for name, value in attributes: - if value is misc.UseExisting: - value = getattr(self, name) - if not isinstance(value, bytes) and hasattr(value, "encode"): - value = value.encode(self.encoding) - attrs_dict[name] = value - authority = self._generate_authority(attrs_dict) - to_str = compat.to_str - ref = self.reference.copy_with( - scheme=to_str(attrs_dict["scheme"], self.encoding), - authority=to_str(authority, self.encoding), - path=to_str(attrs_dict["path"], self.encoding), - query=to_str(attrs_dict["query"], self.encoding), - fragment=to_str(attrs_dict["fragment"], self.encoding), - ) - if not lazy_normalize: - ref = ref.normalize() - return ParseResultBytes( - uri_ref=ref, - encoding=self.encoding, - lazy_normalize=lazy_normalize, - **attrs_dict - ) - - def unsplit(self, use_idna=False): - """Create a URI bytes object from the components. - - :returns: The parsed URI reconstituted as a string. - :rtype: bytes - """ - parse_result = self - if use_idna and self.host: - # self.host is bytes, to encode to idna, we need to decode it - # first - host = self.host.decode(self.encoding) - hostbytes = host.encode("idna") - parse_result = self.copy_with(host=hostbytes) - if self.lazy_normalize: - parse_result = parse_result.copy_with(lazy_normalize=False) - uri = parse_result.reference.unsplit() - return uri.encode(self.encoding) - - -def split_authority(authority): - # Initialize our expected return values - userinfo = host = port = None - # Initialize an extra var we may need to use - extra_host = None - # Set-up rest in case there is no userinfo portion - rest = authority - - if "@" in authority: - userinfo, rest = authority.rsplit("@", 1) - - # Handle IPv6 host addresses - if rest.startswith("["): - host, rest = rest.split("]", 1) - host += "]" - - if ":" in rest: - extra_host, port = rest.split(":", 1) - elif not host and rest: - host = rest - - if extra_host and not host: - host = extra_host - - return userinfo, host, port - - -def authority_from(reference, strict): - try: - subauthority = reference.authority_info() - except exceptions.InvalidAuthority: - if strict: - raise - userinfo, host, port = split_authority(reference.authority) - else: - # Thanks to Richard Barrell for this idea: - # https://twitter.com/0x2ba22e11/status/617338811975139328 - userinfo, host, port = ( - subauthority.get(p) for p in ("userinfo", "host", "port") - ) - - if port: - try: - port = int(port) - except ValueError: - raise exceptions.InvalidPort(port) - return userinfo, host, port diff --git a/src/urllib3/packages/rfc3986/uri.py b/src/urllib3/packages/rfc3986/uri.py deleted file mode 100644 index 61104ab1..00000000 --- a/src/urllib3/packages/rfc3986/uri.py +++ /dev/null @@ -1,152 +0,0 @@ -"""Module containing the implementation of the URIReference class.""" -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Copyright (c) 2015 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from collections import namedtuple - -from . import compat -from . import misc -from . import normalizers -from ._mixin import URIMixin - - -class URIReference(namedtuple("URIReference", misc.URI_COMPONENTS), URIMixin): - """Immutable object representing a parsed URI Reference. - - .. note:: - - This class is not intended to be directly instantiated by the user. - - This object exposes attributes for the following components of a - URI: - - - scheme - - authority - - path - - query - - fragment - - .. attribute:: scheme - - The scheme that was parsed for the URI Reference. For example, - ``http``, ``https``, ``smtp``, ``imap``, etc. - - .. attribute:: authority - - Component of the URI that contains the user information, host, - and port sub-components. For example, - ``google.com``, ``127.0.0.1:5000``, ``username@[::1]``, - ``username:password@example.com:443``, etc. - - .. attribute:: path - - The path that was parsed for the given URI Reference. For example, - ``/``, ``/index.php``, etc. - - .. attribute:: query - - The query component for a given URI Reference. For example, ``a=b``, - ``a=b%20c``, ``a=b+c``, ``a=b,c=d,e=%20f``, etc. - - .. attribute:: fragment - - The fragment component of a URI. For example, ``section-3.1``. - - This class also provides extra attributes for easier access to information - like the subcomponents of the authority component. - - .. attribute:: userinfo - - The user information parsed from the authority. - - .. attribute:: host - - The hostname, IPv4, or IPv6 adddres parsed from the authority. - - .. attribute:: port - - The port parsed from the authority. - """ - - slots = () - - def __new__(cls, scheme, authority, path, query, fragment, encoding="utf-8"): - """Create a new URIReference.""" - ref = super(URIReference, cls).__new__( - cls, scheme or None, authority or None, path or None, query, fragment - ) - ref.encoding = encoding - return ref - - __hash__ = tuple.__hash__ - - def __eq__(self, other): - """Compare this reference to another.""" - other_ref = other - if isinstance(other, tuple): - other_ref = URIReference(*other) - elif not isinstance(other, URIReference): - try: - other_ref = URIReference.from_string(other) - except TypeError: - raise TypeError( - "Unable to compare URIReference() to {0}()".format( - type(other).__name__ - ) - ) - - # See http://tools.ietf.org/html/rfc3986#section-6.2 - naive_equality = tuple(self) == tuple(other_ref) - return naive_equality or self.normalized_equality(other_ref) - - def normalize(self): - """Normalize this reference as described in Section 6.2.2. - - This is not an in-place normalization. Instead this creates a new - URIReference. - - :returns: A new reference object with normalized components. - :rtype: URIReference - """ - # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in - # this method. - return URIReference( - normalizers.normalize_scheme(self.scheme or ""), - normalizers.normalize_authority((self.userinfo, self.host, self.port)), - normalizers.normalize_path(self.path or ""), - normalizers.normalize_query(self.query), - normalizers.normalize_fragment(self.fragment), - self.encoding, - ) - - @classmethod - def from_string(cls, uri_string, encoding="utf-8"): - """Parse a URI reference from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: :class:`URIReference` or subclass thereof - """ - uri_string = compat.to_str(uri_string, encoding) - - split_uri = misc.URI_MATCHER.match(uri_string).groupdict() - return cls( - split_uri["scheme"], - split_uri["authority"], - normalizers.encode_component(split_uri["path"], encoding), - normalizers.encode_component(split_uri["query"], encoding), - normalizers.encode_component(split_uri["fragment"], encoding), - encoding, - ) diff --git a/src/urllib3/packages/rfc3986/validators.py b/src/urllib3/packages/rfc3986/validators.py deleted file mode 100644 index a60ae91b..00000000 --- a/src/urllib3/packages/rfc3986/validators.py +++ /dev/null @@ -1,435 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2017 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module containing the validation logic for rfc3986.""" -from . import exceptions -from . import misc -from . import normalizers - - -class Validator(object): - """Object used to configure validation of all objects in rfc3986. - - .. versionadded:: 1.0 - - Example usage:: - - >>> from rfc3986 import api, validators - >>> uri = api.uri_reference('https://github.com/') - >>> validator = validators.Validator().require_presence_of( - ... 'scheme', 'host', 'path', - ... ).allow_schemes( - ... 'http', 'https', - ... ).allow_hosts( - ... '127.0.0.1', 'github.com', - ... ) - >>> validator.validate(uri) - >>> invalid_uri = rfc3986.uri_reference('imap://mail.google.com') - >>> validator.validate(invalid_uri) - Traceback (most recent call last): - ... - rfc3986.exceptions.MissingComponentError: ('path was required but - missing', URIReference(scheme=u'imap', authority=u'mail.google.com', - path=None, query=None, fragment=None), ['path']) - - """ - - COMPONENT_NAMES = frozenset( - ["scheme", "userinfo", "host", "port", "path", "query", "fragment"] - ) - - def __init__(self): - """Initialize our default validations.""" - self.allowed_schemes = set() - self.allowed_hosts = set() - self.allowed_ports = set() - self.allow_password = True - self.required_components = { - "scheme": False, - "userinfo": False, - "host": False, - "port": False, - "path": False, - "query": False, - "fragment": False, - } - self.validated_components = self.required_components.copy() - - def allow_schemes(self, *schemes): - """Require the scheme to be one of the provided schemes. - - .. versionadded:: 1.0 - - :param schemes: - Schemes, without ``://`` that are allowed. - :returns: - The validator instance. - :rtype: - Validator - """ - for scheme in schemes: - self.allowed_schemes.add(normalizers.normalize_scheme(scheme)) - return self - - def allow_hosts(self, *hosts): - """Require the host to be one of the provided hosts. - - .. versionadded:: 1.0 - - :param hosts: - Hosts that are allowed. - :returns: - The validator instance. - :rtype: - Validator - """ - for host in hosts: - self.allowed_hosts.add(normalizers.normalize_host(host)) - return self - - def allow_ports(self, *ports): - """Require the port to be one of the provided ports. - - .. versionadded:: 1.0 - - :param ports: - Ports that are allowed. - :returns: - The validator instance. - :rtype: - Validator - """ - for port in ports: - port_int = int(port, base=10) - if 0 <= port_int <= 65535: - self.allowed_ports.add(port) - return self - - def allow_use_of_password(self): - """Allow passwords to be present in the URI. - - .. versionadded:: 1.0 - - :returns: - The validator instance. - :rtype: - Validator - """ - self.allow_password = True - return self - - def forbid_use_of_password(self): - """Prevent passwords from being included in the URI. - - .. versionadded:: 1.0 - - :returns: - The validator instance. - :rtype: - Validator - """ - self.allow_password = False - return self - - def check_validity_of(self, *components): - """Check the validity of the components provided. - - This can be specified repeatedly. - - .. versionadded:: 1.1 - - :param components: - Names of components from :attr:`Validator.COMPONENT_NAMES`. - :returns: - The validator instance. - :rtype: - Validator - """ - components = [c.lower() for c in components] - for component in components: - if component not in self.COMPONENT_NAMES: - raise ValueError('"{}" is not a valid component'.format(component)) - self.validated_components.update({component: True for component in components}) - return self - - def require_presence_of(self, *components): - """Require the components provided. - - This can be specified repeatedly. - - .. versionadded:: 1.0 - - :param components: - Names of components from :attr:`Validator.COMPONENT_NAMES`. - :returns: - The validator instance. - :rtype: - Validator - """ - components = [c.lower() for c in components] - for component in components: - if component not in self.COMPONENT_NAMES: - raise ValueError('"{}" is not a valid component'.format(component)) - self.required_components.update({component: True for component in components}) - return self - - def validate(self, uri): - """Check a URI for conditions specified on this validator. - - .. versionadded:: 1.0 - - :param uri: - Parsed URI to validate. - :type uri: - rfc3986.uri.URIReference - :raises MissingComponentError: - When a required component is missing. - :raises UnpermittedComponentError: - When a component is not one of those allowed. - :raises PasswordForbidden: - When a password is present in the userinfo component but is - not permitted by configuration. - :raises InvalidComponentsError: - When a component was found to be invalid. - """ - if not self.allow_password: - check_password(uri) - - required_components = [ - component - for component, required in self.required_components.items() - if required - ] - validated_components = [ - component - for component, required in self.validated_components.items() - if required - ] - if required_components: - ensure_required_components_exist(uri, required_components) - if validated_components: - ensure_components_are_valid(uri, validated_components) - - ensure_one_of(self.allowed_schemes, uri, "scheme") - ensure_one_of(self.allowed_hosts, uri, "host") - ensure_one_of(self.allowed_ports, uri, "port") - - -def check_password(uri): - """Assert that there is no password present in the uri.""" - userinfo = uri.userinfo - if not userinfo: - return - credentials = userinfo.split(":", 1) - if len(credentials) <= 1: - return - raise exceptions.PasswordForbidden(uri) - - -def ensure_one_of(allowed_values, uri, attribute): - """Assert that the uri's attribute is one of the allowed values.""" - value = getattr(uri, attribute) - if value is not None and allowed_values and value not in allowed_values: - raise exceptions.UnpermittedComponentError(attribute, value, allowed_values) - - -def ensure_required_components_exist(uri, required_components): - """Assert that all required components are present in the URI.""" - missing_components = sorted( - [ - component - for component in required_components - if getattr(uri, component) is None - ] - ) - if missing_components: - raise exceptions.MissingComponentError(uri, *missing_components) - - -def is_valid(value, matcher, require): - """Determine if a value is valid based on the provided matcher. - - :param str value: - Value to validate. - :param matcher: - Compiled regular expression to use to validate the value. - :param require: - Whether or not the value is required. - """ - if require: - return value is not None and matcher.match(value) - - # require is False and value is not None - return value is None or matcher.match(value) - - -def authority_is_valid(authority, host=None, require=False): - """Determine if the authority string is valid. - - :param str authority: - The authority to validate. - :param str host: - (optional) The host portion of the authority to validate. - :param bool require: - (optional) Specify if authority must not be None. - :returns: - ``True`` if valid, ``False`` otherwise - :rtype: - bool - """ - validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require) - if validated and host is not None: - return host_is_valid(host, require) - return validated - - -def host_is_valid(host, require=False): - """Determine if the host string is valid. - - :param str host: - The host to validate. - :param bool require: - (optional) Specify if host must not be None. - :returns: - ``True`` if valid, ``False`` otherwise - :rtype: - bool - """ - validated = is_valid(host, misc.HOST_MATCHER, require) - if validated and host is not None and misc.IPv4_MATCHER.match(host): - return valid_ipv4_host_address(host) - elif validated and host is not None and misc.IPv6_MATCHER.match(host): - return misc.IPv6_NO_RFC4007_MATCHER.match(host) is not None - return validated - - -def scheme_is_valid(scheme, require=False): - """Determine if the scheme is valid. - - :param str scheme: - The scheme string to validate. - :param bool require: - (optional) Set to ``True`` to require the presence of a scheme. - :returns: - ``True`` if the scheme is valid. ``False`` otherwise. - :rtype: - bool - """ - return is_valid(scheme, misc.SCHEME_MATCHER, require) - - -def path_is_valid(path, require=False): - """Determine if the path component is valid. - - :param str path: - The path string to validate. - :param bool require: - (optional) Set to ``True`` to require the presence of a path. - :returns: - ``True`` if the path is valid. ``False`` otherwise. - :rtype: - bool - """ - return is_valid(path, misc.PATH_MATCHER, require) - - -def query_is_valid(query, require=False): - """Determine if the query component is valid. - - :param str query: - The query string to validate. - :param bool require: - (optional) Set to ``True`` to require the presence of a query. - :returns: - ``True`` if the query is valid. ``False`` otherwise. - :rtype: - bool - """ - return is_valid(query, misc.QUERY_MATCHER, require) - - -def fragment_is_valid(fragment, require=False): - """Determine if the fragment component is valid. - - :param str fragment: - The fragment string to validate. - :param bool require: - (optional) Set to ``True`` to require the presence of a fragment. - :returns: - ``True`` if the fragment is valid. ``False`` otherwise. - :rtype: - bool - """ - return is_valid(fragment, misc.FRAGMENT_MATCHER, require) - - -def valid_ipv4_host_address(host): - """Determine if the given host is a valid IPv4 address.""" - # If the host exists, and it might be IPv4, check each byte in the - # address. - return all([0 <= int(byte, base=10) <= 255 for byte in host.split(".")]) - - -_COMPONENT_VALIDATORS = { - "scheme": scheme_is_valid, - "path": path_is_valid, - "query": query_is_valid, - "fragment": fragment_is_valid, -} - -_SUBAUTHORITY_VALIDATORS = set(["userinfo", "host", "port"]) - - -def subauthority_component_is_valid(uri, component): - """Determine if the userinfo, host, and port are valid.""" - try: - subauthority_dict = uri.authority_info() - except exceptions.InvalidAuthority: - return False - - # If we can parse the authority into sub-components and we're not - # validating the port, we can assume it's valid. - if component == "host": - return host_is_valid(subauthority_dict["host"]) - elif component != "port": - return True - - try: - port = int(subauthority_dict["port"]) - except TypeError: - # If the port wasn't provided it'll be None and int(None) raises a - # TypeError - return True - - return 0 <= port <= 65535 - - -def ensure_components_are_valid(uri, validated_components): - """Assert that all components are valid in the URI.""" - invalid_components = set([]) - for component in validated_components: - if component in _SUBAUTHORITY_VALIDATORS: - if not subauthority_component_is_valid(uri, component): - invalid_components.add(component) - # Python's peephole optimizer means that while this continue *is* - # actually executed, coverage.py cannot detect that. See also, - # https://bitbucket.org/ned/coveragepy/issues/198/continue-marked-as-not-covered - continue # nocov: Python 2.7, 3.3, 3.4 - - validator = _COMPONENT_VALIDATORS[component] - if not validator(getattr(uri, component)): - invalid_components.add(component) - - if invalid_components: - raise exceptions.InvalidComponentsError(uri, *invalid_components) diff --git a/src/urllib3/packages/six.py b/src/urllib3/packages/six.py index 26bbb434..31442409 100644 --- a/src/urllib3/packages/six.py +++ b/src/urllib3/packages/six.py @@ -1,6 +1,4 @@ -"""Utilities for writing code that runs on Python 2 and 3""" - -# Copyright (c) 2010-2015 Benjamin Peterson +# Copyright (c) 2010-2019 Benjamin Peterson # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,6 +18,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +"""Utilities for writing code that runs on Python 2 and 3""" + from __future__ import absolute_import import functools @@ -29,7 +29,7 @@ import types __author__ = "Benjamin Peterson " -__version__ = "1.10.0" +__version__ = "1.12.0" # Useful for very coarse version differentiation. @@ -242,6 +242,7 @@ class _MovedItems(_LazyModule): MovedAttribute("map", "itertools", "builtins", "imap", "map"), MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("getoutput", "commands", "subprocess"), MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), MovedAttribute( "reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload" @@ -267,12 +268,13 @@ class _MovedItems(_LazyModule): MovedModule("html_entities", "htmlentitydefs", "html.entities"), MovedModule("html_parser", "HTMLParser", "html.parser"), MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("email_mime_image", "email.MIMEImage", "email.mime.image"), MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), MovedModule( "email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart" ), MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), @@ -339,10 +341,14 @@ class Module_six_moves_urllib_parse(_LazyModule): MovedAttribute("quote_plus", "urllib", "urllib.parse"), MovedAttribute("unquote", "urllib", "urllib.parse"), MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute( + "unquote_to_bytes", "urllib", "urllib.parse", "unquote", "unquote_to_bytes" + ), MovedAttribute("urlencode", "urllib", "urllib.parse"), MovedAttribute("splitquery", "urllib", "urllib.parse"), MovedAttribute("splittag", "urllib", "urllib.parse"), MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("splitvalue", "urllib", "urllib.parse"), MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), MovedAttribute("uses_params", "urlparse", "urllib.parse"), @@ -424,6 +430,8 @@ class Module_six_moves_urllib_request(_LazyModule): MovedAttribute("URLopener", "urllib", "urllib.request"), MovedAttribute("FancyURLopener", "urllib", "urllib.request"), MovedAttribute("proxy_bypass", "urllib", "urllib.request"), + MovedAttribute("parse_http_list", "urllib2", "urllib.request"), + MovedAttribute("parse_keqv_list", "urllib2", "urllib.request"), ] for attr in _urllib_request_moved_attributes: setattr(Module_six_moves_urllib_request, attr.name, attr) @@ -665,6 +673,7 @@ def u(s): StringIO = io.StringIO BytesIO = io.BytesIO + del io _assertCountEqual = "assertCountEqual" if sys.version_info[1] <= 1: _assertRaisesRegex = "assertRaisesRegexp" @@ -718,11 +727,15 @@ def assertRegex(self, *args, **kwargs): exec_ = getattr(moves.builtins, "exec") def reraise(tp, value, tb=None): - if value is None: - value = tp() - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None else: @@ -741,7 +754,10 @@ def exec_(_code_, _globs_=None, _locs_=None): exec_( """def reraise(tp, value, tb=None): - raise tp, value, tb + try: + raise tp, value, tb + finally: + tb = None """ ) @@ -749,15 +765,21 @@ def exec_(_code_, _globs_=None, _locs_=None): if sys.version_info[:2] == (3, 2): exec_( """def raise_from(value, from_value): - if from_value is None: - raise value - raise value from from_value + try: + if from_value is None: + raise value + raise value from from_value + finally: + value = None """ ) elif sys.version_info[:2] > (3, 2): exec_( """def raise_from(value, from_value): - raise value from from_value + try: + raise value from from_value + finally: + value = None """ ) else: @@ -864,10 +886,14 @@ def with_metaclass(meta, *bases): # This requires a bit of explanation: the basic idea is to make a dummy # metaclass for one level of class instantiation that replaces itself with # the actual metaclass. - class metaclass(meta): + class metaclass(type): def __new__(cls, name, this_bases, d): return meta(name, bases, d) + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) + return type.__new__(metaclass, "temporary_class", (), {}) @@ -884,11 +910,71 @@ def wrapper(cls): orig_vars.pop(slots_var) orig_vars.pop("__dict__", None) orig_vars.pop("__weakref__", None) + if hasattr(cls, "__qualname__"): + orig_vars["__qualname__"] = cls.__qualname__ return metaclass(cls.__name__, cls.__bases__, orig_vars) return wrapper +def ensure_binary(s, encoding="utf-8", errors="strict"): + """Coerce **s** to six.binary_type. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> encoded to `bytes` + - `bytes` -> `bytes` + """ + if isinstance(s, text_type): + return s.encode(encoding, errors) + elif isinstance(s, binary_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + +def ensure_str(s, encoding="utf-8", errors="strict"): + """Coerce *s* to `str`. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if not isinstance(s, (text_type, binary_type)): + raise TypeError("not expecting type '%s'" % type(s)) + if PY2 and isinstance(s, text_type): + s = s.encode(encoding, errors) + elif PY3 and isinstance(s, binary_type): + s = s.decode(encoding, errors) + return s + + +def ensure_text(s, encoding="utf-8", errors="strict"): + """Coerce *s* to six.text_type. + + For Python 2: + - `unicode` -> `unicode` + - `str` -> `unicode` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if isinstance(s, binary_type): + return s.decode(encoding, errors) + elif isinstance(s, text_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + def python_2_unicode_compatible(klass): """ A decorator that defines __unicode__ and __str__ methods under Python 2. diff --git a/src/urllib3/util/ssl_.py b/src/urllib3/util/ssl_.py index 7a6ae168..a3cc5103 100644 --- a/src/urllib3/util/ssl_.py +++ b/src/urllib3/util/ssl_.py @@ -3,18 +3,17 @@ import logging import warnings import hmac -import re from binascii import hexlify, unhexlify from hashlib import md5, sha1, sha256 +from .url import IPV4_RE, BRACELESS_IPV6_ADDRZ_RE from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning from ..packages.ssl_match_hostname import ( match_hostname as _match_hostname, CertificateError, ) from ..packages import six -from ..packages.rfc3986 import abnf_regexp SSLContext = None @@ -44,13 +43,6 @@ def _const_compare_digest_backport(a, b): _const_compare_digest = getattr(hmac, "compare_digest", _const_compare_digest_backport) -# Borrow rfc3986's regular expressions for IPv4 -# and IPv6 addresses for use in is_ipaddress() -_IP_ADDRESS_REGEX = re.compile( - r"^(?:%s|%s|%s)$" - % (abnf_regexp.IPv4_RE, abnf_regexp.IPv6_RE, abnf_regexp.IPv6_ADDRZ_RFC4007_RE) -) - try: # Test for SSL features import ssl from ssl import wrap_socket, CERT_REQUIRED @@ -445,7 +437,7 @@ def match_hostname(cert, asserted_hostname): try: _match_hostname(cert, asserted_hostname) except CertificateError as e: - log.error( + log.warning( "Certificate did not match expected hostname: %s. " "Certificate: %s", asserted_hostname, cert, @@ -466,7 +458,7 @@ def is_ipaddress(hostname): if six.PY3 and isinstance(hostname, bytes): # IDN A-label bytes are ASCII compatible. hostname = hostname.decode("ascii") - return _IP_ADDRESS_REGEX.match(hostname) is not None + return bool(IPV4_RE.match(hostname) or BRACELESS_IPV6_ADDRZ_RE.match(hostname)) def _is_key_file_encrypted(key_file): diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index f225cd8b..73c39319 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -3,10 +3,7 @@ from collections import namedtuple from ..exceptions import LocationParseError -from ..packages import six, rfc3986 -from ..packages.rfc3986.exceptions import RFC3986Exception, ValidationError -from ..packages.rfc3986.validators import Validator -from ..packages.rfc3986 import abnf_regexp, normalizers, compat, misc +from ..packages import six url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"] @@ -15,12 +12,68 @@ # urllib3 infers URLs without a scheme (None) to be http. NORMALIZABLE_SCHEMES = ("http", "https", None) -# Regex for detecting URLs with schemes. RFC 3986 Section 3.1 -SCHEME_REGEX = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+\-]*:|/)") +# Almost all of these patterns were derived from the +# 'rfc3986' module: https://github.com/python-hyper/rfc3986 +PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}") +SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)") +URI_RE = re.compile( + r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?" + r"(?://([^/?#]*))?" + r"([^?#]*)" + r"(?:\?([^#]*))?" + r"(?:#(.*))?$", + re.UNICODE | re.DOTALL, +) + +IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}" +HEX_PAT = "[0-9A-Fa-f]{1,4}" +LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT) +_subs = {"hex": HEX_PAT, "ls32": LS32_PAT} +_variations = [ + # 6( h16 ":" ) ls32 + "(?:%(hex)s:){6}%(ls32)s", + # "::" 5( h16 ":" ) ls32 + "::(?:%(hex)s:){5}%(ls32)s", + # [ h16 ] "::" 4( h16 ":" ) ls32 + "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s", + # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s", + # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s", + # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s", + # [ *4( h16 ":" ) h16 ] "::" ls32 + "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s", + # [ *5( h16 ":" ) h16 ] "::" h16 + "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s", + # [ *6( h16 ":" ) h16 ] "::" + "(?:(?:%(hex)s:){0,6}%(hex)s)?::", +] + +UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._!\-" +IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")" +ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+" +IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]" +REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*" + +IPV4_RE = re.compile("^" + IPV4_PAT + "$") +IPV6_RE = re.compile("^" + IPV6_PAT + "$") +IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$") +BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$") +ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$") + +SUBAUTHORITY_PAT = (u"^(?:(.*)@)?" u"(%s|%s|%s)" u"(?::([0-9]{0,5}))?$") % ( + REG_NAME_PAT, + IPV4_PAT, + IPV6_ADDRZ_PAT, +) +SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL) -PATH_CHARS = ( - abnf_regexp.UNRESERVED_CHARS_SET | abnf_regexp.SUB_DELIMITERS_SET | {":", "@", "/"} +ZONE_ID_CHARS = set( + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789._!-" ) +USERINFO_CHARS = ZONE_ID_CHARS | set("$&'()*+,;=:") +PATH_CHARS = USERINFO_CHARS | {"@", "/"} QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"} @@ -154,20 +207,24 @@ def split_first(s, delims): def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): """Percent-encodes a URI component without reapplying - onto an already percent-encoded component. Based on - rfc3986.normalizers.encode_component() + onto an already percent-encoded component. """ if component is None: return component + component = six.ensure_text(component) + # Try to see if the component we're encoding is already percent-encoded # so we can skip all '%' characters but still encode all others. - percent_encodings = len( - normalizers.PERCENT_MATCHER.findall(compat.to_str(component, encoding)) - ) + percent_encodings = PERCENT_RE.findall(component) + + # Normalize existing percent-encoded bytes. + for enc in percent_encodings: + if not enc.isupper(): + component = component.replace(enc, enc.upper()) uri_bytes = component.encode("utf-8", "surrogatepass") - is_percent_encoded = percent_encodings == uri_bytes.count(b"%") + is_percent_encoded = len(percent_encodings) == uri_bytes.count(b"%") encoded_component = bytearray() @@ -180,17 +237,96 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): ): encoded_component.extend(byte) continue - encoded_component.extend("%{0:02x}".format(byte_ord).encode().upper()) + encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper())) return encoded_component.decode(encoding) +def _remove_path_dot_segments(path): + # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code + segments = path.split("/") # Turn the path into a list of segments + output = [] # Initialize the variable to use to store output + + for segment in segments: + # '.' is the current directory, so ignore it, it is superfluous + if segment == ".": + continue + # Anything other than '..', should be appended to the output + elif segment != "..": + output.append(segment) + # In this case segment == '..', if we can, we should pop the last + # element + elif output: + output.pop() + + # If the path starts with '/' and the output is empty or the first string + # is non-empty + if path.startswith("/") and (not output or output[0]): + output.insert(0, "") + + # If the path starts with '/.' or '/..' ensure we add one more empty + # string to add a trailing '/' + if path.endswith(("/.", "/..")): + output.append("") + + return "/".join(output) + + +def _normalize_host(host, scheme): + if host: + if isinstance(host, six.binary_type): + host = six.ensure_str(host) + + if scheme in NORMALIZABLE_SCHEMES: + is_ipv6 = IPV6_ADDRZ_RE.match(host) + if is_ipv6: + match = ZONE_ID_RE.search(host) + if match: + start, end = match.span(1) + zone_id = host[start:end] + + if zone_id.startswith("%25") and zone_id != "%25": + zone_id = zone_id[3:] + else: + zone_id = zone_id[1:] + zone_id = "%" + _encode_invalid_chars(zone_id, ZONE_ID_CHARS) + return host[:start].lower() + zone_id + host[end:] + else: + return host.lower() + elif not IPV4_RE.match(host): + return six.ensure_str( + b".".join([_idna_encode(label) for label in host.split(".")]) + ) + return host + + +def _idna_encode(name): + if name and any([ord(x) > 128 for x in name]): + try: + import idna + except ImportError: + six.raise_from( + LocationParseError("Unable to parse URL without the 'idna' module"), + None, + ) + try: + return idna.encode(name.lower(), strict=True, std3_rules=True) + except idna.IDNAError: + six.raise_from( + LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None + ) + return name.lower().encode("ascii") + + def parse_url(url): """ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is performed to parse incomplete urls. Fields not provided will be None. This parser is RFC 3986 compliant. + The parser logic and helper functions are based heavily on + work done in the ``rfc3986`` module. + :param str url: URL to parse into a :class:`.Url` namedtuple. Partly backwards-compatible with :mod:`urlparse`. @@ -208,90 +344,72 @@ def parse_url(url): # Empty return Url() - is_string = not isinstance(url, six.binary_type) - - # RFC 3986 doesn't like URLs that have a host but don't start - # with a scheme and we support URLs like that so we need to - # detect that problem and add an empty scheme indication. - # We don't get hurt on path-only URLs here as it's stripped - # off and given an empty scheme anyways. - if not SCHEME_REGEX.search(url): + source_url = url + if not SCHEME_RE.search(url): url = "//" + url - def idna_encode(name): - if name and any([ord(x) > 128 for x in name]): - try: - import idna - except ImportError: - raise LocationParseError( - "Unable to parse URL without the 'idna' module" - ) - try: - return idna.encode(name.lower(), strict=True, std3_rules=True) - except idna.IDNAError: - raise LocationParseError(u"Name '%s' is not a valid IDNA label" % name) - return name - - try: - split_iri = misc.IRI_MATCHER.match(compat.to_str(url)).groupdict() - iri_ref = rfc3986.IRIReference( - split_iri["scheme"], - split_iri["authority"], - _encode_invalid_chars(split_iri["path"], PATH_CHARS), - _encode_invalid_chars(split_iri["query"], QUERY_CHARS), - _encode_invalid_chars(split_iri["fragment"], FRAGMENT_CHARS), - ) - has_authority = iri_ref.authority is not None - uri_ref = iri_ref.encode(idna_encoder=idna_encode) - except (ValueError, RFC3986Exception): - return six.raise_from(LocationParseError(url), None) - - # rfc3986 strips the authority if it's invalid - if has_authority and uri_ref.authority is None: - raise LocationParseError(url) - - # Only normalize schemes we understand to not break http+unix - # or other schemes that don't follow RFC 3986. - if uri_ref.scheme is None or uri_ref.scheme.lower() in NORMALIZABLE_SCHEMES: - uri_ref = uri_ref.normalize() - - # Validate all URIReference components and ensure that all - # components that were set before are still set after - # normalization has completed. - validator = Validator() try: - validator.check_validity_of(*validator.COMPONENT_NAMES).validate(uri_ref) - except ValidationError: - return six.raise_from(LocationParseError(url), None) + scheme, authority, path, query, fragment = URI_RE.match(url).groups() + normalize_uri = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES + + if scheme: + scheme = scheme.lower() + + if authority: + auth, host, port = SUBAUTHORITY_RE.match(authority).groups() + if auth and normalize_uri: + auth = _encode_invalid_chars(auth, USERINFO_CHARS) + if port == "": + port = None + else: + auth, host, port = None, None, None + + if port is not None: + port = int(port) + if not (0 <= port <= 65535): + raise LocationParseError(url) + + host = _normalize_host(host, scheme) + + if normalize_uri and path: + path = _remove_path_dot_segments(path) + path = _encode_invalid_chars(path, PATH_CHARS) + if normalize_uri and query: + query = _encode_invalid_chars(query, QUERY_CHARS) + if normalize_uri and fragment: + fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS) + + except (ValueError, AttributeError): + return six.raise_from(LocationParseError(source_url), None) # For the sake of backwards compatibility we put empty # string values for path if there are any defined values # beyond the path in the URL. # TODO: Remove this when we break backwards compatibility. - path = uri_ref.path if not path: - if uri_ref.query is not None or uri_ref.fragment is not None: + if query is not None or fragment is not None: path = "" else: path = None # Ensure that each part of the URL is a `str` for # backwards compatibility. - def to_input_type(x): - if x is None: - return None - elif not is_string and not isinstance(x, six.binary_type): - return x.encode("utf-8") - return x + if isinstance(url, six.text_type): + ensure_func = six.ensure_text + else: + ensure_func = six.ensure_str + + def ensure_type(x): + return x if x is None else ensure_func(x) return Url( - scheme=to_input_type(uri_ref.scheme), - auth=to_input_type(uri_ref.userinfo), - host=to_input_type(uri_ref.host), - port=int(uri_ref.port) if uri_ref.port is not None else None, - path=to_input_type(path), - query=to_input_type(uri_ref.query), - fragment=to_input_type(uri_ref.fragment), + scheme=ensure_type(scheme), + auth=ensure_type(auth), + host=ensure_type(host), + port=port, + path=ensure_type(path), + query=ensure_type(query), + fragment=ensure_type(fragment), ) diff --git a/test/appengine/test_gae_manager.py b/test/appengine/test_gae_manager.py index 572c387b..c41ec141 100644 --- a/test/appengine/test_gae_manager.py +++ b/test/appengine/test_gae_manager.py @@ -37,7 +37,7 @@ def _absolute_url(self, path): # that URLFetch is used by the connection manager. @pytest.mark.usefixtures("testbed") class TestGAEConnectionManager(test_connectionpool.TestConnectionPool): - def setUp(self): + def setup_method(self, method): self.manager = appengine.AppEngineManager() self.pool = MockPool(self.host, self.port, self.manager) @@ -105,7 +105,7 @@ def test_exceptions(self): @pytest.mark.usefixtures("testbed") class TestGAEConnectionManagerWithSSL(dummyserver.testcase.HTTPSDummyServerTestCase): - def setUp(self): + def setup_method(self, method): self.manager = appengine.AppEngineManager() self.pool = MockPool(self.host, self.port, self.manager, "https") @@ -119,7 +119,7 @@ def test_exceptions(self): @pytest.mark.usefixtures("testbed") class TestGAERetry(test_connectionpool.TestRetry): - def setUp(self): + def setup_method(self, method): self.manager = appengine.AppEngineManager() self.pool = MockPool(self.host, self.port, self.manager) @@ -159,7 +159,7 @@ def test_retry_return_in_response(self): @pytest.mark.usefixtures("testbed") class TestGAERetryAfter(test_connectionpool.TestRetryAfter): - def setUp(self): + def setup_method(self, method): # Disable urlfetch which doesn't respect Retry-After header. self.manager = appengine.AppEngineManager(urlfetch_retries=False) self.pool = MockPool(self.host, self.port, self.manager) diff --git a/test/appengine/test_urlfetch.py b/test/appengine/test_urlfetch.py index 056de924..2e727db0 100644 --- a/test/appengine/test_urlfetch.py +++ b/test/appengine/test_urlfetch.py @@ -4,7 +4,6 @@ import httplib import StringIO -import unittest from mock import patch import pytest @@ -45,7 +44,7 @@ def test_urlfetch_called_with_http(self): @pytest.mark.usefixtures("sandbox") -class TestHTTPS(unittest.TestCase): +class TestHTTPS(object): @pytest.mark.xfail( reason="This is not yet supported by urlfetch, presence of the ssl " "module will bypass urlfetch." diff --git a/test/contrib/test_pyopenssl.py b/test/contrib/test_pyopenssl.py index e4680594..abbf387d 100644 --- a/test/contrib/test_pyopenssl.py +++ b/test/contrib/test_pyopenssl.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import os -import unittest import mock import pytest @@ -54,7 +53,7 @@ def teardown_module(): ) -class TestPyOpenSSLHelpers(unittest.TestCase): +class TestPyOpenSSLHelpers(object): """ Tests for PyOpenSSL helper functions. """ diff --git a/test/contrib/test_pyopenssl_dependencies.py b/test/contrib/test_pyopenssl_dependencies.py index c5e93e06..bbb5833d 100644 --- a/test/contrib/test_pyopenssl_dependencies.py +++ b/test/contrib/test_pyopenssl_dependencies.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import unittest import pytest from mock import patch, Mock @@ -28,7 +27,7 @@ def teardown_module(): pass -class TestPyOpenSSLInjection(unittest.TestCase): +class TestPyOpenSSLInjection(object): """ Tests for error handling in pyopenssl's 'inject_into urllib3' """ diff --git a/test/test_connection.py b/test/test_connection.py index e86c4c17..73eb633b 100644 --- a/test/test_connection.py +++ b/test/test_connection.py @@ -35,7 +35,7 @@ def test_match_hostname_mismatch(self): cert = {"subjectAltName": [("DNS", "foo")]} asserted_hostname = "bar" try: - with mock.patch("urllib3.util.ssl_.log.error") as mock_log: + with mock.patch("urllib3.util.ssl_.log.warning") as mock_log: match_hostname(cert, asserted_hostname) except CertificateError as e: assert "hostname 'bar' doesn't match 'foo'" in str(e) diff --git a/test/test_no_ssl.py b/test/test_no_ssl.py index 7d1a129f..bddd2526 100644 --- a/test/test_no_ssl.py +++ b/test/test_no_ssl.py @@ -6,7 +6,6 @@ """ import sys -import unittest import pytest @@ -64,15 +63,16 @@ def pop(self): module_stash = ModuleStash("urllib3") -class TestWithoutSSL(unittest.TestCase): - def setUp(self): +class TestWithoutSSL(object): + @classmethod + def setup_class(self): sys.modules.pop("ssl", None) sys.modules.pop("_ssl", None) module_stash.stash() sys.meta_path.insert(0, ssl_blocker) - def tearDown(self): + def teardown_class(self): sys.meta_path.remove(ssl_blocker) module_stash.pop() diff --git a/test/test_response.py b/test/test_response.py index fa89ab86..8626d7e0 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -433,7 +433,7 @@ def test_deflate_streaming_tell_intermediate_point(self): class MockCompressedDataReading(BytesIO): """ - A ByteIO-like reader returning ``payload`` in ``NUMBER_OF_READS`` + A BytesIO-like reader returning ``payload`` in ``NUMBER_OF_READS`` calls to ``read``. """ diff --git a/test/test_sync_connection.py b/test/test_sync_connection.py index 301c92f0..1ff15064 100644 --- a/test/test_sync_connection.py +++ b/test/test_sync_connection.py @@ -11,7 +11,6 @@ import errno import socket import ssl -import unittest import h11 @@ -169,7 +168,7 @@ def close(self): self._closed = True -class TestUnusualSocketConditions(unittest.TestCase): +class TestUnusualSocketConditions: """ This class contains tests that take strict control over sockets and selectors. The goal here is to simulate unusual network conditions that are diff --git a/test/test_util.py b/test/test_util.py index b32df789..401fc2b2 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -155,10 +155,6 @@ def test_invalid_host(self, location): @pytest.mark.parametrize( "url", [ - "http://user\\@google.com", - "http://google\\.com", - "user\\@google.com", - "http://user@user@google.com/", # Invalid IDNA labels u"http://\uD7FF.com", u"http://❤️", @@ -181,7 +177,12 @@ def test_invalid_url(self, url): ), ("HTTPS://Example.Com/?Key=Value", "https://example.com/?Key=Value"), ("Https://Example.Com/#Fragment", "https://example.com/#Fragment"), - ("[::Ff%etH0%Ff]/%ab%Af", "[::ff%25etH0%Ff]/%AB%AF"), + ("[::1%25]", "[::1%25]"), + ("[::Ff%etH0%Ff]/%ab%Af", "[::ff%etH0%FF]/%AB%AF"), + ( + "http://user:pass@[AaAa::Ff%25etH0%Ff]/%ab%Af", + "http://user:pass@[aaaa::ff%etH0%FF]/%AB%AF", + ), # Invalid characters for the query/fragment getting encoded ( 'http://google.com/p[]?parameter[]="hello"#fragment#', @@ -200,6 +201,22 @@ def test_parse_url_normalization(self, url, expected_normalized_url): actual_normalized_url = parse_url(url).url assert actual_normalized_url == expected_normalized_url + @pytest.mark.parametrize("char", [chr(i) for i in range(0x00, 0x21)] + ["\x7F"]) + def test_control_characters_are_percent_encoded(self, char): + percent_char = "%" + (hex(ord(char))[2:].zfill(2).upper()) + url = parse_url( + "http://user{0}@example.com/path{0}?query{0}#fragment{0}".format(char) + ) + + assert url == Url( + "http", + auth="user" + percent_char, + host="example.com", + path="/path" + percent_char, + query="query" + percent_char, + fragment="fragment" + percent_char, + ) + parse_url_host_map = [ ("http://google.com/mail", Url("http", host="google.com", path="/mail")), ("http://google.com/mail/", Url("http", host="google.com", path="/mail/")), @@ -261,6 +278,15 @@ def test_parse_url_normalization(self, url, expected_normalized_url): u"http://Königsgäßchen.de/straße", Url("http", host="xn--knigsgchen-b4a3dun.de", path="/stra%C3%9Fe"), ), + # Percent-encode in userinfo + ( + u"http://user@email.com:password@example.com/", + Url("http", auth="user%40email.com:password", host="example.com", path="/"), + ), + ( + u'http://user":quoted@example.com/', + Url("http", auth="user%22:quoted", host="example.com", path="/"), + ), # Unicode Surrogates (u"http://google.com/\uD800", Url("http", host="google.com", path="%ED%A0%80")), ( diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index 97d05e81..cf1c2e52 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -1,7 +1,6 @@ import logging import socket import sys -import unittest import time import warnings import pytest @@ -227,10 +226,10 @@ def test_create_connection_timeout(self): class TestConnectionPool(HTTPDummyServerTestCase): - def setUp(self): + def setup_method(self, method): self.pool = HTTPConnectionPool(self.host, self.port) - def tearDown(self): + def teardown_method(self): self.pool.close() def test_get(self): @@ -715,7 +714,3 @@ def test_mixed_case_hostname(self): with HTTPConnectionPool("LoCaLhOsT", self.port) as pool: response = pool.request("GET", "http://LoCaLhOsT:%d/" % self.port) assert response.status == 200 - - -if __name__ == "__main__": - unittest.main() diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index f77232e9..f70b381c 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -3,7 +3,6 @@ import logging import ssl import sys -import unittest import warnings import mock @@ -83,10 +82,10 @@ class TestHTTPS(HTTPSDummyServerTestCase): tls_protocol_name = None - def setUp(self): + def setup_method(self, method): self._pool = HTTPSConnectionPool(self.host, self.port, ca_certs=DEFAULT_CA) - def tearDown(self): + def teardown_method(self, method): self._pool.close() def test_simple(self): @@ -706,7 +705,3 @@ def test_can_validate_ipv6_san(self): ) as https_pool: r = https_pool.request("GET", "/") assert r.status == 200 - - -if __name__ == "__main__": - unittest.main() diff --git a/test/with_dummyserver/test_poolmanager.py b/test/with_dummyserver/test_poolmanager.py index 0246cec4..cf7a94e6 100644 --- a/test/with_dummyserver/test_poolmanager.py +++ b/test/with_dummyserver/test_poolmanager.py @@ -1,4 +1,3 @@ -import unittest import io import json import time @@ -14,7 +13,9 @@ class TestPoolManager(HTTPDummyServerTestCase): - def setUp(self): + @classmethod + def setup_class(self): + super(TestPoolManager, self).setup_class() self.base_url = "http://%s:%d" % (self.host, self.port) self.base_url_alt = "http://%s:%d" % (self.host_alt, self.port) @@ -360,7 +361,9 @@ def test_cleanup_on_connection_error(self): class TestRetry(HTTPDummyServerTestCase): - def setUp(self): + @classmethod + def setup_class(self): + super(TestRetry, self).setup_class() self.base_url = "http://%s:%d" % (self.host, self.port) self.base_url_alt = "http://%s:%d" % (self.host_alt, self.port) @@ -587,7 +590,9 @@ def test_redirect_put_file(self): class TestRetryAfter(HTTPDummyServerTestCase): - def setUp(self): + @classmethod + def setup_class(self): + super(TestRetryAfter, self).setup_class() self.base_url = "http://%s:%d" % (self.host, self.port) self.base_url_alt = "http://%s:%d" % (self.host_alt, self.port) @@ -661,7 +666,8 @@ def test_redirect_after(self): class TestFileBodiesOnRetryOrRedirect(HTTPDummyServerTestCase): - def setUp(self): + def setup_class(self): + super(TestFileBodiesOnRetryOrRedirect, self).setup_class() self.base_url = "http://%s:%d" % (self.host, self.port) self.base_url_alt = "http://%s:%d" % (self.host_alt, self.port) @@ -711,13 +717,11 @@ def tell(self): @pytest.mark.skipif(not HAS_IPV6, reason="IPv6 is not supported on this system") class TestIPv6PoolManager(IPv6HTTPDummyServerTestCase): - def setUp(self): + @classmethod + def setup_class(self): + super(TestIPv6PoolManager, self).setup_class() self.base_url = "http://[%s]:%d" % (self.host, self.port) def test_ipv6(self): with PoolManager() as http: http.request("GET", self.base_url) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py index 3482a8e1..0154e4b2 100644 --- a/test/with_dummyserver/test_proxy_poolmanager.py +++ b/test/with_dummyserver/test_proxy_poolmanager.py @@ -1,5 +1,4 @@ import json -import unittest import pytest @@ -14,7 +13,9 @@ class TestHTTPProxyManager(HTTPDummyProxyTestCase): - def setUp(self): + @classmethod + def setup_class(self): + super(TestHTTPProxyManager, self).setup_class() self.http_url = "http://%s:%d" % (self.http_host, self.http_port) self.http_url_alt = "http://%s:%d" % (self.http_host_alt, self.http_port) self.https_url = "https://%s:%d" % (self.https_host, self.https_port) @@ -351,7 +352,9 @@ def test_scheme_host_case_insensitive(self): class TestIPv6HTTPProxyManager(IPv6HTTPDummyProxyTestCase): - def setUp(self): + @classmethod + def setup_class(self): + HTTPDummyProxyTestCase.setup_class() self.http_url = "http://%s:%d" % (self.http_host, self.http_port) self.http_url_alt = "http://%s:%d" % (self.http_host_alt, self.http_port) self.https_url = "https://%s:%d" % (self.https_host, self.https_port) @@ -365,7 +368,3 @@ def test_basic_ipv6_proxy(self): r = http.request("GET", "%s/" % self.https_url) assert r.status == 200 - - -if __name__ == "__main__": - unittest.main()