From c02698afebfaf057003d0d9e7f53e5dca419ed58 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 24 Jun 2019 20:16:56 -0500 Subject: [PATCH 01/11] Don't install gcp-devrel-py-tools on Windows+Python 3.4 (#1638) --- _travis/upload_coverage.sh | 4 ++-- dev-requirements.txt | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/_travis/upload_coverage.sh b/_travis/upload_coverage.sh index da47a7c1..d153cc15 100755 --- a/_travis/upload_coverage.sh +++ b/_travis/upload_coverage.sh @@ -3,6 +3,6 @@ set -exo pipefail if [[ -e .coverage ]]; then - python3 -m pip install codecov - codecov --env TRAVIS_OS_NAME,NOX_SESSION + python3.6 -m pip install codecov + python3.6 -m codecov --env TRAVIS_OS_NAME,NOX_SESSION fi diff --git a/dev-requirements.txt b/dev-requirements.txt index 2592fe2c..95cdf8d0 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -14,4 +14,6 @@ lazy-object-proxy==1.4.0 # https://github.com/GoogleCloudPlatform/python-repo-tools/issues/23 pylint<2.0;python_version<="2.7" -gcp-devrel-py-tools + +# Because typed-ast doesn't provide Python 3.4+Windows wheels +gcp-devrel-py-tools;python_version>='3.5' or sys_platform != 'win32' From cdfc9a539cc27f5704f8bcd46d34301b3d218aff Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 24 Jun 2019 20:42:44 -0500 Subject: [PATCH 02/11] Remove the Makefile as it's not maintained (#1629) --- Makefile | 70 -------------------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 Makefile diff --git a/Makefile b/Makefile deleted file mode 100644 index e62952bd..00000000 --- a/Makefile +++ /dev/null @@ -1,70 +0,0 @@ -REQUIREMENTS_FILE=dev-requirements.txt -REQUIREMENTS_OUT=dev-requirements.txt.log -SETUP_OUT=*.egg-info - -.PHONY: all -all: setup requirements - -.PHONY: virtualenv -virtualenv: -ifndef VIRTUAL_ENV - $(error Must be run inside of a virtualenv) -endif - -.PHONY: setup -setup: virtualenv $(SETUP_OUT) - -.PHONY: $(SETUP_OUT) -$(SETUP_OUT): setup.py setup.cfg - python setup.py develop - touch $(SETUP_OUT) - -.PHONY: requirements -requirements: setup $(REQUIREMENTS_OUT) - -.PHONY: piprot -piprot: setup - pip install piprot - piprot -x $(REQUIREMENTS_FILE) - -.PHONY: $(REQUIREMENTS_OUT) -$(REQUIREMENTS_OUT): $(REQUIREMENTS_FILE) - pip install -r $(REQUIREMENTS_FILE) | tee -a $(REQUIREMENTS_OUT) - python setup.py develop - -.PHONY: clean -clean: - find . -name "*.py[oc]" -delete - find . -name "__pycache__" -delete - rm -f $(REQUIREMENTS_OUT) - rm -rf docs/_build build/ dist/ - -.PHONY: test -test: requirements - tox - -.PHONY: test-quick -test-quick: requirements - tox -e py36 - tox -e py27 - -.PHONY: test-all -test-all: requirements - tox - -.PHONY: test-gae -test-gae: requirements -ifndef GAE_PYTHONPATH - $(error GAE_PYTHONPATH must be set) -endif - tox -e gae - -.PHONY: docs -docs: - tox -e docs - -.PHONY: release -release: - ./release.sh - - From 19ff80fcd085a6a60a55ca0c526a9977ec8014e3 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Wed, 3 Jul 2019 08:27:00 -0500 Subject: [PATCH 03/11] Update RECENT_DATE to 2019-1-1 (#1645) --- src/urllib3/connection.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/urllib3/connection.py b/src/urllib3/connection.py index 27bc1867..a138bb2a 100644 --- a/src/urllib3/connection.py +++ b/src/urllib3/connection.py @@ -54,11 +54,9 @@ class ConnectionError(Exception): port_by_scheme = {"http": 80, "https": 443} -# When updating RECENT_DATE, move it to within two years of the current date, -# and not less than 6 months ago. -# Example: if Today is 2018-01-01, then RECENT_DATE should be any date on or -# after 2016-01-01 (today - 2 years) AND before 2017-07-01 (today - 6 months) -RECENT_DATE = datetime.date(2017, 6, 30) +# When it comes time to update this value as a part of regular maintenance +# (ie test_recent_date is failing) update it to ~6 months before the current date. +RECENT_DATE = datetime.date(2019, 1, 1) class DummyConnection(object): From 2c055b4a60affa3529660b928fff375fde3993f7 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 4 Jul 2019 12:47:10 -0500 Subject: [PATCH 04/11] Also skip dotted FQDN tests on TRAVIS_INFRA=unknown (#1648) --- test/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/__init__.py b/test/__init__.py index ebaec6f5..31720c59 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -186,7 +186,7 @@ def fails_on_travis_gce(test): @functools.wraps(test) def wrapper(*args, **kwargs): - if os.environ.get("TRAVIS_INFRA") == "gce": + if os.environ.get("TRAVIS_INFRA") in ("gce", "unknown"): pytest.xfail("%s is expected to fail on Travis GCE builds" % test.__name__) return test(*args, **kwargs) From 1ef62aba7a813c33ddf1141c233de290d2e06ca8 Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Sat, 6 Jul 2019 03:25:41 +0400 Subject: [PATCH 05/11] Stop using unittest (#1649) --- dev-requirements.txt | 5 ++--- dummyserver/testcase.py | 19 +++++++++---------- setup.cfg | 1 + test/appengine/test_gae_manager.py | 8 ++++---- test/appengine/test_urlfetch.py | 3 +-- test/contrib/test_pyopenssl.py | 3 +-- test/contrib/test_pyopenssl_dependencies.py | 3 +-- test/test_no_ssl.py | 8 ++++---- test/with_dummyserver/test_connectionpool.py | 5 ----- test/with_dummyserver/test_https.py | 5 ----- test/with_dummyserver/test_poolmanager.py | 13 ++++++------- .../test_proxy_poolmanager.py | 13 ++++++------- 12 files changed, 35 insertions(+), 51 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 95cdf8d0..9a576a23 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -5,9 +5,8 @@ wheel==0.30.0 tornado==5.1.1 PySocks==1.6.8 pkginfo==1.4.2 -pytest-timeout==1.3.1 -pytest==4.0.0 -pluggy==0.11.0 +pytest-timeout==1.3.3 +pytest==4.6.4 # https://github.com/ionelmc/python-lazy-object-proxy/issues/30 lazy-object-proxy==1.4.0 diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index bb3b07ad..bf83b6b4 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -1,5 +1,4 @@ import threading -import unittest import pytest from tornado import ioloop, web @@ -20,7 +19,7 @@ def consume_socket(sock, chunks=65536): pass -class SocketDummyServerTestCase(unittest.TestCase): +class SocketDummyServerTestCase(object): """ A simple socket-based server is created for this class that is good for exactly one request. @@ -67,7 +66,7 @@ def start_basic_handler(cls, **kw): ) @classmethod - def tearDownClass(cls): + def teardown_class(cls): if hasattr(cls, "server_thread"): cls.server_thread.join(0.1) @@ -101,10 +100,10 @@ def _start_server(cls, socket_handler): cls.port = cls.server_thread.port -class HTTPDummyServerTestCase(unittest.TestCase): +class HTTPDummyServerTestCase(object): """ A simple HTTP server that runs when your test class runs - Have your unittest class inherit from this one, and then a simple server + Have your test class inherit from this one, and then a simple server will start when your tests run, and automatically shut down when they complete. For examples of what test requests you can send to the server, see the TestingApp in dummyserver/handlers.py. @@ -131,11 +130,11 @@ def _stop_server(cls): cls.server_thread.join() @classmethod - def setUpClass(cls): + def setup_class(cls): cls._start_server() @classmethod - def tearDownClass(cls): + def teardown_class(cls): cls._stop_server() @@ -150,7 +149,7 @@ class IPV6HTTPSDummyServerTestCase(HTTPSDummyServerTestCase): host = "::1" -class HTTPDummyProxyTestCase(unittest.TestCase): +class HTTPDummyProxyTestCase(object): http_host = "localhost" http_host_alt = "127.0.0.1" @@ -163,7 +162,7 @@ class HTTPDummyProxyTestCase(unittest.TestCase): proxy_host_alt = "127.0.0.1" @classmethod - def setUpClass(cls): + def setup_class(cls): cls.io_loop = ioloop.IOLoop.current() app = web.Application([(r".*", TestingApp)]) @@ -184,7 +183,7 @@ def setUpClass(cls): cls.server_thread = run_loop_in_thread(cls.io_loop) @classmethod - def tearDownClass(cls): + def teardown_class(cls): cls.io_loop.add_callback(cls.http_server.stop) cls.io_loop.add_callback(cls.https_server.stop) cls.io_loop.add_callback(cls.proxy_server.stop) diff --git a/setup.cfg b/setup.cfg index 16d64d20..35958347 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,3 +23,4 @@ requires-dist = [tool:pytest] xfail_strict = true +python_classes = Test *TestCase diff --git a/test/appengine/test_gae_manager.py b/test/appengine/test_gae_manager.py index 572c387b..c41ec141 100644 --- a/test/appengine/test_gae_manager.py +++ b/test/appengine/test_gae_manager.py @@ -37,7 +37,7 @@ def _absolute_url(self, path): # that URLFetch is used by the connection manager. @pytest.mark.usefixtures("testbed") class TestGAEConnectionManager(test_connectionpool.TestConnectionPool): - def setUp(self): + def setup_method(self, method): self.manager = appengine.AppEngineManager() self.pool = MockPool(self.host, self.port, self.manager) @@ -105,7 +105,7 @@ def test_exceptions(self): @pytest.mark.usefixtures("testbed") class TestGAEConnectionManagerWithSSL(dummyserver.testcase.HTTPSDummyServerTestCase): - def setUp(self): + def setup_method(self, method): self.manager = appengine.AppEngineManager() self.pool = MockPool(self.host, self.port, self.manager, "https") @@ -119,7 +119,7 @@ def test_exceptions(self): @pytest.mark.usefixtures("testbed") class TestGAERetry(test_connectionpool.TestRetry): - def setUp(self): + def setup_method(self, method): self.manager = appengine.AppEngineManager() self.pool = MockPool(self.host, self.port, self.manager) @@ -159,7 +159,7 @@ def test_retry_return_in_response(self): @pytest.mark.usefixtures("testbed") class TestGAERetryAfter(test_connectionpool.TestRetryAfter): - def setUp(self): + def setup_method(self, method): # Disable urlfetch which doesn't respect Retry-After header. self.manager = appengine.AppEngineManager(urlfetch_retries=False) self.pool = MockPool(self.host, self.port, self.manager) diff --git a/test/appengine/test_urlfetch.py b/test/appengine/test_urlfetch.py index 056de924..2e727db0 100644 --- a/test/appengine/test_urlfetch.py +++ b/test/appengine/test_urlfetch.py @@ -4,7 +4,6 @@ import httplib import StringIO -import unittest from mock import patch import pytest @@ -45,7 +44,7 @@ def test_urlfetch_called_with_http(self): @pytest.mark.usefixtures("sandbox") -class TestHTTPS(unittest.TestCase): +class TestHTTPS(object): @pytest.mark.xfail( reason="This is not yet supported by urlfetch, presence of the ssl " "module will bypass urlfetch." diff --git a/test/contrib/test_pyopenssl.py b/test/contrib/test_pyopenssl.py index 56656dae..4f6e37cb 100644 --- a/test/contrib/test_pyopenssl.py +++ b/test/contrib/test_pyopenssl.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import os -import unittest import mock import pytest @@ -49,7 +48,7 @@ def teardown_module(): ) -class TestPyOpenSSLHelpers(unittest.TestCase): +class TestPyOpenSSLHelpers(object): """ Tests for PyOpenSSL helper functions. """ diff --git a/test/contrib/test_pyopenssl_dependencies.py b/test/contrib/test_pyopenssl_dependencies.py index c5e93e06..bbb5833d 100644 --- a/test/contrib/test_pyopenssl_dependencies.py +++ b/test/contrib/test_pyopenssl_dependencies.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import unittest import pytest from mock import patch, Mock @@ -28,7 +27,7 @@ def teardown_module(): pass -class TestPyOpenSSLInjection(unittest.TestCase): +class TestPyOpenSSLInjection(object): """ Tests for error handling in pyopenssl's 'inject_into urllib3' """ diff --git a/test/test_no_ssl.py b/test/test_no_ssl.py index 7d1a129f..bddd2526 100644 --- a/test/test_no_ssl.py +++ b/test/test_no_ssl.py @@ -6,7 +6,6 @@ """ import sys -import unittest import pytest @@ -64,15 +63,16 @@ def pop(self): module_stash = ModuleStash("urllib3") -class TestWithoutSSL(unittest.TestCase): - def setUp(self): +class TestWithoutSSL(object): + @classmethod + def setup_class(self): sys.modules.pop("ssl", None) sys.modules.pop("_ssl", None) module_stash.stash() sys.meta_path.insert(0, ssl_blocker) - def tearDown(self): + def teardown_class(self): sys.meta_path.remove(ssl_blocker) module_stash.pop() diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index 7dfdba81..601cc3b9 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -2,7 +2,6 @@ import logging import socket import sys -import unittest import time import warnings import pytest @@ -1159,7 +1158,3 @@ def test_pool_size_redirect(self): ) as pool: pool.urlopen("GET", "/redirect", preload_content=False) assert pool.num_connections == 1 - - -if __name__ == "__main__": - unittest.main() diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index 521c03e9..8f40e8b9 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -3,7 +3,6 @@ import logging import ssl import sys -import unittest import warnings import mock @@ -784,7 +783,3 @@ def test_can_validate_ipv6_san(self): ) as https_pool: r = https_pool.request("GET", "/") assert r.status == 200 - - -if __name__ == "__main__": - unittest.main() diff --git a/test/with_dummyserver/test_poolmanager.py b/test/with_dummyserver/test_poolmanager.py index 7aa7b036..dc348c39 100644 --- a/test/with_dummyserver/test_poolmanager.py +++ b/test/with_dummyserver/test_poolmanager.py @@ -1,4 +1,3 @@ -import unittest import json import pytest @@ -12,7 +11,9 @@ class TestPoolManager(HTTPDummyServerTestCase): - def setUp(self): + @classmethod + def setup_class(self): + super(TestPoolManager, self).setup_class() self.base_url = "http://%s:%d" % (self.host, self.port) self.base_url_alt = "http://%s:%d" % (self.host_alt, self.port) @@ -348,13 +349,11 @@ def test_http_with_ca_cert_dir(self): @pytest.mark.skipif(not HAS_IPV6, reason="IPv6 is not supported on this system") class TestIPv6PoolManager(IPv6HTTPDummyServerTestCase): - def setUp(self): + @classmethod + def setup_class(self): + super(TestIPv6PoolManager, self).setup_class() self.base_url = "http://[%s]:%d" % (self.host, self.port) def test_ipv6(self): with PoolManager() as http: http.request("GET", self.base_url) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py index eed47b1d..af91f04a 100644 --- a/test/with_dummyserver/test_proxy_poolmanager.py +++ b/test/with_dummyserver/test_proxy_poolmanager.py @@ -1,6 +1,5 @@ import json import socket -import unittest import pytest @@ -15,7 +14,9 @@ class TestHTTPProxyManager(HTTPDummyProxyTestCase): - def setUp(self): + @classmethod + def setup_class(self): + super(TestHTTPProxyManager, self).setup_class() self.http_url = "http://%s:%d" % (self.http_host, self.http_port) self.http_url_alt = "http://%s:%d" % (self.http_host_alt, self.http_port) self.https_url = "https://%s:%d" % (self.https_host, self.https_port) @@ -368,7 +369,9 @@ def test_scheme_host_case_insensitive(self): class TestIPv6HTTPProxyManager(IPv6HTTPDummyProxyTestCase): - def setUp(self): + @classmethod + def setup_class(self): + HTTPDummyProxyTestCase.setup_class() self.http_url = "http://%s:%d" % (self.http_host, self.http_port) self.http_url_alt = "http://%s:%d" % (self.http_host_alt, self.http_port) self.https_url = "https://%s:%d" % (self.https_host, self.https_port) @@ -383,7 +386,3 @@ def test_basic_ipv6_proxy(self): r = http.request("GET", "%s/" % self.https_url) assert r.status == 200 - - -if __name__ == "__main__": - unittest.main() From 582b3cbee60f2d9411c4adb9de8f2b1da7a5044a Mon Sep 17 00:00:00 2001 From: Ratan Kulshreshtha Date: Sat, 6 Jul 2019 04:56:21 +0530 Subject: [PATCH 06/11] Change references to Makefile into Nox (#1643) --- docs/contributing.rst | 60 ++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index c28bc479..c66b28ad 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -13,6 +13,8 @@ If you wish to add a new feature or fix a bug: to start making your changes. #. Write a test which shows that the bug was fixed or that the feature works as expected. +#. Format your changes with black using command `$ nox -s blacken` and lint your + changes using command `nox -s lint`. #. Send a pull request and bug the maintainer until it gets merged and published. :) Make sure to add yourself to ``CONTRIBUTORS.txt``. @@ -20,47 +22,53 @@ If you wish to add a new feature or fix a bug: Setting up your development environment --------------------------------------- -It is recommended, and even enforced by the make file, that you use a -`virtualenv -`_:: +In order to setup the development environment all that you need is +`nox `_ installed in your machine:: - $ python3 -m venv venv3 - $ source venv3/bin/activate - $ pip install -r dev-requirements.txt + $ pip install --user --upgrade nox Running the tests ----------------- We use some external dependencies, multiple interpreters and code coverage -analysis while running test suite. Our ``Makefile`` handles much of this for -you as long as you're running it `inside of a virtualenv -`_:: - - $ make test-quick - [... magically installs dependencies and runs tests on your virtualenv] - Ran 182 tests in 1.633s - - OK (SKIP=6) - -There is also a make target for running all of our tests and multiple python +analysis while running test suite. Our ``noxfile.py`` handles much of this for +you:: + + $ nox --sessions test-2.7 test-3.7 + [ Nox will create virtualenv, install the specified dependencies, and run the commands in order.] + nox > Running session test-2.7 + ....... + ....... + nox > Session test-2.7 was successful. + ....... + ....... + nox > Running session test-3.7 + ....... + ....... + nox > Session test-3.7 was successful. + +There is also a nox command for running all of our tests and multiple python versions. - $ make test-all + $ nox --sessions test Note that code coverage less than 100% is regarded as a failing run. Some platform-specific tests are skipped unless run in that platform. To make sure the code works in all of urllib3's supported platforms, you can run our ``tox`` suite:: - $ make test-all - [... tox creates a virtualenv for every platform and runs tests inside of each] - py27: commands succeeded - py34: commands succeeded - py35: commands succeeded - py36: commands succeeded - py37: commands succeeded - pypy: commands succeeded + $ nox --sessions test + [ Nox will create virtualenv, install the specified dependencies, and run the commands in order.] + ....... + ....... + nox > Session test-2.7 was successful. + nox > Session test-3.4 was successful. + nox > Session test-3.5 was successful. + nox > Session test-3.6 was successful. + nox > Session test-3.7 was successful. + nox > Session test-3.8 was successful. + nox > Session test-pypy was successful. Our test suite `runs continuously on Travis CI `_ with every pull request. From c894210a893c1089169dc74bfec47da0121ed28e Mon Sep 17 00:00:00 2001 From: Chris Jerdonek Date: Fri, 5 Jul 2019 16:27:00 -0700 Subject: [PATCH 07/11] Remove spurious TypeError from exception chain also for BaseException. (#1637) --- CONTRIBUTORS.txt | 4 ++++ src/urllib3/connectionpool.py | 7 ++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 6f0362c3..0855f00a 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -278,5 +278,9 @@ In chronological order: * James Meickle * Improve handling of Retry-After header +* Chris Jerdonek + * Remove a spurious TypeError from the exception chain inside + HTTPConnectionPool._make_request(), also for BaseExceptions. + * [Your name or handle] <[email or website]> * [Brief summary of your changes] diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py index 77c31589..ee53c4b8 100644 --- a/src/urllib3/connectionpool.py +++ b/src/urllib3/connectionpool.py @@ -409,9 +409,10 @@ def _make_request( # Python 3 try: httplib_response = conn.getresponse() - except Exception as e: - # Remove the TypeError from the exception chain in Python 3; - # otherwise it looks like a programming error was the cause. + except BaseException as e: + # Remove the TypeError from the exception chain in + # Python 3 (including for exceptions like SystemExit). + # Otherwise it looks like a bug in the code. six.raise_from(e, None) except (SocketTimeout, BaseSSLError, SocketError) as e: self._raise_timeout(err=e, url=url, timeout_value=read_timeout) From 3387b207f52dd584c92393053a16c4f9244af089 Mon Sep 17 00:00:00 2001 From: Thea Flowers Date: Mon, 8 Jul 2019 13:58:01 -0700 Subject: [PATCH 08/11] Update README to reflect Seth's primary maintainership (#1651) --- README.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 5884fa9b..ab8988ae 100644 --- a/README.rst +++ b/README.rst @@ -94,8 +94,8 @@ Tidelift will coordinate the fix and disclosure with maintainers. Maintainers ----------- -- `@theacodes `_ (Thea Flowers) - `@sethmlarson `_ (Seth M. Larson) +- `@theacodes `_ (Thea Flowers) - `@haikuginger `_ (Jess Shapiro) - `@lukasa `_ (Cory Benfield) - `@sigmavirus24 `_ (Ian Cordasco) @@ -128,7 +128,7 @@ development `_'s work on an ongoing basis -- Abbott (2018-present), sponsors `@sethmlarson `_'s work on an ongoing basis +- Abbott (2018-2019), sponsored `@sethmlarson `_'s work on urllib3. +- Google Cloud Platform (2018-2019), sponsored `@theacodes `_'s work on urllib3. - Akamai (2017-2018), sponsored `@haikuginger `_'s work on urllib3 -- Hewlett Packard Enterprise (2016-2017), sponsored `@Lukasa’s `_ work on urllib3 +- Hewlett Packard Enterprise (2016-2017), sponsored `@Lukasa’s `_ work on urllib3. From 15acc9f8b4c88942123bd966b0c60a9075605a62 Mon Sep 17 00:00:00 2001 From: Min ho Kim Date: Fri, 26 Jul 2019 23:05:39 +1000 Subject: [PATCH 09/11] Fix typo (#1654) --- test/test_response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_response.py b/test/test_response.py index 23a63be1..65a5d62a 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -434,7 +434,7 @@ def test_deflate_streaming_tell_intermediate_point(self): class MockCompressedDataReading(BytesIO): """ - A ByteIO-like reader returning ``payload`` in ``NUMBER_OF_READS`` + A BytesIO-like reader returning ``payload`` in ``NUMBER_OF_READS`` calls to ``read``. """ From 5b047b645f5f93900d5e2fc31230848c25eb1f5f Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 26 Jul 2019 10:42:51 -0500 Subject: [PATCH 10/11] Percent-encode invalid characters within auth section (#1647) --- CHANGES.rst | 5 + _travis/upload_coverage.sh | 4 +- noxfile.py | 2 + setup.py | 1 - src/urllib3/connectionpool.py | 9 +- src/urllib3/packages/rfc3986/__init__.py | 56 --- src/urllib3/packages/rfc3986/_mixin.py | 371 ---------------- src/urllib3/packages/rfc3986/abnf_regexp.py | 262 ----------- src/urllib3/packages/rfc3986/api.py | 106 ----- src/urllib3/packages/rfc3986/builder.py | 301 ------------- src/urllib3/packages/rfc3986/compat.py | 49 --- src/urllib3/packages/rfc3986/exceptions.py | 112 ----- src/urllib3/packages/rfc3986/iri.py | 150 ------- src/urllib3/packages/rfc3986/misc.py | 125 ------ src/urllib3/packages/rfc3986/normalizers.py | 172 -------- src/urllib3/packages/rfc3986/parseresult.py | 457 -------------------- src/urllib3/packages/rfc3986/uri.py | 152 ------- src/urllib3/packages/rfc3986/validators.py | 435 ------------------- src/urllib3/packages/six.py | 118 ++++- src/urllib3/util/ssl_.py | 12 +- src/urllib3/util/url.py | 282 ++++++++---- test/test_util.py | 36 +- 22 files changed, 348 insertions(+), 2869 deletions(-) delete mode 100644 src/urllib3/packages/rfc3986/__init__.py delete mode 100644 src/urllib3/packages/rfc3986/_mixin.py delete mode 100644 src/urllib3/packages/rfc3986/abnf_regexp.py delete mode 100644 src/urllib3/packages/rfc3986/api.py delete mode 100644 src/urllib3/packages/rfc3986/builder.py delete mode 100644 src/urllib3/packages/rfc3986/compat.py delete mode 100644 src/urllib3/packages/rfc3986/exceptions.py delete mode 100644 src/urllib3/packages/rfc3986/iri.py delete mode 100644 src/urllib3/packages/rfc3986/misc.py delete mode 100644 src/urllib3/packages/rfc3986/normalizers.py delete mode 100644 src/urllib3/packages/rfc3986/parseresult.py delete mode 100644 src/urllib3/packages/rfc3986/uri.py delete mode 100644 src/urllib3/packages/rfc3986/validators.py diff --git a/CHANGES.rst b/CHANGES.rst index 55a9c0c9..e84b80fc 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,6 +9,11 @@ dev (master) * Fix edge case where Retry-After header was still respected even when explicitly opted out of. (Pull #1607) +* Remove dependency on ``rfc3986`` for URL parsing. + +* Fix issue where URLs containing invalid characters within ``Url.auth`` would + raise an exception instead of percent-encoding those characters. + 1.25.3 (2019-05-23) ------------------- diff --git a/_travis/upload_coverage.sh b/_travis/upload_coverage.sh index d153cc15..c772ec3f 100755 --- a/_travis/upload_coverage.sh +++ b/_travis/upload_coverage.sh @@ -3,6 +3,6 @@ set -exo pipefail if [[ -e .coverage ]]; then - python3.6 -m pip install codecov - python3.6 -m codecov --env TRAVIS_OS_NAME,NOX_SESSION + python3 -m pip install codecov + python3 -m codecov --env TRAVIS_OS_NAME,NOX_SESSION fi diff --git a/noxfile.py b/noxfile.py index bb6cf104..9cca9dec 100644 --- a/noxfile.py +++ b/noxfile.py @@ -77,6 +77,8 @@ def blacken(session): session.install("black") session.run("black", "src", "dummyserver", "test", "noxfile.py", "setup.py") + lint(session) + @nox.session def lint(session): diff --git a/setup.py b/setup.py index e190dbfc..de06f2da 100755 --- a/setup.py +++ b/setup.py @@ -57,7 +57,6 @@ "urllib3.packages", "urllib3.packages.ssl_match_hostname", "urllib3.packages.backports", - "urllib3.packages.rfc3986", "urllib3.contrib", "urllib3.contrib._securetransport", "urllib3.util", diff --git a/src/urllib3/connectionpool.py b/src/urllib3/connectionpool.py index ee53c4b8..dcf72dfd 100644 --- a/src/urllib3/connectionpool.py +++ b/src/urllib3/connectionpool.py @@ -26,7 +26,6 @@ from .packages.ssl_match_hostname import CertificateError from .packages import six from .packages.six.moves import queue -from .packages.rfc3986.normalizers import normalize_host from .connection import ( port_by_scheme, DummyConnection, @@ -44,7 +43,7 @@ from .util.response import assert_header_parsing from .util.retry import Retry from .util.timeout import Timeout -from .util.url import get_host, Url, NORMALIZABLE_SCHEMES +from .util.url import get_host, Url, _normalize_host as normalize_host from .util.queue import LifoQueue @@ -1027,6 +1026,8 @@ def _normalize_host(host, scheme): Normalize hosts for comparisons and use with sockets. """ + host = normalize_host(host, scheme) + # httplib doesn't like it when we include brackets in IPv6 addresses # Specifically, if we include brackets but also pass the port then # httplib crazily doubles up the square brackets on the Host header. @@ -1034,7 +1035,5 @@ def _normalize_host(host, scheme): # However, for backward compatibility reasons we can't actually # *assert* that. See http://bugs.python.org/issue28539 if host.startswith("[") and host.endswith("]"): - host = host.strip("[]") - if scheme in NORMALIZABLE_SCHEMES: - host = normalize_host(host) + host = host[1:-1] return host diff --git a/src/urllib3/packages/rfc3986/__init__.py b/src/urllib3/packages/rfc3986/__init__.py deleted file mode 100644 index d953d2b6..00000000 --- a/src/urllib3/packages/rfc3986/__init__.py +++ /dev/null @@ -1,56 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -An implementation of semantics and validations described in RFC 3986. - -See http://rfc3986.readthedocs.io/ for detailed documentation. - -:copyright: (c) 2014 Rackspace -:license: Apache v2.0, see LICENSE for details -""" - -from .api import iri_reference -from .api import IRIReference -from .api import is_valid_uri -from .api import normalize_uri -from .api import uri_reference -from .api import URIReference -from .api import urlparse -from .parseresult import ParseResult - -__title__ = "rfc3986" -__author__ = "Ian Stapleton Cordasco" -__author_email__ = "graffatcolmingov@gmail.com" -__license__ = "Apache v2.0" -__copyright__ = "Copyright 2014 Rackspace" -__version__ = "1.3.2" - -__all__ = ( - "ParseResult", - "URIReference", - "IRIReference", - "is_valid_uri", - "normalize_uri", - "uri_reference", - "iri_reference", - "urlparse", - "__title__", - "__author__", - "__author_email__", - "__license__", - "__copyright__", - "__version__", -) diff --git a/src/urllib3/packages/rfc3986/_mixin.py b/src/urllib3/packages/rfc3986/_mixin.py deleted file mode 100644 index 4ddcb2df..00000000 --- a/src/urllib3/packages/rfc3986/_mixin.py +++ /dev/null @@ -1,371 +0,0 @@ -"""Module containing the implementation of the URIMixin class.""" -import warnings - -from . import exceptions as exc -from . import misc -from . import normalizers -from . import validators - - -class URIMixin(object): - """Mixin with all shared methods for URIs and IRIs.""" - - __hash__ = tuple.__hash__ - - def authority_info(self): - """Return a dictionary with the ``userinfo``, ``host``, and ``port``. - - If the authority is not valid, it will raise a - :class:`~rfc3986.exceptions.InvalidAuthority` Exception. - - :returns: - ``{'userinfo': 'username:password', 'host': 'www.example.com', - 'port': '80'}`` - :rtype: dict - :raises rfc3986.exceptions.InvalidAuthority: - If the authority is not ``None`` and can not be parsed. - """ - if not self.authority: - return {"userinfo": None, "host": None, "port": None} - - match = self._match_subauthority() - - if match is None: - # In this case, we have an authority that was parsed from the URI - # Reference, but it cannot be further parsed by our - # misc.SUBAUTHORITY_MATCHER. In this case it must not be a valid - # authority. - raise exc.InvalidAuthority(self.authority.encode(self.encoding)) - - # We had a match, now let's ensure that it is actually a valid host - # address if it is IPv4 - matches = match.groupdict() - host = matches.get("host") - - if ( - host - and misc.IPv4_MATCHER.match(host) - and not validators.valid_ipv4_host_address(host) - ): - # If we have a host, it appears to be IPv4 and it does not have - # valid bytes, it is an InvalidAuthority. - raise exc.InvalidAuthority(self.authority.encode(self.encoding)) - - return matches - - def _match_subauthority(self): - return misc.SUBAUTHORITY_MATCHER.match(self.authority) - - @property - def host(self): - """If present, a string representing the host.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority["host"] - - @property - def port(self): - """If present, the port extracted from the authority.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority["port"] - - @property - def userinfo(self): - """If present, the userinfo extracted from the authority.""" - try: - authority = self.authority_info() - except exc.InvalidAuthority: - return None - return authority["userinfo"] - - def is_absolute(self): - """Determine if this URI Reference is an absolute URI. - - See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation. - - :returns: ``True`` if it is an absolute URI, ``False`` otherwise. - :rtype: bool - """ - return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit())) - - def is_valid(self, **kwargs): - """Determine if the URI is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param bool require_scheme: Set to ``True`` if you wish to require the - presence of the scheme component. - :param bool require_authority: Set to ``True`` if you wish to require - the presence of the authority component. - :param bool require_path: Set to ``True`` if you wish to require the - presence of the path component. - :param bool require_query: Set to ``True`` if you wish to require the - presence of the query component. - :param bool require_fragment: Set to ``True`` if you wish to require - the presence of the fragment component. - :returns: ``True`` if the URI is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - validators = [ - (self.scheme_is_valid, kwargs.get("require_scheme", False)), - (self.authority_is_valid, kwargs.get("require_authority", False)), - (self.path_is_valid, kwargs.get("require_path", False)), - (self.query_is_valid, kwargs.get("require_query", False)), - (self.fragment_is_valid, kwargs.get("require_fragment", False)), - ] - return all(v(r) for v, r in validators) - - def authority_is_valid(self, require=False): - """Determine if the authority component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param bool require: - Set to ``True`` to require the presence of this component. - :returns: - ``True`` if the authority is valid. ``False`` otherwise. - :rtype: - bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - try: - self.authority_info() - except exc.InvalidAuthority: - return False - - return validators.authority_is_valid( - self.authority, host=self.host, require=require - ) - - def scheme_is_valid(self, require=False): - """Determine if the scheme component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the scheme is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - return validators.scheme_is_valid(self.scheme, require) - - def path_is_valid(self, require=False): - """Determine if the path component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the path is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - return validators.path_is_valid(self.path, require) - - def query_is_valid(self, require=False): - """Determine if the query component is valid. - - .. deprecated:: 1.1.0 - - Use the :class:`~rfc3986.validators.Validator` object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the query is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - return validators.query_is_valid(self.query, require) - - def fragment_is_valid(self, require=False): - """Determine if the fragment component is valid. - - .. deprecated:: 1.1.0 - - Use the Validator object instead. - - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the fragment is valid. ``False`` otherwise. - :rtype: bool - """ - warnings.warn( - "Please use rfc3986.validators.Validator instead. " - "This method will be eventually removed.", - DeprecationWarning, - ) - return validators.fragment_is_valid(self.fragment, require) - - def normalized_equality(self, other_ref): - """Compare this URIReference to another URIReference. - - :param URIReference other_ref: (required), The reference with which - we're comparing. - :returns: ``True`` if the references are equal, ``False`` otherwise. - :rtype: bool - """ - return tuple(self.normalize()) == tuple(other_ref.normalize()) - - def resolve_with(self, base_uri, strict=False): - """Use an absolute URI Reference to resolve this relative reference. - - Assuming this is a relative reference that you would like to resolve, - use the provided base URI to resolve it. - - See http://tools.ietf.org/html/rfc3986#section-5 for more information. - - :param base_uri: Either a string or URIReference. It must be an - absolute URI or it will raise an exception. - :returns: A new URIReference which is the result of resolving this - reference using ``base_uri``. - :rtype: :class:`URIReference` - :raises rfc3986.exceptions.ResolutionError: - If the ``base_uri`` is not an absolute URI. - """ - if not isinstance(base_uri, URIMixin): - base_uri = type(self).from_string(base_uri) - - if not base_uri.is_absolute(): - raise exc.ResolutionError(base_uri) - - # This is optional per - # http://tools.ietf.org/html/rfc3986#section-5.2.1 - base_uri = base_uri.normalize() - - # The reference we're resolving - resolving = self - - if not strict and resolving.scheme == base_uri.scheme: - resolving = resolving.copy_with(scheme=None) - - # http://tools.ietf.org/html/rfc3986#page-32 - if resolving.scheme is not None: - target = resolving.copy_with( - path=normalizers.normalize_path(resolving.path) - ) - else: - if resolving.authority is not None: - target = resolving.copy_with( - scheme=base_uri.scheme, - path=normalizers.normalize_path(resolving.path), - ) - else: - if resolving.path is None: - if resolving.query is not None: - query = resolving.query - else: - query = base_uri.query - target = resolving.copy_with( - scheme=base_uri.scheme, - authority=base_uri.authority, - path=base_uri.path, - query=query, - ) - else: - if resolving.path.startswith("/"): - path = normalizers.normalize_path(resolving.path) - else: - path = normalizers.normalize_path( - misc.merge_paths(base_uri, resolving.path) - ) - target = resolving.copy_with( - scheme=base_uri.scheme, - authority=base_uri.authority, - path=path, - query=resolving.query, - ) - return target - - def unsplit(self): - """Create a URI string from the components. - - :returns: The URI Reference reconstituted as a string. - :rtype: str - """ - # See http://tools.ietf.org/html/rfc3986#section-5.3 - result_list = [] - if self.scheme: - result_list.extend([self.scheme, ":"]) - if self.authority: - result_list.extend(["//", self.authority]) - if self.path: - result_list.append(self.path) - if self.query is not None: - result_list.extend(["?", self.query]) - if self.fragment is not None: - result_list.extend(["#", self.fragment]) - return "".join(result_list) - - def copy_with( - self, - scheme=misc.UseExisting, - authority=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, - ): - """Create a copy of this reference with the new components. - - :param str scheme: - (optional) The scheme to use for the new reference. - :param str authority: - (optional) The authority to use for the new reference. - :param str path: - (optional) The path to use for the new reference. - :param str query: - (optional) The query to use for the new reference. - :param str fragment: - (optional) The fragment to use for the new reference. - :returns: - New URIReference with provided components. - :rtype: - URIReference - """ - attributes = { - "scheme": scheme, - "authority": authority, - "path": path, - "query": query, - "fragment": fragment, - } - for key, value in list(attributes.items()): - if value is misc.UseExisting: - del attributes[key] - uri = self._replace(**attributes) - uri.encoding = self.encoding - return uri diff --git a/src/urllib3/packages/rfc3986/abnf_regexp.py b/src/urllib3/packages/rfc3986/abnf_regexp.py deleted file mode 100644 index c461443d..00000000 --- a/src/urllib3/packages/rfc3986/abnf_regexp.py +++ /dev/null @@ -1,262 +0,0 @@ -# -*- coding: utf-8 -*- -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module for the regular expressions crafted from ABNF.""" - -import sys - -# https://tools.ietf.org/html/rfc3986#page-13 -GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@" -GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS) -# https://tools.ietf.org/html/rfc3986#page-13 -SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;=" -SUB_DELIMITERS_SET = set(SUB_DELIMITERS) -# Escape the '*' for use in regular expressions -SUB_DELIMITERS_RE = r"!$&'()\*+,;=" -RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET) -ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" -DIGIT = "0123456789" -# https://tools.ietf.org/html/rfc3986#section-2.3 -UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + r"._!-" -UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS) -NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET) -# We need to escape the '-' in this case: -UNRESERVED_RE = r"A-Za-z0-9._~\-" - -# Percent encoded character values -PERCENT_ENCODED = PCT_ENCODED = "%[A-Fa-f0-9]{2}" -PCHAR = "([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":@]|%s)" % PCT_ENCODED - -# NOTE(sigmavirus24): We're going to use more strict regular expressions -# than appear in Appendix B for scheme. This will prevent over-eager -# consuming of items that aren't schemes. -SCHEME_RE = "[a-zA-Z][a-zA-Z0-9+.-]*" -_AUTHORITY_RE = "[^/?#]*" -_PATH_RE = "[^?#]*" -_QUERY_RE = "[^#]*" -_FRAGMENT_RE = ".*" - -# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B -COMPONENT_PATTERN_DICT = { - "scheme": SCHEME_RE, - "authority": _AUTHORITY_RE, - "path": _PATH_RE, - "query": _QUERY_RE, - "fragment": _FRAGMENT_RE, -} - -# See http://tools.ietf.org/html/rfc3986#appendix-B -# In this case, we name each of the important matches so we can use -# SRE_Match#groupdict to parse the values out if we so choose. This is also -# modified to ignore other matches that are not important to the parsing of -# the reference so we can also simply use SRE_Match#groups. -URL_PARSING_RE = ( - r"(?:(?P{scheme}):)?(?://(?P{authority}))?" - r"(?P{path})(?:\?(?P{query}))?" - r"(?:#(?P{fragment}))?" -).format(**COMPONENT_PATTERN_DICT) - - -# ######################### -# Authority Matcher Section -# ######################### - -# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 -# The pattern for a regular name, e.g., www.google.com, api.github.com -REGULAR_NAME_RE = REG_NAME = "((?:{0}|[{1}])*)".format( - "%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + UNRESERVED_RE -) -# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, -IPv4_RE = r"([0-9]{1,3}\.){3}[0-9]{1,3}" -# Hexadecimal characters used in each piece of an IPv6 address -HEXDIG_RE = "[0-9A-Fa-f]{1,4}" -# Least-significant 32 bits of an IPv6 address -LS32_RE = "({hex}:{hex}|{ipv4})".format(hex=HEXDIG_RE, ipv4=IPv4_RE) -# Substitutions into the following patterns for IPv6 patterns defined -# http://tools.ietf.org/html/rfc3986#page-20 -_subs = {"hex": HEXDIG_RE, "ls32": LS32_RE} - -# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details -# about ABNF (Augmented Backus-Naur Form) use in the comments -variations = [ - # 6( h16 ":" ) ls32 - "(%(hex)s:){6}%(ls32)s" % _subs, - # "::" 5( h16 ":" ) ls32 - "::(%(hex)s:){5}%(ls32)s" % _subs, - # [ h16 ] "::" 4( h16 ":" ) ls32 - "(%(hex)s)?::(%(hex)s:){4}%(ls32)s" % _subs, - # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 - "((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s" % _subs, - # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 - "((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s" % _subs, - # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 - "((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s" % _subs, - # [ *4( h16 ":" ) h16 ] "::" ls32 - "((%(hex)s:){0,4}%(hex)s)?::%(ls32)s" % _subs, - # [ *5( h16 ":" ) h16 ] "::" h16 - "((%(hex)s:){0,5}%(hex)s)?::%(hex)s" % _subs, - # [ *6( h16 ":" ) h16 ] "::" - "((%(hex)s:){0,6}%(hex)s)?::" % _subs, -] - -IPv6_RE = "(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7})|({8}))".format(*variations) - -IPv_FUTURE_RE = r"v[0-9A-Fa-f]+\.[%s]+" % (UNRESERVED_RE + SUB_DELIMITERS_RE + ":") - -# RFC 6874 Zone ID ABNF -ZONE_ID = "(?:[" + UNRESERVED_RE + "]|" + PCT_ENCODED + ")+" - -IPv6_ADDRZ_RFC4007_RE = IPv6_RE + "(?:(?:%25|%)" + ZONE_ID + ")?" -IPv6_ADDRZ_RE = IPv6_RE + "(?:%25" + ZONE_ID + ")?" - -IP_LITERAL_RE = r"\[({0}|{1})\]".format(IPv6_ADDRZ_RFC4007_RE, IPv_FUTURE_RE) - -# Pattern for matching the host piece of the authority -HOST_RE = HOST_PATTERN = "({0}|{1}|{2})".format(REG_NAME, IPv4_RE, IP_LITERAL_RE) -USERINFO_RE = "^([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":]|%s)+" % (PCT_ENCODED) -PORT_RE = "[0-9]{1,5}" - -# #################### -# Path Matcher Section -# #################### - -# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information -# about the path patterns defined below. -segments = { - "segment": PCHAR + "*", - # Non-zero length segment - "segment-nz": PCHAR + "+", - # Non-zero length segment without ":" - "segment-nz-nc": PCHAR.replace(":", "") + "+", -} - -# Path types taken from Section 3.3 (linked above) -PATH_EMPTY = "^$" -PATH_ROOTLESS = "%(segment-nz)s(/%(segment)s)*" % segments -PATH_NOSCHEME = "%(segment-nz-nc)s(/%(segment)s)*" % segments -PATH_ABSOLUTE = "/(%s)?" % PATH_ROOTLESS -PATH_ABEMPTY = "(/%(segment)s)*" % segments -PATH_RE = "^(%s|%s|%s|%s|%s)$" % ( - PATH_ABEMPTY, - PATH_ABSOLUTE, - PATH_NOSCHEME, - PATH_ROOTLESS, - PATH_EMPTY, -) - -FRAGMENT_RE = QUERY_RE = ( - "^([/?:@" + UNRESERVED_RE + SUB_DELIMITERS_RE + "]|%s)*$" % PCT_ENCODED -) - -# ########################## -# Relative reference matcher -# ########################## - -# See http://tools.ietf.org/html/rfc3986#section-4.2 for details -RELATIVE_PART_RE = "(//%s%s|%s|%s|%s)" % ( - COMPONENT_PATTERN_DICT["authority"], - PATH_ABEMPTY, - PATH_ABSOLUTE, - PATH_NOSCHEME, - PATH_EMPTY, -) - -# See http://tools.ietf.org/html/rfc3986#section-3 for definition -HIER_PART_RE = "(//%s%s|%s|%s|%s)" % ( - COMPONENT_PATTERN_DICT["authority"], - PATH_ABEMPTY, - PATH_ABSOLUTE, - PATH_ROOTLESS, - PATH_EMPTY, -) - -# ############### -# IRIs / RFC 3987 -# ############### - -# Only wide-unicode gets the high-ranges of UCSCHAR -if sys.maxunicode > 0xFFFF: # pragma: no cover - IPRIVATE = u"\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD" - UCSCHAR_RE = ( - u"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF" - u"\U00010000-\U0001FFFD\U00020000-\U0002FFFD" - u"\U00030000-\U0003FFFD\U00040000-\U0004FFFD" - u"\U00050000-\U0005FFFD\U00060000-\U0006FFFD" - u"\U00070000-\U0007FFFD\U00080000-\U0008FFFD" - u"\U00090000-\U0009FFFD\U000A0000-\U000AFFFD" - u"\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD" - u"\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD" - ) -else: # pragma: no cover - IPRIVATE = u"\uE000-\uF8FF" - UCSCHAR_RE = u"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF" - -IUNRESERVED_RE = u"A-Za-z0-9\\._~\\-" + UCSCHAR_RE -IPCHAR = u"([" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u":@]|%s)" % PCT_ENCODED - -isegments = { - "isegment": IPCHAR + u"*", - # Non-zero length segment - "isegment-nz": IPCHAR + u"+", - # Non-zero length segment without ":" - "isegment-nz-nc": IPCHAR.replace(":", "") + u"+", -} - -IPATH_ROOTLESS = u"%(isegment-nz)s(/%(isegment)s)*" % isegments -IPATH_NOSCHEME = u"%(isegment-nz-nc)s(/%(isegment)s)*" % isegments -IPATH_ABSOLUTE = u"/(?:%s)?" % IPATH_ROOTLESS -IPATH_ABEMPTY = u"(?:/%(isegment)s)*" % isegments -IPATH_RE = u"^(?:%s|%s|%s|%s|%s)$" % ( - IPATH_ABEMPTY, - IPATH_ABSOLUTE, - IPATH_NOSCHEME, - IPATH_ROOTLESS, - PATH_EMPTY, -) - -IREGULAR_NAME_RE = IREG_NAME = u"(?:{0}|[{1}])*".format( - u"%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + IUNRESERVED_RE -) - -IHOST_RE = IHOST_PATTERN = u"({0}|{1}|{2})".format(IREG_NAME, IPv4_RE, IP_LITERAL_RE) - -IUSERINFO_RE = ( - u"^(?:[" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u":]|%s)+" % (PCT_ENCODED) -) - -IFRAGMENT_RE = ( - u"^(?:[/?:@" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u"]|%s)*$" % PCT_ENCODED -) -IQUERY_RE = ( - u"^(?:[/?:@" - + IUNRESERVED_RE - + SUB_DELIMITERS_RE - + IPRIVATE - + u"]|%s)*$" % PCT_ENCODED -) - -IRELATIVE_PART_RE = u"(//%s%s|%s|%s|%s)" % ( - COMPONENT_PATTERN_DICT["authority"], - IPATH_ABEMPTY, - IPATH_ABSOLUTE, - IPATH_NOSCHEME, - PATH_EMPTY, -) - -IHIER_PART_RE = u"(//%s%s|%s|%s|%s)" % ( - COMPONENT_PATTERN_DICT["authority"], - IPATH_ABEMPTY, - IPATH_ABSOLUTE, - IPATH_ROOTLESS, - PATH_EMPTY, -) diff --git a/src/urllib3/packages/rfc3986/api.py b/src/urllib3/packages/rfc3986/api.py deleted file mode 100644 index 1e098b34..00000000 --- a/src/urllib3/packages/rfc3986/api.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Module containing the simple and functional API for rfc3986. - -This module defines functions and provides access to the public attributes -and classes of rfc3986. -""" - -from .iri import IRIReference -from .parseresult import ParseResult -from .uri import URIReference - - -def uri_reference(uri, encoding="utf-8"): - """Parse a URI string into a URIReference. - - This is a convenience function. You could achieve the same end by using - ``URIReference.from_string(uri)``. - - :param str uri: The URI which needs to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: A parsed URI - :rtype: :class:`URIReference` - """ - return URIReference.from_string(uri, encoding) - - -def iri_reference(iri, encoding="utf-8"): - """Parse a IRI string into an IRIReference. - - This is a convenience function. You could achieve the same end by using - ``IRIReference.from_string(iri)``. - - :param str iri: The IRI which needs to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: A parsed IRI - :rtype: :class:`IRIReference` - """ - return IRIReference.from_string(iri, encoding) - - -def is_valid_uri(uri, encoding="utf-8", **kwargs): - """Determine if the URI given is valid. - - This is a convenience function. You could use either - ``uri_reference(uri).is_valid()`` or - ``URIReference.from_string(uri).is_valid()`` to achieve the same result. - - :param str uri: The URI to be validated. - :param str encoding: The encoding of the string provided - :param bool require_scheme: Set to ``True`` if you wish to require the - presence of the scheme component. - :param bool require_authority: Set to ``True`` if you wish to require the - presence of the authority component. - :param bool require_path: Set to ``True`` if you wish to require the - presence of the path component. - :param bool require_query: Set to ``True`` if you wish to require the - presence of the query component. - :param bool require_fragment: Set to ``True`` if you wish to require the - presence of the fragment component. - :returns: ``True`` if the URI is valid, ``False`` otherwise. - :rtype: bool - """ - return URIReference.from_string(uri, encoding).is_valid(**kwargs) - - -def normalize_uri(uri, encoding="utf-8"): - """Normalize the given URI. - - This is a convenience function. You could use either - ``uri_reference(uri).normalize().unsplit()`` or - ``URIReference.from_string(uri).normalize().unsplit()`` instead. - - :param str uri: The URI to be normalized. - :param str encoding: The encoding of the string provided - :returns: The normalized URI. - :rtype: str - """ - normalized_reference = URIReference.from_string(uri, encoding).normalize() - return normalized_reference.unsplit() - - -def urlparse(uri, encoding="utf-8"): - """Parse a given URI and return a ParseResult. - - This is a partial replacement of the standard library's urlparse function. - - :param str uri: The URI to be parsed. - :param str encoding: The encoding of the string provided. - :returns: A parsed URI - :rtype: :class:`~rfc3986.parseresult.ParseResult` - """ - return ParseResult.from_string(uri, encoding, strict=False) diff --git a/src/urllib3/packages/rfc3986/builder.py b/src/urllib3/packages/rfc3986/builder.py deleted file mode 100644 index bbabfaf2..00000000 --- a/src/urllib3/packages/rfc3986/builder.py +++ /dev/null @@ -1,301 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2017 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module containing the logic for the URIBuilder object.""" -from . import compat -from . import normalizers -from . import uri - - -class URIBuilder(object): - """Object to aid in building up a URI Reference from parts. - - .. note:: - - This object should be instantiated by the user, but it's recommended - that it is not provided with arguments. Instead, use the available - method to populate the fields. - - """ - - def __init__( - self, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, - ): - """Initialize our URI builder. - - :param str scheme: - (optional) - :param str userinfo: - (optional) - :param str host: - (optional) - :param int port: - (optional) - :param str path: - (optional) - :param str query: - (optional) - :param str fragment: - (optional) - """ - self.scheme = scheme - self.userinfo = userinfo - self.host = host - self.port = port - self.path = path - self.query = query - self.fragment = fragment - - def __repr__(self): - """Provide a convenient view of our builder object.""" - formatstr = ( - "URIBuilder(scheme={b.scheme}, userinfo={b.userinfo}, " - "host={b.host}, port={b.port}, path={b.path}, " - "query={b.query}, fragment={b.fragment})" - ) - return formatstr.format(b=self) - - def add_scheme(self, scheme): - """Add a scheme to our builder object. - - After normalizing, this will generate a new URIBuilder instance with - the specified scheme and all other attributes the same. - - .. code-block:: python - - >>> URIBuilder().add_scheme('HTTPS') - URIBuilder(scheme='https', userinfo=None, host=None, port=None, - path=None, query=None, fragment=None) - - """ - scheme = normalizers.normalize_scheme(scheme) - return URIBuilder( - scheme=scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=self.path, - query=self.query, - fragment=self.fragment, - ) - - def add_credentials(self, username, password): - """Add credentials as the userinfo portion of the URI. - - .. code-block:: python - - >>> URIBuilder().add_credentials('root', 's3crete') - URIBuilder(scheme=None, userinfo='root:s3crete', host=None, - port=None, path=None, query=None, fragment=None) - - >>> URIBuilder().add_credentials('root', None) - URIBuilder(scheme=None, userinfo='root', host=None, - port=None, path=None, query=None, fragment=None) - """ - if username is None: - raise ValueError("Username cannot be None") - userinfo = normalizers.normalize_username(username) - - if password is not None: - userinfo = "{}:{}".format( - userinfo, normalizers.normalize_password(password) - ) - - return URIBuilder( - scheme=self.scheme, - userinfo=userinfo, - host=self.host, - port=self.port, - path=self.path, - query=self.query, - fragment=self.fragment, - ) - - def add_host(self, host): - """Add hostname to the URI. - - .. code-block:: python - - >>> URIBuilder().add_host('google.com') - URIBuilder(scheme=None, userinfo=None, host='google.com', - port=None, path=None, query=None, fragment=None) - - """ - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=normalizers.normalize_host(host), - port=self.port, - path=self.path, - query=self.query, - fragment=self.fragment, - ) - - def add_port(self, port): - """Add port to the URI. - - .. code-block:: python - - >>> URIBuilder().add_port(80) - URIBuilder(scheme=None, userinfo=None, host=None, port='80', - path=None, query=None, fragment=None) - - >>> URIBuilder().add_port(443) - URIBuilder(scheme=None, userinfo=None, host=None, port='443', - path=None, query=None, fragment=None) - - """ - port_int = int(port) - if port_int < 0: - raise ValueError( - "ports are not allowed to be negative. You provided {}".format(port_int) - ) - if port_int > 65535: - raise ValueError( - "ports are not allowed to be larger than 65535. " - "You provided {}".format(port_int) - ) - - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port="{}".format(port_int), - path=self.path, - query=self.query, - fragment=self.fragment, - ) - - def add_path(self, path): - """Add a path to the URI. - - .. code-block:: python - - >>> URIBuilder().add_path('sigmavirus24/rfc3985') - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path='/sigmavirus24/rfc3986', query=None, fragment=None) - - >>> URIBuilder().add_path('/checkout.php') - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path='/checkout.php', query=None, fragment=None) - - """ - if not path.startswith("/"): - path = "/{}".format(path) - - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=normalizers.normalize_path(path), - query=self.query, - fragment=self.fragment, - ) - - def add_query_from(self, query_items): - """Generate and add a query a dictionary or list of tuples. - - .. code-block:: python - - >>> URIBuilder().add_query_from({'a': 'b c'}) - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path=None, query='a=b+c', fragment=None) - - >>> URIBuilder().add_query_from([('a', 'b c')]) - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path=None, query='a=b+c', fragment=None) - - """ - query = normalizers.normalize_query(compat.urlencode(query_items)) - - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=self.path, - query=query, - fragment=self.fragment, - ) - - def add_query(self, query): - """Add a pre-formated query string to the URI. - - .. code-block:: python - - >>> URIBuilder().add_query('a=b&c=d') - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path=None, query='a=b&c=d', fragment=None) - - """ - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=self.path, - query=normalizers.normalize_query(query), - fragment=self.fragment, - ) - - def add_fragment(self, fragment): - """Add a fragment to the URI. - - .. code-block:: python - - >>> URIBuilder().add_fragment('section-2.6.1') - URIBuilder(scheme=None, userinfo=None, host=None, port=None, - path=None, query=None, fragment='section-2.6.1') - - """ - return URIBuilder( - scheme=self.scheme, - userinfo=self.userinfo, - host=self.host, - port=self.port, - path=self.path, - query=self.query, - fragment=normalizers.normalize_fragment(fragment), - ) - - def finalize(self): - """Create a URIReference from our builder. - - .. code-block:: python - - >>> URIBuilder().add_scheme('https').add_host('github.com' - ... ).add_path('sigmavirus24/rfc3986').finalize().unsplit() - 'https://github.com/sigmavirus24/rfc3986' - - >>> URIBuilder().add_scheme('https').add_host('github.com' - ... ).add_path('sigmavirus24/rfc3986').add_credentials( - ... 'sigmavirus24', 'not-re@l').finalize().unsplit() - 'https://sigmavirus24:not-re%40l@github.com/sigmavirus24/rfc3986' - - """ - return uri.URIReference( - self.scheme, - normalizers.normalize_authority((self.userinfo, self.host, self.port)), - self.path, - self.query, - self.fragment, - ) diff --git a/src/urllib3/packages/rfc3986/compat.py b/src/urllib3/packages/rfc3986/compat.py deleted file mode 100644 index 36e490ab..00000000 --- a/src/urllib3/packages/rfc3986/compat.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Compatibility module for Python 2 and 3 support.""" -import sys - -try: - from urllib.parse import quote as urlquote -except ImportError: # Python 2.x - from urllib import quote as urlquote - -try: - from urllib.parse import urlencode -except ImportError: # Python 2.x - from urllib import urlencode - -__all__ = ("to_bytes", "to_str", "urlquote", "urlencode") - -PY3 = (3, 0) <= sys.version_info < (4, 0) -PY2 = (2, 6) <= sys.version_info < (2, 8) - - -if PY3: - unicode = str # Python 3.x - - -def to_str(b, encoding="utf-8"): - """Ensure that b is text in the specified encoding.""" - if hasattr(b, "decode") and not isinstance(b, unicode): - b = b.decode(encoding) - return b - - -def to_bytes(s, encoding="utf-8"): - """Ensure that s is converted to bytes from the encoding.""" - if hasattr(s, "encode") and not isinstance(s, bytes): - s = s.encode(encoding) - return s diff --git a/src/urllib3/packages/rfc3986/exceptions.py b/src/urllib3/packages/rfc3986/exceptions.py deleted file mode 100644 index 865f9bd7..00000000 --- a/src/urllib3/packages/rfc3986/exceptions.py +++ /dev/null @@ -1,112 +0,0 @@ -# -*- coding: utf-8 -*- -"""Exceptions module for rfc3986.""" - -from . import compat - - -class RFC3986Exception(Exception): - """Base class for all rfc3986 exception classes.""" - - pass - - -class InvalidAuthority(RFC3986Exception): - """Exception when the authority string is invalid.""" - - def __init__(self, authority): - """Initialize the exception with the invalid authority.""" - super(InvalidAuthority, self).__init__( - u"The authority ({0}) is not valid.".format(compat.to_str(authority)) - ) - - -class InvalidPort(RFC3986Exception): - """Exception when the port is invalid.""" - - def __init__(self, port): - """Initialize the exception with the invalid port.""" - super(InvalidPort, self).__init__('The port ("{0}") is not valid.'.format(port)) - - -class ResolutionError(RFC3986Exception): - """Exception to indicate a failure to resolve a URI.""" - - def __init__(self, uri): - """Initialize the error with the failed URI.""" - super(ResolutionError, self).__init__( - "{0} is not an absolute URI.".format(uri.unsplit()) - ) - - -class ValidationError(RFC3986Exception): - """Exception raised during Validation of a URI.""" - - pass - - -class MissingComponentError(ValidationError): - """Exception raised when a required component is missing.""" - - def __init__(self, uri, *component_names): - """Initialize the error with the missing component name.""" - verb = "was" - if len(component_names) > 1: - verb = "were" - - self.uri = uri - self.components = sorted(component_names) - components = ", ".join(self.components) - super(MissingComponentError, self).__init__( - "{} {} required but missing".format(components, verb), uri, self.components - ) - - -class UnpermittedComponentError(ValidationError): - """Exception raised when a component has an unpermitted value.""" - - def __init__(self, component_name, component_value, allowed_values): - """Initialize the error with the unpermitted component.""" - super(UnpermittedComponentError, self).__init__( - "{} was required to be one of {!r} but was {!r}".format( - component_name, list(sorted(allowed_values)), component_value - ), - component_name, - component_value, - allowed_values, - ) - self.component_name = component_name - self.component_value = component_value - self.allowed_values = allowed_values - - -class PasswordForbidden(ValidationError): - """Exception raised when a URL has a password in the userinfo section.""" - - def __init__(self, uri): - """Initialize the error with the URI that failed validation.""" - unsplit = getattr(uri, "unsplit", lambda: uri) - super(PasswordForbidden, self).__init__( - '"{}" contained a password when validation forbade it'.format(unsplit()) - ) - self.uri = uri - - -class InvalidComponentsError(ValidationError): - """Exception raised when one or more components are invalid.""" - - def __init__(self, uri, *component_names): - """Initialize the error with the invalid component name(s).""" - verb = "was" - if len(component_names) > 1: - verb = "were" - - self.uri = uri - self.components = sorted(component_names) - components = ", ".join(self.components) - super(InvalidComponentsError, self).__init__( - "{} {} found to be invalid".format(components, verb), uri, self.components - ) - - -class MissingDependencyError(RFC3986Exception): - """Exception raised when an IRI is encoded without the 'idna' module.""" diff --git a/src/urllib3/packages/rfc3986/iri.py b/src/urllib3/packages/rfc3986/iri.py deleted file mode 100644 index a15e8386..00000000 --- a/src/urllib3/packages/rfc3986/iri.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Module containing the implementation of the IRIReference class.""" -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Copyright (c) 2015 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from collections import namedtuple - -from . import compat -from . import exceptions -from . import misc -from . import normalizers -from . import uri - - -try: - import idna -except ImportError: # pragma: no cover - idna = None - - -class IRIReference(namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin): - """Immutable object representing a parsed IRI Reference. - - Can be encoded into an URIReference object via the procedure - specified in RFC 3987 Section 3.1 - - .. note:: - The IRI submodule is a new interface and may possibly change in - the future. Check for changes to the interface when upgrading. - """ - - slots = () - - def __new__(cls, scheme, authority, path, query, fragment, encoding="utf-8"): - """Create a new IRIReference.""" - ref = super(IRIReference, cls).__new__( - cls, scheme or None, authority or None, path or None, query, fragment - ) - ref.encoding = encoding - return ref - - def __eq__(self, other): - """Compare this reference to another.""" - other_ref = other - if isinstance(other, tuple): - other_ref = self.__class__(*other) - elif not isinstance(other, IRIReference): - try: - other_ref = self.__class__.from_string(other) - except TypeError: - raise TypeError( - "Unable to compare {0}() to {1}()".format( - type(self).__name__, type(other).__name__ - ) - ) - - # See http://tools.ietf.org/html/rfc3986#section-6.2 - return tuple(self) == tuple(other_ref) - - def _match_subauthority(self): - return misc.ISUBAUTHORITY_MATCHER.match(self.authority) - - @classmethod - def from_string(cls, iri_string, encoding="utf-8"): - """Parse a IRI reference from the given unicode IRI string. - - :param str iri_string: Unicode IRI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: :class:`IRIReference` or subclass thereof - """ - iri_string = compat.to_str(iri_string, encoding) - - split_iri = misc.IRI_MATCHER.match(iri_string).groupdict() - return cls( - split_iri["scheme"], - split_iri["authority"], - normalizers.encode_component(split_iri["path"], encoding), - normalizers.encode_component(split_iri["query"], encoding), - normalizers.encode_component(split_iri["fragment"], encoding), - encoding, - ) - - def encode(self, idna_encoder=None): # noqa: C901 - """Encode an IRIReference into a URIReference instance. - - If the ``idna`` module is installed or the ``rfc3986[idna]`` - extra is used then unicode characters in the IRI host - component will be encoded with IDNA2008. - - :param idna_encoder: - Function that encodes each part of the host component - If not given will raise an exception if the IRI - contains a host component. - :rtype: uri.URIReference - :returns: A URI reference - """ - authority = self.authority - if authority: - if idna_encoder is None: - if idna is None: # pragma: no cover - raise exceptions.MissingDependencyError( - "Could not import the 'idna' module " - "and the IRI hostname requires encoding" - ) - - def idna_encoder(name): - if any(ord(c) > 128 for c in name): - try: - return idna.encode( - name.lower(), strict=True, std3_rules=True - ) - except idna.IDNAError: - raise exceptions.InvalidAuthority(self.authority) - return name - - authority = "" - if self.host: - authority = ".".join( - [compat.to_str(idna_encoder(part)) for part in self.host.split(".")] - ) - - if self.userinfo is not None: - authority = ( - normalizers.encode_component(self.userinfo, self.encoding) - + "@" - + authority - ) - - if self.port is not None: - authority += ":" + str(self.port) - - return uri.URIReference( - self.scheme, - authority, - path=self.path, - query=self.query, - fragment=self.fragment, - encoding=self.encoding, - ) diff --git a/src/urllib3/packages/rfc3986/misc.py b/src/urllib3/packages/rfc3986/misc.py deleted file mode 100644 index 353a6292..00000000 --- a/src/urllib3/packages/rfc3986/misc.py +++ /dev/null @@ -1,125 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Module containing compiled regular expressions and constants. - -This module contains important constants, patterns, and compiled regular -expressions for parsing and validating URIs and their components. -""" - -import re - -from . import abnf_regexp - -# These are enumerated for the named tuple used as a superclass of -# URIReference -URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"] - -important_characters = { - "generic_delimiters": abnf_regexp.GENERIC_DELIMITERS, - "sub_delimiters": abnf_regexp.SUB_DELIMITERS, - # We need to escape the '*' in this case - "re_sub_delimiters": abnf_regexp.SUB_DELIMITERS_RE, - "unreserved_chars": abnf_regexp.UNRESERVED_CHARS, - # We need to escape the '-' in this case: - "re_unreserved": abnf_regexp.UNRESERVED_RE, -} - -# For details about delimiters and reserved characters, see: -# http://tools.ietf.org/html/rfc3986#section-2.2 -GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET -SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET -RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET -# For details about unreserved characters, see: -# http://tools.ietf.org/html/rfc3986#section-2.3 -UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET -NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET - -URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE) - -SUBAUTHORITY_MATCHER = re.compile( - ( - "^(?:(?P{0})@)?" # userinfo - "(?P{1})" # host - ":?(?P{2})?$" # port - ).format(abnf_regexp.USERINFO_RE, abnf_regexp.HOST_PATTERN, abnf_regexp.PORT_RE) -) - - -HOST_MATCHER = re.compile("^" + abnf_regexp.HOST_RE + "$") -IPv4_MATCHER = re.compile("^" + abnf_regexp.IPv4_RE + "$") -IPv6_MATCHER = re.compile(r"^\[" + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r"\]$") - -# Used by host validator -IPv6_NO_RFC4007_MATCHER = re.compile(r"^\[%s\]$" % (abnf_regexp.IPv6_ADDRZ_RE)) - -# Matcher used to validate path components -PATH_MATCHER = re.compile(abnf_regexp.PATH_RE) - - -# ################################## -# Query and Fragment Matcher Section -# ################################## - -QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE) - -FRAGMENT_MATCHER = QUERY_MATCHER - -# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1 -SCHEME_MATCHER = re.compile("^{0}$".format(abnf_regexp.SCHEME_RE)) - -RELATIVE_REF_MATCHER = re.compile( - r"^%s(\?%s)?(#%s)?$" - % (abnf_regexp.RELATIVE_PART_RE, abnf_regexp.QUERY_RE, abnf_regexp.FRAGMENT_RE) -) - -# See http://tools.ietf.org/html/rfc3986#section-4.3 -ABSOLUTE_URI_MATCHER = re.compile( - r"^%s:%s(\?%s)?$" - % ( - abnf_regexp.COMPONENT_PATTERN_DICT["scheme"], - abnf_regexp.HIER_PART_RE, - abnf_regexp.QUERY_RE[1:-1], - ) -) - -# ############### -# IRIs / RFC 3987 -# ############### - -IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE) - -ISUBAUTHORITY_MATCHER = re.compile( - ( - u"^(?:(?P{0})@)?" # iuserinfo - u"(?P{1})" # ihost - u":?(?P{2})?$" # port - ).format(abnf_regexp.IUSERINFO_RE, abnf_regexp.IHOST_RE, abnf_regexp.PORT_RE), - re.UNICODE, -) - - -# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 -def merge_paths(base_uri, relative_path): - """Merge a base URI's path with a relative URI's path.""" - if base_uri.path is None and base_uri.authority is not None: - return "/" + relative_path - else: - path = base_uri.path or "" - index = path.rfind("/") - return path[:index] + "/" + relative_path - - -UseExisting = object() diff --git a/src/urllib3/packages/rfc3986/normalizers.py b/src/urllib3/packages/rfc3986/normalizers.py deleted file mode 100644 index 0d702b6d..00000000 --- a/src/urllib3/packages/rfc3986/normalizers.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module with functions to normalize components.""" -import re - -from . import compat -from . import misc - - -def normalize_scheme(scheme): - """Normalize the scheme component.""" - return scheme.lower() - - -def normalize_authority(authority): - """Normalize an authority tuple to a string.""" - userinfo, host, port = authority - result = "" - if userinfo: - result += normalize_percent_characters(userinfo) + "@" - if host: - result += normalize_host(host) - if port: - result += ":" + port - return result - - -def normalize_username(username): - """Normalize a username to make it safe to include in userinfo.""" - return compat.urlquote(username) - - -def normalize_password(password): - """Normalize a password to make safe for userinfo.""" - return compat.urlquote(password) - - -def normalize_host(host): - """Normalize a host string.""" - if misc.IPv6_MATCHER.match(host): - percent = host.find("%") - if percent != -1: - percent_25 = host.find("%25") - - # Replace RFC 4007 IPv6 Zone ID delimiter '%' with '%25' - # from RFC 6874. If the host is '[%25]' then we - # assume RFC 4007 and normalize to '[%2525]' - if ( - percent_25 == -1 - or percent < percent_25 - or (percent == percent_25 and percent_25 == len(host) - 4) - ): - host = host.replace("%", "%25", 1) - - # Don't normalize the casing of the Zone ID - return host[:percent].lower() + host[percent:] - - return host.lower() - - -def normalize_path(path): - """Normalize the path string.""" - if not path: - return path - - path = normalize_percent_characters(path) - return remove_dot_segments(path) - - -def normalize_query(query): - """Normalize the query string.""" - if not query: - return query - return normalize_percent_characters(query) - - -def normalize_fragment(fragment): - """Normalize the fragment string.""" - if not fragment: - return fragment - return normalize_percent_characters(fragment) - - -PERCENT_MATCHER = re.compile("%[A-Fa-f0-9]{2}") - - -def normalize_percent_characters(s): - """All percent characters should be upper-cased. - - For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``. - """ - matches = set(PERCENT_MATCHER.findall(s)) - for m in matches: - if not m.isupper(): - s = s.replace(m, m.upper()) - return s - - -def remove_dot_segments(s): - """Remove dot segments from the string. - - See also Section 5.2.4 of :rfc:`3986`. - """ - # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code - segments = s.split("/") # Turn the path into a list of segments - output = [] # Initialize the variable to use to store output - - for segment in segments: - # '.' is the current directory, so ignore it, it is superfluous - if segment == ".": - continue - # Anything other than '..', should be appended to the output - elif segment != "..": - output.append(segment) - # In this case segment == '..', if we can, we should pop the last - # element - elif output: - output.pop() - - # If the path starts with '/' and the output is empty or the first string - # is non-empty - if s.startswith("/") and (not output or output[0]): - output.insert(0, "") - - # If the path starts with '/.' or '/..' ensure we add one more empty - # string to add a trailing '/' - if s.endswith(("/.", "/..")): - output.append("") - - return "/".join(output) - - -def encode_component(uri_component, encoding): - """Encode the specific component in the provided encoding.""" - if uri_component is None: - return uri_component - - # Try to see if the component we're encoding is already percent-encoded - # so we can skip all '%' characters but still encode all others. - percent_encodings = len( - PERCENT_MATCHER.findall(compat.to_str(uri_component, encoding)) - ) - - uri_bytes = compat.to_bytes(uri_component, encoding) - is_percent_encoded = percent_encodings == uri_bytes.count(b"%") - - encoded_uri = bytearray() - - for i in range(0, len(uri_bytes)): - # Will return a single character bytestring on both Python 2 & 3 - byte = uri_bytes[i : i + 1] - byte_ord = ord(byte) - if (is_percent_encoded and byte == b"%") or ( - byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED - ): - encoded_uri.extend(byte) - continue - encoded_uri.extend("%{0:02x}".format(byte_ord).encode().upper()) - - return encoded_uri.decode(encoding) diff --git a/src/urllib3/packages/rfc3986/parseresult.py b/src/urllib3/packages/rfc3986/parseresult.py deleted file mode 100644 index 74d12c25..00000000 --- a/src/urllib3/packages/rfc3986/parseresult.py +++ /dev/null @@ -1,457 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2015 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module containing the urlparse compatibility logic.""" -from collections import namedtuple - -from . import compat -from . import exceptions -from . import misc -from . import normalizers -from . import uri - -__all__ = ("ParseResult", "ParseResultBytes") - -PARSED_COMPONENTS = ("scheme", "userinfo", "host", "port", "path", "query", "fragment") - - -class ParseResultMixin(object): - def _generate_authority(self, attributes): - # I swear I did not align the comparisons below. That's just how they - # happened to align based on pep8 and attribute lengths. - userinfo, host, port = (attributes[p] for p in ("userinfo", "host", "port")) - if self.userinfo != userinfo or self.host != host or self.port != port: - if port: - port = "{0}".format(port) - return normalizers.normalize_authority( - ( - compat.to_str(userinfo, self.encoding), - compat.to_str(host, self.encoding), - port, - ) - ) - return self.authority - - def geturl(self): - """Shim to match the standard library method.""" - return self.unsplit() - - @property - def hostname(self): - """Shim to match the standard library.""" - return self.host - - @property - def netloc(self): - """Shim to match the standard library.""" - return self.authority - - @property - def params(self): - """Shim to match the standard library.""" - return self.query - - -class ParseResult(namedtuple("ParseResult", PARSED_COMPONENTS), ParseResultMixin): - """Implementation of urlparse compatibility class. - - This uses the URIReference logic to handle compatibility with the - urlparse.ParseResult class. - """ - - slots = () - - def __new__( - cls, - scheme, - userinfo, - host, - port, - path, - query, - fragment, - uri_ref, - encoding="utf-8", - ): - """Create a new ParseResult.""" - parse_result = super(ParseResult, cls).__new__( - cls, - scheme or None, - userinfo or None, - host, - port or None, - path or None, - query, - fragment, - ) - parse_result.encoding = encoding - parse_result.reference = uri_ref - return parse_result - - @classmethod - def from_parts( - cls, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, - encoding="utf-8", - ): - """Create a ParseResult instance from its parts.""" - authority = "" - if userinfo is not None: - authority += userinfo + "@" - if host is not None: - authority += host - if port is not None: - authority += ":{0}".format(port) - uri_ref = uri.URIReference( - scheme=scheme, - authority=authority, - path=path, - query=query, - fragment=fragment, - encoding=encoding, - ).normalize() - userinfo, host, port = authority_from(uri_ref, strict=True) - return cls( - scheme=uri_ref.scheme, - userinfo=userinfo, - host=host, - port=port, - path=uri_ref.path, - query=uri_ref.query, - fragment=uri_ref.fragment, - uri_ref=uri_ref, - encoding=encoding, - ) - - @classmethod - def from_string( - cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True - ): - """Parse a URI from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :param bool strict: Parse strictly according to :rfc:`3986` if True. - If False, parse similarly to the standard library's urlparse - function. - :returns: :class:`ParseResult` or subclass thereof - """ - reference = uri.URIReference.from_string(uri_string, encoding) - if not lazy_normalize: - reference = reference.normalize() - userinfo, host, port = authority_from(reference, strict) - - return cls( - scheme=reference.scheme, - userinfo=userinfo, - host=host, - port=port, - path=reference.path, - query=reference.query, - fragment=reference.fragment, - uri_ref=reference, - encoding=encoding, - ) - - @property - def authority(self): - """Return the normalized authority.""" - return self.reference.authority - - def copy_with( - self, - scheme=misc.UseExisting, - userinfo=misc.UseExisting, - host=misc.UseExisting, - port=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, - ): - """Create a copy of this instance replacing with specified parts.""" - attributes = zip( - PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment) - ) - attrs_dict = {} - for name, value in attributes: - if value is misc.UseExisting: - value = getattr(self, name) - attrs_dict[name] = value - authority = self._generate_authority(attrs_dict) - ref = self.reference.copy_with( - scheme=attrs_dict["scheme"], - authority=authority, - path=attrs_dict["path"], - query=attrs_dict["query"], - fragment=attrs_dict["fragment"], - ) - return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict) - - def encode(self, encoding=None): - """Convert to an instance of ParseResultBytes.""" - encoding = encoding or self.encoding - attrs = dict( - zip( - PARSED_COMPONENTS, - ( - attr.encode(encoding) if hasattr(attr, "encode") else attr - for attr in self - ), - ) - ) - return ParseResultBytes(uri_ref=self.reference, encoding=encoding, **attrs) - - def unsplit(self, use_idna=False): - """Create a URI string from the components. - - :returns: The parsed URI reconstituted as a string. - :rtype: str - """ - parse_result = self - if use_idna and self.host: - hostbytes = self.host.encode("idna") - host = hostbytes.decode(self.encoding) - parse_result = self.copy_with(host=host) - return parse_result.reference.unsplit() - - -class ParseResultBytes( - namedtuple("ParseResultBytes", PARSED_COMPONENTS), ParseResultMixin -): - """Compatibility shim for the urlparse.ParseResultBytes object.""" - - def __new__( - cls, - scheme, - userinfo, - host, - port, - path, - query, - fragment, - uri_ref, - encoding="utf-8", - lazy_normalize=True, - ): - """Create a new ParseResultBytes instance.""" - parse_result = super(ParseResultBytes, cls).__new__( - cls, - scheme or None, - userinfo or None, - host, - port or None, - path or None, - query or None, - fragment or None, - ) - parse_result.encoding = encoding - parse_result.reference = uri_ref - parse_result.lazy_normalize = lazy_normalize - return parse_result - - @classmethod - def from_parts( - cls, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, - encoding="utf-8", - lazy_normalize=True, - ): - """Create a ParseResult instance from its parts.""" - authority = "" - if userinfo is not None: - authority += userinfo + "@" - if host is not None: - authority += host - if port is not None: - authority += ":{0}".format(int(port)) - uri_ref = uri.URIReference( - scheme=scheme, - authority=authority, - path=path, - query=query, - fragment=fragment, - encoding=encoding, - ) - if not lazy_normalize: - uri_ref = uri_ref.normalize() - to_bytes = compat.to_bytes - userinfo, host, port = authority_from(uri_ref, strict=True) - return cls( - scheme=to_bytes(scheme, encoding), - userinfo=to_bytes(userinfo, encoding), - host=to_bytes(host, encoding), - port=port, - path=to_bytes(path, encoding), - query=to_bytes(query, encoding), - fragment=to_bytes(fragment, encoding), - uri_ref=uri_ref, - encoding=encoding, - lazy_normalize=lazy_normalize, - ) - - @classmethod - def from_string( - cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True - ): - """Parse a URI from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :param bool strict: Parse strictly according to :rfc:`3986` if True. - If False, parse similarly to the standard library's urlparse - function. - :returns: :class:`ParseResultBytes` or subclass thereof - """ - reference = uri.URIReference.from_string(uri_string, encoding) - if not lazy_normalize: - reference = reference.normalize() - userinfo, host, port = authority_from(reference, strict) - - to_bytes = compat.to_bytes - return cls( - scheme=to_bytes(reference.scheme, encoding), - userinfo=to_bytes(userinfo, encoding), - host=to_bytes(host, encoding), - port=port, - path=to_bytes(reference.path, encoding), - query=to_bytes(reference.query, encoding), - fragment=to_bytes(reference.fragment, encoding), - uri_ref=reference, - encoding=encoding, - lazy_normalize=lazy_normalize, - ) - - @property - def authority(self): - """Return the normalized authority.""" - return self.reference.authority.encode(self.encoding) - - def copy_with( - self, - scheme=misc.UseExisting, - userinfo=misc.UseExisting, - host=misc.UseExisting, - port=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, - lazy_normalize=True, - ): - """Create a copy of this instance replacing with specified parts.""" - attributes = zip( - PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment) - ) - attrs_dict = {} - for name, value in attributes: - if value is misc.UseExisting: - value = getattr(self, name) - if not isinstance(value, bytes) and hasattr(value, "encode"): - value = value.encode(self.encoding) - attrs_dict[name] = value - authority = self._generate_authority(attrs_dict) - to_str = compat.to_str - ref = self.reference.copy_with( - scheme=to_str(attrs_dict["scheme"], self.encoding), - authority=to_str(authority, self.encoding), - path=to_str(attrs_dict["path"], self.encoding), - query=to_str(attrs_dict["query"], self.encoding), - fragment=to_str(attrs_dict["fragment"], self.encoding), - ) - if not lazy_normalize: - ref = ref.normalize() - return ParseResultBytes( - uri_ref=ref, - encoding=self.encoding, - lazy_normalize=lazy_normalize, - **attrs_dict - ) - - def unsplit(self, use_idna=False): - """Create a URI bytes object from the components. - - :returns: The parsed URI reconstituted as a string. - :rtype: bytes - """ - parse_result = self - if use_idna and self.host: - # self.host is bytes, to encode to idna, we need to decode it - # first - host = self.host.decode(self.encoding) - hostbytes = host.encode("idna") - parse_result = self.copy_with(host=hostbytes) - if self.lazy_normalize: - parse_result = parse_result.copy_with(lazy_normalize=False) - uri = parse_result.reference.unsplit() - return uri.encode(self.encoding) - - -def split_authority(authority): - # Initialize our expected return values - userinfo = host = port = None - # Initialize an extra var we may need to use - extra_host = None - # Set-up rest in case there is no userinfo portion - rest = authority - - if "@" in authority: - userinfo, rest = authority.rsplit("@", 1) - - # Handle IPv6 host addresses - if rest.startswith("["): - host, rest = rest.split("]", 1) - host += "]" - - if ":" in rest: - extra_host, port = rest.split(":", 1) - elif not host and rest: - host = rest - - if extra_host and not host: - host = extra_host - - return userinfo, host, port - - -def authority_from(reference, strict): - try: - subauthority = reference.authority_info() - except exceptions.InvalidAuthority: - if strict: - raise - userinfo, host, port = split_authority(reference.authority) - else: - # Thanks to Richard Barrell for this idea: - # https://twitter.com/0x2ba22e11/status/617338811975139328 - userinfo, host, port = ( - subauthority.get(p) for p in ("userinfo", "host", "port") - ) - - if port: - try: - port = int(port) - except ValueError: - raise exceptions.InvalidPort(port) - return userinfo, host, port diff --git a/src/urllib3/packages/rfc3986/uri.py b/src/urllib3/packages/rfc3986/uri.py deleted file mode 100644 index 61104ab1..00000000 --- a/src/urllib3/packages/rfc3986/uri.py +++ /dev/null @@ -1,152 +0,0 @@ -"""Module containing the implementation of the URIReference class.""" -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Copyright (c) 2015 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from collections import namedtuple - -from . import compat -from . import misc -from . import normalizers -from ._mixin import URIMixin - - -class URIReference(namedtuple("URIReference", misc.URI_COMPONENTS), URIMixin): - """Immutable object representing a parsed URI Reference. - - .. note:: - - This class is not intended to be directly instantiated by the user. - - This object exposes attributes for the following components of a - URI: - - - scheme - - authority - - path - - query - - fragment - - .. attribute:: scheme - - The scheme that was parsed for the URI Reference. For example, - ``http``, ``https``, ``smtp``, ``imap``, etc. - - .. attribute:: authority - - Component of the URI that contains the user information, host, - and port sub-components. For example, - ``google.com``, ``127.0.0.1:5000``, ``username@[::1]``, - ``username:password@example.com:443``, etc. - - .. attribute:: path - - The path that was parsed for the given URI Reference. For example, - ``/``, ``/index.php``, etc. - - .. attribute:: query - - The query component for a given URI Reference. For example, ``a=b``, - ``a=b%20c``, ``a=b+c``, ``a=b,c=d,e=%20f``, etc. - - .. attribute:: fragment - - The fragment component of a URI. For example, ``section-3.1``. - - This class also provides extra attributes for easier access to information - like the subcomponents of the authority component. - - .. attribute:: userinfo - - The user information parsed from the authority. - - .. attribute:: host - - The hostname, IPv4, or IPv6 adddres parsed from the authority. - - .. attribute:: port - - The port parsed from the authority. - """ - - slots = () - - def __new__(cls, scheme, authority, path, query, fragment, encoding="utf-8"): - """Create a new URIReference.""" - ref = super(URIReference, cls).__new__( - cls, scheme or None, authority or None, path or None, query, fragment - ) - ref.encoding = encoding - return ref - - __hash__ = tuple.__hash__ - - def __eq__(self, other): - """Compare this reference to another.""" - other_ref = other - if isinstance(other, tuple): - other_ref = URIReference(*other) - elif not isinstance(other, URIReference): - try: - other_ref = URIReference.from_string(other) - except TypeError: - raise TypeError( - "Unable to compare URIReference() to {0}()".format( - type(other).__name__ - ) - ) - - # See http://tools.ietf.org/html/rfc3986#section-6.2 - naive_equality = tuple(self) == tuple(other_ref) - return naive_equality or self.normalized_equality(other_ref) - - def normalize(self): - """Normalize this reference as described in Section 6.2.2. - - This is not an in-place normalization. Instead this creates a new - URIReference. - - :returns: A new reference object with normalized components. - :rtype: URIReference - """ - # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in - # this method. - return URIReference( - normalizers.normalize_scheme(self.scheme or ""), - normalizers.normalize_authority((self.userinfo, self.host, self.port)), - normalizers.normalize_path(self.path or ""), - normalizers.normalize_query(self.query), - normalizers.normalize_fragment(self.fragment), - self.encoding, - ) - - @classmethod - def from_string(cls, uri_string, encoding="utf-8"): - """Parse a URI reference from the given unicode URI string. - - :param str uri_string: Unicode URI to be parsed into a reference. - :param str encoding: The encoding of the string provided - :returns: :class:`URIReference` or subclass thereof - """ - uri_string = compat.to_str(uri_string, encoding) - - split_uri = misc.URI_MATCHER.match(uri_string).groupdict() - return cls( - split_uri["scheme"], - split_uri["authority"], - normalizers.encode_component(split_uri["path"], encoding), - normalizers.encode_component(split_uri["query"], encoding), - normalizers.encode_component(split_uri["fragment"], encoding), - encoding, - ) diff --git a/src/urllib3/packages/rfc3986/validators.py b/src/urllib3/packages/rfc3986/validators.py deleted file mode 100644 index a60ae91b..00000000 --- a/src/urllib3/packages/rfc3986/validators.py +++ /dev/null @@ -1,435 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2017 Ian Stapleton Cordasco -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module containing the validation logic for rfc3986.""" -from . import exceptions -from . import misc -from . import normalizers - - -class Validator(object): - """Object used to configure validation of all objects in rfc3986. - - .. versionadded:: 1.0 - - Example usage:: - - >>> from rfc3986 import api, validators - >>> uri = api.uri_reference('https://github.com/') - >>> validator = validators.Validator().require_presence_of( - ... 'scheme', 'host', 'path', - ... ).allow_schemes( - ... 'http', 'https', - ... ).allow_hosts( - ... '127.0.0.1', 'github.com', - ... ) - >>> validator.validate(uri) - >>> invalid_uri = rfc3986.uri_reference('imap://mail.google.com') - >>> validator.validate(invalid_uri) - Traceback (most recent call last): - ... - rfc3986.exceptions.MissingComponentError: ('path was required but - missing', URIReference(scheme=u'imap', authority=u'mail.google.com', - path=None, query=None, fragment=None), ['path']) - - """ - - COMPONENT_NAMES = frozenset( - ["scheme", "userinfo", "host", "port", "path", "query", "fragment"] - ) - - def __init__(self): - """Initialize our default validations.""" - self.allowed_schemes = set() - self.allowed_hosts = set() - self.allowed_ports = set() - self.allow_password = True - self.required_components = { - "scheme": False, - "userinfo": False, - "host": False, - "port": False, - "path": False, - "query": False, - "fragment": False, - } - self.validated_components = self.required_components.copy() - - def allow_schemes(self, *schemes): - """Require the scheme to be one of the provided schemes. - - .. versionadded:: 1.0 - - :param schemes: - Schemes, without ``://`` that are allowed. - :returns: - The validator instance. - :rtype: - Validator - """ - for scheme in schemes: - self.allowed_schemes.add(normalizers.normalize_scheme(scheme)) - return self - - def allow_hosts(self, *hosts): - """Require the host to be one of the provided hosts. - - .. versionadded:: 1.0 - - :param hosts: - Hosts that are allowed. - :returns: - The validator instance. - :rtype: - Validator - """ - for host in hosts: - self.allowed_hosts.add(normalizers.normalize_host(host)) - return self - - def allow_ports(self, *ports): - """Require the port to be one of the provided ports. - - .. versionadded:: 1.0 - - :param ports: - Ports that are allowed. - :returns: - The validator instance. - :rtype: - Validator - """ - for port in ports: - port_int = int(port, base=10) - if 0 <= port_int <= 65535: - self.allowed_ports.add(port) - return self - - def allow_use_of_password(self): - """Allow passwords to be present in the URI. - - .. versionadded:: 1.0 - - :returns: - The validator instance. - :rtype: - Validator - """ - self.allow_password = True - return self - - def forbid_use_of_password(self): - """Prevent passwords from being included in the URI. - - .. versionadded:: 1.0 - - :returns: - The validator instance. - :rtype: - Validator - """ - self.allow_password = False - return self - - def check_validity_of(self, *components): - """Check the validity of the components provided. - - This can be specified repeatedly. - - .. versionadded:: 1.1 - - :param components: - Names of components from :attr:`Validator.COMPONENT_NAMES`. - :returns: - The validator instance. - :rtype: - Validator - """ - components = [c.lower() for c in components] - for component in components: - if component not in self.COMPONENT_NAMES: - raise ValueError('"{}" is not a valid component'.format(component)) - self.validated_components.update({component: True for component in components}) - return self - - def require_presence_of(self, *components): - """Require the components provided. - - This can be specified repeatedly. - - .. versionadded:: 1.0 - - :param components: - Names of components from :attr:`Validator.COMPONENT_NAMES`. - :returns: - The validator instance. - :rtype: - Validator - """ - components = [c.lower() for c in components] - for component in components: - if component not in self.COMPONENT_NAMES: - raise ValueError('"{}" is not a valid component'.format(component)) - self.required_components.update({component: True for component in components}) - return self - - def validate(self, uri): - """Check a URI for conditions specified on this validator. - - .. versionadded:: 1.0 - - :param uri: - Parsed URI to validate. - :type uri: - rfc3986.uri.URIReference - :raises MissingComponentError: - When a required component is missing. - :raises UnpermittedComponentError: - When a component is not one of those allowed. - :raises PasswordForbidden: - When a password is present in the userinfo component but is - not permitted by configuration. - :raises InvalidComponentsError: - When a component was found to be invalid. - """ - if not self.allow_password: - check_password(uri) - - required_components = [ - component - for component, required in self.required_components.items() - if required - ] - validated_components = [ - component - for component, required in self.validated_components.items() - if required - ] - if required_components: - ensure_required_components_exist(uri, required_components) - if validated_components: - ensure_components_are_valid(uri, validated_components) - - ensure_one_of(self.allowed_schemes, uri, "scheme") - ensure_one_of(self.allowed_hosts, uri, "host") - ensure_one_of(self.allowed_ports, uri, "port") - - -def check_password(uri): - """Assert that there is no password present in the uri.""" - userinfo = uri.userinfo - if not userinfo: - return - credentials = userinfo.split(":", 1) - if len(credentials) <= 1: - return - raise exceptions.PasswordForbidden(uri) - - -def ensure_one_of(allowed_values, uri, attribute): - """Assert that the uri's attribute is one of the allowed values.""" - value = getattr(uri, attribute) - if value is not None and allowed_values and value not in allowed_values: - raise exceptions.UnpermittedComponentError(attribute, value, allowed_values) - - -def ensure_required_components_exist(uri, required_components): - """Assert that all required components are present in the URI.""" - missing_components = sorted( - [ - component - for component in required_components - if getattr(uri, component) is None - ] - ) - if missing_components: - raise exceptions.MissingComponentError(uri, *missing_components) - - -def is_valid(value, matcher, require): - """Determine if a value is valid based on the provided matcher. - - :param str value: - Value to validate. - :param matcher: - Compiled regular expression to use to validate the value. - :param require: - Whether or not the value is required. - """ - if require: - return value is not None and matcher.match(value) - - # require is False and value is not None - return value is None or matcher.match(value) - - -def authority_is_valid(authority, host=None, require=False): - """Determine if the authority string is valid. - - :param str authority: - The authority to validate. - :param str host: - (optional) The host portion of the authority to validate. - :param bool require: - (optional) Specify if authority must not be None. - :returns: - ``True`` if valid, ``False`` otherwise - :rtype: - bool - """ - validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require) - if validated and host is not None: - return host_is_valid(host, require) - return validated - - -def host_is_valid(host, require=False): - """Determine if the host string is valid. - - :param str host: - The host to validate. - :param bool require: - (optional) Specify if host must not be None. - :returns: - ``True`` if valid, ``False`` otherwise - :rtype: - bool - """ - validated = is_valid(host, misc.HOST_MATCHER, require) - if validated and host is not None and misc.IPv4_MATCHER.match(host): - return valid_ipv4_host_address(host) - elif validated and host is not None and misc.IPv6_MATCHER.match(host): - return misc.IPv6_NO_RFC4007_MATCHER.match(host) is not None - return validated - - -def scheme_is_valid(scheme, require=False): - """Determine if the scheme is valid. - - :param str scheme: - The scheme string to validate. - :param bool require: - (optional) Set to ``True`` to require the presence of a scheme. - :returns: - ``True`` if the scheme is valid. ``False`` otherwise. - :rtype: - bool - """ - return is_valid(scheme, misc.SCHEME_MATCHER, require) - - -def path_is_valid(path, require=False): - """Determine if the path component is valid. - - :param str path: - The path string to validate. - :param bool require: - (optional) Set to ``True`` to require the presence of a path. - :returns: - ``True`` if the path is valid. ``False`` otherwise. - :rtype: - bool - """ - return is_valid(path, misc.PATH_MATCHER, require) - - -def query_is_valid(query, require=False): - """Determine if the query component is valid. - - :param str query: - The query string to validate. - :param bool require: - (optional) Set to ``True`` to require the presence of a query. - :returns: - ``True`` if the query is valid. ``False`` otherwise. - :rtype: - bool - """ - return is_valid(query, misc.QUERY_MATCHER, require) - - -def fragment_is_valid(fragment, require=False): - """Determine if the fragment component is valid. - - :param str fragment: - The fragment string to validate. - :param bool require: - (optional) Set to ``True`` to require the presence of a fragment. - :returns: - ``True`` if the fragment is valid. ``False`` otherwise. - :rtype: - bool - """ - return is_valid(fragment, misc.FRAGMENT_MATCHER, require) - - -def valid_ipv4_host_address(host): - """Determine if the given host is a valid IPv4 address.""" - # If the host exists, and it might be IPv4, check each byte in the - # address. - return all([0 <= int(byte, base=10) <= 255 for byte in host.split(".")]) - - -_COMPONENT_VALIDATORS = { - "scheme": scheme_is_valid, - "path": path_is_valid, - "query": query_is_valid, - "fragment": fragment_is_valid, -} - -_SUBAUTHORITY_VALIDATORS = set(["userinfo", "host", "port"]) - - -def subauthority_component_is_valid(uri, component): - """Determine if the userinfo, host, and port are valid.""" - try: - subauthority_dict = uri.authority_info() - except exceptions.InvalidAuthority: - return False - - # If we can parse the authority into sub-components and we're not - # validating the port, we can assume it's valid. - if component == "host": - return host_is_valid(subauthority_dict["host"]) - elif component != "port": - return True - - try: - port = int(subauthority_dict["port"]) - except TypeError: - # If the port wasn't provided it'll be None and int(None) raises a - # TypeError - return True - - return 0 <= port <= 65535 - - -def ensure_components_are_valid(uri, validated_components): - """Assert that all components are valid in the URI.""" - invalid_components = set([]) - for component in validated_components: - if component in _SUBAUTHORITY_VALIDATORS: - if not subauthority_component_is_valid(uri, component): - invalid_components.add(component) - # Python's peephole optimizer means that while this continue *is* - # actually executed, coverage.py cannot detect that. See also, - # https://bitbucket.org/ned/coveragepy/issues/198/continue-marked-as-not-covered - continue # nocov: Python 2.7, 3.3, 3.4 - - validator = _COMPONENT_VALIDATORS[component] - if not validator(getattr(uri, component)): - invalid_components.add(component) - - if invalid_components: - raise exceptions.InvalidComponentsError(uri, *invalid_components) diff --git a/src/urllib3/packages/six.py b/src/urllib3/packages/six.py index 26bbb434..31442409 100644 --- a/src/urllib3/packages/six.py +++ b/src/urllib3/packages/six.py @@ -1,6 +1,4 @@ -"""Utilities for writing code that runs on Python 2 and 3""" - -# Copyright (c) 2010-2015 Benjamin Peterson +# Copyright (c) 2010-2019 Benjamin Peterson # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,6 +18,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +"""Utilities for writing code that runs on Python 2 and 3""" + from __future__ import absolute_import import functools @@ -29,7 +29,7 @@ import types __author__ = "Benjamin Peterson " -__version__ = "1.10.0" +__version__ = "1.12.0" # Useful for very coarse version differentiation. @@ -242,6 +242,7 @@ class _MovedItems(_LazyModule): MovedAttribute("map", "itertools", "builtins", "imap", "map"), MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("getoutput", "commands", "subprocess"), MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), MovedAttribute( "reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload" @@ -267,12 +268,13 @@ class _MovedItems(_LazyModule): MovedModule("html_entities", "htmlentitydefs", "html.entities"), MovedModule("html_parser", "HTMLParser", "html.parser"), MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("email_mime_image", "email.MIMEImage", "email.mime.image"), MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), MovedModule( "email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart" ), MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), - MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), @@ -339,10 +341,14 @@ class Module_six_moves_urllib_parse(_LazyModule): MovedAttribute("quote_plus", "urllib", "urllib.parse"), MovedAttribute("unquote", "urllib", "urllib.parse"), MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute( + "unquote_to_bytes", "urllib", "urllib.parse", "unquote", "unquote_to_bytes" + ), MovedAttribute("urlencode", "urllib", "urllib.parse"), MovedAttribute("splitquery", "urllib", "urllib.parse"), MovedAttribute("splittag", "urllib", "urllib.parse"), MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("splitvalue", "urllib", "urllib.parse"), MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), MovedAttribute("uses_params", "urlparse", "urllib.parse"), @@ -424,6 +430,8 @@ class Module_six_moves_urllib_request(_LazyModule): MovedAttribute("URLopener", "urllib", "urllib.request"), MovedAttribute("FancyURLopener", "urllib", "urllib.request"), MovedAttribute("proxy_bypass", "urllib", "urllib.request"), + MovedAttribute("parse_http_list", "urllib2", "urllib.request"), + MovedAttribute("parse_keqv_list", "urllib2", "urllib.request"), ] for attr in _urllib_request_moved_attributes: setattr(Module_six_moves_urllib_request, attr.name, attr) @@ -665,6 +673,7 @@ def u(s): StringIO = io.StringIO BytesIO = io.BytesIO + del io _assertCountEqual = "assertCountEqual" if sys.version_info[1] <= 1: _assertRaisesRegex = "assertRaisesRegexp" @@ -718,11 +727,15 @@ def assertRegex(self, *args, **kwargs): exec_ = getattr(moves.builtins, "exec") def reraise(tp, value, tb=None): - if value is None: - value = tp() - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value + try: + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + finally: + value = None + tb = None else: @@ -741,7 +754,10 @@ def exec_(_code_, _globs_=None, _locs_=None): exec_( """def reraise(tp, value, tb=None): - raise tp, value, tb + try: + raise tp, value, tb + finally: + tb = None """ ) @@ -749,15 +765,21 @@ def exec_(_code_, _globs_=None, _locs_=None): if sys.version_info[:2] == (3, 2): exec_( """def raise_from(value, from_value): - if from_value is None: - raise value - raise value from from_value + try: + if from_value is None: + raise value + raise value from from_value + finally: + value = None """ ) elif sys.version_info[:2] > (3, 2): exec_( """def raise_from(value, from_value): - raise value from from_value + try: + raise value from from_value + finally: + value = None """ ) else: @@ -864,10 +886,14 @@ def with_metaclass(meta, *bases): # This requires a bit of explanation: the basic idea is to make a dummy # metaclass for one level of class instantiation that replaces itself with # the actual metaclass. - class metaclass(meta): + class metaclass(type): def __new__(cls, name, this_bases, d): return meta(name, bases, d) + @classmethod + def __prepare__(cls, name, this_bases): + return meta.__prepare__(name, bases) + return type.__new__(metaclass, "temporary_class", (), {}) @@ -884,11 +910,71 @@ def wrapper(cls): orig_vars.pop(slots_var) orig_vars.pop("__dict__", None) orig_vars.pop("__weakref__", None) + if hasattr(cls, "__qualname__"): + orig_vars["__qualname__"] = cls.__qualname__ return metaclass(cls.__name__, cls.__bases__, orig_vars) return wrapper +def ensure_binary(s, encoding="utf-8", errors="strict"): + """Coerce **s** to six.binary_type. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> encoded to `bytes` + - `bytes` -> `bytes` + """ + if isinstance(s, text_type): + return s.encode(encoding, errors) + elif isinstance(s, binary_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + +def ensure_str(s, encoding="utf-8", errors="strict"): + """Coerce *s* to `str`. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if not isinstance(s, (text_type, binary_type)): + raise TypeError("not expecting type '%s'" % type(s)) + if PY2 and isinstance(s, text_type): + s = s.encode(encoding, errors) + elif PY3 and isinstance(s, binary_type): + s = s.decode(encoding, errors) + return s + + +def ensure_text(s, encoding="utf-8", errors="strict"): + """Coerce *s* to six.text_type. + + For Python 2: + - `unicode` -> `unicode` + - `str` -> `unicode` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if isinstance(s, binary_type): + return s.decode(encoding, errors) + elif isinstance(s, text_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + def python_2_unicode_compatible(klass): """ A decorator that defines __unicode__ and __str__ methods under Python 2. diff --git a/src/urllib3/util/ssl_.py b/src/urllib3/util/ssl_.py index 2fd4ca52..90caa071 100644 --- a/src/urllib3/util/ssl_.py +++ b/src/urllib3/util/ssl_.py @@ -2,14 +2,13 @@ import errno import warnings import hmac -import re from binascii import hexlify, unhexlify from hashlib import md5, sha1, sha256 +from .url import IPV4_RE, BRACELESS_IPV6_ADDRZ_RE from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning from ..packages import six -from ..packages.rfc3986 import abnf_regexp SSLContext = None @@ -36,13 +35,6 @@ def _const_compare_digest_backport(a, b): _const_compare_digest = getattr(hmac, "compare_digest", _const_compare_digest_backport) -# Borrow rfc3986's regular expressions for IPv4 -# and IPv6 addresses for use in is_ipaddress() -_IP_ADDRESS_REGEX = re.compile( - r"^(?:%s|%s|%s)$" - % (abnf_regexp.IPv4_RE, abnf_regexp.IPv6_RE, abnf_regexp.IPv6_ADDRZ_RFC4007_RE) -) - try: # Test for SSL features import ssl from ssl import wrap_socket, CERT_REQUIRED @@ -389,7 +381,7 @@ def is_ipaddress(hostname): if six.PY3 and isinstance(hostname, bytes): # IDN A-label bytes are ASCII compatible. hostname = hostname.decode("ascii") - return _IP_ADDRESS_REGEX.match(hostname) is not None + return bool(IPV4_RE.match(hostname) or BRACELESS_IPV6_ADDRZ_RE.match(hostname)) def _is_key_file_encrypted(key_file): diff --git a/src/urllib3/util/url.py b/src/urllib3/util/url.py index f225cd8b..73c39319 100644 --- a/src/urllib3/util/url.py +++ b/src/urllib3/util/url.py @@ -3,10 +3,7 @@ from collections import namedtuple from ..exceptions import LocationParseError -from ..packages import six, rfc3986 -from ..packages.rfc3986.exceptions import RFC3986Exception, ValidationError -from ..packages.rfc3986.validators import Validator -from ..packages.rfc3986 import abnf_regexp, normalizers, compat, misc +from ..packages import six url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"] @@ -15,12 +12,68 @@ # urllib3 infers URLs without a scheme (None) to be http. NORMALIZABLE_SCHEMES = ("http", "https", None) -# Regex for detecting URLs with schemes. RFC 3986 Section 3.1 -SCHEME_REGEX = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+\-]*:|/)") +# Almost all of these patterns were derived from the +# 'rfc3986' module: https://github.com/python-hyper/rfc3986 +PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}") +SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)") +URI_RE = re.compile( + r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?" + r"(?://([^/?#]*))?" + r"([^?#]*)" + r"(?:\?([^#]*))?" + r"(?:#(.*))?$", + re.UNICODE | re.DOTALL, +) + +IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}" +HEX_PAT = "[0-9A-Fa-f]{1,4}" +LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT) +_subs = {"hex": HEX_PAT, "ls32": LS32_PAT} +_variations = [ + # 6( h16 ":" ) ls32 + "(?:%(hex)s:){6}%(ls32)s", + # "::" 5( h16 ":" ) ls32 + "::(?:%(hex)s:){5}%(ls32)s", + # [ h16 ] "::" 4( h16 ":" ) ls32 + "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s", + # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s", + # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s", + # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s", + # [ *4( h16 ":" ) h16 ] "::" ls32 + "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s", + # [ *5( h16 ":" ) h16 ] "::" h16 + "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s", + # [ *6( h16 ":" ) h16 ] "::" + "(?:(?:%(hex)s:){0,6}%(hex)s)?::", +] + +UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._!\-" +IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")" +ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+" +IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]" +REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*" + +IPV4_RE = re.compile("^" + IPV4_PAT + "$") +IPV6_RE = re.compile("^" + IPV6_PAT + "$") +IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$") +BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$") +ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$") + +SUBAUTHORITY_PAT = (u"^(?:(.*)@)?" u"(%s|%s|%s)" u"(?::([0-9]{0,5}))?$") % ( + REG_NAME_PAT, + IPV4_PAT, + IPV6_ADDRZ_PAT, +) +SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL) -PATH_CHARS = ( - abnf_regexp.UNRESERVED_CHARS_SET | abnf_regexp.SUB_DELIMITERS_SET | {":", "@", "/"} +ZONE_ID_CHARS = set( + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789._!-" ) +USERINFO_CHARS = ZONE_ID_CHARS | set("$&'()*+,;=:") +PATH_CHARS = USERINFO_CHARS | {"@", "/"} QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"} @@ -154,20 +207,24 @@ def split_first(s, delims): def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): """Percent-encodes a URI component without reapplying - onto an already percent-encoded component. Based on - rfc3986.normalizers.encode_component() + onto an already percent-encoded component. """ if component is None: return component + component = six.ensure_text(component) + # Try to see if the component we're encoding is already percent-encoded # so we can skip all '%' characters but still encode all others. - percent_encodings = len( - normalizers.PERCENT_MATCHER.findall(compat.to_str(component, encoding)) - ) + percent_encodings = PERCENT_RE.findall(component) + + # Normalize existing percent-encoded bytes. + for enc in percent_encodings: + if not enc.isupper(): + component = component.replace(enc, enc.upper()) uri_bytes = component.encode("utf-8", "surrogatepass") - is_percent_encoded = percent_encodings == uri_bytes.count(b"%") + is_percent_encoded = len(percent_encodings) == uri_bytes.count(b"%") encoded_component = bytearray() @@ -180,17 +237,96 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"): ): encoded_component.extend(byte) continue - encoded_component.extend("%{0:02x}".format(byte_ord).encode().upper()) + encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper())) return encoded_component.decode(encoding) +def _remove_path_dot_segments(path): + # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code + segments = path.split("/") # Turn the path into a list of segments + output = [] # Initialize the variable to use to store output + + for segment in segments: + # '.' is the current directory, so ignore it, it is superfluous + if segment == ".": + continue + # Anything other than '..', should be appended to the output + elif segment != "..": + output.append(segment) + # In this case segment == '..', if we can, we should pop the last + # element + elif output: + output.pop() + + # If the path starts with '/' and the output is empty or the first string + # is non-empty + if path.startswith("/") and (not output or output[0]): + output.insert(0, "") + + # If the path starts with '/.' or '/..' ensure we add one more empty + # string to add a trailing '/' + if path.endswith(("/.", "/..")): + output.append("") + + return "/".join(output) + + +def _normalize_host(host, scheme): + if host: + if isinstance(host, six.binary_type): + host = six.ensure_str(host) + + if scheme in NORMALIZABLE_SCHEMES: + is_ipv6 = IPV6_ADDRZ_RE.match(host) + if is_ipv6: + match = ZONE_ID_RE.search(host) + if match: + start, end = match.span(1) + zone_id = host[start:end] + + if zone_id.startswith("%25") and zone_id != "%25": + zone_id = zone_id[3:] + else: + zone_id = zone_id[1:] + zone_id = "%" + _encode_invalid_chars(zone_id, ZONE_ID_CHARS) + return host[:start].lower() + zone_id + host[end:] + else: + return host.lower() + elif not IPV4_RE.match(host): + return six.ensure_str( + b".".join([_idna_encode(label) for label in host.split(".")]) + ) + return host + + +def _idna_encode(name): + if name and any([ord(x) > 128 for x in name]): + try: + import idna + except ImportError: + six.raise_from( + LocationParseError("Unable to parse URL without the 'idna' module"), + None, + ) + try: + return idna.encode(name.lower(), strict=True, std3_rules=True) + except idna.IDNAError: + six.raise_from( + LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None + ) + return name.lower().encode("ascii") + + def parse_url(url): """ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is performed to parse incomplete urls. Fields not provided will be None. This parser is RFC 3986 compliant. + The parser logic and helper functions are based heavily on + work done in the ``rfc3986`` module. + :param str url: URL to parse into a :class:`.Url` namedtuple. Partly backwards-compatible with :mod:`urlparse`. @@ -208,90 +344,72 @@ def parse_url(url): # Empty return Url() - is_string = not isinstance(url, six.binary_type) - - # RFC 3986 doesn't like URLs that have a host but don't start - # with a scheme and we support URLs like that so we need to - # detect that problem and add an empty scheme indication. - # We don't get hurt on path-only URLs here as it's stripped - # off and given an empty scheme anyways. - if not SCHEME_REGEX.search(url): + source_url = url + if not SCHEME_RE.search(url): url = "//" + url - def idna_encode(name): - if name and any([ord(x) > 128 for x in name]): - try: - import idna - except ImportError: - raise LocationParseError( - "Unable to parse URL without the 'idna' module" - ) - try: - return idna.encode(name.lower(), strict=True, std3_rules=True) - except idna.IDNAError: - raise LocationParseError(u"Name '%s' is not a valid IDNA label" % name) - return name - - try: - split_iri = misc.IRI_MATCHER.match(compat.to_str(url)).groupdict() - iri_ref = rfc3986.IRIReference( - split_iri["scheme"], - split_iri["authority"], - _encode_invalid_chars(split_iri["path"], PATH_CHARS), - _encode_invalid_chars(split_iri["query"], QUERY_CHARS), - _encode_invalid_chars(split_iri["fragment"], FRAGMENT_CHARS), - ) - has_authority = iri_ref.authority is not None - uri_ref = iri_ref.encode(idna_encoder=idna_encode) - except (ValueError, RFC3986Exception): - return six.raise_from(LocationParseError(url), None) - - # rfc3986 strips the authority if it's invalid - if has_authority and uri_ref.authority is None: - raise LocationParseError(url) - - # Only normalize schemes we understand to not break http+unix - # or other schemes that don't follow RFC 3986. - if uri_ref.scheme is None or uri_ref.scheme.lower() in NORMALIZABLE_SCHEMES: - uri_ref = uri_ref.normalize() - - # Validate all URIReference components and ensure that all - # components that were set before are still set after - # normalization has completed. - validator = Validator() try: - validator.check_validity_of(*validator.COMPONENT_NAMES).validate(uri_ref) - except ValidationError: - return six.raise_from(LocationParseError(url), None) + scheme, authority, path, query, fragment = URI_RE.match(url).groups() + normalize_uri = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES + + if scheme: + scheme = scheme.lower() + + if authority: + auth, host, port = SUBAUTHORITY_RE.match(authority).groups() + if auth and normalize_uri: + auth = _encode_invalid_chars(auth, USERINFO_CHARS) + if port == "": + port = None + else: + auth, host, port = None, None, None + + if port is not None: + port = int(port) + if not (0 <= port <= 65535): + raise LocationParseError(url) + + host = _normalize_host(host, scheme) + + if normalize_uri and path: + path = _remove_path_dot_segments(path) + path = _encode_invalid_chars(path, PATH_CHARS) + if normalize_uri and query: + query = _encode_invalid_chars(query, QUERY_CHARS) + if normalize_uri and fragment: + fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS) + + except (ValueError, AttributeError): + return six.raise_from(LocationParseError(source_url), None) # For the sake of backwards compatibility we put empty # string values for path if there are any defined values # beyond the path in the URL. # TODO: Remove this when we break backwards compatibility. - path = uri_ref.path if not path: - if uri_ref.query is not None or uri_ref.fragment is not None: + if query is not None or fragment is not None: path = "" else: path = None # Ensure that each part of the URL is a `str` for # backwards compatibility. - def to_input_type(x): - if x is None: - return None - elif not is_string and not isinstance(x, six.binary_type): - return x.encode("utf-8") - return x + if isinstance(url, six.text_type): + ensure_func = six.ensure_text + else: + ensure_func = six.ensure_str + + def ensure_type(x): + return x if x is None else ensure_func(x) return Url( - scheme=to_input_type(uri_ref.scheme), - auth=to_input_type(uri_ref.userinfo), - host=to_input_type(uri_ref.host), - port=int(uri_ref.port) if uri_ref.port is not None else None, - path=to_input_type(path), - query=to_input_type(uri_ref.query), - fragment=to_input_type(uri_ref.fragment), + scheme=ensure_type(scheme), + auth=ensure_type(auth), + host=ensure_type(host), + port=port, + path=ensure_type(path), + query=ensure_type(query), + fragment=ensure_type(fragment), ) diff --git a/test/test_util.py b/test/test_util.py index 37c09ab2..73fad715 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -154,10 +154,6 @@ def test_invalid_host(self, location): @pytest.mark.parametrize( "url", [ - "http://user\\@google.com", - "http://google\\.com", - "user\\@google.com", - "http://user@user@google.com/", # Invalid IDNA labels u"http://\uD7FF.com", u"http://❤️", @@ -180,7 +176,12 @@ def test_invalid_url(self, url): ), ("HTTPS://Example.Com/?Key=Value", "https://example.com/?Key=Value"), ("Https://Example.Com/#Fragment", "https://example.com/#Fragment"), - ("[::Ff%etH0%Ff]/%ab%Af", "[::ff%25etH0%Ff]/%AB%AF"), + ("[::1%25]", "[::1%25]"), + ("[::Ff%etH0%Ff]/%ab%Af", "[::ff%etH0%FF]/%AB%AF"), + ( + "http://user:pass@[AaAa::Ff%25etH0%Ff]/%ab%Af", + "http://user:pass@[aaaa::ff%etH0%FF]/%AB%AF", + ), # Invalid characters for the query/fragment getting encoded ( 'http://google.com/p[]?parameter[]="hello"#fragment#', @@ -199,6 +200,22 @@ def test_parse_url_normalization(self, url, expected_normalized_url): actual_normalized_url = parse_url(url).url assert actual_normalized_url == expected_normalized_url + @pytest.mark.parametrize("char", [chr(i) for i in range(0x00, 0x21)] + ["\x7F"]) + def test_control_characters_are_percent_encoded(self, char): + percent_char = "%" + (hex(ord(char))[2:].zfill(2).upper()) + url = parse_url( + "http://user{0}@example.com/path{0}?query{0}#fragment{0}".format(char) + ) + + assert url == Url( + "http", + auth="user" + percent_char, + host="example.com", + path="/path" + percent_char, + query="query" + percent_char, + fragment="fragment" + percent_char, + ) + parse_url_host_map = [ ("http://google.com/mail", Url("http", host="google.com", path="/mail")), ("http://google.com/mail/", Url("http", host="google.com", path="/mail/")), @@ -260,6 +277,15 @@ def test_parse_url_normalization(self, url, expected_normalized_url): u"http://Königsgäßchen.de/straße", Url("http", host="xn--knigsgchen-b4a3dun.de", path="/stra%C3%9Fe"), ), + # Percent-encode in userinfo + ( + u"http://user@email.com:password@example.com/", + Url("http", auth="user%40email.com:password", host="example.com", path="/"), + ), + ( + u'http://user":quoted@example.com/', + Url("http", auth="user%22:quoted", host="example.com", path="/"), + ), # Unicode Surrogates (u"http://google.com/\uD800", Url("http", host="google.com", path="%ED%A0%80")), ( From deb21bc858bb45a7052b2ebb0be16aab704b6291 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 26 Jul 2019 12:45:22 -0500 Subject: [PATCH 11/11] Change log level of CertificateError within match_hostname (#1657) --- src/urllib3/connection.py | 2 +- test/test_connection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/urllib3/connection.py b/src/urllib3/connection.py index a138bb2a..86b9198a 100644 --- a/src/urllib3/connection.py +++ b/src/urllib3/connection.py @@ -429,7 +429,7 @@ def _match_hostname(cert, asserted_hostname): try: match_hostname(cert, asserted_hostname) except CertificateError as e: - log.error( + log.warning( "Certificate did not match expected hostname: %s. " "Certificate: %s", asserted_hostname, cert, diff --git a/test/test_connection.py b/test/test_connection.py index 5969a089..77794898 100644 --- a/test/test_connection.py +++ b/test/test_connection.py @@ -32,7 +32,7 @@ def test_match_hostname_mismatch(self): cert = {"subjectAltName": [("DNS", "foo")]} asserted_hostname = "bar" try: - with mock.patch("urllib3.connection.log.error") as mock_log: + with mock.patch("urllib3.connection.log.warning") as mock_log: _match_hostname(cert, asserted_hostname) except CertificateError as e: assert "hostname 'bar' doesn't match 'foo'" in str(e)