From 366ebbb752306b18df94b1c6b9bed8e7c89f317f Mon Sep 17 00:00:00 2001 From: Evan Jones Date: Mon, 7 Oct 2019 19:07:26 -0400 Subject: [PATCH] package_manager: Py3 fix: parse_package_metadata processes strings I attempted to update the version of package_manager to the latest, since I noticed the line numbers in error messages did not match. I got the following error, which I think this change should fix. The problem is that dpkg_parser calls this function with the output of gzip.open(path, 'rb'), so the input is binary (python3 bytes). To fix it, convert the data argument from binary to text if necessary. dpkg_parser command failed: Traceback (most recent call last): File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main "__main__", mod_spec) File "/usr/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File "dpkg_parser/file/downloaded/__main__.py", line 196, in <module> File "dpkg_parser/file/downloaded/__main__.py", line 77, in main File "dpkg_parser/file/downloaded/__main__.py", line 191, in download_package_list File "dpkg_parser/file/downloaded/distroless/package_manager/parse_metadata.py", line 34, in parse_package_metadata File "/usr/lib/python3.6/re.py", line 172, in match return _compile(pattern, flags).match(string) TypeError: cannot use a string pattern on a bytes-like object --- package_manager/parse_metadata.py | 9 +++++++-- package_manager/parse_metadata_test.py | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/package_manager/parse_metadata.py b/package_manager/parse_metadata.py index 738538b9c..41c3a52ae 100644 --- a/package_manager/parse_metadata.py +++ b/package_manager/parse_metadata.py @@ -13,7 +13,7 @@ # limitations under the License. 
import re -from six import itervalues +import six INDEX_KEY = "Package" FILENAME_KEY = "Filename" @@ -22,6 +22,11 @@ def parse_package_metadata(data, mirror_url, snapshot, package_prefix): """ Takes a debian package list, changes the relative urls to absolute urls, and saves the resulting metadata as a json file """ + + # this is called with the output of gzip.open, but should be strings + if isinstance(data, six.binary_type): + data = data.decode('utf-8') + raw_entries = [line.rstrip() for line in data.splitlines()] parsed_entries = {} current_key = None @@ -55,7 +60,7 @@ def parse_package_metadata(data, mirror_url, snapshot, package_prefix): # The Filename Key is a relative url pointing to the .deb package # Here, we're rewriting the metadata with the absolute urls, # which is a concatenation of the mirror + '/debian/' + relative_path - for pkg_data in itervalues(parsed_entries): + for pkg_data in six.itervalues(parsed_entries): if package_prefix: pkg_data[FILENAME_KEY] = package_prefix + pkg_data[FILENAME_KEY] else: diff --git a/package_manager/parse_metadata_test.py b/package_manager/parse_metadata_test.py index 4f83243fb..7d0a4594e 100644 --- a/package_manager/parse_metadata_test.py +++ b/package_manager/parse_metadata_test.py @@ -8,7 +8,8 @@ class TestParseMetadata(unittest.TestCase): def setUp(self): current_dir = os.path.dirname(__file__) filename = os.path.join(current_dir, 'testdata', 'Packages.txt') - with open(filename) as f: + # parse_package_metadata is called on the binary output of gzip.open + with open(filename, 'rb') as f: data = f.read() self.data = data self.mirror_url = "http://debian.org"