Skip to content

Commit

Permalink
package_manager: Py3 fix: parse_package_metadata processes strings
Browse files Browse the repository at this point in the history
I attempted to update the version of package_manager to the latest,
since I noticed the line numbers in error messages did not match. I
got the following error, which I think this change should fix.

The problem is that dpkg_parser calls this function with the output
of gzip.open(path, 'rb'), so the input is binary (bytes in Python 3,
not str). To fix it, decode the data argument from bytes to text if
necessary.

dpkg_parser command failed: Traceback (most recent call last):
File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "dpkg_parser/file/downloaded/__main__.py", line 196, in <module>
File "dpkg_parser/file/downloaded/__main__.py", line 77, in main
File "dpkg_parser/file/downloaded/__main__.py", line 191, in download_package_list
File "dpkg_parser/file/downloaded/distroless/package_manager/parse_metadata.py",
    line 34, in parse_package_metadata
File "/usr/lib/python3.6/re.py", line 172, in match
return _compile(pattern, flags).match(string)
TypeError: cannot use a string pattern on a bytes-like object
  • Loading branch information
Evan Jones committed Oct 7, 2019
1 parent f905a66 commit 366ebbb
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
9 changes: 7 additions & 2 deletions package_manager/parse_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.
import re

from six import itervalues
import six

INDEX_KEY = "Package"
FILENAME_KEY = "Filename"
Expand All @@ -22,6 +22,11 @@
def parse_package_metadata(data, mirror_url, snapshot, package_prefix):
""" Takes a debian package list, changes the relative urls to absolute urls,
and saves the resulting metadata as a json file """

# this is called with the output of gzip.open, but should be strings
if isinstance(data, six.binary_type):
data = data.decode('utf-8')

raw_entries = [line.rstrip() for line in data.splitlines()]
parsed_entries = {}
current_key = None
Expand Down Expand Up @@ -55,7 +60,7 @@ def parse_package_metadata(data, mirror_url, snapshot, package_prefix):
# The Filename Key is a relative url pointing to the .deb package
# Here, we're rewriting the metadata with the absolute urls,
# which is a concatenation of the mirror + '/debian/' + relative_path
for pkg_data in itervalues(parsed_entries):
for pkg_data in six.itervalues(parsed_entries):
if package_prefix:
pkg_data[FILENAME_KEY] = package_prefix + pkg_data[FILENAME_KEY]
else:
Expand Down
3 changes: 2 additions & 1 deletion package_manager/parse_metadata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ class TestParseMetadata(unittest.TestCase):
def setUp(self):
current_dir = os.path.dirname(__file__)
filename = os.path.join(current_dir, 'testdata', 'Packages.txt')
with open(filename) as f:
# parse_package_metadata is called on the binary output of gzip.open
with open(filename, 'rb') as f:
data = f.read()
self.data = data
self.mirror_url = "http://debian.org"
Expand Down

0 comments on commit 366ebbb

Please sign in to comment.