From 2c24ffbf04b1fab7f38a9c31497f20d9348641e9 Mon Sep 17 00:00:00 2001 From: Matt Rickard Date: Wed, 5 Jul 2017 15:46:29 -0700 Subject: [PATCH] Refactor dpkg rules * Allows multiple sources on the dpkg() rule * Automatically renames packages that are not valid bazel rule names. Prints out a warning with instructions on how to reference the resulting deb. * Allows multiple packages to be part of the same dpkg() rule * Bumps version of dpkg_parser.par to v0.3 Address feedback More feedback, pylint --- WORKSPACE | 94 +++++++++----------------- base/BUILD | 8 ++- cacerts/cacerts.bzl | 4 +- cc/BUILD | 5 +- java/BUILD | 5 +- package_manager/BUILD | 18 ++++- package_manager/dpkg.bzl | 30 +++++--- package_manager/dpkg_parser.py | 78 ++++++++++++++------- package_manager/package_manager.bzl | 6 +- package_manager/parse_metadata.py | 4 +- package_manager/parse_metadata_test.py | 5 +- package_manager/testdata/checksum.txt | 1 + package_manager/util.py | 15 ++++ package_manager/util_test.py | 10 +++ python2.7/BUILD | 9 +-- 15 files changed, 175 insertions(+), 117 deletions(-) create mode 100644 package_manager/testdata/checksum.txt create mode 100644 package_manager/util.py create mode 100644 package_manager/util_test.py diff --git a/WORKSPACE b/WORKSPACE index 766d39461..97e3a22a3 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -14,7 +14,7 @@ load( "//package_manager:package_manager.bzl", "package_manager_repositories", "dpkg_src", - "dpkg", + "dpkg_list", ) package_manager_repositories() @@ -23,73 +23,43 @@ dpkg_src( name = "debian_jessie", arch = "amd64", distro = "jessie", - url = "http://deb.debian.org", + sha256 = "8ff5e7a54d4e75bbbcd2f43ebc7cb4a082fbc5493bc9fb2dcdaaeacba6e76dee", + snapshot = "20170701T034145Z", + url = "http://snapshot.debian.org/archive", ) dpkg_src( name = "debian_jessie_backports", arch = "amd64", distro = "jessie-backports", - url = "http://deb.debian.org", -) - -# For the glibc base image. 
-dpkg( - name = "libc6", - source = "@debian_jessie//file:Packages.json", -) - -dpkg( - name = "ca-certificates", - source = "@debian_jessie//file:Packages.json", -) - -dpkg( - name = "openssl", - source = "@debian_jessie//file:Packages.json", -) - -dpkg( - name = "libssl1.0.0", - source = "@debian_jessie//file:Packages.json", -) - -# For Java -dpkg( - name = "zlib1g", - source = "@debian_jessie//file:Packages.json", -) - -dpkg( - name = "openjdk-8-jre-headless", - source = "@debian_jessie_backports//file:Packages.json", -) - -dpkg( - name = "libgcc1", - source = "@debian_jessie//file:Packages.json", -) - -http_file( - name = "libstdcpp6", - sha256 = "f1509bbabd78e89c861de16931aec5988e1215649688fd4f8dfe1af875a7fbef", - url = "http://deb.debian.org/debian/pool/main/g/gcc-4.9/libstdc++6_4.9.2-10_amd64.deb", -) - -# For Python -dpkg( - name = "libpython2.7-minimal", - source = "@debian_jessie//file:Packages.json", -) - -dpkg( - name = "python2.7-minimal", - source = "@debian_jessie//file:Packages.json", -) - -dpkg( - name = "libpython2.7-stdlib", - source = "@debian_jessie//file:Packages.json", + sha256 = "2a493443581bdb4be071359f7fb62122741f233d3596545d88239a4e4ec445e8", + snapshot = "20170701T034145Z", + url = "http://snapshot.debian.org/archive", +) + +dpkg_list( + name = "package_bundle", + packages = [ + "libc6", + "ca-certificates", + "openssl", + "libssl1.0.0", + + #java + "zlib1g", + "libgcc1", + "libstdc++6", + "openjdk-8-jre-headless", + + #python + "libpython2.7-minimal", + "python2.7-minimal", + "libpython2.7-stdlib", + ], + sources = [ + "@debian_jessie//file:Packages.json", + "@debian_jessie_backports//file:Packages.json", + ], ) # For Jetty diff --git a/base/BUILD b/base/BUILD index 65d489f40..507bb0e54 100644 --- a/base/BUILD +++ b/base/BUILD @@ -28,13 +28,15 @@ cacerts( name = "cacerts", ) +load("@package_bundle//file:packages.bzl", "packages") + docker_build( name = "base", base = ":with_tmp", debs = [ - "@libc6//file:pkg.deb", - 
"@libssl1.0.0//file:pkg.deb", - "@openssl//file:pkg.deb", + packages["libc6"], + packages["libssl1.0.0"], + packages["openssl"], ], tars = [ ":base_passwd.passwd.tar", diff --git a/cacerts/cacerts.bzl b/cacerts/cacerts.bzl index b2a97d167..6e5e89ad1 100644 --- a/cacerts/cacerts.bzl +++ b/cacerts/cacerts.bzl @@ -6,10 +6,12 @@ def _impl(ctx): inputs = [ctx.executable._extract, ctx.file.deb], outputs = [ctx.outputs.out]) +load("@package_bundle//file:packages.bzl", "packages") + cacerts = rule( attrs = { "deb": attr.label( - default = Label("@ca-certificates//file:pkg.deb"), + default = Label(packages["ca-certificates"]), allow_files = [".deb"], single_file = True, ), diff --git a/cc/BUILD b/cc/BUILD index 337b54d19..53cc811a9 100644 --- a/cc/BUILD +++ b/cc/BUILD @@ -1,13 +1,14 @@ package(default_visibility = ["//visibility:public"]) load("@io_bazel_rules_docker//docker:docker.bzl", "docker_build") +load("@package_bundle//file:packages.bzl", "packages") # An intermediate image for Java and other "mostly statically" compiled languages docker_build( name = "cc", base = "//base:base", debs = [ - "@libgcc1//file:pkg.deb", - "@libstdcpp6//file", + packages["libgcc1"], + packages["libstdc++6"], ], ) diff --git a/java/BUILD b/java/BUILD index ad5ef32cb..f58629f1c 100644 --- a/java/BUILD +++ b/java/BUILD @@ -1,13 +1,14 @@ package(default_visibility = ["//visibility:public"]) load("@io_bazel_rules_docker//docker:docker.bzl", "docker_build") +load("@package_bundle//file:packages.bzl", "packages") docker_build( name = "java8", base = "//cc:cc", debs = [ - "@zlib1g//file:pkg.deb", - "@openjdk-8-jre-headless//file:pkg.deb", + packages["zlib1g"], + packages["openjdk-8-jre-headless"], ], entrypoint = [ "/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java", diff --git a/package_manager/BUILD b/package_manager/BUILD index 603217e5a..433493a01 100644 --- a/package_manager/BUILD +++ b/package_manager/BUILD @@ -5,7 +5,10 @@ par_binary( srcs = glob(["**/*.py"]), main = "dpkg_parser.py", 
visibility = ["//visibility:public"], - deps = [":parse_metadata"], + deps = [ + ":parse_metadata", + ":util", + ], ) py_library( @@ -13,6 +16,11 @@ py_library( srcs = ["parse_metadata.py"], ) +py_library( + name = "util", + srcs = ["util.py"], +) + py_test( name = "parse_metadata_test", size = "small", @@ -20,3 +28,11 @@ py_test( data = ["testdata/Packages.txt"], deps = [":parse_metadata"], ) + +py_test( + name = "util_test", + size = "small", + srcs = ["util_test.py"], + data = ["testdata/checksum.txt"], + deps = [":util"], +) diff --git a/package_manager/dpkg.bzl b/package_manager/dpkg.bzl index 37d88f6dd..ea4b1eec7 100644 --- a/package_manager/dpkg.bzl +++ b/package_manager/dpkg.bzl @@ -1,25 +1,28 @@ -def _dpkg_impl(repository_ctx): +def _dpkg_list_impl(repository_ctx): repository_ctx.file("file/BUILD", """ package(default_visibility = ["//visibility:public"]) -exports_files(["pkg.deb"]) +deb_files = glob(["*.deb"]) +exports_files(deb_files + ["packages.bzl"]) """) args = [ repository_ctx.path(repository_ctx.attr._dpkg_parser), - "--packages-file", repository_ctx.path(repository_ctx.attr.source), - "--package-name", repository_ctx.name + "--package-files", ",".join([repository_ctx.path(src_path) for src_path in repository_ctx.attr.sources]), + "--packages", ",".join(repository_ctx.attr.packages), + "--workspace-name", repository_ctx.name, ] result = repository_ctx.execute(args) if result.return_code: fail("dpkg_parser command failed: %s (%s)" % (result.stderr, " ".join(args))) -_dpkg = repository_rule( - _dpkg_impl, +_dpkg_list = repository_rule( + _dpkg_list_impl, attrs = { - "source": attr.label( - allow_single_file = True, + "sources": attr.label_list( + allow_files = True, ), + "packages": attr.string_list(), "_dpkg_parser": attr.label( executable = True, default = Label("@dpkg_parser//file:dpkg_parser.par"), @@ -38,8 +41,11 @@ exports_files(["Packages.json"]) "--download-and-extract-only=True", "--mirror-url=" + repository_ctx.attr.url, "--arch=" + 
repository_ctx.attr.arch, - "--distro=" + repository_ctx.attr.distro + "--distro=" + repository_ctx.attr.distro, + "--snapshot=" + repository_ctx.attr.snapshot, + "--sha256=" + repository_ctx.attr.sha256, ] + result = repository_ctx.execute(args) if result.return_code: fail("dpkg_parser command failed: %s (%s)" % (result.stderr, " ".join(args))) @@ -50,6 +56,8 @@ _dpkg_src = repository_rule( "url": attr.string(), "arch": attr.string(), "distro": attr.string(), + "snapshot": attr.string(), + "sha256": attr.string(), "_dpkg_parser": attr.label( executable = True, default = Label("@dpkg_parser//file:dpkg_parser.par"), @@ -58,8 +66,8 @@ _dpkg_src = repository_rule( }, ) -def dpkg(**kwargs): - _dpkg(**kwargs) +def dpkg_list(**kwargs): + _dpkg_list(**kwargs) def dpkg_src(**kwargs): _dpkg_src(**kwargs) diff --git a/package_manager/dpkg_parser.py b/package_manager/dpkg_parser.py index f244a51ff..a730a445d 100644 --- a/package_manager/dpkg_parser.py +++ b/package_manager/dpkg_parser.py @@ -14,25 +14,32 @@ import argparse import gzip -import io import urllib2 import json +import os from package_manager.parse_metadata import parse_package_metadata +from package_manager import util + +OUT_FOLDER = "file" +PACKAGES_FILE_NAME = os.path.join(OUT_FOLDER,"Packages.json") +PACKAGE_MAP_FILE_NAME = os.path.join(OUT_FOLDER,"packages.bzl") +DEB_FILE_NAME = os.path.join(OUT_FOLDER,"pkg.deb") -PACKAGES_FILE_NAME = "file/Packages.json" -DEB_FILE_NAME = "file/pkg.deb" FILENAME_KEY = "Filename" +SHA256_KEY = "SHA256" parser = argparse.ArgumentParser( description="Downloads a deb package from a package source file" ) -parser.add_argument("--packages-file", action='store', - help='The file path of the Packages.gz file') -parser.add_argument("--package-name", action='store', - help='The name of the package to search for and download') +parser.add_argument("--package-files", action='store', + help='A list of Packages.gz files to use') +parser.add_argument("--packages", action='store', + help='A 
comma delimited list of packages to search for and download') +parser.add_argument("--workspace-name", action='store', + help='The name of the current bazel workspace') parser.add_argument("--download-and-extract-only", action='store', help='If True, download Packages.gz and make urls absolute from mirror url') @@ -42,31 +49,49 @@ help='The target architecture for the package list') parser.add_argument("--distro", action='store', help='The target distribution for the package list') +parser.add_argument("--snapshot", action='store', + help='The snapshot date to download') +parser.add_argument("--sha256", action='store', + help='The sha256 checksum to validate for the Packages.gz file') def main(): """ A tool for downloading debian packages and package metadata """ args = parser.parse_args() if args.download_and_extract_only: - download_package_list(args.mirror_url, args.distro, args.arch) + download_package_list(args.mirror_url, args.distro, args.arch, args.snapshot, args.sha256) else: - download_dpkg(args.packages_file, args.package_name) + download_dpkg(args.package_files, args.packages, args.workspace_name) -def download_dpkg(packages_file, package_name): +def download_dpkg(package_files, packages, workspace_name): """ Using an unzipped, json package file with full urls, downloads a .deb package Uses the 'Filename' key to download the .deb package """ - with open(packages_file, 'rb') as f: - metadata = json.load(f) - pkg = metadata[package_name] - buf = urllib2.urlopen(pkg[FILENAME_KEY]) - with open(DEB_FILE_NAME, 'w') as f: - f.write(buf.read()) - - -def download_package_list(mirror_url, distro, arch): + package_to_rule_map = {} + for pkg_name in packages.split(","): + for package_file in package_files.split(","): + with open(package_file, 'rb') as f: + metadata = json.load(f) + if pkg_name in metadata: + pkg = metadata[pkg_name] + buf = urllib2.urlopen(pkg[FILENAME_KEY]) + package_to_rule_map[pkg_name] = util.package_to_rule(workspace_name, pkg_name) + out_file 
= os.path.join("file", util.encode_package_name(pkg_name)) + with open(out_file, 'w') as f: + f.write(buf.read()) + expected_checksum = util.sha256_checksum(out_file) + actual_checksum = pkg[SHA256_KEY] + if actual_checksum != expected_checksum: + raise Exception("Wrong checksum for package %s. Expected: %s, Actual: %s", pkg_name, expected_checksum, actual_checksum) + break + else: + raise Exception("Package %s not found in any of the sources" % pkg_name) + with open(PACKAGE_MAP_FILE_NAME, 'w') as f: + f.write("packages = " + json.dumps(package_to_rule_map)) + +def download_package_list(mirror_url, distro, arch, snapshot, sha256): """Downloads a debian package list, expands the relative urls, and saves the metadata as a json file @@ -96,17 +121,24 @@ def download_package_list(mirror_url, distro, arch): SHA256: 52ec3ac93cf8ba038fbcefe1e78f26ca1d59356cdc95e60f987c3f52b3f5e7ef """ - url = "%s/debian/dists/%s/main/binary-%s/Packages.gz" % ( + url = "%s/debian/%s/dists/%s/main/binary-%s/Packages.gz" % ( mirror_url, + snapshot, distro, arch ) buf = urllib2.urlopen(url) - f = gzip.GzipFile(fileobj=io.BytesIO(buf.read())) - data = f.read() - metadata = parse_package_metadata(data, mirror_url) + with open("Packages.gz", 'w') as f: + f.write(buf.read()) + actual_sha256 = util.sha256_checksum("Packages.gz") + if sha256 != actual_sha256: + raise Exception("sha256 of Packages.gz don't match: Expected: %s, Actual:%s" %(sha256, actual_sha256)) + with gzip.open("Packages.gz", 'rb') as f: + data = f.read() + metadata = parse_package_metadata(data, mirror_url, snapshot) with open(PACKAGES_FILE_NAME, 'w') as f: json.dump(metadata, f) if __name__ == "__main__": main() + diff --git a/package_manager/package_manager.bzl b/package_manager/package_manager.bzl index 7024b8618..1c8105f0b 100644 --- a/package_manager/package_manager.bzl +++ b/package_manager/package_manager.bzl @@ -1,9 +1,9 @@ -load(":dpkg.bzl", "dpkg", "dpkg_src") +load(":dpkg.bzl", "dpkg_list", "dpkg_src") def 
package_manager_repositories(): native.http_file( name = "dpkg_parser", - url = ('https://storage.googleapis.com/distroless/package_manager_tools/0.1/dpkg_parser.par'), + url = ('https://storage.googleapis.com/distroless/package_manager_tools/v0.3/dpkg_parser.par'), executable = True, - sha256 = "5fc18fbd571996010409162fe0124cd308b85a9610f1ceb4f8b3048f312b9cd0", + sha256 = "41683aa3e3202e3ca2a5d13e84e03853414f7eaa1a87f9313d55ec4b35f8c31c", ) diff --git a/package_manager/parse_metadata.py b/package_manager/parse_metadata.py index 9e9b1a7c8..aeff11575 100644 --- a/package_manager/parse_metadata.py +++ b/package_manager/parse_metadata.py @@ -17,7 +17,7 @@ FILENAME_KEY = "Filename" SEPARATOR = ":" -def parse_package_metadata(data, mirror_url): +def parse_package_metadata(data, mirror_url, snapshot): """ Takes a debian package list, changes the relative urls to absolute urls, and saves the resulting metadata as a json file """ raw_entries = [line.rstrip() for line in data.splitlines()] @@ -54,5 +54,5 @@ def parse_package_metadata(data, mirror_url): # Here, we're rewriting the metadata with the absolute urls, # which is a concatenation of the mirror + '/debian/' + relative_path for pkg_data in parsed_entries.itervalues(): - pkg_data[FILENAME_KEY] = mirror_url + "/debian/" + pkg_data[FILENAME_KEY] + pkg_data[FILENAME_KEY] = mirror_url + "/debian/" + snapshot + "/" + pkg_data[FILENAME_KEY] return parsed_entries diff --git a/package_manager/parse_metadata_test.py b/package_manager/parse_metadata_test.py index 755bd0cbe..3cc64e731 100644 --- a/package_manager/parse_metadata_test.py +++ b/package_manager/parse_metadata_test.py @@ -5,7 +5,6 @@ class TestParseMetadata(unittest.TestCase): - def setUp(self): current_dir = os.path.dirname(__file__) filename = os.path.join(current_dir, 'testdata', 'Packages.txt') @@ -13,13 +12,13 @@ def setUp(self): data = f.read() self.data = data self.mirror_url = "http://debian.org" - self.metadata = parse_package_metadata(self.data, 
self.mirror_url) + self.metadata = parse_package_metadata(self.data, self.mirror_url, "20170701") def test_url_rewrite(self): """ Relative url should have gotten rewritten with absolute url """ self.assertEqual( self.metadata["libnewlib-dev"]["Filename"], - self.mirror_url + "/debian/" + "pool/main/n/newlib/libnewlib-dev_2.1.0+git20140818.1a8323b-2_all.deb") + self.mirror_url + "/debian/20170701/" + "pool/main/n/newlib/libnewlib-dev_2.1.0+git20140818.1a8323b-2_all.deb") def test_get_all_packages(self): """ Parser should identify all packages """ diff --git a/package_manager/testdata/checksum.txt b/package_manager/testdata/checksum.txt new file mode 100644 index 000000000..a526ebc92 --- /dev/null +++ b/package_manager/testdata/checksum.txt @@ -0,0 +1 @@ +file for testings sha256 diff --git a/package_manager/util.py b/package_manager/util.py new file mode 100644 index 000000000..cbdf035f3 --- /dev/null +++ b/package_manager/util.py @@ -0,0 +1,15 @@ +import hashlib +import base64 + +def sha256_checksum(filename, block_size=65536): + sha256 = hashlib.sha256() + with open(filename, 'rb') as f: + for block in iter(lambda: f.read(block_size), b''): + sha256.update(block) + return sha256.hexdigest() + +def package_to_rule(workspace_name, s): + return "@" + workspace_name + "//file:" + encode_package_name(s) + +def encode_package_name(s): + return base64.urlsafe_b64encode(s) + ".deb" diff --git a/package_manager/util_test.py b/package_manager/util_test.py new file mode 100644 index 000000000..1c91bcd72 --- /dev/null +++ b/package_manager/util_test.py @@ -0,0 +1,10 @@ +import unittest +from package_manager import util + +CHECKSUM_TXT = "1915adb697103d42655711e7b00a7dbe398a33d7719d6370c01001273010d069" + +class TestUtil(unittest.TestCase): + + def test_sha256(self): + actual = util.sha256_checksum("checksum_txt") + self.assertEqual(CHECKSUM_TXT, actual) diff --git a/python2.7/BUILD b/python2.7/BUILD index c73659490..17ee9b43b 100644 --- a/python2.7/BUILD +++ b/python2.7/BUILD 
@@ -2,15 +2,16 @@ package(default_visibility = ["//visibility:public"]) load("@io_bazel_rules_docker//docker:docker.bzl", "docker_build") load("@runtimes_common//structure_tests:tests.bzl", "structure_test") +load("@package_bundle//file:packages.bzl", "packages") docker_build( name = "python27", base = "//base:base", debs = [ - "@zlib1g//file:pkg.deb", - "@python2.7-minimal//file:pkg.deb", - "@libpython2.7-minimal//file:pkg.deb", - "@libpython2.7-stdlib//file:pkg.deb", + packages["zlib1g"], + packages["python2.7-minimal"], + packages["libpython2.7-minimal"], + packages["libpython2.7-stdlib"], ], entrypoint = [ "/usr/bin/python2.7",