Skip to content

Commit

Permalink
Add support for Packages.xz
Browse files Browse the repository at this point in the history
- Also update workspace to use Packages.xz instead of Packages.gz
- Users may still choose to use Packages.gz if they explicitly specify packages_url
- this is a breaking change to dpkg_src rule
  - rename input: packages_gz_url -> packages_url
  - when using snapshots, only Packages.xz is looked up; the sha256 must reference
    a Packages.xz file, and Packages.gz will not be used.

Signed-off-by: Appu Goundan <[email protected]>
  • Loading branch information
loosebazooka committed Aug 6, 2021
1 parent 70f4a32 commit 91c6625
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 79 deletions.
2 changes: 1 addition & 1 deletion WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ load(
dpkg_src(
name = arch + "_" + name + "_security",
package_prefix = "https://snapshot.debian.org/archive/debian-security/{}/".format(DEBIAN_SECURITY_SNAPSHOT),
packages_gz_url = "https://snapshot.debian.org/archive/debian-security/{}/dists/{}/updates/main/binary-{}/Packages.gz".format(DEBIAN_SECURITY_SNAPSHOT, distro, arch),
packages_url = "https://snapshot.debian.org/archive/debian-security/{}/dists/{}/updates/main/binary-{}/Packages.xz".format(DEBIAN_SECURITY_SNAPSHOT, distro, arch),
sha256 = SHA256s[arch][name]["security"],
)
for arch in ARCHITECTURES
Expand Down
34 changes: 17 additions & 17 deletions checksums.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -17,44 +17,44 @@ VERSIONS = [
("debian10", "buster"),
]

DEBIAN_SNAPSHOT = "20210730T091322Z"
DEBIAN_SNAPSHOT = "20210806T092041Z"

DEBIAN_SECURITY_SNAPSHOT = "20210729T232246Z"
DEBIAN_SECURITY_SNAPSHOT = "20210805T221508Z"

SHA256s = {
"amd64": {
"debian10": {
"main": "cac8ec1569eb723def822f5b44cd6b0c159062af7f3d138d662ee734298f88aa",
"updates": "b3759d1c539914be5553a925c7f10a8726819ff9637400f6b334d8ae3a1117da",
"security": "93797038e310073971f9a794f3b823d31402b1076507f3aa2d3c6b16e84e6586",
"main": "3530cbc6c78b6cadda80c10d949f511abd4a7f33d3492ed17d36a7ecc591a5fd",
"updates": "d9c9c7624856a0b66caabdc7596d7e1dd98c3795652728f72c153417fa1aa441",
"security": "eace8da20f2bcc9c5540b04d09f21e9434c0cf2784c6a67b7f4c8d3fa55b34ba",
},
},
"arm": {
"debian10": {
"main": "1a971c6a8dd807e476c26705ebb56eeb4be4f626d67654085cc27b5241e6d7f7",
"updates": "0dbc70e6bcb7e6f8753e0f5dc2df00c79b584b26870b405127e02dcde18b723b",
"security": "e64a89286b5cc5330e6507002b93248d6feb9620a70ed7046964ab71a60fd0a1",
"main": "7f51ba4df1837b4f5b299aa46e533fd006e2dc1b07727e7b32fe87bea9a8be5d",
"updates": "aee652955e83b5aa1cd4a4b8c48c68f8fce8d22cc7ab07ea51c58e022a10c8a7",
"security": "403a7eaa02e79cb2f0bcedbdcc02013efdccf28eda15c671e4ed92c61b8d0bac",
},
},
"arm64": {
"debian10": {
"main": "764700430a2e940aed47f91c19ea767b47945b082faec0610952a2f400bc7a3b",
"updates": "677f4154256ef1edd3091eb24bba7c8fc3f7c564effad705f68eb1d4cd17bd82",
"security": "38f4ce26137da63aecfd483f4979b0259f4667bb62d42b6ad9e479243e31c56c",
"main": "cf63979c9d3f7a411f30f62632626552436a583531a5bdcfc051d63fda8af8a3",
"updates": "c82c25bcec6b5f2d375e30b8afb8ccf98ef3a2a20dcce1b8e6fa80562bc8195a",
"security": "7bd46c2456e14cea0cd1c769539a7ecd39d7df0fb82dd48791825a5e93bb8c29",
},
},
"s390x": {
"debian10": {
"main": "faaa3f0282c888ac0ceaa4c28a0d39355df99477fadbbc3f78d2c11808082721",
"updates": "45a5e37dc68bfd2d248bf99267242a293fd13b9f56e857d2bbd560a8378ee428",
"security": "7fe06895b1e0ce2ab4422c13a79f1fc4483f0eac8952bd1325e0fbd0e8293698",
"main": "449258775fd2d7f1a6b30cb5537c25ba9ef88b0b3a41269e61005de6d8a6fb5e",
"updates": "ab318a9532ec967f496284120c2450c27a15dfad97ea326c0c1698f39b9e80ad",
"security": "bce5bda5d51f7ae6e461345e5f0f7242abec769975ff590c7908dc546380b167",
},
},
"ppc64le": {
"debian10": {
"main": "0321fc30f8277db65e80313b11eee0d0f8c9aa48adf0a670bb6826cd3cdefdaf",
"updates": "be0d6353263b069fda7a3cef92f11e3783db3af3dfa8c825c1ed7672dc86ae9c",
"security": "16a301525791ff9f20c3c4cfa0208c4864bda365b5970570e14d5cd1d12e8a4e",
"main": "2d4499fd08d0e2d73bee40c114198ac93098e8d14530e6f5e19a6838f5774b16",
"updates": "f3d29f5654fc1bfdc4f96f4cd02b8a4507b8869da7ee31a354ef856e227633e0",
"security": "9b2b1ebabba4457b07bf3be90ea02d262dbeffbbdad6a81d08a5924747f2700e",
},
},
}
38 changes: 19 additions & 19 deletions package_manager/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Minimal python library to download Debian packages from a snapshot of a Debian p

`dpkg_src` and `dpkg_list` are [repository rules](https://docs.bazel.build/versions/master/skylark/repository_rules.html), and therefore made to be used in the `WORKSPACE`.

First, set up the package source with `dpkg_src` rule. This example uses a snapshot of debian stretch from November 1st 2017. The rule outputs a `file:Packages.json` which contains a parsed and formatted `Packages.gz` for `dpkg_list` to consume.
First, set up the package source with the `dpkg_src` rule. This example uses a snapshot of Debian stretch from November 1st 2017. The rule outputs a `file:Packages.json` which contains a parsed and formatted `Packages.xz` (*not* `Packages.gz`) for `dpkg_list` to consume.

```python
dpkg_src(
Expand All @@ -21,15 +21,15 @@ dpkg_src(
)
```

You can also set up the package source using the full url for the `Packages.gz` file. The `package_prefix` is used to
prepend to the value of `Filename` in the `Packages.gz` file. In the following example, if the value of `Filename` is
`pool/jdk1.8/b/bazel/bazel_0.7.0_amd64.deb`, then the `.deb` artifact will later be downloaded from
`http://storage.googleapis.com/bazel-apt/pool/jdk1.8/b/bazel/bazel_0.7.0_amd64.deb`.
You can also set up the package source using the full url for the `Packages.xz` or `Packages.gz` file.
The `package_prefix` is used to prepend to the value of `Filename` in the archive. In the following
example, if the value of `Filename` is `pool/jdk1.8/b/bazel/bazel_0.7.0_amd64.deb`, then the `.deb` artifact
will later be downloaded from `http://storage.googleapis.com/bazel-apt/pool/jdk1.8/b/bazel/bazel_0.7.0_amd64.deb`.

```python
dpkg_src(
name = "bazel_apt",
packages_gz_url = "http://storage.googleapis.com/bazel-apt/dists/stable/jdk1.8/binary-amd64/Packages.gz",
packages_url = "http://storage.googleapis.com/bazel-apt/dists/stable/jdk1.8/binary-amd64/Packages.gz",
package_prefix = "http://storage.googleapis.com/bazel-apt/",
sha256 = "0fc4c6988ebf24705cfab0050cb5ad58e5b2aeb0e8cfb8921898a1809042416c",
)
Expand Down Expand Up @@ -77,17 +77,17 @@ container_image(

# Reference

## dpkg_src
## `dpkg_src`

```python
dpkg_src(name, url, arch, distro, snapshot, packages_gz_url, package_prefix, sha256, dpkg_parser)
dpkg_src(name, url, arch, distro, snapshot, packages_url, package_prefix, sha256, dpkg_parser)
```

A rule that downloads a `Packages.gz` snapshot file and parses it into a readable format for `dpkg_list`.
It supports snapshots from [http://snapshot.debian.org/](http://snapshot.debian.org/). (You can find out more about the format and sources available there.)
It also supports retrieving `Packages.gz` file from a given full url.
A rule that downloads a `Packages.xz` snapshot file and parses it into a readable format for `dpkg_list`.
It supports snapshots from [http://snapshot.debian.org/](http://snapshot.debian.org/) using `Packages.xz`. (You can find out more about the format and sources available there.)
It also supports retrieving a `Packages.xz` or `Packages.gz` file from a given full url.

Either a set of {`url`, `arch`, `distro`, `snapshot`} or a set of {`packages_gz_url`, `package_prefix`} must be set.
Either a set of {`url`, `arch`, `distro`, `snapshot`} or a set of {`packages_url`, `package_prefix`} must be set.

<table class="table table-condensed table-bordered table-params">
<colgroup>
Expand All @@ -110,7 +110,7 @@ Either a set of {`url`, `arch`, `distro`, `snapshot`} or a set of {`packages_gz_
<td><code>url</code></td>
<td>
<p><code>the base url of the package repository</code></p>
<p>The url that hosts snapshots of Packages.gz files.</p>
<p>The url that hosts snapshots of Packages.xz files.</p>
</td>
</tr>
<tr>
Expand All @@ -129,33 +129,33 @@ Either a set of {`url`, `arch`, `distro`, `snapshot`} or a set of {`packages_gz_
<tr>
<td><code>snapshot</code></td>
<td>
<p><code>the snapshot date of the Packages.gz</code></p>
<p><code>the snapshot date of the Packages.xz</code></p>
<p>Format: YYYYMMDDTHHMMSSZ. You can query a list of possible dates for snapshot.debian.org at <a href=
'http://snapshot.debian.org/archive/debian/?year=2009;month=10'>http://snapshot.debian.org/archive/debian/?year=2009;month=10</a></p>
</td>
</tr>
<tr>
<td><code>packages_gz_url</code></td>
<td><code>packages_url</code></td>
<td>
<p><code>the full url for the Packages.gz file</code></p>
<p><code>the full url for the Packages.xz or Packages.gz file</code></p>
</td>
</tr>
<tr>
<td><code>package_prefix</code></td>
<td>
<p><code>the prefix to prepend to the value of Filename in the Packages.gz file</code></p>
<p><code>the prefix to prepend to the value of Filename in the Packages file</code></p>
</td>
</tr>
<tr>
<td><code>sha256</code></td>
<td>
<p><code>the sha256 of the Packages.gz file, required</code></p>
<p><code>the sha256 of the Packages file, required</code></p>
</td>
</tr>
<tr>
<td><code>dpkg_parser</code></td>
<td>
<p><code>A binary that translates a Packages.gz file into a format readable by dpkg_list, required</code></p>
<p><code>A binary that translates a Packages file into a format readable by dpkg_list, required</code></p>
</td>
</tr>
</tbody>
Expand Down
4 changes: 2 additions & 2 deletions package_manager/dpkg.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ exports_files(["Packages.json", "os_release.tar"])
"--arch=" + repository_ctx.attr.arch,
"--distro=" + repository_ctx.attr.distro,
"--snapshot=" + repository_ctx.attr.snapshot,
"--packages-gz-url=" + repository_ctx.attr.packages_gz_url,
"--packages-url=" + repository_ctx.attr.packages_url,
"--package-prefix=" + repository_ctx.attr.package_prefix,
"--sha256=" + repository_ctx.attr.sha256,
]
Expand All @@ -61,7 +61,7 @@ _dpkg_src = repository_rule(
"arch": attr.string(),
"distro": attr.string(),
"snapshot": attr.string(),
"packages_gz_url": attr.string(),
"packages_url": attr.string(),
"package_prefix": attr.string(),
"sha256": attr.string(),
},
Expand Down
55 changes: 31 additions & 24 deletions package_manager/dpkg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import argparse
import gzip
import json
import lzma # requires python3
import os
import subprocess

Expand All @@ -39,7 +40,7 @@
)

parser.add_argument("--package-files", action='store',
help='A list of Packages.gz files to use')
help='A list of Packages.xz/gz files to use')
parser.add_argument("--packages", action='store',
help='A comma delimited list of packages to search for and download')
parser.add_argument("--workspace-name", action='store',
Expand All @@ -48,7 +49,7 @@
help='If set, the output path of the versions file to be generated')

parser.add_argument("--download-and-extract-only", action='store',
help='If True, download Packages.gz and make urls absolute from mirror url')
help='If True, download Packages.xz/gz and make urls absolute from mirror url')
parser.add_argument("--mirror-url", action='store',
help='The base url for the package list mirror')
parser.add_argument("--arch", action='store',
Expand All @@ -58,11 +59,11 @@
parser.add_argument("--snapshot", action='store',
help='The snapshot date to download')
parser.add_argument("--sha256", action='store',
help='The sha256 checksum to validate for the Packages.gz file')
parser.add_argument("--packages-gz-url", action='store',
help='The full url for the Packages.gz file')
help='The sha256 checksum to validate for the Packages.xz/gz file')
parser.add_argument("--packages-url", action='store',
help='The full url for the Packages.xz/gz file')
parser.add_argument("--package-prefix", action='store',
help='The prefix to prepend to the value of Filename key in the Packages.gz file.')
help='The prefix to prepend to the value of Filename key in the Packages.xz/gz file.')


def main():
Expand All @@ -75,14 +76,14 @@ def main():
args.arch = "ppc64el"
elif args.arch == "arm":
args.arch = "armhf"
if args.packages_gz_url and 'ppc64le' in args.packages_gz_url:
args.packages_gz_url = args.packages_gz_url.replace("ppc64le", "ppc64el")
elif args.packages_gz_url and '-arm/' in args.packages_gz_url:
args.packages_gz_url = args.packages_gz_url.replace("-arm/", "-armhf/")
if args.packages_url and 'ppc64le' in args.packages_url:
args.packages_url = args.packages_url.replace("ppc64le", "ppc64el")
elif args.packages_url and '-arm/' in args.packages_url:
args.packages_url = args.packages_url.replace("-arm/", "-armhf/")

if args.download_and_extract_only:
download_package_list(args.mirror_url,args.distro, args.arch, args.snapshot, args.sha256,
args.packages_gz_url, args.package_prefix)
args.packages_url, args.package_prefix)
util.build_os_release_tar(args.distro, OS_RELEASE_FILE_NAME, OS_RELEASE_PATH, OS_RELEASE_TAR_FILE_NAME)
else:
download_dpkg(args.package_files, args.packages, args.workspace_name, args.versionsfile)
Expand Down Expand Up @@ -133,11 +134,11 @@ def download_and_save(url, out_file):
print("error running wget: %s", e.output)
raise

def download_package_list(mirror_url, distro, arch, snapshot, sha256, packages_gz_url, package_prefix):
def download_package_list(mirror_url, distro, arch, snapshot, sha256, packages_url, package_prefix):
"""Downloads a debian package list, expands the relative urls,
and saves the metadata as a json file
A debian package list is a gzipped, newline delimited, colon separated
A debian package list is a (xz|gzip)-ipped, newline delimited, colon separated
file with metadata about all the packages available in that repository.
Multiline keys are indented with spaces.
Expand All @@ -164,28 +165,34 @@ def download_package_list(mirror_url, distro, arch, snapshot, sha256, packages_g
"""

if bool(packages_gz_url) != bool(package_prefix):
raise Exception("packages_gz_url and package_prefix must be specified or skipped at the same time.")
if bool(packages_url) != bool(package_prefix):
raise Exception("packages_url and package_prefix must be specified or skipped at the same time.")

if (not packages_gz_url) and (not mirror_url or not snapshot or not distro or not arch):
raise Exception("If packages_gz_url is not specified, all of mirror_url, snapshot, "
if (not packages_url) and (not mirror_url or not snapshot or not distro or not arch):
raise Exception("If packages_url is not specified, all of mirror_url, snapshot, "
"distro and arch must be specified.")

url = packages_gz_url
url = packages_url
if not url:
url = "%s/debian/%s/dists/%s/main/binary-%s/Packages.gz" % (
url = "%s/debian/%s/dists/%s/main/binary-%s/Packages.xz" % (
mirror_url,
snapshot,
distro,
arch
)

download_and_save(url, "Packages.gz")
actual_sha256 = util.sha256_checksum("Packages.gz")

packages_copy = url.split('/')[-1]
download_and_save(url, packages_copy)
actual_sha256 = util.sha256_checksum(packages_copy)
if sha256 != actual_sha256:
raise Exception("sha256 of Packages.gz don't match: Expected: %s, Actual:%s" %(sha256, actual_sha256))
with gzip.open("Packages.gz", 'rb') as f:
data = f.read()
raise Exception("sha256 of %s don't match: Expected: %s, Actual:%s" %(packages_copy, sha256, actual_sha256))
if packages_copy.endswith(".gz"):
with gzip.open(packages_copy, 'rb') as f:
data = f.read()
else:
with lzma.open("Packages.xz", 'rb') as f:
data = f.read()
metadata = parse_package_metadata(data, mirror_url, snapshot, package_prefix)
with open(PACKAGES_FILE_NAME, 'w') as f:
json.dump(metadata, f)
Expand Down
2 changes: 1 addition & 1 deletion package_manager/parse_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def parse_package_metadata(data, mirror_url, snapshot, package_prefix):
""" Takes a debian package list, changes the relative urls to absolute urls,
and saves the resulting metadata as a json file """

# this is called with the output of gzip.open, but should be strings
# this is called with the output of lzma.open, but should be strings
if isinstance(data, six.binary_type):
data = data.decode('utf-8')

Expand Down
Loading

0 comments on commit 91c6625

Please sign in to comment.