Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality for verifying tarball signatures to automated ingestion scripts #208

Merged
merged 10 commits into from
Mar 21, 2025
12 changes: 6 additions & 6 deletions .github/workflows/build-test-release-client-packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
fpm_opts: "--debug -n cvmfs-config-eessi-${{ steps.get_version.outputs.version }} -t tar -a all -s dir -C ./package --description 'CVMFS configuration package for EESSI.'"

- name: Upload packages as build artifacts
uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # v3.1.0
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: linux_packages
path: cvmfs-config-eessi*
Expand Down Expand Up @@ -135,7 +135,7 @@ jobs:
run: sudo apt-get update && sudo apt-get install cvmfs

- name: Download cvmfs-config-eessi package
uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
name: linux_packages

Expand Down Expand Up @@ -174,7 +174,7 @@ jobs:
dnf install -y cvmfs cvmfs-config-none

- name: Download cvmfs-config-eessi package
uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
name: linux_packages

Expand Down Expand Up @@ -215,7 +215,7 @@ jobs:
run: sudo apt-get update && sudo apt-get install cvmfs

- name: Download cvmfs-config-eessi package
uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
name: linux_packages

Expand Down Expand Up @@ -288,7 +288,7 @@ jobs:
run: |
echo ::set-output name=version::${GITHUB_REF#refs/tags/}

- uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
- uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
path: ./build_artifacts

Expand Down Expand Up @@ -319,7 +319,7 @@ jobs:
- name: Checkout
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0

- uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
- uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
path: ./build_artifacts

Expand Down
6 changes: 6 additions & 0 deletions scripts/automated_ingestion/automated_ingestion.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ download_dir = /where/to/store/download/tarballs
ingestion_script = /absolute/path/to/ingest-tarball.sh
metadata_file_extension = .meta.txt

[signatures]
signatures_required = no
signature_file_extension = .sig
signature_verification_script = /absolute/path/to/sign_verify_file_ssh.sh
allowed_signers_file = /path/to/allowed_signers

[aws]
staging_buckets = {
"software.eessi.io-2023.06": "software.eessi.io",
Expand Down
120 changes: 102 additions & 18 deletions scripts/automated_ingestion/eessitarball.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@ def __init__(self, object_name, config, git_staging_repo, s3, bucket, cvmfs_repo
self.config = config
self.git_repo = git_staging_repo
self.metadata_file = object_name + config['paths']['metadata_file_extension']
self.metadata_sig_file = self.metadata_file + config['signatures']['signature_file_extension']
self.object = object_name
self.object_sig = object_name + config['signatures']['signature_file_extension']
self.s3 = s3
self.bucket = bucket
self.cvmfs_repo = cvmfs_repo
self.local_path = os.path.join(config['paths']['download_dir'], os.path.basename(object_name))
self.local_sig_path = self.local_path + config['signatures']['signature_file_extension']
self.local_metadata_path = self.local_path + config['paths']['metadata_file_extension']
self.local_metadata_sig_path = self.local_metadata_path + config['signatures']['signature_file_extension']
self.url = f'https://{bucket}.s3.amazonaws.com/{object_name}'

self.states = {
Expand All @@ -48,22 +52,42 @@ def download(self, force=False):
"""
Download this tarball and its corresponding metadata file, if this hasn't been already done.
"""
if force or not os.path.exists(self.local_path):
try:
self.s3.download_file(self.bucket, self.object, self.local_path)
except:
logging.error(
f'Failed to download tarball {self.object} from {self.bucket} to {self.local_path}.'
)
self.local_path = None
if force or not os.path.exists(self.local_metadata_path):
try:
self.s3.download_file(self.bucket, self.metadata_file, self.local_metadata_path)
except:
logging.error(
f'Failed to download metadata file {self.metadata_file} from {self.bucket} to {self.local_metadata_path}.'
)
self.local_metadata_path = None
files = [
(self.object, self.local_path, self.object_sig, self.local_sig_path),
(self.metadata_file, self.local_metadata_path, self.metadata_sig_file, self.local_metadata_sig_path),
Comment on lines +56 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe worth to rename elements to clearly notify what they refer to and have the naming consistent? For example,

(self.object_remote_path, self.object_local_path, self.object_sig_remote_path, self.object_sig_local_path),
(self.metadata_remote_path, self.metadata_local_path, self.metadata_sig_remote_path, self.metadata_sig_local_path),

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This originated from the object terminology used by the boto client, but I agree that it would be more clear to use the names like you proposed. Is it okay to do that in a follow-up PR, as it would require changes across the entire script?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, perfectly fine to do it in a follow-up PR.

]
skip = False
for (object, local_file, sig_object, local_sig_file) in files:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly synchronise naming with potential changes in files array, e.g.,

for (remote_path, local_path, sig_remote_path, sig_local_path) in files:

if force or not os.path.exists(local_file):
# First we try to download signature file, which may or may not be available
# and may be optional or required.
try:
self.s3.download_file(self.bucket, sig_object, local_sig_file)
except:
if self.config['signatures'].getboolean('signatures_required', True):
logging.error(
f'Failed to download signature file {sig_object} for {object} from {self.bucket} to {local_sig_file}.'
)
skip = True
break
else:
logging.warning(
f'Failed to download signature file {sig_object} for {object} from {self.bucket} to {local_sig_file}. ' +
'Ignoring this, because signatures are not required with the current configuration.'
)
# Now we download the file itself.
try:
self.s3.download_file(self.bucket, object, local_file)
except:
logging.error(
f'Failed to download {object} from {self.bucket} to {local_file}.'
)
skip = True
break
# If any required download failed, make sure to skip this tarball completely.
if skip:
self.local_path = None
self.local_metadata_path = None

def find_state(self):
"""Find the state of this tarball by searching through the state directories in the git repository."""
Expand Down Expand Up @@ -156,6 +180,49 @@ def run_handler(self):
handler = self.states[self.state]['handler']
handler()

def verify_signatures(self):
"""Verify the signatures of the downloaded tarball and metadata file using the corresponding signature files."""

sig_missing_msg = 'Signature file %s is missing.'
sig_missing = False
for sig_file in [self.local_sig_path, self.local_metadata_sig_path]:
if not os.path.exists(sig_file):
logging.warning(sig_missing_msg % sig_file)
sig_missing = True

if sig_missing:
# If signature files are missing, we return a failure,
# unless the configuration specifies that signatures are not required.
if self.config['signatures'].getboolean('signatures_required', True):
return False
else:
return True

# If signatures are provided, we should always verify them, regardless of the signatures_required.
# In order to do so, we need the verification script and an allowed signers file.
verify_script = self.config['signatures']['signature_verification_script']
allowed_signers_file = self.config['signatures']['allowed_signers_file']
if not os.path.exists(verify_script):
logging.error(f'Unable to verify signatures, the specified signature verification script does not exist!')
return False

if not os.path.exists(allowed_signers_file):
logging.error(f'Unable to verify signatures, the specified allowed signers file does not exist!')
return False

for (file, sig_file) in [(self.local_path, self.local_sig_path), (self.local_metadata_path, self.local_metadata_sig_path)]:
verify_cmd = subprocess.run(
[verify_script, '--verify', '--allowed-signers-file', allowed_signers_file, '--file', file, '--signature-file', sig_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if verify_cmd.returncode == 0:
logging.debug(f'Signature for {file} successfully verified.')
else:
logging.error(f'Failed to verify signature for {file}.')
return False

return True

def verify_checksum(self):
"""Verify the checksum of the downloaded tarball with the one in its metadata file."""
local_sha256 = sha256sum(self.local_path)
Expand All @@ -171,13 +238,26 @@ def ingest(self):
#TODO: check if there is an open issue for this tarball, and if there is, skip it.
logging.info(f'Tarball {self.object} is ready to be ingested.')
self.download()
logging.info('Verifying its signature...')
if not self.verify_signatures():
issue_msg = f'Failed to verify signatures for `{self.object}`'
logging.error(issue_msg)
if not self.issue_exists(issue_msg, state='open'):
self.git_repo.create_issue(title=issue_msg, body=issue_msg)
return
else:
logging.debug(f'Signatures of {self.object} and its metadata file successfully verified.')

logging.info('Verifying its checksum...')
if not self.verify_checksum():
logging.error('Checksum of downloaded tarball does not match the one in its metadata file!')
# Open issue?
issue_msg = f'Failed to verify checksum for `{self.object}`'
logging.error(issue_msg)
if not self.issue_exists(issue_msg, state='open'):
self.git_repo.create_issue(title=issue_msg, body=issue_msg)
return
else:
logging.debug(f'Checksum of {self.object} matches the one in its metadata file.')

script = self.config['paths']['ingestion_script']
sudo = ['sudo'] if self.config['cvmfs'].getboolean('ingest_as_root', True) else []
logging.info(f'Running the ingestion script for {self.object}...')
Expand Down Expand Up @@ -222,6 +302,10 @@ def mark_new_tarball_as_staged(self):
logging.warn('Skipping this tarball...')
return

# Verify the signatures of the tarball and metadata file.
if not self.verify_signatures():
logging.warn('Signature verification of the tarball or its metadata failed, skipping this tarball...')

contents = ''
with open(self.local_metadata_path, 'r') as meta:
contents = meta.read()
Expand Down
Loading