diff --git a/.github/workflows/update_container_image.yaml b/.github/workflows/update_container_image.yaml
new file mode 100644
index 000000000..8f25697ac
--- /dev/null
+++ b/.github/workflows/update_container_image.yaml
@@ -0,0 +1,62 @@
+name: Build and Push Container Image
+
+on:
+  push:
+    # Run when tags are pushed (same event that runs update_tag.yaml)
+    tags:
+      - '*' # or 'v*' if you only care about version tags
+    # Optional: also build on main when container files change
+    branches:
+      - main
+    # Path filters are not evaluated for tag pushes, so they only limit the main-branch trigger.
+    paths:
+      - 'container-files/Dockerfile-frecli'
+      - 'container-files/runscript.sh'
+      - 'container-files/cylc-flow-tools.yaml'
+      - '.github/workflows/update_container_image.yaml'
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: "noaa-gfdl/ubuntu-20-frecli-2025"
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Determine image tag
+        id: set_tag
+        run: |
+          # Use the pushed tag if this is a tag ref; otherwise fall back.
+          if [[ "$GITHUB_REF" == refs/tags/* ]]; then
+            TAG="${GITHUB_REF#refs/tags/}"
+          elif [[ "${GITHUB_REF_NAME}" == "main" ]]; then
+            TAG="latest"
+          else
+            TAG="${GITHUB_SHA::7}"
+          fi
+          echo "docker_tag=$TAG" >> "$GITHUB_OUTPUT"
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./container-files/Dockerfile-frecli
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.set_tag.outputs.docker_tag }}
diff --git a/.github/workflows/update_tag.yaml b/.github/workflows/update_tag.yaml
index 380d7069c..1de57fd8a 100644
--- a/.github/workflows/update_tag.yaml
+++ b/.github/workflows/update_tag.yaml
@@ -43,6 +43,12 @@ jobs:
             find docs -type f -exec sed -i "s/$old_tag/$new_tag/g" {} +
           fi
 
+      - name: Update fre-cli version for container environment
+        run: |
+          new_tag="${{ steps.get_tag.outputs.tag }}"
+          # Targeted update: only replace version on the fre-cli dependency line
+          sed -i "s/\(noaa-gfdl::fre-cli ==\)[0-9.]\+/\1$new_tag/" container-files/cylc-flow-tools.yaml
+
       - name: Create pull request
         uses: peter-evans/create-pull-request@v7.0.6
         with:
diff --git a/container-files/Dockerfile-frecli b/container-files/Dockerfile-frecli
new file mode 100644
index 000000000..4557e761d
--- /dev/null
+++ b/container-files/Dockerfile-frecli
@@ -0,0 +1,41 @@
+FROM condaforge/mambaforge:24.9.2-0 AS builder
+#LABEL maintainer "Ciheim Brown"
+## Description ##
+# condaforge is based on a stripped down ubuntu image. We need some extra bits for frerun + fremake
+####
+
+# Set where the conda installation goes
+ARG conda_location=/app/cylc-flow-tools
+
+# Change to encrypted HTTP.
+# Only the "http://" scheme is rewritten so entries that already use https are not turned into "httpss".
+RUN sed -i 's|http://|https://|g' /etc/apt/sources.list
+
+# apt installs to /usr/bin/
+# apt-get is used instead of apt because apt's command line is not meant for scripted use;
+# the package lists are removed in the same layer so they do not stay in the image.
+RUN apt-get update \
+ && apt-get -y install uuid-runtime time csh python bc \
+ && rm -rf /var/lib/apt/lists/*
+
+# Update conda because this build container may be stale.
+# -y: there is no terminal to answer the confirmation prompt during an image build.
+RUN conda update -y -n base -c conda-forge conda
+
+# Copy runscript in container and make it executable
+COPY container-files/runscript.sh /app/exec/runscript.sh
+RUN chmod +x /app/exec/runscript.sh
+
+# Set up conda environment directory for cylc workflow
+COPY container-files/cylc-flow-tools.yaml ${conda_location}/cylc-flow-tools.yaml
+RUN mamba env create --file ${conda_location}/cylc-flow-tools.yaml -p ${conda_location}
+
+# Maybe perhaps tag an external mounted volume /mnt2 as being safe...
+# RUN git config --global --add safe.directory /mnt2
+
+# Quoted so the shell cannot glob-expand the "*"; -y skips the confirmation prompt.
+# NOTE(review): no -n/-p is given, so this installs into the default (base) environment rather
+# than ${conda_location} -- confirm that is intended.
+RUN conda install -y "urwid==2.*"
+
+ENTRYPOINT ["/bin/bash"]
diff --git a/container-files/README.md b/container-files/README.md
new file mode 100644
index 000000000..4d5e815a6
--- /dev/null
+++ b/container-files/README.md
@@ -0,0 +1,109 @@
+# Post-Processing Container
+
+Previously, many GFDL workflows and configurations have only been accessible on GitLab. This is disadvantageous for outside collaboration, flexibility, and community development. While the FRE workflow can now be conda installed, another deployment method of containerization has been developed. Containerization of the FRE workflow at GFDL bolsters portability while also simplifying the environment set-up for the user. With the environment set-up done through the container build and runscript, this post-processing container work allows for more effective sharing of the workflow.
+
+## PULLING CONTAINER FROM REGISTRY
+To pull the frecli container image from the NOAA-GFDL GitHub container registry, use this command:
+
+docker pull ghcr.io/noaa-gfdl/hpc-me/ppp:latest
+
+
+## BUILDING LOCALLY
+
+**In order to build the container, the user needs to have podman access on gaea. If needed, put in a helpdesk ticket.**
+
+Files used to build container:
+
+- Dockerfile-frecli
+- cylc-flow-tools environment yaml
+- runscript.sh
+
+The container will house the fre-cli tools and subtools, and any necessary packages needed for those tools.
+
+Using podman and apptainer to build, follow these steps:
+
+```
+## Clone the fre-cli repository
+git clone https://github.com/NOAA-GFDL/fre-cli.git
+
+## Navigate into the fre-cli repository root (the Dockerfile's COPY paths are relative to it)
+cd fre-cli
+
+## Build a container image
+podman build -f container-files/Dockerfile-frecli -t latest .
+
+## Save the image to a local tar file
+# It is recommended to name the container after the post-processing experiment name
+podman save -o [name of container].tar localhost/latest
+
+## Create the singularity image file (sif) from the tar file
+apptainer build --disable-cache [name of container].sif docker-archive://[name of container].tar
+```
+
+## SETUP
+Now that the FRE workflows container is created, certain files and directories must be made accessible.
+
+#### Repos and Configuration files
+
+In order to run the post-processing workflow, certain repositories and files are needed:
+
+1. `fre-workflows` cloned repository
+    - Can be found here: https://github.com/NOAA-GFDL/fre-workflows
+
+2. Directory that will include folders and files for container set-up and running (could be named `frecli-container-setup` for example)
+    - The setup/output directory consists of a few subdirectories: pp, ptmp, and temp (these are created through the runscript.sh in this repository for the container)
+    - ***Ensure you create the empty `frecli-container-setup` folder in an area with enough space as this is where the post-processing run output will be populated.***
+
+3. Yaml configuration files are also needed.
+    - Publicly available example yaml configuration files can be found here: https://github.com/NOAA-GFDL/fre-examples
+
+#### Data files
+
+Additionally, history files and grid spec files are needed.
+
+**If on Gaea**, history files and grid spec files are usually available in a known location; note their paths.
+
+- Paths to the history folder and grid spec file will be mounted into the container as read only folders/files
+
+**If not on Gaea**, history file and grid spec data should be transferred to the `frecli-container-setup` location in:
+
+- `frecli-container-setup/history/`
+- `frecli-container-setup/[experiment]_grid/`
+
+FOR CLOUD USERS: Preparing for cloud usage requires history files and container image/runscript to be transferred to the cloud resource. The recommended method of file transfer is Globus; files should be transferred to the cloud resource’s lustre folder.
+
+Refer to the Globus documentation here: [Globus Online Data Transfer](https://docs.rdhpcs.noaa.gov/data/globus_online_data_transfer.html)
+
+#### Configuration Edits
+
+Regarding the yaml configurations, some paths need to be edited to reference the file location mounted inside the container. These include:
+
+- `&GRID_SPEC96`: "/mnt/[experimentname]_grid/[gridSpec file]"
+- `history_dir`: "/mnt/history"
+- `pp_dir`: "/mnt/pp"
+- `ptmp_dir`: "/mnt/ptmp"
+
+## RUNNING
+
+To run the container, follow these steps:
+
+```
+## Use apptainer or singularity to run
+# Make sure directories are writable
+# Bind in necessary locations (setup folder, workflow folder, data locations)
+apptainer exec --writable-tmpfs --bind [Path/to/setup/folder]:/mnt --bind [Path/to/fre-workflows]:/mnt2 --bind [Path/to/gridspec location]:/mnt/[experiment-name]_grid:ro --bind [Path/to/history/files]:/mnt/history:ro [Path/to/created/container] /app/exec/runscript.sh
+```
+NOTE: It is essential that binding is done correctly as the container’s runscript relies heavily on these paths.
+
+Here,
+- `--writable-tmpfs` allows files in the container to be editable, but temporarily (as long as the container is running)
+- `--bind` mounts a host path at the given location inside the container (`[host path]:[container path]`)
+- `ro` refers to `read-only`, so that data files are not corrupted in any way.
+
+At this point, the container’s runscript will begin to run. User input is required, listing the experiment, platform, target, and post-processing yaml file.
+
+The experiment will be installed, configuration files will be validated, and the experiment should kick off.
+
+## REVIEW
+
+The setup-output directory created earlier will hold pp output for review. It will also hold a newly created cylc-run directory.
diff --git a/container-files/cylc-flow-tools.yaml b/container-files/cylc-flow-tools.yaml
new file mode 100644
index 000000000..caf980748
--- /dev/null
+++ b/container-files/cylc-flow-tools.yaml
@@ -0,0 +1,12 @@
+name: cylc-flow-tools
+channels:
+  - conda-forge
+  - NOAA-GFDL
+dependencies:
+  - noaa-gfdl::hsm
+  - noaa-gfdl::fre-cli ==2025.04
+  - noaa-gfdl::fre-python-tools
+  - conda-forge::nco
+  - conda-forge::rsync
+  - conda-forge::make
+  - conda-forge::vim
diff --git a/container-files/runscript.sh b/container-files/runscript.sh
new file mode 100755
index 000000000..08f5aefe6
--- /dev/null
+++ b/container-files/runscript.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+
+## TO-DO:
+## - automate rebuilding container when there is an update in fre-cli
+## - checks for the status of the workflow (before installation step)
+
+# Initialize ppp-setup
+# Set environment variables
+export TMPDIR=/mnt/temp
+export HOME=/mnt
+
+## Set path to defined global.cylc file
+## Currently just using default global.cylc; jobs run in background
+# export CYLC_CONF_PATH=/mnt
+
+## TO-DO: run on non-cloud environment to assess if conda initializations steps are the same
+# Initializations for conda environment in container
+conda init --all
+source /opt/conda/etc/profile.d/conda.sh
+conda deactivate
+conda activate /app/cylc-flow-tools
+
+# Prompt for the experiment name, platform, target and model yaml path.
+# Sets the globals expname, plat, targ and yamlfile, which fre_pp_steps reads.
+get_user_input () {
+    echo Please Enter Experiment Name:
+    read -r expname
+
+    echo Please Enter Platform:
+    read -r plat
+
+    echo Please Enter Target:
+    read -r targ
+
+    echo Please Enter Path to model yaml file:
+    read -r yamlfile
+}
+
+# (Re)create the pp, ptmp and temp directories under ${HOME}.
+# A directory that already exists is removed first, so every run starts with empty ones.
+create_dirs () {
+    paths=("${HOME}/pp" "${HOME}/ptmp" "${HOME}/temp")
+
+    # Expansions are quoted so a path is never word-split or glob-expanded.
+    for p in "${paths[@]}"; do
+        if [ -d "$p" ]; then
+            echo -e "Path $p previously created. Removing..."
+            rm -rf "$p"
+            echo -e " Creating new $p\n"
+        fi
+        mkdir -p "$p"
+    done
+}
+
+# Exit the script with status 1 if the previous command failed.
+#   $1 - label of the step, printed in the failure message
+# Must be called directly after the command being checked, because it reads $?.
+check_exit_status () {
+    if [ $? -ne 0 ]; then
+        echo "$1 failed"
+        exit 1
+    fi
+}
+
+# Run fre pp checkout, configure-yaml, validate and install, then play the workflow
+# with cylc and write its log to /mnt/log.out.
+# Reads the globals expname, plat, targ and yamlfile set by get_user_input.
+fre_pp_steps () {
+    # Workflow name used for the cylc-src/cylc-run paths and the cylc commands below.
+    # Nothing else in this script assigns it; left unset, every ${name} expands to "".
+    # NOTE(review): assumes fre pp names the workflow <experiment>__<platform>__<target> -- confirm.
+    name="${expname}__${plat}__${targ}"
+
+    # experiment cleaned if previously installed
+    if [ -d "/mnt/cylc-run/${name}" ]; then
+        echo -e "\n${name} previously installed"
+        echo " Removing ${name}... "
+        cylc clean "${name}"
+    fi
+
+    ## Checkout
+    echo -e "\nCreating $name directory in ${HOME}/cylc-src/${name} ...... "
+    fre -v pp checkout -e "${expname}" -p "${plat}" -t "${targ}"
+    check_exit_status "CHECKOUT"
+    #Not sure if needed because if no global.cylc found, cylc uses default, which utilizes background jobs anyway ...
+    #export CYLC_CONF_PATH=/mnt/cylc-src/${name}/generic-global-config/
+
+    ## Configure the rose-suite and rose-app files for the workflow
+    echo -e "\nRunning fre pp configure-yaml to configure the rose-suite and rose-app files ..."
+    fre -v pp configure-yaml -e "${expname}" -p "${plat}" -t "${targ}" -y "${yamlfile}"
+    check_exit_status "CONFIGURE-YAML"
+
+    ## Validate the configuration files
+    # A validation failure is reported but does not stop the run. check_exit_status is not
+    # called here: it would only ever see the exit status of the `|| echo` fallback.
+    echo -e "\nRunning fre pp validate to validate rose-suite and rose-app configuration files for workflow ... "
+    fre -v pp validate -e "${expname}" -p "${plat}" -t "${targ}" || echo "validate, no kill"
+
+    # Install
+    echo -e "\nRunning fre pp install to install the workflow in ${HOME}/cylc-run/${name} ... "
+    fre -v pp install -e "${expname}" -p "${plat}" -t "${targ}"
+    check_exit_status "INSTALL"
+
+    ## RUN
+    echo -e "\nRunning the workflow with cylc play ... "
+    cylc play --no-detach --debug -s 'STALL_TIMEOUT="PT0S"' "${name}"
+    check_exit_status "RUN"
+
+    # Put log in output file
+    cylc cat-log "${name}" > "/mnt/log.out"
+    check_exit_status "Writing to log.out"
+}
+
+# Entry point: run the set-up and fre-cli post-processing steps in order.
+main () {
+    # Set user-input
+    get_user_input
+    #Create directories needed for post-processing
+    create_dirs
+
+    # Run the post-processing steps
+    fre_pp_steps
+}
+
+# Run main function
+main