From edaf54ce41a47e0309fdbba7141562b357fd5a99 Mon Sep 17 00:00:00 2001
From: Charlie Drage
Date: Sat, 23 Nov 2024 12:53:01 -0500
Subject: [PATCH] update

Signed-off-by: Charlie Drage
---
 instructlab-nvidia/Containerfile | 25 +++++++++++++++
 instructlab-nvidia/main.sh       | 54 ++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 instructlab-nvidia/Containerfile
 create mode 100755 instructlab-nvidia/main.sh

diff --git a/instructlab-nvidia/Containerfile b/instructlab-nvidia/Containerfile
new file mode 100644
index 0000000..1899c55
--- /dev/null
+++ b/instructlab-nvidia/Containerfile
@@ -0,0 +1,25 @@
+# **Description:** Bundles InstructLab models into the CUDA workbench image and runs main.sh as the entrypoint.
+
+FROM quay.io/rh-aiservices-bu/instructlab-workbench-code-server-cuda:0.21.0
+
+#! TODO: Completely remove /usr/local/cuda/compat from $LD_LIBRARY_PATH, export it then remove /usr/local/cuda/compat
+
+#! Required arguments
+#! NOTE: build arguments are recorded in the image history, so only pass a
+#! token here that may be exposed to anyone who can pull the image.
+ARG HUGGINGFACE_API
+
+#! Initialize the "ilab" configuration so it does not complain
+RUN ilab config init --non-interactive
+
+#! Download models to the default directory so we can bundle them in the image when training
+#! The ":?" expansion fails the build early when the token was not supplied.
+RUN ilab model download --hf-token "${HUGGINGFACE_API:?HUGGINGFACE_API build argument is required}"
+
+RUN ilab model download -rp instructlab/granite-7b-lab
+
+#! Add the script (COPY is enough for a plain local file)
+COPY main.sh /main.sh
+
+#! Entrypoint
+ENTRYPOINT ["/main.sh"]
diff --git a/instructlab-nvidia/main.sh b/instructlab-nvidia/main.sh
new file mode 100755
index 0000000..a0c5dbb
--- /dev/null
+++ b/instructlab-nvidia/main.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Entrypoint of the instructlab-nvidia image. Clones a taxonomy repository,
+# generates synthetic data from it, trains a model on that data and converts
+# the result.
+#
+# Environment:
+#   GIT_REPO  Repository to clone into ./workspace; config.yaml is read from
+#             its root. Required unless ./workspace already holds a git clone.
+
+# Stop at the first failing step instead of running later steps on missing inputs
+set -euo pipefail
+
+# Completely remove /usr/local/cuda/compat from $LD_LIBRARY_PATH and export it.
+# Why? Because we are running this in nvidia-container-toolkit, the drivers can get confused if they see the compat directory
+# since it'll be different versions sometimes vs the host.
+# The second and third expressions also drop the entry when it is the last (or only) one, where no ":" follows it.
+LD_LIBRARY_PATH=$(printf '%s\n' "${LD_LIBRARY_PATH:-}" | sed -e 's|/usr/local/cuda/compat:||g' -e 's|:/usr/local/cuda/compat$||' -e 's|^/usr/local/cuda/compat$||')
+export LD_LIBRARY_PATH
+
+# Make directories
+mkdir -p /output/generated_data /output/trained_model
+
+# Git clone from the GIT_REPO environment variable to the workspace folder
+# (skipped when workspace already holds a clone, e.g. after a container restart)
+if [ ! -d workspace/.git ]; then
+    git clone "${GIT_REPO:?GIT_REPO must be set to the repository to clone}" workspace
+fi
+
+# Remove any previous configurations
+rm -rf ~/.config/instructlab
+
+# Copy the config file over / overriding the current one
+ilab config init --config workspace/config.yaml --taxonomy-path workspace --non-interactive
+
+# Run the synthetic data generation command
+ilab data generate --taxonomy-base empty --output-dir /output/generated_data
+
+# Find what file starts with train_* and use that
+TRAIN_FILE=$(find /output/generated_data -type f -name "train_*" -print -quit)
+if [ -z "$TRAIN_FILE" ]; then
+    echo "No train_* file was generated in /output/generated_data" >&2
+    exit 1
+fi
+
+# Train
+# TODO: Specify the ACTUAL model you want to train with...
+# training can be done on any model I believe.
+# NOTE(review): --data-output-dir receives the train_* file while --data-path receives the directory; confirm these are not swapped.
+ilab train --data-path /output/generated_data --data-output-dir "$TRAIN_FILE" --model-path ~/.cache/instructlab/models/instructlab/granite-7b-lab --device cuda
+
+# Convert to GGUF?
+# NOTE(review): nothing in this script visibly writes to /output/trained_model; confirm the training step stores its result there.
+ilab model convert --model-dir /output/trained_model