diff --git a/.gitignore b/.gitignore
index a5309e6..e77f55e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
-build*/
+build/
+faiss*
+faiss/
+cuda*

diff --git a/platform/build-faiss/README.md b/platform/build-faiss/README.md
new file mode 100644
index 0000000..645411f
--- /dev/null
+++ b/platform/build-faiss/README.md
@@ -0,0 +1,57 @@

# Set up an Ubuntu 22.04 machine to build FAISS

## Setup for build in Ubuntu 22.04 with podman

Install podman:

```sh
sudo apt install podman -y
```

Run the build in podman:

```sh
podman run --rm -it -v ${PWD}/dev/origin/:/origin ubuntu:22.04 /bin/bash /origin/build.sh
```

This should produce two files:

* python*.whl

  a Python wheel for FAISS deployment

* faiss-libs.tgz

  a set of shared libraries for FAISS. Note that the Intel libraries are still required as well.

## Setup for Ubuntu 22.04 bare metal in OCI

Assumptions:

* /dev/nvme0n1 exists and can be reformatted
* an NVIDIA GPU is installed

## Base setup

This step:

* adds the Python prerequisites
* mounts /dev/nvme0n1 on /models
* links .cache and .local from the ubuntu user's home directory to /models

```sh
bash add_dev.sh
```

## Build prerequisites

Add the NVIDIA and Intel oneAPI libraries needed to build FAISS:

```sh
bash build-prereqs.sh
```

## Build FAISS

Clone the git repository and build it:

```sh
bash build-faiss.sh
```

diff --git a/platform/build-faiss/add_dev.sh b/platform/build-faiss/add_dev.sh
new file mode 100644
index 0000000..76671e5
--- /dev/null
+++ b/platform/build-faiss/add_dev.sh
@@ -0,0 +1,21 @@

#!/bin/bash

sudo apt-get update && sudo apt-get dist-upgrade -y

# mount nvme disk on /models
sudo mkdir /models
sudo mkfs.xfs /dev/nvme0n1
echo '/dev/nvme0n1 /models xfs defaults 0 2' | sudo tee -a /etc/fstab
sudo mount -a
sudo chmod 777 /models

# Add pointers to large data dirs into the 'ubuntu' user $HOME
mkdir /models/cache
mv ~/.cache ~/.cache.orig
ln -s /models/cache ~/.cache
mkdir /models/dev
ln -s /models/dev ~/dev
mkdir /models/local
ln -s /models/local ~/.local

echo 'export PATH=$HOME/.local/bin:$PATH' >> ~/.bashrc

diff --git a/platform/build-faiss/build-faiss.sh b/platform/build-faiss/build-faiss.sh
new file mode 100644
index 0000000..8a6aa27
--- /dev/null
+++ b/platform/build-faiss/build-faiss.sh
@@ -0,0 +1,67 @@

#!/bin/bash

git clone https://github.com/facebookresearch/faiss
cd faiss

# Configure paths and set environment variables
export PATH=$PATH:$HOME/.local/bin:/usr/local/cuda/bin
source /opt/intel/oneapi/setvars.sh

#export CC=gcc-12
#export CXX=g++-12
#export CXX=g++-11

# Configure using cmake
LD_LIBRARY_PATH=/usr/local/lib MKLROOT=/opt/intel/oneapi/mkl/2023.2.0/ cmake -B build \
    -DBUILD_SHARED_LIBS=ON \
    -DBUILD_TESTING=ON \
    -DFAISS_ENABLE_GPU=ON \
    -DFAISS_OPT_LEVEL=avx2 \
    -DFAISS_ENABLE_C_API=ON \
    -DCMAKE_BUILD_TYPE=Release \
    -DBLA_VENDOR=Intel10_64_dyn -Wno-dev .

# Alternative configuration, kept for reference (fully commented out so the stray
# option lines are not executed as shell commands):
#cmake -B build . \
#    -DBUILD_SHARED_LIBS=ON \
#    -DFAISS_ENABLE_GPU=ON \
#    -DFAISS_ENABLE_PYTHON=ON \
#    -DFAISS_ENABLE_RAFT=OFF \
#    -DBUILD_TESTING=ON \
#    -DFAISS_ENABLE_C_API=ON \
#    -DCMAKE_BUILD_TYPE=Release \
#    -DFAISS_OPT_LEVEL=avx2 -Wno-dev

# Now build faiss

make -C build -j$(nproc) faiss
make -C build -j$(nproc) swigfaiss
pushd build/faiss/python; python3 setup.py bdist_wheel; popd
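
# Optional sanity check (not part of the original steps, just a convenience): confirm
# the swig/bdist_wheel step above actually produced a wheel before it is installed
# below. The exact wheel filename depends on the FAISS version that was cloned.
#ls build/faiss/python/dist/*.whl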

# and install it. NOTE: this will install into the pyenv virtualenv 'aw' from the beginning of the script

sudo -E make -C build -j$(nproc) install
pip install --force-reinstall build/faiss/python/dist/faiss-1.7.4-py3-none-any.whl
cp build/faiss/python/dist/faiss-1.7.4-py3-none-any.whl ../

# add libraries to /usr/local/lib
mkdir -p faiss-libs

for n in build/faiss/python/*so build/faiss/*so
  do
    sudo cp "$n" /usr/local/lib/
    cp "$n" faiss-libs/
  done
tar cfz ../faiss-libs.tgz faiss-libs/*
rm -rf faiss-libs

# Add ldconfig settings for the Intel and FAISS libraries

echo '/opt/intel/oneapi/mkl/2023.1.0/lib/intel64' | sudo tee /etc/ld.so.conf.d/aw_intel.conf
echo '/usr/local/lib' | sudo tee /etc/ld.so.conf.d/aw_faiss.conf

# Update the ld cache

sudo -E ldconfig

cd ..
rm -rf faiss

diff --git a/platform/build-faiss/build-prereqs.sh b/platform/build-faiss/build-prereqs.sh
new file mode 100644
index 0000000..636080b
--- /dev/null
+++ b/platform/build-faiss/build-prereqs.sh
@@ -0,0 +1,44 @@

#!/bin/bash

set -e
export PATH=$HOME/.local/bin:$PATH
export DEBIAN_FRONTEND=noninteractive

# download the Intel oneAPI key to the system keyring
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor | sudo -E tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo -E tee /etc/apt/sources.list.d/oneAPI.list

# ensure we're using the latest cmake
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo -E tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo -E tee /etc/apt/sources.list.d/kitware.list >/dev/null

# add the cuda tools to build against
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo -E dpkg -i cuda-keyring_1.1-1_all.deb

# Update and install MKL, CMake, and the CUDA toolkit
sudo -E apt-get update
sudo -E apt install intel-oneapi-mkl cmake cuda-11-8 -y

# Verify python and pytorch work

python3 -c 'import torch; print(f"Is CUDA Available: {torch.cuda.is_available()}")'
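
# Additional checks (not in the original script): confirm the CUDA compiler and MKL
# landed where the FAISS build expects them. The paths below assume the default
# NVIDIA and Intel oneAPI package layouts; listing /opt/intel/oneapi/mkl/ shows the
# installed MKL version, which should match the MKLROOT used in build-faiss.sh.
#/usr/local/cuda/bin/nvcc --version
#ls /opt/intel/oneapi/mkl/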

diff --git a/platform/build-faiss/build.sh b/platform/build-faiss/build.sh
new file mode 100644
index 0000000..c438795
--- /dev/null
+++ b/platform/build-faiss/build.sh
@@ -0,0 +1,106 @@

#!/bin/bash

if [ -d /origin ]; then
    cd /origin/platform/build-faiss
else
    echo "The artificialwisdomai/origin project must be mounted at /origin"
    exit 1
fi

if [[ ! `id -u` -eq 0 ]]; then
    echo "This needs to run as root"
    exit 1
fi

export PATH=$HOME/.local/bin:$PATH
export DEBIAN_FRONTEND=noninteractive

apt-get update && apt-get dist-upgrade -y

# Install python, build essentials, and essential libraries
apt-get install -y python3-venv python3-pip python3-dev build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev liblzma-dev libsqlite3-dev libreadline-dev libbz2-dev neovim curl git wget

# Update pip, setuptools, and wheel
python3 -m pip install -U pip setuptools wheel

# Add a couple of Python prerequisites
pip install numpy swig torch

# Get Intel oneAPI for BLAS support
# From: https://www.intel.com/content/www/us/en/docs/oneapi/installation-guide-linux/2023-0/apt.html

# download the key to the system keyring
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null

# add signed entry to apt sources and configure the APT client to use the Intel repository
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list

apt update
apt install dkms intel-basekit -y

## Get CUDA and install it

curl -sLO https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run
bash $PWD/cuda_*run --silent --toolkit --driver --no-man-page

# ensure we're using the latest cmake
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null

echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null

# add the cuda tools to build against

wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
dpkg -i cuda-keyring_1.1-1_all.deb
apt-get update
apt-get install cmake cuda-toolkit -y

# Verify python and pytorch work

python3 -c 'import torch; print(f"Is CUDA Available: {torch.cuda.is_available()}")'

git clone https://github.com/facebookresearch/faiss
cd faiss

# Configure paths and set environment variables
export PATH=$PATH:$HOME/.local/bin:/usr/local/cuda/bin
source /opt/intel/oneapi/setvars.sh

# Configure using cmake

LD_LIBRARY_PATH=/usr/local/lib MKLROOT=/opt/intel/oneapi/mkl/2023.2.0/ CXX=g++-11 cmake -B build \
    -DBUILD_SHARED_LIBS=ON \
    -DBUILD_TESTING=ON \
    -DFAISS_ENABLE_GPU=ON \
    -DFAISS_OPT_LEVEL=avx2 \
    -DFAISS_ENABLE_C_API=ON \
    -DFAISS_ENABLE_PYTHON=ON \
    -DCMAKE_BUILD_TYPE=Release \
    -DFAISS_ENABLE_RAFT=OFF \
    -DBLA_VENDOR=Intel10_64_dyn -Wno-dev .

# Now build faiss

make -C build -j$(nproc) faiss
make -C build -j$(nproc) swigfaiss
pushd build/faiss/python; python3 setup.py bdist_wheel; popd

# and install it. NOTE: this will install into the pyenv virtualenv 'aw' from the beginning of the script

make -C build -j$(nproc) install
#pip install --force-reinstall build/faiss/python/dist/faiss-1.7.4-py3-none-any.whl
cp build/faiss/python/dist/faiss-1.7.4-py3-none-any.whl ../

# add libraries to /usr/local/lib
mkdir -p ../faiss-libs

for n in build/faiss/python/*so build/faiss/*so
  do
    cp "$n" ../faiss-libs/
  done
tar cfz ../faiss-libs.tgz ../faiss-libs/*
rm -rf ../faiss-libs

cd ..
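
# Optional verification (not in the original script): if the wheel built above is also
# pip-installed (see the commented-out pip line), the bindings can be smoke-tested with
# faiss.get_num_gpus(), which reports how many GPUs FAISS can see.
#python3 -c 'import faiss; print("FAISS GPUs:", faiss.get_num_gpus())'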
#rm -rf faiss

diff --git a/retrieval/run_training.sh b/retrieval/run_training.sh
new file mode 100644
index 0000000..1de8303
--- /dev/null
+++ b/retrieval/run_training.sh
@@ -0,0 +1,18 @@

#!/bin/bash

git clone https://github.com/istio/istio.io.git /tmp/istio/
mv /tmp/istio/content/en ./en

rm -rf chunks *json
mkdir -p ./chunks

if [ -d .venv ]; then
    source .venv/bin/activate
else
    python3 -m venv .venv
    source .venv/bin/activate
    pip install -U pip wheel -r requirements.txt
    pip install ../platform/build-faiss/faiss-1.7.4-py3-none-any.whl
fi

python3 train.py

diff --git a/retrieval/train.py b/retrieval/train.py
index 3e520a5..d6ebf1d 100644
--- a/retrieval/train.py
+++ b/retrieval/train.py
@@ -80,7 +80,7 @@
         retro=retro,
         knn=2,
         chunk_size=64,
-        documents_path="/home/sdake/en",
+        documents_path="./en",
         # models/RedPajama-Data-1T-Sample",
         glob="**/*.md",
         chunks_memmap_path="./chunks/train.chunks.dat",
@@ -132,7 +132,7 @@
     " [aw.a]•[/aw.a] [aw.b]retrieval_model[/aw.b][aw.a]=[/aw.a][aw.b]artificialwisdomai[/aw.b][aw.a]/[/aw.a][aw.b]retroformer [aw.a]•[/aw.a] [aw.b]foundation_model[/aw.b][aw.a]=[/aw.a][aw.b]mosaicml[/aw.b][aw.a]/[/aw.a][aw.b]mpt30b[/aw.b] [aw.a]•[/aw.a] "
 )
 for epoch in range(EPOCH_MAX):
-    dataloader = iter(wrapper.get_dataloader(batch_size=4, shuffle=True))
+    dataloader = iter(wrapper.get_dataloader(batch_size=2, shuffle=True))
     task_id = progress_bar.add_task(
         description="Epoch {}".format(epoch), loss="loss=nil", total=len(dataloader)
     )