From 8191ee21aaf979a347b32558a50c50cb53f7494c Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 9 Jun 2024 20:16:49 +0200 Subject: [PATCH 1/6] {2023.06,2023a} PyTorch-bundle v2.1.2 --- .../pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index 81f4afd70e..6041163ec8 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -75,3 +75,7 @@ easyconfigs: from-pr: 19987 - tensorboard-2.15.1-gfbf-2023a.eb - tqdm-4.66.1-GCCcore-12.3.0.eb + - PyTorch-bundle-2.1.2-foss-2023a.eb: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20382 + options: + from-pr: 20382 From c8c59c2ac0da6a9c41bd7ff21a655cc4367e004e Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Sun, 9 Jun 2024 21:18:57 +0200 Subject: [PATCH 2/6] use tmp for host-injections --- bot/build.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bot/build.sh b/bot/build.sh index de73faef0b..70d30a8c10 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -276,9 +276,12 @@ fi # Retain location for host injections so we don't reinstall CUDA # (Always need to run the driver installation as available driver may change) -if [[ ! -z ${SHARED_FS_PATH} ]]; then - BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") -fi +# use $STORAGE for host-injections (maybe shared fs is not behaving well) +mkdir -p "${STORAGE}/host-injections" +BUILD_STEP_ARGS+=("--host-injections" "${STORAGE}/host-injections") +#if [[ ! 
-z ${SHARED_FS_PATH} ]]; then +# BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") +#fi # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) From 70ff7fcaa42b3e93e6d986deb4f799d7efee6389 Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 11 Jun 2024 04:59:00 +0200 Subject: [PATCH 3/6] fix order of imports --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 849a135fa8..49d2c4ce29 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -5,8 +5,8 @@ import easybuild.tools.environment as env from easybuild.easyblocks.generic.configuremake import obtain_config_guess -from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS from easybuild.easyblocks.python import EXTS_FILTER_PYTHON_PACKAGES +from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS from easybuild.tools.build_log import EasyBuildError, print_msg from easybuild.tools.config import build_option, update_build_option from easybuild.tools.filetools import apply_regex_substitutions, copy_file, remove_file, symlink, which From 6f7b35d09169a37fd7324e6c5f4fcddd7f1adcbb Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 11 Jun 2024 05:36:27 +0200 Subject: [PATCH 4/6] set TORCHVISION_* env vars --- eb_hooks.py | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 49d2c4ce29..530b362123 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -299,6 +299,44 @@ def parse_hook_pybind11_replace_catch2(ec, eprefix): build_deps[idx] = (catch2_name, catch2_version) +def parse_hook_pytorch_bundle_torchvision_setenv(ec, eprefix): + """ + Set TORCHVISION_{INCLUDE,LIBRARY} via preinstallopts of extension 'torchvision' (non-CUDA version only) + """ + if ec.name == 'PyTorch-bundle' and ec.version in ['2.1.2']: + if 'CUDA' not in ec['versionsuffix']: + print_msg("parse_hook for 
PyTorch-bundle without CUDA: extslist '%s'", ec['exts_list']) + print_msg("New exts_list: '%s'", ec['exts_list']) + ec_dict = ec.asdict() + libpng_root = os.getenv('EBROOTLIBPNG') + libpng_include = os.path.join(libpng_root, 'include') + libpng_lib = os.path.join(libpng_root, 'lib') + libjpeg_turbo_root = os.getenv('EBROOTLIBJPEGMINTURBO') + libjpeg_turbo_include = os.path.join(libjpeg_turbo_root, 'include') + libjpeg_turbo_lib = os.path.join(libjpeg_turbo_root, 'lib') + exts_list_new = [] + torchvision_include = 'export TORCHVISION_INCLUDE=%s:%s' % (libpng_include, libjpeg_turbo_include) + torchvision_library = 'export TORCHVISION_LIBRARY=%s:%s' % (libpng_lib, libjpeg_turbo_lib) + for item in ec_dict['exts_list']: + if item[0] != 'torchvision': + exts_list_new.append(item) + else: + ext_dict = item[2] + if 'preinstallopts' in ext_dict: + raise EasyBuildError("found value for 'preinstallopts' for extension 'torchvision'," + " but expected NONE") + else: + # add preinstallopts + ext_dict['preinstallopts'] = torchvision_include + '; ' + torchvision_library + exts_list_new.append((item[0], item[1], ext_dict)) + ec['exts_list'] = exts_list_new + print_msg("New exts_list: '%s'", ec['exts_list']) + else: + print_msg("parse_hook for PyTorch-bundle for CUDA -> leaving preinstallopts unchanged") + else: + raise EasyBuildError("PyTorch-bundle-specific hook triggered for non-PyTorch-bundle easyconfig?!") + + def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix): """ Disable check for QtWebEngine in Qt5 as workaround for problem with determining glibc version. 
@@ -312,7 +350,6 @@ def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix): raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!") - def parse_hook_sentencepiece_disable_tcmalloc_aarch64(ec, eprefix): """ Disable using TC_Malloc on 'aarch64/generic' @@ -943,6 +980,7 @@ def pre_module_hook_librosa_augment_modluafooter(self, *args, **kwargs): 'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors, 'Pillow-SIMD' : parse_hook_Pillow_SIMD_harcoded_paths, 'pybind11': parse_hook_pybind11_replace_catch2, + 'PyTorch-bundle': parse_hook_pytorch_bundle_torchvision_setenv, 'Qt5': parse_hook_qt5_check_qtwebengine_disable, 'SentencePiece': parse_hook_sentencepiece_disable_tcmalloc_aarch64, 'UCX': parse_hook_ucx_eprefix, From 278ae61db464d60cf1717467b0d126a419afca1f Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 11 Jun 2024 06:24:12 +0200 Subject: [PATCH 5/6] EBROOT* env vars aren't set in parse hooks --- eb_hooks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index 530b362123..3adf7cb98e 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -308,10 +308,12 @@ def parse_hook_pytorch_bundle_torchvision_setenv(ec, eprefix): print_msg("parse_hook for PyTorch-bundle without CUDA: extslist '%s'", ec['exts_list']) print_msg("New exts_list: '%s'", ec['exts_list']) ec_dict = ec.asdict() - libpng_root = os.getenv('EBROOTLIBPNG') + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + eessi_software_path = get_eessi_envvar('EESSI_SOFTWARE_PATH') + libpng_root = os.path.join(eessi_software_path, "software", "libpng", "1.6.39-GCCcore-12.3.0") libpng_include = os.path.join(libpng_root, 'include') libpng_lib = os.path.join(libpng_root, 'lib') - libjpeg_turbo_root = os.getenv('EBROOTLIBJPEGMINTURBO') + libjpeg_turbo_root = os.path.join(eessi_software_path, "software", "libjpeg-turbo", "2.1.5.1-GCCcore-12.3.0") libjpeg_turbo_include = os.path.join(libjpeg_turbo_root, 'include') libjpeg_turbo_lib = 
os.path.join(libjpeg_turbo_root, 'lib') exts_list_new = [] From 2ef5b5ad1afa6fd46fe8239092efba2eb9ff059a Mon Sep 17 00:00:00 2001 From: Thomas Roeblitz Date: Tue, 11 Jun 2024 08:50:39 +0200 Subject: [PATCH 6/6] fix shell syntax for adding exports --- eb_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eb_hooks.py b/eb_hooks.py index 3adf7cb98e..976c1ba777 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -329,7 +329,7 @@ def parse_hook_pytorch_bundle_torchvision_setenv(ec, eprefix): " but expected NONE") else: # add preinstallopts - ext_dict['preinstallopts'] = torchvision_include + '; ' + torchvision_library + ext_dict['preinstallopts'] = torchvision_include + ' && ' + torchvision_library + ' && ' exts_list_new.append((item[0], item[1], ext_dict)) ec['exts_list'] = exts_list_new print_msg("New exts_list: '%s'", ec['exts_list'])