From c65e0fbfea3a3a9de5558b154dd9261a1815311b Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 16 Mar 2026 14:53:17 +0000 Subject: [PATCH] Stage module and project bin scripts as task input files [ci fast] Replace the bind-mount mechanism for module binaries and project bin/ scripts with input file staging. Bin scripts are staged into a hidden .bin/ directory in each task's work directory and made available via PATH. On local/HPC executors, scripts are symlinked (zero-cost). On cloud executors, scripts are uploaded once to {workDir}/.nextflow/bin/ and staged per-task via standard cloud download commands. Project bin scripts are filtered to only those referenced in the task script, avoiding unnecessary staging for large bin/ directories. The nextflow.enable.moduleBinaries feature flag is deprecated with a warning. Setting it to false is honored for one release cycle. Signed-off-by: Jonathan Manning Co-Authored-By: Claude Opus 4.6 Signed-off-by: Jonathan Manning --- docs/module.md | 16 +-- docs/reference/feature-flags.md | 4 +- .../src/main/groovy/nextflow/NF.groovy | 4 + .../main/groovy/nextflow/NextflowMeta.groovy | 4 + .../main/groovy/nextflow/cli/CmdRun.groovy | 10 +- .../executor/BashWrapperBuilder.groovy | 10 +- .../groovy/nextflow/processor/TaskBean.groovy | 15 ++ .../nextflow/processor/TaskHasher.groovy | 8 +- .../nextflow/processor/TaskProcessor.groovy | 136 ++++++++++++++---- .../groovy/nextflow/processor/TaskRun.groovy | 5 + .../script/bundle/ResourcesBundle.groovy | 24 ++++ .../nextflow/executor/command-run.txt | 1 + .../groovy/nextflow/cli/CmdRunTest.groovy | 5 +- .../executor/BashWrapperBuilderTest.groovy | 31 +++- .../nextflow/processor/TaskBeanTest.groovy | 6 +- .../processor/TaskProcessorTest.groovy | 117 ++++++++++++--- .../script/bundle/ResourcesBundleTest.groovy | 24 ++++ .../nextflow/k8s/K8sTaskHandlerTest.groovy | 4 +- 18 files changed, 350 insertions(+), 74 deletions(-) diff --git a/docs/module.md b/docs/module.md index 73d92fe4e2..9709cd8eb2 100644 --- a/docs/module.md +++ b/docs/module.md @@ -253,13 +253,13 @@ baseDir Modules can define binary scripts that are locally scoped to the processes defined by the tasks. -To enable this feature, set the following flag in your pipeline script or configuration file: +The binary scripts can be placed in any of the following directories within the module: -```nextflow -nextflow.enable.moduleBinaries = true -``` +- `/resources/bin` +- `/resources/usr/bin` +- `/resources/usr/local/bin` -The binary scripts must be placed in the module directory named `/resources/usr/bin`: +For example: ``` @@ -271,11 +271,7 @@ The binary scripts must be placed in the module directory named `/re └── another-module-script2.py ``` -Those scripts will be made accessible like any other command in the task environment, provided they have been granted the Linux execute permissions. - -:::{note} -This feature requires the use of a local or shared file system for the pipeline work directory, or {ref}`wave-page` when using cloud-based executors. -::: +Those scripts will be made accessible like any other command in the task environment, provided they have been granted the Linux execute permissions. Module binaries work on all executors, including cloud-based executors. ## Sharing modules diff --git a/docs/reference/feature-flags.md b/docs/reference/feature-flags.md index 9f687b60fb..7358afcd9f 100644 --- a/docs/reference/feature-flags.md +++ b/docs/reference/feature-flags.md @@ -20,7 +20,9 @@ Feature flags with the `nextflow.preview` prefix can cause pipelines run with ne : Defines the DSL version to use (`1` or `2`). `nextflow.enable.moduleBinaries` -: When `true`, enables the use of modules with binary scripts. See {ref}`module-binaries` for more information. +: :::{deprecated} 25.04.0 + ::: +: Module binaries are now enabled by default. This flag is no longer required. See {ref}`module-binaries` for more information. `nextflow.enable.strict` : :::{deprecated} 26.04.0 diff --git a/modules/nextflow/src/main/groovy/nextflow/NF.groovy b/modules/nextflow/src/main/groovy/nextflow/NF.groovy index 0fd841e4b5..9e77d87ccf 100644 --- a/modules/nextflow/src/main/groovy/nextflow/NF.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/NF.groovy @@ -62,6 +62,10 @@ class NF { NextflowMeta.instance.isModuleBinariesEnabled() } + static boolean isModuleBinariesDisabled() { + NextflowMeta.instance.isModuleBinariesDisabled() + } + static boolean isRecurseEnabled() { NextflowMeta.instance.preview.recursion } diff --git a/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy b/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy index 82315653d9..ed3a1c9fd5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/NextflowMeta.groovy @@ -124,8 +124,12 @@ class NextflowMeta { return enable.moduleBinaries } + boolean moduleBinariesDisabled + void moduleBinaries(boolean mode) { enable.moduleBinaries = mode + if( !mode ) + moduleBinariesDisabled = true } } diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index 08cb119d23..b8d44f92fe 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -486,11 +486,17 @@ class CmdRun extends CmdBase implements HubOptions { } static void detectModuleBinaryFeature(ConfigMap config) { - final moduleBinaries = config.navigate('nextflow.enable.moduleBinaries', false) + final moduleBinaries = config.navigate('nextflow.enable.moduleBinaries') + if( moduleBinaries == null ) + return if( moduleBinaries ) { - log.debug "Enabling module binaries" + log.warn "Configuration `nextflow.enable.moduleBinaries` is no longer needed -- module binaries are now enabled by default" NextflowMeta.instance.moduleBinaries(true) } + else { + log.warn "Configuration `nextflow.enable.moduleBinaries = false` is deprecated and will be ignored in a future version -- module binaries are now always enabled" + NextflowMeta.instance.moduleBinaries(false) + } } static void detectStrictFeature(ConfigMap config, Map sysEnv) { diff --git a/modules/nextflow/src/main/groovy/nextflow/executor/BashWrapperBuilder.groovy b/modules/nextflow/src/main/groovy/nextflow/executor/BashWrapperBuilder.groovy index 3005364cdf..c61fa8ac77 100644 --- a/modules/nextflow/src/main/groovy/nextflow/executor/BashWrapperBuilder.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/executor/BashWrapperBuilder.groovy @@ -391,6 +391,7 @@ class BashWrapperBuilder { binding.trace_cmd = getTraceCommand(interpreter) binding.launch_cmd = getLaunchCommand(interpreter,env) binding.stage_cmd = getStageCommand() + binding.module_bin_path = binFilesStaged ? getBinPathScript() : null binding.unstage_cmd = getUnstageCommand() binding.unstage_controls = changeDir || shouldUnstageControls() ? getUnstageControls() : null @@ -700,9 +701,6 @@ class BashWrapperBuilder { if( stageInMode != 'copy' && allowContainerMounts ) builder.addMountForInputs(inputFiles) - if( allowContainerMounts ) - builder.addMounts(binDirs) - if(this.containerMount) builder.addMount(containerMount) @@ -775,6 +773,12 @@ class BashWrapperBuilder { p != -1 ? "nxf_module_load ${name.substring(0,p)} ${name.substring(p+1)}" : "nxf_module_load ${name}" } + protected String getBinPathScript() { + final binDir = TaskRun.BIN_DIR + "chmod +x ${binDir}/*\n" + + "export PATH=\"\$PWD/${binDir}:\$PATH\"" + } + protected String getStageCommand() { 'nxf_stage' } protected String getUnstageCommand() { 'nxf_unstage' } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskBean.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskBean.groovy index 1458dee615..644651f188 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskBean.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskBean.groovy @@ -20,6 +20,7 @@ import java.nio.file.Path import groovy.transform.CompileStatic import groovy.transform.PackageScope +import groovy.util.logging.Slf4j import nextflow.container.ContainerConfig import nextflow.executor.BashWrapperBuilder import nextflow.executor.TaskArrayExecutor @@ -30,6 +31,7 @@ import nextflow.util.MemoryUnit * * @author Paolo Di Tommaso */ +@Slf4j @CompileStatic class TaskBean implements Serializable, Cloneable { @@ -117,6 +119,8 @@ class TaskBean implements Serializable, Cloneable { Boolean stageFileEnabled + boolean binFilesStaged + @PackageScope TaskBean() { shell = BashWrapperBuilder.BASH @@ -166,6 +170,17 @@ class TaskBean implements Serializable, Cloneable { this.statsEnabled = task.getProcessor().getSession().statsEnabled this.inputFiles = task.getInputFilesMap() + final moduleBinFiles = task.getProcessor().getModuleBinFiles() ?: Collections.emptyMap() + final projectBinFiles = task.getProcessor().getReferencedProjectBinFiles(task.source) ?: Collections.emptyMap() + this.binFilesStaged = !moduleBinFiles.isEmpty() || !projectBinFiles.isEmpty() + if( binFilesStaged ) { + this.inputFiles.putAll(moduleBinFiles) + for( Map.Entry e : projectBinFiles ) { + if( moduleBinFiles.containsKey(e.key) ) + log.warn "Project bin script '${e.key.substring(TaskRun.BIN_DIR.length() + 1)}' overrides module bin script with the same name" + this.inputFiles.put(e.key, e.value) + } + } this.outputFiles = task.getOutputFilesNames() this.binDirs = task.getProcessor().getBinDirs() this.stageInMode = task.config.getStageInMode() diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskHasher.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskHasher.groovy index 723488a6de..784cd3bcea 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskHasher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskHasher.groovy @@ -96,6 +96,12 @@ class TaskHasher { keys.addAll(binEntries) } + final moduleBinFiles = processor.getModuleBinFiles() + if( moduleBinFiles ) { + log.trace "Task: ${task.processor.name} > Adding module bin files: ${-> moduleBinFiles.values().join('; ')}" + keys.addAll(moduleBinFiles.values()) + } + // add environment modules (`module` directive) final modules = task.getConfig().getModule() if( modules ) { @@ -212,7 +218,7 @@ class TaskHasher { @Memoized List getTaskBinEntries(String script) { List result = [] - final tokenizer = new StringTokenizer(script, " \t\n\r\f()[]{};&|<>`") + final tokenizer = new StringTokenizer(script, TaskProcessor.SCRIPT_TOKEN_DELIMITERS) while( tokenizer.hasMoreTokens() ) { final token = tokenizer.nextToken() final path = session.binEntries.get(token) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index f0ff2152f0..ef511e976c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -19,6 +19,7 @@ import static nextflow.processor.ErrorStrategy.* import java.nio.file.FileSystems import java.nio.file.Path +import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicBoolean import java.util.concurrent.atomic.AtomicInteger import java.util.concurrent.atomic.AtomicIntegerArray @@ -64,6 +65,7 @@ import nextflow.executor.Executor import nextflow.executor.StoredTaskHandler import nextflow.extension.CH import nextflow.extension.DataflowHelper +import nextflow.extension.FilesEx import nextflow.file.FileHelper import nextflow.file.FileHolder import nextflow.file.FilePorter @@ -1568,16 +1570,114 @@ class TaskProcessor { return meta?.isModule() ? meta.getModuleBundle() : null } - @Memoized + /** + * @deprecated Bin dirs are no longer bind-mounted. Bin scripts are now staged + * as input files under {@link TaskRun#BIN_DIR}. Kept for backward compatibility + * with plugins (e.g. nf-k8s) that reference this method. + */ + @Deprecated protected List getBinDirs() { - final result = new ArrayList(10) - // module bundle bin dir have priority, add before - final bundle = session.enableModuleBinaries() ? getModuleBundle() : null - if( bundle!=null ) - result.addAll(bundle.getBinDirs()) - // then add project bin dir - if( executor.binDir ) - result.add(executor.binDir) + return Collections.emptyList() + } + + /** + * Collect module bin files to be staged into the task work directory. + * These are scripts from the module's resources/bin/ directory that will + * be staged as input files under {@link TaskRun#BIN_DIR} and made available via PATH. + * + * @return A map of staging name to file path, e.g. {@code '.bin/myscript.sh' -> /path/to/file} + */ + @Memoized + Map getModuleBinFiles() { + if( NF.isModuleBinariesDisabled() ) + return Collections.emptyMap() + final bundle = getModuleBundle() + if( bundle == null ) + return Collections.emptyMap() + final rawFiles = bundle.getBinFiles() + if( rawFiles.isEmpty() ) + return Collections.emptyMap() + final files = isLocalWorkDir() ? rawFiles : uploadBinFiles(rawFiles) + return prefixBinFiles(files) + } + + /** + * Collect project-level bin files to be staged into the task work directory. + * These are scripts from the project's bin/ directory that will be staged + * as input files under {@link TaskRun#BIN_DIR} and made available via PATH. + * + * @return A map of staging name to file path, e.g. {@code '.bin/myscript.sh' -> /path/to/file} + */ + @Memoized + Map getProjectBinFiles() { + final entries = session.binEntries + if( !entries ) + return Collections.emptyMap() + final files = isLocalWorkDir() ? entries : uploadBinFiles(entries) + return prefixBinFiles(files) + } + + static final String SCRIPT_TOKEN_DELIMITERS = " \t\n\r\f()[]{};&|<>`" + + /** + * Filter project bin files to only those referenced in the given script. + * Memoized so tokenization runs once per unique script source across all tasks in a process. + */ + @Memoized + Map getReferencedProjectBinFiles(String script) { + final allBinFiles = getProjectBinFiles() + if( !allBinFiles || !script ) + return Collections.emptyMap() + final referenced = new LinkedHashMap(allBinFiles.size()) + final tokenizer = new StringTokenizer(script, SCRIPT_TOKEN_DELIMITERS) + while( tokenizer.hasMoreTokens() ) { + final key = TaskRun.BIN_DIR + '/' + tokenizer.nextToken() + final path = allBinFiles.get(key) + if( path ) + referenced.put(key, path) + } + return referenced + } + + /** + * Prefix all keys in the given map with the bin staging directory name. + */ + private static Map prefixBinFiles(Map files) { + if( files.isEmpty() ) + return Collections.emptyMap() + final prefix = TaskRun.BIN_DIR + '/' + final result = new LinkedHashMap(files.size()) + for( Map.Entry e : files ) { + result.put(prefix + e.key, e.value) + } + return result + } + + private static final ConcurrentHashMap uploadedBinFiles = new ConcurrentHashMap<>() + + @TestOnly + static void resetBinFileUploadCache() { + uploadedBinFiles.clear() + } + + /** + * Upload bin files to cloud storage so they can be staged by cloud copy strategies. + * Files are uploaded to {@code {workDir}/.nextflow/bin/}. Uses a shared cache + * so that multiple processors sharing the same work directory upload each file only once. + */ + @PackageScope + Map uploadBinFiles(Map files) { + final stageDir = executor.workDir.resolve('.nextflow/bin') + FilesEx.mkdirs(stageDir) + final result = new LinkedHashMap(files.size()) + for( Map.Entry e : files ) { + final target = stageDir.resolve(e.key) + final uploaded = uploadedBinFiles.putIfAbsent(e.value, target) + if( uploaded == null ) { + FileHelper.copyPath(e.value, target) + } + result.put(e.key, target) + } return result } @@ -1604,24 +1704,6 @@ class TaskProcessor { log.debug "Invalid 'session.config.env' object: ${session.config.env?.class?.name}" } - // append the 'bin' folder to the task environment - List paths - if( isLocalWorkDir() && (paths=getBinDirs()) ) { - for( Path it : paths ) { - if( result.containsKey('PATH') ) { - // note: do not escape potential blanks in the bin path because the PATH - // variable is enclosed in `"` when in rendered in the launcher script -- see #630 - result['PATH'] = "${result['PATH']}:${it}".toString() - } - else { - // note: append custom bin path *after* the system PATH - // to prevent unnecessary network round-trip for each command - // when the added path is a shared file system directory - result['PATH'] = "\$PATH:${it}".toString() - } - } - } - return Collections.unmodifiableMap(result) } diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy index 2213783bfe..11e9ac7aca 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy @@ -562,6 +562,11 @@ class TaskRun implements Cloneable { static final public String CMD_TRACE = '.command.trace' static final public String CMD_ENV = '.command.env' + /** + * The directory name used for staging bin scripts in the task work directory + */ + static final public String BIN_DIR = '.bin' + String toString( ) { "id: $id; name: $name; type: $type; exit: ${exitStatus==Integer.MAX_VALUE ? '-' : exitStatus}; error: $error; workDir: $workDir" diff --git a/modules/nextflow/src/main/groovy/nextflow/script/bundle/ResourcesBundle.groovy b/modules/nextflow/src/main/groovy/nextflow/script/bundle/ResourcesBundle.groovy index 5de1c3d4ba..78bfb3051b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/bundle/ResourcesBundle.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/bundle/ResourcesBundle.groovy @@ -202,6 +202,7 @@ class ResourcesBundle { } final private static List BIN_PATHS = ['bin','usr/bin','usr/local/bin'] + final private static List BIN_PREFIXES = BIN_PATHS.collect { it + '/' } List getBinDirs() { final result = new ArrayList(10) @@ -213,4 +214,27 @@ class ResourcesBundle { Collections.sort(result) return result } + + /** + * Collect all executable files under bin directories in this bundle. + * + * @return A map of filename to file path, e.g. {@code 'myscript.sh' -> /path/to/resources/bin/myscript.sh} + */ + Map getBinFiles() { + final result = new LinkedHashMap(10) + for( Map.Entry it : content ) { + if( hasBinPrefix(it.key) && Files.isRegularFile(it.value) ) { + result.put(it.value.getFileName().toString(), it.value) + } + } + return result + } + + private static boolean hasBinPrefix(String key) { + for( String pfx : BIN_PREFIXES ) { + if( key.startsWith(pfx) ) + return true + } + return false + } } diff --git a/modules/nextflow/src/main/resources/nextflow/executor/command-run.txt b/modules/nextflow/src/main/resources/nextflow/executor/command-run.txt index 7c267ae96b..efcf907496 100644 --- a/modules/nextflow/src/main/resources/nextflow/executor/command-run.txt +++ b/modules/nextflow/src/main/resources/nextflow/executor/command-run.txt @@ -171,6 +171,7 @@ nxf_main() { [[ $NXF_SCRATCH ]] && cd $NXF_SCRATCH export NXF_TASK_WORKDIR="$PWD" {{stage_cmd}} + {{module_bin_path}} set +e (set -o pipefail; (nxf_launch | tee {{stdout_file}}) 3>&1 1>&2 2>&3 | tee {{stderr_file}}) & diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdRunTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdRunTest.groovy index edd3b56c33..07929ed554 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdRunTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdRunTest.groovy @@ -341,14 +341,15 @@ class CmdRunTest extends Specification { NextflowMeta.instance.isModuleBinariesEnabled() == EXPECTED cleanup: - NextflowMeta.instance.moduleBinaries(false) + NextflowMeta.instance.enable.moduleBinaries = false + NextflowMeta.instance.moduleBinariesDisabled = false where: INITIAL | CONFIG | EXPECTED true | [nextflow: [enable: [ moduleBinaries: true ]]] | true false | [nextflow: [enable: [ moduleBinaries: true ]]] | true false | [nextflow: [enable: [ moduleBinaries: false ]]] | false - true | [nextflow: [enable: [ moduleBinaries: false ]]] | true + true | [nextflow: [enable: [ moduleBinaries: false ]]] | false false | [:] | false true | [:] | true } diff --git a/modules/nextflow/src/test/groovy/nextflow/executor/BashWrapperBuilderTest.groovy b/modules/nextflow/src/test/groovy/nextflow/executor/BashWrapperBuilderTest.groovy index 0a34b825df..a15d238166 100644 --- a/modules/nextflow/src/test/groovy/nextflow/executor/BashWrapperBuilderTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/executor/BashWrapperBuilderTest.groovy @@ -202,7 +202,7 @@ class BashWrapperBuilderTest extends Specification { def bash = Spy(new BashWrapperBuilder(Mock(TaskBean))) and: bash.getEnvironment() >> [:] - bash.getBinDirs() >> [Paths.get('/my/bin') ] + bash.getBinDirs() >> [] bash.getWorkDir() >> Paths.get('/my/work/dir') bash.isStatsEnabled() >> false bash.getStageInMode() >> 'symlink' @@ -224,7 +224,7 @@ class BashWrapperBuilderTest extends Specification { builder instanceof SingularityBuilder builder.env == ['NXF_TASK_WORKDIR', 'FOO','BAR'] builder.workDir == Paths.get('/my/work/dir') - builder.mounts == [ Paths.get('/my/bin') ] + builder.mounts == [] } def 'should add resolved inputs'() { @@ -511,6 +511,33 @@ class BashWrapperBuilderTest extends Specification { folder?.deleteDir() } + def 'should stage module bin files and set PATH' () { + given: + def folder = Paths.get('/work/dir') + def inputs = [ + 'sample.fq': Paths.get('/some/data/sample.fq'), + '.bin/myscript.sh': Paths.get('/modules/tool/resources/bin/myscript.sh'), + ] + + when: + def binding = newBashWrapperBuilder([ + workDir: folder, + targetDir: folder, + inputFiles: inputs, + binFilesStaged: true ]).makeBinding() + + then: + binding.stage_inputs.contains('mkdir -p .bin && ln -s /modules/tool/resources/bin/myscript.sh .bin/myscript.sh') + binding.module_bin_path.contains('export PATH="$PWD/.bin:$PATH"') + } + + def 'should not set module bin PATH when no module bins' () { + when: + def binding = newBashWrapperBuilder().makeBinding() + then: + binding.module_bin_path == null + } + def 'should include sync command' () { given: SysEnv.push([NXF_ENABLE_FS_SYNC: 'true']) diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/TaskBeanTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/TaskBeanTest.groovy index d133eff5db..17a42a6777 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/TaskBeanTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/TaskBeanTest.groovy @@ -38,7 +38,9 @@ class TaskBeanTest extends Specification { session.getStatsEnabled() >> true def process = Mock(TaskProcessor) { - getBinDirs() >> [Paths.get('/bin/dir') ] + getBinDirs() >> [] + getModuleBinFiles() >> [:] + getProjectBinFiles() >> [:] } process.getConfig() >> Mock(ProcessConfig) process.getSession() >> session @@ -99,7 +101,7 @@ class TaskBeanTest extends Specification { bean.inputFiles == [file_1: Paths.get('/file/one'), file_2: Paths.get('/file/two')] bean.outputFiles == [ 'simple.txt', 'my/path/file.bam' ] bean.workDir == Paths.get('/work/dir') - bean.binDirs == [Paths.get('/bin/dir')] + bean.binDirs == [] bean.stageInMode == 'link' bean.stageOutMode == 'rsync' diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy index a4f6c7ef2d..3b27d37cfc 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/TaskProcessorTest.groovy @@ -75,53 +75,126 @@ class TaskProcessorTest extends Specification { def session = new Session([env: [X:"1", Y:"2"]]) session.setBaseDir(home) def processor = createProcessor('task1', session) - def builder = new ProcessBuilder() - builder.environment().putAll( processor.getProcessEnvironment() ) + def env = processor.getProcessEnvironment() then: noExceptionThrown() - builder.environment().X == '1' - builder.environment().Y == '2' - builder.environment().PATH == "\$PATH:${binFolder.toString()}" + env.X == '1' + env.Y == '2' + !env.containsKey('PATH') when: session = new Session([env: [X:"1", Y:"2", PATH:'/some']]) session.setBaseDir(home) processor = createProcessor('task1', session) - builder = new ProcessBuilder() - builder.environment().putAll( processor.getProcessEnvironment() ) + env = processor.getProcessEnvironment() then: noExceptionThrown() - builder.environment().X == '1' - builder.environment().Y == '2' - builder.environment().PATH == "/some:${binFolder.toString()}" + env.X == '1' + env.Y == '2' + env.PATH == '/some' cleanup: home.deleteDir() } - @Unroll - def 'should add module bin paths to task env' () { + def 'should not inject bin dirs into process environment' () { given: def session = Mock(Session) { getConfig() >> [:] } def executor = Mock(Executor) { getBinDir() >> Path.of('/project/bin')} and: TaskProcessor processor = Spy(TaskProcessor, constructorArgs: [[session:session, executor:executor]]) - and: + when: def result = processor.getProcessEnvironment() then: - session.enableModuleBinaries() >> MODULE_BIN - processor.getModuleBundle() >> Mock(ResourcesBundle) { getBinDirs() >> [Path.of('/foo'), Path.of('/bar')] } - processor.isLocalWorkDir() >> LOCAL + result == [:] + } + + def 'should collect module bin files for staging' () { + given: + def session = Mock(Session) { getConfig() >> [:] } + def executor = Mock(Executor) { getBinDir() >> null } + and: + def rawBinFiles = ['script.sh': Path.of('/modules/foo/resources/bin/script.sh')] + def bundle = Mock(ResourcesBundle) { getBinFiles() >> rawBinFiles } and: - result == EXPECTED + TaskProcessor processor = Spy(TaskProcessor, constructorArgs: [[session:session, executor:executor]]) + processor.getModuleBundle() >> bundle + processor.isLocalWorkDir() >> true - where: - LOCAL | MODULE_BIN | EXPECTED - false | false | [:] - true | false | [PATH:'$PATH:/project/bin'] - true | true | [PATH:'$PATH:/foo:/bar:/project/bin'] + when: + def result = processor.getModuleBinFiles() + then: + result == ['.bin/script.sh': Path.of('/modules/foo/resources/bin/script.sh')] + } + + def 'should upload module bin files for cloud work dir' () { + given: + def session = Mock(Session) { getConfig() >> [:] } + def cloudWorkDir = Mock(Path) + def executor = Mock(Executor) { getBinDir() >> null; getWorkDir() >> cloudWorkDir } + and: + def rawBinFiles = ['script.sh': Path.of('/modules/foo/resources/bin/script.sh')] + def bundle = Mock(ResourcesBundle) { getBinFiles() >> rawBinFiles } + and: + TaskProcessor processor = Spy(TaskProcessor, constructorArgs: [[session:session, executor:executor]]) + processor.getModuleBundle() >> bundle + processor.isLocalWorkDir() >> false + def uploadedFiles = ['script.sh': Path.of('/cloud/bin/script.sh')] + processor.uploadBinFiles(rawBinFiles) >> uploadedFiles + + when: + def result = processor.getModuleBinFiles() + then: + result == ['.bin/script.sh': Path.of('/cloud/bin/script.sh')] + } + + def 'should return empty map when no module bundle' () { + given: + def session = Mock(Session) { getConfig() >> [:] } + def executor = Mock(Executor) { getBinDir() >> null } + and: + TaskProcessor processor = Spy(TaskProcessor, constructorArgs: [[session:session, executor:executor]]) + processor.getModuleBundle() >> null + + when: + def result = processor.getModuleBinFiles() + then: + result == [:] + } + + def 'should collect project bin files for staging' () { + given: + def binEntries = ['script.sh': Path.of('/project/bin/script.sh'), 'tool.py': Path.of('/project/bin/tool.py')] + def session = Mock(Session) { getConfig() >> [:]; getBinEntries() >> binEntries } + def executor = Mock(Executor) { getBinDir() >> Path.of('/project/bin') } + and: + TaskProcessor processor = Spy(TaskProcessor, constructorArgs: [[session:session, executor:executor]]) + processor.isLocalWorkDir() >> true + + when: + def result = processor.getProjectBinFiles() + then: + result == ['.bin/script.sh': Path.of('/project/bin/script.sh'), '.bin/tool.py': Path.of('/project/bin/tool.py')] + } + + def 'should upload project bin files for cloud work dir' () { + given: + def binEntries = ['script.sh': Path.of('/project/bin/script.sh')] + def session = Mock(Session) { getConfig() >> [:]; getBinEntries() >> binEntries } + def cloudWorkDir = Mock(Path) + def executor = Mock(Executor) { getBinDir() >> Path.of('/project/bin'); getWorkDir() >> cloudWorkDir } + and: + TaskProcessor processor = Spy(TaskProcessor, constructorArgs: [[session:session, executor:executor]]) + processor.isLocalWorkDir() >> false + def uploadedFiles = ['script.sh': Path.of('/cloud/bin/script.sh')] + processor.uploadBinFiles(binEntries) >> uploadedFiles + + when: + def result = processor.getProjectBinFiles() + then: + result == ['.bin/script.sh': Path.of('/cloud/bin/script.sh')] } def 'should fetch interpreter from shebang line'() { diff --git a/modules/nextflow/src/test/groovy/nextflow/script/bundle/ResourcesBundleTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/bundle/ResourcesBundleTest.groovy index 63cfd7553b..1722f31826 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/bundle/ResourcesBundleTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/bundle/ResourcesBundleTest.groovy @@ -231,4 +231,28 @@ class ResourcesBundleTest extends Specification { } + def 'should return bin files for staging' () { + given: + def root = folder.resolve('mod2'); root.mkdir() + and: + root.resolve('bin').mkdirs() + root.resolve('bin/script1.sh').text = '#!/bin/bash\necho hello' + root.resolve('bin/script2.py').text = '#!/usr/bin/env python\nprint("hello")' + root.resolve('usr/local/bin').mkdirs() + root.resolve('usr/local/bin/tool.sh').text = '#!/bin/bash\necho tool' + and: + root.resolve('data').mkdirs() + root.resolve('data/file.txt').text = 'data' + + when: + def bundle = ResourcesBundle.scan(root) + def binFiles = bundle.getBinFiles() + + then: + binFiles.size() == 3 + binFiles['script1.sh'] == root.resolve('bin/script1.sh') + binFiles['script2.py'] == root.resolve('bin/script2.py') + binFiles['tool.sh'] == root.resolve('usr/local/bin/tool.sh') + } + } diff --git a/plugins/nf-k8s/src/test/nextflow/k8s/K8sTaskHandlerTest.groovy b/plugins/nf-k8s/src/test/nextflow/k8s/K8sTaskHandlerTest.groovy index e63e36b0f4..96e6d8c465 100644 --- a/plugins/nf-k8s/src/test/nextflow/k8s/K8sTaskHandlerTest.groovy +++ b/plugins/nf-k8s/src/test/nextflow/k8s/K8sTaskHandlerTest.groovy @@ -698,9 +698,9 @@ class K8sTaskHandlerTest extends Specification { then: 1 * k8sConfig.getAutoMountHostPaths() >> true 1 * wrapper.getInputFiles() >> ['foo': Paths.get('/base_path/foo.txt'), 'bar': Paths.get('/base_path/bar.txt')] - 1 * wrapper.getBinDirs() >> [ Paths.get('/user/bin') ] + 1 * wrapper.getBinDirs() >> [] 1 * wrapper.getWorkDir() >> Paths.get('/work/dir') - mounts == ['/base_path', '/user/bin', '/work/dir'] + mounts == ['/base_path', '/work/dir'] }