Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docs/google.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,22 @@ Notes:
- A container image must be specified to execute processes. You can use a different Docker image for each process using one or more {ref}`config-process-selectors`.
- Make sure to specify the project ID, not the project name.
- Make sure to specify a location where Google Batch is available. Refer to the [Google Batch documentation](https://cloud.google.com/batch/docs/get-started#locations) for region availability.
- By default, Google Batch output unstaging uses `copy` when `stageOutMode` is not set, because unstaging goes from local scratch to a gcsfuse-mounted work directory and `move` can fail with overlapping outputs or symlinked paths.

Optional transfer settings:

```groovy
google {
batch {
// Default is 'posix' when unset
stageInCopyTransport = 'gcloud' // or 'gsutil' / 'posix'
stageOutCopyTransport = 'gcloud' // or 'gsutil' / 'posix'
}
}
```

When `gcloud` or `gsutil` is selected, CLI staging is used for `copy` mode with fallback to POSIX mount copy.
Users are responsible for ensuring the selected CLI transport is available in the task container/runtime environment.

Read the {ref}`Google configuration<config-google>` section to learn more about advanced configuration options.

Expand Down
24 changes: 24 additions & 0 deletions docs/reference/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -960,11 +960,20 @@ The following settings are available:
`google.batch.cpuPlatform`
: The [minimum CPU Platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform#specifications), e.g. `'Intel Skylake'` (default: none).

`google.batch.delayBetweenAttempts`
: Delay between transfer retry attempts (default: `10 sec`).

`google.batch.gcloudCli`
: The `gcloud` executable path used for CLI staging (default: `gcloud` from `PATH`).

`google.batch.gcsfuseOptions`
: :::{versionadded} 25.03.0-edge
:::
: List of custom mount options for `gcsfuse` (default: `['-o rw', '-implicit-dirs']`).

`google.batch.gsutilCli`
: The `gsutil` executable path used for CLI staging (default: `gsutil` from `PATH`).

`google.batch.installOpsAgent`
: Enables Ops Agent installation on Google Batch instances for enhanced monitoring and logging (default: `false`). See the [Google Batch documentation](https://docs.cloud.google.com/batch/docs/create-run-job-ops-agent) for details.

Expand All @@ -987,6 +996,12 @@ The following settings are available:
: Max number of execution attempts of a job interrupted by a Compute Engine Spot reclaim event (default: `0`).
: See also: `google.batch.autoRetryExitCodes`

`google.batch.maxParallelTransfers`
: Max parallel upload/download transfer operations per task script (default: `4`).

`google.batch.maxTransferAttempts`
: Max transfer retry attempts used by built-in transfer retry wrappers (default: `1`).

`google.batch.network`
: The URL of an existing network resource to which the VM will be attached.

Expand All @@ -1007,6 +1022,15 @@ The following settings are available:
`google.batch.spot`
: Enable the use of spot virtual machines (default: `false`).

`google.batch.stageInCopyTransport`
: Stage-in transport for `copy` mode. Can be `posix`, `gcloud`, or `gsutil` (default: `posix`).
: When set to `gcloud` or `gsutil`, CLI staging is used with fallback to POSIX mount copy.

`google.batch.stageOutCopyTransport`
: Stage-out transport for `copy` mode. Can be `posix`, `gcloud`, or `gsutil` (default: `posix`).
: When set to `gcloud` or `gsutil`, CLI staging is used with fallback to POSIX mount copy.
: Users are responsible for ensuring the selected CLI transport is available in the task runtime/container environment.

`google.batch.subnetwork`
: The URL of an existing subnetwork resource in the network to which the VM will be attached.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
/*
* Copyright 2013-2026, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package nextflow.cloud.google.batch

import groovy.transform.CompileStatic
import groovy.transform.Memoized
import nextflow.cloud.google.batch.client.BatchConfig
import nextflow.executor.BashFunLib
import nextflow.util.Escape

/**
* Bash helpers for Google Cloud Storage transfers in Google Batch tasks.
* Order within each transport is controlled by {@code NXF_STAGE_IN_COPY_TRANSPORT} / {@code NXF_STAGE_OUT_COPY_TRANSPORT}.
*
* @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
*/
@CompileStatic
class GoogleBatchBashLib extends BashFunLib<GoogleBatchBashLib> {

private String gcloudCli = 'gcloud'

private String gsutilCli = 'gsutil'

private BatchConfig batchConfig

GoogleBatchBashLib withGcloudCli(String value) {
if( value )
this.gcloudCli = value
return this
}

GoogleBatchBashLib withGsutilCli(String value) {
if( value )
this.gsutilCli = value
return this
}

GoogleBatchBashLib withBatchConfig(BatchConfig config) {
this.batchConfig = config
return this
}

protected String gsLib() {
final gcloud = Escape.path(gcloudCli ?: 'gcloud')
final gsutil = Escape.path(gsutilCli ?: 'gsutil')
final mountRoot = GoogleBatchScriptLauncher.MOUNT_ROOT
"""
export NXF_GCLOUD=$gcloud
export NXF_GSUTIL=$gsutil
export NXF_GS_MOUNT_ROOT="$mountRoot"

nxf_gs_uri_to_mount() {
case "\$1" in
gs://*)
local p=\${1#gs://}
local bucket=\${p%%/*}
if [[ "\$bucket" == "\$p" ]]; then
echo "\$NXF_GS_MOUNT_ROOT/\$bucket"
else
local rest=\${p#*/}
echo "\$NXF_GS_MOUNT_ROOT/\$bucket/\$rest"
fi
;;
*)
echo ""
;;
esac
}

nxf_gs_download_gcloud() {
command -v "\$NXF_GCLOUD" >/dev/null 2>&1 || return 1
local source=\$1
local target=\$2
local basedir=\$(dirname "\$2")
mkdir -p "\$basedir"
if \$NXF_GCLOUD storage cp "\$source" "\$target" 2>/dev/null; then
return 0
fi
mkdir -p "\$target"
if \$NXF_GCLOUD storage cp --recursive "\$source" "\$target" >/dev/null 2>&1; then
return 0
fi
rm -rf "\$target"
return 1
}

nxf_gs_download_gsutil() {
command -v "\$NXF_GSUTIL" >/dev/null 2>&1 || return 1
local source=\$1
local target=\$2
local basedir=\$(dirname "\$2")
mkdir -p "\$basedir"
local ret
ret=\$(\$NXF_GSUTIL cp "\$source" "\$target" 2>&1) || {
mkdir -p "\$target"
\$NXF_GSUTIL -m cp -r "\$source" "\$target" >/dev/null || {
rm -rf "\$target"
return 1
}
}
return 0
}

nxf_gs_download_mount() {
local source=\$1
local target=\$2
local ms
ms=\$(nxf_gs_uri_to_mount "\$source")
[[ -n "\$ms" ]] || return 1
[[ -e "\$ms" || -d "\$ms" ]] || return 1
mkdir -p "\$(dirname "\$target")"
cp -fRL "\$ms" "\$target" || return 1
return 0
}

nxf_gs_download() {
local source=\$1
local target=\$2
case "\${NXF_STAGE_IN_COPY_TRANSPORT:-posix}" in
gsutil)
if nxf_gs_download_gsutil "\$source" "\$target"; then return 0; fi
if nxf_gs_download_gcloud "\$source" "\$target"; then return 0; fi
if nxf_gs_download_mount "\$source" "\$target"; then return 0; fi
;;
gcloud)
if nxf_gs_download_gcloud "\$source" "\$target"; then return 0; fi
if nxf_gs_download_gsutil "\$source" "\$target"; then return 0; fi
if nxf_gs_download_mount "\$source" "\$target"; then return 0; fi
;;
posix|*)
if nxf_gs_download_mount "\$source" "\$target"; then return 0; fi
;;
esac
>&2 echo "Unable to download path: \$source"
return 1
}

nxf_gs_upload_try_gcloud() {
command -v "\$NXF_GCLOUD" >/dev/null 2>&1 || return 1
local name=\$1
local gspath=\$2
if [[ -d "\$name" ]]; then
\$NXF_GCLOUD storage cp --recursive "\$name" "\$gspath/\$name" || return 1
else
\$NXF_GCLOUD storage cp "\$name" "\$gspath/\$name" || return 1
fi
}

nxf_gs_upload_try_gsutil() {
command -v "\$NXF_GSUTIL" >/dev/null 2>&1 || return 1
local name=\$1
local gspath=\$2
if [[ -d "\$name" ]]; then
\$NXF_GSUTIL -m cp -r "\$name" "\$gspath/\$name" || return 1
else
\$NXF_GSUTIL cp "\$name" "\$gspath/\$name" || return 1
fi
}

nxf_gs_upload_try_mount() {
local name=\$1
local gspath=\$2
local dest
dest=\$(nxf_gs_uri_to_mount "\$gspath/\$name")
[[ -n "\$dest" ]] || return 1
mkdir -p "\$(dirname "\$dest")"
cp -fRL "\$name" "\$dest" || return 1
}

nxf_gs_upload() {
local name=\$1
local gspath=\${2%/}
case "\${NXF_STAGE_OUT_COPY_TRANSPORT:-posix}" in
gsutil)
if nxf_gs_upload_try_gsutil "\$name" "\$gspath"; then return 0; fi
if nxf_gs_upload_try_gcloud "\$name" "\$gspath"; then return 0; fi
if nxf_gs_upload_try_mount "\$name" "\$gspath"; then return 0; fi
;;
gcloud)
if nxf_gs_upload_try_gcloud "\$name" "\$gspath"; then return 0; fi
if nxf_gs_upload_try_gsutil "\$name" "\$gspath"; then return 0; fi
if nxf_gs_upload_try_mount "\$name" "\$gspath"; then return 0; fi
;;
posix|*)
if nxf_gs_upload_try_mount "\$name" "\$gspath"; then return 0; fi
;;
esac
>&2 echo "Unable to upload path: \$name"
return 1
}
""".stripIndent(true)
}

protected String transportExports() {
final cfg = batchConfig
if( !cfg )
return ''
final inTr = cfg.stageInCopyTransport ?: BatchConfig.COPY_TRANSPORT_POSIX
final outTr = cfg.stageOutCopyTransport ?: BatchConfig.COPY_TRANSPORT_POSIX
"""
export NXF_STAGE_IN_COPY_TRANSPORT=${Escape.path(inTr)}
export NXF_STAGE_OUT_COPY_TRANSPORT=${Escape.path(outTr)}
""".stripIndent(true)
}

@Override
String render() {
super.render() + transportExports() + gsLib()
}

@Memoized
static String script(BatchConfig config) {
new GoogleBatchBashLib()
.includeCoreFun(true)
.withMaxParallelTransfers(config.maxParallelTransfers)
.withMaxTransferAttempts(config.maxTransferAttempts)
.withDelayBetweenAttempts(config.delayBetweenAttempts)
.withGcloudCli(config.gcloudCli)
.withGsutilCli(config.gsutilCli)
.withBatchConfig(config)
.render()
}
}
Loading