Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,211 changes: 1,211 additions & 0 deletions CHANGELOG_ALTOS.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions assets/protocols.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"alevin": {
"10XV1": {
"protocol": "10xv1",
"protocol": "1{b[14]u[10]x:}2{r:}",
"whitelist": "assets/whitelist/10x_V1_barcode_whitelist.txt.gz"
},
"10XV2": {
Expand All @@ -13,7 +13,7 @@
"whitelist": "assets/whitelist/10x_V3_barcode_whitelist.txt.gz"
},
"10XV4": {
"protocol": "10xv4",
"protocol": "1{b[16]u[12]x:}2{r:}",
"whitelist": "assets/whitelist/10x_V4_barcode_whitelist.txt.gz"
},
"dropseq": {
Expand Down
10 changes: 10 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,18 @@ process {
enabled: false
]
}
// Publishing settings for the AUTO_DETECT_PROTOCOL process.
// The target directory is <outdir>/pipeline_info, but publishing is switched off
// via `enabled: false` below.
withName: AUTO_DETECT_PROTOCOL {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
// returning null from saveAs excludes versions.yml; every other file keeps its name
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
// publishing is disabled for this process
enabled: false
]
}
}



if(params.aligner == "cellranger") {
process {
withName: CELLRANGER_MKGTF {
Expand Down
95 changes: 95 additions & 0 deletions modules/local/auto_detect_protocol.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@

/*
 * AUTO_DETECT_PROTOCOL
 *
 * Guesses the single-cell chemistry/protocol of a sample by comparing the cell
 * barcodes found in the first FastQ file against the barcode whitelist of every
 * protocol that `protocol_json` defines for the selected `aligner`.
 *
 * Inputs
 *   meta, reads        sample metadata and FastQ files; reads[0] must contain the cell barcodes
 *   aligner            top-level key of `protocol_json` whose protocols are evaluated
 *   protocol_json      JSON mapping aligner -> protocol key -> { protocol, whitelist, extra_args }
 *   barcode_whitelist  whitelist files; they are looked up in the task directory by the
 *                      basename of each `whitelist` entry in `protocol_json`
 *
 * Outputs
 *   ch_fastq    the unmodified (meta, reads) tuple
 *   protocol    `protocol` attribute of the detected protocol
 *   extra_args  `extra_args` attribute of the detected protocol (may be empty)
 *   whitelist   files matching *.txt.gz; the script writes the chosen whitelist to whitelist.txt.gz
 *   versions    versions.yml with the jq version
 *
 * The task fails (exit 1) unless exactly one protocol reaches a matching
 * fraction of at least 0.7.
 */
process AUTO_DETECT_PROTOCOL {
    tag "$meta.id"
    label 'process_single'

    conda 'conda-forge::jq=1.6'
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/jq:1.6' :
        'biocontainers/jq:1.6' }"

    input:
    // the first FastQ file in `reads` is expected to contain the cell barcodes
    tuple val(meta), path(reads)
    val aligner
    path protocol_json
    path barcode_whitelist

    output:
    tuple val(meta), path(reads), emit: ch_fastq
    env PROTOCOL, emit: protocol
    env EXTRA_ARGS, emit: extra_args
    path "*.txt.gz", emit: whitelist
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    # convert protocols.json to a tab-separated table with the columns
    # key, protocol, whitelist, extra_args (missing attributes become empty fields)
    TABLE=\$(
        jq -r '
            ."$aligner" |
            to_entries[] |
            "\\(.key)\\t\\(.value.protocol//"")\\t\\(.value.whitelist//"")\\t\\(.value.extra_args//"")"
        ' "${protocol_json}"
    )

    # lookup KEY COLUMN: print the given column of the table row whose first field
    # is exactly KEY (plain string comparison, so no regex or prefix matching)
    lookup() {
        awk -F'\\t' -v key="\$1" -v col="\$2" '\$1 == key { print \$col }' <<<"\$TABLE"
    }

    # subsample the FastQ file once, since the sample is the same for every protocol:
    # keep the sequence line of each record; the first 100k reads should suffice.
    # head closes the pipe early, hence the fallback to true
    gzip -dcf "${reads[0]}" |
        awk 'FNR % 4 == 2' |
        head -n 100000 > reads || true

    # iterate over all protocols defined for the selected aligner
    MATCHING_FRACTIONS=\$(cut -f1 <<<"\$TABLE" | while IFS= read -r KEY; do
        WHITELIST=\$(lookup "\$KEY" 3)
        [ -n "\$WHITELIST" ] || continue # skip protocols without whitelist
        WHITELIST_FILE=\$(basename "\$WHITELIST")

        # uncompress whitelist
        gzip -dcf "\$WHITELIST_FILE" > barcodes

        # extract the barcodes from the FastQ reads and count how many are valid barcodes
        awk -v KEY="\$KEY" -v OFS='\\t' '
            { \$0 = substr(\$0, 1, 14) }                # compare only the first 14 bases; longer barcodes (10X V2/3) are trimmed
            FILENAME == "barcodes" { barcodes[\$0] }    # cache barcodes in memory
            FILENAME == "reads" {                       # count sampled reads and those with a valid barcode
                total++
                if (\$0 in barcodes) count++
            }
            END { print KEY, (total > 0 ? count / total : 0) }  # fraction of matching barcodes; 0 for an empty sample
        ' barcodes reads
    done | sort -k2,2gr)

    # scratch files are no longer needed
    rm -f barcodes reads

    # only trust the auto-detection if exactly one protocol matches
    echo -e "These were the fractions of matching barcodes by protocol:\\n\$MATCHING_FRACTIONS"
    MATCHING_PROTOCOLS_COUNT=\$(awk '\$2>=0.7' <<<"\$MATCHING_FRACTIONS" | wc -l)
    if [ \$MATCHING_PROTOCOLS_COUNT -ne 1 ]; then
        echo "ERROR: Found \$MATCHING_PROTOCOLS_COUNT matching protocols." >&2
        exit 1
    fi

    # the table is sorted by descending fraction, so the first row is the match
    KEY=\$(head -n1 <<<"\$MATCHING_FRACTIONS" | cut -f1)
    echo "Selected protocol: \$KEY"

    # extract attributes of chosen protocol
    PROTOCOL=\$(lookup "\$KEY" 2)
    EXTRA_ARGS=\$(lookup "\$KEY" 4)
    WHITELIST=\$(basename "\$(lookup "\$KEY" 3)")

    # remove all other whitelist files
    for file in *.txt.gz; do
        [ "\$file" = "\$WHITELIST" ] || rm "\$file"
    done

    # copy the chosen whitelist file
    cp "\$WHITELIST" "whitelist.txt.gz"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        jq: \$(jq --version | cut -d- -f2)
    END_VERSIONS
    """
}
1 change: 1 addition & 0 deletions modules/local/simpleaf_index.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ process SIMPLEAF_INDEX {
simpleaf set-paths

# run simpleaf index

simpleaf \\
index \\
--threads $task.cpus \\
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/cat/fastq/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

84 changes: 84 additions & 0 deletions modules/nf-core/cat/fastq/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions modules/nf-core/cat/fastq/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions modules/nf-core/cat/fastq/nextflow.config

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading