-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsamtool.sh
More file actions
executable file
·67 lines (51 loc) · 2.72 KB
/
samtool.sh
File metadata and controls
executable file
·67 lines (51 loc) · 2.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/bin/bash
#SBATCH -D /home/dmvelasc/Data/Velasco_BGI
#SBATCH -o /home/dmvelasc/Data/Velasco_BGI/slurm-log/sam-stdout-%A_%a.txt
#SBATCH -e /home/dmvelasc/Data/Velasco_BGI/slurm-log/sam-stderr-%A_%a.txt
#SBATCH -J sam
#SBATCH -p serial
#SBATCH -a 1-9
#SBATCH -n 4
#SBATCH -c 4
set -e
set -u
# -D is like qsub -cwd
# -o is like qsub -o
# -e is like qsub -e
# -J is like qsub -j
# %A is array job ID
# %a is array job index
# %j is job allocation number
# convert SAM to BAM and sort
# Declare directories
dir1="/home/dmvelasc/Software/samtools" # SAMtools program directory
dir2="/home/dmvelasc/Data/references" # reference directory
dir3="/home/dmvelasc/Data/Velasco_BGI" # sequence directory prefix
dir4="/home/dmvelasc/Software/vcftools_0.1.11/bin" # VCFtools program directory
dir5="/home/dmvelasc/Script"
# Declare arrays (these will need to change depending on sequence data organization)
declare -a accession=(DPRU0194 DPRU0579 DPRU1467.9 DPRU2327.16 DPRU2493.7 FPS-Lovell UCD-fenzliana UCD-TNP USDA-arabica)
# Declare number variables
x=$SLURM_ARRAY_TASK_ID
i=$(( x-1 ))
# convert SAM to BAM
# -b output in BAM format
# -h output with headers
# -u output uncompressed (better for piping)
# -S input is SAM format
# filter SAM and convert to BAM (changed to below for 2013-10-27 9:10)
#"$dir1"/samtools view -Sh -F 4 -q 10 "$dir3"/"${accession["$i"]}".sam | "$dir5"/filter.py | "$dir1"/samtools view -bhuS - > "$dir3"/"${accession["$i"]}".bam
# filter SAM and convert to BAM
"$dir1"/samtools view -Sh -F 4 -q 10 "$dir3"/"${accession["$i"]}".sam | "$dir1"/samtools view -bhuS - > "$dir3"/"${accession["$i"]}".bam
# "$dir1"/samtools view -bhuS "$dir3"/"${accession["$i"]}".sam -o "$dir3"/"${accession["$i"]}".bam
# sort BAM file
"$dir1"/samtools sort "$dir3"/"${accession["$i"]}".bam "$dir3"/"${accession["$i"]}".sorted
# convert back to uncompressed BAM
"$dir1"/samtools view -bhu "$dir3"/"${accession["$i"]}".sorted.bam -o "$dir3"/"${accession["$i"]}".sort.bam
# Pileup, call variants, filter variants
# pileup of sorted, uncompressed BAM to BCF
"$dir1"/samtools mpileup -DBSRu -f "$dir2"/Prunus_persica_v1.0_chr.fa "$dir3"/"${accession["$i"]}".sort.bam | "$dir1"/bcftools/bcftools view -Abvcg - > "$dir3"/"${accession["$i"]}".raw.bcf
# BCF filtered to VCF and max depth filtered to 50X, additional filtering in VCFtools step (####changed to 100X 2013-10-27#####)
"$dir1"/bcftools/bcftools view "$dir3"/"${accession["$i"]}".raw.bcf | "$dir1"/bcftools/vcfutils.pl varFilter -D 100 > "$dir3"/"${accession["$i"]}".flt.vcf
#filter VCF (approximate average depth is 35X)
"$dir4"/vcftools --vcf "$dir3"/"${accession["$i"]}".flt.vcf --remove-indels --minQ 20 --min-meanDP 10 --max-meanDP 100 --thin 11 --recode --filtered-sites --out "$dir3"/"${accession["$i"]}".final