From 0ab3682acdcaa7a7820b68ee50dbf807eba241ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Mah=C3=A9?= Date: Fri, 30 Aug 2013 20:42:36 +0200 Subject: [PATCH 1/7] Unnecessary use of cat --- TAXAassign.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/TAXAassign.sh b/TAXAassign.sh index a678a98..f889f45 100755 --- a/TAXAassign.sh +++ b/TAXAassign.sh @@ -83,8 +83,7 @@ # along with this program. If not, see . # **************************************************************/ -HELPDOC=$( cat < Date: Fri, 30 Aug 2013 20:46:03 +0200 Subject: [PATCH 2/7] Exit code value should be zero --- TAXAassign.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/TAXAassign.sh b/TAXAassign.sh index f889f45..c6ee807 100755 --- a/TAXAassign.sh +++ b/TAXAassign.sh @@ -83,7 +83,8 @@ # along with this program. If not, see . # **************************************************************/ -HELPDOC="Script to annotate sequences at different taxonomic levels using NCBI's taxonomy +HELPDOC=" +Script to annotate sequences at different taxonomic levels using NCBI's taxonomy Usage: bash `basename $0` -f [options] @@ -398,3 +399,4 @@ else TAXAassign_print "Sequences assigned at species level: $speciesLevelAssignments/$totalReads ($(float_eval "($speciesLevelAssignments / $totalReads) * 100")%)" fi +exit 0 \ No newline at end of file From 9855ffba7249b00af233855660dc930d445f9f86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Mah=C3=A9?= Date: Fri, 30 Aug 2013 21:05:08 +0200 Subject: [PATCH 3/7] Avoid back-ticks, use $() instead --- TAXAassign.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/TAXAassign.sh b/TAXAassign.sh index c6ee807..5e6ca09 100755 --- a/TAXAassign.sh +++ b/TAXAassign.sh @@ -103,8 +103,10 @@ Options: set -o errexit +CURRENT_DIR=$(pwd) + # = Parameters to set ============== # -LOGFILE="`pwd`/TAXAassign.log" # Where to save the log +LOGFILE="${CURRENT_DIR}/TAXAassign.log" # Where to save the log BLASTN_DIR="/home/opt/ncbi-blast-2.2.28+/bin"; # Path where blastn is installed BLASTDB_DIR="/home/opt/ncbi-blast-2.2.28+/db"; # Path where nt is installed FASTA_FILE="" # This field should be empty @@ -117,8 +119,6 @@ CONSENSUS_THRESHOLD=90 TAXONOMIC_LEVELS_THRESHOLD="" # =/Parameters to set ============== # -CURRENT_DIR=`pwd` - # = Enable FP support ============== # # By default, there is limited capability in bash to handle floating point # operations. In this script bc is used to calculate the floating point operations. From 48143115e7b85b40730acb14acf07364606002ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Mah=C3=A9?= Date: Fri, 30 Aug 2013 21:07:17 +0200 Subject: [PATCH 4/7] Remove unused function That function could be replaced by mkdir -p anyway --- TAXAassign.sh | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/TAXAassign.sh b/TAXAassign.sh index 5e6ca09..f22f02f 100755 --- a/TAXAassign.sh +++ b/TAXAassign.sh @@ -165,17 +165,6 @@ function ceil () { # =/Enable FP support ============== # -# Create directories if they don't exist yet -function create_dirs() { - local dir - for dir in "$@" - do - if [ ! -d "$dir" ]; then - mkdir "$dir" - fi - done -} - # Check if files exist function check_prog() { local prog From 38a2da91073b615ebf66a1abff3ce1f7fdbd51a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Mah=C3=A9?= Date: Fri, 30 Aug 2013 21:24:53 +0200 Subject: [PATCH 5/7] Apply the same style to all the loops --- TAXAassign.sh | 99 ++++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/TAXAassign.sh b/TAXAassign.sh index f22f02f..787ff5f 100755 --- a/TAXAassign.sh +++ b/TAXAassign.sh @@ -132,10 +132,12 @@ function float_eval() { local stat=0 local result=0.0 - if [[ $# -gt 0 ]]; then - result=$(echo "scale=$float_scale; $*" | bc -q 2>/dev/null) + if [[ $# -gt 0 ]] ; then + result=$(echo "scale=$float_scale; $*" | bc -q 2> /dev/null) stat=$? - if [[ $stat -eq 0 && -z "$result" ]]; then stat=1; fi + if [[ $stat -eq 0 && -z "$result" ]] ; then + stat=1 + fi fi echo $result return $stat @@ -147,10 +149,14 @@ function float_eval() function float_cond() { local cond=0 - if [[ $# -gt 0 ]]; then - cond=$(echo "$*" | bc -q 2>/dev/null) - if [[ -z "$cond" ]]; then cond=0; fi - if [[ "$cond" != 0 && "$cond" != 1 ]]; then cond=0; fi + if [[ $# -gt 0 ]] ; then + cond=$(echo "$*" | bc -q 2> /dev/null) + if [[ -z "$cond" ]] ; then + cond=0 + fi + if [[ "$cond" != 0 && "$cond" != 1 ]] ; then + cond=0 + fi fi local stat=$((cond == 0)) return $stat @@ -168,26 +174,25 @@ function ceil () { # Check if files exist function check_prog() { local prog - for prog in "$@" - do - if which $prog >/dev/null; then - TAXAassign_print 'Using ' $prog - else - echo "$prog not in your path" >&2; exit 1; - fi - + for prog in "$@" ; do + if which $prog > /dev/null ; then + TAXAassign_print 'Using ' $prog + else + echo "$prog not in your path" >&2 + exit 1 + fi done } -function skip_gen_file(){ - if [ -f "$1" ]; then - echo "true" +function skip_gen_file() { + if [ -f "$1" ] ; then + echo "true" else - echo "false" + echo "false" fi } -function skip_gen_dir(){ +function skip_gen_dir() { if [ -d "$1" ]; then echo "true" else @@ -201,7 +206,7 @@ function TAXAassign_print() { # Parse options -while getopts ":phc:r:m:f:t:q:a:" opt; do +while getopts ":phc:r:m:f:t:q:a:" opt ; do case $opt in p) PARALLELIZE_FLAG=1 @@ -209,24 +214,24 @@ while getopts ":phc:r:m:f:t:q:a:" opt; do f) FASTA_FILE=$OPTARG ;; - m) - MINIMUM_PERCENT_IDENT=$OPTARG - ;; + m) + MINIMUM_PERCENT_IDENT=$OPTARG + ;; c) NUMBER_OF_CORES=$OPTARG ;; r) NUMBER_OF_REFERENCE_MATCHES=$OPTARG ;; - t) - CONSENSUS_THRESHOLD=$OPTARG - ;; - q) - MINIMUM_QUERY_COVERAGE=$OPTARG - ;; - a) - TAXONOMIC_LEVELS_THRESHOLD=$OPTARG - ;; + t) + CONSENSUS_THRESHOLD=$OPTARG + ;; + q) + MINIMUM_QUERY_COVERAGE=$OPTARG + ;; + a) + TAXONOMIC_LEVELS_THRESHOLD=$OPTARG + ;; h) echo "$HELPDOC" exit 0 @@ -238,13 +243,13 @@ while getopts ":phc:r:m:f:t:q:a:" opt; do ;; esac done -if [ -z $FASTA_FILE ] -then + +if [ -z $FASTA_FILE ] ; then echo "$HELPDOC" exit 1 fi -if [ -z "$TAXONOMIC_LEVELS_THRESHOLD" ]; then +if [ -z "$TAXONOMIC_LEVELS_THRESHOLD" ] ; then TAXONOMIC_LEVEL_THRESHOLD="$MINIMUM_PERCENT_IDENT,$MINIMUM_PERCENT_IDENT,$MINIMUM_PERCENT_IDENT,$MINIMUM_PERCENT_IDENT,$MINIMUM_PERCENT_IDENT,$MINIMUM_PERCENT_IDENT" fi @@ -253,15 +258,13 @@ IFS=","; TLTArray=($TAXONOMIC_LEVELS_THRESHOLD); IFS=$OIFS; -if [ "${#TLTArray[@]}" != "6" ]; then +if [ "${#TLTArray[@]}" != "6" ] ; then echo "$HELPDOC" exit 1 fi -for i in "${TLTArray[@]}" -do - : - if ! [[ $i =~ ^-?[0-9]+$ ]]; then +for i in "${TLTArray[@]}" ; do + if ! [[ $i =~ ^-?[0-9]+$ ]] ; then echo "$HELPDOC" exit 1 fi @@ -281,29 +284,27 @@ TAXAassign_print "TAXAassign v0.4. Copyright (c) 2013 Computational Microbial Ge check_prog $BLASTN_DIR/blastn check_prog $TAXAASSIGN_DIR/scripts/blast_concat_taxon.py check_prog $TAXAASSIGN_DIR/scripts/blast_gen_assignments.pl -if [ $PARALLELIZE_FLAG -eq 1 ] -then +if [ $PARALLELIZE_FLAG -eq 1 ] ; then check_prog parallel fi -fileName=`echo "$(basename $FASTA_FILE)" | cut -d'.' -f1` +fileName=$(basename $FASTA_FILE | cut -d'.' -f1) # Run blastn TAXAassign_print "Blast against NCBI's nt database with minimum percent ident of $MINIMUM_PERCENT_IDENT%, maximum of $NUMBER_OF_REFERENCE_MATCHES reference sequences, and evalue of 0.0001 in blastn." # Format for blastn blastOutFmt="\"6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qcovs staxids\"" blastFileName=$fileName'_B' -if [ "$(skip_gen_file $blastFileName'.out')" == "true" ];then +if [ "$(skip_gen_file $blastFileName'.out')" == "true" ] ; then TAXAassign_print $blastFileName'.out' already exists. Skipping this step. -elif [ $PARALLELIZE_FLAG -eq 1 ]; then - +elif + [ $PARALLELIZE_FLAG -eq 1 ] ; then # Get the file size in KB sizeFileBytes=$(du -b ${FASTA_FILE} | sed 's/\([0-9]*\)\(.*\)/\1/') sizeChunks=$(ceil $(float_eval "$sizeFileBytes / ($NUMBER_OF_CORES * 1024)")) sizeChunksString="${sizeChunks}k" startTime=`date +%s` - cat $FASTA_FILE | parallel --block $sizeChunksString --recstart '>' --pipe $BLASTN_DIR/blastn -perc_identity $MINIMUM_PERCENT_IDENT -evalue 0.00001 -dust no -num_threads 1 -outfmt $blastOutFmt -max_target_seqs $NUMBER_OF_REFERENCE_MATCHES -db $BLASTDB_DIR'/nt' -query - > $blastFileName'.out' TAXAassign_print "blastn using GNU parallel took $(expr `date +%s` - $startTime) seconds for $FASTA_FILE". @@ -321,7 +322,7 @@ blastFilteredFileName=$fileName'_BF' if [ "$(skip_gen_file $blastFilteredFileName'.out')" == "true" ];then TAXAassign_print $blastFilteredFileName'.out' already exists. Skipping this step. else - cat $blastFileName'.out' | awk -F"\t" -v pattern=$MINIMUM_QUERY_COVERAGE '$13>pattern{print $0}' > $blastFilteredFileName'.out' + cat $blastFileName'.out' | awk -F"\t" -v pattern=$MINIMUM_QUERY_COVERAGE '$13>pattern{print $0}' > $blastFilteredFileName'.out' TAXAassign_print $blastFilteredFileName'.out' generated successfully! fi From a04d0c9c773452c547c739ff342428ae6bcbb027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Mah=C3=A9?= Date: Fri, 30 Aug 2013 21:30:56 +0200 Subject: [PATCH 6/7] Write all error messages on stderr --- TAXAassign.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/TAXAassign.sh b/TAXAassign.sh index 787ff5f..5bfaaa6 100755 --- a/TAXAassign.sh +++ b/TAXAassign.sh @@ -233,19 +233,19 @@ while getopts ":phc:r:m:f:t:q:a:" opt ; do TAXONOMIC_LEVELS_THRESHOLD=$OPTARG ;; h) - echo "$HELPDOC" + echo "$HELPDOC" 1>&2 exit 0 ;; \?) - echo "$HELPDOC" - echo "Invalid option: -$OPTARG" >&2 + echo "$HELPDOC" 1>&2 + echo "Invalid option: -$OPTARG" 1>&2 exit 1 ;; esac done if [ -z $FASTA_FILE ] ; then - echo "$HELPDOC" + echo "$HELPDOC" 1>&2 exit 1 fi @@ -259,20 +259,20 @@ TLTArray=($TAXONOMIC_LEVELS_THRESHOLD); IFS=$OIFS; if [ "${#TLTArray[@]}" != "6" ] ; then - echo "$HELPDOC" + echo "$HELPDOC" 1>&2 exit 1 fi for i in "${TLTArray[@]}" ; do if ! [[ $i =~ ^-?[0-9]+$ ]] ; then - echo "$HELPDOC" + echo "$HELPDOC" 1>&2 exit 1 fi done if ! [[ $MINIMUM_PERCENT_IDENT =~ ^-?[0-9]+$ ]] || ! [[ $NUMBER_OF_CORES =~ ^-?[0-9]+$ ]] || ! [[ $NUMBER_OF_REFERENCE_MATCHES =~ ^-?[0-9]+$ ]] || ! [[ $CONSENSUS_THRESHOLD =~ ^-?[0-9]+$ ]] || ! [[ $MINIMUM_QUERY_COVERAGE =~ ^-?[0-9]+$ ]]; then - echo "$HELPDOC" + echo "$HELPDOC" 1>&2 exit 1 fi From 1ba356ad993190e3176603b3e6b0b11404bf637e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Mah=C3=A9?= Date: Tue, 10 Sep 2013 19:57:37 +0200 Subject: [PATCH 7/7] Replace if-then-else with shorter && || expressions --- TAXAassign.sh | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/TAXAassign.sh b/TAXAassign.sh index 5bfaaa6..093a034 100755 --- a/TAXAassign.sh +++ b/TAXAassign.sh @@ -185,19 +185,13 @@ function check_prog() { } function skip_gen_file() { - if [ -f "$1" ] ; then - echo "true" - else - echo "false" - fi + # Is it a file? + [[ -f "$1" ]] && echo "true" || echo "false" } function skip_gen_dir() { - if [ -d "$1" ]; then - echo "true" - else - echo "false" - fi + # Is it a directory? + [[ -d "$1" ]] && echo "true" || echo "false" } function TAXAassign_print() {