Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3cfa5ad
delay csvdb load tasks by a random offset to avoid database lock issues
kevinrue Feb 6, 2018
068f8a0
fix import of PipelineExomeAncestry (#397)
BilyanaStoilova Feb 8, 2018
70eea4b
Small change to avoid confusion when running featurecounts (#398)
Feb 9, 2018
496e905
Prevent pipelines from running when DRMAA is not available and --loca…
sebastian-luna-valero Feb 9, 2018
1f2d5e4
pin conda version
sebastian-luna-valero Feb 13, 2018
2a25250
pin sphinx 1.6.7 to work with cgat-report 0.7.6.1
sebastian-luna-valero Feb 14, 2018
112a020
make threads=1 instead of threads=10 for build_report in pipeline.in…
sebastian-luna-valero Feb 14, 2018
2267c0d
fixes #388 (#403)
sebastian-luna-valero Feb 15, 2018
176d110
Sns fix memory pipeline genesets (#402)
snsansom Feb 15, 2018
ea12c41
Have updated PARAMS for template so it now points to the new genesets…
Feb 16, 2018
1d9a195
Ac bamstats report (#404)
Feb 16, 2018
48eba7d
Add back explicit reporting of failed cluster jobs (#406)
snsansom Feb 19, 2018
d0f6d22
Sns fix loading of gtf annotations (#408)
snsansom Feb 20, 2018
702e832
Don't use conda 4.4 (#409)
sebastian-luna-valero Feb 21, 2018
41b0158
increase priority of /ifs/home/sebastian/.cgat ini file (#412)
sebastian-luna-valero Feb 23, 2018
8d21dd0
Typo (#413)
kevinrue Feb 23, 2018
3c94ae4
Bugfix https://github.com/CGATOxford/CGATPipelines/pull/412 (#414)
sebastian-luna-valero Feb 23, 2018
04cdcba
Update PipelineRrbs.py
Feb 26, 2018
1c82403
updates documentation for pipeline_rrbs
Feb 26, 2018
ea904c3
Copy environment to subprocess (#411)
snsansom Mar 2, 2018
deba85c
removed duplicate function (#416)
Mar 9, 2018
50d0fe0
delay csvdb load tasks by a random offset to avoid database lock issues
kevinrue Feb 6, 2018
4d7f447
Control scatter delay by pipeline ini
kevinrue Mar 14, 2018
b5c39e5
merge conflict
kevinrue Mar 14, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion CGATPipelines/Pipeline/Cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
try:
import drmaa
HAS_DRMAA = True
except:
except (ImportError, RuntimeError):
# the following does not work on Travis
#except ImportError or RuntimeError:
HAS_DRMAA = False
Expand Down Expand Up @@ -325,6 +325,18 @@ def collectSingleJobFromCluster(session, job_id,
(retval.exitStatus,
"".join(stderr), statement))

if ((retval.hasExited is False or retval.wasAborted is True) and not
ignore_errors):

raise OSError(
"-------------------------------------------------\n"
"Cluster job was aborted (%s) and/or failed to exit (%s) "
"while running the following statement:\n"
"\n%s\n"
"(Job may have been cancelled by the user or the scheduler)\n"
"----------------------------------------------------------\n" %
(retval.wasAborted, not retval.hasExited, statement))

try:
os.unlink(job_path)
except OSError:
Expand Down
13 changes: 11 additions & 2 deletions CGATPipelines/Pipeline/Control.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
try:
import drmaa
HAS_DRMAA = True
except:
except (ImportError, RuntimeError):
# the following does not work on Travis
#except ImportError or RuntimeError:
HAS_DRMAA = False
Expand Down Expand Up @@ -337,12 +337,15 @@ def peekParameters(workingdir,
return {}

statement = "python %s -f -v 0 dump" % pipeline

os.environ.update({'BASH_ENV': os.path.join(os.environ['HOME'],'.bashrc')})
process = subprocess.Popen(statement,
cwd=workingdir,
shell=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stderr=subprocess.PIPE,
env=os.environ.copy())

# process.stdin.close()
stdout, stderr = process.communicate()
Expand Down Expand Up @@ -901,6 +904,12 @@ def main(args=sys.argv):
# create the session proxy
startSession()

elif not options.without_cluster and not HAS_DRMAA:
E.critical("DRMAA API not found so cannot talk to a cluster.")
E.critical("Please use --local to run the pipeline"
" on this host: {}".format(os.uname()[1]))
sys.exit(-1)

#
# make sure we are not logging at the same time in
# different processes
Expand Down
10 changes: 7 additions & 3 deletions CGATPipelines/Pipeline/Execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
try:
import drmaa
HAS_DRMAA = True
except:
except (ImportError, RuntimeError):
# the following does not work on Travis
#except ImportError or RuntimeError:
HAS_DRMAA = False
Expand Down Expand Up @@ -136,12 +136,14 @@ def execute(statement, **kwargs):
if statement.endswith(";"):
statement = statement[:-1]

os.environ.update({'BASH_ENV': os.path.join(os.environ['HOME'],'.bashrc')})
process = subprocess.Popen(statement % kwargs,
cwd=cwd,
shell=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stderr=subprocess.PIPE,
env=os.environ.copy())

# process.stdin.close()
stdout, stderr = process.communicate()
Expand Down Expand Up @@ -581,6 +583,7 @@ def _writeJobScript(statement, job_memory, job_name, shellfile):
statement = pipes.quote(statement)
statement = "%s -c %s" % (shell, statement)

os.environ.update({'BASH_ENV': os.path.join(os.environ['HOME'],'.bashrc')})
process = subprocess.Popen(
expandStatement(
statement,
Expand All @@ -589,7 +592,8 @@ def _writeJobScript(statement, job_memory, job_name, shellfile):
shell=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stderr=subprocess.PIPE,
env=os.environ.copy())

# process.stdin.close()
stdout, stderr = process.communicate()
Expand Down
18 changes: 11 additions & 7 deletions CGATPipelines/Pipeline/Parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,13 +431,6 @@ def getParameters(filenames=["pipeline.ini", ],
if os.path.exists(fn):
filenames.insert(0, fn)

if user_ini:
# read configuration from a users home directory
fn = os.path.join(os.path.expanduser("~"),
".cgat")
if os.path.exists(fn):
filenames.insert(0, fn)

if default_ini:
# The link between CGATPipelines and Pipeline.py
# needs to severed at one point.
Expand All @@ -448,6 +441,17 @@ def getParameters(filenames=["pipeline.ini", ],
'configuration',
'pipeline.ini'))

if user_ini:
# read configuration from a users home directory
fn = os.path.join(os.path.expanduser("~"),
".cgat")
if os.path.exists(fn):
if 'pipeline.ini' in filenames:
index = filenames.index('pipeline.ini')
filenames.insert(index,fn)
else:
filenames.append(fn)

# IMS: Several legacy scripts call this with a string as input
# rather than a list. Check for this and correct

Expand Down
34 changes: 1 addition & 33 deletions CGATPipelines/PipelineBamStats.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ def summarizeTagsWithinContext(tagfile,
contextfile,
outfile,
min_overlap=0.5,
job_memory="4G"):
job_memory="15G"):
'''count occurrences of tags in genomic context.

Examines the genomic context to where tags align.
Expand Down Expand Up @@ -1076,38 +1076,6 @@ def loadBAMStats(infiles, outfile):
P.run()


def buildPicardRnaSeqMetrics(infiles, strand, outfile):
'''run picard:RNASeqMetrics
Arguments
---------
infiles : string
Input filename in :term:`BAM` format.
Genome file in refflat format
(http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat)
outfile : string
Output filename with picard output.
'''
job_memory = PICARD_MEMORY
picard_opts = '-Xmx%(job_memory)s -XX:+UseParNewGC -XX:+UseConcMarkSweepGC' % locals()
job_threads = 20
infile, genome = infiles

if BamTools.getNumReads(infile) == 0:
E.warn("no reads in %s - no metrics" % infile)
P.touch(outfile)
return

statement = '''picard %(picard_opts)s CollectRnaSeqMetrics
REF_FLAT=%(genome)s
INPUT=%(infile)s
ASSUME_SORTED=true
OUTPUT=%(outfile)s
STRAND=%(strand)s
VALIDATION_STRINGENCY=SILENT
'''
P.run()


def loadPicardRnaSeqMetrics(infiles, outfiles):
'''load picard rna stats into database.
Loads tables into the database
Expand Down
12 changes: 6 additions & 6 deletions CGATPipelines/PipelineGO.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
PARAMS = {}


def createGOFromENSEMBL(infile, outfile):
def createGOFromENSEMBL(infile, outfile,
job_memory="5G"):
"""get GO assignments from ENSEMBL

Download GO assignments from the ENSEMBL database and store in
Expand All @@ -46,7 +47,6 @@ def createGOFromENSEMBL(infile, outfile):

"""

job_memory = "5G"
statement = '''
cgat runGO
--filename-dump=%(outfile)s
Expand Down Expand Up @@ -277,7 +277,8 @@ def getGODescriptions(infile):
table[fields.index("description")])])


def createGOSlimFromENSEMBL(infile, outfile):
def createGOSlimFromENSEMBL(infile, outfile,
job_memory="5G"):
"""build GO SLIM assignments.

This method downloads a GOSlim specification
Expand Down Expand Up @@ -315,15 +316,14 @@ def createGOSlimFromENSEMBL(infile, outfile):
P.run()

E.info("mapping GO to GOSlim")
job_memory = "5G"

statement = '''
map2slim -outmap %(outfile)s.map
%(goslim_fn)s
%(ontology_fn)s
'''
P.run()

job_memory = "5G"
statement = '''
zcat < %(infile)s
| cgat runGO
Expand Down Expand Up @@ -406,7 +406,7 @@ def runGOFromFiles(outfile,

options = " ".join(options)
statement = '''
cgat runGO
cgat runGO
--filename-input=%(go_file)s
--genes-tsv-file=%(fg_file)s
--output-filename-pattern='%(outdir)s/%%(set)s.%%(go)s.%%(section)s'
Expand Down
9 changes: 6 additions & 3 deletions CGATPipelines/PipelineGeneset.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def mapUCSCToEnsembl(genome):


def annotateGenome(infile, outfile,
only_proteincoding=False):
only_proteincoding=False,
job_memory="4G"):
'''annotate genomic regions with reference gene set.

The method applies the following filters to an ENSEMBL gene set:
Expand Down Expand Up @@ -120,7 +121,8 @@ def annotateGenome(infile, outfile,


def annotateGeneStructure(infile, outfile,
only_proteincoding=False):
only_proteincoding=False,
job_memory="4G"):
"""annotate genomic regions with gene structure.

The method applies the following filters to an ENSEMBL gene set:
Expand Down Expand Up @@ -1439,7 +1441,8 @@ def sortGTF(infile, outfile, order="contig+gene"):
P.run()


def buildGenomicFunctionalAnnotation(gtffile, dbh, outfiles):
def buildGenomicFunctionalAnnotation(gtffile, dbh, outfiles,
job_memory="4G"):
'''output a bed file with functional annotations.

The genomic region a gene covers is taken from the `gtffile`.
Expand Down
12 changes: 7 additions & 5 deletions CGATPipelines/PipelineGtfsubset.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def connectToUCSC(host="genome-mysql.cse.ucsc.edu",
def getRepeatDataFromUCSC(dbhandle,
repclasses,
outfile,
remove_contigs_regex=None):
remove_contigs_regex=None,
job_memory="4G"):
'''download data from UCSC database and write to `outfile` in
:term:`gff` format.

Expand Down Expand Up @@ -202,7 +203,8 @@ def getRepeatDataFromUCSC(dbhandle,
os.unlink(tmpfilename)


def buildGenomicContext(infiles, outfile, distance=10):
def buildGenomicContext(infiles, outfile, distance=10,
job_memory="4G"):

'''build a :term:`bed` formatted file with genomic context.
The output is a bed formatted file, annotating genomic segments
Expand Down Expand Up @@ -295,7 +297,7 @@ def buildGenomicContext(infiles, outfile, distance=10):
os.unlink(x)


def buildFlatGeneSet(infile, outfile):
def buildFlatGeneSet(infile, outfile, job_memory="4G"):
'''build a flattened gene set.
All transcripts in a gene are merged into a single transcript by
combining overlapping exons.
Expand Down Expand Up @@ -337,7 +339,8 @@ def buildFlatGeneSet(infile, outfile):
P.run()


def loadGeneInformation(infile, outfile, only_proteincoding=False):
def loadGeneInformation(infile, outfile, only_proteincoding=False,
job_memory="4G"):
'''load gene-related attributes from :term:`gtf` file into database.
This method takes transcript-associated features from an
:term:`gtf` file and collects the gene-related attributes in the
Expand All @@ -354,7 +357,6 @@ def loadGeneInformation(infile, outfile, only_proteincoding=False):
If True, only consider protein coding genes.
'''

job_memory = "4G"
table = P.toTable(outfile)

if only_proteincoding:
Expand Down
2 changes: 1 addition & 1 deletion CGATPipelines/configuration/pipeline.ini
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ priority=-10
################################################################
[report]
# number of threads to use to build the documentation
threads=10
threads=1

memory=1G

Expand Down
9 changes: 5 additions & 4 deletions CGATPipelines/pipeline_bamstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def intBam(infile, outfile):
If there is no sequence quality then make a softlink. Picard tools
has an issue when quality score information is missing'''

if PARAMS["bam_sequence_stipped"] is True:
if PARAMS["bam_sequence_stripped"] is True:
PipelineBamStats.addPseudoSequenceQuality(infile,
outfile)
else:
Expand Down Expand Up @@ -559,7 +559,7 @@ def buildIntronLevelReadCounts(infiles, outfile):

@active_if(SPLICED_MAPPING)
@transform(intBam,
regex("BamFiles.dir/(.*).bam$"),
regex("BamFiles.dir/(\S+).bam$"),
add_inputs(PARAMS["annotations_interface_geneset_coding_exons_gtf"]),
r"Paired_QC.dir/\1.transcriptprofile.gz")
def buildTranscriptProfiles(infiles, outfile):
Expand Down Expand Up @@ -754,7 +754,8 @@ def loadStrandSpecificity(infiles, outfile):
# These tasks allow ruffus to pipeline tasks together


@follows(loadPicardStats,
@follows(buildTranscriptProfiles,
loadPicardStats,
loadPicardDuplicationStats,
loadBAMStats,
loadContextStats,
Expand Down Expand Up @@ -796,7 +797,7 @@ def renderJupyterReport():
'Jupyter_report'))

statement = ''' cp %(report_path)s/* Jupyter_report.dir/ ; cd Jupyter_report.dir/;
jupyter nbconvert --ExecutePreprocessor.timeout=None --allow-errors --to html --execute *.ipynb;
jupyter nbconvert --ExecutePreprocessor.timeout=None --to html --execute *.ipynb --allow-errors;
mkdir _site;
mv -t _site *.html cgat_logo.jpeg oxford.png'''

Expand Down
2 changes: 1 addition & 1 deletion CGATPipelines/pipeline_bamstats/pipeline.ini
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ paired_end=0

# sometimes a bam has its sequence quality stripped to save space
# if this is the case then specify below:
sequence_stipped=0
sequence_stripped=0

################################################################
## name of the database that you want to generate
Expand Down

Large diffs are not rendered by default.

Loading