Skip to content

Commit

Permalink
fixed ghmap -e, this should be an optional input
Browse files Browse the repository at this point in the history
  • Loading branch information
zqfang committed Jul 9, 2024
1 parent 563315e commit 5d96aca
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 72 deletions.
22 changes: 0 additions & 22 deletions example/MPD_000/trait.000.txt

This file was deleted.

2 changes: 1 addition & 1 deletion haplomap/src/quantTraitMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ struct GhmapOptions
GhmapOptions() : isCategorical(false), filterCoding(false), haploBlocks(false),
geneBlocks(false), geneAllBlocks(false), pvalueCutoff(0.05),datasetName((char *)"Unnamed_dataset"),
phenotypeFileName(NULL), blocksFileName(NULL), outputFileName(NULL), geneName(NULL),
equalFile(NULL), goTermFile(NULL), goFilter(NULL),
expressionFile(NULL), equalFile(NULL), goTermFile(NULL), goFilter(NULL),
geneticRelationMatrix(NULL) {};
};

Expand Down
2 changes: 1 addition & 1 deletion workflows/config.2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ HBCGM:
# KNOWNGENE: "/data/bases/shared/haplomap/PELTZ_20210609/SNP_Annotation/mm10_knownGene.txt"

# genetic relation file from PLink output
GENETIC_REL: "/data/bases/shared/haplomap/PELTZ_20210609/mouse54_grm.rel"
GENETIC_REL: "/data/bases/shared/haplomap/PELTZ_20210609/mouse_grm.dist"
# gene expression file
GENE_EXPRS: "/data/bases/shared/haplomap/PELTZ_20210609/mus.compact.exprs.txt"
# MeSH mapping
Expand Down
6 changes: 3 additions & 3 deletions workflows/config.indel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dbSNP: "/home/fangzq/genome/mouse/mouse.dbsnp.vcf.gz" # mm10

HBCGM:
# working directory
WORKSPACE: "/data/bases/shared/haplomap/MPD_MeSH_Indel_example"
WORKSPACE: "/data/bases/shared/haplomap/MPD_MeSH_Indel"
# path to haplomap
BIN: "/home/fangzq/github/HBCGM/build/bin"
# path to GNNHap repo
Expand All @@ -34,7 +34,7 @@ HBCGM:
GNNHAP_BUNDLE: "/home/fangzq/github/GNNHap/bundle"

# MPD trait ids
TRAIT_IDS: "/home/fangzq/github/HBCGM/example/MPD_MeSH.example.txt"
TRAIT_IDS: "/data/bases/shared/haplomap/PELTZ_20210609/MPD_MeSH_suffixed.txt"
# set to true if input individual animal data. Default: use strain means.
USE_RAWDATA: false
# use custom database instead of using MPD API to get data.
Expand All @@ -47,7 +47,7 @@ HBCGM:
# Ensembl-vep output after variant calling step
VEP_DIR: "/data/bases/shared/haplomap/PELTZ_20210609/VEP"
# path to indel database
SNPS_DIR: "/data/bases/shared/haplomap/PELTZ_20210609/INDELs"
VAR_DIR: "/data/bases/shared/haplomap/PELTZ_20210609/HBCGM_VAR_DB"

# genetic relation file from PLink output
GENETIC_REL: "/data/bases/shared/haplomap/PELTZ_20210609/mouse54_grm.rel"
Expand Down
8 changes: 4 additions & 4 deletions workflows/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dbSNP: "/home/fangzq/genome/mouse/mouse.dbsnp.vcf.gz" # mm10

HBCGM:
# working directory
WORKSPACE: "/data/bases/shared/haplomap/MPD_MeSH_example"
WORKSPACE: "/data/bases/fangzq/20220711_pain/TGR/TGR_05-30" #"/data/bases/shared/haplomap/MPD_MeSH"
# path to haplomap
BIN: "/home/fangzq/github/HBCGM/build/bin"
# path to GNNHap repo
Expand All @@ -34,7 +34,7 @@ HBCGM:
GNNHAP_BUNDLE: "/home/fangzq/github/GNNHap/bundle"

# MPD trait ids
TRAIT_IDS: "/home/fangzq/github/HBCGM/example/MPD_MeSH.example.txt"
TRAIT_IDS: "/data/bases/fangzq/20220711_pain/TGR/TGR_05-30/trait_list.txt"
# set to true if input individual animal data. Default: use strain means.
USE_RAWDATA: false
# use custom database instead of using MPD API to get data.
Expand All @@ -49,7 +49,7 @@ HBCGM:
VEP_DIR: "/data/bases/shared/haplomap/PELTZ_20210609/VEP"

# path to SNP database
SNPS_DIR: "/data/bases/shared/haplomap/PELTZ_20210609/SNPs"
VAR_DIR: "/data/bases/shared/haplomap/PELTZ_20210609/HBCGM_VAR_DB"
# PATH to SNP annotations for all genes
# ANNOVAR: "/data/bases/shared/haplomap/PELTZ_20210609/SNP_Annotation"
# # snp, geneid,genename mapping
Expand All @@ -61,7 +61,7 @@ HBCGM:
# gene expression file
GENE_EXPRS: "/data/bases/shared/haplomap/PELTZ_20210609/mus.compact.exprs.txt"
# MeSH mapping
MPD2MeSH: "/data/bases/shared/haplomap/PELTZ_20210609/mpd2mesh.json"
MPD2MeSH: "/data/bases/fangzq/20220711_pain/TGR/TGR_05-30/trait_mesh.json"

######### others #########
# open chromatin regions to annotate haploblocks.
Expand Down
40 changes: 20 additions & 20 deletions workflows/haplomap.indel.smk
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ with open(TRAIT_IDS, 'r') as t:
# filter id in MeSH
with open(MPD2MeSH, 'r') as j:
MESH_DICT = json.load(j)
IDS = [i for i in IDS_ if i.split("-")[0] in MESH_DICT ]
IDS = [i for i in IDS_ if i.replace("MPD_", "").split("-")[0] in MESH_DICT ]
# filter id that already run
# pat = config['HBCGM']['WORKSPACE'] + "MPD_{ids}_indel.results.mesh.txt"
# IDS = [mnum for mnum in IDS if not os.path.exists(pat.format(ids = mnum))]
Expand All @@ -62,26 +62,26 @@ rule target:
# ouput: os.path.join(OUTPUT_DIR, "mpd.ids.txt")
# shell:
# "cut -d, -f1 {input} | uniq | sed '1d' > {output.txt}"
rule traits:
output: temp(expand("MPD_{ids}/strain.{ids}.temp", ids=IDS))
run:
for out in output:
shell("touch %s"%out)
# rule traits:
# output: temp(expand("MPD_{ids}/strain.{ids}.temp", ids=IDS))
# run:
# for out in output:
# shell("touch %s"%out)

rule strain2trait:
input:
strain = STRAIN_ANNO,
ids = "MPD_{ids}/strain.{ids}.temp"
output:
"MPD_{ids}/strain.{ids}.txt",
"MPD_{ids}/trait.{ids}.txt",
params:
trait = TRAIT_DATA,
outdir = config['HBCGM']['WORKSPACE'],
traitid = "{ids}",
rawdata = config['HBCGM']['USE_RAWDATA']
script:
"../scripts/strain2traits.py"
# rule strain2trait:
# input:
# strain = STRAIN_ANNO,
# ids = "MPD_{ids}/strain.{ids}.temp"
# output:
# "MPD_{ids}/strain.{ids}.txt",
# "MPD_{ids}/trait.{ids}.txt",
# params:
# trait = TRAIT_DATA,
# outdir = config['HBCGM']['WORKSPACE'],
# traitid = "{ids}",
# rawdata = config['HBCGM']['USE_RAWDATA']
# script:
# "../scripts/strain2traits.py"

rule strainOrder:
output: "strain.order.snpdb.txt"
Expand Down
42 changes: 21 additions & 21 deletions workflows/haplomap.smk
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ with open(TRAIT_IDS, 'r') as t:
# filter id in MeSH
with open(MPD2MeSH, 'r') as j:
MESH_DICT = json.load(j)
IDS = [i for i in IDS_ if i.split("-")[0] in MESH_DICT ]
IDS = [i for i in IDS_ if i.replace("MPD_","").split("-")[0] in MESH_DICT ]
# filter id that already run
pat = config['HBCGM']['WORKSPACE'] + "MPD_{ids}_snp.results.txt"
IDS = [mnum for mnum in IDS if not os.path.exists(pat.format(ids = mnum))]
Expand All @@ -47,39 +47,39 @@ CHROMSOME = [str(i) for i in range (1, 20)] + ['X'] # NO 'Y'
# SNPDB = expand("SNPs/chr{i}.txt", i=CHROMOSOMES)
HBCGM = expand("MPD_{ids}/chr{i}.snp.results.txt", ids=IDS, i=CHROMSOME)
HBCGM_NONCODING = expand("MPD_{ids}/chr{i}.open_region.bed", ids=IDS, i=CHROMSOME)
HBCGM_MESH = expand("MPD_{ids}_snp.results.mesh.txt", ids=IDS)
# HBCGM_MESH = expand("MPD_{ids}_snp.results.mesh.txt", ids=IDS)
# rules that not work in a new node
#localrules: target, traits, strain2trait


rule target:
input: HBCGM_MESH, #HBCGM_NONCODING
input: HBCGM, #HBCGM_NONCODING

########################### Prepare Phenotypic DATA (from MPD API) ############################
# rule pheno:
# input: TRAIT_DATA
# ouput: os.path.join(OUTPUT_DIR, "mpd.ids.txt")
# shell:
# "cut -d, -f1 {input} | uniq | sed '1d' > {output.txt}"
rule traits:
output: temp(expand("MPD_{ids}/strain.{ids}.temp", ids=IDS))
run:
for out in output:
shell("touch %s"%out)
# rule traits:
# output: temp(expand("MPD_{ids}/strain.{ids}.temp", ids=IDS))
# run:
# for out in output:
# shell("touch %s"%out)

rule strain2trait:
input:
strain = STRAIN_ANNO,
ids = "MPD_{ids}/strain.{ids}.temp"
output:
"MPD_{ids}/trait.{ids}.txt",
params:
trait = TRAIT_DATA,
outdir = config['HBCGM']['WORKSPACE'],
traitid = "{ids}",
rawdata = config['HBCGM']['USE_RAWDATA']
script:
"../scripts/strain2traits.py"
# rule strain2trait:
# input:
# strain = STRAIN_ANNO,
# ids = "MPD_{ids}/strain.{ids}.temp"
# output:
# "MPD_{ids}/trait.{ids}.txt",
# params:
# trait = TRAIT_DATA,
# outdir = config['HBCGM']['WORKSPACE'],
# traitid = "{ids}",
# rawdata = config['HBCGM']['USE_RAWDATA']
# script:
# "../scripts/strain2traits.py"


############################ Convert VCF to niehs, tped, tfam ########################################
Expand Down

0 comments on commit 5d96aca

Please sign in to comment.