|
| 1 | +import argparse |
| 2 | +import os |
| 3 | +import sys |
| 4 | + |
| 5 | +from functions.vcf_handle import VCF2Excel |
| 6 | +from functions.pretreatment import Pretreatment |
| 7 | +from functions.none_pretreatment import NonePretreatment |
| 8 | +from functions.Statistic_Methods import Statistic |
| 9 | + |
| 10 | + |
def _split_input_name(input_path):
    """Split an input path into ``(file_name, file_type)``.

    ``file_name`` is the base name up to (not including) its first dot and
    ``file_type`` is everything after that dot. ``file_type`` is the empty
    string when the base name contains no dot, so extensionless inputs no
    longer raise ``IndexError``.
    """
    base_name = os.path.basename(input_path)
    file_name, _, file_type = base_name.partition(".")
    return file_name, file_type


def main(args):
    """Run the analysis pipeline described by the parsed command-line options.

    Steps, in order:
      1. Overwrite ``args.p``, ``args.p2`` and ``args.p3`` in place with
         booleans (``True`` only when the original value equals 1).
      2. Print a summary of the options.
      3. Create ``Pretreated_Files``, ``NoPretreatment`` and ``Excel_Files``
         under the current working directory if they are missing.
      4. If the input is a VCF file, call ``VCF2Excel(...).run()`` and use the
         path it returns; otherwise use ``args.i`` as the data file.
      5. Call ``Pretreatment(...).run`` or ``NonePretreatment(...).run``
         (depending on ``args.p``) to obtain the ref/mut/freq/pos data paths
         and the chromosome set.
      6. Call ``Statistic(...).run()`` once per method in ``args.m``, passing
         ``Results/<file_name>`` as the save path.

    Args:
        args: Parsed options. Attributes read here: ``i``, ``m``, ``p``,
            ``p1``, ``p2``, ``p3``, ``s``, ``w``, ``t``, ``chromosomes`` and
            ``samples``.
    """
    # Convert the 1/0 command-line switches to booleans.
    args.p = args.p == 1
    args.p2 = args.p2 == 1
    args.p3 = args.p3 == 1

    print("-" * 100)
    print(
        "file path:{}\nmethod:{}\nis pretreatment:{}\nread number:{}\nChi-square test:{}\nContinuity test:{}\nsmooth method:{}\nsmooth window size:{}\nthreshold:{}".format(
            args.i, args.m, args.p, args.p1, args.p2, args.p3, args.s, args.w, args.t))
    print("-" * 100)

    # Create the working folders.
    root_path = os.getcwd()
    pretreatment_dir = os.path.join(root_path, "Pretreated_Files")
    nopretreatment_dir = os.path.join(root_path, "NoPretreatment")
    excel_path = os.path.join(root_path, "Excel_Files")
    for path in (pretreatment_dir, nopretreatment_dir, excel_path):
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(path, exist_ok=True)

    # Get the file name and type.
    file_name, file_type = _split_input_name(args.i)

    # The whole suffix is inspected (case-insensitively) so that names such
    # as "pop.v2.vcf", "pop.vcf.gz" or "POP.VCF" are recognised as VCF input.
    if "vcf" in file_type.lower():
        vcf2excel = VCF2Excel(args.i, file_name, excel_path, args.chromosomes, args.samples)
        file_path = vcf2excel.run()
    else:
        file_path = args.i

    if args.p:
        # True when the frequency file for this read-number setting already
        # exists; the flag is handed to run() unchanged.
        return_path = os.path.exists(
            os.path.join(pretreatment_dir, file_name + "_{}".format(args.p1) + "_freq.npy"))
        pretreat = Pretreatment(args.p1, args.p2, args.p3, file_path, file_name, pretreatment_dir)
        ref_data_path, mut_data_path, freq_data_path, pos_data_path, chrome_set = pretreat.run(return_path)
        if not return_path:
            print("pretreatment & files do not exist")
        else:
            print("pretreatment & files exist")
    else:
        return_path = os.path.exists(os.path.join(nopretreatment_dir, file_name + "_ref.npy"))
        nopretreat = NonePretreatment(file_path, file_name, nopretreatment_dir)
        ref_data_path, mut_data_path, freq_data_path, pos_data_path, chrome_set = nopretreat.run(return_path)
        if not return_path:
            print("nonepretreatment & files do not exist")
        else:
            print("nonepretreatment & files exist")

    # Results/<file_name>; makedirs creates the "Results" parent when needed.
    save_path = os.path.join(root_path, "Results", file_name)
    os.makedirs(save_path, exist_ok=True)

    for m in args.m:
        dl = Statistic(m, freq_data_path, ref_data_path, mut_data_path,
                       pos_data_path, chrome_set, args.p1, args.s,
                       args.w, args.t, save_path)
        dl.run()
| 79 | + |
| 80 | + |
# Example (--p takes an integer switch: 1 = pretreatment on, 0 = off):
# python main.py --i /media/xaun/CXX/DeepBSA-terminal/bin/Excel_Files/nc-planthigh-pop1.csv --p 0
if __name__ == "__main__":
    # Command-line options as (flag, add_argument keyword arguments) pairs,
    # registered on the parser in the order listed.
    cli_options = [
        # Data loading and pretreatment.
        ("--i", dict(default=None, required=True, type=str,
                     help="The input file path(vcf/csv).")),
        ("--m", dict(default=["DL"], nargs='+', type=str,
                     help="List of algorithms to use(DL/K/ED4/SNP/SmoothG/SmoothLOD/Ridit) used. Default is DL.")),
        ("--p", dict(default=1, type=int,
                     help="Whether to pretreatment data(1[True] or 0[False]). Default is True.")),
        ("--p1", dict(default=0, type=int,
                      help="Pretreatment step 1: Number of read thread, the SNP whose number lower than it will be filtered. Default is 0.")),
        ("--p2", dict(default=1, type=int,
                      help="Pretreatment step 2: Chi-square test(1[True] or 0[False]). Default is 1[True].")),
        ("--p3", dict(default=1, type=int,
                      help="Pretreatment step 3: Continuity test(1[True] or 0[False]). Default is 1[True].")),
        ("--chromosomes", dict(default=[], nargs='+', type=str,
                               help="List of chromosomes to select.")),
        ("--samples", dict(default=[], nargs='+', type=str,
                           help="List of samples to select.")),
        # Method selection and related settings.
        ("--s", dict(default="LOWESS", type=str,
                     help="The function to smooth the result(Tri-kernel-smooth/LOWESS/Moving Average), Defalut is LOWESS")),
        ("--w", dict(default=0, type=float,
                     help="Windows size of LOESS. The number is range from 0-1. 0 presents the best size for minimum AICc. Default is 0(auto).")),
        ("--t", dict(default=0, type=float,
                     help="The threshold to find peaks(float). Default is 0(auto)")),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in cli_options:
        parser.add_argument(flag, **options)

    # Parse sys.argv and hand the resulting namespace to the pipeline.
    main(parser.parse_args())
0 commit comments