diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100755 index 0000000..f6109e7 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,87 @@ +name: Create Release from setup.py + +on: + pull_request: + types: + - closed + branches: + - master + +jobs: + release: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + + permissions: + contents: write + + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.14" + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y poppler-utils + + + - name: Upgrade pip, setuptools, packaging and install git-cliff + run: | + python -m pip install --upgrade pip setuptools packaging + pip install git-cliff + + - name: Extract version from setup.py + id: get_version + run: | + VERSION=$(python setup.py --version) + echo "VERSION=$VERSION" >> $GITHUB_OUTPUT + + - name: Get latest release tag + id: get_last_release + run: | + latest=$(curl -s \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + https://api.github.com/repos/${{ github.repository }}/releases/latest \ + | jq -r '.tag_name' | sed 's/^v//') + echo "LATEST_RELEASE=$latest" >> $GITHUB_ENV + + - name: Check release version + run: | + echo "New version: ${{ steps.get_version.outputs.VERSION }}" + echo "Latest release: $LATEST_RELEASE" + python - < CHANGELOG.md + + - name: Create Git tag + run: | + git config user.name "github-actions" + git config user.email "github-actions@github.com" + git tag v${{ steps.get_version.outputs.VERSION }} + git push origin v${{ steps.get_version.outputs.VERSION }} + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + tag_name: v${{ steps.get_version.outputs.VERSION }} + body_path: CHANGELOG.md + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 
index 545f3e9..0000000 --- a/.travis.yml +++ /dev/null @@ -1,22 +0,0 @@ -dist: focal -language: python - -python: - - '3.9' - - '3.12' -before_install: - - if ! [ -f ./src/GRCh37.tar.gz ]; then wget --connect-timeout=10 --tries=20 ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/; fi - -install: - - pip install . - -cache: - directories: - - $TRAVIS_BUILD_DIR/src/ - -before_script: - - SigProfilerMatrixGenerator install GRCh37 --local_genome $TRAVIS_BUILD_DIR/src/ - -script: - - python3 test.py - - pytest -s -rw tests diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..40e9968 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include SigProfilerClusters/controllers/* diff --git a/SigProfilerClusters/SigProfilerClusters.py b/SigProfilerClusters/SigProfilerClusters.py index e37f53b..97caea4 100644 --- a/SigProfilerClusters/SigProfilerClusters.py +++ b/SigProfilerClusters/SigProfilerClusters.py @@ -639,6 +639,8 @@ def analysis( max_cpu=None, subClassify=False, variant_caller="standard", + tumor_vaf_column = 10, + vaf_field = "AF", includedVAFs=True, includedCCFs=False, windowSize=1000000, @@ -674,7 +676,9 @@ def analysis( -> caveman: If your VAF is recorded in the 11th column of your VCF as the last number of the colon delimited values, set variant_caller="caveman". -> standard: If your VAF is recorded in the 8th or 10th column of your VCF as VAF=xx or AF=xx, set variant_caller="standard". -> mutect2: If your VAF is recorded in the 10th or 11th column of your VCF as AF=xx, set variant_caller="mutect2". - + -> custom: Configure tumor_vaf_column as 0 based index (Default = 10) and the vaf_field tag (Default = AF) + tumor_vaf_column -> Activated when variant_caller = "custom". 0 based index of the column in VCF containing VAF (Default 10) + vaf_field -> Activated when variant_caller = "custom". 
Set the VAF tag (Default AF) includedVAFs -> optional parameter that informs the tool of the inclusion of VAFs in the dataset (boolean; default=True) includedCCFs -> optional parameter that informs the tool of the inclusion of cancer cell fractions in the dataset (boolean; default=True) @@ -1348,7 +1352,7 @@ def analysis( print("Beginning subclassification of clustered mutations...", end="") if includedVAFs: classifyFunctions.pullVaf( - project, input_path, variant_caller, correction + project, input_path, variant_caller, tumor_vaf_column, vaf_field, correction ) sys.stderr.close() sys.stderr = open(error_file, "a") diff --git a/SigProfilerClusters/SigProfilerHotSpots.py b/SigProfilerClusters/SigProfilerHotSpots.py index ea02287..c3f7b61 100644 --- a/SigProfilerClusters/SigProfilerHotSpots.py +++ b/SigProfilerClusters/SigProfilerHotSpots.py @@ -478,6 +478,8 @@ def analysis( max_cpu=None, subClassify=False, variant_caller="standard", + tumor_vaf_column=10, + vaf_field = "AF", includedVAFs=True, windowSize=1000000, bedRanges=None, @@ -511,6 +513,9 @@ def analysis( -> caveman: If your VAF is recorded in the 11th column of your VCF as the last number of the colon delimited values, set variant_caller="caveman". -> standard: If your VAF is recorded in the 8th or 10th column of your VCF as VAF=xx or AF=xx, set variant_caller="standard". -> mutect2: If your VAF is recorded in the 10th or 11th column of your VCF as AF=xx, set variant_caller="mutect2". + -> custom: Configure tumor_vaf_column as 0 based index (Default = 10) and the vaf_field tag (Default = AF) + tumor_vaf_column -> Activated when variant_caller = "custom". 0 based index of the column in VCF containing VAF (Default 10) + vaf_field -> Activated when variant_caller = "custom". 
Set the VAF tag (Default AF) includedVAFs -> optional parameter that informs the tool of the inclusion of VAFs in the dataset (boolean; default=True) windowSize -> the size of the window used for correcting the IMDs based upon mutational density within a given genomic range (integer; default=10000000) plotIMDfigure -> optional parameter that generates IMD and mutational spectra plots for each sample (boolean; default=True). @@ -1166,7 +1171,7 @@ def analysis( print("Beginning subclassification of clustered mutations...", end="") if includedVAFs: classifyFunctions.pullVaf( - project, input_path, variant_caller, correction + project, input_path, variant_caller, tumor_vaf_column, vaf_field, correction ) sys.stderr.close() sys.stderr = open(error_file, "a") diff --git a/SigProfilerClusters/classifyFunctions.py b/SigProfilerClusters/classifyFunctions.py index 610775f..ee5643e 100644 --- a/SigProfilerClusters/classifyFunctions.py +++ b/SigProfilerClusters/classifyFunctions.py @@ -299,7 +299,7 @@ def pullCCF(project, project_path, correction=True): print("\t".join([x for x in lines]), file=out) -def pullVaf(project, project_path, variant_caller="standard", correction=True): +def pullVaf(project, project_path, variant_caller="standard", tumor_vaf_column=10, vaf_field = "AF",correction=True): """ Collects the VAFs from the original mutation files. Assumes that these are provided in the same format as Sanger or TCGA. @@ -311,6 +311,9 @@ def pullVaf(project, project_path, variant_caller="standard", correction=True): -> caveman: If your VAF is recorded in the 11th column of your VCF as the last number of the colon delimited values, set variant_caller="caveman". -> standard: If your VAF is recorded in the 8th or 10th column of your VCF as VAF=xx or AF=xx, set variant_caller="standard". -> mutect2: If your VAF is recorded in the 10th or 11th column of your VCF as AF=xx, set variant_caller="mutect2". 
+ -> custom: Configure tumor_vaf_column as 0 based index (Default = 10) and the vaf_field tag (Default = AF) + tumor_vaf_column -> Activated when variant_caller = "custom". 0 based index of the column in VCF containing VAF (Default 10) + vaf_field -> Activated when variant_caller = "custom". Set the VAF tag (Default AF) correction -> optional parameter to perform a genome-wide mutational density correction (boolean; default=False) Returns: @@ -340,11 +343,11 @@ def pullVaf(project, project_path, variant_caller="standard", correction=True): vcf_files = [x for x in os.listdir(vcf_path) if x != ".DS_Store"] # Dictionary for variant caller mapping - variant_type_dict = { - "caveman": "caveman", - "standard": "standard", - "mutect2": "mutect2", - } + # variant_type_dict = { + # "caveman": "caveman", + # "standard": "standard", + # "mutect2": "mutect2", + # } ###extracting VAF info if variant_caller == "standard": vafs = {} @@ -431,6 +434,8 @@ def pullVaf(project, project_path, variant_caller="standard", correction=True): break # Stop after finding the header # Check if TUMOR column exists + + print(header) if "TUMOR" not in header: print(f"TUMOR column not found in {vcfFile}. 
Skipping...") continue @@ -438,6 +443,68 @@ def pullVaf(project, project_path, variant_caller="standard", correction=True): # Get the index of the TUMOR column tumor_index = header.index("TUMOR") + # Process the data rows + for line in f: + if line.startswith("#"): + continue + + try: + fields = line.strip().split("\t") + chrom = fields[0] + + # Normalize chromosome naming + if chrom.lower().startswith("chr"): + chrom = chrom[3:] + + pos = fields[1] + ref = fields[3] + alt = fields[4] + + # Extract FORMAT field and TUMOR data + fmt = fields[8].split(":") + tumor_data = fields[tumor_index].split(":") + + # Get the VAF value + vaf_index = fmt.index(field) + vaf = float(tumor_data[vaf_index]) + + # Create key for the variant + if len(ref) == len(alt) and len(ref) > 1: + for i in range(len(ref)): + keyLine = f"{chrom}:{int(pos) + i}:{ref[i]}:{alt[i]}" + vafs[sample][keyLine] = vaf + else: + keyLine = f"{chrom}:{pos}:{ref}:{alt}" + vafs[sample][keyLine] = vaf + + except (ValueError, IndexError) as e: + print(f"Error processing line in {vcfFile}: {line}\n{e}") + continue + elif variant_caller == "custom": + field = vaf_field # The VAF field in the FORMAT column + vafs = {} + + for vcfFile in vcf_files: + sample = vcfFile.split(".")[0] + vafs[sample] = {} + header = [] + + with open(os.path.join(vcf_path, vcfFile)) as f: + for line in f: + # Identify the header line with column names + if line.startswith("#") and not line.startswith("##"): + header = line.strip().split("\t") + break # Stop after finding the header + + # Check if TUMOR column exists + + if len(header) <= tumor_vaf_column: + print(f"Tumor column {tumor_vaf_column} not found in header of {vcfFile}. 
Skipping...") + continue + + # Get the index of the TUMOR column + tumor_index = tumor_vaf_column + # Process the data rows for line in f: if line.startswith("#"): @@ -722,7 +789,7 @@ def findClustersOfClusters( len_mnvs = {} total_mnvs = {} distances = [] - count = 1 + group_count = 1 out = open(out_file, "w") print( "\t".join( @@ -798,7 +865,7 @@ def findClustersOfClusters( ): # (pos - prev_pos) < imds_corrected[samp][regions[samp][hotspot.catch([".",".",chrom, pos], regions[samp], chromLengths, genome, imds_corrected[samp])]])): distances.append(pos - prev_pos) mnv_length += 1 - lines[i - 1] = [str(count)] + lines[i - 1] + lines[i - 1] = [str(group_count)] + lines[i - 1] print("\t".join([x for x in lines[i - 1]]), file=out) total_muts[samp] += 1 else: @@ -814,10 +881,9 @@ def findClustersOfClusters( else: total_mnvs[str(mnv_length)] += 1 mnv_length = 0 - lines[i - 1] = [str(count)] + lines[i - 1] + lines[i - 1] = [str(group_count)] + lines[i - 1] print("\t".join([x for x in lines[i - 1]]), file=out) total_muts[samp] += 1 - count += 1 print("\n\n", file=out) else: mnv_length += 1 @@ -833,11 +899,13 @@ def findClustersOfClusters( total_mnvs[str(mnv_length)] += 1 mnv_length = 0 - lines[i - 1] = [str(count)] + lines[i - 1] + lines[i - 1] = [str(group_count)] + lines[i - 1] print("\t".join([x for x in lines[i - 1]]), file=out) total_muts[samp] += 1 - count += 1 print("\n\n", file=out) + + group_count += 1 + else: mnv_length += 1 if prev_samp not in len_mnvs: @@ -852,15 +920,14 @@ def findClustersOfClusters( total_mnvs[str(mnv_length)] += 1 mnv_length = 0 - lines[i - 1] = [str(count)] + lines[i - 1] + lines[i - 1] = [str(group_count)] + lines[i - 1] print("\t".join([x for x in lines[i - 1]]), file=out) total_muts[samp] += 1 - count += 1 - count = 1 + group_count = 1 print("\n\n################ New Sample #################", file=out) print("\n\n", file=out) - lines[i] = [str(count)] + lines[i] + lines[i] = [str(group_count)] + lines[i] print("\t".join([x for x in 
lines[i]]), file=out) total_muts[samp] += 1 out.close() @@ -878,7 +945,7 @@ def findClustersOfClusters( distances = [] distances_mnv = {} lines = [] - count = 1 + group_count = 1 subclassesHeader = "\t".join( [ x @@ -977,7 +1044,7 @@ def findClustersOfClusters( if len(lines) > 0: if lines[-1][0] == "New": lines = lines[1:] - count = 1 + group_count = 1 write_out = False if len(lines) == 1 or len(lines) == 0: lines = [] @@ -1139,6 +1206,10 @@ def findClustersOfClusters( else: writeClassIII = True + for i in range(0, len(lines), 1): + lines[i][-3] = str(group_count) + group_count += 1 + if writeClassII: processivitySubclassification( lines, out2Y, out2K, out2S, out2N @@ -1146,9 +1217,8 @@ def findClustersOfClusters( for i in range(0, len(lines), 1): lines[i].append("ClassII") print("\t".join([x for x in lines[i]]), file=out4) - lines[i] = [str(count)] + lines[i] + lines[i] = [str(group_count)] + lines[i] print("\t".join([x for x in lines[i]]), file=out2) - count += 1 print("\n\n", file=out2) else: if writeClassI: @@ -1178,6 +1248,7 @@ def findClustersOfClusters( try: for i in range(0, len(lines), 1): lines[i][-1] = "ClassIA" + print( "\t".join([x for x in lines[i]]), file=out6, @@ -1189,6 +1260,7 @@ def findClustersOfClusters( try: for i in range(0, len(lines), 1): lines[i][-1] = "ClassIB" + print( "\t".join([x for x in lines[i]]), file=out7, @@ -1197,6 +1269,7 @@ def findClustersOfClusters( print(lines) elif writeClassIII: # Writes Class III (all other mutations - leftovers) + linesSubClass = lines[:] while len(linesSubClass) > 1: writeClassI = False @@ -1487,6 +1560,11 @@ def findClustersOfClusters( for line in saveNewEvent: linesSubClass.remove(line) + for i in range(0, len(saveNewEvent), 1): + saveNewEvent[i][-3] = str(group_count) + group_count += 1 + + if writeClassII: processivitySubclassification( saveNewEvent, out2Y, out2K, out2S, out2N @@ -1498,13 +1576,12 @@ def findClustersOfClusters( file=out4, ) saveNewEvent[i] = [ - str(count) + str(group_count) ] + 
saveNewEvent[i] print( "\t".join([x for x in saveNewEvent[i]]), file=out2, ) - count += 1 print("\n\n", file=out2) else: @@ -1519,15 +1596,14 @@ def findClustersOfClusters( ), file=out3, ) + except: print(saveNewEvent) if writeClassIc: # Writes Class Ic (extended MBSs) try: - for i in range( - 0, len(saveNewEvent), 1 - ): + for i in range(0, len(saveNewEvent), 1): saveNewEvent[i][-1] = "ClassIC" print( "\t".join( @@ -1570,7 +1646,6 @@ def findClustersOfClusters( 0, len(saveNewEvent), 1 ): saveNewEvent[i][-1] = "ClassIB" - # lines[i].append(category) print( "\t".join( [ @@ -1606,6 +1681,7 @@ def findClustersOfClusters( for i in range(0, len(linesSubClass), 1): linesSubClass[i].append("ClassIII") linesSubClass[i].append(category) + print( "\t".join( [x for x in linesSubClass[i]] @@ -1922,7 +1998,7 @@ def findClustersOfClusters_noVAF( len_mnvs = {} total_mnvs = {} distances = [] - count = 1 + group_count = 1 out = open(out_file, "w") with open(file) as f: next(f) @@ -1973,7 +2049,7 @@ def findClustersOfClusters_noVAF( ): # (pos - prev_pos) < imds_corrected[samp][regions[samp][hotspot.catch([".",".",chrom, pos], regions[samp], chromLengths, genome, imds_corrected[samp])]])): distances.append(pos - prev_pos) mnv_length += 1 - lines[i - 1] = [str(count)] + lines[i - 1] + lines[i - 1] = [str(group_count)] + lines[i - 1] print("\t".join([x for x in lines[i - 1]]), file=out) total_muts[samp] += 1 else: @@ -1989,10 +2065,9 @@ def findClustersOfClusters_noVAF( else: total_mnvs[str(mnv_length)] += 1 mnv_length = 0 - lines[i - 1] = [str(count)] + lines[i - 1] + lines[i - 1] = [str(group_count)] + lines[i - 1] print("\t".join([x for x in lines[i - 1]]), file=out) total_muts[samp] += 1 - count += 1 print("\n\n", file=out) else: mnv_length += 1 @@ -2008,11 +2083,11 @@ def findClustersOfClusters_noVAF( total_mnvs[str(mnv_length)] += 1 mnv_length = 0 - lines[i - 1] = [str(count)] + lines[i - 1] + lines[i - 1] = [str(group_count)] + lines[i - 1] print("\t".join([x for x in lines[i - 
1]]), file=out) total_muts[samp] += 1 - count += 1 print("\n\n", file=out) + group_count += 1 else: mnv_length += 1 if prev_samp not in len_mnvs: @@ -2027,15 +2102,14 @@ def findClustersOfClusters_noVAF( total_mnvs[str(mnv_length)] += 1 mnv_length = 0 - lines[i - 1] = [str(count)] + lines[i - 1] + lines[i - 1] = [str(group_count)] + lines[i - 1] print("\t".join([x for x in lines[i - 1]]), file=out) total_muts[samp] += 1 - count += 1 - count = 1 + group_count = 1 print("\n\n################ New Sample #################", file=out) print("\n\n", file=out) - lines[i] = [str(count)] + lines[i] + lines[i] = [str(group_count)] + lines[i] print("\t".join([x for x in lines[i]]), file=out) total_muts[samp] += 1 out.close() @@ -2053,7 +2127,7 @@ def findClustersOfClusters_noVAF( distances = [] distances_mnv = {} lines = [] - count = 1 + group_count = 1 subclassesHeader = "\t".join( [ x @@ -2148,7 +2222,7 @@ def findClustersOfClusters_noVAF( if len(lines) > 0: if lines[-1][0] == "New": lines = lines[1:] - count = 1 + group_count = 1 write_out = False if len(lines) == 1 or len(lines) == 0: lines = [] @@ -2294,9 +2368,9 @@ def findClustersOfClusters_noVAF( for i in range(0, len(lines), 1): lines[i].append("ClassII") print("\t".join([x for x in lines[i]]), file=out4) - lines[i] = [str(count)] + lines[i] + lines[i] = [str(group_count)] + lines[i] print("\t".join([x for x in lines[i]]), file=out2) - count += 1 + group_count += 1 print("\n\n", file=out2) else: if writeClassI: @@ -2488,9 +2562,9 @@ def findClustersOfClusters_noVAF( for i in range(0, len(lines), 1): lines[i].append("ClassII") print("\t".join([x for x in lines[i]]), file=out4) - lines[i] = [str(count)] + lines[i] + lines[i] = [str(group_count)] + lines[i] print("\t".join([x for x in lines[i]]), file=out2) - count += 1 + group_count += 1 print("\n\n", file=out2) else: if writeClassI: diff --git a/SigProfilerClusters/controllers/cli_controller.py b/SigProfilerClusters/controllers/cli_controller.py new file mode 100644 
def str2bool(v):
    """Convert a command-line token into a ``bool``.

    Real booleans are returned unchanged. Strings are matched
    case-insensitively: "yes", "true", "t", "y", "1" give True and
    "no", "false", "f", "n", "0" give False.

    Raises:
        argparse.ArgumentTypeError: if the string is none of the tokens above.
    """
    if isinstance(v, bool):
        return v
    token = v.lower()
    if token in ("yes", "true", "t", "y", "1"):
        return True
    if token in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")


def str2list(arg):
    """Split a comma-separated string into a list of its parts."""
    return arg.split(",")


def _add_bool_option(parser, flag, default):
    """Register a boolean option usable as ``--flag`` or ``--flag <value>``."""
    # nargs="?" with const=True makes a bare ``--flag`` mean True, while an
    # explicit value such as ``--flag false`` still goes through str2bool.
    parser.add_argument(flag, type=str2bool, nargs="?", const=True, default=default)


def parse_arguments_clusters(args: List[str], description: str) -> argparse.Namespace:
    """Parse the command-line arguments of the ``analysis`` command.

    Every attribute of the returned namespace is named after the keyword
    argument of ``SigProfilerClusters.analysis`` that it is forwarded to.

    Args:
        args: Argument strings, without the program and command names.
        description: Text shown at the top of the ``--help`` output.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description=description)

    # Required positional arguments
    parser.add_argument("project", help="Project name")
    parser.add_argument("genome", help="Reference genome")
    parser.add_argument("contexts", help="Mutational contexts")
    parser.add_argument("simContext", type=str2list, help="Simulated contexts (comma-separated)")
    parser.add_argument("input_path", help="Path to input directory")

    # Optional arguments
    parser.add_argument("--output_type", default="all")
    parser.add_argument("--analysis", default="all")
    parser.add_argument("--interdistance", default="96")
    _add_bool_option(parser, "--exome", False)
    _add_bool_option(parser, "--clustering_vaf", False)
    _add_bool_option(parser, "--sortSims", True)
    _add_bool_option(parser, "--extraction", False)
    _add_bool_option(parser, "--correction", True)
    parser.add_argument("--startProcess", type=int, default=1)
    parser.add_argument("--endProcess", type=int, default=25)
    parser.add_argument("--totalIterations", type=int, default=1000)
    _add_bool_option(parser, "--calculateIMD", True)
    _add_bool_option(parser, "--chrom_based", False)
    parser.add_argument("--max_cpu", type=int, default=None)
    _add_bool_option(parser, "--subClassify", False)
    parser.add_argument(
        "--variant_caller",
        default="standard",
        help="VAF layout of the input VCFs, e.g. caveman, standard, mutect2 or custom",
    )
    # The next two options only take effect with --variant_caller custom;
    # their defaults mirror the defaults of analysis().
    parser.add_argument(
        "--tumor_vaf_column",
        type=int,
        default=10,
        help="0-based index of the VCF column containing the VAF "
        "(used when --variant_caller custom; default 10)",
    )
    parser.add_argument(
        "--vaf_field",
        default="AF",
        help="VAF tag to read (used when --variant_caller custom; default AF)",
    )
    _add_bool_option(parser, "--includedVAFs", True)
    _add_bool_option(parser, "--includedCCFs", False)
    parser.add_argument("--windowSize", type=int, default=1000000)
    parser.add_argument("--bedRanges", default=None)
    _add_bool_option(parser, "--plotIMDfigure", True)
    _add_bool_option(parser, "--plotRainfall", True)
    _add_bool_option(parser, "--probability", False)

    return parser.parse_args(args)


class CliController:
    """Dispatches parsed command-line arguments to the analysis code."""

    def dispatch_sigProfilerClusters(self, user_args: List[str]) -> None:
        """Parse ``user_args`` and run ``SigProfilerClusters.analysis`` with them."""
        parsed_args = parse_arguments_clusters(
            user_args, "Cluster mutation analysis"
        )
        # Each parsed attribute carries the name of the analysis() keyword it
        # feeds, so forwarding the whole namespace keeps the parser and the
        # call in sync (including tumor_vaf_column and vaf_field).
        SigProfilerClusters.analysis(**vars(parsed_args))
def main_function():
    """Entry point of the ``SigProfilerClusters`` console script.

    Reads the command name from ``sys.argv[1]`` (case-insensitive) and hands
    the remaining arguments to the matching controller method. When no
    command is given, or the command is not recognised, the usage message is
    printed and nothing else happens.
    """
    commands = {
        "analysis": "Analyze clustered mutations"
    }

    if len(sys.argv) < 2 or sys.argv[1].lower() not in commands:
        print_usage(commands)
        return

    command = sys.argv[1].lower()
    args = sys.argv[2:]

    controller = cli_controller.CliController()

    if command == "analysis":
        controller.dispatch_sigProfilerClusters(args)


def print_usage(commands):
    """Print the usage line followed by one ``name: description`` line per command."""
    # The placeholders tell the user that a command name is required and that
    # its arguments follow it.
    print("Usage: SigProfilerClusters <command> [<args>]\n")
    print("Commands:")
    for cmd, desc in commands.items():
        print(f" {cmd}: {desc}")


if __name__ == "__main__":
    main_function()
'3.14'] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y poppler-utils + + - name: Upgrade pip, setuptools, and packaging + run: | + python -m pip install --upgrade pip setuptools packaging + + - name: Cache src directory + uses: actions/cache@v4 + with: + path: ${{ github.workspace }}/src/ + key: ${{ runner.os }}-src-grch37 + restore-keys: | + ${{ runner.os }}-src- + + - name: Download GRCh37.tar.gz if not present + run: | + if [ ! -f ${{ github.workspace }}/src/GRCh37.tar.gz ]; then + wget --connect-timeout=10 --tries=20 ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ${{ github.workspace }}/src/ + fi + + - name: Install package with tests + run: | + pip install .[tests] + + - name: Install genome + run: | + SigProfilerMatrixGenerator install GRCh37 --local_genome ${{ github.workspace }}/src/ + + - name: Run unit tests + run: | + pip install pytest + pytest -s -rw tests + + - name: Run integration test + run: | + python3 test.py + diff --git a/cliff.toml b/cliff.toml new file mode 100644 index 0000000..7653012 --- /dev/null +++ b/cliff.toml @@ -0,0 +1,7 @@ +[changelog] +# encabezado del changelog +header = "# Changelog" + +[git] +# no exigimos conventional commits +conventional_commits = false diff --git a/setup.py b/setup.py index cab3010..72fec4c 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def readme(): # return(f.read()) -VERSION = "1.2.2" +VERSION = "1.2.3" def write_version_py(filename="SigProfilerClusters/version.py"): @@ -66,5 +66,10 @@ def write_version_py(filename="SigProfilerClusters/version.py"): "seaborn>=0.13.0", ], include_package_data=True, + entry_points={ + "console_scripts": [ + "SigProfilerClusters=SigProfilerClusters.sigprofilerclusters_cli:main_function", + ], + }, 
def test_argument_parsing():
    """Every positional and optional argument ends up on the namespace with the right type."""
    args = parse_arguments_clusters(
        [
            "dummy_project",
            "GRCh38",
            "96",
            "96",
            "path/to/files",
            "--output_type", "all",
            "--analysis", "all",
            "--interdistance", "96",
            "--exome", "True",
            "--clustering_vaf", "False",
            "--extraction", "False",
            "--correction", "True",
            "--startProcess", "1",
            "--endProcess", "25",
            "--totalIterations", "1000",
            "--calculateIMD", "True",
            "--chrom_based", "False",
            "--max_cpu", "1",
            "--subClassify", "False",
            "--variant_caller", "standard",
            "--includedVAFs", "True",
            "--includedCCFs", "False",
            "--windowSize", "1000000",
            "--bedRanges", "None",
            "--plotIMDfigure", "True",
            "--plotRainfall", "True",
            "--probability", "False",
        ],
        "Test argument parsing",
    )

    assert args.project == "dummy_project"
    assert args.genome == "GRCh38"
    assert args.contexts == "96"
    assert args.simContext == ["96"]
    assert args.input_path == "path/to/files"
    assert args.output_type == "all"
    assert args.analysis == "all"
    assert args.interdistance == "96"
    # Identity checks: ``== True`` would also accept 1, hiding a converter
    # that returns something other than a real bool.
    assert args.exome is True
    assert args.clustering_vaf is False
    # --sortSims is not passed above, so this checks its default.
    assert args.sortSims is True
    assert args.extraction is False
    assert args.correction is True
    assert args.startProcess == 1
    assert args.endProcess == 25
    assert args.totalIterations == 1000
    assert args.calculateIMD is True
    assert args.chrom_based is False
    assert args.max_cpu == 1
    assert args.subClassify is False
    assert args.variant_caller == "standard"
    assert args.includedVAFs is True
    assert args.includedCCFs is False
    assert args.windowSize == 1000000
    # The parser does not interpret the text "None"; it stays a string.
    assert args.bedRanges == "None"
    assert args.plotIMDfigure is True
    assert args.plotRainfall is True
    assert args.probability is False


def test_boolean_conversion():
    """str2bool maps each accepted token to a real bool and rejects anything else."""
    for token in ("yes", "true", "t", "y", "1"):
        assert str2bool(token) is True
    for token in ("no", "false", "f", "n", "0"):
        assert str2bool(token) is False
    with pytest.raises(argparse.ArgumentTypeError):
        str2bool("maybe")


def test_str2list():
    """str2list splits on commas only."""
    assert str2list("arg1,arg2,arg3") == ["arg1", "arg2", "arg3"]
    assert str2list("arg_unique") == ["arg_unique"]
    assert str2list("wrong.sepparator") == ["wrong.sepparator"]


if __name__ == "__main__":
    pytest.main()