Skip to content

Commit

Permalink
release 0.0.17
Browse files Browse the repository at this point in the history
  • Loading branch information
y9c committed Apr 22, 2024
1 parent 671290d commit 356addf
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 20 deletions.
58 changes: 39 additions & 19 deletions cutseq/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,13 @@ def pipeline_single(input1, output1, short1, untrimmed1, barcode, settings):
if barcode.inline5.len > 0:
adapter_inline5 = PrefixAdapter(sequence=barcode.inline5.fw, max_errors=0.2)
modifiers.append(AdapterCutter([adapter_inline5], times=1))
else:
adapter_inline5 = None
if barcode.inline3.len > 0:
adapter_inline3 = SuffixAdapter(sequence=barcode.inline3.fw, max_errors=0.2)
modifiers.append(AdapterCutter([adapter_inline3], times=1))
else:
adapter_inline3 = None

# step 5: extract UMI
if barcode.umi5.len > 0:
Expand Down Expand Up @@ -297,13 +301,13 @@ def pipeline_single(input1, output1, short1, untrimmed1, barcode, settings):
)
# TODO: --max-n=0 support
if (
settings.ensure_inline_barcode
and barcode.inline5.len + barcode.inline3.len > 0
):
barcode.inline5.len + barcode.inline3.len > 0
and settings.ensure_inline_barcode
) or (untrimmed1 is not None):
ref_adapters = []
if barcode.inline5.len > 0:
if adapter_inline5 is not None:
ref_adapters.append(adapter_inline5)
if barcode.inline3.len > 0:
if adapter_inline3 is not None:
ref_adapters.append(adapter_inline3)
steps.append(
SingleEndFilter(
Expand Down Expand Up @@ -387,6 +391,8 @@ def pipeline_paired(
UnconditionalCutter(-barcode.inline5.len),
)
)
else:
adapter_inline5 = None
if barcode.inline3.len > 0:
adapter_inline3 = PrefixAdapter(sequence=barcode.inline3.rc, max_errors=0.2)
modifiers.append(
Expand All @@ -395,6 +401,8 @@ def pipeline_paired(
AdapterCutter([adapter_inline3], times=1),
)
)
else:
adapter_inline3 = None

# step 5: extract UMI
if barcode.umi5.len > 0:
Expand Down Expand Up @@ -498,17 +506,13 @@ def pipeline_paired(
)
# TODO: --max-n=0 support
if (
settings.ensure_inline_barcode
and barcode.inline5.len + barcode.inline3.len > 0
):
barcode.inline5.len + barcode.inline3.len > 0
and settings.ensure_inline_barcode
) or (untrimmed1 is not None and untrimmed2 is not None):
steps.append(
PairedEndFilter(
IsUntrimmedAny([adapter_inline5])
if barcode.inline5.len > 0
else None,
IsUntrimmedAny([adapter_inline3])
if barcode.inline3.len > 0
else None,
IsUntrimmedAny([adapter_inline5] if adapter_inline5 else []),
IsUntrimmedAny([adapter_inline3] if adapter_inline3 else []),
outfiles.open_record_writer(
untrimmed1, untrimmed2, interleaved=False
),
Expand All @@ -530,7 +534,7 @@ def pipeline_paired(


def run_cutseq(args):
barcode_config = BarcodeConfig(args.adapter_scheme.replace(" ", "").upper())
barcode_config = BarcodeConfig(args.adapter_scheme)
settings = CutadaptConfig()
settings.rname_suffix = args.with_rname_suffix
settings.ensure_inline_barcode = args.ensure_inline_barcode
Expand Down Expand Up @@ -603,15 +607,15 @@ def main():
"-s",
"--short-file",
type=str,
nargs="*",
nargs="+",
help="Output file path for discarded too short data.",
)
# discard inline barcode untrimmed reads
parser.add_argument(
"-u",
"--untrimmed-file",
type=str,
nargs="*",
nargs="+",
help="Output file path for discarded reads without inline barcode.",
)
parser.add_argument(
Expand Down Expand Up @@ -696,6 +700,7 @@ def main():
elif args.adapter_scheme is None:
logging.error("Adapter scheme or name is required.")
sys.exit(1)
args.adapter_scheme = args.adapter_scheme.replace(" ", "").upper()

if args.auto_rc is not None:
if args.reverse_complement:
Expand Down Expand Up @@ -738,8 +743,23 @@ def validate_output_file(output_files, input_files, output_prefix, output_suffix
args.short_file = validate_output_file(
args.short_file, args.input_file, args.output_prefix, "short"
)
args.untrimmed_file = validate_output_file(
args.untrimmed_file, args.input_file, args.output_prefix, "untrimmed"

def _check_with_inline_barcode(s):
# inline barcode is in bracket () and length > 0
return re.match(r".*\([ATGCatgc]+\).*", s) is not None

args.untrimmed_file = (
validate_output_file(
args.untrimmed_file, args.input_file, args.output_prefix, "untrimmed"
)
if (
args.untrimmed_file is not None
or (
_check_with_inline_barcode(args.adapter_scheme)
and args.ensure_inline_barcode
)
)
else [None, None]
)

run_cutseq(args)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cutseq"
version = "0.0.16"
version = "0.0.17"
description = "Automatically cut adapter / barcode / UMI from NGS data"
authors = ["Ye Chang <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 356addf

Please sign in to comment.