From 181336631c96e35f447b8b6a7deaf9d4c198bb40 Mon Sep 17 00:00:00 2001 From: ramonamerong Date: Tue, 13 Dec 2022 16:10:58 +0100 Subject: [PATCH] Made insertion motif optional, expanded valid insertion sequence check and added support for target site deletions --- bin/addsv.py | 15 +++++++++------ bin/bamsurgeon/mutableseq.py | 9 ++++++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/bin/addsv.py b/bin/addsv.py index 965c58d..ebf9872 100644 --- a/bin/addsv.py +++ b/bin/addsv.py @@ -528,18 +528,21 @@ def makemut(args, bedline, alignopts): assert len(a) > 1 # insertion syntax: INS [optional TSDlen] insseqfile = a[1] if not (os.path.exists(insseqfile) or insseqfile == 'RND' or insseqfile.startswith('INSLIB:')): # not a file... is it a sequence? (support indel ins.) - assert re.search('^[ATGCatgc]*$',insseqfile), "cannot determine SV type: %s" % insseqfile # make sure it's a sequence + assert re.search('^[ATGCURYKMSWBDHVNatgcurykmswbdhvn]*$',insseqfile), "cannot determine SV type: %s" % insseqfile # make sure it's a sequence insseq = insseqfile.upper() insseqfile = None if len(a) > 2: # field 5 for insertion is TSD Length tsdlen = int(a[2]) - if len(a) > 3: # field 6 for insertion is motif, format = 'NNNN^NNNN where ^ is cut site - ins_motif = a[3] - assert '^' in ins_motif, 'insertion motif specification requires cut site defined by ^' + if len(a) > 3: + try: # field 6 for VAF in case of floating point. This is the end of the fields + svfrac = float(a[3])/cn + except: # otherwise is insertion motif, format = 'NNNN^NNNN where ^ is cut site + ins_motif = a[3] + assert '^' in ins_motif, 'insertion motif specification requires cut site defined by ^' - if len(a) > 4: # field 7 is VAF - svfrac = float(a[4])/cn + if len(a) > 4: # field 7 is VAF when field 6 is insertion motif + svfrac = float(a[4])/cn if action == 'DUP': if len(a) > 1: diff --git a/bin/bamsurgeon/mutableseq.py b/bin/bamsurgeon/mutableseq.py index a442834..4c996c2 100644 --- a/bin/bamsurgeon/mutableseq.py +++ b/bin/bamsurgeon/mutableseq.py @@ -53,10 +53,13 @@ def deletion(self, start, end): self.seq = self.seq[:start] + self.seq[end:] def insertion(self, loc, seq, tsdlen=0): - ''' inserts seq after position loc, adds taret site duplication (tsd) if tsdlen > 0 ''' + ''' inserts seq after position loc, adds target site duplication (tsd) if tsdlen > 0 and deletion if tsdlen < 0''' loc = int(loc) - tsd = self.seq[loc:loc+tsdlen] - self.seq = self.seq[:loc] + tsd + seq + self.seq[loc:] + if tsdlen >= 0: + tsd = self.seq[loc:loc + tsdlen] + self.seq = self.seq[:loc] + tsd + seq + self.seq[loc:] + else: + self.seq = self.seq[:loc] + seq + self.seq[loc - tsdlen:] def inversion(self, start, end): ''' inverts sequence between start and end, bases at start and end positions are not affected '''