diff --git a/bam2bcf.h b/bam2bcf.h index 54e5faaaa..972923d7c 100644 --- a/bam2bcf.h +++ b/bam2bcf.h @@ -1,7 +1,7 @@ /* bam2bcf.h -- variant calling. Copyright (C) 2010-2012 Broad Institute. - Copyright (C) 2012-2014 Genome Research Ltd. + Copyright (C) 2012-2014, 2019 Genome Research Ltd. Author: Heng Li @@ -99,7 +99,8 @@ typedef struct { } bcf_callret1_t; typedef struct { - int tid, pos; + int tid; + hts_pos_t pos; bcf_hdr_t *bcf_hdr; int a[5]; // alleles: ref, alt, alt2, alt3 float qsum[5]; // for the QS tag @@ -128,7 +129,7 @@ extern "C" { int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call); int bcf_call2bcf(bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int fmt_flag, const bcf_callaux_t *bca, const char *ref); - int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref, + int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, hts_pos_t pos, bcf_callaux_t *bca, const char *ref, const void *rghash); void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call); diff --git a/bam2bcf_indel.c b/bam2bcf_indel.c index 9749d5bb7..104d10807 100644 --- a/bam2bcf_indel.c +++ b/bam2bcf_indel.c @@ -1,7 +1,7 @@ /* bam2bcf_indel.c -- indel caller. Copyright (C) 2010, 2011 Broad Institute. - Copyright (C) 2012-2014 Genome Research Ltd. + Copyright (C) 2012-2014, 2019 Genome Research Ltd. Author: Heng Li @@ -87,9 +87,10 @@ void bcf_call_del_rghash(void *_hash) kh_destroy(rg, hash); } -static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos) +static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, hts_pos_t tpos, hts_pos_t is_left, hts_pos_t *_tpos) { - int k, x = c->pos, y = 0, last_y = 0; + int k, y = 0, last_y = 0; + hts_pos_t x = c->pos; *_tpos = c->pos; for (k = 0; k < c->n_cigar; ++k) { int op = cigar[k] & BAM_CIGAR_MASK; @@ -124,9 +125,10 @@ static inline int est_seqQ(const bcf_callaux_t *bca, int l, int l_run) return q < qh? q : qh; } -static inline int est_indelreg(int pos, const char *ref, int l, char *ins4) +static inline int est_indelreg(hts_pos_t pos, const char *ref, int l, char *ins4) { - int i, j, max = 0, max_i = pos, score = 0; + int j, max = 0, score = 0; + hts_pos_t i, max_i = pos; l = abs(l); for (i = pos + 1, j = 0; ref[i]; ++i, ++j) { if (ins4) score += (toupper(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1; @@ -146,11 +148,12 @@ static inline int est_indelreg(int pos, const char *ref, int l, char *ins4) - 8: estimated sequence quality .. (aux>>8)&0xff - 8: indel quality .. aux&0xff */ -int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref, +int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, hts_pos_t pos, bcf_callaux_t *bca, const char *ref, const void *rghash) { - int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2; + int s, k, t, n_types, *types, max_rd_len, max_ins, *score1, *score2, max_ref2; int N, K, l_run, ref_type, n_alt; + hts_pos_t i, j, left, right; char *inscns = 0, *ref2, *query, **ref_sample; khash_t(rg) *hash = (khash_t(rg)*)rghash; if (ref == 0 || bca == 0) return -1; @@ -225,7 +228,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla free(aux); // TODO revisit how/whether to control printing this warning if (hts_verbose >= 2) - fprintf(stderr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1); + fprintf(stderr, "[%s] excessive INDEL alleles at position %"PRIhts_pos". Skip the position.\n", __func__, pos + 1); return -1; } types = (int*)calloc(n_types, sizeof(int)); @@ -274,7 +277,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla bam1_t *b = p->b; uint32_t *cigar = bam_get_cigar(b); uint8_t *seq = bam_get_seq(b); - int x = b->core.pos, y = 0; + hts_pos_t x = b->core.pos, y = 0; for (k = 0; k < b->core.n_cigar; ++k) { int op = cigar[k]&0xf; int j, l = cigar[k]>>4; @@ -382,7 +385,8 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla // align each read to ref2 for (i = 0; i < n_plp[s]; ++i, ++K) { bam_pileup1_t *p = plp[s] + i; - int qbeg, qend, tbeg, tend, sc, kk; + int qbeg, qend, sc, kk; + hts_pos_t tbeg, tend; uint8_t *seq = bam_get_seq(p->b); uint32_t *cigar = bam_get_cigar(p->b); if (p->b->core.flag&4) continue; // unmapped reads diff --git a/bam_plcmd.c b/bam_plcmd.c index 44d42a2bf..0497fb6f5 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -1,6 +1,6 @@ /* bam_plcmd.c -- mpileup subcommand. - Copyright (C) 2008-2015 Genome Research Ltd. + Copyright (C) 2008-2015, 2019 Genome Research Ltd. Portions copyright (C) 2009-2012 Broad Institute. Author: Heng Li @@ -64,9 +64,9 @@ static inline int printw(int c, FILE *fp) return 0; } -static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, int pos, - int ref_len, const char *ref, kstring_t *ks, - int rev_del) +static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos, + hts_pos_t ref_len, const char *ref, kstring_t *ks, + int rev_del) { int j; if (p->is_head) { @@ -162,7 +162,7 @@ typedef struct { typedef struct { char *ref[2]; int ref_id[2]; - int ref_len[2]; + hts_pos_t ref_len[2]; } mplp_ref_t; #define MPLP_REF_INIT {{NULL,NULL},{-1,-1},{0,0}} @@ -228,7 +228,7 @@ static int build_auxlist(mplp_conf_t *conf, char *optstring) { return 0; } -static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) { +static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, hts_pos_t *ref_len) { mplp_ref_t *r = ma->ref; //printf("get ref %d {%d/%p, %d/%p}\n", tid, r->ref_id[0], r->ref[0], r->ref_id[1], r->ref[1]); @@ -248,9 +248,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) { } if (tid == r->ref_id[1]) { // Last, swap over - int tmp; - tmp = r->ref_id[0]; r->ref_id[0] = r->ref_id[1]; r->ref_id[1] = tmp; - tmp = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp; + int tmp_id; + hts_pos_t tmp_len; + tmp_id = r->ref_id[0]; r->ref_id[0] = r->ref_id[1]; r->ref_id[1] = tmp_id; + tmp_len = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp_len; char *tc; tc = r->ref[0]; r->ref[0] = r->ref[1]; r->ref[1] = tc; @@ -266,10 +267,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) { r->ref_len[1] = r->ref_len[0]; r->ref_id[0] = tid; - r->ref[0] = faidx_fetch_seq(ma->conf->fai, + r->ref[0] = faidx_fetch_seq64(ma->conf->fai, sam_hdr_tid2name(ma->h, r->ref_id[0]), 0, - INT_MAX, + HTS_POS_MAX, &r->ref_len[0]); if (!r->ref[0]) { @@ -287,10 +288,10 @@ static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, int *ref_len) { static void print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname, - int pos, int n, const char *ref, int ref_len) + hts_pos_t pos, int n, const char *ref, hts_pos_t ref_len) { int i; - fprintf(fp, "%s\t%d\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N'); + fprintf(fp, "%s\t%"PRIhts_pos"\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N'); for (i = 0; i < n; ++i) { fputs("\t0\t*\t*", fp); if (conf->flag & MPLP_PRINT_QPOS) @@ -314,7 +315,9 @@ static int mplp_func(void *data, bam1_t *b) { char *ref; mplp_aux_t *ma = (mplp_aux_t*)data; - int ret, skip = 0, ref_len; + int ret, skip = 0; + hts_pos_t ref_len; + do { int has_ref; ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b); @@ -346,7 +349,7 @@ static int mplp_func(void *data, bam1_t *b) if (ma->conf->fai && b->core.tid >= 0) { has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len); if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence - fprintf(stderr,"[%s] Skipping because %"PRId64" is outside of %d [ref:%d]\n", + fprintf(stderr,"[%s] Skipping because %"PRIhts_pos" is outside of %"PRIhts_pos" [ref:%d]\n", __func__, (int64_t) b->core.pos, ref_len, b->core.tid); skip = 1; continue; @@ -407,7 +410,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx) extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list); extern void bcf_call_del_rghash(void *rghash); mplp_aux_t **data; - int i, tid, pos, *n_plp, beg0 = 0, end0 = INT_MAX, tid0 = 0, ref_len, max_depth, max_indel_depth; + int i, tid, *n_plp, tid0 = 0, max_depth, max_indel_depth; + hts_pos_t pos, beg0 = 0, end0 = HTS_POS_MAX, ref_len; const bam_pileup1_t **plp; mplp_ref_t mp_ref = MPLP_REF_INIT; bam_mplp_t iter; @@ -667,10 +671,11 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx) bam_mplp_set_maxcnt(iter, max_depth); bcf1_t *bcf_rec = bcf_init1(); int ret; - int last_tid = -1, last_pos = -1; + int last_tid = -1; + hts_pos_t last_pos = -1; // begin pileup - while ( (ret=bam_mplp_auto(iter, &tid, &pos, n_plp, plp)) > 0) { + while ( (ret=bam_mplp64_auto(iter, &tid, &pos, n_plp, plp)) > 0) { if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested mplp_get_ref(data[0], tid, &ref, &ref_len); //printf("tid=%d len=%d ref=%p/%s\n", tid, ref_len, ref, ref); @@ -739,7 +744,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx) } if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), pos, pos+1)) continue; - fprintf(pileup_fp, "%s\t%d\t%c", sam_hdr_tid2name(h, tid), pos + 1, (ref && pos < ref_len)? ref[pos] : 'N'); + fprintf(pileup_fp, "%s\t%"PRIhts_pos"\t%c", sam_hdr_tid2name(h, tid), pos + 1, (ref && pos < ref_len)? ref[pos] : 'N'); for (i = 0; i < n; ++i) { int j, cnt; for (j = cnt = 0; j < n_plp[i]; ++j) {