From 3b2b393554bcbacb64eca1bf57e0d6089a67ac4e Mon Sep 17 00:00:00 2001 From: Jiefei-Wang Date: Wed, 4 Sep 2019 12:20:15 -0400 Subject: [PATCH 1/3] constructor altRep update --- R/IRanges-constructor.R | 15 +++- src/IRanges_constructor.c | 146 +++++++++++++++++++++++++------------- 2 files changed, 110 insertions(+), 51 deletions(-) diff --git a/R/IRanges-constructor.R b/R/IRanges-constructor.R index d985c049..e095bbf5 100644 --- a/R/IRanges-constructor.R +++ b/R/IRanges-constructor.R @@ -48,13 +48,26 @@ solveUserSEW0 <- function(start=NULL, end=NULL, width=NULL) end <- rep.int(NA_integer_, max123) else end <- S4Vectors:::recycleVector(end, max123) - } + } if (L3 < max123) { if (L3 == 0L) width <- rep.int(NA_integer_, max123) else width <- S4Vectors:::recycleVector(width, max123) } + + ## Check whether 'start' and/or 'width' can be used as-is or not. + ## 'start' and 'width' can be used **as-is** to construct the IRanges object + ## to return if they satisfy at least both criteria: + ## (a) They don't have a "dim" or "names" attribute on them. + ## (b) They don't contain NAs. + # use_start_as_is = is.null(dim(start)) && + # is.null(names(start)) && + # !anyNA(start) + # use_width_as_is = is.null(dim(width)) && + # is.null(names(width)) && + # !anyNA(width); + # .Call2("solve_user_SEW0", start, end, width, PACKAGE="IRanges") } diff --git a/src/IRanges_constructor.c b/src/IRanges_constructor.c index 06b27455..380cde95 100644 --- a/src/IRanges_constructor.c +++ b/src/IRanges_constructor.c @@ -116,16 +116,17 @@ static int solve_range(int start, int end, int width, Of course they also must define valid ranges, that is, after resolution of the NAs, the width must be >= 0 and < 2^31, the start must be > -2^31 and < 2^31, and the implicit end must be > -2^31 and < 2^31. This is - checked early and an error is raised on the first invalid range (see 1st - pass below). + checked when resolving the NAs. */ SEXP solve_user_SEW0(SEXP start, SEXP end, SEXP width) { int ans_len, use_start_as_is, use_width_as_is, - i, solved_start, solved_width; - const int *start_p, *end_p, *width_p; - SEXP ans, ans_start, ans_width; + i, solved_start, solved_width, + start_value, end_value, width_value; + SEXP ans, ans_start, ans_width, + check_start_NA, check_width_NA; + //These functions might not only be called by constructor? if (!(IS_INTEGER(start) && IS_INTEGER(end) && IS_INTEGER(width))) error("the supplied 'start', 'end', and 'width', " "must be integer vectors"); @@ -133,58 +134,46 @@ SEXP solve_user_SEW0(SEXP start, SEXP end, SEXP width) if (LENGTH(end) != ans_len || LENGTH(width) != ans_len) error("'start', 'end', and 'width' must have the same length"); + /*Prepare the expression to check any NA values in start and width + The function anyNA can be more efficient than a loop in C.*/ + PROTECT(check_start_NA = lang2(install("anyNA"), start)); + PROTECT(check_width_NA = lang2(install("anyNA"), width)); + use_start_as_is = GET_DIM(start) == R_NilValue && - GET_NAMES(start) == R_NilValue; + GET_NAMES(start) == R_NilValue && + !asLogical(R_tryEval(check_start_NA, R_GlobalEnv, NULL)); use_width_as_is = GET_DIM(width) == R_NilValue && - GET_NAMES(width) == R_NilValue; - - /* 1st pass: Solve and check the supplied ranges and determine - whether 'start' and/or 'width' can be used as-is or not. */ - start_p = INTEGER(start); - end_p = INTEGER(end); - width_p = INTEGER(width); - for (i = 0; i < ans_len; i++) { - if (solve_range(*start_p, *end_p, *width_p, - &solved_start, &solved_width) != 0) - error("In range %d: %s.", i + 1, errmsg_buf); - if (use_start_as_is && *start_p == NA_INTEGER) - use_start_as_is = 0; - if (use_width_as_is && *width_p == NA_INTEGER) - use_width_as_is = 0; - start_p++; - end_p++; - width_p++; - } + GET_NAMES(width) == R_NilValue&& + !asLogical(R_tryEval(check_width_NA, R_GlobalEnv, NULL)); ans_start = start; ans_width = width; if (!(use_start_as_is && use_width_as_is)) { - /* 2nd pass: Allocate and populate 'ans_start' - and/or 'ans_width'. */ + /* Allocate and populate 'ans_start' and/or 'ans_width'. + Call the duplicate function to duplicate an object for + an ALTREP might has a more efficient way to duplicate itself*/ if (!use_start_as_is) - PROTECT(ans_start = NEW_INTEGER(ans_len)); + PROTECT(ans_start = duplicate(start)); if (!use_width_as_is) - PROTECT(ans_width = NEW_INTEGER(ans_len)); - start_p = INTEGER(start); - end_p = INTEGER(end); - width_p = INTEGER(width); + PROTECT(ans_width = duplicate(width)); + for (i = 0; i < ans_len; i++) { - /* All ranges got validated during the 1st pass so - we don't need to check the returned value again. */ - solve_range(*start_p, *end_p, *width_p, - &solved_start, &solved_width); + start_value = INTEGER_ELT(start, i); + end_value = INTEGER_ELT(end, i); + width_value = INTEGER_ELT(width, i); + if (solve_range(start_value, end_value, width_value, + &solved_start, &solved_width) != 0) + error("In range %d: %s.", i + 1, errmsg_buf); + /*The if statement is used to avoid unnecessary assignment*/ if (!use_start_as_is) - INTEGER(ans_start)[i] = solved_start; + SET_INTEGER_ELT(ans_start, i, solved_start); if (!use_width_as_is) - INTEGER(ans_width)[i] = solved_width; - start_p++; - end_p++; - width_p++; + SET_INTEGER_ELT(ans_width, i, solved_width); } } PROTECT(ans = _new_IRanges("IRanges", ans_start, ans_width, R_NilValue)); - UNPROTECT(1 + !use_start_as_is + !use_width_as_is); + UNPROTECT(3 + !use_start_as_is + !use_width_as_is); return ans; } @@ -306,32 +295,89 @@ static int solve_user_SEW_row(int refwidth, int start, int end, int width, SEXP solve_user_SEW(SEXP refwidths, SEXP start, SEXP end, SEXP width, SEXP translate_negative_coord, SEXP allow_nonnarrowing) { - SEXP ans, ans_start, ans_width; - int ans_len, i0, i1, i2, i3; + SEXP ans, ans_start, ans_width, + check_start_NA, check_width_NA; + int ans_len, i0, i1, i2, i3, + refwidths_value, start_value, width_value, end_value, + use_start_as_is, use_width_as_is, + solved_start, solved_width; translate_negative_coord0 = LOGICAL(translate_negative_coord)[0]; nonnarrowing_is_OK = LOGICAL(allow_nonnarrowing)[0]; ans_len = LENGTH(refwidths); - PROTECT(ans_start = NEW_INTEGER(ans_len)); - PROTECT(ans_width = NEW_INTEGER(ans_len)); + + /*Prepare the expression to check any NA values in start and width + The function anyNA can be more efficient than a loop in C.*/ + PROTECT(check_start_NA = lang2(install("anyNA"), start)); + PROTECT(check_width_NA = lang2(install("anyNA"), width)); + + /*Please see comments in solve_user_SEW0 + The length should alse be check since it is not checked in R*/ + use_start_as_is = GET_DIM(start) == R_NilValue && + GET_NAMES(start) == R_NilValue && + !asLogical(R_tryEval(check_start_NA, R_GlobalEnv, NULL))&& + LENGTH(start) == ans_len + ; + use_width_as_is = GET_DIM(width) == R_NilValue && + GET_NAMES(width) == R_NilValue && + !asLogical(R_tryEval(check_width_NA, R_GlobalEnv, NULL))&& + LENGTH(width) == ans_len; + + /* + If user's input cannot be used `as-is`, depending on the vector's length: + 1. If the length is the same as ans_len, the vector will be duplicated + 2. If the length is different from ans_len, a new vector will be created. + */ + if (!use_start_as_is) { + if (LENGTH(start) == ans_len) + PROTECT(ans_start = duplicate(start)); + else + PROTECT(ans_start = NEW_INTEGER(ans_len)); + } + else { + ans_start = start; + } + + if (!use_width_as_is) { + if (LENGTH(width) == ans_len) + PROTECT(ans_width = duplicate(width)); + else + PROTECT(ans_width = NEW_INTEGER(ans_len)); + } + else { + ans_width = width; + } + + for (i0 = i1 = i2 = i3 = 0; i0 < ans_len; i0++, i1++, i2++, i3++) { /* recycling */ if (i1 >= LENGTH(start)) i1 = 0; if (i2 >= LENGTH(end)) i2 = 0; if (i3 >= LENGTH(width)) i3 = 0; + + refwidths_value = INTEGER_ELT(refwidths, i0); + start_value = INTEGER_ELT(start, i1); + end_value = INTEGER_ELT(end, i2); + width_value = INTEGER_ELT(width, i3); + if (solve_user_SEW_row(INTEGER(refwidths)[i0], INTEGER(start)[i1], INTEGER(end)[i2], INTEGER(width)[i3], - INTEGER(ans_start) + i0, - INTEGER(ans_width) + i0) != 0) + solved_start, + solved_width + ) != 0) { - UNPROTECT(2); + UNPROTECT(2 + use_start_as_is + use_width_as_is); error("solving row %d: %s", i0 + 1, errmsg_buf); } + if (!use_start_as_is) + SET_INTEGER_ELT(ans_start, i0, solved_start); + if (!use_width_as_is) + SET_INTEGER_ELT(ans_width, i0, solved_width); } PROTECT(ans = _new_IRanges("IRanges", ans_start, ans_width, R_NilValue)); - UNPROTECT(3); + UNPROTECT(3 + use_start_as_is + use_width_as_is); return ans; } From 055faa6b14efc5492d071cb9a66d21c1a6deb738 Mon Sep 17 00:00:00 2001 From: Jiefei-Wang Date: Wed, 4 Sep 2019 15:25:49 -0400 Subject: [PATCH 2/3] remove attribute after duplicate object --- src/IRanges_constructor.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/IRanges_constructor.c b/src/IRanges_constructor.c index 380cde95..74f156a3 100644 --- a/src/IRanges_constructor.c +++ b/src/IRanges_constructor.c @@ -156,7 +156,14 @@ SEXP solve_user_SEW0(SEXP start, SEXP end, SEXP width) PROTECT(ans_start = duplicate(start)); if (!use_width_as_is) PROTECT(ans_width = duplicate(width)); - + + /* Remove NAMES and DIM attribute(duplicate will preserve them)*/ + SET_NAMES(ans_start, R_NilValue); + SET_NAMES(ans_width, R_NilValue); + SET_DIM(ans_start, R_NilValue); + SET_DIM(ans_width, R_NilValue); + + for (i = 0; i < ans_len; i++) { start_value = INTEGER_ELT(start, i); end_value = INTEGER_ELT(end, i); From 8b801258b0dc2ba8fa3ecfae4f58203e263b456c Mon Sep 17 00:00:00 2001 From: Jiefei-Wang Date: Fri, 6 Sep 2019 13:16:22 -0400 Subject: [PATCH 3/3] fix altrep bugs --- R/IRanges-constructor.R | 1 + src/IRanges_constructor.c | 85 ++++++++++++++++++++++----------------- 2 files changed, 49 insertions(+), 37 deletions(-) diff --git a/R/IRanges-constructor.R b/R/IRanges-constructor.R index e095bbf5..26fa49ec 100644 --- a/R/IRanges-constructor.R +++ b/R/IRanges-constructor.R @@ -178,6 +178,7 @@ solveUserSEW <- function(refwidths, start=NA, end=NA, width=NA, } } + .Call2("solve_user_SEW", refwidths, start, end, width, translate.negative.coord, allow.nonnarrowing, diff --git a/src/IRanges_constructor.c b/src/IRanges_constructor.c index 74f156a3..1790db7e 100644 --- a/src/IRanges_constructor.c +++ b/src/IRanges_constructor.c @@ -106,8 +106,8 @@ static int solve_range(int start, int end, int width, /* --- .Call ENTRY POINT --- 'start' and 'width' can be used **as-is** to construct the IRanges object to return if they satisfy at least both criteria: - (a) They don't have a "dim" or "names" attribute on them. - (b) They don't contain NAs. + (a) They don't have a "dim" or "names" attribute on them. + (b) They don't contain NAs. Note that this just reflects what validObject() expects to see in the "start" and "width" slots of an IRanges object. If they can't be used **as-is** then they need to be modified (i.e. the @@ -120,21 +120,15 @@ static int solve_range(int start, int end, int width, */ SEXP solve_user_SEW0(SEXP start, SEXP end, SEXP width) { - int ans_len, use_start_as_is, use_width_as_is, + int ans_len, + use_start_as_is,use_width_as_is, i, solved_start, solved_width, start_value, end_value, width_value; SEXP ans, ans_start, ans_width, check_start_NA, check_width_NA; - //These functions might not only be called by constructor? - if (!(IS_INTEGER(start) && IS_INTEGER(end) && IS_INTEGER(width))) - error("the supplied 'start', 'end', and 'width', " - "must be integer vectors"); ans_len = LENGTH(start); - if (LENGTH(end) != ans_len || LENGTH(width) != ans_len) - error("'start', 'end', and 'width' must have the same length"); - - /*Prepare the expression to check any NA values in start and width + /* Check if start/width contains NA The function anyNA can be more efficient than a loop in C.*/ PROTECT(check_start_NA = lang2(install("anyNA"), start)); PROTECT(check_width_NA = lang2(install("anyNA"), width)); @@ -143,26 +137,27 @@ SEXP solve_user_SEW0(SEXP start, SEXP end, SEXP width) GET_NAMES(start) == R_NilValue && !asLogical(R_tryEval(check_start_NA, R_GlobalEnv, NULL)); use_width_as_is = GET_DIM(width) == R_NilValue && - GET_NAMES(width) == R_NilValue&& + GET_NAMES(width) == R_NilValue && !asLogical(R_tryEval(check_width_NA, R_GlobalEnv, NULL)); - + ans_start = start; ans_width = width; if (!(use_start_as_is && use_width_as_is)) { /* Allocate and populate 'ans_start' and/or 'ans_width'. Call the duplicate function to duplicate an object for - an ALTREP might has a more efficient way to duplicate itself*/ - if (!use_start_as_is) + an ALTREP might has a more efficient way to duplicate itself + NAMES and DIM attribute will be removed(duplicate will preserve them) + */ + if (!use_start_as_is) { PROTECT(ans_start = duplicate(start)); - if (!use_width_as_is) + SET_NAMES(ans_start, R_NilValue); + SET_DIM(ans_start, R_NilValue); + } + if (!use_width_as_is) { PROTECT(ans_width = duplicate(width)); - - /* Remove NAMES and DIM attribute(duplicate will preserve them)*/ - SET_NAMES(ans_start, R_NilValue); - SET_NAMES(ans_width, R_NilValue); - SET_DIM(ans_start, R_NilValue); - SET_DIM(ans_width, R_NilValue); - + SET_NAMES(ans_width, R_NilValue); + SET_DIM(ans_width, R_NilValue); + } for (i = 0; i < ans_len; i++) { start_value = INTEGER_ELT(start, i); @@ -178,6 +173,10 @@ SEXP solve_user_SEW0(SEXP start, SEXP end, SEXP width) SET_INTEGER_ELT(ans_width, i, solved_width); } } + + + + PROTECT(ans = _new_IRanges("IRanges", ans_start, ans_width, R_NilValue)); UNPROTECT(3 + !use_start_as_is + !use_width_as_is); @@ -245,18 +244,21 @@ static int solve_user_SEW_row(int refwidth, int start, int end, int width, "negative values or NAs are not allowed in 'refwidths'"); return -1; } + if (start != NA_INTEGER) { if (translate_negative_coord0) start = translate_negative_startorend(refwidth, start); if (check_start(refwidth, start, "supplied") != 0) return -1; } + if (end != NA_INTEGER) { if (translate_negative_coord0) end = translate_negative_startorend(refwidth, end); if (check_end(refwidth, end, "supplied") != 0) return -1; } + if (width == NA_INTEGER) { if (start == NA_INTEGER) start = 1; @@ -291,8 +293,10 @@ static int solve_user_SEW_row(int refwidth, int start, int end, int width, return -1; } } + *solved_start = start; *solved_width = width; + return 0; } @@ -322,24 +326,30 @@ SEXP solve_user_SEW(SEXP refwidths, SEXP start, SEXP end, SEXP width, The length should alse be check since it is not checked in R*/ use_start_as_is = GET_DIM(start) == R_NilValue && GET_NAMES(start) == R_NilValue && - !asLogical(R_tryEval(check_start_NA, R_GlobalEnv, NULL))&& - LENGTH(start) == ans_len - ; + !asLogical(R_tryEval(check_start_NA, R_GlobalEnv, NULL)) && + LENGTH(start) == ans_len; + use_width_as_is = GET_DIM(width) == R_NilValue && GET_NAMES(width) == R_NilValue && - !asLogical(R_tryEval(check_width_NA, R_GlobalEnv, NULL))&& + !asLogical(R_tryEval(check_width_NA, R_GlobalEnv, NULL)) && LENGTH(width) == ans_len; + ans_start = start; + ans_width = width; + /* - If user's input cannot be used `as-is`, depending on the vector's length: + If user's input cannot be used `as-is`, , depending on the vector's length: 1. If the length is the same as ans_len, the vector will be duplicated 2. If the length is different from ans_len, a new vector will be created. + NAMES and DIM attribute will be removed(duplicate will preserve them) */ if (!use_start_as_is) { - if (LENGTH(start) == ans_len) + if (LENGTH(start) == ans_len) PROTECT(ans_start = duplicate(start)); else PROTECT(ans_start = NEW_INTEGER(ans_len)); + SET_NAMES(ans_start, R_NilValue); + SET_DIM(ans_start, R_NilValue); } else { ans_start = start; @@ -350,12 +360,13 @@ SEXP solve_user_SEW(SEXP refwidths, SEXP start, SEXP end, SEXP width, PROTECT(ans_width = duplicate(width)); else PROTECT(ans_width = NEW_INTEGER(ans_len)); + SET_NAMES(ans_width, R_NilValue); + SET_DIM(ans_width, R_NilValue); } else { ans_width = width; } - for (i0 = i1 = i2 = i3 = 0; i0 < ans_len; i0++, i1++, i2++, i3++) { /* recycling */ if (i1 >= LENGTH(start)) i1 = 0; @@ -367,12 +378,12 @@ SEXP solve_user_SEW(SEXP refwidths, SEXP start, SEXP end, SEXP width, end_value = INTEGER_ELT(end, i2); width_value = INTEGER_ELT(width, i3); - if (solve_user_SEW_row(INTEGER(refwidths)[i0], - INTEGER(start)[i1], - INTEGER(end)[i2], - INTEGER(width)[i3], - solved_start, - solved_width + if (solve_user_SEW_row(refwidths_value, + start_value, + end_value, + width_value, + &solved_start, + &solved_width ) != 0) { UNPROTECT(2 + use_start_as_is + use_width_as_is); @@ -384,7 +395,7 @@ SEXP solve_user_SEW(SEXP refwidths, SEXP start, SEXP end, SEXP width, SET_INTEGER_ELT(ans_width, i0, solved_width); } PROTECT(ans = _new_IRanges("IRanges", ans_start, ans_width, R_NilValue)); - UNPROTECT(3 + use_start_as_is + use_width_as_is); + UNPROTECT(3 + !use_start_as_is + !use_width_as_is); return ans; }