Skip to content

Commit 6189417

Browse files
authored
Avoid non-API calls when truncating overallocated vectors (#362)
* Use `resize()` instead of non-API `truncate()` * Add a `push_back()` + truncate benchmark test * Bump up number of iterations in `motivations.Rmd` * More benchmark tweaks * NEWS bullet * Update note
1 parent 1c9dbb6 commit 6189417

File tree

8 files changed

+102
-19
lines changed

8 files changed

+102
-19
lines changed

NEWS.md

+14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
# cpp11 (development version)
22

3+
* Removed usage of the following non-API functions:
4+
* `SETLENGTH()`
5+
* `SET_TRUELENGTH()`
6+
* `SET_GROWABLE_BIT()`
7+
8+
These functions were used as part of the efficient growable vectors that
9+
cpp11 offered, i.e. what happens under the hood when you use `push_back()`.
10+
The removal of these non-API functions means that cpp11 writable vectors that
11+
have been pushed to with `push_back()` will likely force 1 extra allocation
12+
when the conversion from `cpp11::writable::r_vector<T>` to `SEXP` occurs
13+
(typically when you return a result back to R). This does not affect the
14+
performance of `push_back()` itself, and in general these growable vectors
15+
are still quite efficient (#362).
16+
317
* Fixed a memory leak with the `cpp11::writable::r_vector` move assignment
418
operator (#338).
519

cpp11test/R/cpp11.R

+8
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,18 @@ rcpp_grow_ <- function(n_sxp) {
224224
.Call(`_cpp11test_rcpp_grow_`, n_sxp)
225225
}
226226

227+
rcpp_push_and_truncate_ <- function(size_sxp) {
228+
.Call(`_cpp11test_rcpp_push_and_truncate_`, size_sxp)
229+
}
230+
227231
test_destruction_inner <- function() {
228232
invisible(.Call(`_cpp11test_test_destruction_inner`))
229233
}
230234

231235
test_destruction_outer <- function() {
232236
invisible(.Call(`_cpp11test_test_destruction_outer`))
233237
}
238+
239+
cpp11_push_and_truncate_ <- function(size_sexp) {
240+
.Call(`_cpp11test_cpp11_push_and_truncate_`, size_sexp)
241+
}

cpp11test/bench/truncate.R

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
pkgload::load_all("cpp11test")
2+
3+
bench::press(len = as.integer(10 ^ (0:6)),
4+
{
5+
bench::mark(
6+
cpp11 = cpp11_push_and_truncate_(len),
7+
rcpp = rcpp_push_and_truncate_(len),
8+
check = FALSE,
9+
min_iterations = 1000
10+
)
11+
}
12+
)[c("expression", "len", "min", "mem_alloc", "n_itr", "n_gc")]
13+
14+
# Longer benchmark, lots of gc
15+
len <- as.integer(10 ^ 7)
16+
bench::mark(
17+
cpp11 = cpp11_push_and_truncate_(len),
18+
rcpp = rcpp_push_and_truncate_(len),
19+
min_iterations = 200
20+
)[c("expression", "min", "mem_alloc", "n_itr", "n_gc")]

cpp11test/src/cpp11.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,13 @@ extern "C" SEXP _cpp11test_rcpp_grow_(SEXP n_sxp) {
422422
return cpp11::as_sexp(rcpp_grow_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(n_sxp)));
423423
END_CPP11
424424
}
425+
// sum_rcpp.cpp
426+
SEXP rcpp_push_and_truncate_(SEXP size_sxp);
427+
extern "C" SEXP _cpp11test_rcpp_push_and_truncate_(SEXP size_sxp) {
428+
BEGIN_CPP11
429+
return cpp11::as_sexp(rcpp_push_and_truncate_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(size_sxp)));
430+
END_CPP11
431+
}
425432
// test-protect-nested.cpp
426433
void test_destruction_inner();
427434
extern "C" SEXP _cpp11test_test_destruction_inner() {
@@ -438,6 +445,13 @@ extern "C" SEXP _cpp11test_test_destruction_outer() {
438445
return R_NilValue;
439446
END_CPP11
440447
}
448+
// truncate.cpp
449+
SEXP cpp11_push_and_truncate_(SEXP size_sexp);
450+
extern "C" SEXP _cpp11test_cpp11_push_and_truncate_(SEXP size_sexp) {
451+
BEGIN_CPP11
452+
return cpp11::as_sexp(cpp11_push_and_truncate_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(size_sexp)));
453+
END_CPP11
454+
}
441455

442456
extern "C" {
443457
/* .Call calls */
@@ -447,6 +461,7 @@ static const R_CallMethodDef CallEntries[] = {
447461
{"_cpp11test_col_sums", (DL_FUNC) &_cpp11test_col_sums, 1},
448462
{"_cpp11test_cpp11_add_vec_for_", (DL_FUNC) &_cpp11test_cpp11_add_vec_for_, 2},
449463
{"_cpp11test_cpp11_insert_", (DL_FUNC) &_cpp11test_cpp11_insert_, 1},
464+
{"_cpp11test_cpp11_push_and_truncate_", (DL_FUNC) &_cpp11test_cpp11_push_and_truncate_, 1},
450465
{"_cpp11test_cpp11_release_", (DL_FUNC) &_cpp11test_cpp11_release_, 1},
451466
{"_cpp11test_cpp11_safe_", (DL_FUNC) &_cpp11test_cpp11_safe_, 1},
452467
{"_cpp11test_data_frame_", (DL_FUNC) &_cpp11test_data_frame_, 0},
@@ -481,6 +496,7 @@ static const R_CallMethodDef CallEntries[] = {
481496
{"_cpp11test_protect_one_preserve_", (DL_FUNC) &_cpp11test_protect_one_preserve_, 2},
482497
{"_cpp11test_protect_one_sexp_", (DL_FUNC) &_cpp11test_protect_one_sexp_, 2},
483498
{"_cpp11test_rcpp_grow_", (DL_FUNC) &_cpp11test_rcpp_grow_, 1},
499+
{"_cpp11test_rcpp_push_and_truncate_", (DL_FUNC) &_cpp11test_rcpp_push_and_truncate_, 1},
484500
{"_cpp11test_rcpp_release_", (DL_FUNC) &_cpp11test_rcpp_release_, 1},
485501
{"_cpp11test_rcpp_sum_dbl_accumulate_", (DL_FUNC) &_cpp11test_rcpp_sum_dbl_accumulate_, 1},
486502
{"_cpp11test_rcpp_sum_dbl_for_", (DL_FUNC) &_cpp11test_rcpp_sum_dbl_for_, 1},

cpp11test/src/sum_rcpp.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,15 @@
5656

5757
return x;
5858
}
59+
60+
[[cpp11::register]] SEXP rcpp_push_and_truncate_(SEXP size_sxp) {
61+
R_xlen_t size = INTEGER(size_sxp)[0];
62+
63+
// Allocate `size` worth of doubles (filled with garbage data)
64+
Rcpp::NumericVector out(size);
65+
66+
// Push 1 more past the existing capacity
67+
out.push_back(0);
68+
69+
return out;
70+
}

cpp11test/src/truncate.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#include "cpp11/doubles.hpp"
2+
3+
[[cpp11::register]] SEXP cpp11_push_and_truncate_(SEXP size_sexp) {
4+
R_xlen_t size = INTEGER(size_sexp)[0];
5+
6+
// Allocate `size` worth of doubles (filled with garbage data)
7+
cpp11::writable::doubles out(size);
8+
9+
// Push 1 more past the existing length/capacity,
10+
// doubling the capacity for cpp11 vectors
11+
out.push_back(0);
12+
13+
// Truncate back to `size + 1` size and return result.
14+
return SEXP(out);
15+
}

inst/include/cpp11/r_vector.hpp

+15-18
Original file line numberDiff line numberDiff line change
@@ -897,33 +897,30 @@ inline void r_vector<T>::clear() {
897897
length_ = 0;
898898
}
899899

900-
inline SEXP truncate(SEXP x, R_xlen_t length, R_xlen_t capacity) {
901-
#if R_VERSION >= R_Version(3, 4, 0)
902-
SETLENGTH(x, length);
903-
SET_TRUELENGTH(x, capacity);
904-
SET_GROWABLE_BIT(x);
905-
#else
906-
x = safe[Rf_lengthgets](x, length);
907-
#endif
908-
return x;
909-
}
910-
911900
template <typename T>
912901
inline r_vector<T>::operator SEXP() const {
902+
// Throwing away the const-ness is a bit gross, but we only modify
903+
// internal details here, and updating the internal data after we resize allows
904+
// statements like `Rf_setAttrib(<r_vector>, name, value)` to make sense, where
905+
// people expect that the SEXP inside the `<r_vector>` gets the updated attribute.
913906
auto* p = const_cast<r_vector<T>*>(this);
907+
914908
if (data_ == R_NilValue) {
909+
// Specially call out the `NULL` case, which can occur if immediately
910+
// returning a default constructed writable `r_vector` as a `SEXP`.
915911
p->resize(0);
916912
return data_;
917913
}
914+
918915
if (length_ < capacity_) {
919-
p->data_ = truncate(p->data_, length_, capacity_);
920-
SEXP nms = names();
921-
auto nms_size = Rf_xlength(nms);
922-
if ((nms_size > 0) && (length_ < nms_size)) {
923-
nms = truncate(nms, length_, capacity_);
924-
names() = nms;
925-
}
916+
// Truncate the vector to its `length_`. This unfortunately typically forces
917+
// an allocation if the user has called `push_back()` on a writable
918+
// `r_vector`. Importantly, going through `resize()` updates: `data_` and
919+
// protection of it, `data_p_`, and `capacity_`.
920+
p->resize(length_);
921+
return data_;
926922
}
923+
927924
return data_;
928925
}
929926

vignettes/motivations.Rmd

+2-1
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,8 @@ b_grow <- bench::press(.grid = grid,
487487
{
488488
fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_"))))
489489
bench::mark(
490-
fun(len)
490+
fun(len),
491+
min_iterations = 100
491492
)
492493
}
493494
)[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")]

0 commit comments

Comments
 (0)