@@ -353,6 +353,9 @@ xgb.QuantileDMatrix <- function(
353
353
)
354
354
data_iterator <- .single.data.iterator(iterator_env )
355
355
356
+ env_keep_alive <- new.env()
357
+ env_keep_alive $ keepalive <- NULL
358
+
356
359
# Note: the ProxyDMatrix has its finalizer assigned in the R externalptr
357
360
# object, but that finalizer will only be called once the object is
358
361
# garbage-collected, which doesn't happen immediately after it goes out
@@ -363,9 +366,10 @@ xgb.QuantileDMatrix <- function(
363
366
.Call(XGDMatrixFree_R , proxy_handle )
364
367
})
365
368
# Iterator "next" callback: forwards to xgb.ProxyDMatrix and returns its
# result. The updated call additionally passes env_keep_alive, the
# environment in which xgb.ProxyDMatrix stores the batch it hands to the proxy.
iterator_next <- function () {
366
- return (xgb.ProxyDMatrix(proxy_handle , data_iterator ))
369
+ return (xgb.ProxyDMatrix(proxy_handle , data_iterator , env_keep_alive ))
367
370
}
368
371
# Iterator "reset" callback: first clears env_keep_alive$keepalive so the
# previously held batch is no longer referenced from that environment, then
# calls the iterator's f_reset on iterator_env and returns its result.
iterator_reset <- function () {
372
+ env_keep_alive $ keepalive <- NULL
369
373
return (data_iterator $ f_reset(iterator_env ))
370
374
}
371
375
calling_env <- environment()
@@ -553,7 +557,8 @@ xgb.DataBatch <- function(
553
557
}
554
558
555
559
# This is only for internal usage, class is not exposed to the user.
556
- xgb.ProxyDMatrix <- function (proxy_handle , data_iterator ) {
560
+ xgb.ProxyDMatrix <- function (proxy_handle , data_iterator , env_keep_alive ) {
561
+ env_keep_alive $ keepalive <- NULL
557
562
lst <- data_iterator $ f_next(data_iterator $ env )
558
563
if (is.null(lst )) {
559
564
return (0L )
@@ -566,13 +571,19 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
566
571
stop(" Either one of 'group' or 'qid' should be NULL" )
567
572
}
568
573
if (is.data.frame(lst $ data )) {
569
- tmp <- .process.df.for.dmatrix(lst $ data , lst $ feature_types )
574
+ data <- lst $ data
575
+ lst $ data <- NULL
576
+ tmp <- .process.df.for.dmatrix(data , lst $ feature_types )
570
577
lst $ feature_types <- tmp $ feature_types
578
+ data <- NULL
579
+ env_keep_alive $ keepalive <- tmp
571
580
.Call(XGProxyDMatrixSetDataColumnar_R , proxy_handle , tmp $ lst )
572
581
} else if (is.matrix(lst $ data )) {
582
+ env_keep_alive $ keepalive <- lst
573
583
.Call(XGProxyDMatrixSetDataDense_R , proxy_handle , lst $ data )
574
584
} else if (inherits(lst $ data , " dgRMatrix" )) {
575
585
tmp <- list (p = lst $ data @ p , j = lst $ data @ j , x = lst $ data @ x , ncol = ncol(lst $ data ))
586
+ env_keep_alive $ keepalive <- tmp
576
587
.Call(XGProxyDMatrixSetDataCSR_R , proxy_handle , tmp )
577
588
} else {
578
589
stop(" 'data' has unsupported type." )
@@ -712,14 +723,23 @@ xgb.ExtMemDMatrix <- function(
712
723
cache_prefix <- path.expand(cache_prefix )
713
724
nthread <- as.integer(NVL(nthread , - 1L ))
714
725
726
+ # The purpose of this environment is to keep data alive (protected from the
727
+ # garbage collector) after setting the data in the proxy dmatrix. The data
728
+ # held here (under name 'keepalive') should be unset (leaving it unprotected
729
+ # for garbage collection) before the start of each data iteration batch and
730
+ # during each iterator reset.
731
+ env_keep_alive <- new.env()
732
+ env_keep_alive $ keepalive <- NULL
733
+
715
734
proxy_handle <- .make.proxy.handle()
716
735
on.exit({
717
736
.Call(XGDMatrixFree_R , proxy_handle )
718
737
})
719
738
# Iterator "next" callback: forwards to xgb.ProxyDMatrix and returns its
# result. The updated call additionally passes env_keep_alive so that the
# batch set on the proxy stays referenced (see 'keepalive' in that environment).
iterator_next <- function () {
720
- return (xgb.ProxyDMatrix(proxy_handle , data_iterator ))
739
+ return (xgb.ProxyDMatrix(proxy_handle , data_iterator , env_keep_alive ))
721
740
}
722
741
# Iterator "reset" callback: unsets env_keep_alive$keepalive (releasing the
# reference to the last batch) before calling the iterator's f_reset on
# data_iterator$env; the value of f_reset is returned.
iterator_reset <- function () {
742
+ env_keep_alive $ keepalive <- NULL
723
743
return (data_iterator $ f_reset(data_iterator $ env ))
724
744
}
725
745
calling_env <- environment()
@@ -779,14 +799,17 @@ xgb.QuantileDMatrix.from_iterator <- function( # nolint
779
799
780
800
nthread <- as.integer(NVL(nthread , - 1L ))
781
801
802
+ env_keep_alive <- new.env()
803
+ env_keep_alive $ keepalive <- NULL
782
804
proxy_handle <- .make.proxy.handle()
783
805
on.exit({
784
806
.Call(XGDMatrixFree_R , proxy_handle )
785
807
})
786
808
# Iterator "next" callback: delegates to xgb.ProxyDMatrix and returns
# whatever it returns. The updated call also hands over env_keep_alive, where
# xgb.ProxyDMatrix keeps a reference to the batch it sets on the proxy.
iterator_next <- function () {
787
- return (xgb.ProxyDMatrix(proxy_handle , data_iterator ))
809
+ return (xgb.ProxyDMatrix(proxy_handle , data_iterator , env_keep_alive ))
788
810
}
789
811
# Iterator "reset" callback: drops the batch reference held in
# env_keep_alive$keepalive, then calls the iterator's f_reset on
# data_iterator$env and returns its result.
iterator_reset <- function () {
812
+ env_keep_alive $ keepalive <- NULL
790
813
return (data_iterator $ f_reset(data_iterator $ env ))
791
814
}
792
815
calling_env <- environment()
0 commit comments