s3alfisc · s3alfisc · Jul 8, 2023 · Jul 8, 2023 · Jul 8, 2023 · Jul 8, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: fwildclusterboot
 Title: Fast Wild Cluster Bootstrap Inference for Linear Models
-Version: 0.14.0
+Version: 0.15
 Authors@R: c(
     person("Alexander", "Fischer", , "[email protected]", role = c("aut", "cre")),
     person("David", "Roodman", role = "aut"),
@@ -75,6 +75,8 @@ Suggests:
     testthat (>= 3.0.0),
     tibble, 
     MASS
+Remotes: 
+    kylebutts/fixest/tree/sparse-matrix
 LinkingTo:
     Rcpp,
     RcppArmadillo,

diff --git a/NAMESPACE b/NAMESPACE
@@ -54,6 +54,7 @@ importFrom(dreamerr,check_value_plus)
 importFrom(dreamerr,set_up)
 importFrom(dreamerr,validate_dots)
 importFrom(dreamerr,warn_up)
+importFrom(fixest,sparse_model_matrix)
 importFrom(generics,glance)
 importFrom(generics,tidy)
 importFrom(gtools,permutations)

diff --git a/R/arg_checks.R b/R/arg_checks.R
@@ -123,17 +123,17 @@ check_boottest_args_plus <- function(
       # e.g. fml = y ~ x1 + I(x2^2) shold be possible
       #' @srrstats {G2.4c} *explicit conversion to character via `as.character()`
       #' (and not `paste` or `paste0`)* Done
-      
+
       ("fixef_vars" %in% names(object) &&
        grepl("[",
              Reduce(paste, as.character(as.formula(object$fml_all$fixef))),
-             fixed = TRUE
-       ))
+             fixed = TRUE) && 
+       !is.null(fe)
+       )
       # note: whitespace ~ - for IV
       # grepl("~", deparse_fml, fixed = TRUE)
     ) {
-      rlang::abort("Varying slopes in fixest / fixest via [] to interact
-          fixed effects is currently not supported in boottest().")
+      rlang::abort("Varying slopes fixed effects in `fixest::feols()` are currently not supported in boottest() when fe are projected out in the bootstrap via the `fe` function argument.")
     }
 
 

diff --git a/R/boot_aggregate.R b/R/boot_aggregate.R
@@ -217,7 +217,7 @@ boot_aggregate <- function(
   # note: all boottest function arguments are tested in boottest()
   # therefore, only check for supported subset of features
 
-  check_arg(bootstrap_type, "charin(fnw11)")
+  check_arg(bootstrap_type, "charin(fnw11, 11, 31, 13, 33)")
   check_arg(full, "logical scalar")
   # => later => extend it to more than one set of vars to agg
 
@@ -312,10 +312,26 @@ boot_aggregate <- function(
     val <- gsub(paste0(".*", agg, ".*"), "\\2", cname_select, perl = TRUE)
   }
 
-  mm <- model.matrix(x)
-
-  cat("Run the wild bootstrap: this might take some time...(but 
-      hopefully not too much time =) ).", "\n")
+  if(inherits(x, "etwfe")){
+    X <- fixest:::model.matrix.fixest(x, type = "rhs")
+    mm2 <- sparse_model_matrix(x, type = c("fixef")) |> as.matrix() |> as.data.frame()
+    mm <- cbind(X, mm2)
+
+
+    if(agg %in% c("att", "period", "cohort", "TRUE")){
+      rlang::abort(
+        paste0("`agg='", agg, "'` works for fixest::sunab(), but is not implemented for objects of type `etwfe`. For `etwfe`, you will have to do the aggregation by hand. See `?boot_aggregate` for examples.")
+      )
+    }
+
+  } else {
+    # varying slopes only allowed for objects of type "etwfe"
+    check_no_varying_slopes(x)
+    mm <- model.matrix(x)
+
+  }
+
+  rlang::inform("Run the wild bootstrap: this might take some time...(but hopefully not too much time =) ).")
 
   name_df <- unique(data.frame(root, val, stringsAsFactors = FALSE))
 
@@ -330,7 +346,6 @@ boot_aggregate <- function(
 
   for(i in 1:nk){
 
-
     r <- name_df[i, 1]
     v <- name_df[i, 2]
     v_names <- cname_select[root == r & val == v]
@@ -393,15 +408,15 @@ boot_aggregate <- function(
         ssc = ssc
       )
 
-    setTxtProgressBar(pb,i)
-
     pvalues[i] <- pval(boot_fit)
     teststat[i] <- teststat(boot_fit)
     if(!is.null(clustid)){
         conf_int[i,] <- confint.boottest(boot_fit)
     } else {
       conf_int[i,] <- rep(NA, 2)
     }
+
+    setTxtProgressBar(pb,i)
 
   }
   # th z & p values
@@ -425,5 +440,30 @@ boot_aggregate <- function(
 
 }
 
+check_no_varying_slopes <- function(object){
+
+  has_fixef <- "fixef_vars" %in% names(object)
+
+  if (
+    # '^' illegal in fixef argument, but legal in main formula -
+    # e.g. fml = y ~ x1 + I(x2^2) shold be possible
+    #' @srrstats {G2.4c} *explicit conversion to character via `as.character()`
+    #' (and not `paste` or `paste0`)* Done
+
+    (has_fixef &&
+     grepl("[",
+           Reduce(paste, as.character(as.formula(object$fml_all$fixef))),
+           fixed = TRUE) && 
+     !is.null(fe)
+    )
+    # note: whitespace ~ - for IV
+    # grepl("~", deparse_fml, fixed = TRUE)
+  ) {
+    rlang::abort("Varying slopes fixed effects in `fixest::feols()` are currently not supported in boottest() when fe are projected out in the bootstrap via the `fe` function argument.")
+  }
+
+}
+
+
 
 
diff --git a/R/boot_algo_fastnreliable.R b/R/boot_algo_fastnreliable.R
@@ -162,7 +162,10 @@ boot_algo_fastnreliable <- function(
 
     inv_tXX_tXgXg <- lapply(
       1:G,
-      function(x) inv(tXX - tXgXg[[x]], x)
+      function(x) inv(tXX - tXgXg[[x]], 
+                      paste0(
+                      "Matrix inversion error when computing beta(g) for cluster ", g, ". Using Pseudo-Inverse instead. Potentially, you can suppress this message by specifying a cluster fixed effect in the bootstrap via the `fe` argument of `boottest()`.")
+      )
     )
 
     beta_1g_tilde <- lapply(
@@ -189,7 +192,10 @@ boot_algo_fastnreliable <- function(
     if(is.null(inv_tXX_tXgXg)){
       inv_tXX_tXgXg <- lapply(
         1:G,
-        function(x) inv((tXX - tXgXg[[x]]), x)
+        function(g) inv((tXX - tXgXg[[g]]),
+                        paste0(
+                          "Matrix inversion error when computing beta(g) for cluster ", g, ". Using Pseudo-Inverse instead. Potentially, you can suppress this message by specifying a cluster fixed effect in the bootstrap via the `fe` argument of `boottest()`.")
+        )
       )
     }
   }
@@ -352,14 +358,13 @@ boot_algo_fastnreliable <- function(
 }
 
 
-inv <- function(x, g){
+inv <- function(x, message){
   tryCatch(
     {
       Matrix::solve(x)
     },
     error = function(e) {
-      rlang::warn(message = paste0(
-        "Matrix inversion error when computing beta(g) for cluster ", g, ". Using Pseudo-Inverse instead. Potentially, you can suppress this message by specifying a cluster fixed effect in the bootstrap via the `fe` argument of `boottest()`."), 
+      rlang::warn(message = message, 
         use_cli_format = TRUE)
       eigen_pinv(as.matrix(x))
     }

diff --git a/R/boot_algo_fastnwild.R b/R/boot_algo_fastnwild.R
@@ -112,7 +112,7 @@ boot_algo_fastnwild <-
     # weights_mat <- Matrix::Diagonal(N, weights)
     # if no weights - N x N identity matrix
     weights_sq <- sqrt(weights) # sqrt fine because diagonal matrix
-    A <- solve(crossprod(weights_sq * X)) # k x k
+    A <- inv(crossprod(weights_sq * X), "Matrix inversion failure: Using a generalized inverse instead. Check the produced t-statistic, does it match the one of your regression package (under the same small sample correction)? If yes, this is likely not something to worry about.") # k x k
     # XXinv <- solve(crossprod(X))                          # k x k
     WX <- weights * X
 
@@ -172,47 +172,47 @@ boot_algo_fastnwild <-
     #       as.vector(bootcluster[[1]])
     #     )
     #   ) # N x c*
-
+    
     # preallocate lists
     CC <- vector(mode = "list", length = length(names(clustid)))
     DD <- vector(mode = "list", length = length(names(clustid)))
     CD <- vector(mode = "list", length = length(names(clustid)))
-
+    
     # CC <- matrix(NA, length(names(clustid)), B + 1)
     # CD <- matrix(NA, length(names(clustid)), B + 1)
     # DD <- matrix(NA, length(names(clustid)), B + 1)
-
-
+    
+    
     if (is.null(W)) {
       # if there are no fixed effects - term (2) in equ. (62) fast & wild
       # does not arise
       # note - W refers to W_bar in fast & wild, not regression weights.
       # If no fixed effects
       # in the model / bootstrap, W is NULL
-
+      
       for (x in seq_along(names(clustid))) {
         SXinvXXRX <- collapse::fsum(WXARX, clustid[x]) # c* x f
         SXinvXXRXA <-
           SXinvXXRX %*% A
         # part of numerator independent of both bootstrap errors and r
-
+        
         # P2_bootcluster has been collapsed over "bootcluster",
         # now collapse over cluster c
         P2 <-
           #Matrix.utils::aggregate.Matrix(P2_bootcluster, clustid[x]) # c* x c
           collapse::fsum(P2_bootcluster, clustid[x])
         P_all <- P2 - tcrossprod(SXinvXXRXA, P1) # formerly _a
-
+        
         Q2 <-
           #Matrix.utils::aggregate.Matrix(Q2_bootcluster, clustid[x])
           collapse::fsum(Q2_bootcluster, clustid[x])
         Q_all <- Q2 - tcrossprod(SXinvXXRXA, Q1)
-
+        
         C <-
           eigenMapMatMult(as.matrix(P_all), v, nthreads) # c* x (B + 1)
         D <-
           eigenMapMatMult(as.matrix(Q_all), v, nthreads) # c* x (B + 1)
-
+        
         CC[[x]] <- colSums(C * C)
         DD[[x]] <- colSums(D * D)
         CD[[x]] <- colSums(C * D)
@@ -221,32 +221,32 @@ boot_algo_fastnwild <-
       # project out fe
       Q3_2 <-
         crosstab(as.matrix(weights * W %*% Q),
-          var1 = bootcluster,
-          var2 = fixed_effect
+                 var1 = bootcluster,
+                 var2 = fixed_effect
         ) # f x c*
       P3_2 <-
         crosstab(as.matrix(weights * W %*% P),
-          var1 = bootcluster,
-          var2 = fixed_effect
+                 var1 = bootcluster,
+                 var2 = fixed_effect
         ) # f x c*
-
+      
       for (x in seq_along(names(clustid))) {
         SXinvXXRX <- collapse::fsum(WXARX, clustid[x]) # c* x f
         SXinvXXRXA <-
           SXinvXXRX %*% A
         # part of numerator independent of both bootstrap errors and r
-
+        
         CT_cfe <-
           crosstab(WXAR, var1 = clustid[x], var2 = fixed_effect)
         # c x f, formerly S_XinvXXR_F
-
+        
         # a
         P3 <- t(tcrossprod(P3_2, CT_cfe)) # formerly prod_a
         P2 <-
           #Matrix.utils::aggregate.Matrix(P2_bootcluster, clustid[x]) # c* x c
           collapse::fsum(P2_bootcluster, clustid[x])
         P_all <- P2 - tcrossprod(SXinvXXRXA, P1) - P3
-
+        
         # b: note that from here, if impose_null = TRUE, _b suffix objects and
         # D, DD, CD need not be computed, they are always objects of 0's only
         Q3 <- t(tcrossprod(Q3_2, CT_cfe))
@@ -256,20 +256,20 @@ boot_algo_fastnwild <-
         Q_all <- Q2 - tcrossprod(SXinvXXRXA, Q1) - Q3
         C <- eigenMapMatMult(as.matrix(P_all), v, nthreads)
         D <- eigenMapMatMult(as.matrix(Q_all), v, nthreads)
-
+        
         CC[[x]] <- colSums(C * C)
         DD[[x]] <- colSums(D * D)
         CD[[x]] <- colSums(C * D)
       }
     }
-
+    
     # calculate numerator:
     numer_a <- collapse::fsum(as.vector(WXARP), g)
     numer_b <- collapse::fsum(as.vector(WXARQ), g)
     # calculate A, B
     A <- crossprod(as.matrix(numer_a), v) # q x (B+1) -> q = 1
     B <- crossprod(numer_b, v) # q x (B+1) -> q = 1
-
+    
     p_val_res <-
       p_val_null2(
         r = r,
@@ -299,10 +299,10 @@ boot_algo_fastnwild <-
       CD = CD,
       DD = DD
     )
-
-
+    
+    
     # compute confidence interval
-
+    
     if (is.null(conf_int) || conf_int == TRUE) {
       # guess for standard errors
       if (impose_null == TRUE) {
@@ -312,8 +312,8 @@ boot_algo_fastnwild <-
       } else if (impose_null == FALSE) {
         se_guess <- abs((point_estimate - r) / t_stat)
       }
-
-
+      
+      
       conf_int <- invert_p_val(
         ABCD = ABCD,
         small_sample_correction = small_sample_correction,
@@ -335,7 +335,7 @@ boot_algo_fastnwild <-
         test_vals = NA
       )
     }
-
+    
     res <- list(
       p_val = p_val,
       conf_int = conf_int$conf_int,
@@ -352,8 +352,8 @@ boot_algo_fastnwild <-
       ABCD = ABCD # ,
       # small_sample_correction = small_sample_correction
     )
-
+    
     class(res) <- "boot_algo"
-
+    
     invisible(res)
   }