From d26c6d3a724564db3a9c812f0c29515d4f0c1e0b Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Tue, 29 Sep 2015 09:07:37 -0700 Subject: [PATCH 001/396] Create README.md --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..f50527ed --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +# R-software +R software for selective inference +Authors: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid +Maintainer: Rob Tibshirani + +New tools for inference after selection, for use with forward stepwise regression, least angle regression, the lasso, and the many means problem. + +Code is in the directory selectiveInference/R. +* funs.common.R: Basic functions used by many other functions, such as standardization. +* funs.fixed.R: Inference for LASSO at a fixed, deterministic value of lambda. +* funs.fs.R: Inference for forward stepwise. +* funs.groupfs.R: Inference for forward stepwise with groups of variables, e.g. factors. +* funs.inf.R: Common functions for inference with fixed, fs, lar, and manymeans (but not group). +* funs.lar.R: Inference for least angle regression. +* funs.max.R: Some numerical approximations. Deprecated? + From 48add166ef5cd3fa1331776d03b4dee09bdeda79 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Tue, 29 Sep 2015 09:10:19 -0700 Subject: [PATCH 002/396] Editing readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f50527ed..2d38cdf3 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # R-software -R software for selective inference -Authors: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid -Maintainer: Rob Tibshirani +R software for selective inference +Authors: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid +Maintainer: Rob Tibshirani New tools for inference after selection, for use with forward stepwise regression, least angle regression, the lasso, and the many means problem. 
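A minimal usage sketch for the package described in the README above, assuming the CRAN release of selectiveInference (fs/fsInf and groupfs/groupfsInf are its exported interfaces; the in-progress code in the patches that follow may differ):

library(selectiveInference)
set.seed(1)
n <- 50
p <- 10
x <- matrix(rnorm(n * p), nrow = n)
# two active variables, the rest noise
y <- as.numeric(x %*% c(rep(2, 2), rep(0, p - 2)) + rnorm(n))
# forward stepwise path, then selective p-values for each step
fsfit <- fs(x, y)
print(fsInf(fsfit))
# grouped forward stepwise (funs.groupfs.R): p/2 groups of size 2
index <- sort(rep(1:(p/2), 2))
gfit <- groupfs(x, y, index, maxsteps = 4)
print(groupfsInf(gfit))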
From c8ab08afcdc978df30840bb0993c7572205d5b7e Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Tue, 29 Sep 2015 09:13:54 -0700 Subject: [PATCH 003/396] Fixed test.groupfs.R --- tests/test.groupfs.R | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/tests/test.groupfs.R b/tests/test.groupfs.R index 167dd0ac..b590228d 100644 --- a/tests/test.groupfs.R +++ b/tests/test.groupfs.R @@ -92,7 +92,7 @@ cnames[fit$action]#[1:length(fsnames)] print("empty") } -<<<<<<< HEAD:forLater/josh/tests/test.groupfs.R + ======= set.seed(1) n = 100 @@ -133,29 +133,3 @@ print(colMeans(pvalmk)) print(mean(pvalm)) print(mean(pvalmk)) -<<<<<<< HEAD ->>>>>>> 5c372de287b5ff9455ddce7dd513fb868d09e7e6:tests/test.groupfs.R -======= - - - - set.seed(1) - n <- 40 - p <- 20 - index <- sort(rep(1:(p/2), 2)) - steps <- 10 - sparsity <- 5 - snr <- 3 - sigma=3 - - y <- rnorm(n)*sigma - x <- matrix(rnorm(n*p), nrow=n) - - - beta <- rep(0, p) - beta[which(index %in% 1:sparsity)] <- snr - y <- y + x %*% beta - - fit <- groupfs(x, y, index=index, maxsteps = steps) - foo=groupfsInf(fit) ->>>>>>> bc141572a79c89a69ca48574049b3006fa4b38ca From fdb0814926d132437378e1cae4056a02afad9b2e Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Tue, 29 Sep 2015 10:21:05 -0700 Subject: [PATCH 004/396] More test.groupfs fixes --- forLater/josh/cv.R | 103 ---------------------------------- forLater/josh/tests/robsNotes | 37 ------------ tests/test.groupfs.R | 17 ++---- 3 files changed, 4 insertions(+), 153 deletions(-) delete mode 100644 forLater/josh/cv.R delete mode 100644 forLater/josh/tests/robsNotes diff --git a/forLater/josh/cv.R b/forLater/josh/cv.R deleted file mode 100644 index 27879901..00000000 --- a/forLater/josh/cv.R +++ /dev/null @@ -1,103 +0,0 @@ -# ------------------------------------------------ -# Cross-validation, preliminary - - -cv_make_folds <- function(x, nfolds = 10) { - #inds <- sample(1:nrow(x), replace=FALSE) - inds <- 1:nrow(x) - foldsize <- floor(nrow(x)/nfolds) - lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize])) -} - -cv_hat_matrix <- function(x, folds, active.sets) { - nfolds <- length(folds) - lapply(1:nfolds, function(f) { - fold <- folds[[f]] - active <- active.sets[[f]] - x_tr <- x[ -fold, active] - x_te <- x[fold, active] - hatm <- matrix(0, nrow=length(fold), ncol=nrow(x)) - hatm[, -fold] <- x_te %*% ginv(x_tr) - return(hatm) - }) -} - -product_cv_hat <- function(folds, inds, finds, ginds, hat_matrices) { - nfolds <- length(folds) - terms <- lapply(inds, function(h) { - t(hat_matrices[[h]][, finds]) %*% hat_matrices[[h]][, ginds] - }) - return(Reduce('+', terms)) -} - -cv_RSSquad <- function(x, folds, active.sets) { - hat_matrices <- cv_hat_matrix(x, folds, active.sets) - nfolds <- length(folds) - rows <- lapply(1:nfolds, function(f) { - do.call(cbind, lapply(1:nfolds, function(g) { - ginds <- folds[[g]] - finds <- folds[[f]] - if (f == g) { - return(product_cv_hat(folds, setdiff(1:nfolds, f), finds, ginds, hat_matrices)) - } else { - return( - product_cv_hat(folds, setdiff(1:nfolds, c(f,g)), finds, ginds, hat_matrices) - hat_matrices[[f]][, ginds] - t(hat_matrices[[g]][, finds])) - } - })) - }) - Q <- do.call(rbind, rows) - return(Q) -} - -cv_fs <- function(x, y, steps, nfolds = 10) { - - n <- nrow(x) - if (steps >= n*(1-1/nfolds)) stop("Too many steps") - - folds <- cv_make_folds(x, nfolds) - nfolds <- length(folds) - projections <- list() - active.sets <- list() - cv_perm <- sample(1:n) - Y <- y[cv_perm] - mean(y) - X <- x[cv_perm, ] - - for (f 
in 1:nfolds) { - fold <- folds[[f]] - fit <- groupfs(X[-fold,], Y[-fold], steps=steps) - path.projs <- fit$projections - path.projs <- lapply(path.projs, function(step.projs) { - lapply(step.projs, function(proj) { - expanded.proj <- matrix(0, n, n) - expanded.proj[-fold, -fold] <- proj - return(expanded.proj) - }) - }) - projections[[f]] <- path.projs - active.sets[[f]] <- fit$variable - } - projections <- do.call(c, projections) - - RSSquads <- list() - for (s in 1:steps) { - initial.active <- lapply(active.sets, function(a) a[1:s]) - RSSquads[[s]] <- cv_RSSquad(X, folds, initial.active) - } - - RSSs <- lapply(RSSquads, function(Q) t(Y) %*% Q %*% Y) - sstar <- which.min(RSSs) - quadstar <- RSSquads[sstar][[1]] - - RSSquads <- lapply(RSSquads, function(quad) quad - quadstar) - RSSquads[[sstar]] <- NULL # remove the all zeroes case - - fit <- groupfs(X, Y, steps=sstar) - fit$projections <- flatten(fit$projections) - fit$foldprojections <- flatten(projections) - fit$rssprojections <- flatten(RSSquads) - - fit$cvperm <- cv_perm - - invisible(fit) -} - diff --git a/forLater/josh/tests/robsNotes b/forLater/josh/tests/robsNotes deleted file mode 100644 index 8d7c3be1..00000000 --- a/forLater/josh/tests/robsNotes +++ /dev/null @@ -1,37 +0,0 @@ -notes - -see bottom of test.groupfs.R - -1. with n=100, p=120, 14 groups, groupfs(x, y,index) returned - - - Error in checkargs.groupfs(x, index, maxsteps) : - maxsteps is too large. If the largest groups are included the model will be saturated/overdetermined - -should have a sensible default so this doesn't happen - - -2. a=groupfs(x,y,index,maxsteps=4) -groupfsInf(a) gives - - -Using sigma value: -Step 1 - computing p-value for group 14 -Step 2 - computing p-value for group 12 -Step 3 - computing p-value for group 1 -Step 4 - computing p-value for group 13 -Warning messages: -1: In groupfsInf(a) : - p > n/2, and sigmahat = 0.434 used as an estimate of sigma; you may want to use the estimateSigma function -2: In groupfsInf(a) : - P-value NaNs of the form 0/0 converted to 0. This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio. -> aa - -Standard deviation of noise (specified or estimated) sigma = 0.434 -Error in lapply(pvals$support, size) : object 'pvals' not found - - - -3. groupfs has a sigma arg listed in the Rd file, but it's not in the function??
- -you need to do a R CMD check to pick up this stuff diff --git a/tests/test.groupfs.R b/tests/test.groupfs.R index b590228d..629bec14 100644 --- a/tests/test.groupfs.R +++ b/tests/test.groupfs.R @@ -1,10 +1,4 @@ -library(intervals) -source("../selectiveInference/R/funs.common.R") -source("../selectiveInference/R/funs.groupfs.R") -source("../selectiveInference/R/funs.quadratic.R") -source("../selectiveInference/R/funs.fs.R") -source("../selectiveInference/R/funs.lar.R") -#library(selectiveInference) +library(selectiveInference) #library(lars) set.seed(1) @@ -65,8 +59,8 @@ for (j in 1:ncol(state.x77)) { states[,j] <- var } states <- cbind(states, state.division) -x <- factor_design(states)$x -X <- scale_groups(x, index)$x +x <- factorDesign(states)$x +X <- scaleGroups(x, index)$x p <- ncol(x) y <- rnorm(n) @@ -92,13 +86,10 @@ cnames[fit$action]#[1:length(fsnames)] print("empty") } - -======= -set.seed(1) n = 100 p = 120 maxsteps = 9 -niter = 50 +niter = 500 # 10 groups of size 10, 10 groups of size 2 index = sort(c(c(1, 1), rep(2:11, 10), rep(12:20, 2))) pvalm = pvalmk = matrix(NA, nrow=niter, ncol=maxsteps) From 5119f8613f5d6bd175355e334fba07f82658888e Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Tue, 29 Sep 2015 22:49:39 -0700 Subject: [PATCH 005/396] Unfinished, but modifying cv to work with refactored groupfs functions --- forLater/josh/selectiveInference/R/cv.R | 105 ++++++++++++++++++++++++ forLater/josh/tests/test.cv.R | 9 ++ 2 files changed, 114 insertions(+) create mode 100644 forLater/josh/selectiveInference/R/cv.R create mode 100644 forLater/josh/tests/test.cv.R diff --git a/forLater/josh/selectiveInference/R/cv.R b/forLater/josh/selectiveInference/R/cv.R new file mode 100644 index 00000000..cdc50924 --- /dev/null +++ b/forLater/josh/selectiveInference/R/cv.R @@ -0,0 +1,105 @@ +# ------------------------------------------------ +# Cross-validation, preliminary + +cvMakeFolds <- function(x, nfolds = 10) { + #inds <- sample(1:nrow(x), replace=FALSE) + inds <- 1:nrow(x) + foldsize <- floor(nrow(x)/nfolds) + lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize])) +} + +cvHatMatrix <- function(x, folds, active.sets) { + nfolds <- length(folds) + lapply(1:nfolds, function(f) { + fold <- folds[[f]] + active <- active.sets[[f]] + x_tr <- x[ -fold, active] + x_te <- x[fold, active] + hatm <- matrix(0, nrow=length(fold), ncol=nrow(x)) + hatm[, -fold] <- x_te %*% ginv(x_tr) + return(hatm) + }) +} + +cvProductHat <- function(folds, inds, finds, ginds, hat_matrices) { + nfolds <- length(folds) + terms <- lapply(inds, function(h) { + t(hat_matrices[[h]][, finds]) %*% hat_matrices[[h]][, ginds] + }) + return(Reduce('+', terms)) +} + +cvRSSquad <- function(x, folds, active.sets) { + hat_matrices <- cvHatMatrix(x, folds, active.sets) + nfolds <- length(folds) + rows <- lapply(1:nfolds, function(f) { + do.call(cbind, lapply(1:nfolds, function(g) { + ginds <- folds[[g]] + finds <- folds[[f]] + if (f == g) { + return(cvProductHat(folds, setdiff(1:nfolds, f), finds, ginds, hat_matrices)) + } else { + return( + cvProductHat(folds, setdiff(1:nfolds, c(f,g)), finds, ginds, hat_matrices) - hat_matrices[[f]][, ginds] - t(hat_matrices[[g]][, finds])) + } + })) + }) + Q <- do.call(rbind, rows) + return(Q) +} + +cvfs <- function(x, y, index = 1:ncol(x), maxsteps, nfolds = 10) { + + n <- nrow(x) + if (maxsteps >= n*(1-1/nfolds)) { + maxsteps <- floor(n*(1-1/nfolds)) + warning(paste("maxsteps too large for training fold size, set to", maxsteps)) + } + + folds <- cvMakeFolds(x, nfolds) + 
nfolds <- length(folds) + projections <- list(1:nfolds) + active.sets <- list(1:nfolds) + cv_perm <- sample(1:n) + Y <- y[cv_perm] - mean(y) + X <- x[cv_perm, ] + + # Flatten list or something? + for (f in 1:nfolds) { + fold <- folds[[f]] + fit <- groupfs(X[-fold,], Y[-fold], index, maxsteps=maxsteps) + projections[[f]] <- lapply(fit$projections, function(step.projs) { + lapply(step.projs, function(proj) { + # Reduce from n by n matrix to svdu_thresh + expanded.proj <- matrix(0, n, ncol(proj)) + expanded.proj[-fold, ] <- proj + return(expanded.proj) + }) + }) + active.sets[[f]] <- fit$action + } + projections <- do.call(c, projections) + + RSSquads <- list() + for (s in 1:maxsteps) { + initial.active <- lapply(active.sets, function(a) a[1:s]) + RSSquads[[s]] <- cvRSSquad(X, folds, initial.active) + } + + RSSs <- lapply(RSSquads, function(Q) t(Y) %*% Q %*% Y) + sstar <- which.min(RSSs) + quadstar <- RSSquads[sstar][[1]] + + RSSquads <- lapply(RSSquads, function(quad) quad - quadstar) + RSSquads[[sstar]] <- NULL # remove the all zeroes case + + fit <- groupfs(X, Y, index, maxsteps=sstar) + fit$projections <- flatten(fit$projections) + fit$foldprojections <- flatten(projections) + fit$rssprojections <- flatten(RSSquads) + + fit$cvperm <- cv_perm + + invisible(fit) +} + diff --git a/forLater/josh/tests/test.cv.R b/forLater/josh/tests/test.cv.R new file mode 100644 index 00000000..22743ec0 --- /dev/null +++ b/forLater/josh/tests/test.cv.R @@ -0,0 +1,9 @@ +library(selectiveInference) + +source("../selectiveInference/R/cv.R") + +n = 100 +p = 20 +x <- matrix(rnorm(n*p), nrow=n) +y <- rnorm(n) +fit <- cvfs(x, y, maxsteps = 5) From d03455fc2e12d5b5e70cc38fe789a968198c3d2a Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 30 Sep 2015 18:03:13 -0700 Subject: [PATCH 006/396] Some documentation edits, significant progress on CV --- forLater/josh/selectiveInference/R/cv.R | 35 ++++++++----- selectiveInference/DESCRIPTION | 15 +++--- selectiveInference/R/funs.groupfs.R | 67 ++++++++++++++----------- selectiveInference/R/funs.quadratic.R | 3 +- selectiveInference/man/factorDesign.Rd | 3 +- selectiveInference/man/groupfs.Rd | 7 ++- selectiveInference/man/groupfsInf.Rd | 27 ++-------- selectiveInference/man/scaleGroups.Rd | 11 ++-- 8 files changed, 83 insertions(+), 85 deletions(-) diff --git a/forLater/josh/selectiveInference/R/cv.R b/forLater/josh/selectiveInference/R/cv.R index cdc50924..adbc0115 100644 --- a/forLater/josh/selectiveInference/R/cv.R +++ b/forLater/josh/selectiveInference/R/cv.R @@ -8,6 +8,9 @@ cvMakeFolds <- function(x, nfolds = 10) { lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize])) } +############################################ +# Can this be optimized using svdu_thresh? 
# +############################################ cvHatMatrix <- function(x, folds, active.sets) { nfolds <- length(folds) lapply(1:nfolds, function(f) { @@ -16,7 +19,10 @@ cvHatMatrix <- function(x, folds, active.sets) { x_tr <- x[ -fold, active] x_te <- x[fold, active] hatm <- matrix(0, nrow=length(fold), ncol=nrow(x)) - hatm[, -fold] <- x_te %*% ginv(x_tr) + svdtr <- svd(x_tr) + inds <- svdtr$d > .Machine$double.eps * svdtr$d[1] + xtrinv <- svdtr$v[, inds, drop = FALSE] %*% ((1/svdtr$d[inds]) * t(svdtr$u[, inds, drop = FALSE])) + hatm[, -fold] <- x_te %*% xtrinv return(hatm) }) } @@ -59,7 +65,9 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, nfolds = 10) { folds <- cvMakeFolds(x, nfolds) nfolds <- length(folds) projections <- list(1:nfolds) + maxprojs <- list(1:nfolds) active.sets <- list(1:nfolds) + cvobj <- list(1:nfolds) cv_perm <- sample(1:n) Y <- y[cv_perm] - mean(y) X <- x[cv_perm, ] @@ -68,17 +76,19 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, nfolds = 10) { for (f in 1:nfolds) { fold <- folds[[f]] fit <- groupfs(X[-fold,], Y[-fold], index, maxsteps=maxsteps) - projections[[f]] <- lapply(fit$projections, function(step.projs) { - lapply(step.projs, function(proj) { - # Reduce from n by n matrix to svdu_thresh - expanded.proj <- matrix(0, n, ncol(proj)) - expanded.proj[-fold, ] <- proj - return(expanded.proj) - }) - }) + fit$fold <- fold + ## projections[[f]] <- lapply(fit$projections, function(step.projs) { + ## lapply(step.projs, function(proj) { + ## # Reduce from n by n matrix to svdu_thresh + ## expanded.proj <- matrix(0, n, ncol(proj)) + ## expanded.proj[-fold, ] <- proj + ## return(expanded.proj) + ## }) + ## }) active.sets[[f]] <- fit$action + cvobj[[f]] <- fit } - projections <- do.call(c, projections) + #projections <- do.call(c, projections) RSSquads <- list() for (s in 1:maxsteps) { @@ -94,9 +104,8 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, nfolds = 10) { RSSquads[[sstar]] <- NULL # remove the all zeroes case fit <- groupfs(X, Y, index, maxsteps=sstar) - fit$projections <- flatten(fit$projections) - fit$foldprojections <- flatten(projections) - fit$rssprojections <- flatten(RSSquads) + fit$cvobj <- cvobj + fit$rssprojections <- RSSquads fit$cvperm <- cv_perm diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index df66eb53..0f352b60 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -3,12 +3,15 @@ Type: Package Title: Tools for Selective Inference Version: 1.1.1 Date: 2015-09-01 -Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, - Joshua Loftus, Stephen Reid +Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, + Joshua Loftus, Stephen Reid Maintainer: Rob Tibshirani -Depends: glmnet, intervals -Suggests: Rmpfr +Depends: + glmnet, + intervals +Suggests: + Rmpfr Description: New tools for inference after selection, for use - with forward stepwise regression, least angle regression, the - lasso, and the many means problem. + with forward stepwise regression, least angle regression, the + lasso, and the many means problem. License: GPL-2 diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 3434bd66..8fae045e 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -1,12 +1,12 @@ #' Select a model with forward stepwise. #' -#' This function implements forward selection of linear models almost identically to \code{\link{stepAIC}} with \code{direction = "forward"}. 
The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. +#' This function implements forward selection of linear models almost identically to \code{\link[stats]{step}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. #' #' @param x Matrix of predictors (n by p). #' @param y Vector of outcomes (length n). #' @param index Group membership indicator of length p. #' @param maxsteps Maximum number of steps for forward stepwise. -#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link{extractAIC}} for details. +#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link[stats]{extractAIC}} for details. #' @param k Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC. #' @param intercept Should an intercept be included in the model? Default is TRUE. #' @param normalize Should the design matrix be normalized? Default is TRUE. @@ -196,11 +196,10 @@ add1.groupfs <- function(x, y, index, labels, inactive, k, sigma = NULL) { #' Compute selective p-values for a model fitted by \code{groupfs}. #' -#' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \code{chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. +#' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. #' #' @param obj Object returned by \code{\link{groupfs}} function #' @param sigma Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate. -#' @param projs Additional projections to define model selection event. For use with cross-validation. Default is NULL and it is not recommended to change this. #' @param verbose Print out progress along the way? Default is FALSE. #' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. The default printing behavior should supply adequate information. 
#' @@ -212,18 +211,16 @@ add1.groupfs <- function(x, y, index, labels, inactive, k, sigma = NULL) { #' \item{df}{Rank of group of variables when it was added to the model.} #' \item{support}{List of intervals defining the truncation region of the truncated chi.} #' } -groupfsInf <- function(obj, sigma = NULL, projs = NULL, verbose = FALSE) { +groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { n <- nrow(obj$x) p <- ncol(obj$x) - active <- obj$action maxsteps <- attr(obj, "maxsteps") k <- attr(obj, "k") index <- obj$index x <- obj$x y <- obj$y - Eindex <- which(index %in% active) - Ep <- length(Eindex) + Ep <- sum(index %in% obj$action) nanconv <- FALSE pvals <- numeric(maxsteps) @@ -250,40 +247,43 @@ groupfsInf <- function(obj, sigma = NULL, projs = NULL, verbose = FALSE) { # Compute p-value for each active group for (j in 1:maxsteps) { - i <- active[j] - if (verbose) cat(paste0("Step ", j, "/", maxsteps, ": computing P-value for group ", i, "\n")) + i <- obj$action[j] + if (verbose) cat(paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i, "\n")) # Form projection onto active set minus i # and project x_i orthogonally - x_i <- x[,which(index == i), drop = FALSE] - if (length(active) > 1) { - minus_i <- setdiff(active, i) - x_minus_i <- svdu_thresh(x[,which(index %in% minus_i), drop = FALSE]) + x_i <- obj$x[,which(obj$index == i), drop = FALSE] + if (length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + x_minus_i <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) x_i <- x_i - x_minus_i %*% t(x_minus_i) %*% x_i } # Project y onto what remains of x_i Ugtilde <- svdu_thresh(x_i) - R <- t(Ugtilde) %*% y + R <- t(Ugtilde) %*% obj$y TC <- sqrt(sum(R^2)) eta <- Ugtilde %*% R / TC + Z <- obj$y - eta * TC df <- ncol(Ugtilde) - dfs[j] <- df - TCs[j] <- TC - - # For each step... - L <- interval_groupfs(obj, TC, R, eta, Ugtilde) - - # Any additional projections, e.g. from cross-validation? - if (!is.null(projs)) L <- c(L, projs) - + + intervallist <- truncationRegion(obj, TC, R, eta, Z) + if (!is.null(fit$cvobj)) { + intervallist <- c(intervallist, do.call(c, + lapply(fit$cvobj, function(cvf) { + truncationRegion(cvf, TC, R[-cvf$fold], eta[-cvf$fold], Z[-cvf$fold]) + }))) + } + # Compute intersection: - Lunion <- do.call(interval_union, L) - Lunion <- interval_union(Lunion, Intervals(c(-Inf,0))) - E <- interval_complement(Lunion, check_valid = FALSE) + region <- do.call(interval_union, intervallist) + region <- interval_union(region, Intervals(c(-Inf,0))) + E <- interval_complement(region, check_valid = FALSE) supports[[j]] <- E # E is now potentially a union of intervals - if (length(E) == 0) stop("Trivial intersection") + if (length(E) == 0) { + stop(paste("Empty TC support at step", j)) + } # Sum truncated cdf over each part of E denom <- do.call(sum, lapply(1:nrow(E), function(v) { @@ -323,7 +323,7 @@ groupfsInf <- function(obj, sigma = NULL, projs = NULL, verbose = FALSE) { } if (nanconv) warning("P-value NaNs of the form 0/0 converted to 0. 
This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.") names(pvals) <- obj$action - out <- list(vars = active, pv=pvals, sigma=sigma, TC=TCs, df = dfs, support=supports) + out <- list(vars = obj$action, pv=pvals, sigma=sigma, TC=TCs, df = dfs, support=supports) class(out) <- "groupfsInf" if (!is.null(attr(obj, "varnames"))) { attr(out, "varnames") <- attr(obj, "varnames") @@ -356,11 +356,18 @@ num_int_chi <- function(a, b, df, nsamp = 10000) { #' Center and scale design matrix by groups #' +#' For internal use by \code{\link{groupfs}}. +#' #' @param x Design matrix. #' @param index Group membership indicator of length p. #' @param center Center groups, default is TRUE. #' @param scale Scale groups by Frobenius norm, default is TRUE. -#' @return Scaled design matrix +#' @return +#' \describe{ +#' \item{x}{Optionally centered/scaled design matrix.} +#' \item{xm}{Means of groups in original design matrix.} +#' \item{xs}{Frobenius norms of groups in original design matrix.} +#' } scaleGroups <- function(x, index, center = TRUE, scale = TRUE) { keys <- unique(index) xm <- rep(0, ncol(x)) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 1aff7ec2..e8b14083 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -1,7 +1,6 @@ -interval_groupfs <- function(obj, TC, R, eta, Ugtilde, tol = 1e-15) { +truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { - Z <- obj$y - eta * TC n <- nrow(obj$x) L <- lapply(1:length(obj$action), function(s) { diff --git a/selectiveInference/man/factorDesign.Rd b/selectiveInference/man/factorDesign.Rd index d5d2576d..3a42e2ea 100644 --- a/selectiveInference/man/factorDesign.Rd +++ b/selectiveInference/man/factorDesign.Rd @@ -1,5 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/funs.groupfs.R +% Generated by roxygen2 (4.0.1.99): do not edit by hand \name{factorDesign} \alias{factorDesign} \title{Expand a data frame with factors to form a design matrix with the full binary encoding of each factor.} diff --git a/selectiveInference/man/groupfs.Rd b/selectiveInference/man/groupfs.Rd index 4567c6f6..6f719a96 100644 --- a/selectiveInference/man/groupfs.Rd +++ b/selectiveInference/man/groupfs.Rd @@ -1,5 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/funs.groupfs.R +% Generated by roxygen2 (4.0.1.99): do not edit by hand \name{groupfs} \alias{groupfs} \title{Select a model with forward stepwise.} @@ -16,7 +15,7 @@ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, \item{maxsteps}{Maximum number of steps for forward stepwise.} -\item{sigma}{Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link{extractAIC}} for details.} +\item{sigma}{Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link[stats]{extractAIC}} for details.} \item{k}{Multiplier of model size penalty, the default is \code{k = 2} for AIC. 
Use \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC.} @@ -30,7 +29,7 @@ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. } \description{ -This function implements forward selection of linear models almost identically to \code{\link{stepAIC}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. +This function implements forward selection of linear models almost identically to \code{\link[stats]{step}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. } \examples{ x = matrix(rnorm(20*40), nrow=20) diff --git a/selectiveInference/man/groupfsInf.Rd b/selectiveInference/man/groupfsInf.Rd index 43cf25ba..4926896f 100644 --- a/selectiveInference/man/groupfsInf.Rd +++ b/selectiveInference/man/groupfsInf.Rd @@ -1,18 +1,15 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/funs.groupfs.R +% Generated by roxygen2 (4.0.1.99): do not edit by hand \name{groupfsInf} \alias{groupfsInf} \title{Compute selective p-values for a model fitted by \code{groupfs}.} \usage{ -groupfsInf(obj, sigma = NULL, projs = NULL, verbose = FALSE) +groupfsInf(obj, sigma = NULL, verbose = FALSE) } \arguments{ \item{obj}{Object returned by \code{\link{groupfs}} function} \item{sigma}{Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate.} -\item{projs}{Additional projections to define model selection event. For use with cross-validation. Default is NULL and it is not recommended to change this.} - \item{verbose}{Print out progress along the way? Default is FALSE.} } \value{ @@ -28,24 +25,6 @@ An object of class "groupfsInf" containing selective p-values for the fitted mod } } \description{ -Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \code{chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. -} -\examples{ -#NOT RUN -#set.seed(1) -#n <- 40 -#p <- 20 -#index <- sort(rep(1:(p/2), 2)) -#steps <- 10 -#sparsity <- 5 -#snr <- 3 -# x <- matrix(rnorm(n*p), nrow=n) -# beta <- rep(0, p) -# beta[which(index %in% 1:sparsity)] <- snr -# y <- x %*% beta+rnorm(n) - -#fit <- groupfs(x, y, index=1:p, maxsteps = steps) - -#out<- groupfsInf(fit) +Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. 
} diff --git a/selectiveInference/man/scaleGroups.Rd b/selectiveInference/man/scaleGroups.Rd index 6e41ef6a..bfb899c8 100644 --- a/selectiveInference/man/scaleGroups.Rd +++ b/selectiveInference/man/scaleGroups.Rd @@ -1,5 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/funs.groupfs.R +% Generated by roxygen2 (4.0.1.99): do not edit by hand \name{scaleGroups} \alias{scaleGroups} \title{Center and scale design matrix by groups} @@ -16,9 +15,13 @@ scaleGroups(x, index, center = TRUE, scale = TRUE) \item{scale}{Scale groups by Frobenius norm, default is TRUE.} } \value{ -Scaled design matrix +\describe{ + \item{x}{Optionally centered/scaled design matrix.} + \item{xm}{Means of groups in original design matrix.} + \item{xs}{Frobenius norms of groups in original design matrix.} +} } \description{ -Center and scale design matrix by groups +For internal use by \code{\link{groupfs}}. } From c9a08a6f14dc96027f3788f1794943adb9a24c9b Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 30 Sep 2015 18:42:04 -0700 Subject: [PATCH 007/396] Simulation to test cv --- forLater/josh/sim.cv.R | 53 +++++++++++++++++++++++++++++ selectiveInference/R/funs.groupfs.R | 16 +++++---- 2 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 forLater/josh/sim.cv.R diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R new file mode 100644 index 00000000..28d9beb0 --- /dev/null +++ b/forLater/josh/sim.cv.R @@ -0,0 +1,53 @@ +library(intervals) +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +niters <- 500 +n <- 50 +p <- 100 +maxsteps <- 10 +sparsity <- 5 +snr <- 1 +nfolds <- 5 + +instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { + + x <- matrix(rnorm(n*p), nrow=n) + y <- rnorm(n) + + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + + fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) + pvals <- groupfsInf(fit, verbose=T) + ## pvals_naive <- interval.groupfs(fit, x, y, index = 1:ncol(x)) + ## fit$projections <- c(fit$projections, fit$rssprojections) + ## pvals_reduced <- interval.groupfs(fit, x, y, index = 1:ncol(x)) + ## fit$projections <- c(fit$projections, fit$foldprojections) + return(list(variable = fit$action, pvals = pvals$pv)) + #pvals_naive = pvals_naive, pvals_reduced = pvals_reduced)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, nfolds)) +}) + +#pvals_reduced <- do.call(c, list(output[4,])) +#pvals_naive <- do.call(c, list(output[3,])) +pvals <- do.call(c, list(output[2,])) +vars <- do.call(c, list(output[1,])) + +save(pvals, vars, file = paste0( + "results_cv_n", n, + "_p", p, + "_sparsity", sparsity, + "_snr", snr, + ".RData")) + +print(time) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 8fae045e..cddf907d 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -248,7 +248,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { # Compute p-value for each active group for (j in 1:maxsteps) { i <- obj$action[j] - if (verbose) cat(paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i, "\n")) + if (verbose) cat(paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i, "\n")) # Form 
projection onto active set minus i # and project x_i orthogonally x_i <- obj$x[,which(obj$index == i), drop = FALSE] @@ -263,9 +263,11 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { R <- t(Ugtilde) %*% obj$y TC <- sqrt(sum(R^2)) eta <- Ugtilde %*% R / TC - Z <- obj$y - eta * TC + Z <- obj$y - eta * TC df <- ncol(Ugtilde) - + TCs[j] <- TC + dfs[j] <- df + intervallist <- truncationRegion(obj, TC, R, eta, Z) if (!is.null(fit$cvobj)) { intervallist <- c(intervallist, do.call(c, @@ -273,7 +275,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { truncationRegion(cvf, TC, R[-cvf$fold], eta[-cvf$fold], Z[-cvf$fold]) }))) } - + # Compute intersection: region <- do.call(interval_union, intervallist) region <- interval_union(region, Intervals(c(-Inf,0))) @@ -357,7 +359,7 @@ num_int_chi <- function(a, b, df, nsamp = 10000) { #' Center and scale design matrix by groups #' #' For internal use by \code{\link{groupfs}}. -#' +#' #' @param x Design matrix. #' @param index Group membership indicator of length p. #' @param center Center groups, default is TRUE. @@ -393,8 +395,8 @@ scaleGroups <- function(x, index, center = TRUE, scale = TRUE) { } #' Expand a data frame with factors to form a design matrix with the full binary encoding of each factor. -#' -#' When using \code{\link{groupfs}} with factor variables call this function first to create a design matrix. +#' +#' When using \code{\link{groupfs}} with factor variables call this function first to create a design matrix. #' #' @param df Data frame containing some columns which are \code{factors}. #' @return List containing From 35c3a02281b81dc36b3f57e29db4d5c9868bd774 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 30 Sep 2015 18:45:31 -0700 Subject: [PATCH 008/396] Typo fix, change fit to obj --- selectiveInference/R/funs.groupfs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index cddf907d..0a4eb6ef 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -269,9 +269,9 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { dfs[j] <- df intervallist <- truncationRegion(obj, TC, R, eta, Z) - if (!is.null(fit$cvobj)) { + if (!is.null(obj$cvobj)) { intervallist <- c(intervallist, do.call(c, - lapply(fit$cvobj, function(cvf) { + lapply(obj$cvobj, function(cvf) { truncationRegion(cvf, TC, R[-cvf$fold], eta[-cvf$fold], Z[-cvf$fold]) }))) } From f3043a9b05faab39c33f66d4dc7e4f9f2bc35363 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 30 Sep 2015 21:12:41 -0700 Subject: [PATCH 009/396] Included cvquad interval --- forLater/josh/selectiveInference/R/cv.R | 2 +- forLater/josh/tests/test.cv.R | 22 +++++-- selectiveInference/R/funs.groupfs.R | 40 ++++++++----- selectiveInference/R/funs.quadratic.R | 76 +++++++++++++------------ 4 files changed, 82 insertions(+), 58 deletions(-) diff --git a/forLater/josh/selectiveInference/R/cv.R b/forLater/josh/selectiveInference/R/cv.R index adbc0115..0a4661b4 100644 --- a/forLater/josh/selectiveInference/R/cv.R +++ b/forLater/josh/selectiveInference/R/cv.R @@ -105,7 +105,7 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, nfolds = 10) { fit <- groupfs(X, Y, index, maxsteps=sstar) fit$cvobj <- cvobj - fit$rssprojections <- RSSquads + fit$cvquad <- RSSquads fit$cvperm <- cv_perm diff --git a/forLater/josh/tests/test.cv.R b/forLater/josh/tests/test.cv.R index 22743ec0..28b3f446 100644 --- a/forLater/josh/tests/test.cv.R +++ 
b/forLater/josh/tests/test.cv.R @@ -1,9 +1,21 @@ -library(selectiveInference) - +library(intervals) source("../selectiveInference/R/cv.R") +source("../../../selectiveInference/R/funs.groupfs.R") +source("../../../selectiveInference/R/funs.quadratic.R") +source("../../../selectiveInference/R/funs.common.R") -n = 100 -p = 20 +set.seed(1) +n <- 50 +p <- 100 +maxsteps <- 10 +sparsity <- 5 +snr <- 1 +nfolds <- 5 x <- matrix(rnorm(n*p), nrow=n) y <- rnorm(n) -fit <- cvfs(x, y, maxsteps = 5) +beta <- rep(0, p) +beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) +y <- y + x %*% beta +fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) +pvals <- groupfsInf(fit, verbose=T) + diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 0a4eb6ef..516d834a 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -223,9 +223,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { Ep <- sum(index %in% obj$action) nanconv <- FALSE - pvals <- numeric(maxsteps) - dfs <- numeric(maxsteps) - TCs <- numeric(maxsteps) + pvals = dfs = TCs = numeric(maxsteps) supports <- list() if (!is.null(sigma)) { @@ -274,6 +272,14 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { lapply(obj$cvobj, function(cvf) { truncationRegion(cvf, TC, R[-cvf$fold], eta[-cvf$fold], Z[-cvf$fold]) }))) + intervallist <- c(intervallist, + lapply(obj$cvquad, function(cvquad) { + etacvquad <- t(eta) %*% cvquad + A <- etacvquad %*% eta + B <- 2 * etacvquad %*% Z + C <- t(Z) %*% cvquad %*% Z + quadratic_roots(A, B, C, tol = 1e-15) + })) } # Compute intersection: @@ -283,6 +289,21 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { supports[[j]] <- E # E is now potentially a union of intervals + pvals[j] <- TC_surv(TC, sigma, df, E) + } + if (nanconv) warning("P-value NaNs of the form 0/0 converted to 0. This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.") + names(pvals) <- obj$action + out <- list(vars = obj$action, pv=pvals, sigma=sigma, TC=TCs, df = dfs, support=supports) + class(out) <- "groupfsInf" + if (!is.null(attr(obj, "varnames"))) { + attr(out, "varnames") <- attr(obj, "varnames") + } + invisible(out) +} + +# ----------------------------------------------------------- + +TC_surv <- function(TC, sigma, df, E) { if (length(E) == 0) { stop(paste("Empty TC support at step", j)) } @@ -321,20 +342,9 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { # Force p-value to lie in the [0,1] interval # in case of numerical issues value <- max(0, min(1, value)) - pvals[j] <- value - } - if (nanconv) warning("P-value NaNs of the form 0/0 converted to 0. 
This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.") - names(pvals) <- obj$action - out <- list(vars = obj$action, pv=pvals, sigma=sigma, TC=TCs, df = dfs, support=supports) - class(out) <- "groupfsInf" - if (!is.null(attr(obj, "varnames"))) { - attr(out, "varnames") <- attr(obj, "varnames") - } - invisible(out) + value } -# ----------------------------------------------------------- - tchi_interval <- function(lower, upper, sigma, df) { a <- (lower/sigma)^2 b <- (upper/sigma)^2 diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index e8b14083..09dee4a1 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -47,74 +47,76 @@ truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { B <- 2 * as.numeric(t(Ugeta) %*% UgZ - t(Uheta) %*% UhZ) C <- sum(UgZ^2) - sum(UhZ^2) - pendiff } + + quadratic_roots(A, B, C, tol) + }) + } + # LL is a list of intervals + }) + # L is now a list of lists of intervals + return(unlist(L, recursive = FALSE, use.names = FALSE)) +} - disc <- B^2 - 4*A*C - b2a <- -B/(2*A) +quadratic_roots <- function(A, B, C, tol) { + disc <- B^2 - 4*A*C + b2a <- -B/(2*A) - if (disc > tol) { - # Real roots + if (disc > tol) { + # Real roots pm <- sqrt(disc)/(2*A) endpoints <- sort(c(b2a - pm, b2a + pm)) - } else { - + } else { # No real roots if (A > -tol) { # Quadratic form always positive - return(Intervals(c(-Inf,0))) + return(Intervals(c(-Inf,0))) } else { # Quadratic form always negative - stop(paste("Empty TC support is infeasible", s, "-", l)) + stop(paste("Empty TC support is infeasible", s, "-", l)) } - } + } - if (A > tol) { + if (A > tol) { # Parabola opens upward if (min(endpoints) > 0) { # Both roots positive, union of intervals - return(Intervals(rbind(c(-Inf,0), endpoints))) + return(Intervals(rbind(c(-Inf,0), endpoints))) } else { # At least one negative root - return(Intervals(c(-Inf, max(0, endpoints[2])))) + return(Intervals(c(-Inf, max(0, endpoints[2])))) } - } else { + } else { if (A < -tol) { # Parabola opens downward - if (endpoints[2] < 0) { + if (endpoints[2] < 0) { # Positive quadratic form only when t negative - stop(paste("Negative TC support is infeasible", s, "-", l)) - } else { - # Part which is positive - if (endpoints[1] > 0) { - return(Intervals(rbind(c(-Inf, endpoints[1]), c(endpoints[2], Inf)))) + stop(paste("Negative TC support is infeasible", s, "-", l)) } else { - return(Intervals(c(endpoints[2], Inf))) + # Part which is positive + if (endpoints[1] > 0) { + return(Intervals(rbind(c(-Inf, endpoints[1]), c(endpoints[2], Inf)))) + } else { + return(Intervals(c(endpoints[2], Inf))) + } } - } } else { # a is too close to 0, quadratic is actually linear - if (abs(B) > tol) { - if (B > 0) { - return(Intervals(c(-Inf, max(0, -C/B)))) + if (abs(B) > tol) { + if (B > 0) { + return(Intervals(c(-Inf, max(0, -C/B)))) + } else { + if (-C/B < 0) stop("Error: infeasible linear equation") + return(Intervals(rbind(c(-Inf, 0), c(-C/B, Inf)))) + } } else { - if (-C/B < 0) stop("Error: infeasible linear equation") - return(Intervals(rbind(c(-Inf, 0), c(-C/B, Inf)))) + warning("Ill-conditioned quadratic") + return(Intervals(c(-Inf,0))) } - } else { - warning("Ill-conditioned quadratic") - return(Intervals(c(-Inf,0))) - } } - } - }) } - # LL is a list of intervals - }) - # L is now a list of lists of intervals - return(unlist(L, recursive = FALSE, use.names = FALSE)) } - roots_to_checkpoints <- function(roots) { checkpoints <- 
unique(sort(c(0, roots))) return(c(0, (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) From 786ea4d1ae9b97e60b0457cef8dcb2de992e4ee4 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 30 Sep 2015 21:23:30 -0700 Subject: [PATCH 010/396] Changing simulation to known sigma --- forLater/josh/sim.cv.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R index 28d9beb0..b8d27cb5 100644 --- a/forLater/josh/sim.cv.R +++ b/forLater/josh/sim.cv.R @@ -25,21 +25,14 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { } fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) - pvals <- groupfsInf(fit, verbose=T) - ## pvals_naive <- interval.groupfs(fit, x, y, index = 1:ncol(x)) - ## fit$projections <- c(fit$projections, fit$rssprojections) - ## pvals_reduced <- interval.groupfs(fit, x, y, index = 1:ncol(x)) - ## fit$projections <- c(fit$projections, fit$foldprojections) + pvals <- groupfsInf(fit, sigma = 1, verbose=T) return(list(variable = fit$action, pvals = pvals$pv)) - #pvals_naive = pvals_naive, pvals_reduced = pvals_reduced)) } time <- system.time({ output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, nfolds)) }) -#pvals_reduced <- do.call(c, list(output[4,])) -#pvals_naive <- do.call(c, list(output[3,])) pvals <- do.call(c, list(output[2,])) vars <- do.call(c, list(output[1,])) From ba4be55e03d37dab6e2ed92f041535c72969218f Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 2 Oct 2015 11:31:29 -0700 Subject: [PATCH 011/396] Added links --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2d38cdf3..7b6b8ff5 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # R-software -R software for selective inference +R software for [selective inference](http://cran.r-project.org/web/packages/selectiveInference/). Authors: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid Maintainer: Rob Tibshirani -New tools for inference after selection, for use with forward stepwise regression, least angle regression, the lasso, and the many means problem. +New tools for inference after selection, for use with forward stepwise regression, least angle regression, the lasso, and the many means problem. The package is available on [CRAN](http://cran.r-project.org/web/packages/selectiveInference/). See [this paper](http://www.pnas.org/content/112/25/7629.full) for a high level introduction to selective inference. Code is in the directory selectiveInference/R. * funs.common.R: Basic functions used by many other functions, such as standardization. From 6641e09b4d15d9f94bcd031b97cbab517a6ea18d Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 2 Oct 2015 11:45:32 -0700 Subject: [PATCH 012/396] Fixing whitespace problems in markdown --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7b6b8ff5..1a50e5b0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # R-software -R software for [selective inference](http://cran.r-project.org/web/packages/selectiveInference/). +R software for [selective inference](http://cran.r-project.org/web/packages/selectiveInference/). Authors: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid -Maintainer: Rob Tibshirani +Maintainer: Rob Tibshirani New tools for inference after selection, for use with forward stepwise regression, least angle regression, the lasso, and the many means problem. 
The package is available on [CRAN](http://cran.r-project.org/web/packages/selectiveInference/). See [this paper](http://www.pnas.org/content/112/25/7629.full) for a high level introduction to selective inference. @@ -13,4 +13,3 @@ Code is in the directory selectiveInference/R. * funs.inf.R: Common functions for inference with fixed, fs, lar, and manymeans (but not group). * funs.lar.R: Inference for least angle regression. * funs.max.R: Some numerical approximations. Deprecated? - From a874ffa57230cd5e2e6b52a586815fff7bcb20dd Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 2 Oct 2015 15:52:59 -0700 Subject: [PATCH 013/396] groupfs should now compute AIC exactly as base R --- selectiveInference/R/funs.groupfs.R | 70 ++++++++++++++--------------- tests/test.groupfs.R | 10 +++-- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 516d834a..8c69aa02 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -9,6 +9,7 @@ #' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link[stats]{extractAIC}} for details. #' @param k Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC. #' @param intercept Should an intercept be included in the model? Default is TRUE. +#' @param center Should the columns of the design matrix be centered? Default is TRUE. #' @param normalize Should the design matrix be normalized? Default is TRUE. #' @param verbose Print out progress along the way? Default is FALSE. #' @return An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. 
@@ -19,7 +20,7 @@ #' fit = groupfs(x, y, index, maxsteps = 5) #' pvals = groupfsInf(fit) #' @seealso \code{\link{groupfsInf}} -groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, normalize = TRUE, verbose = FALSE) { +groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, verbose = FALSE) { if (missing(index)) stop("Missing argument: index.") p <- ncol(x) @@ -50,7 +51,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE x.update <- x # Center and scale design matrix - xscaled <- scaleGroups(x.update, index, scale = normalize) + xscaled <- scaleGroups(x.update, index, center, normalize) xm <- xscaled$xm xs <- xscaled$xs x.update <- xscaled$x @@ -61,9 +62,9 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE terms = projections = maxprojs = aicpens = maxpens = cumprojs = vector("list", maxsteps) # Store other information from each step - path.info <- data.frame(imax=integer(maxsteps), L=numeric(maxsteps), df=integer(maxsteps), RSS=numeric(maxsteps), RSSdrop=numeric(maxsteps), chisq=numeric(maxsteps)) + path.info <- data.frame(imax=integer(maxsteps), df=integer(maxsteps), AIC=numeric(maxsteps), RSS=numeric(maxsteps), RSSdrop=numeric(maxsteps), chisq=numeric(maxsteps)) - modelrank <- 1 + modelrank <- as.numeric(intercept) # Begin main loop for (step in 1:maxsteps) { @@ -76,6 +77,8 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE active <- union(active, imax) inactive.inds <- which(!index %in% active) + # Compute AIC + added$AIC <- n*log(exp(k*modelrank/n) * added$maxterm/n) + n*(log(2*pi) + 1) # Rank of group modelrank <- modelrank + added$df @@ -86,11 +89,6 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE # Regress added group out of y and inactive x P.imax <- added$maxproj %*% t(added$maxproj) - if (is.null(sigma)) { - P.imax <- P.imax / exp(k*added$df/n) - } else { - P.imax <- P.imax * sigma^2 - } P.imax <- diag(rep(1, n)) - P.imax y.update <- P.imax %*% y.update x.update[, inactive.inds] <- P.imax %*% x.update[, inactive.inds] @@ -103,6 +101,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE if (step > 1) cumprojs[[step]] <- P.imax %*% cumprojs[[step-1]] terms[[step]] <- added$terms + # Compute RSS for unadjusted chisq p-values added$RSS <- sum(y.update^2) scale.chisq <- 1 @@ -112,14 +111,14 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE y.last <- y.update # Projections are stored separately - step.info <- data.frame(added[-c(4:(length(added)-3))]) + step.info <- data.frame(added[-c(3:(length(added)-4))]) path.info[step, ] <- step.info if (verbose) print(step.info) } # Create output object - value <- list(action=path.info$imax, L=path.info$L, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, bx = xm, sx = xs, sigma = sigma, intercept = intercept, terms = terms) + value <- list(action=path.info$imax, L=path.info$L, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, bx = xm, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) class(value) <- "groupfs" attr(value, "labels") <- labels attr(value, "index") <- index @@ -147,15 +146,14 @@ groupfs <- 
function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE #' @param k Multiplier of model size penalty, use \code{k = 2} for AIC, \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC. #' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. See \code{\link{extractAIC}} for details. #' @return Index \code{imax} of added group, value \code{L} of maximized negative AIC, lists of projection matrices defining quadratic model selection event. -add1.groupfs <- function(x, y, index, labels, inactive, k, sigma = NULL) { +add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { # Use characters to avoid issues where # list() populates NULL lists in the positions # of the active variables ### Question for later: does this slow down lapply? keys = as.character(inactive) - n2y <- sum(y^2) - n <- ncol(x) + n <- nrow(xr) # Compute sums of squares to determine which group is added # penalized by rank of group if k > 0 @@ -163,33 +161,31 @@ add1.groupfs <- function(x, y, index, labels, inactive, k, sigma = NULL) { names(projections) = names(terms) = names(aicpens) = keys for (key in keys) { inds <- which(index == key) - xi <- x[,inds] + xi <- xr[,inds] ui <- svdu_thresh(xi) dfi <- ncol(ui) projections[[key]] <- ui - dfi <- ncol(ui) - uy <- t(ui) %*% y + uy <- t(ui) %*% yr if (is.null(sigma)) { - aicpens[[key]] <- exp(k*dfi/n) - terms[[key]] <- (sum(uy^2) - sum(y^2)) * aicpens[[key]] + aicpens[[key]] <- exp(k*(dfi+1)/n) + terms[[key]] <- (sum(yr^2) - sum(uy^2)) * aicpens[[key]] } else { - aicpens[[key]] <- sigma^2 * k * dfi/n - terms[[key]] <- (sum(uy^2) - sum(y^2)) - aicpens[[key]] + aicpens[[key]] <- 2 * sigma^2 * k * dfi/n + terms[[key]] <- (sum(yr^2) - sum(uy^2)) - aicpens[[key]] } } # Maximizer = group to be added - terms.maxind <- which.max(terms) - imax <- inactive[terms.maxind] - keyind <- which(keys == imax) - maxproj <- projections[[keyind]] - maxpen <- aicpens[[keyind]] - projections[[keyind]] <- NULL - aicpens[[keyind]] <- NULL - - L <- terms[[terms.maxind]] - - return(list(imax=imax, L=L, df = ncol(maxproj), projections = projections, maxproj = maxproj, aicpens = aicpens, maxpen = maxpen, terms = terms)) + terms.optind <- which.min(terms) + imax <- inactive[terms.optind] + optkey <- which(keys == imax) + maxproj <- projections[[optkey]] + maxpen <- aicpens[[optkey]] + maxterm <- terms[[optkey]] + projections[[optkey]] <- NULL + aicpens[[optkey]] <- NULL + + return(list(imax=imax, df = ncol(maxproj), projections = projections, maxproj = maxproj, aicpens = aicpens, maxpen = maxpen, maxterm = maxterm, terms = terms)) } # ----------------------------------------------------------- @@ -272,7 +268,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { lapply(obj$cvobj, function(cvf) { truncationRegion(cvf, TC, R[-cvf$fold], eta[-cvf$fold], Z[-cvf$fold]) }))) - intervallist <- c(intervallist, + intervallist <- c(intervallist, lapply(obj$cvquad, function(cvquad) { etacvquad <- t(eta) %*% cvquad A <- etacvquad %*% eta @@ -373,14 +369,14 @@ num_int_chi <- function(a, b, df, nsamp = 10000) { #' @param x Design matrix. #' @param index Group membership indicator of length p. #' @param center Center groups, default is TRUE. -#' @param scale Scale groups by Frobenius norm, default is TRUE. +#' @param normalize Scale groups by Frobenius norm, default is TRUE. 
#' @return #' \describe{ #' \item{x}{Optionally centered/scaled design matrix.} #' \item{xm}{Means of groups in original design matrix.} #' \item{xs}{Frobenius norms of groups in original design matrix.} #' } -scaleGroups <- function(x, index, center = TRUE, scale = TRUE) { +scaleGroups <- function(x, index, center = TRUE, normalize = TRUE) { keys <- unique(index) xm <- rep(0, ncol(x)) xs <- rep(1, ncol(x)) @@ -396,7 +392,7 @@ scaleGroups <- function(x, index, center = TRUE, scale = TRUE) { xsj <- sqrt(normsq) xs[inds] <- xsj if (xsj > 0) { - if (scale) x[, inds] <- x[, inds] / xsj + if (normalize) x[, inds] <- x[, inds] / xsj } else { stop(paste("Design matrix contains identically zero group of variables:", j)) } @@ -469,7 +465,7 @@ print.groupfs <- function(x, ...) { action <- x$action vnames <- attr(x, "varnames") if (length(vnames) > 0) action <- vnames[action] - tab = data.frame(Group = action, Rank = x$log$df, RSS = round(x$log$RSS, 3)) + tab = data.frame(Group = action, Rank = x$log$df, RSS = round(x$log$RSS, 3), AIC = round(x$log$AIC, 3)) rownames(tab) = 1:nsteps print(tab) cat("\nUse groupfsInf() to compute P-values\n") diff --git a/tests/test.groupfs.R b/tests/test.groupfs.R index 629bec14..c0e70e3a 100644 --- a/tests/test.groupfs.R +++ b/tests/test.groupfs.R @@ -1,5 +1,9 @@ -library(selectiveInference) +#library(selectiveInference) #library(lars) +library(intervals) +source("../selectiveInference/R/funs.groupfs.R") +source("../selectiveInference/R/funs.quadratic.R") +source("../selectiveInference/R/funs.common.R") set.seed(1) n <- 40 @@ -72,7 +76,7 @@ y <- y + x %*% beta y <- y-mean(y) df <- data.frame(y = y, states) fsfit <- step(lm(y ~ 0, df), direction="forward", scope = formula(lm(y~., df)), steps = maxsteps, k = 2) -fit <- groupfs(x, y, index, maxsteps, k = 2, normalize = T) +fit <- groupfs(x, y, index, maxsteps, k = 2, intercept = F, center = F, normalize = T) # names(fsfit$coefficients)[-1] if (length(fsfit$coefficients) > 0) { fsnames <- cnames[which(!is.na(charmatch(cnames,names(fsfit$coefficients)[-1])))][order(unlist(lapply(cnames, function(cn) { @@ -99,7 +103,7 @@ for (iter in 1:niter) { y = rnorm(n) fit = groupfs(x, y, index, maxsteps) pvals = groupfsInf(fit) - pvalm[iter, ] = pvals$pv + pvalm[iter, ] = pvals$pv fitk = groupfs(x, y, index, maxsteps, sigma = 1) pvalsk = groupfsInf(fitk) pvalmk[iter, ] = pvalsk$pv From 918b74d40ceaf581a79915fa91d739fa76353c5c Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 2 Oct 2015 15:54:03 -0700 Subject: [PATCH 014/396] Updating documentation --- selectiveInference/man/groupfs.Rd | 4 +++- selectiveInference/man/scaleGroups.Rd | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/selectiveInference/man/groupfs.Rd b/selectiveInference/man/groupfs.Rd index 6f719a96..db2e512e 100644 --- a/selectiveInference/man/groupfs.Rd +++ b/selectiveInference/man/groupfs.Rd @@ -4,7 +4,7 @@ \title{Select a model with forward stepwise.} \usage{ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, - normalize = TRUE, verbose = FALSE) + center = TRUE, normalize = TRUE, verbose = FALSE) } \arguments{ \item{x}{Matrix of predictors (n by p).} @@ -21,6 +21,8 @@ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, \item{intercept}{Should an intercept be included in the model? Default is TRUE.} +\item{center}{Should the columns of the design matrix be centered? Default is TRUE.} + \item{normalize}{Should the design matrix be normalized? 
Default is TRUE.} \item{verbose}{Print out progress along the way? Default is FALSE.} diff --git a/selectiveInference/man/scaleGroups.Rd b/selectiveInference/man/scaleGroups.Rd index bfb899c8..1e7e1c4a 100644 --- a/selectiveInference/man/scaleGroups.Rd +++ b/selectiveInference/man/scaleGroups.Rd @@ -3,7 +3,7 @@ \alias{scaleGroups} \title{Center and scale design matrix by groups} \usage{ -scaleGroups(x, index, center = TRUE, scale = TRUE) +scaleGroups(x, index, center = TRUE, normalize = TRUE) } \arguments{ \item{x}{Design matrix.} @@ -12,7 +12,7 @@ scaleGroups(x, index, center = TRUE, scale = TRUE) \item{center}{Center groups, default is TRUE.} -\item{scale}{Scale groups by Frobenius norm, default is TRUE.} +\item{normalize}{Scale groups by Frobenius norm, default is TRUE.} } \value{ \describe{ From b28900b16fd6e511e51126112ff9ddae1b1fa74e Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 2 Oct 2015 16:10:51 -0700 Subject: [PATCH 015/396] Fixing warnings from CMD check --- selectiveInference/R/funs.groupfs.R | 9 ++++++--- selectiveInference/R/funs.quadratic.R | 8 ++++---- tests/test.groupfs.R | 10 +++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 8c69aa02..b44b6003 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -138,8 +138,8 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE #' #' For internal use by \code{\link{groupfs}}. #' -#' @param x Design matrix. -#' @param y Response vector. +#' @param xr Design matrix at current step. +#' @param yr Response vector residual at current step. #' @param index Group membership indicator of length p. #' @param labels The unique elements of \code{index}. #' @param inactive Labels of inactive groups. 
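# A minimal sketch of the interval algebra behind the "Empty TC support"
# guard added in the hunk below, using only 'intervals' package calls that
# already appear in this file (the values here are illustrative):
#     library(intervals)
#     region <- interval_union(Intervals(c(-Inf, 0)), Intervals(c(1, 2)))
#     E <- interval_complement(region, check_valid = FALSE)
#     length(E) == 0  # TRUE would be the degenerate case the new stop() catches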
@@ -282,6 +282,9 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { region <- do.call(interval_union, intervallist) region <- interval_union(region, Intervals(c(-Inf,0))) E <- interval_complement(region, check_valid = FALSE) + if (length(E) == 0) { + stop(paste("Empty TC support at step", j)) + } supports[[j]] <- E # E is now potentially a union of intervals @@ -301,7 +304,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { TC_surv <- function(TC, sigma, df, E) { if (length(E) == 0) { - stop(paste("Empty TC support at step", j)) + stop("Empty TC support") } # Sum truncated cdf over each part of E diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 09dee4a1..2a33cf4b 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -47,7 +47,7 @@ truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { B <- 2 * as.numeric(t(Ugeta) %*% UgZ - t(Uheta) %*% UhZ) C <- sum(UgZ^2) - sum(UhZ^2) - pendiff } - + quadratic_roots(A, B, C, tol) }) } @@ -62,7 +62,7 @@ quadratic_roots <- function(A, B, C, tol) { b2a <- -B/(2*A) if (disc > tol) { - # Real roots + # Real roots pm <- sqrt(disc)/(2*A) endpoints <- sort(c(b2a - pm, b2a + pm)) @@ -73,7 +73,7 @@ quadratic_roots <- function(A, B, C, tol) { return(Intervals(c(-Inf,0))) } else { # Quadratic form always negative - stop(paste("Empty TC support is infeasible", s, "-", l)) + stop("Empty TC support is infeasible") } } @@ -91,7 +91,7 @@ quadratic_roots <- function(A, B, C, tol) { # Parabola opens downward if (endpoints[2] < 0) { # Positive quadratic form only when t negative - stop(paste("Negative TC support is infeasible", s, "-", l)) + stop("Negative TC support is infeasible") } else { # Part which is positive if (endpoints[1] > 0) { diff --git a/tests/test.groupfs.R b/tests/test.groupfs.R index c0e70e3a..db4feff2 100644 --- a/tests/test.groupfs.R +++ b/tests/test.groupfs.R @@ -1,9 +1,9 @@ -#library(selectiveInference) +library(selectiveInference) #library(lars) -library(intervals) -source("../selectiveInference/R/funs.groupfs.R") -source("../selectiveInference/R/funs.quadratic.R") -source("../selectiveInference/R/funs.common.R") +#library(intervals) +#source("../selectiveInference/R/funs.groupfs.R") +#source("../selectiveInference/R/funs.quadratic.R") +#source("../selectiveInference/R/funs.common.R") set.seed(1) n <- 40 From b55101f429ddf5d10126756e5f984dbe0ec5e3d3 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sun, 4 Oct 2015 22:04:57 -0700 Subject: [PATCH 016/396] Comprehensive test comparing step to groupfs with categorical designs --- tests/test.categorical.R | 49 ++++++++++++++++++++++++++++++++++++++++ tests/test.groupfs.R | 11 +++++---- 2 files changed, 55 insertions(+), 5 deletions(-) create mode 100644 tests/test.categorical.R diff --git a/tests/test.categorical.R b/tests/test.categorical.R new file mode 100644 index 00000000..a999a1be --- /dev/null +++ b/tests/test.categorical.R @@ -0,0 +1,49 @@ +#library(selectiveInference) +#library(lars) +library(intervals) +source("../selectiveInference/R/funs.groupfs.R") +source("../selectiveInference/R/funs.quadratic.R") +source("../selectiveInference/R/funs.common.R") + +set.seed(1) +n <- 100 +G <- 10 +maxsteps <- 10 +snr <- 1 +niter <- 200 + +print("Comparing step with groupfs on random categorical designs") +aicdiffs <- numeric(niter) +mismatchcount <- 0 +for (iter in 1:niter) { + rles <- 2 + rpois(G, 2) + df <- data.frame(do.call(cbind, lapply(rles, function(g) { + sample(LETTERS[1:g], 
n, replace = TRUE, prob = runif(g)) + })), stringsAsFactors = TRUE) + if (any(apply(df, 2, function(col) length(unique(col))) == 1)) next + fd <- factorDesign(df) + if (any(duplicated(fd$x, MARGIN = 2))) next + y <- rnorm(n) + x1 <- fd$x[, fd$index == 1] + y <- y + x1 %*% c(snr, rep(0, ncol(x1) - 2), -snr) + y <- y - mean(y) + df$y <- y + capture.output(fsfit <- step(lm(y ~ 0, df), direction="forward", scope = formula(lm(y~.-1, df)), steps = maxsteps, trace = 1000), file = "/dev/null") + fit <- groupfs(fd$x, df$y, fd$index, maxsteps = 10, intercept = F, center = F, normalize = F) + fsnames <- names(fsfit$coefficients) + if (length(fsnames) > 0) { + fsnames <- unique(substr(fsnames, 1, nchar(fsnames) - 1)) + k <- length(fsnames) + fitnames <- attr(fit, "varnames")[fit$action][1:k] + aicdiffs[iter] <- AIC(fsfit) - fit$log$AIC[k] + if (any(fsnames != fitnames)) { + print(paste("Mismatch at iteration", iter)) + print(fsnames) + print(fitnames) + mismatchcount <- mismatchcount + 1 + } + } +} +print(paste("Total mismatches:", mismatchcount, "out of", niter)) +print("Summary of differences in AIC") +summary(aicdiffs) diff --git a/tests/test.groupfs.R b/tests/test.groupfs.R index db4feff2..38cc85df 100644 --- a/tests/test.groupfs.R +++ b/tests/test.groupfs.R @@ -1,9 +1,9 @@ -library(selectiveInference) +#library(selectiveInference) #library(lars) -#library(intervals) -#source("../selectiveInference/R/funs.groupfs.R") -#source("../selectiveInference/R/funs.quadratic.R") -#source("../selectiveInference/R/funs.common.R") +library(intervals) +source("../selectiveInference/R/funs.groupfs.R") +source("../selectiveInference/R/funs.quadratic.R") +source("../selectiveInference/R/funs.common.R") set.seed(1) n <- 40 @@ -128,3 +128,4 @@ print(colMeans(pvalmk)) print(mean(pvalm)) print(mean(pvalmk)) + From 0a3e0881b002f38f3d0b29cd22332f343569380f Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sun, 4 Oct 2015 23:08:40 -0700 Subject: [PATCH 017/396] Started coding early stopping if AIC increases --- selectiveInference/R/funs.groupfs.R | 15 +++++++++++---- selectiveInference/man/factorDesign.Rd | 3 ++- selectiveInference/man/groupfs.Rd | 7 +++++-- selectiveInference/man/groupfsInf.Rd | 3 ++- selectiveInference/man/scaleGroups.Rd | 3 ++- 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index b44b6003..17d2834b 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -11,6 +11,7 @@ #' @param intercept Should an intercept be included in the model? Default is TRUE. #' @param center Should the columns of the design matrix be centered? Default is TRUE. #' @param normalize Should the design matrix be normalized? Default is TRUE. +#' @param aicstop Early stopping if AIC increases, default is FALSE. #' @param verbose Print out progress along the way? Default is FALSE. #' @return An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. 
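# Complementing the categorical-design test added above, a hedged sketch of
# the factorDesign() round trip it exercises (hypothetical data):
#     df <- data.frame(a = gl(3, 10), b = gl(2, 15))
#     y <- rnorm(30)
#     fd <- factorDesign(df)  # full binary encoding, one group per factor
#     fit <- groupfs(fd$x, y, fd$index, maxsteps = 2)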
#' @examples @@ -20,7 +21,7 @@ #' fit = groupfs(x, y, index, maxsteps = 5) #' pvals = groupfsInf(fit) #' @seealso \code{\link{groupfsInf}} -groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, verbose = FALSE) { +groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, aicstop = FALSE, verbose = FALSE) { if (missing(index)) stop("Missing argument: index.") p <- ncol(x) @@ -42,7 +43,6 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE warning(paste("If the largest groups are included the model will be saturated/overdetermined. To prevent this maxsteps has been changed to", maxsteps)) } - # Initialize copies of data for loop by <- mean(y) y.update <- y @@ -65,6 +65,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE path.info <- data.frame(imax=integer(maxsteps), df=integer(maxsteps), AIC=numeric(maxsteps), RSS=numeric(maxsteps), RSSdrop=numeric(maxsteps), chisq=numeric(maxsteps)) modelrank <- as.numeric(intercept) + aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1 + k * (is.null(sigma) + intercept)) # Begin main loop for (step in 1:maxsteps) { @@ -101,7 +102,6 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE if (step > 1) cumprojs[[step]] <- P.imax %*% cumprojs[[step-1]] terms[[step]] <- added$terms - # Compute RSS for unadjusted chisq p-values added$RSS <- sum(y.update^2) scale.chisq <- 1 @@ -109,11 +109,18 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE added$RSSdrop <- sum((y.last - y.update)^2) added$chisq <- pchisq(added$RSSdrop/scale.chisq, lower.tail=FALSE, df = added$df) y.last <- y.update - + # Projections are stored separately step.info <- data.frame(added[-c(3:(length(added)-4))]) path.info[step, ] <- step.info + if (aic.last < added$AIC) { + # TODO tomorrow + # Modify the object somehow + # and groupfsInf + break + } + aic.last <- added$AIC if (verbose) print(step.info) } diff --git a/selectiveInference/man/factorDesign.Rd b/selectiveInference/man/factorDesign.Rd index 3a42e2ea..d5d2576d 100644 --- a/selectiveInference/man/factorDesign.Rd +++ b/selectiveInference/man/factorDesign.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R \name{factorDesign} \alias{factorDesign} \title{Expand a data frame with factors to form a design matrix with the full binary encoding of each factor.} diff --git a/selectiveInference/man/groupfs.Rd b/selectiveInference/man/groupfs.Rd index db2e512e..c279c9ec 100644 --- a/selectiveInference/man/groupfs.Rd +++ b/selectiveInference/man/groupfs.Rd @@ -1,10 +1,11 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R \name{groupfs} \alias{groupfs} \title{Select a model with forward stepwise.} \usage{ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, - center = TRUE, normalize = TRUE, verbose = FALSE) + center = TRUE, normalize = TRUE, aicstop = FALSE, verbose = FALSE) } \arguments{ \item{x}{Matrix of predictors (n by p).} @@ -25,6 +26,8 @@ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, \item{normalize}{Should the design matrix be normalized? 
Default is TRUE.} +\item{aicstop}{Early stopping if AIC increases, default is FALSE.} + \item{verbose}{Print out progress along the way? Default is FALSE.} } \value{ diff --git a/selectiveInference/man/groupfsInf.Rd b/selectiveInference/man/groupfsInf.Rd index 4926896f..f5ee9700 100644 --- a/selectiveInference/man/groupfsInf.Rd +++ b/selectiveInference/man/groupfsInf.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R \name{groupfsInf} \alias{groupfsInf} \title{Compute selective p-values for a model fitted by \code{groupfs}.} diff --git a/selectiveInference/man/scaleGroups.Rd b/selectiveInference/man/scaleGroups.Rd index 1e7e1c4a..e5a93fab 100644 --- a/selectiveInference/man/scaleGroups.Rd +++ b/selectiveInference/man/scaleGroups.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R \name{scaleGroups} \alias{scaleGroups} \title{Center and scale design matrix by groups} From 680a56ddf9057461bd882207798a125f81ac728d Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 5 Oct 2015 13:22:20 -0700 Subject: [PATCH 018/396] Partially complete with early stopping and checking known sigma vs step --- selectiveInference/R/funs.groupfs.R | 63 ++++++++++++++++++++--------- tests/test.categorical.R | 8 ++-- tests/test.groupfs.R | 2 +- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 17d2834b..6abf324f 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -11,17 +11,17 @@ #' @param intercept Should an intercept be included in the model? Default is TRUE. #' @param center Should the columns of the design matrix be centered? Default is TRUE. #' @param normalize Should the design matrix be normalized? Default is TRUE. -#' @param aicstop Early stopping if AIC increases, default is FALSE. +#' @param aicstop Early stopping if AIC increases. Default is 0 corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}. #' @param verbose Print out progress along the way? Default is FALSE. #' @return An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. #' @examples #' x = matrix(rnorm(20*40), nrow=20) #' index = sort(rep(1:20, 2)) -#' y = rnorm(20) + 2 * (x[,1] - x[,2]) - (x[,3] - x[,4]) +#' y = rnorm(20) + 2 * x[,1] - x[,4] #' fit = groupfs(x, y, index, maxsteps = 5) #' pvals = groupfsInf(fit) -#' @seealso \code{\link{groupfsInf}} -groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, aicstop = FALSE, verbose = FALSE) { +#' @seealso \code{\link{groupfsInf}}, \code{\link{factorDesign}}. 
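# A hedged usage sketch of the integer-valued aicstop rule documented above
# (hypothetical data; aicstop = 1 mirrors the default stopping behavior of
# stats::step):
#     x <- matrix(rnorm(50 * 10), nrow = 50)
#     index <- sort(rep(1:5, 2))
#     y <- rnorm(50) + 2 * x[, 1]
#     fit <- groupfs(x, y, index, maxsteps = 5, aicstop = 1)
#     attr(fit, "stopped")  # TRUE when the path ended before maxsteps
# For known sigma the criterion below is the Mallows C_p form
# RSS/sigma^2 - n + k * rank, the same quantity stats::extractAIC reports
# when its scale argument is set to the error variance.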
+groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE) { if (missing(index)) stop("Missing argument: index.") p <- ncol(x) @@ -65,7 +65,11 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE path.info <- data.frame(imax=integer(maxsteps), df=integer(maxsteps), AIC=numeric(maxsteps), RSS=numeric(maxsteps), RSSdrop=numeric(maxsteps), chisq=numeric(maxsteps)) modelrank <- as.numeric(intercept) - aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1 + k * (is.null(sigma) + intercept)) + if (is.null(sigma)) { + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1 + k * (is.null(sigma) + intercept)) + } else { + aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * intercept + } # Begin main loop for (step in 1:maxsteps) { @@ -78,8 +82,6 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE active <- union(active, imax) inactive.inds <- which(!index %in% active) - # Compute AIC - added$AIC <- n*log(exp(k*modelrank/n) * added$maxterm/n) + n*(log(2*pi) + 1) # Rank of group modelrank <- modelrank + added$df @@ -94,6 +96,13 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE y.update <- P.imax %*% y.update x.update[, inactive.inds] <- P.imax %*% x.update[, inactive.inds] + # Compute AIC + if (is.null(sigma)) { + added$AIC <- n*log(exp(k*modelrank/n) * added$maxterm/n) + n*(log(2*pi) + 1) + } else { + added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + projections[[step]] <- added$projections maxprojs[[step]] <- added$maxproj aicpens[[step]] <- added$aicpens @@ -109,36 +118,47 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE added$RSSdrop <- sum((y.last - y.update)^2) added$chisq <- pchisq(added$RSSdrop/scale.chisq, lower.tail=FALSE, df = added$df) y.last <- y.update - + # Projections are stored separately step.info <- data.frame(added[-c(3:(length(added)-4))]) path.info[step, ] <- step.info - if (aic.last < added$AIC) { - # TODO tomorrow - # Modify the object somehow - # and groupfsInf - break + if (aicstop > 0 && step >= aicstop && aic.last < added$AIC) { + ########## Incomplete ########## + ## cut off the last aicstop variables as well? 
+ if (all(diff(c(aic.begin, path.info$AIC)[(step+1-aicstop):(step+1)]) > 0)) { + path.info <- path.info[1:step, ] + projections[(step+1):maxsteps] <- NULL + maxprojs[(step+1):maxsteps] <- NULL + aicpens[(step+1):maxsteps] <- NULL + maxpens[(step+1):maxsteps] <- NULL + cumprojs[(step+1):maxsteps] <- NULL + terms[(step+1):maxsteps] <- NULL + maxsteps <- step + # add additional projections + break + } } aic.last <- added$AIC if (verbose) print(step.info) } # Create output object - value <- list(action=path.info$imax, L=path.info$L, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, bx = xm, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) + value <- list(action=path.info$imax, L=path.info$L, AIC=path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, bx = xm, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) class(value) <- "groupfs" attr(value, "labels") <- labels attr(value, "index") <- index attr(value, "maxsteps") <- maxsteps attr(value, "sigma") <- sigma attr(value, "k") <- k + attr(value, "aicstop") <- aicstop if (is.null(attr(x, "varnames"))) { attr(value, "varnames") <- colnames(x) } else { attr(value, "varnames") <- attr(x, "varnames") } - invisible(value) + return(value) } #' Add one group to the model in \code{groupfs}. @@ -177,8 +197,8 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { aicpens[[key]] <- exp(k*(dfi+1)/n) terms[[key]] <- (sum(yr^2) - sum(uy^2)) * aicpens[[key]] } else { - aicpens[[key]] <- 2 * sigma^2 * k * dfi/n - terms[[key]] <- (sum(yr^2) - sum(uy^2)) - aicpens[[key]] + aicpens[[key]] <- k * dfi/n + terms[[key]] <- (sum(yr^2) - sum(uy^2))/sigma^2 - aicpens[[key]] } } @@ -203,8 +223,10 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' #' @param obj Object returned by \code{\link{groupfs}} function #' @param sigma Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate. +#' @param type Type of conditional p-values to compute. With "all" (default), p-values are computed conditional on the final model with all variables up to \code{maxsteps}; with "aic" the number of steps is chosen after which the AIC criterion increases \code{ntimes} in a row, and then the same type of analysis as in "all" is carried out for the active variables at that number of steps. +#' @param ntimes Number of steps for which AIC criterion has to increase before minimizing point is declared. #' @param verbose Print out progress along the way? Default is FALSE. -#' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. The default printing behavior should supply adequate information. +#' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. 
#' #' \describe{ #' \item{vars}{Labels of the active groups in the order they were included.} @@ -214,8 +236,9 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' \item{df}{Rank of group of variables when it was added to the model.} #' \item{support}{List of intervals defining the truncation region of the truncated chi.} #' } -groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { +groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, verbose = FALSE) { + type <- match.arg(type) n <- nrow(obj$x) p <- ncol(obj$x) maxsteps <- attr(obj, "maxsteps") @@ -304,7 +327,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { if (!is.null(attr(obj, "varnames"))) { attr(out, "varnames") <- attr(obj, "varnames") } - invisible(out) + return(out) } # ----------------------------------------------------------- diff --git a/tests/test.categorical.R b/tests/test.categorical.R index a999a1be..4edf0693 100644 --- a/tests/test.categorical.R +++ b/tests/test.categorical.R @@ -10,7 +10,7 @@ n <- 100 G <- 10 maxsteps <- 10 snr <- 1 -niter <- 200 +niter <- 100 print("Comparing step with groupfs on random categorical designs") aicdiffs <- numeric(niter) @@ -29,14 +29,14 @@ for (iter in 1:niter) { y <- y - mean(y) df$y <- y capture.output(fsfit <- step(lm(y ~ 0, df), direction="forward", scope = formula(lm(y~.-1, df)), steps = maxsteps, trace = 1000), file = "/dev/null") - fit <- groupfs(fd$x, df$y, fd$index, maxsteps = 10, intercept = F, center = F, normalize = F) + fit <- groupfs(fd$x, df$y, fd$index, maxsteps = 10, intercept = F, center = F, normalize = F, aicstop = 1) fsnames <- names(fsfit$coefficients) if (length(fsnames) > 0) { fsnames <- unique(substr(fsnames, 1, nchar(fsnames) - 1)) k <- length(fsnames) - fitnames <- attr(fit, "varnames")[fit$action][1:k] + fitnames <- attr(fit, "varnames")[fit$action][1:(length(fit$action)-attr(fit, "aicstop"))] aicdiffs[iter] <- AIC(fsfit) - fit$log$AIC[k] - if (any(fsnames != fitnames)) { + if (length(fitnames) !=k || any(fsnames != fitnames)) { print(paste("Mismatch at iteration", iter)) print(fsnames) print(fitnames) diff --git a/tests/test.groupfs.R b/tests/test.groupfs.R index 38cc85df..d35800e5 100644 --- a/tests/test.groupfs.R +++ b/tests/test.groupfs.R @@ -14,7 +14,7 @@ sparsity <- 5 snr <- 3 system.time({ -for (iter in 1:10) { +for (iter in 1:100) { y <- rnorm(n) x <- matrix(rnorm(n*p), nrow=n) beta <- rep(0, p) From e9f061810d74c818d5a56cb7d6356365b10a9588 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 5 Oct 2015 16:05:43 -0700 Subject: [PATCH 019/396] groupfs matches step exactly for both known and unknown sigma --- selectiveInference/R/funs.groupfs.R | 21 ++++++++--- tests/test.categorical.R | 55 +++++++++++++++++++---------- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 6abf324f..44f45236 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -58,6 +58,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE x.begin <- x.update y.begin <- y.update + stopped <- FALSE # Store all projections computed along the path terms = projections = maxprojs = aicpens = maxpens = cumprojs = vector("list", maxsteps) @@ -70,6 +71,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE } else { aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * intercept } + if (verbose) print(paste0("Start: AIC=", 
round(aic.begin, 3)), quote = FALSE) # Begin main loop for (step in 1:maxsteps) { @@ -98,9 +100,14 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE # Compute AIC if (is.null(sigma)) { - added$AIC <- n*log(exp(k*modelrank/n) * added$maxterm/n) + n*(log(2*pi) + 1) + added$AIC <- n * log(added$maxterm/n) - k * added$df + n + n*log(2*pi) + k * modelrank } else { added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + if (verbose) { + aics <- matrix(round(unlist(added$terms) - n + k * (modelrank - added$df), 2), ncol = 1) + rownames(aics) <- names(added$terms) + write.table(aics, col.names = F, quote = F) + } } projections[[step]] <- added$projections @@ -123,6 +130,8 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE step.info <- data.frame(added[-c(3:(length(added)-4))]) path.info[step, ] <- step.info + if (verbose) print(round(step.info, 3)) + if (aicstop > 0 && step >= aicstop && aic.last < added$AIC) { ########## Incomplete ########## ## cut off the last aicstop variables as well? @@ -135,16 +144,17 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE cumprojs[(step+1):maxsteps] <- NULL terms[(step+1):maxsteps] <- NULL maxsteps <- step + stopped <- TRUE # add additional projections break } } aic.last <- added$AIC - if (verbose) print(step.info) } # Create output object value <- list(action=path.info$imax, L=path.info$L, AIC=path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, bx = xm, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) + class(value) <- "groupfs" attr(value, "labels") <- labels attr(value, "index") <- index @@ -152,6 +162,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE attr(value, "sigma") <- sigma attr(value, "k") <- k attr(value, "aicstop") <- aicstop + if (aicstop > 0) attr(value, "stopped") <- stopped if (is.null(attr(x, "varnames"))) { attr(value, "varnames") <- colnames(x) } else { @@ -194,11 +205,11 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { projections[[key]] <- ui uy <- t(ui) %*% yr if (is.null(sigma)) { - aicpens[[key]] <- exp(k*(dfi+1)/n) + aicpens[[key]] <- exp(k*dfi/n) terms[[key]] <- (sum(yr^2) - sum(uy^2)) * aicpens[[key]] } else { - aicpens[[key]] <- k * dfi/n - terms[[key]] <- (sum(yr^2) - sum(uy^2))/sigma^2 - aicpens[[key]] + aicpens[[key]] <- k * dfi + terms[[key]] <- (sum(yr^2) - sum(uy^2))/sigma^2 + aicpens[[key]] } } diff --git a/tests/test.categorical.R b/tests/test.categorical.R index 4edf0693..2b5f4bd2 100644 --- a/tests/test.categorical.R +++ b/tests/test.categorical.R @@ -12,9 +12,31 @@ maxsteps <- 10 snr <- 1 niter <- 100 +check.mismatch <- function(fsfit, fit) { + fsnames <- names(fsfit$coefficients) + if (length(fsnames) > 0) { + fsnames <- unique(substr(fsnames, 1, nchar(fsnames) - 1)) + k <- length(fsnames) + fitnames <- attr(fit, "varnames")[fit$action][1:(length(fit$action)-attr(fit, "aicstop"))] + if (is.null(fit$sigma)) { + aicdiff <- AIC(fsfit) - fit$log$AIC[k] + } else { + aicdiff <- extractAIC(fsfit, scale = fit$sigma)[2] - fit$log$AIC[k] + } + if (length(fitnames) !=k || any(fsnames != fitnames)) { + print(paste("Mismatch at iteration", iter, ifelse(is.null(fit$sigma), "unknown", "known"))) + print(fsnames) + print(fitnames) + return(list(count = 1, aicdiff = aicdiff)) + } + return(list(count = 0, aicdiff 
= aicdiff)) + } + return(list(count = 0, aicdiff = 0)) +} + print("Comparing step with groupfs on random categorical designs") -aicdiffs <- numeric(niter) -mismatchcount <- 0 +umismatchcount <- kmismatchcount <- 0 +uaicdiffs <- kaicdiffs <- numeric(niter) for (iter in 1:niter) { rles <- 2 + rpois(G, 2) df <- data.frame(do.call(cbind, lapply(rles, function(g) { @@ -28,22 +50,17 @@ for (iter in 1:niter) { y <- y + x1 %*% c(snr, rep(0, ncol(x1) - 2), -snr) y <- y - mean(y) df$y <- y - capture.output(fsfit <- step(lm(y ~ 0, df), direction="forward", scope = formula(lm(y~.-1, df)), steps = maxsteps, trace = 1000), file = "/dev/null") + capture.output(fsfit <- step(lm(y ~ 0, df), direction="forward", scope = formula(lm(y~.-1, df)), steps = maxsteps), file = "/dev/null") fit <- groupfs(fd$x, df$y, fd$index, maxsteps = 10, intercept = F, center = F, normalize = F, aicstop = 1) - fsnames <- names(fsfit$coefficients) - if (length(fsnames) > 0) { - fsnames <- unique(substr(fsnames, 1, nchar(fsnames) - 1)) - k <- length(fsnames) - fitnames <- attr(fit, "varnames")[fit$action][1:(length(fit$action)-attr(fit, "aicstop"))] - aicdiffs[iter] <- AIC(fsfit) - fit$log$AIC[k] - if (length(fitnames) !=k || any(fsnames != fitnames)) { - print(paste("Mismatch at iteration", iter)) - print(fsnames) - print(fitnames) - mismatchcount <- mismatchcount + 1 - } - } + mismatches <- check.mismatch(fsfit, fit) + umismatchcount <- umismatchcount + mismatches$count + uaicdiffs[iter] <- mismatches$aicdiff + capture.output(fsfit <- step(lm(y ~ 0, df), scale = 1, direction="forward", scope = formula(lm(y~.-1, df)), steps = maxsteps), file = "/dev/null") + fit <- groupfs(fd$x, df$y, fd$index, maxsteps = 10, sigma = 1, intercept = F, center = F, normalize = F, aicstop = 1) + mismatches <- check.mismatch(fsfit, fit) + kmismatchcount <- kmismatchcount + mismatches$count + kaicdiffs[iter] <- mismatches$aicdiff } -print(paste("Total mismatches:", mismatchcount, "out of", niter)) -print("Summary of differences in AIC") -summary(aicdiffs) +print(paste("Mismatches:", umismatchcount, "for unknown sigma and", kmismatchcount, "for known")) +summary(uaicdiffs) +summary(kaicdiffs) From a18943602aae11572b63af64d8ff87417bdb8124 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 5 Oct 2015 16:08:06 -0700 Subject: [PATCH 020/396] Removed print statement used for debugging --- selectiveInference/R/funs.groupfs.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 44f45236..e9b03d79 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -103,11 +103,6 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE added$AIC <- n * log(added$maxterm/n) - k * added$df + n + n*log(2*pi) + k * modelrank } else { added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank - if (verbose) { - aics <- matrix(round(unlist(added$terms) - n + k * (modelrank - added$df), 2), ncol = 1) - rownames(aics) <- names(added$terms) - write.table(aics, col.names = F, quote = F) - } } projections[[step]] <- added$projections From 23e1599591a93dc9100ee5e661490f7dcbfad1ca Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 5 Oct 2015 18:06:49 -0700 Subject: [PATCH 021/396] Currently broken, but almost done implementing AICstop --- selectiveInference/R/funs.groupfs.R | 65 +++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R 
b/selectiveInference/R/funs.groupfs.R index e9b03d79..a3f0b7cb 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -131,6 +131,40 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE ########## Incomplete ########## ## cut off the last aicstop variables as well? if (all(diff(c(aic.begin, path.info$AIC)[(step+1-aicstop):(step+1)]) > 0)) { + stopquads <- ulist <- zlist <- penlist <- vector("list", aicstop) + for (s in seq(aicstop)) { + etas <- cumprojs[[step - aicstop + s]] %*% eta + Zs <- cumprojs[[step - aicstop + s]] %*% Z + maxprojs[[(step-aicstop):step]][[s]] + maxpens[[(step-aicstop):step]][[s]] + } + + Uheta <- t(Uh) %*% etas + Ugeta <- t(Ug) %*% etas + UhZ <- t(Uh) %*% Zs + UgZ <- t(Ug) %*% Zs + etasZs <- t(etas) %*% Zs + peng <- obj$maxpens[[s]] + penh <- obj$aicpens[[s]][[l]] + pendiff <- peng-penh + if (is.null(obj$sigma)) { + A <- sum(Ugeta^2) * peng - sum(Uheta^2) * penh - sum(etas^2) * pendiff + B <- 2 * as.numeric(t(Ugeta) %*% UgZ * peng - t(Uheta) %*% UhZ * penh - etasZs * pendiff) + C <- sum(UgZ^2) * peng - sum(UhZ^2) * penh - sum(Zs^2) * pendiff + } else { + # Check this + A <- sum(Ugeta^2) - sum(Uheta^2) + B <- 2 * as.numeric(t(Ugeta) %*% UgZ - t(Uheta) %*% UhZ) + C <- sum(UgZ^2) - sum(UhZ^2) - pendiff + } + + + if (is.null(sigma)) { + added$AIC <- n * log(added$maxterm/n) - k * added$df + n + n*log(2*pi) + k * modelrank + } else { + added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + path.info <- path.info[1:step, ] projections[(step+1):maxsteps] <- NULL maxprojs[(step+1):maxsteps] <- NULL @@ -157,13 +191,15 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE attr(value, "sigma") <- sigma attr(value, "k") <- k attr(value, "aicstop") <- aicstop - if (aicstop > 0) attr(value, "stopped") <- stopped + attr(value, "stopped") <- stopped + if (stopped) { + value$stopquads <- stopquads + } if (is.null(attr(x, "varnames"))) { attr(value, "varnames") <- colnames(x) } else { attr(value, "varnames") <- attr(x, "varnames") } - return(value) } @@ -300,18 +336,29 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve intervallist <- truncationRegion(obj, TC, R, eta, Z) if (!is.null(obj$cvobj)) { + if (attr(obj, "stopped")) stop("Cross-validation and early stopping cannot be used simultaneously") intervallist <- c(intervallist, do.call(c, lapply(obj$cvobj, function(cvf) { truncationRegion(cvf, TC, R[-cvf$fold], eta[-cvf$fold], Z[-cvf$fold]) }))) intervallist <- c(intervallist, - lapply(obj$cvquad, function(cvquad) { - etacvquad <- t(eta) %*% cvquad - A <- etacvquad %*% eta - B <- 2 * etacvquad %*% Z - C <- t(Z) %*% cvquad %*% Z - quadratic_roots(A, B, C, tol = 1e-15) - })) + lapply(obj$cvquad, function(cvquad) { + etacvquad <- t(eta) %*% cvquad + A <- etacvquad %*% eta + B <- 2 * etacvquad %*% Z + C <- t(Z) %*% cvquad %*% Z + quadratic_roots(A, B, C, tol = 1e-15) + })) + } + if (attr(obj, "stopped")) { + intervallist <- c(intervallist, + lapply(obj$stopquads, function(squad) { + etasquad <- t(eta) %*% squad + A <- etasquad %*% eta + B <- 2 * etasquad %*% Z + C <- t(Z) %*% squad %*% Z + quadratic_roots(A, B, C, tol = 1e-15) + })) } # Compute intersection: From d2d9db43c4516842c2b13d3cbf25c75d089862bf Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 5 Oct 2015 22:12:45 -0700 Subject: [PATCH 022/396] Still broken, closer to finishing AICstop --- selectiveInference/R/funs.groupfs.R | 65 +++++++++++++-------------- 
selectiveInference/R/funs.quadratic.R | 41 ++++++++--------- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index a3f0b7cb..686d248c 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -131,33 +131,6 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE ########## Incomplete ########## ## cut off the last aicstop variables as well? if (all(diff(c(aic.begin, path.info$AIC)[(step+1-aicstop):(step+1)]) > 0)) { - stopquads <- ulist <- zlist <- penlist <- vector("list", aicstop) - for (s in seq(aicstop)) { - etas <- cumprojs[[step - aicstop + s]] %*% eta - Zs <- cumprojs[[step - aicstop + s]] %*% Z - maxprojs[[(step-aicstop):step]][[s]] - maxpens[[(step-aicstop):step]][[s]] - } - - Uheta <- t(Uh) %*% etas - Ugeta <- t(Ug) %*% etas - UhZ <- t(Uh) %*% Zs - UgZ <- t(Ug) %*% Zs - etasZs <- t(etas) %*% Zs - peng <- obj$maxpens[[s]] - penh <- obj$aicpens[[s]][[l]] - pendiff <- peng-penh - if (is.null(obj$sigma)) { - A <- sum(Ugeta^2) * peng - sum(Uheta^2) * penh - sum(etas^2) * pendiff - B <- 2 * as.numeric(t(Ugeta) %*% UgZ * peng - t(Uheta) %*% UhZ * penh - etasZs * pendiff) - C <- sum(UgZ^2) * peng - sum(UhZ^2) * penh - sum(Zs^2) * pendiff - } else { - # Check this - A <- sum(Ugeta^2) - sum(Uheta^2) - B <- 2 * as.numeric(t(Ugeta) %*% UgZ - t(Uheta) %*% UhZ) - C <- sum(UgZ^2) - sum(UhZ^2) - pendiff - } - if (is.null(sigma)) { added$AIC <- n * log(added$maxterm/n) - k * added$df + n + n*log(2*pi) + k * modelrank @@ -351,13 +324,39 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve })) } if (attr(obj, "stopped")) { + + ulist <- etalist <- zlist <- penlist <- vector("list", aicstop) + for (s in seq(aicstop)) { + stepind <- maxsteps - aicstop + s + if (stepind > 1) { + etalist[[s]] <- cumprojs[[stepind]] %*% eta + zlist[[s]] <- cumprojs[[stepind]] %*% Z + } else { + etalist[[s]] <- eta + zlist[[s]] <- Z + } + ulist[[s]] <- obj$maxprojs[[stepind]] + penlist[[s]] <- obj$maxpens[[stepind]] + } + intervallist <- c(intervallist, - lapply(obj$stopquads, function(squad) { - etasquad <- t(eta) %*% squad - A <- etasquad %*% eta - B <- 2 * etasquad %*% Z - C <- t(Z) %*% squad %*% Z - quadratic_roots(A, B, C, tol = 1e-15) + lapply(0:(aicstop-1), function(s) { + # ^ fix this ^ + # check indexing direction + Ug <- ulist[[s]] + Uh <- ulist[[s+1]] + peng <- penlist[[s]] + penh <- penlist[[s+1]] + etag <- etalist[[s]] + etah <- etalist[[s+1]] + Zg <- zlist[[s]] + Zh <- zlist[[s+1]] + + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) + + # additional constant term + # pendiff * (sum(rs^2) - sum(r_(s-1)^2)) ? 
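# For orientation: once y is decomposed as t * eta + Z (with Z orthogonal to
# eta), each comparison collected into intervallist above is a quadratic
# inequality A t^2 + B t + C >= 0 in the scalar t. A sketch of the root step,
# assuming coefficients from quadratic_coefficients():
#     disc <- B^2 - 4 * A * C
#     if (disc > 0) endpoints <- sort((-B + c(-1, 1) * sqrt(disc)) / (2 * A))
# The sign of A then decides whether the feasible set is the interval between
# the endpoints or its complement, which is what quadratic_roots() implements.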
})) } diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 2a33cf4b..db73cc87 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -6,8 +6,7 @@ truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { L <- lapply(1:length(obj$action), function(s) { Ug <- obj$maxprojs[[s]] - dfg <- ncol(Ug) - + peng <- obj$maxpens[[s]] if (s > 1) { etas <- obj$cumprojs[[s-1]] %*% eta Zs <- obj$cumprojs[[s-1]] %*% Z @@ -19,25 +18,34 @@ truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { num.projs <- length(obj$projections[[s]]) if (num.projs == 0) { return(list(Intervals(c(-Inf,0)))) + } else { lapply(1:num.projs, function(l) { - Uh <- obj$projections[[s]][[l]] - dfh <- ncol(Uh) - # The quadratic form corresponding to - # (t*U + Z)^T %*% Q %*% (t*U + Z) \geq 0 - # we find the roots in t, if there are any - # and return the interval of potential t + Uh <- obj$projections[[s]][[l]] + penh <- obj$aicpens[[s]][[l]] + # The quadratic form corresponding to + # (t*U + Z)^T %*% Q %*% (t*U + Z) \geq 0 + # we find the roots in t, if there are any + # and return the interval of potential t + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) + }) + } + # LL is a list of intervals + }) + # L is now a list of lists of intervals + return(unlist(L, recursive = FALSE, use.names = FALSE)) +} +quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) { Uheta <- t(Uh) %*% etas Ugeta <- t(Ug) %*% etas UhZ <- t(Uh) %*% Zs UgZ <- t(Ug) %*% Zs etasZs <- t(etas) %*% Zs - peng <- obj$maxpens[[s]] - penh <- obj$aicpens[[s]][[l]] pendiff <- peng-penh - if (is.null(obj$sigma)) { + if (is.null(sigma)) { A <- sum(Ugeta^2) * peng - sum(Uheta^2) * penh - sum(etas^2) * pendiff B <- 2 * as.numeric(t(Ugeta) %*% UgZ * peng - t(Uheta) %*% UhZ * penh - etasZs * pendiff) C <- sum(UgZ^2) * peng - sum(UhZ^2) * penh - sum(Zs^2) * pendiff @@ -45,16 +53,9 @@ truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { # Check this A <- sum(Ugeta^2) - sum(Uheta^2) B <- 2 * as.numeric(t(Ugeta) %*% UgZ - t(Uheta) %*% UhZ) - C <- sum(UgZ^2) - sum(UhZ^2) - pendiff + C <- sum(UgZ^2) - sum(UhZ^2) - sigma^2 * pendiff } - - quadratic_roots(A, B, C, tol) - }) - } - # LL is a list of intervals - }) - # L is now a list of lists of intervals - return(unlist(L, recursive = FALSE, use.names = FALSE)) + return(list(A = A, B = B, C= C)) } quadratic_roots <- function(A, B, C, tol) { From 78f1cb84218ee23d8e1fa03198b0309e1d970ce1 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 7 Oct 2015 14:12:01 -0700 Subject: [PATCH 023/396] AICstop may be finished. 
Needs more testing --- selectiveInference/R/funs.groupfs.R | 73 +++++++++++++-------------- selectiveInference/R/funs.quadratic.R | 36 ++++++------- 2 files changed, 55 insertions(+), 54 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 686d248c..b6b95b6e 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -165,9 +165,6 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE attr(value, "k") <- k attr(value, "aicstop") <- aicstop attr(value, "stopped") <- stopped - if (stopped) { - value$stopquads <- stopquads - } if (is.null(attr(x, "varnames"))) { attr(value, "varnames") <- colnames(x) } else { @@ -212,8 +209,8 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { aicpens[[key]] <- exp(k*dfi/n) terms[[key]] <- (sum(yr^2) - sum(uy^2)) * aicpens[[key]] } else { - aicpens[[key]] <- k * dfi - terms[[key]] <- (sum(yr^2) - sum(uy^2))/sigma^2 + aicpens[[key]] + aicpens[[key]] <- sigma^2 * k * dfi + terms[[key]] <- (sum(yr^2) - sum(uy^2)) + aicpens[[key]] } } @@ -253,6 +250,10 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' } groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, verbose = FALSE) { + if (!is.null(obj$cvobj) && attr(obj, "stopped")) { + stop("Cross-validation and early stopping cannot be used simultaneously") + } + type <- match.arg(type) n <- nrow(obj$x) p <- ncol(obj$x) @@ -309,7 +310,6 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve intervallist <- truncationRegion(obj, TC, R, eta, Z) if (!is.null(obj$cvobj)) { - if (attr(obj, "stopped")) stop("Cross-validation and early stopping cannot be used simultaneously") intervallist <- c(intervallist, do.call(c, lapply(obj$cvobj, function(cvf) { truncationRegion(cvf, TC, R[-cvf$fold], eta[-cvf$fold], Z[-cvf$fold]) @@ -324,40 +324,39 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve })) } if (attr(obj, "stopped")) { - - ulist <- etalist <- zlist <- penlist <- vector("list", aicstop) - for (s in seq(aicstop)) { - stepind <- maxsteps - aicstop + s - if (stepind > 1) { - etalist[[s]] <- cumprojs[[stepind]] %*% eta - zlist[[s]] <- cumprojs[[stepind]] %*% Z - } else { - etalist[[s]] <- eta - zlist[[s]] <- Z - } - ulist[[s]] <- obj$maxprojs[[stepind]] - penlist[[s]] <- obj$maxpens[[stepind]] + aicstop <- attr(obj, "aicstop") + + ulist <- etalist <- zlist <- penlist <- vector("list", aicstop+1) + for (s in seq(aicstop+1)) { + stepind <- maxsteps - (aicstop+1) + s +# print(c(s, stepind)) + if (stepind > 1) { + etalist[[s]] <- obj$cumprojs[[stepind-1]] %*% eta + zlist[[s]] <- obj$cumprojs[[stepind-1]] %*% Z + } else { + etalist[[s]] <- eta + zlist[[s]] <- Z } + ulist[[s]] <- obj$maxprojs[[stepind]] + penlist[[s]] <- obj$maxpens[[stepind]] + } intervallist <- c(intervallist, - lapply(0:(aicstop-1), function(s) { - # ^ fix this ^ - # check indexing direction - Ug <- ulist[[s]] - Uh <- ulist[[s+1]] - peng <- penlist[[s]] - penh <- penlist[[s+1]] - etag <- etalist[[s]] - etah <- etalist[[s+1]] - Zg <- zlist[[s]] - Zh <- zlist[[s+1]] - - coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) - quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) - - # additional constant term - # pendiff * (sum(rs^2) - sum(r_(s-1)^2)) ? 
- })) + do.call(c, lapply(1:aicstop, function(s) { + lapply((s+1):(aicstop+1), function(sp) { + Ug <- ulist[[s]] + Uh <- ulist[[sp]] + peng <- penlist[[s]] + penh <- prod(unlist(penlist[s:sp])) + etag <- etalist[[s]] + etah <- etalist[[sp]] + Zg <- zlist[[s]] + Zh <- zlist[[sp]] + + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + }) + }))) } # Compute intersection: diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index db73cc87..53372be5 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -39,23 +39,25 @@ truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { } quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) { - Uheta <- t(Uh) %*% etas - Ugeta <- t(Ug) %*% etas - UhZ <- t(Uh) %*% Zs - UgZ <- t(Ug) %*% Zs - etasZs <- t(etas) %*% Zs - pendiff <- peng-penh - if (is.null(sigma)) { - A <- sum(Ugeta^2) * peng - sum(Uheta^2) * penh - sum(etas^2) * pendiff - B <- 2 * as.numeric(t(Ugeta) %*% UgZ * peng - t(Uheta) %*% UhZ * penh - etasZs * pendiff) - C <- sum(UgZ^2) * peng - sum(UhZ^2) * penh - sum(Zs^2) * pendiff - } else { + # g indexes minimizer, h the comparison + Uheta <- t(Uh) %*% etah + Ugeta <- t(Ug) %*% etag + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + etaZh <- t(etah) %*% Zh + etaZg <- t(etag) %*% Zg + if (is.null(sigma)) { + # Check the signs, make it consistent + A <- penh * (sum(etah^2) - sum(Uheta^2)) - peng * (sum(etag)^2 - sum(Ugeta^2)) + B <- 2 * penh * (etaZh - t(Uheta) %*% UhZ) - 2 * peng * (etaZg - t(Ugeta) %*% UgZ) + C <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) + } else { # Check this - A <- sum(Ugeta^2) - sum(Uheta^2) - B <- 2 * as.numeric(t(Ugeta) %*% UgZ - t(Uheta) %*% UhZ) - C <- sum(UgZ^2) - sum(UhZ^2) - sigma^2 * pendiff - } - return(list(A = A, B = B, C= C)) + A <- (sum(etah^2) - sum(Uheta^2)) - (sum(etag)^2 - sum(Ugeta^2)) + B <- 2 * (etaZh - t(Uheta) %*% UhZ) - 2 * (etaZg - t(Ugeta) %*% UgZ) + C <- (sum(Zh^2) - sum(UhZ^2) + penh) - (sum(Zg^2) - sum(UgZ^2) + peng) + } + return(list(A = A, B = B, C= C)) } quadratic_roots <- function(A, B, C, tol) { @@ -107,7 +109,7 @@ quadratic_roots <- function(A, B, C, tol) { if (B > 0) { return(Intervals(c(-Inf, max(0, -C/B)))) } else { - if (-C/B < 0) stop("Error: infeasible linear equation") + if (-C/B < 0) stop("Infeasible linear equation") return(Intervals(rbind(c(-Inf, 0), c(-C/B, Inf)))) } } else { From c2b4cf0853d96e0efe0374c0abb24e4a771f1387 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 7 Oct 2015 17:56:54 -0700 Subject: [PATCH 024/396] Fixed some critical typos, more testing of AICstop --- forLater/josh/funs.sims.R | 46 +++++++++++++++++++++++++++ forLater/josh/sim.aicstop.R | 45 ++++++++++++++++++++++++++ selectiveInference/R/funs.groupfs.R | 6 ++-- selectiveInference/R/funs.quadratic.R | 11 +++---- 4 files changed, 99 insertions(+), 9 deletions(-) create mode 100644 forLater/josh/funs.sims.R create mode 100644 forLater/josh/sim.aicstop.R diff --git a/forLater/josh/funs.sims.R b/forLater/josh/funs.sims.R new file mode 100644 index 00000000..7f36d451 --- /dev/null +++ b/forLater/josh/funs.sims.R @@ -0,0 +1,46 @@ +# Functions for simulation/testing + +randomGroupSizes <- function(G, lambda = 2) return(2 + rpois(G, lambda)) + +randomGroups <- function(G, lambda = 2) { + rles <- randomGroupSizes(G, lambda) + return(rep(1:G, rles)) +} + +randomIndexFixedP <- 
function(p, G) sort(c(sample(1:G), sample(1:G, size = p-G, replace=T))) + +randomFactorDesign <- function(n, G, lambda = 2) { + if (n < (1+lambda)*G) stop("Larger n required to avoid duplicate columns") + rles <- randomGroupSizes(G, lambda) + print(rles) + df <- data.frame(do.call(cbind, lapply(rles, function(g) { + sample(LETTERS[1:g], n, replace = TRUE, prob = runif(g)) + })), stringsAsFactors = TRUE) + if (any(apply(df, 2, function(col) length(unique(col))) == 1)) return(randomFactorDesign(n, G, lambda)) + fd <- factorDesign(df) + if (any(duplicated(fd$x, MARGIN = 2))) return(randomFactorDesign(n, G, lambda)) + return(list(df=df, fd=fd)) +} + +randomFactorsFixedP <- function(p, G) { +# index <- +} + +randomGaussianFixedP <- function(n, p, G = p, sparsity = 0, snr = 0, sigma = 1, rho = 0) { + index <- 1:p + if (G < p) index <- randomIndexFixedP(p, G) + x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } + beta <- rep(0, p) + if (sparsity > 0 && snr > 0) { + for (j in 1:sparsity) { + inds <- which(index == j) + beta[inds] <- snr/sqrt(length(inds)) + } + } + y <- x %*% beta + sigma * rnorm(n) + return(list(x=x, y=y, beta = beta, index=index, sigma = sigma)) +} diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R new file mode 100644 index 00000000..33037d2c --- /dev/null +++ b/forLater/josh/sim.aicstop.R @@ -0,0 +1,45 @@ +library(intervals) +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +niters <- 500 +n <- 100 +p <- 100 +G <- 50 +maxsteps <- 15 +sparsity <- 5 +snr <- 1 +rho <- .1 +aicstop <- 1 + +instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { + + simd <- randomGaussianFixedP(n, p, G, sparsity, snr, rho) + x <- simd$x + y <- simd$y + index <- simd$index + + fit <- groupfs(x, y, index, maxsteps, k = log(n), aicstop = aicstop) + pvals <- groupfsInf(fit, verbose=T) + return(list(variable = fit$action, pvals = pvals$pv, stopped = attr(fit, "stopped"))) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, G, sparsity, snr, rho, maxsteps, aicstop)) +}) + +stopped <- do.call(c, list(output[3,])) +pvals <- do.call(c, list(output[2,])) +vars <- do.call(c, list(output[1,])) + +save(pvals, vars, stopped, file = paste0( + "results_aic", aicstop, "_n", n, + "_p", p, + "_sparsity", sparsity, + "_snr", snr, + ".RData")) + +print(time) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index b6b95b6e..ae5e9720 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -304,9 +304,9 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve TC <- sqrt(sum(R^2)) eta <- Ugtilde %*% R / TC Z <- obj$y - eta * TC - df <- ncol(Ugtilde) + dfi <- ncol(Ugtilde) TCs[j] <- TC - dfs[j] <- df + dfs[j] <- dfi intervallist <- truncationRegion(obj, TC, R, eta, Z) if (!is.null(obj$cvobj)) { @@ -369,7 +369,7 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve supports[[j]] <- E # E is now potentially a union of intervals - pvals[j] <- TC_surv(TC, sigma, df, E) + pvals[j] <- TC_surv(TC, sigma, dfi, E) } if (nanconv) warning("P-value NaNs of the form 0/0 converted to 0. 
This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.") names(pvals) <- obj$action diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 53372be5..2a15ce24 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -18,12 +18,11 @@ truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { num.projs <- length(obj$projections[[s]]) if (num.projs == 0) { return(list(Intervals(c(-Inf,0)))) - + } else { lapply(1:num.projs, function(l) { - Uh <- obj$projections[[s]][[l]] - penh <- obj$aicpens[[s]][[l]] + penh <- obj$aicpens[[s]][[l]] # The quadratic form corresponding to # (t*U + Z)^T %*% Q %*% (t*U + Z) \geq 0 # we find the roots in t, if there are any @@ -48,14 +47,14 @@ quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh etaZg <- t(etag) %*% Zg if (is.null(sigma)) { # Check the signs, make it consistent - A <- penh * (sum(etah^2) - sum(Uheta^2)) - peng * (sum(etag)^2 - sum(Ugeta^2)) + A <- penh * (sum(etah^2) - sum(Uheta^2)) - peng * (sum(etag^2) - sum(Ugeta^2)) B <- 2 * penh * (etaZh - t(Uheta) %*% UhZ) - 2 * peng * (etaZg - t(Ugeta) %*% UgZ) C <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) } else { # Check this - A <- (sum(etah^2) - sum(Uheta^2)) - (sum(etag)^2 - sum(Ugeta^2)) + A <- (sum(etah^2) - sum(Uheta^2)) - (sum(etag^2) - sum(Ugeta^2)) B <- 2 * (etaZh - t(Uheta) %*% UhZ) - 2 * (etaZg - t(Ugeta) %*% UgZ) - C <- (sum(Zh^2) - sum(UhZ^2) + penh) - (sum(Zg^2) - sum(UgZ^2) + peng) + C <- (sum(Zh^2) - sum(UhZ^2) + penh) - (sum(Zg^2) - sum(UgZ^2) + peng) } return(list(A = A, B = B, C= C)) } From 0c4cfc0ae29ceb4637f0c55258f44f8a640cd319 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 7 Oct 2015 17:59:48 -0700 Subject: [PATCH 025/396] sourcing sim functions file --- forLater/josh/sim.aicstop.R | 1 + 1 file changed, 1 insertion(+) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 33037d2c..f2bfa66d 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -1,4 +1,5 @@ library(intervals) +source("funs.sims.R") source("selectiveInference/R/cv.R") source("../../selectiveInference/R/funs.groupfs.R") source("../../selectiveInference/R/funs.quadratic.R") From a730e91fbd69c0c3c84891170bee28b31bb19fb6 Mon Sep 17 00:00:00 2001 From: tibs Date: Sun, 11 Oct 2015 09:03:09 -0700 Subject: [PATCH 026/396] rob fixed lucas bug --- selectiveInference/R/funs.fixed.R | 4 ++++ selectiveInference/man/selectiveInference.Rd | 17 +++++++++++++++++ tests/test.fixed.R | 6 ++++++ 3 files changed, 27 insertions(+) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index f3d99ea4..d3760907 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -37,6 +37,10 @@ fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha= "(to within specified tolerances)")) vars = which(abs(beta) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } if (any(sign(g[vars]) != sign(beta[vars]))) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances). 
You might try rerunning", diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index e08f69ab..0b1ca4c0 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -139,6 +139,23 @@ arXiv:1405.3340. #mu = c(rep(signal, floor (n/5)), rep(0, n-floor(n/5))) # 20% of elements get the signal; rest 0 #y = mu + rnorm (n, 0, 1) #mmObj = manyMeans(y, bh.q=0.1) +# +# +# Forward stepwise with grouped variables +#set.seed(1) +#n <- 40 +#p <- 20 +#index <- sort(rep(1:(p/2), 2)) +#steps <- 10 +#sparsity <- 5 +#snr <- 3 +# x <- matrix(rnorm(n*p), nrow=n) +# beta <- rep(0, p) +# beta[which(index %in% 1:sparsity)] <- snr +# y <- x %*% beta+rnorm(n) +#fit <- groupfs(x, y, index=1:p, maxsteps = steps) +#out<- groupfsInf(fit) + ##estimation of sigma for use in fsInf or larInf or fixedLassoInf diff --git a/tests/test.fixed.R b/tests/test.fixed.R index e214fe97..624fb65d 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -281,3 +281,9 @@ gfit = glmnet(X,y,standardize=F) coef = coef(gfit, s=lam/n, exact=T)[-1] sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha,type="partial") # Error in v %*% diag(d) : non-conformable arguments + +## lucas again + +load("params_for_Rob.rdata") #variables: X, y, coef, lam, sigma, alpha + +sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha,type="partial") From 9c19d7bbc7dd9e4cf34b71cb79a6690576a8c50b Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 12 Oct 2015 13:06:55 -0700 Subject: [PATCH 027/396] groupfs computes and stores coefficients --- selectiveInference/R/funs.groupfs.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index ae5e9720..447afe9b 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -154,8 +154,13 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE aic.last <- added$AIC } + # Is there a better way of doing this? + # Use some projections already computed? 
+ beta <- coef(lm(y.begin ~ x.begin[,index %in% path.info$imax]-1)) + names(beta) <- index[index %in% path.info$imax] + # Create output object - value <- list(action=path.info$imax, L=path.info$L, AIC=path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, bx = xm, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) + value <- list(action=path.info$imax, L=path.info$L, AIC=path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, coefficients = beta, bx = xm, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) class(value) <- "groupfs" attr(value, "labels") <- labels @@ -301,6 +306,7 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve # Project y onto what remains of x_i Ugtilde <- svdu_thresh(x_i) R <- t(Ugtilde) %*% obj$y + print(R) TC <- sqrt(sum(R^2)) eta <- Ugtilde %*% R / TC Z <- obj$y - eta * TC From 5e1bbee3da78a2cfdcb0580c506dbb8cbfe0776a Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 12 Oct 2015 14:56:22 -0700 Subject: [PATCH 028/396] Added coef and predict functions for groupfs --- selectiveInference/R/funs.groupfs.R | 34 ++++++++++++++++++++--- selectiveInference/man/factorDesign.Rd | 3 +- selectiveInference/man/groupfs.Rd | 11 ++++---- selectiveInference/man/groupfsInf.Rd | 12 +++++--- selectiveInference/man/predict.groupfs.Rd | 23 +++++++++++++++ selectiveInference/man/scaleGroups.Rd | 3 +- 6 files changed, 68 insertions(+), 18 deletions(-) create mode 100644 selectiveInference/man/predict.groupfs.Rd diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 447afe9b..4e19bcb8 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -158,13 +158,14 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE # Use some projections already computed? 
     beta <- coef(lm(y.begin ~ x.begin[,index %in% path.info$imax]-1))
     names(beta) <- index[index %in% path.info$imax]
-    
+
     # Create output object
-    value <- list(action=path.info$imax, L=path.info$L, AIC=path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, coefficients = beta, bx = xm, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms)
+    value <- list(action = path.info$imax, L = path.info$L, AIC = path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, coefficients = beta, bx = xm, by = by, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms)
     class(value) <- "groupfs"
+    attr(value, "center") <- center
+    attr(value, "normalize") <- normalize
     attr(value, "labels") <- labels
-    attr(value, "index") <- index
     attr(value, "maxsteps") <- maxsteps
     attr(value, "sigma") <- sigma
     attr(value, "k") <- k
@@ -306,7 +307,6 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve
       # Project y onto what remains of x_i
       Ugtilde <- svdu_thresh(x_i)
       R <- t(Ugtilde) %*% obj$y
-      print(R)
       TC <- sqrt(sum(R^2))
       eta <- Ugtilde %*% R / TC
       Z <- obj$y - eta * TC
@@ -562,6 +562,32 @@ print.groupfs <- function(x, ...) {
     invisible()
 }
 
+
+coef.groupfs <- function(object, ...) {
+    return(object$coefficients)
+}
+
+#' @name predict.groupfs
+#' @aliases predict.groupfs
+#' @aliases coef.groupfs
+#'
+#' @title Prediction and coefficient functions for \code{\link{groupfs}}.
+#'
+#' Make predictions or extract coefficients from a groupfs forward stepwise object.
+#'
+#' @param object Object returned by a call to \code{\link{groupfs}}.
+#' @param newx Matrix of x values at which the predictions are desired. If missing or NULL, the x values from groupfs fitting are used.
+#' @return A vector of predictions or a vector of coefficients.
+predict.groupfs <- function(object, newx, ...) {
+    beta <- coef.groupfs(object)
+    if (missing(newx) || is.null(newx)) {
+        newx <- object$x
+    } else {
+        newx <- scaleGroups(newx, object$index, attr(object, "center"), attr(object, "normalize"))
+    }
+    return(newx[, object$index %in% object$action] %*% beta + ifelse(object$intercept, object$by, 0))
+}
+
 print.groupfsInf <- function(x, ...)
{ cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) action <- x$vars diff --git a/selectiveInference/man/factorDesign.Rd b/selectiveInference/man/factorDesign.Rd index d5d2576d..3a42e2ea 100644 --- a/selectiveInference/man/factorDesign.Rd +++ b/selectiveInference/man/factorDesign.Rd @@ -1,5 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/funs.groupfs.R +% Generated by roxygen2 (4.0.1.99): do not edit by hand \name{factorDesign} \alias{factorDesign} \title{Expand a data frame with factors to form a design matrix with the full binary encoding of each factor.} diff --git a/selectiveInference/man/groupfs.Rd b/selectiveInference/man/groupfs.Rd index c279c9ec..78cd32f2 100644 --- a/selectiveInference/man/groupfs.Rd +++ b/selectiveInference/man/groupfs.Rd @@ -1,11 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/funs.groupfs.R +% Generated by roxygen2 (4.0.1.99): do not edit by hand \name{groupfs} \alias{groupfs} \title{Select a model with forward stepwise.} \usage{ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, - center = TRUE, normalize = TRUE, aicstop = FALSE, verbose = FALSE) + center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE) } \arguments{ \item{x}{Matrix of predictors (n by p).} @@ -26,7 +25,7 @@ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, \item{normalize}{Should the design matrix be normalized? Default is TRUE.} -\item{aicstop}{Early stopping if AIC increases, default is FALSE.} +\item{aicstop}{Early stopping if AIC increases. Default is 0 corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}.} \item{verbose}{Print out progress along the way? Default is FALSE.} } @@ -39,11 +38,11 @@ This function implements forward selection of linear models almost identically t \examples{ x = matrix(rnorm(20*40), nrow=20) index = sort(rep(1:20, 2)) -y = rnorm(20) + 2 * (x[,1] - x[,2]) - (x[,3] - x[,4]) +y = rnorm(20) + 2 * x[,1] - x[,4] fit = groupfs(x, y, index, maxsteps = 5) pvals = groupfsInf(fit) } \seealso{ -\code{\link{groupfsInf}} +\code{\link{groupfsInf}}, \code{\link{factorDesign}}. } diff --git a/selectiveInference/man/groupfsInf.Rd b/selectiveInference/man/groupfsInf.Rd index f5ee9700..7c7190fa 100644 --- a/selectiveInference/man/groupfsInf.Rd +++ b/selectiveInference/man/groupfsInf.Rd @@ -1,20 +1,24 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/funs.groupfs.R +% Generated by roxygen2 (4.0.1.99): do not edit by hand \name{groupfsInf} \alias{groupfsInf} \title{Compute selective p-values for a model fitted by \code{groupfs}.} \usage{ -groupfsInf(obj, sigma = NULL, verbose = FALSE) +groupfsInf(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, + verbose = FALSE) } \arguments{ \item{obj}{Object returned by \code{\link{groupfs}} function} \item{sigma}{Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. 
In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate.} +\item{type}{Type of conditional p-values to compute. With "all" (default), p-values are computed conditional on the final model with all variables up to \code{maxsteps}; with "aic" the number of steps is chosen after which the AIC criterion increases \code{ntimes} in a row, and then the same type of analysis as in "all" is carried out for the active variables at that number of steps.} + +\item{ntimes}{Number of steps for which AIC criterion has to increase before minimizing point is declared.} + \item{verbose}{Print out progress along the way? Default is FALSE.} } \value{ -An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. The default printing behavior should supply adequate information. +An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. \describe{ \item{vars}{Labels of the active groups in the order they were included.} diff --git a/selectiveInference/man/predict.groupfs.Rd b/selectiveInference/man/predict.groupfs.Rd new file mode 100644 index 00000000..4a4c1497 --- /dev/null +++ b/selectiveInference/man/predict.groupfs.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.0.1.99): do not edit by hand +\name{predict.groupfs} +\alias{predict.groupfs} +\title{Prediction and coefficient functions for \code{\link{groupfs}}. + +Make predictions or extract coefficients from a groupfs forward stepwise object.} +\usage{ +\method{predict}{groupfs}(object, newx, ...) +} +\arguments{ +\item{object}{Object returned by a call to \code{\link{groupfs}}.} + +\item{newx}{Matrix of x values at which the predictions are desired. If NULL, the x values from groupfs fitting are used.} +} +\value{ +A vector of predictions or a vector of coefficients. +} +\description{ +Prediction and coefficient functions for \code{\link{groupfs}}. + +Make predictions or extract coefficients from a groupfs forward stepwise object. +} + diff --git a/selectiveInference/man/scaleGroups.Rd b/selectiveInference/man/scaleGroups.Rd index e5a93fab..1e7e1c4a 100644 --- a/selectiveInference/man/scaleGroups.Rd +++ b/selectiveInference/man/scaleGroups.Rd @@ -1,5 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/funs.groupfs.R +% Generated by roxygen2 (4.0.1.99): do not edit by hand \name{scaleGroups} \alias{scaleGroups} \title{Center and scale design matrix by groups} From ee616483eb0fe8ff232eb6bf794bf8c1d8674219 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 12 Oct 2015 18:07:44 -0700 Subject: [PATCH 029/396] Fixing an aicstop bug --- selectiveInference/R/funs.groupfs.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 4e19bcb8..8dbef781 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -127,9 +127,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE if (verbose) print(round(step.info, 3)) - if (aicstop > 0 && step >= aicstop && aic.last < added$AIC) { - ########## Incomplete ########## - ## cut off the last aicstop variables as well? 
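    # NB: "maxstep" in the condition below should be "maxsteps"; PATCH 030
    # corrects that typo. The rule itself stops the path once AIC has risen
    # aicstop times in a row, detected via first differences. A standalone
    # sketch of the check (illustrative AIC path):
    #   aics <- c(100, 90, 85, 87, 91)
    #   aicstop <- 2; step <- length(aics) - 1
    #   all(diff(aics[(step + 1 - aicstop):(step + 1)]) > 0)   # TRUE: two rises in a row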
+ if (aicstop > 0 && step < maxstep && step >= aicstop && aic.last < added$AIC) { if (all(diff(c(aic.begin, path.info$AIC)[(step+1-aicstop):(step+1)]) > 0)) { if (is.null(sigma)) { From 0dbf85a32babc0244b8812e847448baffeee717f Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 12 Oct 2015 18:08:20 -0700 Subject: [PATCH 030/396] Typo --- selectiveInference/R/funs.groupfs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 8dbef781..40978085 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -127,7 +127,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE if (verbose) print(round(step.info, 3)) - if (aicstop > 0 && step < maxstep && step >= aicstop && aic.last < added$AIC) { + if (aicstop > 0 && step < maxsteps && step >= aicstop && aic.last < added$AIC) { if (all(diff(c(aic.begin, path.info$AIC)[(step+1-aicstop):(step+1)]) > 0)) { if (is.null(sigma)) { From d6f5d87fad907e34bb2100fe7e01c195662db1eb Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 12 Oct 2015 21:12:59 -0700 Subject: [PATCH 031/396] Created .gitignore --- .gitignore | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..863d8434 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +**DS_Store** +**Rcheck** +**tar.gz +**Rapp.history +**.pdf +**.RData +**.o +**.so +forLater/josh/** \ No newline at end of file From fa8636291c7f06473789e5afa5d1e6b05cb8989a Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 12 Oct 2015 21:30:51 -0700 Subject: [PATCH 032/396] Changed SNR in simulation function --- forLater/josh/funs.sims.R | 2 +- forLater/josh/sim.aicstop.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/forLater/josh/funs.sims.R b/forLater/josh/funs.sims.R index 7f36d451..e51dd2fc 100644 --- a/forLater/josh/funs.sims.R +++ b/forLater/josh/funs.sims.R @@ -38,7 +38,7 @@ randomGaussianFixedP <- function(n, p, G = p, sparsity = 0, snr = 0, sigma = 1, if (sparsity > 0 && snr > 0) { for (j in 1:sparsity) { inds <- which(index == j) - beta[inds] <- snr/sqrt(length(inds)) + beta[inds] <- snr/sqrt(n*length(inds)) } } y <- x %*% beta + sigma * rnorm(n) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index f2bfa66d..014d18d0 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -6,7 +6,7 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 500 +niters <- 200 n <- 100 p <- 100 G <- 50 From 7ebb7e021da06a415061c5c0d62317364255ac5c Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 12 Oct 2015 21:37:54 -0700 Subject: [PATCH 033/396] Modifying simulations --- forLater/josh/funs.sims.R | 2 +- forLater/josh/sim.aicstop.R | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/forLater/josh/funs.sims.R b/forLater/josh/funs.sims.R index e51dd2fc..3fdd7dd6 100644 --- a/forLater/josh/funs.sims.R +++ b/forLater/josh/funs.sims.R @@ -38,7 +38,7 @@ randomGaussianFixedP <- function(n, p, G = p, sparsity = 0, snr = 0, sigma = 1, if (sparsity > 0 && snr > 0) { for (j in 1:sparsity) { inds <- which(index == j) - beta[inds] <- snr/sqrt(n*length(inds)) + beta[inds] <- sample(c(-1,1), length(inds), replace=T) * snr/sqrt(n*length(inds)) } } y <- x %*% beta + sigma * rnorm(n) diff --git a/forLater/josh/sim.aicstop.R 
b/forLater/josh/sim.aicstop.R index 014d18d0..72485732 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -6,10 +6,10 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 200 +niters <- 100 n <- 100 -p <- 100 -G <- 50 +p <- 200 +G <- 100 maxsteps <- 15 sparsity <- 5 snr <- 1 @@ -24,7 +24,7 @@ instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { index <- simd$index fit <- groupfs(x, y, index, maxsteps, k = log(n), aicstop = aicstop) - pvals <- groupfsInf(fit, verbose=T) + pvals <- groupfsInf(fit, sigma = 1, verbose=T) return(list(variable = fit$action, pvals = pvals$pv, stopped = attr(fit, "stopped"))) } @@ -41,6 +41,6 @@ save(pvals, vars, stopped, file = paste0( "_p", p, "_sparsity", sparsity, "_snr", snr, - ".RData")) + "_known.RData")) print(time) From 844235e56fa30e130593e59caefc0ceca2528577 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 12 Oct 2015 21:49:10 -0700 Subject: [PATCH 034/396] More changing simulations --- forLater/josh/funs.sims.R | 2 +- forLater/josh/sim.aicstop.R | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/forLater/josh/funs.sims.R b/forLater/josh/funs.sims.R index 3fdd7dd6..f07fa551 100644 --- a/forLater/josh/funs.sims.R +++ b/forLater/josh/funs.sims.R @@ -38,7 +38,7 @@ randomGaussianFixedP <- function(n, p, G = p, sparsity = 0, snr = 0, sigma = 1, if (sparsity > 0 && snr > 0) { for (j in 1:sparsity) { inds <- which(index == j) - beta[inds] <- sample(c(-1,1), length(inds), replace=T) * snr/sqrt(n*length(inds)) + beta[inds] <- sqrt(2*log(G)) * sample(c(-1,1), length(inds), replace=T) * snr/sqrt(n*length(inds)) } } y <- x %*% beta + sigma * rnorm(n) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 72485732..466f9b4f 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -8,17 +8,18 @@ source("../../selectiveInference/R/funs.common.R") set.seed(1) niters <- 100 n <- 100 -p <- 200 -G <- 100 +p <- 100 +G <- 50 maxsteps <- 15 sparsity <- 5 snr <- 1 +sigma <- 1 rho <- .1 aicstop <- 1 instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { - simd <- randomGaussianFixedP(n, p, G, sparsity, snr, rho) + simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma, rho) x <- simd$x y <- simd$y index <- simd$index From 978d53ee98e7109bafbb673079f32549ee18c783 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 4 Nov 2015 22:04:24 -0800 Subject: [PATCH 035/396] Started including F test code for groupfsInf --- selectiveInference/R/funs.groupfs.R | 174 ++++++++++++++++++-------- selectiveInference/R/funs.quadratic.R | 91 ++++++++++---- 2 files changed, 183 insertions(+), 82 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 40978085..148e35a7 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -239,8 +239,6 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' #' @param obj Object returned by \code{\link{groupfs}} function #' @param sigma Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate. -#' @param type Type of conditional p-values to compute. 
With "all" (default), p-values are computed conditional on the final model with all variables up to \code{maxsteps}; with "aic" the number of steps is chosen after which the AIC criterion increases \code{ntimes} in a row, and then the same type of analysis as in "all" is carried out for the active variables at that number of steps. -#' @param ntimes Number of steps for which AIC criterion has to increase before minimizing point is declared. #' @param verbose Print out progress along the way? Default is FALSE. #' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. #' @@ -252,13 +250,14 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' \item{df}{Rank of group of variables when it was added to the model.} #' \item{support}{List of intervals defining the truncation region of the truncated chi.} #' } -groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, verbose = FALSE) { +groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { if (!is.null(obj$cvobj) && attr(obj, "stopped")) { - stop("Cross-validation and early stopping cannot be used simultaneously") + stop("Cross-validation and early stopping cannot be used simultaneously.") + # This shouldn't happen in the first place! + # (it wouldn't anyway unless someone tries to trick it) } - type <- match.arg(type) n <- nrow(obj$x) p <- ncol(obj$x) maxsteps <- attr(obj, "maxsteps") @@ -268,8 +267,7 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve y <- obj$y Ep <- sum(index %in% obj$action) - nanconv <- FALSE - pvals = dfs = TCs = numeric(maxsteps) + pvals = dfs = Tstats = numeric(maxsteps) supports <- list() if (!is.null(sigma)) { @@ -278,6 +276,7 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve } } else { if (is.null(obj$sigma)) { + type <- "TF" if (n >= 2*p) { sigma <- sqrt(sum(lsfit(obj$x, obj$y, intercept = obj$intercept)$res^2)/(n-p-obj$intercept)) } else { @@ -285,6 +284,7 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve warning(paste(sprintf("p > n/2, and sigmahat = %0.3f used as an estimate of sigma;",sigma), "you may want to use the estimateSigma function")) } } else { + type <- "TC" sigma <- obj$sigma } } @@ -293,47 +293,92 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve for (j in 1:maxsteps) { i <- obj$action[j] if (verbose) cat(paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i, "\n")) - # Form projection onto active set minus i - # and project x_i orthogonally - x_i <- obj$x[,which(obj$index == i), drop = FALSE] - if (length(obj$action) > 1) { - minus_i <- setdiff(obj$action, i) - x_minus_i <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) - x_i <- x_i - x_minus_i %*% t(x_minus_i) %*% x_i + + if (!is.null(obj$sigma)) { + # Form projection onto active set minus i + # and project x_i orthogonally + x_i <- obj$x[,which(obj$index == i), drop = FALSE] + if (length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + x_minus_i <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) + x_i <- x_i - x_minus_i %*% t(x_minus_i) %*% x_i + } + + # Project y onto what remains of x_i + Ugtilde <- svdu_thresh(x_i) + R <- t(Ugtilde) %*% obj$y + TC <- sqrt(sum(R^2)) + eta <- Ugtilde %*% R / TC + Z <- obj$y - eta * TC + dfi <- ncol(Ugtilde) + 
Tstats[j] <- TC + dfs[j] <- dfi + + ydecomp <- list(R=R, Z=Z, eta=eta) + + } else { + + Pfull <- svdu_thresh(obj$x[,which(obj$index %in% obj$action), drop = FALSE]) + if (length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + Psub <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) + Z <- Psub %*% y + C <- (ncol(Pfull) - ncol(Psub))/(n - ncol(Pfull)) + } else { + Z <- 0 + C <- ncol(Pfull)/(n-ncol(Pfull)) + } + R1 <- y - z + R2 <- y - Pfull %*% y + R <- sqrt(sum(R1^2)) + U <- R1/r + delta <- R1-R2 + Vdelta <- delta/sqrt(sum(delta^2)) + V2 <- R2/sqrt(sum(R2^2)) + ydecomp <- list(R=R, Z=Z, Vd=Vdelta, V2=V2) + } + + intervallist <- truncationRegion(obj, ydecomp, type) - # Project y onto what remains of x_i - Ugtilde <- svdu_thresh(x_i) - R <- t(Ugtilde) %*% obj$y - TC <- sqrt(sum(R^2)) - eta <- Ugtilde %*% R / TC - Z <- obj$y - eta * TC - dfi <- ncol(Ugtilde) - TCs[j] <- TC - dfs[j] <- dfi - - intervallist <- truncationRegion(obj, TC, R, eta, Z) + # Additional constraints from cross-validation? if (!is.null(obj$cvobj)) { intervallist <- c(intervallist, do.call(c, - lapply(obj$cvobj, function(cvf) { - truncationRegion(cvf, TC, R[-cvf$fold], eta[-cvf$fold], Z[-cvf$fold]) - }))) + lapply(obj$cvobj, function(cvf) { + if (type == "TC") { + ydecomp <- list(R=R[-cvf$fold], eta=eta[-cvf$fold], Z=Z[-cvf$fold]) + } else { + #################################### + # THINK ABOUT THIS AGAIN + # when you're not falling asleep + #################################### + ydecomp <- list(R=R[-cvf$fold], Z=Z[-cvf$fold], Vd=Vdelta[-cvf$fold], V2=V2[-cvf$fold]) + } + truncationRegion(cvf, ydecomp, type) + }))) intervallist <- c(intervallist, lapply(obj$cvquad, function(cvquad) { - etacvquad <- t(eta) %*% cvquad - A <- etacvquad %*% eta - B <- 2 * etacvquad %*% Z - C <- t(Z) %*% cvquad %*% Z - quadratic_roots(A, B, C, tol = 1e-15) + if (type == "TC") { + etacvquad <- t(eta) %*% cvquad + A <- etacvquad %*% eta + B <- 2 * etacvquad %*% Z + C <- t(Z) %*% cvquad %*% Z + quadratic_roots(A, B, C, tol = 1e-15) + } else { + #################################### + # DO THIS NEXT + #################################### + } })) } + + # Additional constraints from AIC stopping? 
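    # Each pair of steps s < s' near the stopping point contributes an AIC
    # comparison that is quadratic in the statistic t along the decomposition
    # of y; quadratic_roots() in funs.quadratic.R then turns each constraint
    # A*t^2 + B*t + C >= 0 into intervals. A sketch of the root-finding step
    # this rests on (illustrative coefficients):
    #   polyroot(c(2, -3, 1))   # roots 1 and 2 of t^2 - 3t + 2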
if (attr(obj, "stopped")) { aicstop <- attr(obj, "aicstop") - + ulist <- etalist <- zlist <- penlist <- vector("list", aicstop+1) for (s in seq(aicstop+1)) { stepind <- maxsteps - (aicstop+1) + s -# print(c(s, stepind)) if (stepind > 1) { etalist[[s]] <- obj$cumprojs[[stepind-1]] %*% eta zlist[[s]] <- obj$cumprojs[[stepind-1]] %*% Z @@ -348,22 +393,27 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve intervallist <- c(intervallist, do.call(c, lapply(1:aicstop, function(s) { lapply((s+1):(aicstop+1), function(sp) { - Ug <- ulist[[s]] - Uh <- ulist[[sp]] - peng <- penlist[[s]] - penh <- prod(unlist(penlist[s:sp])) - etag <- etalist[[s]] - etah <- etalist[[sp]] - Zg <- zlist[[s]] - Zh <- zlist[[sp]] - - coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) - quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + if (type == "TC") { + Ug <- ulist[[s]] + Uh <- ulist[[sp]] + peng <- penlist[[s]] + penh <- prod(unlist(penlist[s:sp])) + etag <- etalist[[s]] + etah <- etalist[[sp]] + Zg <- zlist[[s]] + Zh <- zlist[[sp]] + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + } else { +################################### + # DO THIS TOO +################################### + } }) }))) } - # Compute intersection: + # Compute intersection: region <- do.call(interval_union, intervallist) region <- interval_union(region, Intervals(c(-Inf,0))) E <- interval_complement(region, check_valid = FALSE) @@ -371,13 +421,31 @@ groupfsInf <- function(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, ve stop(paste("Empty TC support at step", j)) } supports[[j]] <- E - + # E is now potentially a union of intervals - pvals[j] <- TC_surv(TC, sigma, dfi, E) + if (type == "TC") { + pvals[j] <- TC_surv(TC, sigma, dfi, E) + } else { + pvals[j] <- TF_surv(TF, df1, df2, E) + } + } - if (nanconv) warning("P-value NaNs of the form 0/0 converted to 0. This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.") + + if (any(is.nan(pvals))) { + nanp <- which(is.nan(pvals)) + pvals[nanp] <- 0 + warning(paste0("P-value NaNs of the form 0/0 converted to 0 for group(s) ", paste(obj$action[nanp], collapse=","), ". 
This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.")) + } + names(pvals) <- obj$action - out <- list(vars = obj$action, pv=pvals, sigma=sigma, TC=TCs, df = dfs, support=supports) + out <- list(vars = obj$action, pv=pvals, sigma=sigma) + if (type == "TC") { + out$TC <- Tstats + } else { + out$TF <- Tstats + } + out$df <- dfs + out$support <- supports class(out) <- "groupfsInf" if (!is.null(attr(obj, "varnames"))) { attr(out, "varnames") <- attr(obj, "varnames") @@ -419,10 +487,6 @@ TC_surv <- function(TC, sigma, df, E) { # Survival function value <- numer/denom - if (is.nan(value)) { - value <- 0 - nanconv <- TRUE - } # Force p-value to lie in the [0,1] interval # in case of numerical issues value <- max(0, min(1, value)) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 2a15ce24..335bb789 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -1,8 +1,15 @@ -truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { +truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { n <- nrow(obj$x) - + R <- ydecomp$R + Z <- ydecomp$Z + if (type == "TC") { + eta <- ydecomp$eta + } else { + Vd <- ydecomp$Vd + V2 <- ydecomp$V2 + } L <- lapply(1:length(obj$action), function(s) { Ug <- obj$maxprojs[[s]] @@ -27,8 +34,13 @@ truncationRegion <- function(obj, TC, R, eta, Z, tol = 1e-15) { # (t*U + Z)^T %*% Q %*% (t*U + Z) \geq 0 # we find the roots in t, if there are any # and return the interval of potential t - coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) - quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) + if (type == "TC") { + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) + } else { + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + TF_roots(R, C, coeffs) + } }) } # LL is a list of intervals @@ -119,32 +131,44 @@ quadratic_roots <- function(A, B, C, tol) { } } -roots_to_checkpoints <- function(roots) { - checkpoints <- unique(sort(c(0, roots))) - return(c(0, (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) -} -roots_to_partition <- function(roots) { - checkpoints <- unique(sort(c(0, roots))) - return(list(endpoints = c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) -} +# Efficiently compute coefficients of one-dimensional TF slice function +TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { -# Tchi_roots <- function(Q, a, b, ) + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + UhVd <- t(Uh) %*% Vdh + UgVd <- t(Ug) %*% Vdg + UhV2 <- t(Uh) %*% V2h + UgV2 <- t(Ug) %*% V2g + VdZh <- t(Vdh) %*% Zh + VdZg <- t(Vdg) %*% Zg + V2Zh <- t(V2h) %*% Zh + V2Zg <- t(V2g) %*% Zg + + x0 <- peng * (sum(Zg^2) - sum(UgZ^2)) - penh * (sum(Zh^2) - sum(UhZ^2)) + x1 <- 2*R*(peng * (VdZg - t(UgZ) %*% UgVd) - penh * (VdZh - t(UhZ) %*% UhVd)) + x2 <- 2*R*(peng * (V2Zg - t(UgZ) %*% UgV2) - penh * (V2Zh - t(UhZ) %*% UhV2)) + x12 <- 2*R*(peng * (t(Vdg) %*% V2g - t(UgVd) %*% UgV2) - penh * (t(Vdh) %*% V2h - t(UhVd) %*% UhV2)) + x11 <- R^2*(peng * (sum(Vdg^2) - sum(UgVd^2)) - penh*(sum(Vdh^2) - sum(UhVd^2))) + x22 <- R^2*(peng * (sum(V2g^2) - sum(UgV2^2)) - penh*(sum(V2h^2) - sum(UhV2^2))) + + return(list(x11=x11, x22=x22, x12=x12, x1=x1, x2=x2, x0=x0)) +} -TF_roots <- function(Q, a, b, Vdelta, V2, z, C, r, tol = 1e-14) { 
+# Numerically solve for roots of TF slice using +# hybrid polyroot/uniroot approach +TF_roots <- function(R, C, coeffs, tol = 1e-14) { - # z = y - R1 - VdeltaQ <- t(Vdelta) %*% Q - V2Q <- t(V2) %*% Q - x11 <- VdeltaQ %*% Vdelta - x12 <- 2 * VdeltaQ %*% V2 - x22 <- V2Q %*% V2 - x1 <- 2 * VdeltaQ %*% z + t(a) %*% Vdelta - x2 <- 2 * V2Q %*% z + t(a) %*% V2 - x0 <- t(z) %*% Q %*% z + t(a) %*% z + b + x11 <- coeffs$x11 + x22 <- coeffs$x22 + x12 <- coeffs$x12 + x1 <- coeffs$x1 + x2 <- coeffs$x2 + x0 <- coeffs$x0 - g1 <- function(t) r*sqrt(C*t/(1+C*t)) - g2 <- function(t) r/sqrt(1+C*t) + g1 <- function(t) R*sqrt(C*t/(1+C*t)) + g2 <- function(t) R/sqrt(1+C*t) I <- function(t) x11*g1(t)^2 + x12*g1(t)*g2(t) + x22*g2(t)^2 + x1*g1(t) + x2*g2(t) + x0 z4 <- r*complex(real = -x11 + x22, imaginary = -x12)/2 @@ -152,7 +176,7 @@ TF_roots <- function(Q, a, b, Vdelta, V2, z, C, r, tol = 1e-14) { z2 <- complex(real = r*x11+r*x22+2*x0/r) z1 <- Conj(z3) z0 <- Conj(z4) - zcoefs <- r*c(z0, z1, z2, z3, z4)/2 + zcoefs <- R*c(z0, z1, z2, z3, z4)/2 croots <- polyroot(zcoefs) thetas <- Arg(croots) modinds <- Mod(croots) <= 1 + tol & Mod(croots) >= 1 - tol @@ -176,7 +200,10 @@ TF_roots <- function(Q, a, b, Vdelta, V2, z, C, r, tol = 1e-14) { })) partition <- roots_to_partition(roots) positive <- which(I(partition$midpoints) > 0) - +######################################## + # Store the complement! + # interval_intersect uses complement anyway +######################################## intervals <- matrix(NA, ncol=2) for (i in 1:length(positive)) { ind <- positive[i] @@ -192,3 +219,13 @@ TF_roots <- function(Q, a, b, Vdelta, V2, z, C, r, tol = 1e-14) { return(list(intervals = Intervals(c(0,Inf)), I=I)) } + +# Helper functions for TF roots +roots_to_checkpoints <- function(roots) { + checkpoints <- unique(sort(c(0, roots))) + return(c(0, (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) +} +roots_to_partition <- function(roots) { + checkpoints <- unique(sort(c(0, roots))) + return(list(endpoints = c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) +} From 6b21cadbfabdf1e7afb47be492ca27b104ade4f0 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Thu, 5 Nov 2015 18:24:53 -0800 Subject: [PATCH 036/396] Almost finished porting F code into groupfsInf --- selectiveInference/R/funs.groupfs.R | 104 ++++++++++++++++++-------- selectiveInference/R/funs.quadratic.R | 35 ++++----- 2 files changed, 92 insertions(+), 47 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 148e35a7..80e15298 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -267,7 +267,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { y <- obj$y Ep <- sum(index %in% obj$action) - pvals = dfs = Tstats = numeric(maxsteps) + pvals = dfs = dfs2 = Tstats = numeric(maxsteps) supports <- list() if (!is.null(sigma)) { @@ -322,23 +322,34 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { if (length(obj$action) > 1) { minus_i <- setdiff(obj$action, i) Psub <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) - Z <- Psub %*% y - C <- (ncol(Pfull) - ncol(Psub))/(n - ncol(Pfull)) + Z <- Psub %*% t(Psub) %*% y + df2 <- ncol(Pfull) - ncol(Psub) + C <- df2/(n - ncol(Pfull)) + } else { Z <- 0 - C <- ncol(Pfull)/(n-ncol(Pfull)) + df2 <- ncol(Pfull) + C <- df2/(n-ncol(Pfull)) } - R1 <- y - z - R2 <- y - Pfull %*% y + R1 <- y - Z + R2 <- y - Pfull %*% t(Pfull) %*% y 
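            # R1 is the residual from the submodel and R2 from the full model,
            # so TF below is the classical F statistic with
            # C = (rank difference)/(residual df). A hedged standalone check
            # of that identity (simulated data, illustrative only):
            #   set.seed(1); n <- 50
            #   X <- matrix(rnorm(n * 4), n, 4); y <- rnorm(n)
            #   full <- lm(y ~ X); sub <- lm(y ~ X[, 1:2])
            #   r1 <- sum(resid(sub)^2); r2 <- sum(resid(full)^2)
            #   (r1 - r2) / ((2 / df.residual(full)) * r2)   # equals anova(sub, full)$F[2]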
R <- sqrt(sum(R1^2)) - U <- R1/r + R2sq <- sum(R2^2) + U <- R1/R + + # cv-folds redefine Vdelta/V2? + delta <- R1-R2 Vdelta <- delta/sqrt(sum(delta^2)) V2 <- R2/sqrt(sum(R2^2)) - ydecomp <- list(R=R, Z=Z, Vd=Vdelta, V2=V2) - + TF <- (R^2-R2sq)/(C*R2sq) + Tstats[j] <- TF + dfs[j] <- ncol(Psub) + + ydecomp <- list(R=R, Z=Z, Vd=Vdelta, V2=V2, C=C) + } - + intervallist <- truncationRegion(obj, ydecomp, type) # Additional constraints from cross-validation? @@ -348,11 +359,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { if (type == "TC") { ydecomp <- list(R=R[-cvf$fold], eta=eta[-cvf$fold], Z=Z[-cvf$fold]) } else { - #################################### - # THINK ABOUT THIS AGAIN - # when you're not falling asleep - #################################### - ydecomp <- list(R=R[-cvf$fold], Z=Z[-cvf$fold], Vd=Vdelta[-cvf$fold], V2=V2[-cvf$fold]) + ydecomp <- list(R=R, Z=Z[-cvf$fold], Vd=Vdelta[-cvf$fold], V2=V2[-cvf$fold], C=C) # C correct? } truncationRegion(cvf, ydecomp, type) }))) @@ -365,25 +372,52 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { C <- t(Z) %*% cvquad %*% Z quadratic_roots(A, B, C, tol = 1e-15) } else { - #################################### - # DO THIS NEXT - #################################### + + zcvquad <- t(Z) %*% cvquad + vdcvquad <- t(Vdelta) %*% cvquad + v2cvquad <- t(V2) %*% cvquad + # (r*(vd*g1 + v2*g2) + z)^T cvquad (r*(vd*g1 + v2*g2) + z) + x0 <- zcvquad %*% Z + x1 <- 2*R*zcvquad %*% Vd + x2 <- 2*R*zcvquad %*% V2 + x12 <- 2*R*vdcvquad %*% V2 + x11 <- R^2*vdcvquad %*% Vd + x22 <- R^2*v2cvquad %*% V2 + TF_roots(R, C, coeffs = list(x0=x0, x1=x1, x2=x2, x12=x12, x11=x11, x22=x22)) } })) } # Additional constraints from AIC stopping? if (attr(obj, "stopped")) { + aicstop <- attr(obj, "aicstop") - - ulist <- etalist <- zlist <- penlist <- vector("list", aicstop+1) + ulist <- penlist <- zlist <- vector("list", aicstop+1) + + if (type == "TC") { + etalist <- vector("list", aicstop+1) + } else { + vdlist <- v2list <- vector("list", aicstop+1) + } + for (s in seq(aicstop+1)) { stepind <- maxsteps - (aicstop+1) + s if (stepind > 1) { - etalist[[s]] <- obj$cumprojs[[stepind-1]] %*% eta - zlist[[s]] <- obj$cumprojs[[stepind-1]] %*% Z + cproj <- obj$cumprojs[[stepind-1]] + if (type == "TC") { + etalist[[s]] <- cproj %*% eta + } else { + v2list[[s]] <- cproj %*% V2 + vdlist[[s]] <- cproj %*% Vdelta + } + zlist[[s]] <- cproj %*% Z } else { - etalist[[s]] <- eta + if (type == "TC") { + etalist[[s]] <- eta + } else { + v2list[[s]] <- V2 + vdlist[[s]] <- Vdelta + } zlist[[s]] <- Z } ulist[[s]] <- obj$maxprojs[[stepind]] @@ -405,9 +439,19 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) } else { -################################### - # DO THIS TOO -################################### + + Ug <- ulist[[s]] + Uh <- ulist[[sp]] + peng <- penlist[[s]] + penh <- prod(unlist(penlist[s:sp])) + Vdg <- vdlist[[s]] + Vdh <- vdlist[[sp]] + V2g <- vdlist[[s]] + V2h <- vdlist[[sp]] + Zg <- zlist[[s]] + Zh <- zlist[[sp]] + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + TF_roots(R, C, coeffs) } }) }))) @@ -418,17 +462,17 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { region <- interval_union(region, Intervals(c(-Inf,0))) E <- interval_complement(region, check_valid = FALSE) if (length(E) == 0) { - stop(paste("Empty TC support at step", j)) + stop(paste("Empty support 
at step", j)) } supports[[j]] <- E - + # E is now potentially a union of intervals if (type == "TC") { pvals[j] <- TC_surv(TC, sigma, dfi, E) } else { pvals[j] <- TF_surv(TF, df1, df2, E) } - + } if (any(is.nan(pvals))) { @@ -436,7 +480,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { pvals[nanp] <- 0 warning(paste0("P-value NaNs of the form 0/0 converted to 0 for group(s) ", paste(obj$action[nanp], collapse=","), ". This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.")) } - + names(pvals) <- obj$action out <- list(vars = obj$action, pv=pvals, sigma=sigma) if (type == "TC") { diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 335bb789..59b2b7ea 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -3,23 +3,26 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { n <- nrow(obj$x) R <- ydecomp$R - Z <- ydecomp$Z + Z = Zs = ydecomp$Z if (type == "TC") { - eta <- ydecomp$eta + eta = etas = ydecomp$eta } else { - Vd <- ydecomp$Vd - V2 <- ydecomp$V2 + Vd = Vds = ydecomp$Vd + V2 = V2s = ydecomp$V2 + C = ydecomp$C } L <- lapply(1:length(obj$action), function(s) { Ug <- obj$maxprojs[[s]] peng <- obj$maxpens[[s]] if (s > 1) { - etas <- obj$cumprojs[[s-1]] %*% eta - Zs <- obj$cumprojs[[s-1]] %*% Z - } else { - etas <- eta - Zs <- Z + if (type == "TC") { + etas <- obj$cumprojs[[s-1]] %*% eta + Zs <- obj$cumprojs[[s-1]] %*% Z + } else { + Vds <- obj$cumprojs[[s-1]] %*% Vd + V2s <- obj$cumprojs[[s-1]] %*% V2 + } } num.projs <- length(obj$projections[[s]]) @@ -38,7 +41,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) } else { - coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) TF_roots(R, C, coeffs) } }) @@ -58,17 +61,15 @@ quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh etaZh <- t(etah) %*% Zh etaZg <- t(etag) %*% Zg if (is.null(sigma)) { - # Check the signs, make it consistent A <- penh * (sum(etah^2) - sum(Uheta^2)) - peng * (sum(etag^2) - sum(Ugeta^2)) B <- 2 * penh * (etaZh - t(Uheta) %*% UhZ) - 2 * peng * (etaZg - t(Ugeta) %*% UgZ) C <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) } else { - # Check this A <- (sum(etah^2) - sum(Uheta^2)) - (sum(etag^2) - sum(Ugeta^2)) B <- 2 * (etaZh - t(Uheta) %*% UhZ) - 2 * (etaZg - t(Ugeta) %*% UgZ) C <- (sum(Zh^2) - sum(UhZ^2) + penh) - (sum(Zg^2) - sum(UgZ^2) + peng) } - return(list(A = A, B = B, C= C)) + return(list(A = A, B = B, C = C)) } quadratic_roots <- function(A, B, C, tol) { @@ -141,11 +142,11 @@ TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { UgVd <- t(Ug) %*% Vdg UhV2 <- t(Uh) %*% V2h UgV2 <- t(Ug) %*% V2g - VdZh <- t(Vdh) %*% Zh + VdZh <- t(Vdh) %*% Zh VdZg <- t(Vdg) %*% Zg V2Zh <- t(V2h) %*% Zh V2Zg <- t(V2g) %*% Zg - + x0 <- peng * (sum(Zg^2) - sum(UgZ^2)) - penh * (sum(Zh^2) - sum(UhZ^2)) x1 <- 2*R*(peng * (VdZg - t(UgZ) %*% UgVd) - penh * (VdZh - t(UhZ) %*% UhVd)) x2 <- 2*R*(peng * (V2Zg - t(UgZ) %*% UgV2) - penh * (V2Zh - t(UhZ) %*% UhV2)) @@ -200,10 +201,10 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14) { })) partition <- roots_to_partition(roots) positive <- which(I(partition$midpoints) > 0) -######################################## 
+######################################## # Store the complement! # interval_intersect uses complement anyway -######################################## + intervals <- matrix(NA, ncol=2) for (i in 1:length(positive)) { ind <- positive[i] From d791988742f1d9c9c802c0ee8e9b033d0fcfe2ce Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 6 Nov 2015 15:38:38 -0800 Subject: [PATCH 037/396] Now debugging F test code... --- selectiveInference/R/funs.groupfs.R | 14 ++-- selectiveInference/R/funs.quadratic.R | 93 ++++++++++++++++----------- 2 files changed, 63 insertions(+), 44 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 80e15298..a0fe91a5 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -314,7 +314,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { Tstats[j] <- TC dfs[j] <- dfi - ydecomp <- list(R=R, Z=Z, eta=eta) + ydecomp <- list(Z=Z, eta=eta) } else { @@ -335,10 +335,6 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { R2 <- y - Pfull %*% t(Pfull) %*% y R <- sqrt(sum(R1^2)) R2sq <- sum(R2^2) - U <- R1/R - - # cv-folds redefine Vdelta/V2? - delta <- R1-R2 Vdelta <- delta/sqrt(sum(delta^2)) V2 <- R2/sqrt(sum(R2^2)) @@ -352,6 +348,9 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { intervallist <- truncationRegion(obj, ydecomp, type) + region <- do.call(interval_union, intervallist) + # DELETE THIS ########### + # Additional constraints from cross-validation? if (!is.null(obj$cvobj)) { intervallist <- c(intervallist, do.call(c, @@ -461,8 +460,9 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { region <- do.call(interval_union, intervallist) region <- interval_union(region, Intervals(c(-Inf,0))) E <- interval_complement(region, check_valid = FALSE) + print(E) if (length(E) == 0) { - stop(paste("Empty support at step", j)) +# stop(paste("Empty support at step", j)) } supports[[j]] <- E @@ -470,7 +470,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { if (type == "TC") { pvals[j] <- TC_surv(TC, sigma, dfi, E) } else { - pvals[j] <- TF_surv(TF, df1, df2, E) +# pvals[j] <- TF_surv(TF, df1, df2, E) } } diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 59b2b7ea..6d9667a8 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -2,7 +2,6 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { n <- nrow(obj$x) - R <- ydecomp$R Z = Zs = ydecomp$Z if (type == "TC") { eta = etas = ydecomp$eta @@ -10,6 +9,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { Vd = Vds = ydecomp$Vd V2 = V2s = ydecomp$V2 C = ydecomp$C + R = ydecomp$R } L <- lapply(1:length(obj$action), function(s) { @@ -28,7 +28,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { num.projs <- length(obj$projections[[s]]) if (num.projs == 0) { return(list(Intervals(c(-Inf,0)))) - + print("no projections") } else { lapply(1:num.projs, function(l) { Uh <- obj$projections[[s]][[l]] @@ -136,30 +136,35 @@ quadratic_roots <- function(A, B, C, tol) { # Efficiently compute coefficients of one-dimensional TF slice function TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { - UhZ <- t(Uh) %*% Zh - UgZ <- t(Ug) %*% Zg - UhVd <- t(Uh) %*% Vdh - UgVd <- t(Ug) %*% Vdg - UhV2 <- t(Uh) %*% V2h - UgV2 <- t(Ug) %*% V2g - VdZh <- t(Vdh) %*% Zh - VdZg <- t(Vdg) %*% Zg - V2Zh <- t(V2h) %*% Zh - V2Zg <- t(V2g) %*% Zg - - x0 <- peng * 
(sum(Zg^2) - sum(UgZ^2)) - penh * (sum(Zh^2) - sum(UhZ^2)) - x1 <- 2*R*(peng * (VdZg - t(UgZ) %*% UgVd) - penh * (VdZh - t(UhZ) %*% UhVd)) - x2 <- 2*R*(peng * (V2Zg - t(UgZ) %*% UgV2) - penh * (V2Zh - t(UhZ) %*% UhV2)) - x12 <- 2*R*(peng * (t(Vdg) %*% V2g - t(UgVd) %*% UgV2) - penh * (t(Vdh) %*% V2h - t(UhVd) %*% UhV2)) - x11 <- R^2*(peng * (sum(Vdg^2) - sum(UgVd^2)) - penh*(sum(Vdh^2) - sum(UhVd^2))) - x22 <- R^2*(peng * (sum(V2g^2) - sum(UgV2^2)) - penh*(sum(V2h^2) - sum(UhV2^2))) + UhZ <- t(Uh) %*% Zh #check + UgZ <- t(Ug) %*% Zg #check + UhVd <- t(Uh) %*% Vdh #check + UgVd <- t(Ug) %*% Vdg #check + UhV2 <- t(Uh) %*% V2h #check + UgV2 <- t(Ug) %*% V2g #check + VdZh <- sum(Vdh*Zh) #check + VdZg <- sum(Vdg*Zg) #check + V2Zh <- sum(V2h*Zh) #check + V2Zg <- sum(V2g*Zg) #check + + x0 <- peng * (sum(Zg^2) - sum(UgZ^2)) - penh * (sum(Zh^2) - sum(UhZ^2)) #check + + x1 <- 2*R*(peng * (VdZg - sum(UgZ*UgVd)) - penh * (VdZh - sum(UhZ*UhVd))) #check + + x2 <- 2*R*(peng * (V2Zg - sum(UgZ*UgV2)) - penh * (V2Zh - sum(UhZ*UhV2))) #check + + x12 <- 2*R^2*(peng * (sum(Vdg*V2g) - sum(UgVd*UgV2)) - penh * (sum(Vdh*V2h) - sum(UhVd*UhV2))) #fixed R -> R^2 + + x11 <- R^2*(peng * (sum(Vdg^2) - sum(UgVd^2)) - penh * (sum(Vdh^2) - sum(UhVd^2))) #check + x22 <- R^2*(peng * (sum(V2g^2) - sum(UgV2^2)) - penh * (sum(V2h^2) - sum(UhV2^2))) #check +# if (x22 == 0) print(c(sum(UgV2^2), sum(UhV2^2))) return(list(x11=x11, x22=x22, x12=x12, x1=x1, x2=x2, x0=x0)) } # Numerically solve for roots of TF slice using # hybrid polyroot/uniroot approach -TF_roots <- function(R, C, coeffs, tol = 1e-14) { +TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { x11 <- coeffs$x11 x22 <- coeffs$x22 @@ -168,13 +173,24 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14) { x2 <- coeffs$x2 x0 <- coeffs$x0 + # Handle some special cases + if ((x11 == 0) && (max(abs(c(x22, x12, x1, x2))) < tol2)) { +# print("Special case 1") + return(Intervals(c(-Inf, 0))) + } + + if ((x22 == 0) && (max(abs(c(x2, x12))) < tol2)) { + x2 <- 0 + x12 <- 0 + } + g1 <- function(t) R*sqrt(C*t/(1+C*t)) g2 <- function(t) R/sqrt(1+C*t) I <- function(t) x11*g1(t)^2 + x12*g1(t)*g2(t) + x22*g2(t)^2 + x1*g1(t) + x2*g2(t) + x0 - z4 <- r*complex(real = -x11 + x22, imaginary = -x12)/2 + z4 <- R*complex(real = -x11 + x22, imaginary = -x12)/2 z3 <- complex(real = x2, imaginary = -x1) - z2 <- complex(real = r*x11+r*x22+2*x0/r) + z2 <- complex(real = R*x11+R*x22+2*x0/R) z1 <- Conj(z3) z0 <- Conj(z4) zcoefs <- R*c(z0, z1, z2, z3, z4)/2 @@ -185,8 +201,9 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14) { roots <- unique(thetas[modinds * angleinds]) troots <- tan(roots)^2/C - if (length(roots) == 0) { - return(list(intervals = Intervals(c(0,Inf)), I=I)) + if (length(troots) == 0) { + #return(list(intervals = Intervals(c(0,Inf)), I=I)) + return(Intervals(c(-Inf,0))) } checkpoints <- roots_to_checkpoints(troots) @@ -197,36 +214,38 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14) { if (length(changeinds) > 0) { roots <- unlist(lapply(changeinds, function(ind) { - uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind])$root + uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind], tol = tol2)$root })) + +# if (x2 == 0) print(roots) + partition <- roots_to_partition(roots) - positive <- which(I(partition$midpoints) > 0) -######################################## - # Store the complement! 
- # interval_intersect uses complement anyway + negative <- which(I(partition$midpoints) < 0) intervals <- matrix(NA, ncol=2) - for (i in 1:length(positive)) { - ind <- positive[i] - if ((i > 1) && (ind == positive[i-1] + 1)) { + for (i in 1:length(negative)) { + ind <- negative[i] + if ((i > 1) && (ind == negative[i-1] + 1)) { + # There was not a sign change at end of previous interval intervals[nrow(intervals), 2] <- partition$endpoints[ind+1] } else { intervals <- rbind(intervals, c(partition$endpoints[ind], partition$endpoints[ind+1])) } } - return(list(intervals = Intervals(intervals[-1,]), I=I)) + #return(list(intervals = Intervals(intervals[-1,]), I=I)) + return(Intervals(intervals[-1,])) } - - return(list(intervals = Intervals(c(0,Inf)), I=I)) + #return(list(intervals = Intervals(c(0,Inf)), I=I)) + return(Intervals(c(-Inf,0))) } # Helper functions for TF roots roots_to_checkpoints <- function(roots) { checkpoints <- unique(sort(c(0, roots))) - return(c(0, (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) + return(c(0, (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) } roots_to_partition <- function(roots) { checkpoints <- unique(sort(c(0, roots))) - return(list(endpoints = c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 2 + checkpoints[length(checkpoints)]))/2)) + return(list(endpoints = c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) } From 7564fcb1c934ec7a5f7b6bb0f819eafaea25b6db Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 6 Nov 2015 15:41:18 -0800 Subject: [PATCH 038/396] Not quite finished with F test --- selectiveInference/R/funs.groupfs.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index a0fe91a5..55a3d6b6 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -348,8 +348,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { intervallist <- truncationRegion(obj, ydecomp, type) - region <- do.call(interval_union, intervallist) - # DELETE THIS ########### + #region <- do.call(interval_union, intervallist) # Additional constraints from cross-validation? if (!is.null(obj$cvobj)) { @@ -462,7 +461,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { E <- interval_complement(region, check_valid = FALSE) print(E) if (length(E) == 0) { -# stop(paste("Empty support at step", j)) + stop(paste("Empty support at step", j)) } supports[[j]] <- E @@ -470,6 +469,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { if (type == "TC") { pvals[j] <- TC_surv(TC, sigma, dfi, E) } else { + # write TF_surv function first # pvals[j] <- TF_surv(TF, df1, df2, E) } From 80feb547218e9c4fe5eefe4f4bcac778b6f4577b Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 6 Nov 2015 17:33:08 -0800 Subject: [PATCH 039/396] Searching for a bug... 
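The hunks below only remove dead diagnostics and tidy spacing; the suspect
logic is how TF_roots() detects sign changes of the one-dimensional slice
function I(t) between checkpoints derived from polyroot(). A hedged,
standalone sketch of that detection idea (toy function, names illustrative):

    I <- function(t) (t - 1) * (t - 4)     # toy slice function
    grid <- seq(0, 10, length.out = 1e4)
    range(grid[I(grid) < 0])               # roughly (1, 4), the negative part

One plausible failure mode: polyroot() can place a true root slightly off
the unit circle, so the modulus tolerance excludes it and the computed
support comes out empty.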
--- selectiveInference/R/funs.quadratic.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 6d9667a8..41f27047 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -28,9 +28,9 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { num.projs <- length(obj$projections[[s]]) if (num.projs == 0) { return(list(Intervals(c(-Inf,0)))) - print("no projections") } else { lapply(1:num.projs, function(l) { + Uh <- obj$projections[[s]][[l]] penh <- obj$aicpens[[s]][[l]] # The quadratic form corresponding to @@ -76,6 +76,7 @@ quadratic_roots <- function(A, B, C, tol) { disc <- B^2 - 4*A*C b2a <- -B/(2*A) + if (disc > tol) { # Real roots pm <- sqrt(disc)/(2*A) @@ -207,6 +208,7 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { } checkpoints <- roots_to_checkpoints(troots) + signs <- sign(I(checkpoints)) diffs <- c(0, diff(signs)) changeinds <- which(diffs != 0) @@ -217,8 +219,6 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind], tol = tol2)$root })) -# if (x2 == 0) print(roots) - partition <- roots_to_partition(roots) negative <- which(I(partition$midpoints) < 0) @@ -236,6 +236,7 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { #return(list(intervals = Intervals(intervals[-1,]), I=I)) return(Intervals(intervals[-1,])) } + #return(list(intervals = Intervals(c(0,Inf)), I=I)) return(Intervals(c(-Inf,0))) } From c9a40a907a5ff8cbf506d2c5d7d1a30fde0fdd40 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sat, 7 Nov 2015 15:48:47 -0800 Subject: [PATCH 040/396] Fixed a sign change, added more TF functions --- selectiveInference/R/funs.groupfs.R | 90 ++++++++++++++++++++++----- selectiveInference/R/funs.quadratic.R | 70 ++++++++++----------- 2 files changed, 110 insertions(+), 50 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 55a3d6b6..6432d1e3 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -322,17 +322,16 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { if (length(obj$action) > 1) { minus_i <- setdiff(obj$action, i) Psub <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) - Z <- Psub %*% t(Psub) %*% y - df2 <- ncol(Pfull) - ncol(Psub) - C <- df2/(n - ncol(Pfull)) - + Z <- Psub %*% t(Psub) %*% obj$y + df1 <- ncol(Pfull) - ncol(Psub) } else { Z <- 0 - df2 <- ncol(Pfull) - C <- df2/(n-ncol(Pfull)) + df1 <- ncol(Pfull) } - R1 <- y - Z - R2 <- y - Pfull %*% t(Pfull) %*% y + df2 <- n - ncol(Pfull) + C <- df1/df2 + R1 <- obj$y - Z + R2 <- obj$y - Pfull %*% t(Pfull) %*% obj$y R <- sqrt(sum(R1^2)) R2sq <- sum(R2^2) delta <- R1-R2 @@ -340,7 +339,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { V2 <- R2/sqrt(sum(R2^2)) TF <- (R^2-R2sq)/(C*R2sq) Tstats[j] <- TF - dfs[j] <- ncol(Psub) + dfs[j] <- df1 ydecomp <- list(R=R, Z=Z, Vd=Vdelta, V2=V2, C=C) @@ -348,8 +347,6 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { intervallist <- truncationRegion(obj, ydecomp, type) - #region <- do.call(interval_union, intervallist) - # Additional constraints from cross-validation? 
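        # With cross-validation the selection event also conditions on every
        # fold's stepwise path, so each fold contributes its own truncation
        # intervals and quadratic forms below. Conceptually the pieces then
        # combine exactly as in the main loop:
        #   region <- do.call(interval_union, intervallist)
        #   E <- interval_complement(region, check_valid = FALSE)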
if (!is.null(obj$cvobj)) { intervallist <- c(intervallist, do.call(c, @@ -459,7 +456,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { region <- do.call(interval_union, intervallist) region <- interval_union(region, Intervals(c(-Inf,0))) E <- interval_complement(region, check_valid = FALSE) - print(E) + if (length(E) == 0) { stop(paste("Empty support at step", j)) } @@ -470,7 +467,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { pvals[j] <- TC_surv(TC, sigma, dfi, E) } else { # write TF_surv function first -# pvals[j] <- TF_surv(TF, df1, df2, E) + pvals[j] <- TF_surv(TF, df1, df2, E) } } @@ -557,6 +554,63 @@ num_int_chi <- function(a, b, df, nsamp = 10000) { return((b-a)*mean(integrand)) } +TF_surv <- function(TF, df1, df2, E) { + if (length(E) == 0) { + stop("Empty TF support") + } + + # Sum truncated cdf over each part of E + denom <- do.call(sum, lapply(1:nrow(E), function(v) { + TF_interval(E[v,1], E[v,2], df1, df2) + })) + + # Sum truncated cdf from observed value to max of + # truncation region + numer <- do.call(sum, lapply(1:nrow(E), function(v) { + lower <- E[v,1] + upper <- E[v,2] + if (upper > TF) { + # Observed value is left of this interval's right endpoint + if (lower < TF) { + # Observed value is in this interval + return(TF_interval(TF, upper, df1, df2)) + } else { + # Observed value is not in this interval + return(TF_interval(lower, upper, df1, df2)) + } + } else { + # Observed value is right of this entire interval + return(0) + } + })) + + # Survival function + value <- numer/denom + # Force p-value to lie in the [0,1] interval + # in case of numerical issues + value <- max(0, min(1, value)) + value +} + +TF_interval <- function(lower, upper, df1, df2) { + a <- lower + b <- upper + if (b == Inf) { + integral <- pf(a, df1, df2, lower.tail = FALSE) + } else { + integral <- pf(b, df1, df2) - pf(a, df1, df2) + } + if ((integral < .Machine$double.eps) && (b < Inf)) { + integral <- num_int_F(a, b, df1, df2) + } + return(integral) +} + +num_int_F <- function(a, b, df1, df2, nsamp = 10000) { + grid <- seq(from=a, to=b, length.out=nsamp) + integrand <- df(grid, df1, df2) + return((b-a)*mean(integrand)) +} #' Center and scale design matrix by groups #' @@ -697,13 +751,21 @@ predict.groupfs <- function(object, newx, ...) { print.groupfsInf <- function(x, ...) { cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) action <- x$vars + isTC <- "TC" %in% names(x) + if (isTC) { + Tstat <- x$TC + } else { + Tstat <- x$TF + } vnames <- attr(x, "varnames") if (length(vnames) > 0) action <- vnames[action] - tab = data.frame(Group = action, Pvalue = round(x$pv, 3), Tchi = round(x$TC, 3), + tab = data.frame(Group = action, Pvalue = round(x$pv, 3), + TC = round(Tstat, 3), df = x$df, Size = round(unlist(lapply(lapply(x$support, size), sum)), 3), Ints = unlist(lapply(x$support, nrow)), Min =round(unlist(lapply(x$support, min)), 3), Max = round(unlist(lapply(x$support, max)), 3)) rownames(tab) = 1:length(x$vars) + if (!isTC) names(tab)[3] <- "TF" print(tab) cat("\nInts is the number of intervals in the truncated chi selection region and Size is the sum of their lengths. Min and Max are the lowest and highest endpoints of the truncation region. 
No confidence intervals are reported by groupfsInf.\n") invisible() diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 41f27047..d0660a1c 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -41,6 +41,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) } else { + # Q <- peng * (diag(rep(1,n)) - Ug %*% t(Ug)) - penh * (diag(rep(1,n)) - Uh %*% t(Uh)) coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) TF_roots(R, C, coeffs) } @@ -134,6 +135,16 @@ quadratic_roots <- function(A, B, C, tol) { } +# Helper functions for TF roots +roots_to_checkpoints <- function(roots) { + checkpoints <- unique(sort(c(0, roots))) + return(c(0, (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) +} +roots_to_partition <- function(roots) { + checkpoints <- unique(sort(c(0, roots))) + return(list(endpoints = c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) +} + # Efficiently compute coefficients of one-dimensional TF slice function TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { @@ -148,17 +159,12 @@ TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { V2Zh <- sum(V2h*Zh) #check V2Zg <- sum(V2g*Zg) #check - x0 <- peng * (sum(Zg^2) - sum(UgZ^2)) - penh * (sum(Zh^2) - sum(UhZ^2)) #check - - x1 <- 2*R*(peng * (VdZg - sum(UgZ*UgVd)) - penh * (VdZh - sum(UhZ*UhVd))) #check - - x2 <- 2*R*(peng * (V2Zg - sum(UgZ*UgV2)) - penh * (V2Zh - sum(UhZ*UhV2))) #check - - x12 <- 2*R^2*(peng * (sum(Vdg*V2g) - sum(UgVd*UgV2)) - penh * (sum(Vdh*V2h) - sum(UhVd*UhV2))) #fixed R -> R^2 - - x11 <- R^2*(peng * (sum(Vdg^2) - sum(UgVd^2)) - penh * (sum(Vdh^2) - sum(UhVd^2))) #check - x22 <- R^2*(peng * (sum(V2g^2) - sum(UgV2^2)) - penh * (sum(V2h^2) - sum(UhV2^2))) #check -# if (x22 == 0) print(c(sum(UgV2^2), sum(UhV2^2))) + x0 <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) + x1 <- 2*R*(penh * (VdZh - sum(UhZ*UhVd)) - peng * (VdZg - sum(UgZ*UgVd))) + x2 <- 2*R*(penh * (V2Zh - sum(UhZ*UhV2)) - peng * (V2Zg - sum(UgZ*UgV2))) + x12 <- 2*R^2*(penh * (sum(Vdh*V2h) - sum(UhVd*UhV2)) - peng * (sum(Vdg*V2g) - sum(UgVd*UgV2))) + x11 <- R^2*(penh * (sum(Vdh^2) - sum(UhVd^2)) - peng * (sum(Vdg^2) - sum(UgVd^2))) #check + x22 <- R^2*(penh * (sum(V2h^2) - sum(UhV2^2)) - peng * (sum(V2g^2) - sum(UgV2^2))) return(list(x11=x11, x22=x22, x12=x12, x1=x1, x2=x2, x0=x0)) } @@ -174,32 +180,33 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { x2 <- coeffs$x2 x0 <- coeffs$x0 - # Handle some special cases - if ((x11 == 0) && (max(abs(c(x22, x12, x1, x2))) < tol2)) { -# print("Special case 1") - return(Intervals(c(-Inf, 0))) - } +## # Handle some special cases +## if ((x11 == 0) && (max(abs(c(x22, x12, x1, x2))) < tol2)) { +## # print("Special case 1") +## return(Intervals(c(-Inf, 0))) +## } - if ((x22 == 0) && (max(abs(c(x2, x12))) < tol2)) { - x2 <- 0 - x12 <- 0 - } +## if ((x22 == 0) && (max(abs(c(x2, x12))) < tol2)) { +## x2 <- 0 +## x12 <- 0 +## } - g1 <- function(t) R*sqrt(C*t/(1+C*t)) - g2 <- function(t) R/sqrt(1+C*t) + g1 <- function(t) sqrt(C*t/(1+C*t)) + g2 <- function(t) 1/sqrt(1+C*t) I <- function(t) x11*g1(t)^2 + x12*g1(t)*g2(t) + x22*g2(t)^2 + x1*g1(t) + x2*g2(t) + x0 - z4 <- R*complex(real = -x11 + x22, 
imaginary = -x12)/2 - z3 <- complex(real = x2, imaginary = -x1) - z2 <- complex(real = R*x11+R*x22+2*x0/R) + z4 <- complex(real = -x11 + x22, imaginary = -x12)/4 + z3 <- complex(real = x2, imaginary = -x1)/2 + z2 <- complex(real = x11/2+x22/2+x0) z1 <- Conj(z3) z0 <- Conj(z4) - zcoefs <- R*c(z0, z1, z2, z3, z4)/2 + + zcoefs <- c(z0, z1, z2, z3, z4) croots <- polyroot(zcoefs) thetas <- Arg(croots) modinds <- Mod(croots) <= 1 + tol & Mod(croots) >= 1 - tol angleinds <- thetas >=0 & thetas <= pi/2 - roots <- unique(thetas[modinds * angleinds]) + roots <- unique(thetas[which(modinds & angleinds)]) troots <- tan(roots)^2/C if (length(troots) == 0) { @@ -241,12 +248,3 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { return(Intervals(c(-Inf,0))) } -# Helper functions for TF roots -roots_to_checkpoints <- function(roots) { - checkpoints <- unique(sort(c(0, roots))) - return(c(0, (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) -} -roots_to_partition <- function(roots) { - checkpoints <- unique(sort(c(0, roots))) - return(list(endpoints = c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) -} From ee08db5a77bd040904a79edb5a104f5decc80119 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sat, 7 Nov 2015 16:07:51 -0800 Subject: [PATCH 041/396] Still getting empty support for F, not sure why --- selectiveInference/R/funs.groupfs.R | 37 +++++++++++++++------------ selectiveInference/R/funs.quadratic.R | 2 +- tests/test.groupfs.R | 8 +++--- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 6432d1e3..d2c539c2 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -277,12 +277,12 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } else { if (is.null(obj$sigma)) { type <- "TF" - if (n >= 2*p) { - sigma <- sqrt(sum(lsfit(obj$x, obj$y, intercept = obj$intercept)$res^2)/(n-p-obj$intercept)) - } else { - sigma = sqrt(obj$log$RSS[length(obj$log$RSS)]/(n-Ep-obj$intercept)) - warning(paste(sprintf("p > n/2, and sigmahat = %0.3f used as an estimate of sigma;",sigma), "you may want to use the estimateSigma function")) - } + ## if (n >= 2*p) { + ## sigma <- sqrt(sum(lsfit(obj$x, obj$y, intercept = obj$intercept)$res^2)/(n-p-obj$intercept)) + ## } else { + ## sigma = sqrt(obj$log$RSS[length(obj$log$RSS)]/(n-Ep-obj$intercept)) + ## warning(paste(sprintf("p > n/2, and sigmahat = %0.3f used as an estimate of sigma;",sigma), "you may want to use the estimateSigma function")) + ## } } else { type <- "TC" sigma <- obj$sigma @@ -294,7 +294,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { i <- obj$action[j] if (verbose) cat(paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i, "\n")) - if (!is.null(obj$sigma)) { + if (type == "TC") { # Form projection onto active set minus i # and project x_i orthogonally x_i <- obj$x[,which(obj$index == i), drop = FALSE] @@ -332,12 +332,13 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { C <- df1/df2 R1 <- obj$y - Z R2 <- obj$y - Pfull %*% t(Pfull) %*% obj$y - R <- sqrt(sum(R1^2)) + R1sq <- sum(R1^2) R2sq <- sum(R2^2) + R <- sqrt(R1sq) delta <- R1-R2 - Vdelta <- delta/sqrt(sum(delta^2)) - V2 <- R2/sqrt(sum(R2^2)) - TF <- (R^2-R2sq)/(C*R2sq) + Vdelta <- delta/sqrt(delta^2) + V2 <- R2/sqrt(R2sq) + TF <- (R1sq-R2sq)/(C*R2sq) Tstats[j] <- TF dfs[j] <- df1 @@ -479,9 +480,10 @@ 
groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } names(pvals) <- obj$action - out <- list(vars = obj$action, pv=pvals, sigma=sigma) + out <- list(vars = obj$action, pv=pvals) if (type == "TC") { out$TC <- Tstats + out$sigma <- sigma } else { out$TF <- Tstats } @@ -749,14 +751,15 @@ predict.groupfs <- function(object, newx, ...) { } print.groupfsInf <- function(x, ...) { - cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) - action <- x$vars - isTC <- "TC" %in% names(x) - if (isTC) { + if (!is.null(x$sigma)) { + isTF <- FALSE Tstat <- x$TC + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) } else { + isTF <- TRUE Tstat <- x$TF } + action <- x$vars vnames <- attr(x, "varnames") if (length(vnames) > 0) action <- vnames[action] tab = data.frame(Group = action, Pvalue = round(x$pv, 3), @@ -765,7 +768,7 @@ print.groupfsInf <- function(x, ...) { Ints = unlist(lapply(x$support, nrow)), Min =round(unlist(lapply(x$support, min)), 3), Max = round(unlist(lapply(x$support, max)), 3)) rownames(tab) = 1:length(x$vars) - if (!isTC) names(tab)[3] <- "TF" + if (isTF) names(tab)[3] <- "TF" print(tab) cat("\nInts is the number of intervals in the truncated chi selection region and Size is the sum of their lengths. Min and Max are the lowest and highest endpoints of the truncation region. No confidence intervals are reported by groupfsInf.\n") invisible() diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index d0660a1c..88cab998 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -41,7 +41,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) } else { - # Q <- peng * (diag(rep(1,n)) - Ug %*% t(Ug)) - penh * (diag(rep(1,n)) - Uh %*% t(Uh)) + #Q <- peng * (diag(rep(1,n)) - Ug %*% t(Ug)) - penh * (diag(rep(1,n)) - Uh %*% t(Uh)) coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) TF_roots(R, C, coeffs) } diff --git a/tests/test.groupfs.R b/tests/test.groupfs.R index d35800e5..08957128 100644 --- a/tests/test.groupfs.R +++ b/tests/test.groupfs.R @@ -9,9 +9,9 @@ set.seed(1) n <- 40 p <- 80 index <- sort(rep(1:(p/2), 2)) -maxsteps <- 10 -sparsity <- 5 -snr <- 3 +maxsteps <- 8 +sparsity <- 4 +snr <- 2 system.time({ for (iter in 1:100) { @@ -21,7 +21,7 @@ for (iter in 1:100) { beta[which(index %in% 1:sparsity)] <- snr y <- y + x %*% beta fit <- groupfs(x, y, index, maxsteps = maxsteps) - pvals <- groupfsInf(fit) + pvals <- groupfsInf(fit, verbose = T) } }) From 504baa1be25ad01773b3295c1b9d4797f2504bee Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sun, 8 Nov 2015 19:32:16 -0800 Subject: [PATCH 042/396] Fixed another error, but still no dice on the F test --- selectiveInference/R/funs.quadratic.R | 84 +++++++++++++++++++-------- 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 88cab998..bffc0b80 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -2,27 +2,35 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { n <- nrow(obj$x) - Z = Zs = ydecomp$Z + Z <- ydecomp$Z if (type == "TC") { - eta = etas = ydecomp$eta + eta <- ydecomp$eta } else { - Vd = Vds = ydecomp$Vd - V2 = V2s = ydecomp$V2 - C = 
ydecomp$C - R = ydecomp$R + Vd <- ydecomp$Vd + V2 <- ydecomp$V2 + C <- ydecomp$C + R <- ydecomp$R } L <- lapply(1:length(obj$action), function(s) { Ug <- obj$maxprojs[[s]] peng <- obj$maxpens[[s]] if (s > 1) { + Zs <- obj$cumprojs[[s-1]] %*% Z # This was a fix! if (type == "TC") { etas <- obj$cumprojs[[s-1]] %*% eta - Zs <- obj$cumprojs[[s-1]] %*% Z } else { Vds <- obj$cumprojs[[s-1]] %*% Vd V2s <- obj$cumprojs[[s-1]] %*% V2 } + } else { + Zs <- Z + if (type == "TC") { + etas <- eta + } else { + Vds <- Vd + V2s <- V2 + } } num.projs <- length(obj$projections[[s]]) @@ -41,9 +49,27 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) } else { - #Q <- peng * (diag(rep(1,n)) - Ug %*% t(Ug)) - penh * (diag(rep(1,n)) - Uh %*% t(Uh)) + + # Debugging + ## Q <- peng * (diag(rep(1,n)) - Ug %*% t(Ug)) - penh * (diag(rep(1,n)) - Uh %*% t(Uh)) + ## g1 <- function(t) sqrt(C*t/(1+C*t)) + ## g2 <- function(t) 1/sqrt(1+C*t) + ## Y <- function(t) { + ## Zs + R * (Vds*g1(t) + V2s*g2(t)) + ## } + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) - TF_roots(R, C, coeffs) + roots <- TF_roots(R, C, coeffs) + + if (is.null(roots)) print(c(s,l)) + ## print(do.call(rbind, lapply(roots, function(r) { + ## c(r, + ## t(Y(r-0.000001)) %*% Q %*% Y(r-0.000001), + ## t(Y(r)) %*% Q %*% Y(r), + ## t(Y(r+0.000001)) %*% Q %*% Y(r+0.000001)) + ## }))) + + return(roots) } }) } @@ -148,22 +174,22 @@ roots_to_partition <- function(roots) { # Efficiently compute coefficients of one-dimensional TF slice function TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { - UhZ <- t(Uh) %*% Zh #check - UgZ <- t(Ug) %*% Zg #check - UhVd <- t(Uh) %*% Vdh #check - UgVd <- t(Ug) %*% Vdg #check - UhV2 <- t(Uh) %*% V2h #check - UgV2 <- t(Ug) %*% V2g #check - VdZh <- sum(Vdh*Zh) #check - VdZg <- sum(Vdg*Zg) #check - V2Zh <- sum(V2h*Zh) #check - V2Zg <- sum(V2g*Zg) #check + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + UhVd <- t(Uh) %*% Vdh + UgVd <- t(Ug) %*% Vdg + UhV2 <- t(Uh) %*% V2h + UgV2 <- t(Ug) %*% V2g + VdZh <- sum(Vdh*Zh) + VdZg <- sum(Vdg*Zg) + V2Zh <- sum(V2h*Zh) + V2Zg <- sum(V2g*Zg) x0 <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) x1 <- 2*R*(penh * (VdZh - sum(UhZ*UhVd)) - peng * (VdZg - sum(UgZ*UgVd))) x2 <- 2*R*(penh * (V2Zh - sum(UhZ*UhV2)) - peng * (V2Zg - sum(UgZ*UgV2))) x12 <- 2*R^2*(penh * (sum(Vdh*V2h) - sum(UhVd*UhV2)) - peng * (sum(Vdg*V2g) - sum(UgVd*UgV2))) - x11 <- R^2*(penh * (sum(Vdh^2) - sum(UhVd^2)) - peng * (sum(Vdg^2) - sum(UgVd^2))) #check + x11 <- R^2*(penh * (sum(Vdh^2) - sum(UhVd^2)) - peng * (sum(Vdg^2) - sum(UgVd^2))) x22 <- R^2*(penh * (sum(V2h^2) - sum(UhV2^2)) - peng * (sum(V2g^2) - sum(UgV2^2))) return(list(x11=x11, x22=x22, x12=x12, x1=x1, x2=x2, x0=x0)) @@ -210,7 +236,15 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { troots <- tan(roots)^2/C if (length(troots) == 0) { - #return(list(intervals = Intervals(c(0,Inf)), I=I)) + # Something bad happens here + # sometimes there is a root and it's not caught + # by polyroot or something + # do an additional sign change check? 
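## [Standalone sanity check, not package code; toy coefficients.] The
## unit-circle substitution this routine relies on can be exercised in
## isolation: with g1 = sin(theta) and g2 = cos(theta), the function
## a*sin(theta) + b*cos(theta) + cc vanishes exactly where the complex
## polynomial in z = exp(i*theta) below has a root of modulus one.
a <- 1.3; b <- -0.7; cc <- 0.5
f <- function(theta) a*sin(theta) + b*cos(theta) + cc
zcoefs <- c(complex(real = b/2, imaginary = a/2),   # degree 0: (b + ai)/2
            cc,                                     # degree 1
            complex(real = b/2, imaginary = -a/2))  # degree 2: (b - ai)/2
croots <- polyroot(zcoefs)
th <- Arg(croots)[abs(Mod(croots) - 1) < 1e-6]
max(abs(f(th)))  # ~ 0: the angles polyroot recovers are genuine roots of f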
+ #print(c(I(0), I(100), I(10000), unlist(coeffs)), digits=1) + if (I(0) < 0) { + return(NULL) + return(Intervals(c(0,Inf))) + } return(Intervals(c(-Inf,0))) } @@ -240,11 +274,15 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { } } - #return(list(intervals = Intervals(intervals[-1,]), I=I)) return(Intervals(intervals[-1,])) } - #return(list(intervals = Intervals(c(0,Inf)), I=I)) + # Something bad happening, see above + #print(c(I(0), I(100), I(10000), unlist(coeffs)), digits=1) + if (I(0) < 0) { + return(NULL) + return(Intervals(c(0,Inf))) + } return(Intervals(c(-Inf,0))) } From 3410957e65bd1c92427a2aefa2a1aa92edd2b006 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sun, 8 Nov 2015 20:23:35 -0800 Subject: [PATCH 043/396] Found another error, still not done --- selectiveInference/R/funs.groupfs.R | 2 +- selectiveInference/R/funs.quadratic.R | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index d2c539c2..4fd46427 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -336,7 +336,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { R2sq <- sum(R2^2) R <- sqrt(R1sq) delta <- R1-R2 - Vdelta <- delta/sqrt(delta^2) + Vdelta <- delta/sqrt(sum(delta^2)) V2 <- R2/sqrt(R2sq) TF <- (R1sq-R2sq)/(C*R2sq) Tstats[j] <- TF diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index bffc0b80..401a652f 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -50,18 +50,18 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) } else { - # Debugging + # Debugging ## Q <- peng * (diag(rep(1,n)) - Ug %*% t(Ug)) - penh * (diag(rep(1,n)) - Uh %*% t(Uh)) - ## g1 <- function(t) sqrt(C*t/(1+C*t)) - ## g2 <- function(t) 1/sqrt(1+C*t) - ## Y <- function(t) { - ## Zs + R * (Vds*g1(t) + V2s*g2(t)) - ## } + ## g1 <- function(t) sqrt(C*t/(1+C*t)) + ## g2 <- function(t) 1/sqrt(1+C*t) + ## Y <- function(t) { + ## Zs + R * (Vds*g1(t) + V2s*g2(t)) + ## } coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) roots <- TF_roots(R, C, coeffs) - if (is.null(roots)) print(c(s,l)) + ## if (is.null(roots)) print(c(s,l)) ## print(do.call(rbind, lapply(roots, function(r) { ## c(r, ## t(Y(r-0.000001)) %*% Q %*% Y(r-0.000001), @@ -242,7 +242,7 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { # do an additional sign change check? 
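## [Standalone aside on the Vdelta fix in this commit.] sqrt(delta^2) is
## elementwise abs(), not the Euclidean norm, so the old expression
## produced a sign vector rather than a unit vector:
delta <- c(3, -4)
delta / sqrt(delta^2)        # c(1, -1): elementwise, wrong
delta / sqrt(sum(delta^2))   # c(0.6, -0.8): unit vector, as intended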
#print(c(I(0), I(100), I(10000), unlist(coeffs)), digits=1) if (I(0) < 0) { - return(NULL) +# return(NULL) return(Intervals(c(0,Inf))) } return(Intervals(c(-Inf,0))) @@ -280,7 +280,7 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { # Something bad happening, see above #print(c(I(0), I(100), I(10000), unlist(coeffs)), digits=1) if (I(0) < 0) { - return(NULL) +# return(NULL) return(Intervals(c(0,Inf))) } return(Intervals(c(-Inf,0))) From 0ec3b9b841456858719b8ecc336796db3e045961 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sun, 8 Nov 2015 20:48:34 -0800 Subject: [PATCH 044/396] polyroot precision was too low for modinds tol, increased tol and found more roots --- selectiveInference/R/funs.quadratic.R | 35 +++++++++++---------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 401a652f..77f4468d 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -61,7 +61,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) roots <- TF_roots(R, C, coeffs) - ## if (is.null(roots)) print(c(s,l)) + if (is.null(roots)) print(c(s,l)) ## print(do.call(rbind, lapply(roots, function(r) { ## c(r, ## t(Y(r-0.000001)) %*% Q %*% Y(r-0.000001), @@ -230,26 +230,23 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { zcoefs <- c(z0, z1, z2, z3, z4) croots <- polyroot(zcoefs) thetas <- Arg(croots) - modinds <- Mod(croots) <= 1 + tol & Mod(croots) >= 1 - tol + # Can't specify polyroot precision :( + modinds <- Mod(croots) <= 1 + tol2 & Mod(croots) >= 1 - tol2 angleinds <- thetas >=0 & thetas <= pi/2 roots <- unique(thetas[which(modinds & angleinds)]) troots <- tan(roots)^2/C if (length(troots) == 0) { - # Something bad happens here - # sometimes there is a root and it's not caught - # by polyroot or something - # do an additional sign change check? 
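## [Standalone sketch of the grid-plus-uniroot fallback this commit adds
## below; the slice function and grid here are toy stand-ins.] Sign changes
## over the checkpoints are located first, then each bracket is refined:
I <- function(t) (t - 0.3) * (t - 2) * exp(-t)
checkpoints <- c(0, 10^seq(-6, 2, length.out = 60))
signs <- sign(I(checkpoints))
changeinds <- which(c(0, diff(signs)) != 0)
roots <- sapply(changeinds, function(ind)
    uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind],
            tol = 1e-8)$root)
roots  # approximately 0.3 and 2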
- #print(c(I(0), I(100), I(10000), unlist(coeffs)), digits=1) - if (I(0) < 0) { -# return(NULL) - return(Intervals(c(0,Inf))) - } - return(Intervals(c(-Inf,0))) + # Polyroot didn't catch any roots + # ad-hoc check: + checkpoints <- c(0, tol, tol2, + seq(from = sqrt(tol2), to = 1, length.out = 50), + seq(from = 1.2, to=50, length.out = 20), + 100, 1000, 10000) + } else { + checkpoints <- roots_to_checkpoints(troots) } - - checkpoints <- roots_to_checkpoints(troots) - + signs <- sign(I(checkpoints)) diffs <- c(0, diff(signs)) changeinds <- which(diffs != 0) @@ -277,12 +274,8 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { return(Intervals(intervals[-1,])) } - # Something bad happening, see above - #print(c(I(0), I(100), I(10000), unlist(coeffs)), digits=1) - if (I(0) < 0) { -# return(NULL) - return(Intervals(c(0,Inf))) - } + # Apparently no roots, always positive + if (I(0) < 0) stop("Infeasible constraint!") return(Intervals(c(-Inf,0))) } From b75bb54466b1b719a383de09851972d56b62c229 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sun, 8 Nov 2015 21:10:14 -0800 Subject: [PATCH 045/396] Fixed typo in aicstop code for F test --- forLater/josh/sim.aicstop.R | 2 +- selectiveInference/R/funs.groupfs.R | 7 ++++--- selectiveInference/R/funs.quadratic.R | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 466f9b4f..2c3e5175 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -25,7 +25,7 @@ instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { index <- simd$index fit <- groupfs(x, y, index, maxsteps, k = log(n), aicstop = aicstop) - pvals <- groupfsInf(fit, sigma = 1, verbose=T) + pvals <- groupfsInf(fit, verbose=T) return(list(variable = fit$action, pvals = pvals$pv, stopped = attr(fit, "stopped"))) } diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 4fd46427..d12af1ea 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -271,6 +271,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { supports <- list() if (!is.null(sigma)) { + type <- "TC" if (!is.null(obj$sigma)) { cat(paste("Using specified value", sigma, "for sigma in place of the value", obj$sigma, "used by groupfs()\n")) } @@ -347,7 +348,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } intervallist <- truncationRegion(obj, ydecomp, type) - + # Additional constraints from cross-validation? 
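## [Standalone illustration with toy intervals of how the per-step
## constraint lists are combined, using the same intervals-package calls
## that appear further down in this function.]
library(intervals)
constraints <- list(Intervals(c(-Inf, 0)),
                    Intervals(rbind(c(0.2, 0.9), c(5, Inf))))
region <- do.call(interval_union, constraints)        # union of exclusions
E <- interval_complement(region, check_valid = FALSE)
E  # the set the truncated statistic is conditioned to lie in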
if (!is.null(obj$cvobj)) { intervallist <- c(intervallist, do.call(c, @@ -442,8 +443,8 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { penh <- prod(unlist(penlist[s:sp])) Vdg <- vdlist[[s]] Vdh <- vdlist[[sp]] - V2g <- vdlist[[s]] - V2h <- vdlist[[sp]] + V2g <- v2list[[s]] + V2h <- v2list[[sp]] Zg <- zlist[[s]] Zh <- zlist[[sp]] coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 77f4468d..8d7a19f2 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -197,7 +197,7 @@ TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { # Numerically solve for roots of TF slice using # hybrid polyroot/uniroot approach -TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { +TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { x11 <- coeffs$x11 x22 <- coeffs$x22 @@ -254,7 +254,7 @@ TF_roots <- function(R, C, coeffs, tol = 1e-14, tol2 = 1e-8) { if (length(changeinds) > 0) { roots <- unlist(lapply(changeinds, function(ind) { - uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind], tol = tol2)$root + uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind], tol = tol)$root })) partition <- roots_to_partition(roots) From 17b8b6530a01779773e86ee59f51aacdd4ff1f67 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 9 Nov 2015 20:43:23 -0800 Subject: [PATCH 046/396] Last commit before a big change in AICstop --- forLater/josh/sim.aicstop.R | 6 +-- selectiveInference/R/funs.groupfs.R | 74 ++++++++++++++------------- selectiveInference/R/funs.quadratic.R | 34 ++++++------ 3 files changed, 58 insertions(+), 56 deletions(-) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 2c3e5175..3d07ce4d 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -6,7 +6,7 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 100 +niters <- 400 n <- 100 p <- 100 G <- 50 @@ -14,7 +14,7 @@ maxsteps <- 15 sparsity <- 5 snr <- 1 sigma <- 1 -rho <- .1 +rho <- 0 aicstop <- 1 instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { @@ -42,6 +42,6 @@ save(pvals, vars, stopped, file = paste0( "_p", p, "_sparsity", sparsity, "_snr", snr, - "_known.RData")) + "_F_rho0.RData")) print(time) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index d12af1ea..2a5471e0 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -67,7 +67,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE modelrank <- as.numeric(intercept) if (is.null(sigma)) { - aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1 + k * (is.null(sigma) + intercept)) + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1 + k) * (is.null(sigma) + intercept) # This was a fix } else { aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * intercept } @@ -145,7 +145,6 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE terms[(step+1):maxsteps] <- NULL maxsteps <- step stopped <- TRUE - # add additional projections break } } @@ -293,7 +292,12 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { # Compute p-value for each active group for (j in 1:maxsteps) { i <- obj$action[j] - if (verbose) cat(paste0("Step ", j, "/", attr(obj, 
"maxsteps"), ": computing P-value for group ", i, "\n")) + if (verbose) { + string <- paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i) + if (!is.null(obj$cvobj)) string <- paste0(string, ", including constraints from cross-validation") + if (attr(obj, "stopped")) string <- paste0(string, ", including constraints from AICstop") + cat(paste(string, "\n")) + } if (type == "TC") { # Form projection onto active set minus i @@ -348,7 +352,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } intervallist <- truncationRegion(obj, ydecomp, type) - + # Additional constraints from cross-validation? if (!is.null(obj$cvobj)) { intervallist <- c(intervallist, do.call(c, @@ -422,39 +426,39 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } intervallist <- c(intervallist, - do.call(c, lapply(1:aicstop, function(s) { - lapply((s+1):(aicstop+1), function(sp) { - if (type == "TC") { - Ug <- ulist[[s]] - Uh <- ulist[[sp]] - peng <- penlist[[s]] - penh <- prod(unlist(penlist[s:sp])) - etag <- etalist[[s]] - etah <- etalist[[sp]] - Zg <- zlist[[s]] - Zh <- zlist[[sp]] - coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) - quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) - } else { - - Ug <- ulist[[s]] - Uh <- ulist[[sp]] - peng <- penlist[[s]] - penh <- prod(unlist(penlist[s:sp])) - Vdg <- vdlist[[s]] - Vdh <- vdlist[[sp]] - V2g <- v2list[[s]] - V2h <- v2list[[sp]] - Zg <- zlist[[s]] - Zh <- zlist[[sp]] - coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) - TF_roots(R, C, coeffs) - } - }) - }))) + lapply(1:aicstop, function(s) { + sp <- s+1 + if (type == "TC") { + Ug <- ulist[[s]] + Uh <- ulist[[sp]] + # Check this: known sigma has *additive* pen terms + peng <- penlist[[s]] + penh <- prod(unlist(penlist[s:sp])) + #################################### + etag <- etalist[[s]] + etah <- etalist[[sp]] + Zg <- zlist[[s]] + Zh <- zlist[[sp]] + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + } else { + Ug <- ulist[[s]] + Uh <- ulist[[sp]] + peng <- 1 #penlist[[s]] + penh <- penlist[[sp]] + Vdg <- vdlist[[s]] + Vdh <- vdlist[[sp]] + V2g <- v2list[[s]] + V2h <- v2list[[sp]] + Zg <- zlist[[s]] + Zh <- zlist[[sp]] + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + TF_roots(R, C, coeffs) + } + })) } - # Compute intersection: + # Compute intersection: region <- do.call(interval_union, intervallist) region <- interval_union(region, Intervals(c(-Inf,0))) E <- interval_complement(region, check_valid = FALSE) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 8d7a19f2..5b59fcbc 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -16,7 +16,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { Ug <- obj$maxprojs[[s]] peng <- obj$maxpens[[s]] if (s > 1) { - Zs <- obj$cumprojs[[s-1]] %*% Z # This was a fix! 
+ Zs <- obj$cumprojs[[s-1]] %*% Z if (type == "TC") { etas <- obj$cumprojs[[s-1]] %*% eta } else { @@ -49,7 +49,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) } else { - + # Debugging ## Q <- peng * (diag(rep(1,n)) - Ug %*% t(Ug)) - penh * (diag(rep(1,n)) - Uh %*% t(Uh)) ## g1 <- function(t) sqrt(C*t/(1+C*t)) @@ -57,7 +57,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { ## Y <- function(t) { ## Zs + R * (Vds*g1(t) + V2s*g2(t)) ## } - + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) roots <- TF_roots(R, C, coeffs) @@ -68,7 +68,7 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { ## t(Y(r)) %*% Q %*% Y(r), ## t(Y(r+0.000001)) %*% Q %*% Y(r+0.000001)) ## }))) - + return(roots) } }) @@ -103,7 +103,6 @@ quadratic_roots <- function(A, B, C, tol) { disc <- B^2 - 4*A*C b2a <- -B/(2*A) - if (disc > tol) { # Real roots pm <- sqrt(disc)/(2*A) @@ -160,7 +159,6 @@ quadratic_roots <- function(A, B, C, tol) { } } - # Helper functions for TF roots roots_to_checkpoints <- function(roots) { checkpoints <- unique(sort(c(0, roots))) @@ -174,22 +172,22 @@ roots_to_partition <- function(roots) { # Efficiently compute coefficients of one-dimensional TF slice function TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { - UhZ <- t(Uh) %*% Zh - UgZ <- t(Ug) %*% Zg - UhVd <- t(Uh) %*% Vdh - UgVd <- t(Ug) %*% Vdg - UhV2 <- t(Uh) %*% V2h - UgV2 <- t(Ug) %*% V2g - VdZh <- sum(Vdh*Zh) - VdZg <- sum(Vdg*Zg) - V2Zh <- sum(V2h*Zh) - V2Zg <- sum(V2g*Zg) + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + UhVd <- t(Uh) %*% Vdh + UgVd <- t(Ug) %*% Vdg + UhV2 <- t(Uh) %*% V2h + UgV2 <- t(Ug) %*% V2g + VdZh <- sum(Vdh*Zh) + VdZg <- sum(Vdg*Zg) + V2Zh <- sum(V2h*Zh) + V2Zg <- sum(V2g*Zg) x0 <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) x1 <- 2*R*(penh * (VdZh - sum(UhZ*UhVd)) - peng * (VdZg - sum(UgZ*UgVd))) x2 <- 2*R*(penh * (V2Zh - sum(UhZ*UhV2)) - peng * (V2Zg - sum(UgZ*UgV2))) x12 <- 2*R^2*(penh * (sum(Vdh*V2h) - sum(UhVd*UhV2)) - peng * (sum(Vdg*V2g) - sum(UgVd*UgV2))) - x11 <- R^2*(penh * (sum(Vdh^2) - sum(UhVd^2)) - peng * (sum(Vdg^2) - sum(UgVd^2))) + x11 <- R^2*(penh * (sum(Vdh^2) - sum(UhVd^2)) - peng * (sum(Vdg^2) - sum(UgVd^2))) x22 <- R^2*(penh * (sum(V2h^2) - sum(UhV2^2)) - peng * (sum(V2g^2) - sum(UgV2^2))) return(list(x11=x11, x22=x22, x12=x12, x1=x1, x2=x2, x0=x0)) @@ -246,7 +244,7 @@ TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { } else { checkpoints <- roots_to_checkpoints(troots) } - + signs <- sign(I(checkpoints)) diffs <- c(0, diff(signs)) changeinds <- which(diffs != 0) From 524720c861bce8aece3ace0483780e4d6236be5d Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 9 Nov 2015 21:24:49 -0800 Subject: [PATCH 047/396] Added constraints for all AIC comparisons --- forLater/josh/sim.aicstop.R | 8 +- selectiveInference/R/funs.groupfs.R | 119 +++++++++++++++------------- 2 files changed, 68 insertions(+), 59 deletions(-) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 3d07ce4d..9f169451 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -6,15 +6,15 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 400 +niters <- 200 n <- 100 p <- 100 G <- 50 maxsteps <- 15 sparsity <- 5 -snr <- 1 +snr <- 2 sigma 
<- 1 -rho <- 0 +rho <- 0.1 aicstop <- 1 instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { @@ -42,6 +42,6 @@ save(pvals, vars, stopped, file = paste0( "_p", p, "_sparsity", sparsity, "_snr", snr, - "_F_rho0.RData")) + "_F_rhopt1.RData")) print(time) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 2a5471e0..7cb83964 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -67,7 +67,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE modelrank <- as.numeric(intercept) if (is.null(sigma)) { - aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1 + k) * (is.null(sigma) + intercept) # This was a fix + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1 + k) * (is.null(sigma) + intercept) } else { aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * intercept } @@ -289,6 +289,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } } + # Compute p-value for each active group for (j in 1:maxsteps) { i <- obj$action[j] @@ -389,73 +390,81 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { })) } - # Additional constraints from AIC stopping? + # Additional constraints from AIC stopping if (attr(obj, "stopped")) { - + aicintervals <- vector("list", maxsteps-1) aicstop <- attr(obj, "aicstop") - ulist <- penlist <- zlist <- vector("list", aicstop+1) - + AICs <- obj$AIC + + # No stopping at 0 steps? + ## if (is.null(sigma)) { + ## aic.begin <- aic.last <- n*(log(2*pi) + log(mean(obj$y^2)) + 1 + k) * (1 + obj$intercept) + ## } else { + ## aic.begin <- aic.last <- sum(obj$y^2)/sigma^2 - n + k * obj$intercept + ## } + ## AICs <- c(aic.begin, obj$AIC) + + ulist <- obj$maxprojs + penlist <- obj$maxpens + zlist <- vector("list", aicstop+1) + zlist[[1]] <- Z if (type == "TC") { etalist <- vector("list", aicstop+1) + etalist[[1]] <- eta } else { vdlist <- v2list <- vector("list", aicstop+1) + vdlist[[1]] <- Vdelta + v2list[[1]] <- V2 } - - for (s in seq(aicstop+1)) { - stepind <- maxsteps - (aicstop+1) + s - if (stepind > 1) { - cproj <- obj$cumprojs[[stepind-1]] - if (type == "TC") { - etalist[[s]] <- cproj %*% eta - } else { - v2list[[s]] <- cproj %*% V2 - vdlist[[s]] <- cproj %*% Vdelta - } - zlist[[s]] <- cproj %*% Z + for (step in 2:maxsteps) { + cproj <- obj$cumprojs[[step-1]] + zlist[[step]] <- cproj %*% Z + if (type == "TC") { + etalist[[step]] <- cproj %*% eta } else { - if (type == "TC") { - etalist[[s]] <- eta - } else { - v2list[[s]] <- V2 - vdlist[[s]] <- Vdelta - } - zlist[[s]] <- Z + vdlist[[step]] <- cproj %*% Vdelta + v2list[[step]] <- cproj %*% V2 } - ulist[[s]] <- obj$maxprojs[[stepind]] - penlist[[s]] <- obj$maxpens[[stepind]] } - intervallist <- c(intervallist, - lapply(1:aicstop, function(s) { - sp <- s+1 - if (type == "TC") { - Ug <- ulist[[s]] - Uh <- ulist[[sp]] + for (step in 1:(maxsteps-1)) { + # Compare AIC at s-1 to AIC at s + # sp indexes step with larger AIC + if (AICs[step] >= AICs[step+1]) { + sp <- step + s <- step+1 + } else { + sp <- step+1 + s <- step + } + + if (type == "TC") { + Ug <- ulist[[s]] + Uh <- ulist[[sp]] # Check this: known sigma has *additive* pen terms - peng <- penlist[[s]] - penh <- prod(unlist(penlist[s:sp])) + peng <- 1 + penh <- penlist[sp] #################################### - etag <- etalist[[s]] - etah <- etalist[[sp]] - Zg <- zlist[[s]] - Zh <- zlist[[sp]] - coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) - 
quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) - } else { - Ug <- ulist[[s]] - Uh <- ulist[[sp]] - peng <- 1 #penlist[[s]] - penh <- penlist[[sp]] - Vdg <- vdlist[[s]] - Vdh <- vdlist[[sp]] - V2g <- v2list[[s]] - V2h <- v2list[[sp]] - Zg <- zlist[[s]] - Zh <- zlist[[sp]] - coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) - TF_roots(R, C, coeffs) - } - })) + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + intstep <- quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + } else { + Ug <- ulist[[s]] + Uh <- ulist[[sp]] + peng <- 1 #penlist[[s]] + penh <- penlist[[sp]] + Vdg <- vdlist[[s]] + Vdh <- vdlist[[sp]] + V2g <- v2list[[s]] + V2h <- v2list[[sp]] + Zg <- zlist[[s]] + Zh <- zlist[[sp]] + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + intstep <- TF_roots(R, C, coeffs) + } + + aicintervals[[step]] <- intstep + } + intervallist <- c(intervallist, aicintervals) } # Compute intersection: From 4601bf85d7f88e4a16013e4174ca152dce1a09e6 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Tue, 10 Nov 2015 12:05:49 -0800 Subject: [PATCH 048/396] Scoping problem with sigma in AIC simulation --- forLater/josh/sim.aicstop.R | 6 ++-- selectiveInference/R/funs.groupfs.R | 56 ++++++++++++++--------------- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 9f169451..67d77205 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -13,18 +13,16 @@ G <- 50 maxsteps <- 15 sparsity <- 5 snr <- 2 -sigma <- 1 rho <- 0.1 aicstop <- 1 instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { - - simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma, rho) + simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma = 1, rho) x <- simd$x y <- simd$y index <- simd$index - fit <- groupfs(x, y, index, maxsteps, k = log(n), aicstop = aicstop) + fit <- groupfs(x, y, index, maxsteps, k = log(n), aicstop = aicstop, verbose=T) pvals <- groupfsInf(fit, verbose=T) return(list(variable = fit$action, pvals = pvals$pv, stopped = attr(fit, "stopped"))) } diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 7cb83964..1fecb69f 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -67,9 +67,10 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE modelrank <- as.numeric(intercept) if (is.null(sigma)) { - aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1 + k) * (is.null(sigma) + intercept) + modelrank <- modelrank + 1 + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1) + k * modelrank # fixed... again } else { - aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * intercept + aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * modelrank } if (verbose) print(paste0("Start: AIC=", round(aic.begin, 3)), quote = FALSE) @@ -392,43 +393,42 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { # Additional constraints from AIC stopping if (attr(obj, "stopped")) { - aicintervals <- vector("list", maxsteps-1) + aicintervals <- vector("list", maxsteps) aicstop <- attr(obj, "aicstop") - AICs <- obj$AIC - - # No stopping at 0 steps? 
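## [Standalone check, toy data; additive constants cancel in comparisons.]
## With unknown sigma, the event AIC(g) < AIC(h) is the multiplicative-
## penalty event RSS_g*exp(k*df_g/n) < RSS_h*exp(k*df_h/n), which is what
## the pen factors below encode:
set.seed(1)
n <- 50; k <- log(n)
x <- matrix(rnorm(n*4), n)
y <- rnorm(n)
rss <- function(d) sum(lsfit(x[, 1:d, drop = FALSE], y, intercept = FALSE)$res^2)
aic <- function(d) n*(log(2*pi) + log(rss(d)/n) + 1) + k*d
(aic(2) < aic(3)) == (rss(2)*exp(k*2/n) < rss(3)*exp(k*3/n))  # TRUE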
- ## if (is.null(sigma)) { - ## aic.begin <- aic.last <- n*(log(2*pi) + log(mean(obj$y^2)) + 1 + k) * (1 + obj$intercept) - ## } else { - ## aic.begin <- aic.last <- sum(obj$y^2)/sigma^2 - n + k * obj$intercept - ## } - ## AICs <- c(aic.begin, obj$AIC) - - ulist <- obj$maxprojs - penlist <- obj$maxpens - zlist <- vector("list", aicstop+1) - zlist[[1]] <- Z if (type == "TC") { - etalist <- vector("list", aicstop+1) - etalist[[1]] <- eta + pen0 <- k * obj$intercept + aic.begin <- aic.last <- sum(obj$y^2)/sigma^2 - n + k * obj$intercept } else { - vdlist <- v2list <- vector("list", aicstop+1) - vdlist[[1]] <- Vdelta - v2list[[1]] <- V2 + pen0 <- exp(k * (1+obj$intercept)/n) + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(obj$y^2)) + 1) + k * (1 + obj$intercept) + } + AICs <- c(aic.begin, obj$AIC) + + ulist <- c(list(matrix(0, n, 1)), obj$maxprojs) + penlist <- c(pen0, obj$maxpens) + zlist <- vector("list", maxsteps+1) + zlist[[1]] <- zlist[[2]] <- Z + if (type == "TC") { + etalist <- vector("list", maxsteps+1) + etalist[[1]] <- etalist[[2]] <- eta + } else { + vdlist <- v2list <- vector("list", maxsteps+1) + vdlist[[1]] <- vdlist[[2]] <- Vdelta + v2list[[1]] <- v2list[[2]] <- V2 } for (step in 2:maxsteps) { cproj <- obj$cumprojs[[step-1]] - zlist[[step]] <- cproj %*% Z + zlist[[step+1]] <- cproj %*% Z if (type == "TC") { - etalist[[step]] <- cproj %*% eta + etalist[[step+1]] <- cproj %*% eta } else { - vdlist[[step]] <- cproj %*% Vdelta - v2list[[step]] <- cproj %*% V2 + vdlist[[step+1]] <- cproj %*% Vdelta + v2list[[step+1]] <- cproj %*% V2 } } - for (step in 1:(maxsteps-1)) { - # Compare AIC at s-1 to AIC at s + for (step in 1:maxsteps) { + # Compare AIC at s+1 to AIC at s # sp indexes step with larger AIC if (AICs[step] >= AICs[step+1]) { sp <- step From 202acbcef75a3bd271113a4c0639d4376fc9fdd1 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Tue, 10 Nov 2015 15:47:52 -0800 Subject: [PATCH 049/396] Still debugging F with AICstop --- forLater/josh/funs.sims.R | 2 +- forLater/josh/sim.aicstop.R | 11 +++++------ selectiveInference/R/funs.groupfs.R | 18 +++++++++++------- selectiveInference/R/funs.quadratic.R | 2 ++ 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/forLater/josh/funs.sims.R b/forLater/josh/funs.sims.R index f07fa551..53af73f7 100644 --- a/forLater/josh/funs.sims.R +++ b/forLater/josh/funs.sims.R @@ -38,7 +38,7 @@ randomGaussianFixedP <- function(n, p, G = p, sparsity = 0, snr = 0, sigma = 1, if (sparsity > 0 && snr > 0) { for (j in 1:sparsity) { inds <- which(index == j) - beta[inds] <- sqrt(2*log(G)) * sample(c(-1,1), length(inds), replace=T) * snr/sqrt(n*length(inds)) + beta[inds] <- snr * sqrt(2*log(G)/(n*length(inds))) * sample(c(-1,1), length(inds), replace=T) } } y <- x %*% beta + sigma * rnorm(n) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 67d77205..0fd2d26b 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -10,10 +10,10 @@ niters <- 200 n <- 100 p <- 100 G <- 50 -maxsteps <- 15 -sparsity <- 5 +maxsteps <- 10 +sparsity <- 3 snr <- 2 -rho <- 0.1 +rho <- 0 aicstop <- 1 instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { @@ -21,8 +21,7 @@ instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { x <- simd$x y <- simd$y index <- simd$index - - fit <- groupfs(x, y, index, maxsteps, k = log(n), aicstop = aicstop, verbose=T) + fit <- groupfs(x, y, index, maxsteps, k = log(n), aicstop = aicstop) pvals <- groupfsInf(fit, verbose=T) return(list(variable = fit$action, 
pvals = pvals$pv, stopped = attr(fit, "stopped"))) } @@ -40,6 +39,6 @@ save(pvals, vars, stopped, file = paste0( "_p", p, "_sparsity", sparsity, "_snr", snr, - "_F_rhopt1.RData")) + "_F_rho0.RData")) print(time) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 1fecb69f..521ed579 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -278,6 +278,10 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } else { if (is.null(obj$sigma)) { type <- "TF" + Pf <- svdu_thresh(obj$x[,which(obj$index %in% obj$action), drop = FALSE]) + dffull <- ncol(Pf) + df2 <- n - dffull - obj$intercept - 1 + Pfull <- Pf %*% t(Pf) ## if (n >= 2*p) { ## sigma <- sqrt(sum(lsfit(obj$x, obj$y, intercept = obj$intercept)$res^2)/(n-p-obj$intercept)) ## } else { @@ -290,7 +294,6 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } } - # Compute p-value for each active group for (j in 1:maxsteps) { i <- obj$action[j] @@ -325,20 +328,19 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { } else { - Pfull <- svdu_thresh(obj$x[,which(obj$index %in% obj$action), drop = FALSE]) if (length(obj$action) > 1) { minus_i <- setdiff(obj$action, i) Psub <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) Z <- Psub %*% t(Psub) %*% obj$y - df1 <- ncol(Pfull) - ncol(Psub) + df1 <- dffull - ncol(Psub) } else { Z <- 0 - df1 <- ncol(Pfull) + df1 <- dffull + obj$intercept + 1 } - df2 <- n - ncol(Pfull) + C <- df1/df2 R1 <- obj$y - Z - R2 <- obj$y - Pfull %*% t(Pfull) %*% obj$y + R2 <- obj$y - Pfull %*% obj$y R1sq <- sum(R1^2) R2sq <- sum(R2^2) R <- sqrt(R1sq) @@ -500,6 +502,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { out$sigma <- sigma } else { out$TF <- Tstats + out$df2 <- df2 } out$df <- dfs out$support <- supports @@ -604,7 +607,7 @@ TF_surv <- function(TF, df1, df2, E) { value <- numer/denom # Force p-value to lie in the [0,1] interval # in case of numerical issues - value <- max(0, min(1, value)) + #value <- max(0, min(1, value)) value } @@ -623,6 +626,7 @@ TF_interval <- function(lower, upper, df1, df2) { } num_int_F <- function(a, b, df1, df2, nsamp = 10000) { + print("numerical!") grid <- seq(from=a, to=b, length.out=nsamp) integrand <- df(grid, df1, df2) return((b-a)*mean(integrand)) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 5b59fcbc..0e8ebcf7 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -100,6 +100,8 @@ quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh } quadratic_roots <- function(A, B, C, tol) { + + print("CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC") disc <- B^2 - 4*A*C b2a <- -B/(2*A) From 60fd5efdfc8a6f6bc34d540a021c1b68e32a5d83 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Thu, 12 Nov 2015 16:13:02 -0800 Subject: [PATCH 050/396] cproj index was off, still no cigar --- forLater/josh/sim.aicstop.R | 4 ++-- selectiveInference/R/funs.groupfs.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 0fd2d26b..68934ace 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -13,7 +13,7 @@ G <- 50 maxsteps <- 10 sparsity <- 3 snr <- 2 -rho <- 0 +rho <- 0.1 aicstop <- 1 instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { @@ -39,6 +39,6 @@ save(pvals, vars, stopped, file = paste0( "_p", p, "_sparsity", sparsity, "_snr", 
snr, - "_F_rho0.RData")) + "_F_rhopt1.RData")) print(time) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 521ed579..c81645b9 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -419,7 +419,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { v2list[[1]] <- v2list[[2]] <- V2 } for (step in 2:maxsteps) { - cproj <- obj$cumprojs[[step-1]] + cproj <- obj$cumprojs[[step]] zlist[[step+1]] <- cproj %*% Z if (type == "TC") { etalist[[step+1]] <- cproj %*% eta From 550baf4e3fe2eb43b1bbed5337ee855b2907b782 Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Mon, 23 Nov 2015 21:47:11 -0500 Subject: [PATCH 051/396] Fixed very silly bug in fixedLassoInf, that pops up when you don't standardize X manually. --- selectiveInference/R/funs.fixed.R | 2 +- tests/test.fixed.R | 34 ++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index d3760907..b7f20fd4 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -103,7 +103,7 @@ fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha= sign[j] = sign(sum(vj*y)) vj = sign[j] * vj a = poly.pval(y,G,u,vj,sigma,bits) - pv[j] = a$pv * mj # Unstandardize (mult by norm of vj) + pv[j] = a$pv vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) vmat[j,] = vj * mj # Unstandardize (mult by norm of vj) diff --git a/tests/test.fixed.R b/tests/test.fixed.R index 624fb65d..553fe35d 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -1,10 +1,38 @@ -#library(selectiveInference) -library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(selectiveInference) +#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") #options(error=dump.frames) #attach("/Users/tibs/dropbox/PAPERS/lasso/lasso3/.RData") -c +##### +n=50 +p=10 +sigma=.7 +beta=c(3,2,0,0,rep(0,p-4)) +set.seed(43) +nsim = 100 +pvals <- matrix(NA, nrow=nsim, ncol=p) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta +for (i in 1:nsim) { +y=mu+sigma*rnorm(n) +#y=y-mean(y) +# first run glmnet +gfit=glmnet(x,y,intercept=F,standardize=F,thresh=1e-8) +lambda = 1 +#extract coef for a given lambda; Note the 1/n factor! 
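## [Standalone illustration of the 1/n factor; toy data.] glmnet minimizes
## RSS/(2n) + s*||beta||_1 while fixedLassoInf's lambda multiplies
## ||beta||_1 against RSS/2, so s = lambda/n. The KKT conditions make the
## scaling visible: at any s on the computed path, |x_j'(y - x b)| = n*s
## on the active set (up to the convergence threshold):
library(glmnet)
set.seed(2)
x0 = scale(matrix(rnorm(50*10), 50), TRUE, TRUE)/sqrt(49)
y0 = rnorm(50)
g0 = glmnet(x0, y0, intercept=F, standardize=F, thresh=1e-12)
s0 = g0$lambda[15]
b0 = as.numeric(coef(g0, s=s0))[-1]
max(abs(crossprod(x0, y0 - x0 %*% b0))[b0 != 0]) - 50*s0  # ~ 0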
+beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +# compute fixed lambda p-values and selection intervals +aa = fixedLassoInf(x,y,beta,lambda,intercept=F,sigma=sigma) +pvals[i, which(beta != 0)] <- aa$pv +} +nulls = which(!is.na(pvals[,1]) & !is.na(pvals[,2])) +np = pvals[nulls,-(1:2)] +mean(np[!is.na(np)] < 0.1) + +##### + a=lar(x,y) aa=larInf(a) From 96113963e891ead82a131fc42c344488fb7a1f39 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 12:10:38 -0800 Subject: [PATCH 052/396] Trying to fix aicstop --- forLater/josh/sim.aicstop.R | 3 +- selectiveInference/R/funs.groupfs.R | 43 ++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 68934ace..e55ce929 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -20,8 +20,9 @@ instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma = 1, rho) x <- simd$x y <- simd$y + y <- y - mean(y) index <- simd$index - fit <- groupfs(x, y, index, maxsteps, k = log(n), aicstop = aicstop) + fit <- groupfs(x, y, index, maxsteps, intercept = F, k = log(n), aicstop = aicstop, verbose = T) pvals <- groupfsInf(fit, verbose=T) return(list(variable = fit$action, pvals = pvals$pv, stopped = attr(fit, "stopped"))) } diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index c81645b9..b26f9857 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -68,7 +68,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE modelrank <- as.numeric(intercept) if (is.null(sigma)) { modelrank <- modelrank + 1 - aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2)) + 1) + k * modelrank # fixed... again + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2))) + k * (n + modelrank) } else { aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * modelrank } @@ -101,7 +101,7 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE # Compute AIC if (is.null(sigma)) { - added$AIC <- n * log(added$maxterm/n) - k * added$df + n + n*log(2*pi) + k * modelrank + added$AIC <- n * log(added$maxterm/n) - k * added$df + n*log(2*pi) + k * (n + modelrank) } else { added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank } @@ -238,8 +238,8 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. #' #' @param obj Object returned by \code{\link{groupfs}} function -#' @param sigma Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate. -#' @param verbose Print out progress along the way? Default is FALSE. +#' @param sigma Estimate of error standard deviation. If NULL (default), p-values will be computed from a selective F test. +#' @param verbose Print out progress along the way? Default is TRUE. 
#' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. #' #' \describe{ @@ -250,7 +250,7 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' \item{df}{Rank of group of variables when it was added to the model.} #' \item{support}{List of intervals defining the truncation region of the truncated chi.} #' } -groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { +groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { if (!is.null(obj$cvobj) && attr(obj, "stopped")) { stop("Cross-validation and early stopping cannot be used simultaneously.") @@ -402,7 +402,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { aic.begin <- aic.last <- sum(obj$y^2)/sigma^2 - n + k * obj$intercept } else { pen0 <- exp(k * (1+obj$intercept)/n) - aic.begin <- aic.last <- n*(log(2*pi) + log(mean(obj$y^2)) + 1) + k * (1 + obj$intercept) + aic.begin <- n*(log(2*pi) + log(mean(obj$y^2))) + k * (1 + n + obj$intercept) } AICs <- c(aic.begin, obj$AIC) @@ -418,7 +418,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { vdlist[[1]] <- vdlist[[2]] <- Vdelta v2list[[1]] <- v2list[[2]] <- V2 } - for (step in 2:maxsteps) { + for (step in 1:maxsteps) { cproj <- obj$cumprojs[[step]] zlist[[step+1]] <- cproj %*% Z if (type == "TC") { @@ -435,31 +435,48 @@ groupfsInf <- function(obj, sigma = NULL, verbose = FALSE) { if (AICs[step] >= AICs[step+1]) { sp <- step s <- step+1 + peng <- 1 + penh <- penlist[[s]] } else { sp <- step+1 s <- step + peng <- penlist[[sp]] + penh <- 1 } if (type == "TC") { Ug <- ulist[[s]] Uh <- ulist[[sp]] - # Check this: known sigma has *additive* pen terms - peng <- 1 - penh <- penlist[sp] - #################################### + # Fix this: known sigma has *additive* pen terms + #################################### coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) intstep <- quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) } else { + # g indexes the lower AIC Ug <- ulist[[s]] Uh <- ulist[[sp]] - peng <- 1 #penlist[[s]] - penh <- penlist[[sp]] Vdg <- vdlist[[s]] Vdh <- vdlist[[sp]] V2g <- v2list[[s]] V2h <- v2list[[sp]] Zg <- zlist[[s]] Zh <- zlist[[sp]] + + g1 <- function(t) sqrt(C*t/(1+C*t)) + g2 <- function(t) 1/sqrt(1+C*t) + Yg <- Zg + R*(g1(TF)*Vdg + g2(TF)*V2g) + UYg <- t(Ug) %*% Yg + sse <- (sum(Yg^2) - sum(UYg^2)) + AICg <- n*log(sse/n) + n*log(2*pi) + k * (1 + n + 5) + Yh <- Zh + R*(g1(TF)*Vdh + g2(TF)*V2h) + UYh <- t(Uh) %*% Yh + sse <- (sum(Yh^2) - sum(UYh^2)) + AICh <- n*log(sse/n) + n*log(2*pi) + k * (1 + n + 6) + # This does reproduce the correct AIC values + # AIC(g) < AIC(h) + # <-> log(sseG) + (k/n) * ncol(Ug) < log(sseH) + + # Infeasible constraint if penh = 1, why? 
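## [Standalone numeric sketch of what these constraints feed into, with
## assumed degrees of freedom and a toy region.] Once the selection region
## E is assembled, the TF p-value is a ratio of F-distribution masses:
df1 <- 2; df2 <- 40; TFobs <- 3.4
E <- rbind(c(0.5, 1.5), c(3, Inf))
fmass <- function(a, b) pf(b, df1, df2) - pf(a, df1, df2)
num <- sum(apply(E, 1, function(iv) fmass(max(iv[1], TFobs), max(iv[2], TFobs))))
den <- sum(apply(E, 1, function(iv) fmass(iv[1], iv[2])))
num/den  # P(F >= TFobs | F in E)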
coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) intstep <- TF_roots(R, C, coeffs) } From 787b6bb5e5eaf28233186b2ea93b934aa1ec4ed4 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 12:12:31 -0800 Subject: [PATCH 053/396] Simulation for fixed maxsteps --- forLater/josh/sim.groupfs.R | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 forLater/josh/sim.groupfs.R diff --git a/forLater/josh/sim.groupfs.R b/forLater/josh/sim.groupfs.R new file mode 100644 index 00000000..b0e891f5 --- /dev/null +++ b/forLater/josh/sim.groupfs.R @@ -0,0 +1,42 @@ +library(intervals) +source("funs.sims.R") +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +niters <- 200 +n <- 100 +p <- 400 +G <- 200 +maxsteps <- 5 +sparsity <- 3 +snr <- 2 +rho <- .1 + +instance <- function(n, p, G, sparsity, snr, rho, maxsteps) { + simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma = 1, rho) + x <- simd$x + y <- simd$y + index <- simd$index + fit <- groupfs(x, y, index, maxsteps, k = log(n)) + pvals <- groupfsInf(fit, verbose=T) + return(list(variable = fit$action, pvals = pvals$pv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, G, sparsity, snr, rho, maxsteps)) +}) + +pvals <- do.call(c, list(output[2,])) +vars <- do.call(c, list(output[1,])) + +save(pvals, vars, file = paste0( + "results_n", n, + "_p", p, + "_sparsity", sparsity, + "_snr", snr, + "_F.RData")) + +print(time) From 99a3fc1f52ee0f0010243885417f160e1e5205b4 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 12:29:56 -0800 Subject: [PATCH 054/396] Setting up simulations for some final tests --- forLater/josh/sim.aicstop.R | 32 +++++++++++++++++---------- forLater/josh/sim.groupfs.R | 31 +++++++++++++++++--------- selectiveInference/R/funs.quadratic.R | 2 +- 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index e55ce929..22f6665d 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -1,28 +1,32 @@ library(intervals) source("funs.sims.R") -source("selectiveInference/R/cv.R") +#source("selectiveInference/R/cv.R") source("../../selectiveInference/R/funs.groupfs.R") source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 200 -n <- 100 -p <- 100 -G <- 50 +known <- FALSE +niters <- 300 +n <- 50 +p <- 150 +G <- 75 maxsteps <- 10 -sparsity <- 3 +sparsity <- 4 snr <- 2 -rho <- 0.1 +rho <- 0 aicstop <- 1 instance <- function(n, p, G, sparsity, snr, rho, maxsteps, aicstop) { simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma = 1, rho) x <- simd$x y <- simd$y - y <- y - mean(y) index <- simd$index - fit <- groupfs(x, y, index, maxsteps, intercept = F, k = log(n), aicstop = aicstop, verbose = T) + if (known) { + fit <- groupfs(x, y, index, maxsteps, sigma = 1, k = 2*log(G), aicstop = aicstop, verbose = T) + } else { + fit <- groupfs(x, y, index, maxsteps, k = 2*log(G), aicstop = aicstop, verbose = T) + } pvals <- groupfsInf(fit, verbose=T) return(list(variable = fit$action, pvals = pvals$pv, stopped = attr(fit, "stopped"))) } @@ -36,10 +40,14 @@ pvals <- do.call(c, list(output[2,])) vars <- do.call(c, list(output[1,])) save(pvals, vars, stopped, file = paste0( - "results_aic", aicstop, "_n", n, + 
"results_aic_n", n, "_p", p, + "_g", G, + "_rho", gsub(pattern = ".", replacement = "", x = rho, fixed = T), + "_maxsteps", maxsteps, "_sparsity", sparsity, - "_snr", snr, - "_F_rhopt1.RData")) + "_snr", round(snr), + "_known", known, + ".RData")) print(time) diff --git a/forLater/josh/sim.groupfs.R b/forLater/josh/sim.groupfs.R index b0e891f5..9e606319 100644 --- a/forLater/josh/sim.groupfs.R +++ b/forLater/josh/sim.groupfs.R @@ -1,26 +1,31 @@ library(intervals) source("funs.sims.R") -source("selectiveInference/R/cv.R") +#source("selectiveInference/R/cv.R") source("../../selectiveInference/R/funs.groupfs.R") source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 200 -n <- 100 -p <- 400 -G <- 200 -maxsteps <- 5 -sparsity <- 3 +known <- TRUE +niters <- 300 +n <- 50 +p <- 150 +G <- 75 +maxsteps <- 8 +sparsity <- 4 snr <- 2 -rho <- .1 +rho <- 0 instance <- function(n, p, G, sparsity, snr, rho, maxsteps) { simd <- randomGaussianFixedP(n, p, G, sparsity, snr, sigma = 1, rho) x <- simd$x y <- simd$y index <- simd$index - fit <- groupfs(x, y, index, maxsteps, k = log(n)) + if (known) { + fit <- groupfs(x, y, index, maxsteps, sigma = 1, k = log(n)) + } else { + fit <- groupfs(x, y, index, maxsteps, k = log(n)) + } pvals <- groupfsInf(fit, verbose=T) return(list(variable = fit$action, pvals = pvals$pv)) } @@ -35,8 +40,12 @@ vars <- do.call(c, list(output[1,])) save(pvals, vars, file = paste0( "results_n", n, "_p", p, + "_g", G, + "_rho", gsub(pattern = ".", replacement = "", x = rho, fixed = T), + "_maxsteps", maxsteps, "_sparsity", sparsity, - "_snr", snr, - "_F.RData")) + "_snr", round(snr), + "_known", known, + ".RData")) print(time) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 0e8ebcf7..f8134126 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -101,7 +101,7 @@ quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh quadratic_roots <- function(A, B, C, tol) { - print("CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC") +# print("CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC") disc <- B^2 - 4*A*C b2a <- -B/(2*A) From 6a9eca8ac36f716f338055d70ebd278da24002c8 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 15:10:49 -0800 Subject: [PATCH 055/396] Still fixing aicstop --- selectiveInference/R/funs.groupfs.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index b26f9857..84055aa7 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -418,14 +418,14 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { vdlist[[1]] <- vdlist[[2]] <- Vdelta v2list[[1]] <- v2list[[2]] <- V2 } - for (step in 1:maxsteps) { + for (step in 1:(maxsteps-1)) { cproj <- obj$cumprojs[[step]] - zlist[[step+1]] <- cproj %*% Z + zlist[[step+2]] <- cproj %*% Z if (type == "TC") { - etalist[[step+1]] <- cproj %*% eta + etalist[[step+2]] <- cproj %*% eta } else { - vdlist[[step+1]] <- cproj %*% Vdelta - v2list[[step+1]] <- cproj %*% V2 + vdlist[[step+2]] <- cproj %*% Vdelta + v2list[[step+2]] <- cproj %*% V2 } } @@ -440,8 +440,8 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { } else { sp <- step+1 s <- step - peng <- penlist[[sp]] - penh <- 1 + peng <- 1 + penh <- penlist[[sp]] } if (type == "TC") { @@ -467,11 +467,11 @@ groupfsInf <- function(obj, 
sigma = NULL, verbose = TRUE) { Yg <- Zg + R*(g1(TF)*Vdg + g2(TF)*V2g) UYg <- t(Ug) %*% Yg sse <- (sum(Yg^2) - sum(UYg^2)) - AICg <- n*log(sse/n) + n*log(2*pi) + k * (1 + n + 5) + AICg <- n*log(sse/n) + n*log(2*pi) + k * (2 + n + 6) Yh <- Zh + R*(g1(TF)*Vdh + g2(TF)*V2h) UYh <- t(Uh) %*% Yh sse <- (sum(Yh^2) - sum(UYh^2)) - AICh <- n*log(sse/n) + n*log(2*pi) + k * (1 + n + 6) + AICh <- n*log(sse/n) + n*log(2*pi) + k * (2 + n + 7) # This does reproduce the correct AIC values # AIC(g) < AIC(h) # <-> log(sseG) + (k/n) * ncol(Ug) < log(sseH) From 47420c564fe8e7ef1345542684ba8f4631174231 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 15:23:03 -0800 Subject: [PATCH 056/396] Fixed error related to models with only one variable --- forLater/josh/sim.aicstop.R | 2 +- selectiveInference/R/funs.groupfs.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index 22f6665d..f2aa653a 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -7,7 +7,7 @@ source("../../selectiveInference/R/funs.common.R") set.seed(1) known <- FALSE -niters <- 300 +niters <- 200 n <- 50 p <- 150 G <- 75 diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 84055aa7..866d3674 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -334,7 +334,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { Z <- Psub %*% t(Psub) %*% obj$y df1 <- dffull - ncol(Psub) } else { - Z <- 0 + Z <- rep(0, n) df1 <- dffull + obj$intercept + 1 } From b610d03f7f7780ca431402aa3cef4c8080cdfeac Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 15:26:16 -0800 Subject: [PATCH 057/396] Fixed another error related to models with only one variable --- selectiveInference/R/funs.groupfs.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 866d3674..96ceacb6 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -418,14 +418,16 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { vdlist[[1]] <- vdlist[[2]] <- Vdelta v2list[[1]] <- v2list[[2]] <- V2 } - for (step in 1:(maxsteps-1)) { - cproj <- obj$cumprojs[[step]] - zlist[[step+2]] <- cproj %*% Z - if (type == "TC") { - etalist[[step+2]] <- cproj %*% eta - } else { - vdlist[[step+2]] <- cproj %*% Vdelta - v2list[[step+2]] <- cproj %*% V2 + if (maxsteps > 1) { + for (step in 1:(maxsteps-1)) { + cproj <- obj$cumprojs[[step]] + zlist[[step+2]] <- cproj %*% Z + if (type == "TC") { + etalist[[step+2]] <- cproj %*% eta + } else { + vdlist[[step+2]] <- cproj %*% Vdelta + v2list[[step+2]] <- cproj %*% V2 + } } } From fe44115408ae190d7665aeb27493abae758dc714 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 16:12:55 -0800 Subject: [PATCH 058/396] Working on AICstop for known sigma --- selectiveInference/R/funs.groupfs.R | 69 +++++++++++------------------ 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 96ceacb6..8fc6bb77 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -433,53 +433,38 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { for (step in 1:maxsteps) { # Compare AIC at s+1 to AIC at s - # sp indexes step with larger AIC - if (AICs[step] >= AICs[step+1]) { - sp <- 
step - s <- step+1 - peng <- 1 - penh <- penlist[[s]] - } else { - sp <- step+1 - s <- step - peng <- 1 - penh <- penlist[[sp]] - } - + # roots() functions assume g indexes smaller AIC + # this is step+1 until the last step + peng <- penlist[[step+1]] + Ug <- ulist[[step+1]] + Uh <- ulist[[step]] + Zg <- zlist[[step+1]] + Zh <- zlist[[step]] + if (type == "TC") { - Ug <- ulist[[s]] - Uh <- ulist[[sp]] - # Fix this: known sigma has *additive* pen terms - #################################### + penh <- 0 + etag <- etalist[[s]] + etah <- etalist[[sp]] coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + + if (AICs[step] < AICs[step+1]) { + coeffs <- lapply(coeffs, function(coeff) -coeff) + } + intstep <- quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + } else { - # g indexes the lower AIC - Ug <- ulist[[s]] - Uh <- ulist[[sp]] - Vdg <- vdlist[[s]] - Vdh <- vdlist[[sp]] - V2g <- v2list[[s]] - V2h <- v2list[[sp]] - Zg <- zlist[[s]] - Zh <- zlist[[sp]] - - g1 <- function(t) sqrt(C*t/(1+C*t)) - g2 <- function(t) 1/sqrt(1+C*t) - Yg <- Zg + R*(g1(TF)*Vdg + g2(TF)*V2g) - UYg <- t(Ug) %*% Yg - sse <- (sum(Yg^2) - sum(UYg^2)) - AICg <- n*log(sse/n) + n*log(2*pi) + k * (2 + n + 6) - Yh <- Zh + R*(g1(TF)*Vdh + g2(TF)*V2h) - UYh <- t(Uh) %*% Yh - sse <- (sum(Yh^2) - sum(UYh^2)) - AICh <- n*log(sse/n) + n*log(2*pi) + k * (2 + n + 7) - # This does reproduce the correct AIC values - # AIC(g) < AIC(h) - # <-> log(sseG) + (k/n) * ncol(Ug) < log(sseH) - - # Infeasible constraint if penh = 1, why? + penh <- 1 + Vdg <- vdlist[[step+1]] + Vdh <- vdlist[[step]] + V2g <- v2list[[step+1]] + V2h <- v2list[[step]] coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + + if (AICs[step] < AICs[step+1]) { + coeffs <- lapply(coeffs, function(coeff) -coeff) + } + intstep <- TF_roots(R, C, coeffs) } From 8ce41f1d25de5211bedef4093756ed27c306a20c Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 16:16:24 -0800 Subject: [PATCH 059/396] Still fixing known sigma AICstop --- selectiveInference/R/funs.groupfs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 8fc6bb77..004ba649 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -443,8 +443,8 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { if (type == "TC") { penh <- 0 - etag <- etalist[[s]] - etah <- etalist[[sp]] + etag <- etalist[[step+1]] + etah <- etalist[[step]] coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) if (AICs[step] < AICs[step+1]) { From 5ed581f4d41e7195154f57d09d039f618daf0fd5 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 16:40:51 -0800 Subject: [PATCH 060/396] Documentation update --- selectiveInference/man/factorDesign.Rd | 3 ++- selectiveInference/man/groupfs.Rd | 3 ++- selectiveInference/man/groupfsInf.Rd | 14 +++++--------- selectiveInference/man/predict.groupfs.Rd | 3 ++- selectiveInference/man/scaleGroups.Rd | 3 ++- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/selectiveInference/man/factorDesign.Rd b/selectiveInference/man/factorDesign.Rd index 3a42e2ea..d5d2576d 100644 --- a/selectiveInference/man/factorDesign.Rd +++ b/selectiveInference/man/factorDesign.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R 
\name{factorDesign} \alias{factorDesign} \title{Expand a data frame with factors to form a design matrix with the full binary encoding of each factor.} diff --git a/selectiveInference/man/groupfs.Rd b/selectiveInference/man/groupfs.Rd index 78cd32f2..1ea9dcda 100644 --- a/selectiveInference/man/groupfs.Rd +++ b/selectiveInference/man/groupfs.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R \name{groupfs} \alias{groupfs} \title{Select a model with forward stepwise.} diff --git a/selectiveInference/man/groupfsInf.Rd b/selectiveInference/man/groupfsInf.Rd index 7c7190fa..42fcbe3a 100644 --- a/selectiveInference/man/groupfsInf.Rd +++ b/selectiveInference/man/groupfsInf.Rd @@ -1,21 +1,17 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R \name{groupfsInf} \alias{groupfsInf} \title{Compute selective p-values for a model fitted by \code{groupfs}.} \usage{ -groupfsInf(obj, sigma = NULL, type = c("all", "aic"), ntimes = 2, - verbose = FALSE) +groupfsInf(obj, sigma = NULL, verbose = TRUE) } \arguments{ \item{obj}{Object returned by \code{\link{groupfs}} function} -\item{sigma}{Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and the mean squared residual of the selected model when n < 2p. In the latter case, the user should use \code{\link{estimateSigma}} function for a more accurate estimate.} +\item{sigma}{Estimate of error standard deviation. If NULL (default), p-values will be computed from a selective F test.} -\item{type}{Type of conditional p-values to compute. With "all" (default), p-values are computed conditional on the final model with all variables up to \code{maxsteps}; with "aic" the number of steps is chosen after which the AIC criterion increases \code{ntimes} in a row, and then the same type of analysis as in "all" is carried out for the active variables at that number of steps.} - -\item{ntimes}{Number of steps for which AIC criterion has to increase before minimizing point is declared.} - -\item{verbose}{Print out progress along the way? Default is FALSE.} +\item{verbose}{Print out progress along the way? Default is TRUE.} } \value{ An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. diff --git a/selectiveInference/man/predict.groupfs.Rd b/selectiveInference/man/predict.groupfs.Rd index 4a4c1497..14af2113 100644 --- a/selectiveInference/man/predict.groupfs.Rd +++ b/selectiveInference/man/predict.groupfs.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R \name{predict.groupfs} \alias{predict.groupfs} \title{Prediction and coefficient functions for \code{\link{groupfs}}. 
diff --git a/selectiveInference/man/scaleGroups.Rd b/selectiveInference/man/scaleGroups.Rd index 1e7e1c4a..e5a93fab 100644 --- a/selectiveInference/man/scaleGroups.Rd +++ b/selectiveInference/man/scaleGroups.Rd @@ -1,4 +1,5 @@ -% Generated by roxygen2 (4.0.1.99): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R \name{scaleGroups} \alias{scaleGroups} \title{Center and scale design matrix by groups} From f2c86a847ff712e7bc8a863a3d702d595f6be63b Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 17:15:03 -0800 Subject: [PATCH 061/396] More documenting --- selectiveInference/R/funs.groupfs.R | 26 ++++++++-------------- selectiveInference/R/funs.quadratic.R | 32 --------------------------- selectiveInference/man/groupfs.Rd | 8 +++---- selectiveInference/man/groupfsInf.Rd | 8 +++---- 4 files changed, 17 insertions(+), 57 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 004ba649..00ee26a6 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -4,11 +4,11 @@ #' #' @param x Matrix of predictors (n by p). #' @param y Vector of outcomes (length n). -#' @param index Group membership indicator of length p. +#' @param index Group membership indicator of length p. Check that \code{sort(unique(index)) = 1:G} where \code{G} is the number of distinct groups. #' @param maxsteps Maximum number of steps for forward stepwise. -#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link[stats]{extractAIC}} for details. -#' @param k Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC. -#' @param intercept Should an intercept be included in the model? Default is TRUE. +#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. When NULL, \code{link{groupfsInf}} performs truncated F inference instead of truncated \eqn{\chi}. See \code{\link[stats]{extractAIC}} for details on the AIC criterion. +#' @param k Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = 2log(p)} for RIC (best for high dimensions, when \eqn{p > n}). If \eqn{G < p} then RIC may be too restrictive and it would be better to use \code{log(G) < k < 2log(p)}. +#' @param intercept Should an intercept be included in the model? Default is TRUE. Does not count as a step. #' @param center Should the columns of the design matrix be centered? Default is TRUE. #' @param normalize Should the design matrix be normalized? Default is TRUE. #' @param aicstop Early stopping if AIC increases. Default is 0 corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}. @@ -235,10 +235,10 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' Compute selective p-values for a model fitted by \code{groupfs}. 
#' -#' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. +#' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. If the \code{sigma} to \code{\link{groupfs}} was NULL then groupfsInf uses truncated \eqn{F} statistics instead of truncated \eqn{\chi}. The \code{sigma} argument to groupfsInf allows users to override and use \eqn{\chi}, but this is not recommended unless \eqn{\sigma} can be estimated well (i.e. \eqn{n > p}). #' #' @param obj Object returned by \code{\link{groupfs}} function -#' @param sigma Estimate of error standard deviation. If NULL (default), p-values will be computed from a selective F test. +#' @param sigma Estimate of error standard deviation. Default is NULL and in this case groupfsInf uses the value of sigma specified to \code{\link{groupfs}}. #' @param verbose Print out progress along the way? Default is TRUE. #' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. #' @@ -246,9 +246,9 @@ add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { #' \item{vars}{Labels of the active groups in the order they were included.} #' \item{pv}{Selective p-values computed from appropriate truncated distributions.} #' \item{sigma}{Estimate of error variance used in computing p-values.} -#' \item{TC}{Observed value of truncated chi.} +#' \item{TC or TF}{Observed value of truncated \eqn{\chi} or \eqn{F}.} #' \item{df}{Rank of group of variables when it was added to the model.} -#' \item{support}{List of intervals defining the truncation region of the truncated chi.} +#' \item{support}{List of intervals defining the truncation region of the corresponding statistic.} #' } groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { @@ -282,12 +282,6 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { dffull <- ncol(Pf) df2 <- n - dffull - obj$intercept - 1 Pfull <- Pf %*% t(Pf) - ## if (n >= 2*p) { - ## sigma <- sqrt(sum(lsfit(obj$x, obj$y, intercept = obj$intercept)$res^2)/(n-p-obj$intercept)) - ## } else { - ## sigma = sqrt(obj$log$RSS[length(obj$log$RSS)]/(n-Ep-obj$intercept)) - ## warning(paste(sprintf("p > n/2, and sigmahat = %0.3f used as an estimate of sigma;",sigma), "you may want to use the estimateSigma function")) - ## } } else { type <- "TC" sigma <- obj$sigma @@ -381,7 +375,6 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { zcvquad <- t(Z) %*% cvquad vdcvquad <- t(Vdelta) %*% cvquad v2cvquad <- t(V2) %*% cvquad - # (r*(vd*g1 + v2*g2) + z)^T cvquad (r*(vd*g1 + v2*g2) + z) x0 <- zcvquad %*% Z x1 <- 2*R*zcvquad %*% Vd x2 <- 2*R*zcvquad %*% V2 @@ -630,7 +623,6 @@ TF_interval <- function(lower, upper, df1, df2) { } num_int_F <- function(a, b, df1, df2, nsamp = 10000) { - print("numerical!") grid <- seq(from=a, to=b, length.out=nsamp) integrand <- df(grid, df1, df2) return((b-a)*mean(integrand)) @@ -767,7 +759,7 @@ predict.groupfs <- function(object, newx, ...) 
{ if (missing(newx)) { newx = object$x } else { - newx <- scaleGroups(newx, object$index, attr(object, "center"), attr(object, "normalize")) + newx <- scaleGroups(newx, object$index, attr(object, "center"), attr(object, "normalize"))$x } return(newx[, index %in% object$action] %*% beta + ifelse(object$intercept, object$by, 0)) } diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index f8134126..003adc6b 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -49,26 +49,8 @@ truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) } else { - - # Debugging - ## Q <- peng * (diag(rep(1,n)) - Ug %*% t(Ug)) - penh * (diag(rep(1,n)) - Uh %*% t(Uh)) - ## g1 <- function(t) sqrt(C*t/(1+C*t)) - ## g2 <- function(t) 1/sqrt(1+C*t) - ## Y <- function(t) { - ## Zs + R * (Vds*g1(t) + V2s*g2(t)) - ## } - coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) roots <- TF_roots(R, C, coeffs) - - if (is.null(roots)) print(c(s,l)) - ## print(do.call(rbind, lapply(roots, function(r) { - ## c(r, - ## t(Y(r-0.000001)) %*% Q %*% Y(r-0.000001), - ## t(Y(r)) %*% Q %*% Y(r), - ## t(Y(r+0.000001)) %*% Q %*% Y(r+0.000001)) - ## }))) - return(roots) } }) @@ -100,8 +82,6 @@ quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh } quadratic_roots <- function(A, B, C, tol) { - -# print("CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC") disc <- B^2 - 4*A*C b2a <- -B/(2*A) @@ -206,17 +186,6 @@ TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { x2 <- coeffs$x2 x0 <- coeffs$x0 -## # Handle some special cases -## if ((x11 == 0) && (max(abs(c(x22, x12, x1, x2))) < tol2)) { -## # print("Special case 1") -## return(Intervals(c(-Inf, 0))) -## } - -## if ((x22 == 0) && (max(abs(c(x2, x12))) < tol2)) { -## x2 <- 0 -## x12 <- 0 -## } - g1 <- function(t) sqrt(C*t/(1+C*t)) g2 <- function(t) 1/sqrt(1+C*t) I <- function(t) x11*g1(t)^2 + x12*g1(t)*g2(t) + x22*g2(t)^2 + x1*g1(t) + x2*g2(t) + x0 @@ -278,4 +247,3 @@ TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { if (I(0) < 0) stop("Infeasible constraint!") return(Intervals(c(-Inf,0))) } - diff --git a/selectiveInference/man/groupfs.Rd b/selectiveInference/man/groupfs.Rd index 1ea9dcda..a57c6dc8 100644 --- a/selectiveInference/man/groupfs.Rd +++ b/selectiveInference/man/groupfs.Rd @@ -12,15 +12,15 @@ groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, \item{y}{Vector of outcomes (length n).} -\item{index}{Group membership indicator of length p.} +\item{index}{Group membership indicator of length p. Check that \code{sort(unique(index)) = 1:G} where \code{G} is the number of distinct groups.} \item{maxsteps}{Maximum number of steps for forward stepwise.} -\item{sigma}{Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. See \code{\link[stats]{extractAIC}} for details.} +\item{sigma}{Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. When NULL, \code{link{groupfsInf}} performs truncated F inference instead of truncated \eqn{\chi}. 
See \code{\link[stats]{extractAIC}} for details on the AIC criterion.} -\item{k}{Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC.} +\item{k}{Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = 2log(p)} for RIC (best for high dimensions, when \eqn{p > n}). If \eqn{G < p} then RIC may be too restrictive and it would be better to use \code{log(G) < k < 2log(p)}.} -\item{intercept}{Should an intercept be included in the model? Default is TRUE.} +\item{intercept}{Should an intercept be included in the model? Default is TRUE. Does not count as a step.} \item{center}{Should the columns of the design matrix be centered? Default is TRUE.} diff --git a/selectiveInference/man/groupfsInf.Rd b/selectiveInference/man/groupfsInf.Rd index 42fcbe3a..74b9a5e0 100644 --- a/selectiveInference/man/groupfsInf.Rd +++ b/selectiveInference/man/groupfsInf.Rd @@ -9,7 +9,7 @@ groupfsInf(obj, sigma = NULL, verbose = TRUE) \arguments{ \item{obj}{Object returned by \code{\link{groupfs}} function} -\item{sigma}{Estimate of error standard deviation. If NULL (default), p-values will be computed from a selective F test.} +\item{sigma}{Estimate of error standard deviation. Default is NULL and in this case groupfsInf uses the value of sigma specified to \code{\link{groupfs}}.} \item{verbose}{Print out progress along the way? Default is TRUE.} } @@ -20,12 +20,12 @@ An object of class "groupfsInf" containing selective p-values for the fitted mod \item{vars}{Labels of the active groups in the order they were included.} \item{pv}{Selective p-values computed from appropriate truncated distributions.} \item{sigma}{Estimate of error variance used in computing p-values.} - \item{TC}{Observed value of truncated chi.} + \item{TC or TF}{Observed value of truncated \eqn{\chi} or \eqn{F}.} \item{df}{Rank of group of variables when it was added to the model.} - \item{support}{List of intervals defining the truncation region of the truncated chi.} + \item{support}{List of intervals defining the truncation region of the corresponding statistic.} } } \description{ -Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. Details are provided in a forthcoming work. +Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. If the \code{sigma} to \code{\link{groupfs}} was NULL then groupfsInf uses truncated \eqn{F} statistics instead of truncated \eqn{\chi}. The \code{sigma} argument to groupfsInf allows users to override and use \eqn{\chi}, but this is not recommended unless \eqn{\sigma} can be estimated well (i.e. \eqn{n > p}). 
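A minimal sketch of the two modes, assuming \code{x}, \code{y}, \code{index} prepared as for \code{\link{groupfs}} (illustrative only, not additional API):

fit1 = groupfs(x, y, index, maxsteps=5)            # sigma unknown
out1 = groupfsInf(fit1)                            # truncated F p-values
fit2 = groupfs(x, y, index, maxsteps=5, sigma=1)   # sigma known
out2 = groupfsInf(fit2)                            # truncated chi p-values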
} From a8a5d758749fd61bcf5f945a18c65f727d08f3ab Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 25 Nov 2015 17:51:34 -0800 Subject: [PATCH 062/396] Documentation fix --- selectiveInference/R/funs.groupfs.R | 8 ++++---- selectiveInference/man/predict.groupfs.Rd | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 00ee26a6..20ef2654 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -376,10 +376,10 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { vdcvquad <- t(Vdelta) %*% cvquad v2cvquad <- t(V2) %*% cvquad x0 <- zcvquad %*% Z - x1 <- 2*R*zcvquad %*% Vd + x1 <- 2*R*zcvquad %*% Vdelta x2 <- 2*R*zcvquad %*% V2 x12 <- 2*R*vdcvquad %*% V2 - x11 <- R^2*vdcvquad %*% Vd + x11 <- R^2*vdcvquad %*% Vdelta x22 <- R^2*v2cvquad %*% V2 TF_roots(R, C, coeffs = list(x0=x0, x1=x1, x2=x2, x12=x12, x11=x11, x22=x22)) } @@ -754,14 +754,14 @@ coef.groupfs <- function(object, ...) { #' @param object Object returned by a call to \code{\link{groupfs}}. #' @param newx Matrix of x values at which the predictions are desired. If NULL, the x values from groupfs fitting are used. #' @return A vector of predictions or a vector of coefficients. -predict.groupfs <- function(object, newx, ...) { +predict.groupfs <- function(object, newx) { beta <- coef.groupfs(object) if (missing(newx)) { newx = object$x } else { newx <- scaleGroups(newx, object$index, attr(object, "center"), attr(object, "normalize"))$x } - return(newx[, index %in% object$action] %*% beta + ifelse(object$intercept, object$by, 0)) + return(newx[, object$index %in% object$action] %*% beta + ifelse(object$intercept, object$by, 0)) } print.groupfsInf <- function(x, ...) { diff --git a/selectiveInference/man/predict.groupfs.Rd b/selectiveInference/man/predict.groupfs.Rd index 14af2113..4a382c7e 100644 --- a/selectiveInference/man/predict.groupfs.Rd +++ b/selectiveInference/man/predict.groupfs.Rd @@ -6,7 +6,7 @@ Make predictions or extract coefficients from a groupfs forward stepwise object.} \usage{ -\method{predict}{groupfs}(object, newx, ...) +\method{predict}{groupfs}(object, newx) } \arguments{ \item{object}{Object returned by a call to \code{\link{groupfs}}.} From b47acd5ca04cbadb0950472a2f96b919df410a25 Mon Sep 17 00:00:00 2001 From: tibs Date: Wed, 25 Nov 2015 18:04:32 -0800 Subject: [PATCH 063/396] rob one change --- selectiveInference/DESCRIPTION | 2 +- selectiveInference/R/funs.fixed.R | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index 0f352b60..ce951165 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -1,7 +1,7 @@ Package: selectiveInference Type: Package Title: Tools for Selective Inference -Version: 1.1.1 +Version: 1.1.2 Date: 2015-09-01 Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index b7f20fd4..ac005450 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -184,15 +184,15 @@ print.fixedLassoInf <- function(x, tailarea=TRUE, ...) 
{ invisible() } -estimateLambda <- function(x, sigma, nsamp=1000){ - checkargs.xy(x,rep(0,nrow(x))) - if(nsamp < 10) stop("More Monte Carlo samples required for estimation") - if (length(sigma)!=1) stop("sigma should be a number > 0") - if (sigma<=0) stop("sigma should be a number > 0") +#estimateLambda <- function(x, sigma, nsamp=1000){ +# checkargs.xy(x,rep(0,nrow(x))) +# if(nsamp < 10) stop("More Monte Carlo samples required for estimation") +# if (length(sigma)!=1) stop("sigma should be a number > 0") + # if (sigma<=0) stop("sigma should be a number > 0") - n = nrow(x) - eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) - lambda = 2*mean(apply(t(x)%*%eps,2,max)) - return(lambda) -} + # n = nrow(x) + # eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) + # lambda = 2*mean(apply(t(x)%*%eps,2,max)) + # return(lambda) +#} From c353058303b97d29d14c23f1bb95efa2f256d617 Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Thu, 26 Nov 2015 00:55:56 -0500 Subject: [PATCH 064/396] Just added Sen's example ... we still seem to get the proper coverage here even if the way the code is being used here is not exactly what we'd expect. --- tests/test.fixed.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test.fixed.R b/tests/test.fixed.R index 553fe35d..79aa7096 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -31,6 +31,30 @@ nulls = which(!is.na(pvals[,1]) & !is.na(pvals[,2])) np = pvals[nulls,-(1:2)] mean(np[!is.na(np)] < 0.1) +##### +library(selectiveInference) +library(MASS) +library(scalreg) + +S <- diag(10) +n <- 100 +p <- 10 +pval <- matrix(1, nrow = 100, ncol = p) +for(i in 1:100){ + cat(i) + X <- mvrnorm(n = n, mu = rep(0, p), Sigma = S) + Y <- X[, 1] + X[, 2] + rnorm(n) + sig.L <- scalreg(X, Y)$hsigma + + lam <- cv.glmnet(X, Y, standardize = FALSE, intercept = FALSE)$lambda.min + bl <- glmnet(X, Y, lambda = lam, standardize = FALSE, intercept = FALSE)$beta[, 1] + idx <- which(bl != 0) + pval[i, idx] <- fixedLassoInf(X, Y, beta = bl, lambda = lam * n, intercept = FALSE, sigma = sig.L, alpha = 0.05)$pv +} + +p <- pval[, -(1:2)] +mean(p[p < 1] < 0.05) + ##### a=lar(x,y) From e71a29df480b7a936a7f52fd4ea70d847bd394a2 Mon Sep 17 00:00:00 2001 From: tibs Date: Thu, 26 Nov 2015 07:57:01 -0800 Subject: [PATCH 065/396] rob test.fixed.R --- tests/test.fixed.R | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/test.fixed.R b/tests/test.fixed.R index 553fe35d..e1a581cc 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -1,5 +1,5 @@ -library(selectiveInference) -#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +#library(selectiveInference) +library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") #options(error=dump.frames) #attach("/Users/tibs/dropbox/PAPERS/lasso/lasso3/.RData") @@ -315,3 +315,28 @@ sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha,type="partial") load("params_for_Rob.rdata") #variables: X, y, coef, lam, sigma, alpha sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha,type="partial") + + +#### bug from Sen at UW + +library(MASS) +library(scalreg) + +S <- diag(10) +n <- 100 +p <- 10 +pval <- matrix(1, nrow = 100, ncol = p) +for(i in 1:100){ + cat(i) + X <- mvrnorm(n = n, mu = rep(0, p), Sigma = S) + Y <- X[, 1] + X[, 2] + rnorm(n) + sig.L <- scalreg(X, Y)$hsigma + + lam <- cv.glmnet(X, Y, standardize = FALSE, intercept = FALSE)$lambda.min + bl <- glmnet(X, Y, lambda = lam, standardize = FALSE, intercept = FALSE)$beta[, 1] + idx <- which(bl != 0) + 
pval[i, idx] <- fixedLassoInf(X, Y, beta = bl, lambda = lam * n, intercept = FALSE, sigma = sig.L, alpha = 0.05)$pv +} + +p <- pval[, -(1:2)] +mean(p[p < 1] < 0.05) From 7993fb39593f26372f96d7a1afe8daa523afcdaf Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Thu, 26 Nov 2015 11:28:10 -0500 Subject: [PATCH 066/396] Got rid of estimateLambda --- selectiveInference/NAMESPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index c6e2d09c..b306d851 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -8,7 +8,6 @@ export(lar,fs, fixedLassoInf,print.fixedLassoInf, forwardStop, estimateSigma, - estimateLambda, manyMeans,print.manyMeans, groupfs,groupfsInf, scaleGroups,factorDesign From edb1277ffc33e50713f1fb92d7f5031adcf3bb0a Mon Sep 17 00:00:00 2001 From: tibs Date: Thu, 26 Nov 2015 12:09:59 -0800 Subject: [PATCH 067/396] rob --- selectiveInference/man/estimateSigma.Rd | 25 +++++++------ selectiveInference/man/factorDesign.Rd | 2 +- selectiveInference/man/fixedLassoInf.Rd | 37 ++++++++++---------- selectiveInference/man/forwardStop.Rd | 28 +++++++-------- selectiveInference/man/fs.Rd | 34 +++++++++--------- selectiveInference/man/fsInf.Rd | 30 ++++++++-------- selectiveInference/man/lar.Rd | 24 ++++++------- selectiveInference/man/larInf.Rd | 31 ++++++++-------- selectiveInference/man/manyMeans.Rd | 14 ++++---- selectiveInference/man/plot.fs.Rd | 25 +++++++------ selectiveInference/man/plot.lar.Rd | 25 +++++++------ selectiveInference/man/predict.fs.Rd | 26 +++++++------- selectiveInference/man/predict.lar.Rd | 25 +++++++------ selectiveInference/man/selectiveInference.Rd | 33 ++++++++--------- tests/test.fixed.R | 4 +-- 15 files changed, 182 insertions(+), 181 deletions(-) diff --git a/selectiveInference/man/estimateSigma.Rd b/selectiveInference/man/estimateSigma.Rd index 100f944d..9397a08a 100644 --- a/selectiveInference/man/estimateSigma.Rd +++ b/selectiveInference/man/estimateSigma.Rd @@ -42,20 +42,19 @@ A study of error variance estimation in lasso regression. arXiv:1311.5274. \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,F) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) +set.seed(43) +n=50 +p=10 +sigma=.7 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,FALSE) +beta=c(3,2,0,0,rep(0,p-4)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) -#out=estimateSigma(x,y) -#a=fs(x,y) -#out2=fsInf(a,sigma=out$sigmahat) +out=estimateSigma(x,y) +fit=fs(x,y) +out2=fsInf(fit,sigma=out$sigmahat) } diff --git a/selectiveInference/man/factorDesign.Rd b/selectiveInference/man/factorDesign.Rd index d5d2576d..8e061db6 100644 --- a/selectiveInference/man/factorDesign.Rd +++ b/selectiveInference/man/factorDesign.Rd @@ -23,7 +23,7 @@ When using \code{\link{groupfs}} with factor variables call this function first \dontrun{ fd = factorDesign(warpbreaks) y = rnorm(nrow(fd$x)) -fit = groupfs(fd$x, y, fd$index, maxsteps=2, intercept=F) +fit = groupfs(fd$x, y, fd$index, maxsteps=2, intercept=FALSE) pvals = groupfsInf(fit) } } diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index b250073f..609136ed 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -115,27 +115,28 @@ Exact post-selection inference, with application to the lasso. arXiv:1311.6238. 
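A hedged gloss on the scaling used in the examples below: \code{glmnet} penalizes the averaged squared error, \code{(1/2n)||y - x b||^2 + lambda ||b||_1}, while \code{fixedLassoInf} takes \code{lambda} on the summed squared error scale, \code{(1/2)||y - x b||^2 + lambda ||b||_1}; hence the \code{1/n} factor when extracting the solution:

gfit = glmnet(x, y, standardize=FALSE)          # x, y as generated below
lambda = .1                                     # on the summed-RSS scale
beta = coef(gfit, s=lambda/n, exact=TRUE)[-1]   # note the 1/n factor
out = fixedLassoInf(x, y, beta, lambda, sigma=sigma)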
\examples{ -#NOT RUN -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) +#generate data +set.seed(43) +n=50 +p=10 +sigma=.7 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE)/sqrt(n-1) +beta=c(3,2,0,0,rep(0,p-4)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) # first run glmnet -#gfit=glmnet(x,y,standardize=F) -#lambda = .1 +gfit=glmnet(x,y,standardize=FALSE) +lambda = .1 #extract coef for a given lambda; Note the 1/n factor! -#beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] # compute fixed lambda p-values and selection intervals -#aa=fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out=fixedLassoInf(x,y,beta,lambda,sigma=sigma) # +#NOT RUN # as above, but use lar function instead to get initial lasso fit (should get same result) -# fit=lar(x,y,normalize=F) +# fit=lar(x,y,normalize=FALSE) # beta=coef(fit,s=lambda,mode="lambda") # fixedLassoInf(x,y,beta,lambda,sigma=sigma) @@ -144,15 +145,15 @@ Exact post-selection inference, with application to the lasso. arXiv:1311.6238. # states data example: predict life expectancy #x=state.x77[,-4] #y=state.x77[,4] -#x=scale(x,T,T) +#x=scale(x,TRUE,TRUE) #n=nrow(x) # estimate sigma from cross-validated lasso fit # cvf=cv.glmnet(x,y) -# sigmahat=estimateSigma(x,y,stand=F)$sigmahat +# sigmahat=estimateSigma(x,y,stand=FALSE)$sigmahat # # fit lasso # lambda=n*cvf$lambda.min -# gfit=glmnet(x,y,standardize=F) +# gfit=glmnet(x,y,standardize=FALSE) # bhat=coef(gfit, s=lambda/n, exact=TRUE)[-1] # # compute p-values and confidence intervals diff --git a/selectiveInference/man/forwardStop.Rd b/selectiveInference/man/forwardStop.Rd index 5ca36df1..7863afe6 100644 --- a/selectiveInference/man/forwardStop.Rd +++ b/selectiveInference/man/forwardStop.Rd @@ -34,20 +34,20 @@ To appear in Journal of the Royal Statistical Society: Series B. 
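The rule referenced above is simple enough to sketch inline; a minimal re-implementation from the paper's definition (not the package's exact code), stopping at the largest step whose running mean of \code{-log(1-p)} stays at or below \code{alpha}:

forwardStopSketch = function(pv, alpha=.10) {
  # running mean of the transformed sequential p-values
  stat = cumsum(-log(1-pv))/seq_along(pv)
  if (!any(stat <= alpha)) return(0)
  max(which(stat <= alpha))
}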
\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(433) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) - -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(4,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) +set.seed(433) +n=50 +p=10 +sigma=.7 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE)/sqrt(n-1) +beta=c(4,2,0,0,rep(0,p-4)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) #first run forward stepwise -# fsfit=fs(x,y) + fsfit=fs(x,y) # -# aa=fsInf(fsfit) -# forwardStop(aa, alpha=.10) +# run inference function + out=fsInf(fsfit) +# estimate optimal stopping point + forwardStop(out$pv, alpha=.10) } diff --git a/selectiveInference/man/fs.Rd b/selectiveInference/man/fs.Rd index 434edc42..102aabd1 100644 --- a/selectiveInference/man/fs.Rd +++ b/selectiveInference/man/fs.Rd @@ -68,33 +68,35 @@ to enter along the path} } \examples{ -#NOT RUN -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,F) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#fsfit=fs(x,y) -# -#out=fsInf(x,y) +set.seed(43) +n=50 +p=10 +sigma=.7 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,FALSE) +beta=c(3,2,0,0,rep(0,p-4)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) +# run forward stepwise +fsfit=fs(x,y) +# call inference function +out=fsInf(fsfit) # + +#NOT RUN # states data example: predict life expectancy #x=state.x77[,-4] #y=state.x77[,4] -#x=scale(x,T,T) +#x=scale(x,TRUE,TRUE) #n=nrow(x) # # estimate sigma from cross-validated lasso fit #cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=F)$sigmahat +#sigmahat=estimateSigma(x,y,stand=FALSE)$sigmahat # # fit forwardStepwise -# fsfit=fs(x,y,normalize=F) +# fsfit=fs(x,y,normalize=FALSE) # # compute p-values and confidence intervals # fsInf(fsfit,sigma=sigmahat) diff --git a/selectiveInference/man/fsInf.Rd b/selectiveInference/man/fsInf.Rd index f8afc93c..66fc758a 100644 --- a/selectiveInference/man/fsInf.Rd +++ b/selectiveInference/man/fsInf.Rd @@ -109,27 +109,27 @@ model selection. arXiv:1405.3920. \seealso{\code{\link{fs}}} \examples{ -#NOT RUN -#set.seed(433) +set.seed(433) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) +n=50 +p=10 +sigma=.7 +x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(4,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) +x=scale(x,TRUE,TRUE)/sqrt(n-1) +beta=c(4,2,0,0,rep(0,p-4)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) #first run forward stepwise -# fsfit=fs(x,y) + fsfit=fs(x,y) # # forward stepwise inference for each successive entry of a predictor; # # sigma estimated from mean squared residual # -# aa=fsInf(fsfit) + aa=fsInf(fsfit) +#NOT RUN # forward stepwise inference for fixed model of size 4, testing deletion of each predictor; # known value of sigma used # aa2=fsInf(fsfit,sigma=sigma,type="all",k=4) @@ -141,14 +141,14 @@ model selection. arXiv:1405.3920. 
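# forward stepwise inference with AIC stopping: a sketch assuming the
# type="aic" option listed for fsInf, which picks the number of steps
# by AIC and then tests the variables in that model
# aa3=fsInf(fsfit,type="aic")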
# states data example: predict life expectancy #x=state.x77[,-4] #y=state.x77[,4] -#x=scale(x,T,T) +#x=scale(x,TRUE,TRUE) #n=nrow(x) # estimate sigma from cross-validated lasso fit #cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=F)$sigmahat +#sigmahat=estimateSigma(x,y,stand=FALSE)$sigmahat # # run forward stepwise -#fsfit=fs(x,y,normalize=F) +#fsfit=fs(x,y,normalize=FALSE) # # compute p-values and confidence intervals # fsInf(fsfit,sigma=sigmahat) diff --git a/selectiveInference/man/lar.Rd b/selectiveInference/man/lar.Rd index 5bafbf93..70e53c5a 100644 --- a/selectiveInference/man/lar.Rd +++ b/selectiveInference/man/lar.Rd @@ -76,17 +76,17 @@ Jerome Friedman (2002, 2009). Elements of Statistical Learning. } \examples{ -#NOT RUN -#set.seed(33) -#n=20 -#p=10 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) + +set.seed(33) +n=20 +p=10 +sigma=1 +x=matrix(rnorm(n*p),n,p) #generate data -#beta=c(3,3,rep(0,p-2)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#run lar -#larfit=lar(x,y,verbose=TRUE) -#plot(larfit) +beta=c(3,3,rep(0,p-2)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) +#run lar and plot results +larfit=lar(x,y,verbose=TRUE) +plot(larfit) } diff --git a/selectiveInference/man/larInf.Rd b/selectiveInference/man/larInf.Rd index 86de5d28..7947ec56 100644 --- a/selectiveInference/man/larInf.Rd +++ b/selectiveInference/man/larInf.Rd @@ -114,34 +114,35 @@ Exact post-selection inference for sequential regression procedures. arXiv:1401. \examples{ -#NOT RUN -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) +#generate data +set.seed(43) +n=50 +p=10 +sigma=.7 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE)/sqrt(n-1) +beta=c(3,2,0,0,rep(0,p-4)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) #first run lar -# larfit=lar(x,y) + larfit=lar(x,y) # #lar inference for each successive entry of a predictor; sigma estimated # from mean squared residual from least squares fit -# aa=larInf(larfit) + out=larInf(larfit) # +#NOT RUN # states data example: predict life expectancy #x=state.x77[,-4] #y=state.x77[,4] -#x=scale(x,T,T) +#x=scale(x,TRUE,TRUE) #n=nrow(x) # estimate sigma from cross-validated lasso fit #cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=F)$sigmahat +#sigmahat=estimateSigma(x,y,stand=FALSE)$sigmahat # # run LAR -#larfit=lar(x,y,normalize=F) +#larfit=lar(x,y,normalize=FALSE) # # compute p-values and confidence intervals # larInf(larfit,sigma=sigmahat) diff --git a/selectiveInference/man/manyMeans.Rd b/selectiveInference/man/manyMeans.Rd index db2d9926..62a14009 100644 --- a/selectiveInference/man/manyMeans.Rd +++ b/selectiveInference/man/manyMeans.Rd @@ -52,11 +52,11 @@ arXiv:1405.3340. 
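A rough sketch of the first-stage selection that the procedure conditions on, Benjamini-Hochberg applied to the two-sided normal p-values (assuming unit noise; the names here are illustrative, not the package's internals):

pv = 2*pnorm(-abs(y))                 # y as in the example below
ord = order(pv)
# largest k with sorted p-value below the BH line bh.q * k/n
k = max(c(0, which(sort(pv) <= bh.q*seq_along(pv)/length(pv))))
selected = if (k > 0) ord[1:k] else integer(0)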
\examples{ -#NOT RUN -#set.seed(12345) -#n = 100 # sample size -#signal = 3 # signal size -#mu = c(rep(signal, floor (n/5)), rep(0, n-floor(n/5))) # 20% of elements get the signal; rest 0 -#y = mu + rnorm (n, 0, 1) -#mmObj = manyMeans(y, bh.q=0.1) + +set.seed(12345) +n = 100 # sample size +signal = 3 # signal size +mu = c(rep(signal, floor (n/5)), rep(0, n-floor(n/5))) # 20% of elements get the signal; rest 0 +y = mu + rnorm (n, 0, 1) +out = manyMeans(y, bh.q=0.1) } diff --git a/selectiveInference/man/plot.fs.Rd b/selectiveInference/man/plot.fs.Rd index 3c27ad1f..e90da5d5 100644 --- a/selectiveInference/man/plot.fs.Rd +++ b/selectiveInference/man/plot.fs.Rd @@ -28,18 +28,17 @@ Default is TRUE} \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(33) -#n=200 -#p=20 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) +set.seed(33) +n=200 +p=20 +sigma=1 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE)/sqrt(n-1) #generate y -#beta=c(3,-2,rep(0,p-2)) -#beta=c(rep(3,10),rep(0,p-10)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#fsfit=fs(x,y) -#plot(fsfit) +beta=c(3,-2,rep(0,p-2)) +beta=c(rep(3,10),rep(0,p-10)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) +fsfit=fs(x,y) +plot(fsfit) } diff --git a/selectiveInference/man/plot.lar.Rd b/selectiveInference/man/plot.lar.Rd index ebf6c0b5..668ce1c2 100644 --- a/selectiveInference/man/plot.lar.Rd +++ b/selectiveInference/man/plot.lar.Rd @@ -32,18 +32,17 @@ Default is TRUE} \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(33) -#n=200 -#p=20 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) +set.seed(33) +n=200 +p=20 +sigma=1 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE)/sqrt(n-1) #generate y -#beta=c(3,-2,rep(0,p-2)) -#beta=c(rep(3,10),rep(0,p-10)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#larfit=lar(x,y) -#plot(larfit) +beta=c(3,-2,rep(0,p-2)) +beta=c(rep(3,10),rep(0,p-10)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) +larfit=lar(x,y) +plot(larfit) } diff --git a/selectiveInference/man/predict.fs.Rd b/selectiveInference/man/predict.fs.Rd index 32f1a449..0562f6b4 100644 --- a/selectiveInference/man/predict.fs.Rd +++ b/selectiveInference/man/predict.fs.Rd @@ -35,18 +35,18 @@ Either a vector/matrix of predictions, or a vector/matrix of coefficients. \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(33) -#n=200 -#p=20 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) +set.seed(33) +n=200 +p=20 +sigma=1 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE)/sqrt(n-1) #generate y -#beta=c(3,-2,rep(0,p-2)) -#beta=c(rep(3,10),rep(0,p-10)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#obj=fs(x,y) -#fit=predict.fs(obj,x) +beta=c(3,-2,rep(0,p-2)) +beta=c(rep(3,10),rep(0,p-10)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) +# run forward stepwise and predict functions +obj=fs(x,y) +fit=predict.fs(obj,x) } diff --git a/selectiveInference/man/predict.lar.Rd b/selectiveInference/man/predict.lar.Rd index 340cd521..a9eebb5e 100644 --- a/selectiveInference/man/predict.lar.Rd +++ b/selectiveInference/man/predict.lar.Rd @@ -38,18 +38,17 @@ Either a vector/matrix of predictions, or a vector/matrix of coefficients. 
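Between knots the LAR/lasso path is piecewise linear in lambda, so prediction at a non-knot \code{s} (e.g. \code{s=3.5} in the example below) amounts to interpolating the stored path. A sketch, glossing over the package's boundary conventions:

coefInterp = function(obj, s) {
  lam = obj$lambda; B = obj$beta                 # knots; p x k path matrix
  if (s >= lam[1]) return(rep(0, nrow(B)))       # nothing active yet
  if (s <= lam[length(lam)]) return(B[, ncol(B)])
  k = max(which(lam > s))                        # knot just above s
  w = (lam[k] - s)/(lam[k] - lam[k+1])
  (1-w)*B[, k] + w*B[, k+1]
}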
\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#NOT RUN -#set.seed(33) -#n=200 -#p=20 -#sigma=1 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) +set.seed(33) +n=200 +p=20 +sigma=1 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE)/sqrt(n-1) #generate y -#beta=c(3,-2,rep(0,p-2)) -#beta=c(rep(3,10),rep(0,p-10)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) -#larfit=lar(x,y) -#fit=predict.lar(larfit,x,type="fit") +beta=c(3,-2,rep(0,p-2)) +beta=c(rep(3,10),rep(0,p-10)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) +larfit=lar(x,y) +fit=predict.lar(larfit,x,type="fit") } diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index 0b1ca4c0..1043f01f 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -70,28 +70,29 @@ arXiv:1405.3340. } \examples{ -#NOT RUN # forward stepwise: # #generate some data -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) -#beta=c(4,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) +set.seed(43) +n=50 +p=10 +sigma=.7 +x=matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE)/sqrt(n-1) +beta=c(4,2,0,0,rep(0,p-4)) +y=x\%*\%beta+sigma*rnorm(n) +y=y-mean(y) # #first run forward stepwise -# fsfit=fs(x,y) + fsfit=fs(x,y) # # forward stepwise inference for each successive entry of a predictor; # sigma estimated from mean squared residual # -# aa=fsInf(fsfit) +out=fsInf(fsfit) + ## +#NOT RUN # lasso with fixed lambda # #set.seed(43) @@ -99,13 +100,13 @@ arXiv:1405.3340. #p=10 #sigma=.7 #x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) +#x=scale(x,TRUE,TRUE)/sqrt(n-1) #beta=c(4,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) #y=y-mean(y) # # first run glmnet -#gfit=glmnet(x,y,standardize=F) +#gfit=glmnet(x,y,standardize=FALSE) #lam = .1 #extract coef for a given lam; Note the 1/n factor in s! #bhat = coef(gfit, s=lam/n, exact=TRUE)[-1] @@ -119,7 +120,7 @@ arXiv:1405.3340. #p=10 #sigma=.7 #x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,T)/sqrt(n-1) +#x=scale(x,TRUE,TRUE)/sqrt(n-1) #beta=c(3,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) #y=y-mean(y) @@ -165,7 +166,7 @@ arXiv:1405.3340. 
#p=10 #sigma=.7 #x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,F) +#x=scale(x,TRUE,FALSE) #beta=c(3,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) #y=y-mean(y) diff --git a/tests/test.fixed.R b/tests/test.fixed.R index 8cf1e5f2..5c558875 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -1,5 +1,5 @@ -#library(selectiveInference) -library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(selectiveInference) +#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") #options(error=dump.frames) #attach("/Users/tibs/dropbox/PAPERS/lasso/lasso3/.RData") From 62739d47c8f3fc8f5e3b0d63031fed2d5722ae18 Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Fri, 27 Nov 2015 10:45:04 -0500 Subject: [PATCH 068/396] Small commit --- tests/test.fixed.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test.fixed.R b/tests/test.fixed.R index 8cf1e5f2..63f76c09 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -1,5 +1,5 @@ -#library(selectiveInference) -library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(selectiveInference) +#library(selectiveInference),lib.loc="/Users/tibs/dropbox/git/R/mylib") #options(error=dump.frames) #attach("/Users/tibs/dropbox/PAPERS/lasso/lasso3/.RData") @@ -154,7 +154,6 @@ ch=function(bhat,tol.beta=1e-5,tol.kkt=0.1){ cat(c(max(abs(g[oo]))>tol.kkt,min(gg[!oo])< -1-tol.kkt,max(gg[!oo])>1 +tol.kkt),fill=T) } - # x=read.table("/Users/tibs/dropbox/PAPERS/FourOfUs/data64.txt") x=as.matrix(x) From 2b4799cdc3f3e84d92443bf4bbb6a9e37042d7c4 Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Fri, 27 Nov 2015 11:57:59 -0500 Subject: [PATCH 069/396] Fixed a bug in LAR plotting function --- selectiveInference/R/funs.lar.R | 23 ++++++++++++----------- tests/test.lar.R | 4 ++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index db4082ed..b6446995 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -632,29 +632,30 @@ plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, xvar = match.arg(xvar) if (xvar=="norm") { - x = colSums(abs(beta)) + xx = colSums(abs(beta)) xlab = "L1 norm" } else if (xvar=="step") { - x = 1:k + xx = 1:k xlab = "Step" } else { - x = lambda + xx = lambda xlab = "Lambda" } if (omit.zeros) { - inds = matrix(FALSE,p,k) - for (i in 1:k) { - inds[i,] = beta[i,]!=0 | c(diff(beta[i,]!=0),F) | c(F,diff(beta[i,]!=0)) - } - beta[!inds] = NA + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA } - plot(c(),c(),xlim=range(x,na.rm=T),ylim=range(beta,na.rm=T), + plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), xlab=xlab,ylab="Coefficients",main="LAR path",...) 
abline(h=0,lwd=2) - matplot(x,t(beta),type="l",lty=1,add=TRUE) - if (breaks) abline(v=x,lty=2) + matplot(xx,t(beta),type="l",lty=1,add=TRUE) + if (breaks) abline(v=xx,lty=2) if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) invisible() } diff --git a/tests/test.lar.R b/tests/test.lar.R index 6edaa450..be264286 100644 --- a/tests/test.lar.R +++ b/tests/test.lar.R @@ -1,5 +1,5 @@ -#library(selectiveInference) -library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(selectiveInference) +#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") library(lars) set.seed(0) From 5b8e300b00207972c2c3149f6a996729cfe564e8 Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 27 Nov 2015 16:49:34 -0800 Subject: [PATCH 070/396] rob --- tests/test.fixed.R | 366 +++++++++++++++++++++++++++++++++++++++++++++ tests/test.fs.R | 4 +- 2 files changed, 368 insertions(+), 2 deletions(-) create mode 100644 tests/test.fixed.R diff --git a/tests/test.fixed.R b/tests/test.fixed.R new file mode 100644 index 00000000..5c558875 --- /dev/null +++ b/tests/test.fixed.R @@ -0,0 +1,366 @@ +library(selectiveInference) +#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") + +#options(error=dump.frames) +#attach("/Users/tibs/dropbox/PAPERS/lasso/lasso3/.RData") + +##### +n=50 +p=10 +sigma=.7 +beta=c(3,2,0,0,rep(0,p-4)) +set.seed(43) +nsim = 100 +pvals <- matrix(NA, nrow=nsim, ncol=p) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta +for (i in 1:nsim) { +y=mu+sigma*rnorm(n) +#y=y-mean(y) +# first run glmnet +gfit=glmnet(x,y,intercept=F,standardize=F,thresh=1e-8) +lambda = 1 +#extract coef for a given lambda; Note the 1/n factor! +beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +# compute fixed lambda p-values and selection intervals +aa = fixedLassoInf(x,y,beta,lambda,intercept=F,sigma=sigma) +pvals[i, which(beta != 0)] <- aa$pv +} +nulls = which(!is.na(pvals[,1]) & !is.na(pvals[,2])) +np = pvals[nulls,-(1:2)] +mean(np[!is.na(np)] < 0.1) + +##### +library(selectiveInference) +library(MASS) +library(scalreg) + +S <- diag(10) +n <- 100 +p <- 10 +pval <- matrix(1, nrow = 100, ncol = p) +for(i in 1:100){ + cat(i) + X <- mvrnorm(n = n, mu = rep(0, p), Sigma = S) + Y <- X[, 1] + X[, 2] + rnorm(n) + sig.L <- scalreg(X, Y)$hsigma + + lam <- cv.glmnet(X, Y, standardize = FALSE, intercept = FALSE)$lambda.min + bl <- glmnet(X, Y, lambda = lam, standardize = FALSE, intercept = FALSE)$beta[, 1] + idx <- which(bl != 0) + pval[i, idx] <- fixedLassoInf(X, Y, beta = bl, lambda = lam * n, intercept = FALSE, sigma = sig.L, alpha = 0.05)$pv +} + +p <- pval[, -(1:2)] +mean(p[p < 1] < 0.05) + +##### + +a=lar(x,y) +aa=larInf(a) + +critf=function(b,lam,x,y){ + yhat=x%*%b + .5*sum( (y-yhat)^2) + lam*sum(abs(b)) + } + +##check coverage +set.seed(3) + +n=50 +p=10 +sigma=2 + +x=matrix(rnorm(n*p),n,p) +#x=scale(x,T,T)/sqrt(n-1) #try with and without standardization + +beta=c(5,4,3,2,1,rep(0,p-5)) + +nsim=100 +seeds=sample(1:9999,size=nsim) +pv=rep(NA,nsim) +ci=matrix(NA,nsim,2) +btrue=rep(NA,nsim) +for(ii in 1:nsim){ + cat(ii) + set.seed(seeds[ii]) + mu=x%*%beta + y=mu+sigma*rnorm(n) + y=y-mean(y) + gfit=glmnet(x,y,standardize=F,lambda.min.ratio=1e-9) + ilam=trunc(length(gfit$lam)/4) + lambda=gfit$lam[ilam]*n + bhat = predict(gfit, s=lambda/n,type="coef",exact=F)[-1] + + junk= fixedLassoInf(x,y,bhat,lambda,sigma=sigma) + pv[ii]=junk$pv[1] + # oo=junk$pred # for old package + oo=junk$var # for new package + btrue[ii]=lsfit(x[,oo],mu)$coef[2] + ci[ii,]=junk$ci[1,] +} + 
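# empirical coverage of the selection intervals for the first selected
# coefficient (target btrue); with the default alpha=0.1 in
# fixedLassoInf this should come out near 0.9
mean(ci[,1] <= btrue & btrue <= ci[,2], na.rm=TRUE)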
+sum(ci[,1]> btrue) +sum(ci[,2]< btrue) + + + + +## BIG example + library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") + +options(error=dump.frames) +attach("/Users/tibs/dropbox/PAPERS/lasso/lasso3/.RData") +critf=function(b,lam,x,y){ + yhat=x%*%b + .5*sum( (y-yhat)^2) + lam*sum(abs(b)) + } +set.seed(4) +n=100 +p=500 +sigma=1 +x=matrix(rnorm(n*p),ncol=p) +x=scale(x,T,F) +beta=c(rep(2.5,10),rep(0,p-10)) +y=x%*%beta+sigma*rnorm(n) +y=y-mean(y) + + + +gfit=glmnet(x,y,standardize=F) +cvf=cv.glmnet(x,y) + +lambda=n*cvf$lambda.min +#lambda=10 + bhat = as.numeric(predict(gfit, s=lambda/n,type="coef",exact=T))[-1] + +bhat2=lasso2lam(x,y,lambda,int=F,stand=F)$coef +plot(bhat,bhat2) + +critf(bhat,lambda,x,y) +critf(bhat2,lambda,x,y) + junk= fixedLassoInf(x,y,bhat,lambda,sigma=sigma) +junk= fixedLassoInf(x,y,bhat,lambda,sigma=sigma,bits=200) + + # check of KKT +ch=function(bhat,tol.beta=1e-5,tol.kkt=0.1){ + xx=cbind(1,x) + bhatt=c(0,bhat) + g0=t(xx)%*%(y-xx%*%bhatt) + g=g0-lambda*sign(bhatt) + gg=g0/lambda + oo=abs(bhatt)>tol.beta + cat(c(max(abs(g[oo]))>tol.kkt,min(gg[!oo])< -1-tol.kkt,max(gg[!oo])>1 +tol.kkt),fill=T) +} + + +# +x=read.table("/Users/tibs/dropbox/PAPERS/FourOfUs/data64.txt") +x=as.matrix(x) +x=scale(x,T,F) +#x=scale(x,T,T) +n=length(y) +nams=scan("/Users/tibs/dropbox/PAPERS/FourOfUs/data64.names",what="") +y=scan("/Users/tibs/dropbox/PAPERS/FourOfUs/diab.y") +y=y-mean(y) + +cvf=cv.glmnet(x,y) +sigmahat=estimateSigma(x,y,stand=F)$si + +lambda=n*cvf$lambda.min + +lambda=estimateLambda(x,sigma=sigmahat)/2 + +gfit=glmnet(x,y,standardize=F) + +bhat=coef(gfit, s=lambda/n, exact=TRUE)[-1] +bhat2=lasso2lam(x,y,lambda,int=F,stand=F)$coef + +plot(bhat,bhat2) + +critf(bhat,lambda,x,y) +critf(bhat2,lambda,x,y) + + +fixedLassoInf(x,y,bhat,lambda,sigma=sigmahat) +## +set.seed(44) +n=50 +p=10 +sigma=.7 +x=matrix(rnorm(n*p),n,p) +x=scale(x,T,T)/sqrt(n-1) +beta=c(3,2,0,0,rep(0,p-4)) +y=x%*%beta+sigma*rnorm(n) +y=y-mean(y) +# first run glmnet +gfit=glmnet(x,y,standardize=F) +lambda = .1 +#extract coef for a given lambda; Note the 1/n factor! 
+beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + +# compute fixed lambda p-values and selection intervals +fixedLassoInf(x,y,beta,lambda,sigma=sigma) + +# as above, but use lar function to get initial lasso fit + fit=lar(x,y,normalize=F) +beta=coef(fit,s=lambda,mode="lambda") +fixedLassoInf(x,y,beta,lambda,sigma=sigma) + +### +x=state.x77[,-4] +y=state.x77[,4] +x=scale(x,T,T) +n=nrow(x) + + +sigmahat=estimateSigma(x,y,stand=F)$si + +lambda=65 + +gfit=glmnet(x,y,standardize=F, thresh=1e-9) + +bhat=coef(gfit, s=lambda/n, exact=TRUE)[-1] +#bhat2=lasso2lam(x,y,lambda,int=F,stand=F)$coef + +fixedLassoInf(x,y,bhat,lambda,sigma=sigmahat) + + +# lucas example +library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") + +set.seed(44) + +p <- 300 +n <- 200 +s0 <- 2 +b <- 1 +b0 <- 0 +sigma <- .5 +alpha = 0.05 + +X = matrix(rnorm(n*p),n,p) +X = scale(X,center=T,scale=T) + +m = 1000 +eps = matrix(rnorm(m*n),n,m) +lam = 2*mean(apply(t(X)%*%eps,2,max)) + + +theta0 <- c(rep(b,s0),rep(0,p-s0)); +w <- sigma*rnorm(n); +y <- (b0+X%*%theta0+w); + + + +tic = proc.time() +gfit = glmnet(X,y,standardize=F) +coef = coef(gfit, s=lam/n, exact=T)[-1] +sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha) + +### lucas example with sims + +set.seed(44) + +p <- 300 +n <- 200 +s0 <- 2 +b <- 1 +b0 <- 0 +sigma <- .5 +alpha = 0.05 +#set.seed('1') + +#X <- rbinom(p*n,1,prob=0.15); +#dim(X) <- c(n,p); +#X <- X %*% diag(1+9*runif(p)) +X = matrix(rnorm(n*p),n,p) +#X = scale(X,center=T,scale=T) # original +#X = scale(X,center=T,scale=T)/sqrt(n-1) #CHANGED + +m = 1000 +eps = matrix(rnorm(m*n),n,m) + +lam = 2*mean(apply(t(X)%*%eps,2,max)) #original +theta0 <- c(rep(b,s0),rep(0,p-s0)) #original +#theta0 <- c(rep(b,s0),rep(0,p-s0))*sqrt(n-1) #CHANGED + mu=b0+X%*%theta0 +nsim=100 +int=matrix(NA,nsim,2) +btrue=rep(NA,nsim) +for(ii in 1:nsim){ + cat(ii) +w <- sigma*rnorm(n); + +y <- (mu+w); + +tic = proc.time() +gfit = glmnet(X,y,standardize=F) + nz=colSums(gfit$beta!=0) + # lam=gfit$lambda[nz>=2]/1.1 # CHANGED + # lam=lam[1]*n #CHANGED +coef = coef(gfit, s=lam/n, exact=T)[-1] +oo=which(coef!=0) +btrue[ii]=lsfit(X[,oo],mu)$coef[2] +sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha) +int[ii,]=sint$ci[1,] +} +## new bugs from lucas +library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") + +set.seed(1) +p <- 500 +n <- 400 +s0 <- 10 +b <- 1.2 +b0 <- 0 +sigma <- 5 +alpha = 0.05 + +X = matrix(rnorm(n*p),n,p) +X = scale(X,center=T,scale=T) +theta0 <- c(rep(b,s0),rep(0,p-s0)) +w <- sigma*rnorm(n) +y <- b0 + X%*%theta0 + w + +# Pick lambda as in Negahban et al. (2012), as done in Lee et al. 
(2015) +m = 1000 +eps = sigma*matrix(rnorm(m*n),n,m) +lam = 2*mean(apply(abs(t(X)%*%eps),2,max)) + +gfit = glmnet(X,y,standardize=F) +coef = coef(gfit, s=lam/n, exact=T)[-1] +sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha,type="partial") +# Error in v %*% diag(d) : non-conformable arguments + +## lucas again + +load("params_for_Rob.rdata") #variables: X, y, coef, lam, sigma, alpha + +sint = fixedLassoInf(X,y,coef,lam,sigma=sigma,alpha=alpha,type="partial") + + +#### bug from Sen at UW + +library(MASS) +library(scalreg) + +S <- diag(10) +n <- 100 +p <- 10 +pval <- matrix(1, nrow = 100, ncol = p) +for(i in 1:100){ + cat(i) + X <- mvrnorm(n = n, mu = rep(0, p), Sigma = S) + Y <- X[, 1] + X[, 2] + rnorm(n) + sig.L <- scalreg(X, Y)$hsigma + + lam <- cv.glmnet(X, Y, standardize = FALSE, intercept = FALSE)$lambda.min + bl <- glmnet(X, Y, lambda = lam, standardize = FALSE, intercept = FALSE)$beta[, 1] + idx <- which(bl != 0) + pval[i, idx] <- fixedLassoInf(X, Y, beta = bl, lambda = lam * n, intercept = FALSE, sigma = sig.L, alpha = 0.05)$pv +} + +p <- pval[, -(1:2)] +mean(p[p < 1] < 0.05) diff --git a/tests/test.fs.R b/tests/test.fs.R index fbe50ced..f811676e 100644 --- a/tests/test.fs.R +++ b/tests/test.fs.R @@ -1,5 +1,5 @@ -#library(selectiveInference) -library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(selectiveInference) +#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") library(lars) From ad70e47cf8f9db705909c5da959c6c51ac5033ca Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Sat, 28 Nov 2015 00:05:34 -0500 Subject: [PATCH 071/396] Fixed a bug in FS plotting function --- selectiveInference/R/funs.fs.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 3a46b29d..6edc2db8 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -446,12 +446,13 @@ plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { xx = 1:k xlab = "Step" - if (omit.zeros) { - inds = matrix(FALSE,p,k) - for (i in 1:k) { - inds[i,] = beta[i,]!=0 | c(diff(beta[i,]!=0),F) | c(F,diff(beta[i,]!=0)) - } - beta[!inds] = NA + if (omit.zeros) { + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA } plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), From 7d2ca75d94824fe3e7c2250c284cfe415b93f34e Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Sat, 28 Nov 2015 00:36:06 -0500 Subject: [PATCH 072/396] Fixed a bug in FS plotting function --- selectiveInference/R/funs.fs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 6edc2db8..a46b53e9 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -459,7 +459,7 @@ plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { xlab=xlab,ylab="Coefficients",main="Forward stepwise path",...) 
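# (the empty plot() call above only sets up the axes and titles; the paths
# themselves are drawn by the matplot() overlay below, with a horizontal
# reference line at zero, dashed vertical lines at the steps, and variable
# labels on the right-hand axis)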
abline(h=0,lwd=2) matplot(xx,t(beta),type="l",lty=1,add=TRUE) - if (breaks) abline(v=x,lty=2) + if (breaks) abline(v=xx,lty=2) if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) invisible() } From 9d95250fe316f6181f8b33e0c544ade6b79b7263 Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 27 Nov 2015 21:46:43 -0800 Subject: [PATCH 073/396] rob --- selectiveInference/R/funs.fs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 6edc2db8..a46b53e9 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -459,7 +459,7 @@ plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { xlab=xlab,ylab="Coefficients",main="Forward stepwise path",...) abline(h=0,lwd=2) matplot(xx,t(beta),type="l",lty=1,add=TRUE) - if (breaks) abline(v=x,lty=2) + if (breaks) abline(v=xx,lty=2) if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) invisible() } From 605d6b6066da5a45e4cc7ab44643e6760347652c Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 27 Nov 2015 22:24:20 -0800 Subject: [PATCH 074/396] rob --- selectiveInference/man/fs.Rd | 1 - selectiveInference/man/lar.Rd | 1 - selectiveInference/man/larInf.Rd | 1 - selectiveInference/man/predict.fs.Rd | 2 +- selectiveInference/man/predict.lar.Rd | 3 +-- selectiveInference/man/selectiveInference.Rd | 4 ---- 6 files changed, 2 insertions(+), 10 deletions(-) diff --git a/selectiveInference/man/fs.Rd b/selectiveInference/man/fs.Rd index 102aabd1..931d1be1 100644 --- a/selectiveInference/man/fs.Rd +++ b/selectiveInference/man/fs.Rd @@ -77,7 +77,6 @@ x=matrix(rnorm(n*p),n,p) x=scale(x,TRUE,FALSE) beta=c(3,2,0,0,rep(0,p-4)) y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) # run forward stepwise fsfit=fs(x,y) # call inference function diff --git a/selectiveInference/man/lar.Rd b/selectiveInference/man/lar.Rd index 70e53c5a..1e9f26e6 100644 --- a/selectiveInference/man/lar.Rd +++ b/selectiveInference/man/lar.Rd @@ -85,7 +85,6 @@ x=matrix(rnorm(n*p),n,p) #generate data beta=c(3,3,rep(0,p-2)) y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) #run lar and plot results larfit=lar(x,y,verbose=TRUE) plot(larfit) diff --git a/selectiveInference/man/larInf.Rd b/selectiveInference/man/larInf.Rd index 7947ec56..e45900b1 100644 --- a/selectiveInference/man/larInf.Rd +++ b/selectiveInference/man/larInf.Rd @@ -123,7 +123,6 @@ x=matrix(rnorm(n*p),n,p) x=scale(x,TRUE,TRUE)/sqrt(n-1) beta=c(3,2,0,0,rep(0,p-4)) y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) #first run lar larfit=lar(x,y) # diff --git a/selectiveInference/man/predict.fs.Rd b/selectiveInference/man/predict.fs.Rd index 0562f6b4..ca3a32aa 100644 --- a/selectiveInference/man/predict.fs.Rd +++ b/selectiveInference/man/predict.fs.Rd @@ -48,5 +48,5 @@ y=x\%*\%beta+sigma*rnorm(n) y=y-mean(y) # run forward stepwise and predict functions obj=fs(x,y) -fit=predict.fs(obj,x) +fit=predict.fs(obj,x,s=3) } diff --git a/selectiveInference/man/predict.lar.Rd b/selectiveInference/man/predict.lar.Rd index a9eebb5e..16aa7958 100644 --- a/selectiveInference/man/predict.lar.Rd +++ b/selectiveInference/man/predict.lar.Rd @@ -48,7 +48,6 @@ x=scale(x,TRUE,TRUE)/sqrt(n-1) beta=c(3,-2,rep(0,p-2)) beta=c(rep(3,10),rep(0,p-10)) y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) larfit=lar(x,y) -fit=predict.lar(larfit,x,type="fit") +fit=predict.lar(larfit,x,s=3.5,type="fit") } diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index 1043f01f..0f29e328 100644 --- 
a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -81,7 +81,6 @@ x=matrix(rnorm(n*p),n,p) x=scale(x,TRUE,TRUE)/sqrt(n-1) beta=c(4,2,0,0,rep(0,p-4)) y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) # #first run forward stepwise fsfit=fs(x,y) @@ -123,7 +122,6 @@ out=fsInf(fsfit) #x=scale(x,TRUE,TRUE)/sqrt(n-1) #beta=c(3,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) #first run lar # larfit=lar(x,y) # @@ -169,7 +167,6 @@ out=fsInf(fsfit) #x=scale(x,TRUE,FALSE) #beta=c(3,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) #out=estimateSigma(x,y) @@ -182,7 +179,6 @@ set.seed(43) #x=scale(x,T,F) #beta=c(3,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) # #estimate lambda using the known value of sigma #lamhat=estimateLambda(x,sigma=.7) From 4f97bc9cfc807a65bc8fd35d6c09afafc811a588 Mon Sep 17 00:00:00 2001 From: tibs Date: Sat, 28 Nov 2015 08:48:57 -0800 Subject: [PATCH 075/396] rob --- selectiveInference/man/selectiveInference.Rd | 1 - 1 file changed, 1 deletion(-) diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index 0f29e328..66423cac 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -102,7 +102,6 @@ out=fsInf(fsfit) #x=scale(x,TRUE,TRUE)/sqrt(n-1) #beta=c(4,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) -#y=y-mean(y) # # first run glmnet #gfit=glmnet(x,y,standardize=FALSE) From e3f80945a97e37b34e6a3058a7bdc7d6b3e82858 Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Sat, 28 Nov 2015 12:28:07 -0500 Subject: [PATCH 076/396] Cleaned up the fs, fsInf, and plot.fs examples in the man files --- selectiveInference/NAMESPACE | 6 ++-- selectiveInference/R/funs.lar.R | 2 +- selectiveInference/man/fs.Rd | 41 +++++++-------------- selectiveInference/man/fsInf.Rd | 59 ++++++++++--------------------- selectiveInference/man/plot.fs.Rd | 18 ++++------ 5 files changed, 42 insertions(+), 84 deletions(-) diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index b306d851..8dd9120c 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -31,7 +31,7 @@ S3method("print", "groupfsInf") useDynLib("selectiveInference") import(glmnet) import(intervals) -importFrom("graphics", "abline", "axis", "matplot") -importFrom("stats", "dnorm", "lsfit", "pexp", "pnorm", "predict", - "qnorm", "rnorm", "sd", "uniroot", "dchisq", "model.matrix", "pchisq") +importFrom("graphics", abline, axis, matplot) +importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, + qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index b6446995..26669cad 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -652,7 +652,7 @@ plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, } plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), - xlab=xlab,ylab="Coefficients",main="LAR path",...) + xlab=xlab,ylab="Coefficients",main="Least angle regression path",...) 
abline(h=0,lwd=2) matplot(xx,t(beta),type="l",lty=1,add=TRUE) if (breaks) abline(v=xx,lty=2) diff --git a/selectiveInference/man/fs.Rd b/selectiveInference/man/fs.Rd index 931d1be1..c1e2a7fd 100644 --- a/selectiveInference/man/fs.Rd +++ b/selectiveInference/man/fs.Rd @@ -68,37 +68,20 @@ to enter along the path} } \examples{ - -set.seed(43) -n=50 -p=10 -sigma=.7 -x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,FALSE) -beta=c(3,2,0,0,rep(0,p-4)) -y=x\%*\%beta+sigma*rnorm(n) +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) # run forward stepwise -fsfit=fs(x,y) -# call inference function -out=fsInf(fsfit) -# +fsfit = fs(x,y) -#NOT RUN -# states data example: predict life expectancy -#x=state.x77[,-4] -#y=state.x77[,4] -#x=scale(x,TRUE,TRUE) -#n=nrow(x) -# -# estimate sigma from cross-validated lasso fit -#cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=FALSE)$sigmahat -# -# fit forwardStepwise -# fsfit=fs(x,y,normalize=FALSE) -# -# compute p-values and confidence intervals -# fsInf(fsfit,sigma=sigmahat) +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = fsInf(fsfit) +out } diff --git a/selectiveInference/man/fsInf.Rd b/selectiveInference/man/fsInf.Rd index 66fc758a..237f7e60 100644 --- a/selectiveInference/man/fsInf.Rd +++ b/selectiveInference/man/fsInf.Rd @@ -109,47 +109,26 @@ model selection. arXiv:1405.3920. \seealso{\code{\link{fs}}} \examples{ -set.seed(433) - -n=50 -p=10 -sigma=.7 -x=matrix(rnorm(n*p),n,p) +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) +# run forward stepwise +fsfit = fs(x,y) -x=scale(x,TRUE,TRUE)/sqrt(n-1) -beta=c(4,2,0,0,rep(0,p-4)) -y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) -#first run forward stepwise - fsfit=fs(x,y) -# -# forward stepwise inference for each successive entry of a predictor; -# -# sigma estimated from mean squared residual -# - aa=fsInf(fsfit) +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = fsInf(fsfit) +out.seq -#NOT RUN -# forward stepwise inference for fixed model of size 4, testing deletion of each predictor; -# known value of sigma used -# aa2=fsInf(fsfit,sigma=sigma,type="all",k=4) +# compute p-values and confidence intervals after AIC stopping +out.aic = fsInf(fsfit,type="aic") +out.aic -# forward stepwise inference for model chosen by AIC, testing deletion of each predictor; -# known value of sigma used -# aa3=fsInf(fsfit,sigma=sigma,type="aic") -# -# states data example: predict life expectancy -#x=state.x77[,-4] -#y=state.x77[,4] -#x=scale(x,TRUE,TRUE) -#n=nrow(x) -# estimate sigma from cross-validated lasso fit -#cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=FALSE)$sigmahat -# -# run forward stepwise -#fsfit=fs(x,y,normalize=FALSE) -# -# compute p-values and confidence intervals -# fsInf(fsfit,sigma=sigmahat) +# compute p-values and confidence intervals after 5 fixed steps +out.fix = fsInf(fsfit,type="all",k=5) +out.fix } diff --git a/selectiveInference/man/plot.fs.Rd b/selectiveInference/man/plot.fs.Rd index e90da5d5..b712cdc5 100644 --- a/selectiveInference/man/plot.fs.Rd +++ b/selectiveInference/man/plot.fs.Rd @@ -29,16 +29,12 @@ Default is TRUE} \examples{ set.seed(33) -n=200 -p=20 -sigma=1 -x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE)/sqrt(n-1) -#generate y -beta=c(3,-2,rep(0,p-2)) -beta=c(rep(3,10),rep(0,p-10)) -y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) -fsfit=fs(x,y) 
+n = 200 +p = 20 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(rep(3,10),rep(0,p-10)) +y = x\%*\%beta + sigma*rnorm(n) +fsfit = fs(x,y) plot(fsfit) } From 3117fd7b950a51ba68c36d96eed6dbb03f52db01 Mon Sep 17 00:00:00 2001 From: tibs Date: Sat, 28 Nov 2015 17:32:33 -0800 Subject: [PATCH 077/396] rob --- selectiveInference/man/estimateSigma.Rd | 11 ++++++++--- selectiveInference/man/fixedLassoInf.Rd | 7 ++++--- selectiveInference/man/forwardStop.Rd | 3 ++- selectiveInference/man/lar.Rd | 3 ++- selectiveInference/man/larInf.Rd | 12 ++++++------ selectiveInference/man/plot.lar.Rd | 7 +++++-- selectiveInference/man/predict.fs.Rd | 7 ++++--- selectiveInference/man/predict.lar.Rd | 8 ++++++-- selectiveInference/man/selectiveInference.Rd | 8 +++----- 9 files changed, 40 insertions(+), 26 deletions(-) diff --git a/selectiveInference/man/estimateSigma.Rd b/selectiveInference/man/estimateSigma.Rd index 9397a08a..ad7ee68e 100644 --- a/selectiveInference/man/estimateSigma.Rd +++ b/selectiveInference/man/estimateSigma.Rd @@ -42,20 +42,25 @@ A study of error variance estimation in lasso regression. arXiv:1311.5274. \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ +# generate data +# set.seed(43) n=50 p=10 sigma=.7 x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,FALSE) beta=c(3,2,0,0,rep(0,p-4)) y=x\%*\%beta+sigma*rnorm(n) y=y-mean(y) - +# +# Estimate sigma out=estimateSigma(x,y) +# #run forwrad stepwise fit=fs(x,y) +# +# run inference functions out2=fsInf(fit,sigma=out$sigmahat) - +out2 } diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 609136ed..1ef078da 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -121,22 +121,23 @@ n=50 p=10 sigma=.7 x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE)/sqrt(n-1) beta=c(3,2,0,0,rep(0,p-4)) y=x\%*\%beta+sigma*rnorm(n) y=y-mean(y) +# # first run glmnet -gfit=glmnet(x,y,standardize=FALSE) +gfit=glmnet(x,y) lambda = .1 #extract coef for a given lambda; Note the 1/n factor! beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] # compute fixed lambda p-values and selection intervals out=fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out # #NOT RUN # as above, but use lar function instead to get initial lasso fit (should get same result) -# fit=lar(x,y,normalize=FALSE) +# fit=lar(x,y) # beta=coef(fit,s=lambda,mode="lambda") # fixedLassoInf(x,y,beta,lambda,sigma=sigma) diff --git a/selectiveInference/man/forwardStop.Rd b/selectiveInference/man/forwardStop.Rd index 7863afe6..e30e052f 100644 --- a/selectiveInference/man/forwardStop.Rd +++ b/selectiveInference/man/forwardStop.Rd @@ -39,15 +39,16 @@ n=50 p=10 sigma=.7 x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE)/sqrt(n-1) beta=c(4,2,0,0,rep(0,p-4)) y=x\%*\%beta+sigma*rnorm(n) y=y-mean(y) +# #first run forward stepwise fsfit=fs(x,y) # # run inference function out=fsInf(fsfit) +# # estimate optimal stopping point forwardStop(out$pv, alpha=.10) } diff --git a/selectiveInference/man/lar.Rd b/selectiveInference/man/lar.Rd index 1e9f26e6..92e1c1ec 100644 --- a/selectiveInference/man/lar.Rd +++ b/selectiveInference/man/lar.Rd @@ -77,14 +77,15 @@ Jerome Friedman (2002, 2009). Elements of Statistical Learning. 
\examples{ +#generate data set.seed(33) n=20 p=10 sigma=1 x=matrix(rnorm(n*p),n,p) -#generate data beta=c(3,3,rep(0,p-2)) y=x\%*\%beta+sigma*rnorm(n) +# #run lar and plot results larfit=lar(x,y,verbose=TRUE) plot(larfit) diff --git a/selectiveInference/man/larInf.Rd b/selectiveInference/man/larInf.Rd index e45900b1..15e6c90f 100644 --- a/selectiveInference/man/larInf.Rd +++ b/selectiveInference/man/larInf.Rd @@ -120,28 +120,28 @@ n=50 p=10 sigma=.7 x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE)/sqrt(n-1) beta=c(3,2,0,0,rep(0,p-4)) y=x\%*\%beta+sigma*rnorm(n) #first run lar - larfit=lar(x,y) +larfit=lar(x,y) +larfit # #lar inference for each successive entry of a predictor; sigma estimated # from mean squared residual from least squares fit - out=larInf(larfit) +out=larInf(larfit) +out # #NOT RUN # states data example: predict life expectancy #x=state.x77[,-4] #y=state.x77[,4] -#x=scale(x,TRUE,TRUE) #n=nrow(x) # estimate sigma from cross-validated lasso fit #cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y,stand=FALSE)$sigmahat +#sigmahat=estimateSigma(x,y)$sigmahat # # run LAR -#larfit=lar(x,y,normalize=FALSE) +#larfit=lar(x,y) # # compute p-values and confidence intervals # larInf(larfit,sigma=sigmahat) diff --git a/selectiveInference/man/plot.lar.Rd b/selectiveInference/man/plot.lar.Rd index 668ce1c2..03e91935 100644 --- a/selectiveInference/man/plot.lar.Rd +++ b/selectiveInference/man/plot.lar.Rd @@ -32,17 +32,20 @@ Default is TRUE} \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ +#generate data set.seed(33) n=200 p=20 sigma=1 x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE)/sqrt(n-1) -#generate y beta=c(3,-2,rep(0,p-2)) beta=c(rep(3,10),rep(0,p-10)) y=x\%*\%beta+sigma*rnorm(n) y=y-mean(y) +# +#run lar larfit=lar(x,y) +# +#plot results plot(larfit) } diff --git a/selectiveInference/man/predict.fs.Rd b/selectiveInference/man/predict.fs.Rd index ca3a32aa..f64e63b4 100644 --- a/selectiveInference/man/predict.fs.Rd +++ b/selectiveInference/man/predict.fs.Rd @@ -35,18 +35,19 @@ Either a vector/matrix of predictions, or a vector/matrix of coefficients. \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ +#generate data set.seed(33) n=200 p=20 sigma=1 x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE)/sqrt(n-1) -#generate y -beta=c(3,-2,rep(0,p-2)) beta=c(rep(3,10),rep(0,p-10)) y=x\%*\%beta+sigma*rnorm(n) y=y-mean(y) +# # run forward stepwise and predict functions obj=fs(x,y) +obj fit=predict.fs(obj,x,s=3) +fit } diff --git a/selectiveInference/man/predict.lar.Rd b/selectiveInference/man/predict.lar.Rd index 16aa7958..67331ed0 100644 --- a/selectiveInference/man/predict.lar.Rd +++ b/selectiveInference/man/predict.lar.Rd @@ -38,16 +38,20 @@ Either a vector/matrix of predictions, or a vector/matrix of coefficients. 
\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ +#generate data set.seed(33) n=200 p=20 sigma=1 x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE)/sqrt(n-1) -#generate y + beta=c(3,-2,rep(0,p-2)) beta=c(rep(3,10),rep(0,p-10)) y=x\%*\%beta+sigma*rnorm(n) +#run lar larfit=lar(x,y) +# predict from lar fit fit=predict.lar(larfit,x,s=3.5,type="fit") +fit + } diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index 66423cac..2aca3270 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -78,17 +78,17 @@ n=50 p=10 sigma=.7 x=matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE)/sqrt(n-1) beta=c(4,2,0,0,rep(0,p-4)) y=x\%*\%beta+sigma*rnorm(n) # #first run forward stepwise - fsfit=fs(x,y) +fsfit=fs(x,y) # # forward stepwise inference for each successive entry of a predictor; # sigma estimated from mean squared residual # out=fsInf(fsfit) +out ## #NOT RUN @@ -99,7 +99,6 @@ out=fsInf(fsfit) #p=10 #sigma=.7 #x=matrix(rnorm(n*p),n,p) -#x=scale(x,TRUE,TRUE)/sqrt(n-1) #beta=c(4,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) # @@ -118,11 +117,11 @@ out=fsInf(fsfit) #p=10 #sigma=.7 #x=matrix(rnorm(n*p),n,p) -#x=scale(x,TRUE,TRUE)/sqrt(n-1) #beta=c(3,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) #first run lar # larfit=lar(x,y) +# larfit # #lar inference for each successive entry of a predictor; sigma estimated # from mean squared residual from least squares fit @@ -175,7 +174,6 @@ set.seed(43) #p=10 #sigma=.7 #x=matrix(rnorm(n*p),n,p) -#x=scale(x,T,F) #beta=c(3,2,0,0,rep(0,p-4)) #y=x%*%beta+sigma*rnorm(n) # From 179de960ad9337c036049c2835112a73ade8e344 Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Sun, 29 Nov 2015 17:05:31 -0500 Subject: [PATCH 078/396] Just some edits to the documentation --- selectiveInference/R/funs.groupfs.R | 3 +- selectiveInference/man/estimateSigma.Rd | 44 ++--- selectiveInference/man/fixedLassoInf.Rd | 60 ++---- selectiveInference/man/forwardStop.Rd | 35 ++-- selectiveInference/man/fs.Rd | 14 +- selectiveInference/man/fsInf.Rd | 3 +- selectiveInference/man/lar.Rd | 31 +-- selectiveInference/man/larInf.Rd | 53 +++-- selectiveInference/man/manyMeans.Rd | 14 +- selectiveInference/man/plot.fs.Rd | 8 +- selectiveInference/man/plot.lar.Rd | 25 +-- selectiveInference/man/predict.fs.Rd | 22 +-- selectiveInference/man/predict.lar.Rd | 25 +-- selectiveInference/man/selectiveInference.Rd | 195 ++++++++----------- 14 files changed, 230 insertions(+), 302 deletions(-) diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index 20ef2654..a5f46c8c 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -19,7 +19,8 @@ #' index = sort(rep(1:20, 2)) #' y = rnorm(20) + 2 * x[,1] - x[,4] #' fit = groupfs(x, y, index, maxsteps = 5) -#' pvals = groupfsInf(fit) +#' out = groupfsInf(fit) +#' out #' @seealso \code{\link{groupfsInf}}, \code{\link{factorDesign}}. 
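# A sketch (not from the package docs) of the factor workflow that
# factorDesign, referenced in @seealso above, supports -- assuming it
# expands a data frame's factor columns into dummy-coded groups and
# returns the design matrix and group index as $x and $index:
#   fd = factorDesign(warpbreaks)
#   fit = groupfs(fd$x, rnorm(nrow(fd$x)), fd$index, maxsteps = 2)
#   out = groupfsInf(fit)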
groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE) { diff --git a/selectiveInference/man/estimateSigma.Rd b/selectiveInference/man/estimateSigma.Rd index ad7ee68e..c9561218 100644 --- a/selectiveInference/man/estimateSigma.Rd +++ b/selectiveInference/man/estimateSigma.Rd @@ -26,41 +26,39 @@ This function estimates the standard deviation of the noise, in a linear regresi A lasso regression is fit, using cross-validation to estimate the tuning parameter lambda. With sample size n, yhat equal to the predicted values and df being the number of nonzero coefficients from the lasso fit, the estimate of sigma is \code{sqrt(sum((y-yhat)^2) / (n-df-1))}. -Important: if you are using glmnet to compute the lasso estimate, be sure to use the same values -for the intercept and standardized argument in glmnet and estimateSigma. Same applies to fs or lar, -where the argument for standardization is called "normalize" +Important: if you are using glmnet to compute the lasso estimate, be sure to use the settings +for the "intercept" and "standardize" arguments in glmnet and estimateSigma. Same applies to fs +or lar, where the argument for standardization is called "normalize". } \value{ \item{sigmahat}{The estimate of sigma} \item{df}{The degrees of freedom of lasso fit used} } \references{ -Stephen Reid, Jerome Friedman and Rob Tibshirani (2014). +Stephen Reid, Jerome Friedman, and Rob Tibshirani (2014). A study of error variance estimation in lasso regression. arXiv:1311.5274. } \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -# generate data -# -set.seed(43) -n=50 -p=10 -sigma=.7 -x=matrix(rnorm(n*p),n,p) -beta=c(3,2,0,0,rep(0,p-4)) -y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) -# -# Estimate sigma -out=estimateSigma(x,y) -# #run forwrad stepwise -fit=fs(x,y) -# -# run inference functions -out2=fsInf(fit,sigma=out$sigmahat) -out2 +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# estimate sigma +sigmahat = estimateSigma(x,y)$sigmahat + +# run sequential inference with estimated sigma +out = fsInf(fsfit,sigma=sigmahat) +out } diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 1ef078da..8c7afce1 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -108,55 +108,37 @@ to alpha/2, and can be used for error-checking purposes. } \references{ -Jason Lee, Dennis Sun, Yuekai Sun, Jonathan Taylor (2013). +Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013). Exact post-selection inference, with application to the lasso. arXiv:1311.6238. } \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ - -#generate data set.seed(43) -n=50 -p=10 -sigma=.7 -x=matrix(rnorm(n*p),n,p) -beta=c(3,2,0,0,rep(0,p-4)) -y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) -# -# first run glmnet -gfit=glmnet(x,y) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# first run glmnet +gfit = glmnet(x,y) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) lambda = .1 -#extract coef for a given lambda; Note the 1/n factor! 
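# (exact=TRUE makes coef() re-solve the lasso at this lambda rather than
# interpolate between the path's stored values; without it the returned
# beta can violate the KKT conditions that fixedLassoInf checks. Compare,
# e.g., coef(gfit,s=lambda/n)[-1] with coef(gfit,s=lambda/n,exact=TRUE)[-1].)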
beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] # compute fixed lambda p-values and selection intervals -out=fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) out -# -#NOT RUN -# as above, but use lar function instead to get initial lasso fit (should get same result) -# fit=lar(x,y) -# beta=coef(fit,s=lambda,mode="lambda") -# fixedLassoInf(x,y,beta,lambda,sigma=sigma) - -## -# states data example: predict life expectancy -#x=state.x77[,-4] -#y=state.x77[,4] -#x=scale(x,TRUE,TRUE) -#n=nrow(x) -# estimate sigma from cross-validated lasso fit -# cvf=cv.glmnet(x,y) -# sigmahat=estimateSigma(x,y,stand=FALSE)$sigmahat -# -# fit lasso -# lambda=n*cvf$lambda.min -# gfit=glmnet(x,y,standardize=FALSE) -# bhat=coef(gfit, s=lambda/n, exact=TRUE)[-1] -# -# compute p-values and confidence intervals -# fixedLassoInf(x,y,bhat,lambda,sigma=sigmahat) +## NOT RUN +## as above, but use lar function instead to get initial +## lasso fit (should get same results) +# lfit = lar(x,y) +# beta = coef(lfit,s=lambda,mode="lambda") +# out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +# out2 } diff --git a/selectiveInference/man/forwardStop.Rd b/selectiveInference/man/forwardStop.Rd index e30e052f..87eb7abd 100644 --- a/selectiveInference/man/forwardStop.Rd +++ b/selectiveInference/man/forwardStop.Rd @@ -27,28 +27,29 @@ Guarantees FDR control at the level alpha, for independent p-values. Step number for sequential stop. } \references{ -Max Grazier G'Sell, Stefan Wager, Alexandra Chouldechova, Rob Tibshirani (2014). +Max Grazier G'Sell, Stefan Wager, Alexandra Chouldechova, and Rob Tibshirani (2014). Sequential selection procedures and Fflse Discovery Rate Control. arXiv:1309.5352. To appear in Journal of the Royal Statistical Society: Series B. } \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -set.seed(433) -n=50 -p=10 -sigma=.7 -x=matrix(rnorm(n*p),n,p) -beta=c(4,2,0,0,rep(0,p-4)) -y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) -# -#first run forward stepwise - fsfit=fs(x,y) -# -# run inference function - out=fsInf(fsfit) -# +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = fsInf(fsfit) +out + # estimate optimal stopping point - forwardStop(out$pv, alpha=.10) +forwardStop(out$pv, alpha=.10) } diff --git a/selectiveInference/man/fs.Rd b/selectiveInference/man/fs.Rd index c1e2a7fd..2a61c836 100644 --- a/selectiveInference/man/fs.Rd +++ b/selectiveInference/man/fs.Rd @@ -32,12 +32,14 @@ the interface for the \code{lar} function) \details{ This function implements forward stepwise regression, adding the predictor at each -step that maximizes the absolute correlation between the predictors-- orthogonalized wrt the current model-- and the residual. This entry +step that maximizes the absolute correlation between the predictors---once +orthogonalized with respect to the current model---and the residual. This entry criterion is standard, and is equivalent to choosing the variable that achieves the biggest drop in RSS at each step; it is used, e.g., by the \code{step} function -in R. 
Note that, for example, the \code{lars} package implements a stepwise option (with type="step"), but -uses a (mildly) different entry criterion, based on maximal absolute correlation between the original (non-orthogonalized) -predictors and the residual. +in R. Note that, for example, the \code{lars} package implements a stepwise option +(with type="step"), but uses a (mildly) different entry criterion, based on maximal +absolute correlation between the original (non-orthogonalized) predictors and the +residual. } \value{ \item{action}{Vector of predictors in order of entry} @@ -75,8 +77,10 @@ sigma = 1 x = matrix(rnorm(n*p),n,p) beta = c(3,2,rep(0,p-2)) y = x\%*\%beta + sigma*rnorm(n) -# run forward stepwise + +# run forward stepwise, plot results fsfit = fs(x,y) +plot(fsfit) # compute sequential p-values and confidence intervals # (sigma estimated from full model) diff --git a/selectiveInference/man/fsInf.Rd b/selectiveInference/man/fsInf.Rd index 237f7e60..b86584d9 100644 --- a/selectiveInference/man/fsInf.Rd +++ b/selectiveInference/man/fsInf.Rd @@ -97,7 +97,7 @@ value chosen by the modified AIC scheme} } \references{ -Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, Rob Tibshirani (2014). +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. Joshua Loftus and Jonathan Taylor (2014). A significance test for forward stepwise @@ -116,6 +116,7 @@ sigma = 1 x = matrix(rnorm(n*p),n,p) beta = c(3,2,rep(0,p-2)) y = x\%*\%beta + sigma*rnorm(n) + # run forward stepwise fsfit = fs(x,y) diff --git a/selectiveInference/man/lar.Rd b/selectiveInference/man/lar.Rd index 92e1c1ec..590a6633 100644 --- a/selectiveInference/man/lar.Rd +++ b/selectiveInference/man/lar.Rd @@ -62,10 +62,10 @@ to enter along the path} } \references{ -Brad Efron, Trevor Hastie, Iain Johnstone and Rob Tibshirani (2002). +Brad Efron, Trevor Hastie, Iain Johnstone, and Rob Tibshirani (2002). Least angle regression. Annals of Statistics (with discussion). -See also the descriptions in Trevor Hastie, Rob Tibshirani and +See also the descriptions in Trevor Hastie, Rob Tibshirani, and Jerome Friedman (2002, 2009). Elements of Statistical Learning. } @@ -76,17 +76,20 @@ Jerome Friedman (2002, 2009). Elements of Statistical Learning. } \examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) -#generate data -set.seed(33) -n=20 -p=10 -sigma=1 -x=matrix(rnorm(n*p),n,p) -beta=c(3,3,rep(0,p-2)) -y=x\%*\%beta+sigma*rnorm(n) -# -#run lar and plot results -larfit=lar(x,y,verbose=TRUE) -plot(larfit) +# run LAR, plot results +larfit = lar(x,y) +plot(larfit) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = larInf(larfit) +out } diff --git a/selectiveInference/man/larInf.Rd b/selectiveInference/man/larInf.Rd index 15e6c90f..8e3b2d03 100644 --- a/selectiveInference/man/larInf.Rd +++ b/selectiveInference/man/larInf.Rd @@ -102,7 +102,7 @@ value chosen by the modified AIC scheme} } \references{ -Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, Rob Tibshirani (2014). +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. } @@ -113,37 +113,28 @@ Exact post-selection inference for sequential regression procedures. arXiv:1401. 
3889.
} \examples{ - -#generate data set.seed(43) -n=50 -p=10 -sigma=.7 -x=matrix(rnorm(n*p),n,p) -beta=c(3,2,0,0,rep(0,p-4)) -y=x\%*\%beta+sigma*rnorm(n) -#first run lar -larfit=lar(x,y) -larfit -# -#lar inference for each successive entry of a predictor; sigma estimated -# from mean squared residual from least squares fit -out=larInf(larfit) -out -# -#NOT RUN -# states data example: predict life expectancy -#x=state.x77[,-4] -#y=state.x77[,4] -#n=nrow(x) -# estimate sigma from cross-validated lasso fit -#cvf=cv.glmnet(x,y) -#sigmahat=estimateSigma(x,y)$sigmahat -# +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + # run LAR -#larfit=lar(x,y) -# -# compute p-values and confidence intervals -# larInf(larfit,sigma=sigmahat) +larfit = lar(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = larInf(larfit) +out.seq + +# compute p-values and confidence intervals after AIC stopping +out.aic = larInf(larfit,type="aic") +out.aic + +# compute p-values and confidence intervals after 5 fixed steps +out.fix = larInf(larfit,type="all",k=5) +out.fix } diff --git a/selectiveInference/man/manyMeans.Rd b/selectiveInference/man/manyMeans.Rd index 62a14009..57fc4296 100644 --- a/selectiveInference/man/manyMeans.Rd +++ b/selectiveInference/man/manyMeans.Rd @@ -43,7 +43,7 @@ were selected by the procedure (either BH(q) or top-K). Labelled "Selind" in out } \references{ -Stephen Reid, Jonathan Taylor, Rob Tibshirani (2014). +Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014). Post-selection point and interval estimation of signal sizes in Gaussian samples. arXiv:1405.3340. } @@ -51,12 +51,10 @@ arXiv:1405.3340. \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ - - set.seed(12345) -n = 100 # sample size -signal = 3 # signal size -mu = c(rep(signal, floor (n/5)), rep(0, n-floor(n/5))) # 20% of elements get the signal; rest 0 -y = mu + rnorm (n, 0, 1) -out = manyMeans(y, bh.q=0.1) +n = 100 +mu = c(rep(3,floor(n/5)), rep(0,n-floor(n/5))) +y = mu + rnorm(n) +out = manyMeans(y, bh.q=0.1) +out } diff --git a/selectiveInference/man/plot.fs.Rd b/selectiveInference/man/plot.fs.Rd index b712cdc5..4f770132 100644 --- a/selectiveInference/man/plot.fs.Rd +++ b/selectiveInference/man/plot.fs.Rd @@ -29,12 +29,14 @@ Default is TRUE} \examples{ set.seed(33) -n = 200 -p = 20 +n = 50 +p = 10 sigma = 1 x = matrix(rnorm(n*p),n,p) -beta = c(rep(3,10),rep(0,p-10)) +beta = c(3,2,rep(0,p-2)) y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise, plot results fsfit = fs(x,y) plot(fsfit) } diff --git a/selectiveInference/man/plot.lar.Rd b/selectiveInference/man/plot.lar.Rd index 03e91935..baa1195d 100644 --- a/selectiveInference/man/plot.lar.Rd +++ b/selectiveInference/man/plot.lar.Rd @@ -32,20 +32,15 @@ Default is TRUE} \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#generate data -set.seed(33) -n=200 -p=20 -sigma=1 -x=matrix(rnorm(n*p),n,p) -beta=c(3,-2,rep(0,p-2)) -beta=c(rep(3,10),rep(0,p-10)) -y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) -# -#run lar -larfit=lar(x,y) -# -#plot results +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run LAR, plot results +larfit = lar(x,y) plot(larfit) } diff --git a/selectiveInference/man/predict.fs.Rd b/selectiveInference/man/predict.fs.Rd index f64e63b4..5e504824 100644 --- 
a/selectiveInference/man/predict.fs.Rd +++ b/selectiveInference/man/predict.fs.Rd @@ -35,19 +35,15 @@ Either a vector/matrix of predictions, or a vector/matrix of coefficients. \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#generate data set.seed(33) -n=200 -p=20 -sigma=1 -x=matrix(rnorm(n*p),n,p) -beta=c(rep(3,10),rep(0,p-10)) -y=x\%*\%beta+sigma*rnorm(n) -y=y-mean(y) -# +n = 200 +p = 20 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(rep(3,10),rep(0,p-10)) +y = x\%*\%beta + sigma*rnorm(n) + # run forward stepwise and predict functions -obj=fs(x,y) -obj -fit=predict.fs(obj,x,s=3) -fit +obj = fs(x,y) +fit = predict(obj,x,s=3) } diff --git a/selectiveInference/man/predict.lar.Rd b/selectiveInference/man/predict.lar.Rd index 67331ed0..c91bed34 100644 --- a/selectiveInference/man/predict.lar.Rd +++ b/selectiveInference/man/predict.lar.Rd @@ -38,20 +38,15 @@ Either a vector/matrix of predictions, or a vector/matrix of coefficients. \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} \examples{ -#generate data set.seed(33) -n=200 -p=20 -sigma=1 -x=matrix(rnorm(n*p),n,p) - -beta=c(3,-2,rep(0,p-2)) -beta=c(rep(3,10),rep(0,p-10)) -y=x\%*\%beta+sigma*rnorm(n) -#run lar -larfit=lar(x,y) -# predict from lar fit -fit=predict.lar(larfit,x,s=3.5,type="fit") -fit - +n = 200 +p = 20 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(rep(3,10),rep(0,p-10)) +y = x\%*\%beta + sigma*rnorm(n) + +# run lar and predict functions +obj = lar(x,y) +fit = predict(obj,x,s=3) } diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index 2aca3270..7d2ca571 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -58,132 +58,93 @@ Maintainer: Rob Tibshirani } \references{ -Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, Rob Tibshirani (2014). +Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014). Exact post-selection inference for sequential regression procedures. arXiv:1401.3889. -Jason Lee, Dennis Sun, Yuekai Sun, Jonathan Taylor (2013). +Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013). Exact post-selection inference, with application to the lasso. arXiv:1311.6238. -Stephen Reid, Jonathan Taylor, Rob Tibshirani (2014). +Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014). Post-selection point and interval estimation of signal sizes in Gaussian samples. arXiv:1405.3340. } \examples{ -# forward stepwise: -# -#generate some data -set.seed(43) -n=50 -p=10 -sigma=.7 -x=matrix(rnorm(n*p),n,p) -beta=c(4,2,0,0,rep(0,p-4)) -y=x\%*\%beta+sigma*rnorm(n) -# -#first run forward stepwise -fsfit=fs(x,y) -# -# forward stepwise inference for each successive entry of a predictor; -# sigma estimated from mean squared residual -# -out=fsInf(fsfit) -out - -## -#NOT RUN -# lasso with fixed lambda -# -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#beta=c(4,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -# -# first run glmnet -#gfit=glmnet(x,y,standardize=FALSE) -#lam = .1 -#extract coef for a given lam; Note the 1/n factor in s! 
-#bhat = coef(gfit, s=lam/n, exact=TRUE)[-1] - -# compute fixed lambda p-values and selection intervals -#aa=fixedLassoInf(x,y,bhat,lam,sigma=sigma) -# -##least angle regression from mean squared residual -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -#first run lar -# larfit=lar(x,y) -# larfit -# -#lar inference for each successive entry of a predictor; sigma estimated -# from mean squared residual from least squares fit -# aa=larInf(larfit) - -## -##many normal means - -#set.seed(12345) -#n = 100 # sample size -#signal = 3 # signal size -#mu = c(rep(signal, floor (n/5)), rep(0, n-floor(n/5))) # 20% of elements get the signal; rest 0 -#y = mu + rnorm (n, 0, 1) -#mmObj = manyMeans(y, bh.q=0.1) -# -# -# Forward stepwise with grouped variables -#set.seed(1) -#n <- 40 -#p <- 20 -#index <- sort(rep(1:(p/2), 2)) -#steps <- 10 -#sparsity <- 5 -#snr <- 3 -# x <- matrix(rnorm(n*p), nrow=n) -# beta <- rep(0, p) -# beta[which(index %in% 1:sparsity)] <- snr -# y <- x %*% beta+rnorm(n) -#fit <- groupfs(x, y, index=1:p, maxsteps = steps) -#out<- groupfsInf(fit) - - - -##estimation of sigma for use in fsInf or larInf or fixedLassoInf -# -#set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#x=scale(x,TRUE,FALSE) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) - -#out=estimateSigma(x,y) - -##estimation of lambda for use in fixedLassoInf -set.seed(43) -#n=50 -#p=10 -#sigma=.7 -#x=matrix(rnorm(n*p),n,p) -#beta=c(3,2,0,0,rep(0,p-4)) -#y=x%*%beta+sigma*rnorm(n) -# -#estimate lambda using the known value of sigma -#lamhat=estimateLambda(x,sigma=.7) -#first estimate sigma -#sigmahat=estimateSigma(x,y)$sigmahat -#lamhat=estimateLambda(x,sigma=sigmahat) -# - +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out.seq = fsInf(fsfit) +out.seq + +# compute p-values and confidence intervals after AIC stopping +out.aic = fsInf(fsfit,type="aic") +out.aic + +# compute p-values and confidence intervals after 5 fixed steps +out.fix = fsInf(fsfit,type="all",k=5) +out.fix + +## NOT RUN---lasso at fixed lambda +## first run glmnet +# gfit = glmnet(x,y) + +## extract coef for a given lambda; note the 1/n factor! 
+## (and we don't save the intercept term) +# lambda = .1 +# beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + +## compute fixed lambda p-values and selection intervals +# out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +# out + +## NOT RUN---many normal means +# set.seed(12345) +# n = 100 +# mu = c(rep(3,floor(n/5)), rep(0,n-floor(n/5))) +# y = mu + rnorm(n) +# out = manyMeans(y, bh.q=0.1) +# out + +## NOT RUN---forward stepwise with groups +# set.seed(1) +# n = 20 +# p = 40 +# x = matrix(rnorm(n*p), nrow=n) +# index = sort(rep(1:(p/2), 2)) +# y = rnorm(n) + 2 * x[,1] - x[,4] +# fit = groupfs(x, y, index, maxsteps = 5) +# out = groupfsInf(fit) +# out + +## NOT RUN---estimation of sigma for use in fsInf +## (or larInf or fixedLassoInf) +# set.seed(33) +# n = 50 +# p = 10 +# sigma = 1 +# x = matrix(rnorm(n*p),n,p) +# beta = c(3,2,rep(0,p-2)) +# y = x\%*\%beta + sigma*rnorm(n) + +## run forward stepwise +# fsfit = fs(x,y) + +## estimate sigma +# sigmahat = estimateSigma(x,y)$sigmahat + +## run sequential inference with estimated sigma +# out = fsInf(fit,sigma=sigmahat) +# out } \keyword{ package } From 9ef33d34879ab76437bb6194be1209bcafe9ce0a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 12:56:06 -0800 Subject: [PATCH 079/396] some renaming of variables for readability --- selectiveInference/R/funs.fs.R | 67 ++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index a46b53e9..0a1b1f12 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -27,8 +27,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, ##### # Find the first variable to enter and its sign - xx = scale(x,center=F,scale=sqrt(colSums(x^2))) - uhat = t(xx)%*%y + working_x = scale(x,center=F,scale=sqrt(colSums(x^2))) + uhat = t(working_x)%*%y ihit = which.max(abs(uhat)) # Hitting coordinate s = Sign(uhat[ihit]) # Sign @@ -50,16 +50,17 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Gamma matrix! 
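# (Gamma stacks the linear inequalities Gamma %*% y >= 0 that certify the
# selection event: each step appends rows saying the entering variable's
# signed correlation beats +/- that of every remaining variable, plus one
# row fixing its sign; fsInf later conditions on exactly this polyhedron.)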
gbuf = max(2*p*3,2000) # Space for 3 steps, at least - gi = 0 + gi = 0 # index into rows of Gamma matrix + Gamma = matrix(0,gbuf,n) - Gamma[gi+Seq(1,p-1),] = t(s*xx[,ihit]+xx[,-ihit]); gi = gi+p-1 - Gamma[gi+Seq(1,p-1),] = t(s*xx[,ihit]-xx[,-ihit]); gi = gi+p-1 - Gamma[gi+1,] = t(s*xx[,ihit]); gi = gi+1 + Gamma[gi+Seq(1,p-1),] = t(s*working_x[,ihit]+working_x[,-ihit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(s*working_x[,ihit]-working_x[,-ihit]); gi = gi+p-1 + Gamma[gi+1,] = t(s*working_x[,ihit]); gi = gi+1 - # nk - nk = numeric(buf) + # nconstraint + nconstraint = numeric(buf) vreg = matrix(0,buf,n) - nk[1] = gi + nconstraint[1] = gi vreg[1,] = s*x[,ihit] / sum(x[,ihit]^2) # Other things to keep track of, but not return @@ -92,41 +93,43 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, action = c(action,numeric(buf)) df = c(df,numeric(buf)) beta = cbind(beta,matrix(0,p,buf)) - nk = c(nk,numeric(buf)) + nconstraint = c(nconstraint,numeric(buf)) vreg = rbind(vreg,matrix(0,buf,n)) } # Key quantities for the next entry a = backsolve(R,t(Q1)%*%y) X2perp = X2 - X1 %*% backsolve(R,t(Q1)%*%X2) - xx = scale(X2perp,center=F,scale=sqrt(colSums(X2perp^2))) - aa = as.numeric(t(xx)%*%y) + working_x = scale(X2perp,center=F,scale=sqrt(colSums(X2perp^2))) + score = as.numeric(t(working_x)%*%y) # If the inactive set is empty, nothing will hit if (r==min(n-intercept,p)) break # Otherwise find the next hitting time else { - shits = Sign(aa) - hits = shits * aa - ihit = which.max(hits) - shit = shits[ihit] + sign_score = Sign(score) + abs_score = sign_score * score + ihit = which.max(abs_score) + shit = sign_score[ihit] } # Record the solution - action[k] = I[ihit] + # what is the difference between "action" and "A"? + + action[k] = I[ihit] df[k] = r beta[A,k] = a # Gamma matrix! 
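# (grow the constraint buffer in chunks before appending this step's rows,
# so Gamma is reallocated rarely rather than once per step)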
if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) - xx = t(shits*t(xx)) - Gamma[gi+Seq(1,p-r),] = t(xx); gi = gi+p-r - Gamma[gi+Seq(1,p-r-1),] = t(xx[,ihit]-xx[,-ihit]); gi = gi+p-r-1 - Gamma[gi+1,] = t(xx[,ihit]); gi = gi+1 + working_x = t(sign_score*t(working_x)) + Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,ihit]-working_x[,-ihit]); gi = gi+p-r-1 + Gamma[gi+1,] = t(working_x[,ihit]); gi = gi+1 - # nk, regression contrast - nk[k] = gi + # nconstraint, regression contrast + nconstraint[k] = gi vreg[k,] = shit*X2perp[,ihit] / sum(X2perp[,ihit]^2) # Update all of the variables @@ -156,7 +159,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, df = df[Seq(1,k-1),drop=FALSE] beta = beta[,Seq(1,k-1),drop=FALSE] Gamma = Gamma[Seq(1,gi),,drop=FALSE] - nk = nk[Seq(1,k-1)] + nconstraint = nconstraint[Seq(1,k-1)] vreg = vreg[Seq(1,k-1),,drop=FALSE] # If we reached the maximum number of steps @@ -191,7 +194,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, out = list(action=action,sign=s,df=df,beta=beta, completepath=completepath,bls=bls, - Gamma=Gamma,nk=nk,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, + Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, intercept=intercept,normalize=normalize,call=this.call) class(out) = "fs" return(out) @@ -250,7 +253,7 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic p = ncol(x) n = nrow(x) G = obj$Gamma - nk = obj$nk + nconstraint = obj$nconstraint sx = obj$sx if (is.null(sigma)) { @@ -278,8 +281,8 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - Gj = G[1:nk[j],] - uj = rep(0,nk[j]) + Gj = G[1:nconstraint[j],] + uj = rep(0,nconstraint[j]) vj = vreg[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) @@ -304,13 +307,13 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic out = aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) khat = out$khat m = out$stopped * ntimes - G = rbind(out$G,G[1:nk[khat+m],]) # Take ntimes more steps past khat - u = c(out$u,rep(0,nk[khat+m])) # (if we need to) + G = rbind(out$G,G[1:nconstraint[khat+m],]) # Take ntimes more steps past khat + u = c(out$u,rep(0,nconstraint[khat+m])) # (if we need to) kk = khat } else { - G = G[1:nk[k],] - u = rep(0,nk[k]) + G = G[1:nconstraint[k],] + u = rep(0,nconstraint[k]) kk = k } From c7f55d9e19c0af16cb41f4f3565083f5520c9f65 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 13:14:50 -0800 Subject: [PATCH 080/396] more variable name changes --- selectiveInference/R/funs.fs.R | 92 +++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 41 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 0a1b1f12..b7a76dbc 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -28,61 +28,68 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, ##### # Find the first variable to enter and its sign working_x = scale(x,center=F,scale=sqrt(colSums(x^2))) - uhat = t(working_x)%*%y - ihit = which.max(abs(uhat)) # Hitting coordinate - s = Sign(uhat[ihit]) # Sign + score = t(working_x)%*%y + i_hit = which.max(abs(score)) # Hitting coordinate + sign_hit = Sign(score[i_hit]) # Sign if (verbose) { - cat(sprintf("1. 
Adding variable %i, |A|=%i...",ihit,1)) + cat(sprintf("1. Adding variable %i, |A|=%i...",i_hit,1)) } # Now iteratively find the new FS estimates # Things to keep track of, and return at the end + # JT: I guess the "buf" just saves us from making huge + # matrices we don't need? + buf = min(maxsteps,500) action = numeric(buf) # Actions taken df = numeric(buf) # Degrees of freedom beta = matrix(0,p,buf) # FS estimates - action[1] = ihit + action[1] = i_hit df[1] = 0 beta[,1] = 0 # Gamma matrix! gbuf = max(2*p*3,2000) # Space for 3 steps, at least - gi = 0 # index into rows of Gamma matrix + gi = 0 # index into rows of Gamma matrix Gamma = matrix(0,gbuf,n) - Gamma[gi+Seq(1,p-1),] = t(s*working_x[,ihit]+working_x[,-ihit]); gi = gi+p-1 - Gamma[gi+Seq(1,p-1),] = t(s*working_x[,ihit]-working_x[,-ihit]); gi = gi+p-1 - Gamma[gi+1,] = t(s*working_x[,ihit]); gi = gi+1 + Gamma[gi+Seq(1,p-1),] = t(s*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(s*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+1,] = t(s*working_x[,i_hit]); gi = gi+1 # nconstraint nconstraint = numeric(buf) vreg = matrix(0,buf,n) nconstraint[1] = gi - vreg[1,] = s*x[,ihit] / sum(x[,ihit]^2) + vreg[1,] = s*x[,i_hit] / sum(x[,i_hit]^2) # Other things to keep track of, but not return r = 1 # Size of active set - A = ihit # Active set - I = Seq(1,p)[-ihit] # Inactive set - X1 = x[,ihit,drop=FALSE] # Matrix X[,A] - X2 = x[,-ihit,drop=FALSE] # Matrix X[,I] + A = i_hit # Active set -- JT: isn't this basically the same as action? + I = Seq(1,p)[-i_hit] # Inactive set + X_active = x[,i_hit,drop=FALSE] # Matrix X[,A] + X_inactive = x[,-i_hit,drop=FALSE] # Matrix X[,I] k = 2 # What step are we at? + # JT Why keep track of r and k instead of just saying k=r+1? + + # Compute a skinny QR decomposition of X_active + # JT: obs was used as variable name above -- this is something different, no? + # changed it to qr_X - # Compute a skinny QR decomposition of X1 - obj = qr(X1) - Q = qr.Q(obj,complete=TRUE) - Q1 = Q[,1,drop=FALSE]; - Q2 = Q[,-1,drop=FALSE] - R = qr.R(obj) + qr_X = qr(X_active) + Q = qr.Q(qr_X,complete=TRUE) + Q_active = Q[,1,drop=FALSE]; + Q_inactive = Q[,-1,drop=FALSE] + R = qr.R(qr_X) # Throughout the algorithm, we will maintain - # the decomposition X1 = Q1*R. Dimenisons: - # X1: n x r - # Q1: n x r - # Q2: n x (n-r) + # the decomposition X_active = Q_active*R. Dimensions: + # X_active: n x r + # Q_active: n x r + # Q_inactive: n x (n-r) # R: r x r while (k<=maxsteps) { @@ -98,9 +105,10 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, } # Key quantities for the next entry - a = backsolve(R,t(Q1)%*%y) - X2perp = X2 - X1 %*% backsolve(R,t(Q1)%*%X2) - working_x = scale(X2perp,center=F,scale=sqrt(colSums(X2perp^2))) + + a = backsolve(R,t(Q_active)%*%y) + X_inactive_resid = X_inactive - X_active %*% backsolve(R,t(Q_active)%*%X_inactive) + working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) score = as.numeric(t(working_x)%*%y) # If the inactive set is empty, nothing will hit @@ -110,14 +118,14 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, else { sign_score = Sign(score) abs_score = sign_score * score - ihit = which.max(abs_score) - shit = sign_score[ihit] + i_hit = which.max(abs_score) + shit = sign_score[i_hit] } # Record the solution # what is the difference between "action" and "A"? 
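# (answer: they hold the same entries in the same order -- action is the
# preallocated, grown-as-needed buffer that is returned to the caller,
# while A is the working active-set vector, with I as its complement)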
- action[k] = I[ihit] + action[k] = I[i_hit] df[k] = r beta[A,k] = a @@ -125,26 +133,28 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) working_x = t(sign_score*t(working_x)) Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r - Gamma[gi+Seq(1,p-r-1),] = t(working_x[,ihit]-working_x[,-ihit]); gi = gi+p-r-1 - Gamma[gi+1,] = t(working_x[,ihit]); gi = gi+1 + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1 # nconstraint, regression contrast nconstraint[k] = gi - vreg[k,] = shit*X2perp[,ihit] / sum(X2perp[,ihit]^2) + vreg[k,] = shit*X_inactive_resid[,i_hit] / sum(X_inactive_resid[,i_hit]^2) # Update all of the variables r = r+1 - A = c(A,I[ihit]) - I = I[-ihit] + A = c(A,I[i_hit]) + I = I[-i_hit] s = c(s,shit) - X1 = cbind(X1,X2[,ihit]) - X2 = X2[,-ihit,drop=FALSE] + X_active = cbind(X_active,X_inactive[,i_hit]) + X_inactive = X_inactive[,-i_hit,drop=FALSE] # Update the QR decomposition - obj = updateQR(Q1,Q2,R,X1[,r]) - Q1 = obj$Q1 - Q2 = obj$Q2 - R = obj$R + updated_qr = updateQR(Q_active,Q_inactive,R,X_active[,r]) + Q_active = updated_qr$Q1 + + # JT: why do we store Q_inactive? Doesn't seem to be used. + Q_inactive = updated_qr$Q2 + R = updated_qr$R if (verbose) { cat(sprintf("\n%i. Adding variable %i, |A|=%i...",k,A[r],r)) From 0e699c5ecf105c10c1ca121d5d98d70259471d84 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 13:24:30 -0800 Subject: [PATCH 081/396] one more name change; comment about vup, vlo --- selectiveInference/R/funs.fs.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index b7a76dbc..b5b13f85 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -106,7 +106,6 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Key quantities for the next entry - a = backsolve(R,t(Q_active)%*%y) X_inactive_resid = X_inactive - X_active %*% backsolve(R,t(Q_active)%*%X_inactive) working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) score = as.numeric(t(working_x)%*%y) @@ -127,7 +126,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, action[k] = I[i_hit] df[k] = r - beta[A,k] = a + beta[A,k] = backsolve(R,t(Q_active)%*%y) # Gamma matrix! if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) @@ -360,6 +359,8 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic } } + # JT: why do we output vup, vlo? Are they used somewhere else? 
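# (vlo and vup are the truncation endpoints of the one-dimensional
# truncated Gaussian that the polyhedral lemma reduces each test to;
# returning them lets a caller gauge how severe the truncation was)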
+ out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, vars=vars,sign=sign,sigma=sigma,alpha=alpha, From 6c30502efaca69823cc10ba1b80528e2ea18a735 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 13:31:35 -0800 Subject: [PATCH 082/396] forgot to rename variable s --- selectiveInference/R/funs.fs.R | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index b5b13f85..a6a9c3ff 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -30,7 +30,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, working_x = scale(x,center=F,scale=sqrt(colSums(x^2))) score = t(working_x)%*%y i_hit = which.max(abs(score)) # Hitting coordinate - sign_hit = Sign(score[i_hit]) # Sign + sign_hit = Sign(score[i_hit]) # Sign + signs = sign_hit # later signs will be appended to `signs` if (verbose) { cat(sprintf("1. Adding variable %i, |A|=%i...",i_hit,1)) @@ -56,15 +57,15 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, gi = 0 # index into rows of Gamma matrix Gamma = matrix(0,gbuf,n) - Gamma[gi+Seq(1,p-1),] = t(s*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 - Gamma[gi+Seq(1,p-1),] = t(s*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 - Gamma[gi+1,] = t(s*working_x[,i_hit]); gi = gi+1 + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 # nconstraint nconstraint = numeric(buf) vreg = matrix(0,buf,n) nconstraint[1] = gi - vreg[1,] = s*x[,i_hit] / sum(x[,i_hit]^2) + vreg[1,] = sign_hit*x[,i_hit] / sum(x[,i_hit]^2) # Other things to keep track of, but not return r = 1 # Size of active set @@ -118,7 +119,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, sign_score = Sign(score) abs_score = sign_score * score i_hit = which.max(abs_score) - shit = sign_score[i_hit] + sign_hit = sign_score[i_hit] } # Record the solution @@ -137,13 +138,13 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # nconstraint, regression contrast nconstraint[k] = gi - vreg[k,] = shit*X_inactive_resid[,i_hit] / sum(X_inactive_resid[,i_hit]^2) + vreg[k,] = sign_hit*X_inactive_resid[,i_hit] / sum(X_inactive_resid[,i_hit]^2) # Update all of the variables r = r+1 A = c(A,I[i_hit]) I = I[-i_hit] - s = c(s,shit) + signs = c(signs,sign_hit) X_active = cbind(X_active,X_inactive[,i_hit]) X_inactive = X_inactive[,-i_hit,drop=FALSE] @@ -201,7 +202,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Assign column names colnames(beta) = as.character(Seq(1,k-1)) - out = list(action=action,sign=s,df=df,beta=beta, + out = list(action=action,sign=signs,df=df,beta=beta, completepath=completepath,bls=bls, Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, intercept=intercept,normalize=normalize,call=this.call) From 034cfa7f2fcea0d0de075f3ce0f76b71cae3f7c1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 13:58:11 -0800 Subject: [PATCH 083/396] missed one reference to variable a --- selectiveInference/R/funs.fs.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index a6a9c3ff..c2b9806a 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R 
@@ -125,9 +125,10 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Record the solution # what is the difference between "action" and "A"? + beta_cur = backsolve(R,t(Q_active)%*%y) action[k] = I[i_hit] df[k] = r - beta[A,k] = backsolve(R,t(Q_active)%*%y) + beta[A,k] = beta_cur # Gamma matrix! if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) @@ -189,7 +190,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Record the least squares solution. Note that # we have already computed this bls = rep(0,p) - bls[A] = a + bls[A] = beta_cur } if (verbose) cat("\n") From 82ee21a76c1bc9af99e0d31c7722126b691c7dd5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 14:25:08 -0800 Subject: [PATCH 084/396] small fix to test.fs.R --- tests/test.fs.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test.fs.R b/tests/test.fs.R index f811676e..a0781558 100644 --- a/tests/test.fs.R +++ b/tests/test.fs.R @@ -1,5 +1,4 @@ library(selectiveInference) -#library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") library(lars) @@ -24,7 +23,7 @@ max(abs(obj$action-unlist(obj2$action))) # These don't always match ... what is the lars function doing? # Checks -max(abs(obj$action-unlist(obj2$action)) +max(abs(obj$action-unlist(obj2$action))) max(abs(coef(obj,s=4.5,mode="step")- lars::predict.lars(obj2,s=4.5,type="coef",mode="step")$coef)) max(abs(predict(obj,s=4.5,mode="step")- From 42b99323ad38fff1d9987a2a4a3cab9465241322 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 14:46:37 -0800 Subject: [PATCH 085/396] BF: beta_cur must be computed before the possible break --- selectiveInference/R/funs.fs.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index c2b9806a..a8b48129 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -111,6 +111,10 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) score = as.numeric(t(working_x)%*%y) + beta_cur = backsolve(R,t(Q_active)%*%y) # must be computed before the break + # so we have it if we have + # completed the path + # If the inactive set is empty, nothing will hit if (r==min(n-intercept,p)) break @@ -125,7 +129,6 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Record the solution # what is the difference between "action" and "A"? 
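
Aside: the beta_cur assignment here is just least squares via the thin QR
factorization X = QR, since argmin_b ||y - Xb||^2 = R^{-1} Q'y. A quick
standalone check (illustrative names only):

set.seed(1)
n <- 20; p <- 4
X <- matrix(rnorm(n * p), n, p)
y <- rnorm(n)
qr_X <- qr(X)
b_qr <- backsolve(qr.R(qr_X), t(qr.Q(qr_X)) %*% y)
b_lm <- coef(lm(y ~ X - 1))
max(abs(b_qr - b_lm))  # agrees to machine precision
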
- beta_cur = backsolve(R,t(Q_active)%*%y) action[k] = I[i_hit] df[k] = r beta[A,k] = beta_cur From 575f4a18dc2e0e1062e3a85a4a948bd6b4cbe2b3 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 19:31:12 -0800 Subject: [PATCH 086/396] intersection upper / lower bounds match python code on X.csv, Y.csv --- selectiveInference/R/funs.fs.R | 110 +++++++++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index a8b48129..39ff0c3e 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -27,7 +27,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, ##### # Find the first variable to enter and its sign - working_x = scale(x,center=F,scale=sqrt(colSums(x^2))) + working_scale = sqrt(colSums(x^2)) + working_x = scale(x,center=F,scale=working_scale) score = t(working_x)%*%y i_hit = which.max(abs(score)) # Hitting coordinate sign_hit = Sign(score[i_hit]) # Sign @@ -48,10 +49,22 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, df = numeric(buf) # Degrees of freedom beta = matrix(0,p,buf) # FS estimates + offset_pos = matrix(Inf, p, buf) # upper bounds for selective maxZ + offset_neg = matrix(Inf, p, buf) # lower bounds for selective maxZ + action[1] = i_hit df[1] = 0 beta[,1] = 0 + ##### + # Variables needed to compute truncation limits for + # selective maxZ test + + realized_Z_max = c(sign_hit * score[i_hit]) + offset_pos[,1] = Inf + offset_neg[,1] = Inf + working_resid = y - x %*% beta[,1] + # Gamma matrix! gbuf = max(2*p*3,2000) # Space for 3 steps, at least gi = 0 # index into rows of Gamma matrix @@ -103,13 +116,18 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, beta = cbind(beta,matrix(0,p,buf)) nconstraint = c(nconstraint,numeric(buf)) vreg = rbind(vreg,matrix(0,buf,n)) + + offset_pos = cbind(offset_pos, matrix(0, p, buf)) + offset_neg = cbind(offset_neg, matrix(0, p, buf)) } # Key quantities for the next entry + prev_scale = working_scale[-i_hit] X_inactive_resid = X_inactive - X_active %*% backsolve(R,t(Q_active)%*%X_inactive) - working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) - score = as.numeric(t(working_x)%*%y) + working_scale = sqrt(colSums(X_inactive_resid^2)) + working_x = scale(X_inactive_resid,center=F,scale=working_scale) + working_score = as.numeric(t(working_x)%*%y) beta_cur = backsolve(R,t(Q_active)%*%y) # must be computed before the break # so we have it if we have @@ -120,10 +138,18 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Otherwise find the next hitting time else { - sign_score = Sign(score) - abs_score = sign_score * score + sign_score = Sign(working_score) + abs_score = sign_score * working_score i_hit = which.max(abs_score) sign_hit = sign_score[i_hit] + # keep track of necessary quantities for selective maxZ + + offset_shift = t(X_inactive) %*% (y - working_resid) + realized_Z_scaled = realized_Z_max * prev_scale + offset_pos[I,k] = realized_Z_scaled + offset_shift + offset_neg[I,k] = realized_Z_scaled - offset_shift + + working_resid = y - X_active %*% beta_cur } # Record the solution @@ -152,6 +178,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, X_active = cbind(X_active,X_inactive[,i_hit]) X_inactive = X_inactive[,-i_hit,drop=FALSE] + realized_Z_max = sign_hit * working_score[i_hit] + # Update the QR decomposition updated_qr = updateQR(Q_active,Q_inactive,R,X_active[,r]) 
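
Aside: updateQR's internals are not shown in this excerpt; assuming it appends one
column to a thin QR factorization, a minimal Gram-Schmidt version (hypothetical
helper, for illustration only) looks like:

add_column_qr <- function(Q, R, x_new) {
  w <- t(Q) %*% x_new    # coefficients of x_new on the current orthonormal basis
  q <- x_new - Q %*% w   # residual, orthogonal to span(Q)
  rho <- sqrt(sum(q^2))
  list(Q = cbind(Q, q / rho),
       R = rbind(cbind(R, w), c(rep(0, ncol(R)), rho)))
}
X <- matrix(rnorm(40), 10, 4)
qr0 <- qr(X[, 1:3])
upd <- add_column_qr(qr.Q(qr0), qr.R(qr0), X[, 4])
max(abs(upd$Q %*% upd$R - X))  # reconstructs [X[,1:3], X[,4]] to machine precision
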
Q_active = updated_qr$Q1 @@ -176,6 +204,9 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, nconstraint = nconstraint[Seq(1,k-1)] vreg = vreg[Seq(1,k-1),,drop=FALSE] + offset_pos = offset_pos[,Seq(1,k-1),drop=FALSE] + offset_neg = offset_neg[,Seq(1,k-1),drop=FALSE] + # If we reached the maximum number of steps if (k>maxsteps) { if (verbose) { @@ -209,7 +240,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, out = list(action=action,sign=signs,df=df,beta=beta, completepath=completepath,bls=bls, Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, - intercept=intercept,normalize=normalize,call=this.call) + intercept=intercept,normalize=normalize,call=this.call, + offset_pos=offset_pos,offset_neg=offset_neg) class(out) = "fs" return(out) } @@ -376,6 +408,72 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic ############################## +############################## + +# selected Zmax tests + +fsInf_Zmax <- function(obj, sigma=NULL, alpha=0.1, k=NULL, + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { + + this.call = match.call() + + checkargs.misc(sigma=sigma,alpha=alpha,k=k, + gridrange=gridrange,mult=mult,ntimes=ntimes) + if (class(obj) != "fs") stop("obj must be an object of class fs") + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + G = obj$Gamma + nconstraint = obj$nconstraint + sx = obj$sx + + if (is.null(sigma)) { + # TODO we should probably sample uniform + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + khat = NULL + + vars = obj$action[1:k] + for (j in 1:k) { + + if (j > 1) { + active = vars[1:(j-1)] + inactive = (1:p)[-active] + } else { + inactive = 1:p + } + collapsed_pos = apply(obj$offset_pos[inactive,1:j,drop=FALSE], 1, min) + collapsed_neg = apply(obj$offset_neg[inactive,1:j,drop=FALSE], 1, min) + + # next, condition on solution up to now + } + + out = list(k=k,khat=khat,pv=pv,ci=ci, + tailarea=tailarea,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "fsInf" + return(out) +} + +############################## + ############################## From b9c91bd474e377a2442eee3088f7d9e09c5f320d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 22:42:16 -0800 Subject: [PATCH 087/396] numerically agrees with python code up to the point truncated gaussians need to be drawn --- selectiveInference/NAMESPACE | 2 +- selectiveInference/R/funs.fs.R | 132 ++++++++++++++++++++++++++------- tests/test.fs.selected.R | 21 ++++++ 3 files changed, 128 insertions(+), 27 deletions(-) create mode 100644 tests/test.fs.selected.R diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 8dd9120c..180abf05 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -1,5 +1,5 @@ export(lar,fs, - larInf,fsInf, + larInf,fsInf,fsInf_maxZ, coef.lar,coef.fs, predict.lar,predict.fs, print.lar,print.fs, diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 39ff0c3e..119f5f4b 100644 --- 
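
Aside: when n >= 2*p the sigma estimate above is the residual standard error of the
full least squares fit. A standalone check (illustrative names only):

set.seed(1)
n <- 100; p <- 5
x <- matrix(rnorm(n * p), n, p)
y <- x %*% rnorm(p) + rnorm(n)
oo <- TRUE  # intercept
sigma_hat <- sqrt(sum(lsfit(x, y, intercept = oo)$res^2) / (n - p - oo))
c(sigma_hat, summary(lm(y ~ x))$sigma)  # identical
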
a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -49,8 +49,11 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, df = numeric(buf) # Degrees of freedom beta = matrix(0,p,buf) # FS estimates - offset_pos = matrix(Inf, p, buf) # upper bounds for selective maxZ - offset_neg = matrix(Inf, p, buf) # lower bounds for selective maxZ + # Buffered objects for selective maxZ test + + offset_pos_maxZ = matrix(Inf, p, buf) # upper bounds for selective maxZ + offset_neg_maxZ = matrix(Inf, p, buf) # lower bounds for selective maxZ + scale_maxZ = matrix(0, p, buf) # lower bounds for selective maxZ action[1] = i_hit df[1] = 0 @@ -60,20 +63,28 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Variables needed to compute truncation limits for # selective maxZ test - realized_Z_max = c(sign_hit * score[i_hit]) - offset_pos[,1] = Inf - offset_neg[,1] = Inf - working_resid = y - x %*% beta[,1] + realized_maxZ = c(sign_hit * score[i_hit]) + offset_pos_maxZ[,1] = Inf + offset_neg_maxZ[,1] = Inf + scale_maxZ[,1] = working_scale + working_resid_maxZ = y - x %*% beta[,1] # Gamma matrix! gbuf = max(2*p*3,2000) # Space for 3 steps, at least gi = 0 # index into rows of Gamma matrix + zi = 0 # index into rows of Gamma_maxZ matrix Gamma = matrix(0,gbuf,n) Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 + # Gamma_maxZ is the rbind + # of residualized X_{inactive \cup i_hit} + + Gamma_maxZ = matrix(0,gbuf,n) + Gamma_maxZ[zi+Seq(1,p),] = t(x); zi = zi+p + # nconstraint nconstraint = numeric(buf) vreg = matrix(0,buf,n) @@ -117,8 +128,9 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, nconstraint = c(nconstraint,numeric(buf)) vreg = rbind(vreg,matrix(0,buf,n)) - offset_pos = cbind(offset_pos, matrix(0, p, buf)) - offset_neg = cbind(offset_neg, matrix(0, p, buf)) + offset_pos_maxZ = cbind(offset_pos_maxZ, matrix(0, p, buf)) + offset_neg_maxZ = cbind(offset_neg_maxZ, matrix(0, p, buf)) + scale_maxZ = cbind(scale_maxZ, matrix(0, p, buf)) } # Key quantities for the next entry @@ -144,12 +156,13 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, sign_hit = sign_score[i_hit] # keep track of necessary quantities for selective maxZ - offset_shift = t(X_inactive) %*% (y - working_resid) - realized_Z_scaled = realized_Z_max * prev_scale - offset_pos[I,k] = realized_Z_scaled + offset_shift - offset_neg[I,k] = realized_Z_scaled - offset_shift + offset_shift = t(X_inactive) %*% (y - working_resid_maxZ) + realized_Z_scaled = realized_maxZ * prev_scale + offset_pos_maxZ[I,k] = realized_Z_scaled + offset_shift + offset_neg_maxZ[I,k] = realized_Z_scaled - offset_shift + scale_maxZ[I,k] = working_scale - working_resid = y - X_active %*% beta_cur + working_resid_maxZ = y - X_active %*% beta_cur } # Record the solution @@ -159,7 +172,16 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, df[k] = r beta[A,k] = beta_cur + # store the X_inactive_resid in Gamma_maxZ + + if (gi + p-r > nrow(Gamma_maxZ)) Gamma_maxZ = rbind(Gamma_maxZ,matrix(0,p-r,n)) + Gamma_maxZ[zi+Seq(1,p-r),] = t(X_inactive_resid); zi = zi+p-r + + # update maxZ variable + realized_maxZ = sign_hit * working_score[i_hit] + # Gamma matrix! 
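
Aside: the rows stored in Gamma_maxZ above are the inactive columns with the active
block regressed out. The backsolve form used in the path loop equals the projection
form (I - P_active) X_inactive; a standalone check (illustrative names only):

set.seed(1)
n <- 25; p <- 6
X <- matrix(rnorm(n * p), n, p)
active <- 1:2; inactive <- 3:p
qa <- qr(X[, active])
Q <- qr.Q(qa); R <- qr.R(qa)
r1 <- X[, inactive] - X[, active] %*% backsolve(R, t(Q) %*% X[, inactive])
r2 <- (diag(n) - Q %*% t(Q)) %*% X[, inactive]
max(abs(r1 - r2))  # agrees to machine precision
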
+
   if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n))
   working_x = t(sign_score*t(working_x))
   Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r
@@ -178,8 +200,6 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
     X_active = cbind(X_active,X_inactive[,i_hit])
     X_inactive = X_inactive[,-i_hit,drop=FALSE]
 
-    realized_Z_max = sign_hit * working_score[i_hit]
-
     # Update the QR decomposition
     updated_qr = updateQR(Q_active,Q_inactive,R,X_active[,r])
     Q_active = updated_qr$Q1
@@ -204,8 +224,10 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
   nconstraint = nconstraint[Seq(1,k-1)]
   vreg = vreg[Seq(1,k-1),,drop=FALSE]
 
-  offset_pos = offset_pos[,Seq(1,k-1),drop=FALSE]
-  offset_neg = offset_neg[,Seq(1,k-1),drop=FALSE]
+  offset_pos_maxZ = offset_pos_maxZ[,Seq(1,k-1),drop=FALSE]
+  offset_neg_maxZ = offset_neg_maxZ[,Seq(1,k-1),drop=FALSE]
+  scale_maxZ = scale_maxZ[,Seq(1,k-1),drop=FALSE]
+  Gamma_maxZ = Gamma_maxZ[Seq(1,zi),,drop=FALSE]
 
   # If we reached the maximum number of steps
   if (k>maxsteps) {
@@ -241,7 +263,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
              Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx,
              intercept=intercept,normalize=normalize,call=this.call,
-             offset_pos=offset_pos,offset_neg=offset_neg)
+             offset_pos_maxZ=offset_pos_maxZ,offset_neg_maxZ=offset_neg_maxZ,
+             scale_maxZ=scale_maxZ,Gamma_maxZ=Gamma_maxZ)
   class(out) = "fs"
   return(out)
 }
@@ -410,9 +433,9 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic
 
 ##############################
 
-# selected Zmax tests
+# selected maxZ tests
 
-fsInf_Zmax <- function(obj, sigma=NULL, alpha=0.1, k=NULL,
+fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, k=NULL,
                        gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) {
 
   this.call = match.call()
@@ -450,24 +473,81 @@ fsInf_Zmax <- function(obj, sigma=NULL, alpha=0.1, k=NULL,
   khat = NULL
 
   vars = obj$action[1:k]
+  zi = 0
   for (j in 1:k) {
 
+    # the inactive set here excludes only the variables
+    # chosen before the j-th step
+    # so, at j==1, the inactive set is every variable
+    # at j==2, the inactive set is everything but the first one
+
     if (j > 1) {
       active = vars[1:(j-1)]
       inactive = (1:p)[-active]
     } else {
       inactive = 1:p
     }
-    collapsed_pos = apply(obj$offset_pos[inactive,1:j,drop=FALSE], 1, min)
-    collapsed_neg = apply(obj$offset_neg[inactive,1:j,drop=FALSE], 1, min)
+    collapsed_pos = apply(obj$offset_pos_maxZ[inactive,1:j,drop=FALSE], 1, min)
+    collapsed_neg = apply(obj$offset_neg_maxZ[inactive,1:j,drop=FALSE], 1, min)
+    cur_scale = obj$scale_maxZ[,j][inactive]
+    cur_adjusted_X = obj$Gamma_maxZ[zi + Seq(1,p-j+1),]; zi = zi+p-j+1
+    cur_X = obj$x[,inactive]
 
     # next, condition on solution up to now
+
+    if (j > 1) {
+      cur_fitted = predict(obj, s=j)
+      cur_fitted = cur_fitted - mean(cur_fitted)
+      cur_offset = t(cur_X) %*% cur_fitted
+    }
+    else {
+      cur_fitted = 0
+      cur_offset = 0
+    }
+
+    print('pos')
+    print(collapsed_pos)
+    print('neg')
+    print(collapsed_neg)
+
+    print('fitted')
+    print(cur_fitted[1:10])
+    collapsed_pos = collapsed_pos - cur_offset
+    collapsed_neg = collapsed_neg + cur_offset
+
+    print("cur_offset")
+    print(cur_offset)
+
+    print('pos_adj')
+    print(collapsed_pos)
+    print('neg_adj')
+    print(collapsed_neg)
+
+    # now, we sample from Y_star, a centered Gaussian with covariance sigma^2 I
+    # subject to the constraint
+    # t(cur_adjusted_X) %*% Y_star < collapsed_pos
+    # -t(cur_adjusted_X) %*% Y_star < collapsed_neg
+
+    # really, we want
the covariance of Y_star to be \sigma^2 (I - cur_P) + # where P is projection on the j-1 previous variables + # but this doesn't matter as everything we do with the samples + # will be a function of (I - cur_P) Y_star and the constraints are + # expressible in terms of (I - cur_P) Y_star because + # we have adjusted X + + print('pos') + print(collapsed_pos) + print('neg') + print(collapsed_neg) + + # IMPORTANT: after sampling Y_star, we have to add back cur_fitted } - out = list(k=k,khat=khat,pv=pv,ci=ci, - tailarea=tailarea,vmat=vmat,y=y, - vars=vars,sign=sign,sigma=sigma,alpha=alpha, - call=this.call) + out = list(pos=collapsed_pos, neg=collapsed_neg) + #out = list(k=k,khat=khat,pv=pv,ci=ci, + # tailarea=tailarea,vmat=vmat,y=y, + # vars=vars,sign=sign,sigma=sigma,alpha=alpha, + # call=this.call) class(out) = "fsInf" return(out) } diff --git a/tests/test.fs.selected.R b/tests/test.fs.selected.R new file mode 100644 index 00000000..e5b2cf5d --- /dev/null +++ b/tests/test.fs.selected.R @@ -0,0 +1,21 @@ +library(selectiveInference) +library(lars) + +set.seed(32) + +n=50 +p=10 +sigma=1 + +x = as.matrix(read.table("X.csv", sep=',', header=FALSE)) +Y = as.numeric(read.table("Y.csv", sep=',', header=FALSE)[,1]) + +beta=c(5,4,3,2,1,rep(0,p-5)) +mu=x%*%beta + +y=mu+Y +fsfit=fs(x,y,norm=TRUE, intercept=TRUE) +out = fsInf_maxZ(fsfit,sigma=sigma) +print(out$pos) +print(-out$neg) + From 2b8206e1fc6e9788b10de3d307f9eff649a967b7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 22:54:15 -0800 Subject: [PATCH 088/396] removing debugging print statements --- selectiveInference/R/funs.fs.R | 61 ++++++++++++---------------------- tests/X.csv | 50 ++++++++++++++++++++++++++++ tests/Y.csv | 50 ++++++++++++++++++++++++++++ tests/test.fs.selected.R | 3 +- 4 files changed, 122 insertions(+), 42 deletions(-) create mode 100644 tests/X.csv create mode 100644 tests/Y.csv diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 119f5f4b..433e36ef 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -80,7 +80,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 # Gamma_maxZ is the rbind - # of residualized X_{inactive \cup i_hit} + # of residualized X_inactive's Gamma_maxZ = matrix(0,gbuf,n) Gamma_maxZ[zi+Seq(1,p),] = t(x); zi = zi+p @@ -435,18 +435,13 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic # selected maxZ tests -fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, k=NULL, - gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { +fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL) { this.call = match.call() - checkargs.misc(sigma=sigma,alpha=alpha,k=k, - gridrange=gridrange,mult=mult,ntimes=ntimes) + checkargs.misc(sigma=sigma,alpha=alpha,k=k) + if (class(obj) != "fs") stop("obj must be an object of class fs") - if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { - warning("Package Rmpfr is not installed, reverting to standard precision") - bits = NULL - } k = min(k,length(obj$action)) # Round to last step x = obj$x @@ -487,13 +482,18 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, k=NULL, } else { inactive = 1:p } + collapsed_pos = apply(obj$offset_pos_maxZ[inactive,1:j,drop=FALSE], 1, min) collapsed_neg = apply(obj$offset_neg_maxZ[inactive,1:j,drop=FALSE], 1, min) cur_scale = obj$scale_maxZ[,j][inactive] + cur_adjusted_X = obj$Gamma_maxZ[zi + Seq(1,p-j+1),]; zi = 
zi+p-j+1 cur_X = obj$x[,inactive] - # next, condition on solution up to now + # now we condition on solution up to now + # this is equivalent to finding vector of + # fitted values up to now and appropriately + # adjusting the box limits if (j > 1) { cur_fitted = predict(obj, s=j) @@ -505,28 +505,13 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, k=NULL, cur_offset = 0 } - print('pos') - print(collapsed_pos) - print('neg') - print(collapsed_neg) - - print('fitted') - print(cur_fitted[1:10]) - collapsed_pos = collapsed_pos - cur_offset - collapsed_neg = collapsed_neg + cur_offset - - print("cur_offset") - print(cur_offset) - - print('pos_adj') - print(collapsed_pos) - print('neg_adj') - print(collapsed_neg) + final_upper = collapsed_pos - cur_offset + final_lower = -(collapsed_neg + cur_offset) # now, we sample from Y_star, a centered Gaussian with covariance sigma^2 I # subject to the constraint - # t(cur_adjusted_X) %*% Y_star < collapsed_pos - # -t(cur_adjusted_X) %*% Y_star < collapsed_neg + # t(cur_adjusted_X) %*% Y_star < final_upper + # -t(cur_adjusted_X) %*% Y_star < -final_lower # really, we want the covariance of Y_star to be \sigma^2 (I - cur_P) # where P is projection on the j-1 previous variables @@ -535,20 +520,16 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, k=NULL, # expressible in terms of (I - cur_P) Y_star because # we have adjusted X - print('pos') - print(collapsed_pos) - print('neg') - print(collapsed_neg) - # IMPORTANT: after sampling Y_star, we have to add back cur_fitted + + pv = c(pv, runif(1)) } - out = list(pos=collapsed_pos, neg=collapsed_neg) - #out = list(k=k,khat=khat,pv=pv,ci=ci, - # tailarea=tailarea,vmat=vmat,y=y, - # vars=vars,sign=sign,sigma=sigma,alpha=alpha, - # call=this.call) - class(out) = "fsInf" + khat = forwardStop(pv,alpha) + + out = list(pv=pv,khat=khat, + call=this.call) + class(out) = "fsInf_Zmax" return(out) } diff --git a/tests/X.csv b/tests/X.csv new file mode 100644 index 00000000..9e3bf148 --- /dev/null +++ b/tests/X.csv @@ -0,0 +1,50 @@ +-1.178998180891155656e-01,-1.431240072891293569e-01,-1.032612207595051818e-01,-3.032187959649952358e-01,3.623527555657671262e-02,7.140070893198336571e-03,1.994856139137879403e-01,-2.091539772440898642e-02,-1.016086254290284319e-01,4.225480470043030748e-03 +1.900091741547343949e-02,8.527956006600532352e-03,-2.360627728880385912e-01,-7.614938655092416860e-03,-1.549697289040389246e-02,2.249142191975829854e-04,9.680233904138081658e-02,-2.051414424020579219e-01,4.630024394840283053e-01,-2.965171494849212080e-02 +-3.699631581918050305e-02,-1.722594123029470847e-01,-2.811921500130166226e-03,-2.389563150481890941e-01,-1.075624889112302696e-01,-7.804616804476580127e-02,-3.706328230757107572e-02,-3.503804035374127185e-02,-2.647277697909438676e-01,-6.682759619509875959e-02 +-5.130177317722107305e-02,2.268018482998235275e-02,-4.179689023642386980e-02,-1.623091275446776893e-01,1.450083194821685240e-01,2.219597463120694947e-01,2.533842648572138742e-02,2.771331081546550557e-02,3.581273753830072765e-02,8.722251846321744939e-02 +-1.306936424111264927e-01,4.432794698567819186e-02,-9.941966213181395451e-02,-8.715861166850324970e-02,-2.303285613309095925e-01,3.078294089253613142e-01,3.344919445579297435e-03,1.196830919307750318e-01,-1.253076375038724988e-01,-1.148761269817597519e-01 
+-2.250204095659052439e-01,1.113238116713928527e-01,-5.290002031147110484e-02,7.830761098391121611e-02,7.066607013335457488e-02,6.565522345102817181e-02,-6.308067898881258850e-02,2.025286101924284737e-02,-6.818604986842312365e-02,-1.070102801614746246e-01 +-2.882057940922741479e-02,1.799983072585507882e-01,1.130515074865423159e-01,9.460393874312598983e-02,-7.352923330349291076e-02,-2.755771198440996536e-02,-1.310648265859815260e-02,-2.475061797099528116e-01,2.427234406663877353e-01,2.583612973537190993e-01 +9.611602736254980239e-02,6.273551721516337776e-02,-4.324306198998925654e-02,-3.860336044312057818e-02,-8.403781067825444318e-02,9.562097685082457388e-02,1.492509493676312526e-01,-2.943327955367435744e-01,-2.877171607065032860e-02,4.951010186454657749e-03 +-3.465380375344423447e-01,2.176487162155849725e-02,-3.693161931323302866e-02,1.619662116578308719e-01,-2.564291959900348236e-03,-1.637987837220862553e-01,-3.342210830973137148e-02,-6.685343093833395756e-02,-8.554318254825124535e-02,1.613775288161613530e-01 +5.313251489880643774e-02,3.890058088332522257e-02,-4.613812296420412296e-02,-1.034537934089800326e-01,-9.631838877144843769e-02,-3.092370301090547474e-02,-1.419742935886602053e-01,2.380517554927013592e-03,7.079890010442424497e-02,-3.648327087249821354e-01 +5.102527195639846441e-02,-1.362508515419207089e-01,8.159140673817973088e-02,-1.244854531841070727e-01,-2.606170884965469323e-02,1.511382743326344924e-02,-2.255344304610490958e-01,6.236460909959602728e-02,-2.657951209176429019e-01,-1.594365214738231729e-02 +7.926342892815507746e-02,6.482707539961192478e-02,2.087256857963709078e-01,1.292814475123448825e-01,-1.796195585005122275e-01,1.525550407724216850e-01,5.475504271088289798e-02,2.868404636974628344e-02,1.264796115328103154e-01,-1.555090411723416069e-02 +1.181696244748224739e-01,1.999694578685428381e-01,1.807315761292420109e-01,1.229014824059724564e-01,4.895454174242171164e-02,-1.833503379863557770e-01,6.881950937845732197e-02,-7.141605983769011567e-02,-3.849053159713680498e-03,1.119597953540091562e-01 +-1.454915981077725406e-01,-2.266794840632963548e-01,-1.298952558056660489e-01,5.437551156666114299e-02,6.183066340902761832e-02,1.035937403404784546e-01,-2.570107980749361878e-02,-3.093760287832481026e-01,1.134096421692707829e-01,-9.223798086734372181e-02 +8.709274188337053080e-02,-9.556231597509272369e-02,2.118423200194279044e-01,-8.774546066090892563e-03,2.769267035538602317e-01,-9.815821608839980783e-02,1.709472261185437285e-03,-2.153557681348250430e-02,4.148111640994667226e-02,5.196811833848381107e-02 +1.880295620305566864e-02,-5.850999774080740262e-02,-5.301122779276247476e-02,3.537499672221694480e-02,2.852315743834457629e-02,-6.942851976899913624e-02,2.276894587717699092e-01,1.172152546047936506e-01,6.257413318528345057e-03,-1.682213921871343956e-02 +2.569990952013931906e-01,-1.736032991395405622e-01,1.511557448386198835e-02,2.441813032135399242e-01,-1.105916475419813361e-01,1.472489542271989138e-01,2.295846712275041168e-01,4.356843384762421750e-02,-5.142855348130226378e-02,-2.450933945023268251e-02 +-3.281672906942039586e-02,-1.258316028084845439e-01,-1.229551514195627165e-01,-1.497473216257483863e-01,1.213383871403065223e-01,5.912940006733528181e-02,-3.442255150142065068e-02,1.684351304854427023e-01,7.878512914823442015e-02,6.141853050739853360e-04 
+2.985043940771188800e-02,2.697852036140859222e-01,7.513858261740154199e-02,1.691617277348242410e-01,-7.715021687363131375e-02,-1.590745317353356725e-01,5.564786782697213330e-02,-2.145953101463689283e-02,5.036342539549311381e-02,-2.798631226235559488e-01 +-1.206199609736929718e-01,5.952016584844534164e-02,-2.588323978913500273e-02,-3.610815652054708697e-02,-7.111461594831880451e-02,1.716009403396910049e-01,1.945097583365250696e-02,1.682441279803628120e-01,2.416197565576874359e-02,-8.446813595188230883e-02 +-8.788855601271274021e-02,2.870664460710737470e-02,-2.150701597306006463e-01,8.942026820589549230e-02,-6.406031432030431794e-02,-7.238901051636800754e-02,-2.847790921066896686e-01,-1.791769561572375546e-01,-8.985812560956656092e-02,-1.557232758585813948e-01 +1.122914496490508562e-01,-9.161001903716270434e-02,5.898245165418002783e-02,7.666824641318797928e-02,1.144428476261267896e-01,1.279510610165440210e-01,2.794793837130695249e-02,-1.769334059940223525e-01,1.395088287951896100e-01,1.531099359177416630e-01 +-1.756665362986270865e-02,2.243400472787894853e-02,-2.313766383513476965e-02,5.985349262422249433e-02,1.623952653335277718e-01,7.219739543308523189e-02,1.650650206991929048e-01,3.373066648957191438e-01,1.293144667091710442e-01,-1.198206084743443922e-01 +-1.591288826029353398e-01,1.441961795141566763e-02,2.691263563648352286e-01,-7.631601786004396648e-02,-2.035897032262034834e-01,1.839999945230586520e-01,-3.040797131486546595e-02,-2.072195591278232774e-01,-2.582099955618117823e-02,-4.968369036922733184e-02 +1.971222793011394997e-02,2.857593552133443071e-02,1.076526115126625743e-01,-1.169588063781885623e-01,-1.493986053584963822e-02,-9.939156163385284848e-02,5.728631166051838947e-02,1.318076226667439788e-01,-6.559944705762067729e-02,2.000054553822710746e-01 +1.442536217074669447e-01,1.860705352618353958e-01,2.376560497530317284e-01,-4.452755353807675426e-02,-1.479992243428359389e-01,-1.024421274656733688e-01,2.219827293941429441e-02,2.744560730041187280e-01,-1.749229054677335360e-01,2.486267968156883126e-02 +-1.173525007136980713e-01,-1.589063344657667964e-02,-4.855377240993334620e-02,-1.710166455399014795e-01,2.238197997257449501e-01,-3.902440146587572017e-02,-2.277246088212442343e-01,-2.225027033096296514e-01,-1.600856785452938746e-01,-3.513128354873073979e-03 +1.553053556117380551e-01,1.776336988981010168e-01,-5.584732869767267127e-03,4.337629217125157921e-02,1.343992700388582096e-01,2.241754083007339304e-01,-3.804363791330121480e-01,-2.248707918941276335e-02,9.449528681409452568e-02,2.019673102132814868e-01 +8.811133006768176457e-02,-2.495992576632576343e-01,-2.107789429909177337e-01,-7.656628888881508721e-02,-1.571965217975497819e-01,-3.994837344190730472e-02,1.798000461817380116e-02,-3.929477959092856493e-02,8.199998236736730228e-02,-1.771336014986007457e-01 +3.084293724362072164e-02,2.870830338863302744e-01,1.390985439227190867e-01,-1.761267638611300201e-01,-3.030878823823451168e-02,1.283420126059948152e-01,-7.032782881930829677e-02,3.028267745870127925e-01,2.763235193260478728e-01,1.297259596484676494e-02 +-2.980209117205826486e-01,-1.599628570960875684e-01,3.717563457838087004e-02,-1.715739500198807110e-01,1.156109940448075690e-01,2.861100896544049865e-01,-3.224456561914625097e-02,1.883841526758878449e-01,3.120995870006536277e-02,-1.666346081723748540e-01 
+1.964845154373343616e-01,-3.262271620530284716e-02,7.539790456017420039e-02,-1.390837387106441114e-01,1.432200744039294031e-02,6.588671044290296541e-02,-7.918721856950346194e-02,4.179783781885562655e-02,-2.963364199479299327e-02,-1.282878362461941923e-01 +3.939736847129866049e-01,-1.330499698590044688e-01,1.620818289382975064e-01,1.086385707584531096e-01,-3.063136061977774771e-01,-7.367331881484205647e-02,2.938740010160398874e-01,-5.544842822282580047e-02,-1.217766056376768796e-01,2.308853011600819627e-01 +-8.747965097463808459e-02,-1.216303641899998678e-01,-2.000717572673424427e-02,-1.394174966077489641e-01,-7.675363917563379768e-02,-2.418816225068699255e-01,5.774342472104643936e-02,-6.667810796688183883e-02,-9.383247246491349669e-02,1.803148190365377324e-01 +2.154878276681011437e-02,7.274723003733224447e-02,8.493890250496327621e-02,2.317676008980297797e-01,1.202666302268647763e-01,-1.795717633338488883e-01,1.194104887456467895e-01,-2.409833230096719917e-02,4.209919073379941867e-02,1.805717730414188316e-01 +-7.417287534784343384e-02,3.280186400443500516e-01,-1.569651800380269802e-01,1.352031133976719091e-01,-6.623571067636253862e-02,1.821579193654246642e-01,-2.611616781264873843e-01,1.257537100225379623e-01,-1.544128139339268624e-01,-8.073570852093319913e-03 +-1.055845973894932038e-01,9.447582815065415363e-02,-1.280223934974548394e-01,-2.467957463871607027e-01,-1.981722006802176250e-01,-2.174114263242006573e-01,-5.258326868244974500e-02,8.632670058414107506e-02,1.278124112560487524e-01,-7.596235162571260413e-02 +-1.319143228822058644e-01,1.105849050743873824e-01,-1.261348865766407090e-01,8.221827529651301592e-02,-1.637828607024564287e-01,2.207216837530200787e-02,-1.572736510450976899e-01,-6.353661633164614519e-02,4.310413166262661022e-02,-8.635729489274258930e-02 +4.002677023935117401e-02,-1.246790995864136553e-01,1.009436985538858939e-01,-3.086445086254718112e-02,-3.105499027327088268e-02,-2.181299643306329195e-01,-1.991706313934264727e-01,1.032401876848659389e-01,-1.392292225465681099e-01,-2.573769322082897770e-01 +-2.365500867481928893e-01,-2.882697181199892489e-01,1.870462413268927579e-02,3.123314847933006955e-01,-1.785379221860150523e-01,-2.620626819754759973e-01,-1.335680157458384310e-01,6.717891508607608253e-02,-9.085066963483198754e-02,-6.554344870038303350e-02 +7.134202132377859273e-02,3.078481712498531628e-02,3.676960617998211722e-01,-4.372148762455418426e-03,1.825773262745455927e-01,-3.602444328216306696e-02,-1.305462273222839364e-01,6.325133697926907383e-02,2.284845903167492809e-02,1.813943850568317806e-01 +-2.036121292893071366e-02,-3.142619518413359431e-02,-7.057387951297722228e-02,-3.212426316839919055e-02,-1.333403450390204525e-01,-4.290173490181348193e-02,8.144726830020809771e-02,6.951860368824913650e-02,2.310857100535866626e-01,1.643640418057101538e-01 +-4.494358712990566967e-02,-8.381098939796007008e-02,5.585039387477649248e-02,9.349073094042398280e-02,-4.148446400739889961e-02,-2.271966867947378677e-01,1.164965341773331886e-01,-1.489054888468986371e-01,1.828198888111968318e-02,-1.253330759037772635e-02 +1.174485886256576572e-02,1.871862923440414939e-01,1.869800905400307023e-01,1.475135435476520041e-01,3.487327309591684577e-01,-8.075531082494548374e-02,-5.490566974763481239e-02,6.693169941538162915e-02,-6.657868205049911892e-02,-5.753564084435989351e-02 
+3.540435546439159031e-01,9.799883325759614280e-03,-3.415790785183550060e-01,-1.596069478373152994e-01,1.046321772486082657e-01,8.214960553207252547e-02,3.174383744532797286e-02,3.673108366507741446e-02,-2.228283171016489095e-01,2.236729660172086798e-01 +7.908879265315697793e-02,1.056173691729681563e-01,-1.225984712485288508e-01,3.452599100424690731e-02,2.263175856748587678e-01,7.209894748504550033e-02,6.735185685735761707e-02,2.797405442112595522e-02,1.086462076787598763e-01,-9.525407972667455925e-02 +-1.047538936926625287e-02,-2.035197426425463807e-01,4.146899650406307702e-02,2.132037546098831671e-01,1.850498989115796356e-01,3.755941130259579652e-02,2.809178713198516308e-01,5.339383417911759688e-02,2.288817980479154746e-02,8.112997281404138450e-02 +2.844937002629225817e-02,-5.576917528583905624e-03,-1.424582369180357466e-01,-2.106624435129897388e-02,-5.564091055486999893e-02,-1.191495376277207918e-01,5.941614686177779971e-03,-1.434047109196132375e-01,-1.667172935642340648e-01,-6.724920468473376267e-02 +-2.651010312069623440e-02,-1.752677763399616284e-01,-1.808210581864507782e-01,-9.838386442528869458e-02,9.001532723491878496e-02,1.286355518885316795e-01,1.642453070463370246e-01,-3.123338511306213497e-02,-1.787111417596190521e-01,-3.559049574174898839e-02 +9.747590412046723551e-02,8.023771182677955083e-02,-4.335500350404396241e-02,2.468657521277872746e-01,1.317215783176759469e-01,-9.871658217557859050e-02,2.709271520186684948e-02,-6.092659889691063452e-02,1.531815724573990511e-01,2.589356068507616682e-01 diff --git a/tests/Y.csv b/tests/Y.csv new file mode 100644 index 00000000..468003da --- /dev/null +++ b/tests/Y.csv @@ -0,0 +1,50 @@ +5.981823865387593253e-01 +9.205037786951145717e-01 +2.471950462023575767e-01 +8.302149645795311450e-01 +3.481518894602156911e-01 +1.021133394037474273e+00 +7.737332812607098376e-01 +-1.072378967160714680e+00 +-2.435746837075756510e-01 +1.740226263339641199e-01 +8.402225800803131417e-01 +7.504940266953664674e-01 +-1.243144022380434421e+00 +1.501124988881830546e+00 +-2.891958991875613894e-01 +-3.132123608567724538e-01 +1.173545818815739494e+00 +5.085036448838163858e-01 +5.556170151317273431e-01 +-3.381951833174929312e-01 +-6.636571462865884508e-01 +-1.066616503782338743e+00 +-1.567776891895226454e-01 +-2.086621657630242765e+00 +-7.369246383379658427e-01 +-1.479829646839075641e-01 +9.141209849808230592e-01 +-4.535606299122550422e-01 +-1.439446509501642879e-01 +-1.298232476681334235e+00 +2.091428840290059821e+00 +5.383099610413618619e-01 +8.572788062341546445e-01 +-2.508913668580619039e-01 +3.275009893336582323e-01 +3.278337436330956978e-01 +-3.138873228756299261e-01 +6.287829585785361663e-01 +-6.461561991866940913e-01 +1.159738714281874084e-01 +1.347361280586111265e+00 +2.804011720873073044e-02 +1.431756971602339057e+00 +-1.370351105419743254e+00 +1.197179345676374851e+00 +9.454381529223232494e-01 +6.052977328571994553e-01 +8.951900192671614631e-01 +-1.503900798080590295e+00 +-1.866233032607558462e+00 diff --git a/tests/test.fs.selected.R b/tests/test.fs.selected.R index e5b2cf5d..37c55129 100644 --- a/tests/test.fs.selected.R +++ b/tests/test.fs.selected.R @@ -16,6 +16,5 @@ mu=x%*%beta y=mu+Y fsfit=fs(x,y,norm=TRUE, intercept=TRUE) out = fsInf_maxZ(fsfit,sigma=sigma) -print(out$pos) -print(-out$neg) + From 23c8de4bab2af024d0a0c8de294d273a46a1b1e1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Nov 2015 23:02:53 -0800 Subject: [PATCH 089/396] adding a few more comments --- selectiveInference/R/funs.fs.R | 12 ++++++++---- 1 file changed, 8 
insertions(+), 4 deletions(-)

diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R
index 433e36ef..6139dd2b 100644
--- a/selectiveInference/R/funs.fs.R
+++ b/selectiveInference/R/funs.fs.R
@@ -448,12 +448,9 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL) {
   y = obj$y
   p = ncol(x)
   n = nrow(x)
-  G = obj$Gamma
-  nconstraint = obj$nconstraint
-  sx = obj$sx
 
   if (is.null(sigma)) {
-    # TODO we should probably sample uniform
+    # TODO we need a sampler on a unit sphere
     if (n >= 2*p) {
       oo = obj$intercept
       sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo))
@@ -487,7 +484,14 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL) {
     collapsed_neg = apply(obj$offset_neg_maxZ[inactive,1:j,drop=FALSE], 1, min)
     cur_scale = obj$scale_maxZ[,j][inactive]
 
+    # the matrix cur_adjusted_X is used to compute
+    # the maxZ or maxT for the sampled variables
+
     cur_adjusted_X = obj$Gamma_maxZ[zi + Seq(1,p-j+1),]; zi = zi+p-j+1
+
+    # cur_X is used to enforce conditioning on
+    # the ever_active sufficient_statistics
+
     cur_X = obj$x[,inactive]
 
     # now we condition on solution up to now

From 08f116e1f9487d79296bc304d6116690d4c73a4f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Tue, 1 Dec 2015 00:53:58 -0800
Subject: [PATCH 090/396] for n>p implemented selective Z -- not quite debugged yet

---
 selectiveInference/DESCRIPTION |  3 +-
 selectiveInference/R/funs.fs.R | 60 +++++++++++++++++++++++++++++-----
 2 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION
index ce951165..6e21e688 100644
--- a/selectiveInference/DESCRIPTION
+++ b/selectiveInference/DESCRIPTION
@@ -8,7 +8,8 @@ Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor,
 Maintainer: Rob Tibshirani
 Depends:
     glmnet,
-    intervals
+    intervals,
+    tmvtnorm
 Suggests:
     Rmpfr
 Description: New tools for inference after selection, for use

diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R
index 6139dd2b..79201db2 100644
--- a/selectiveInference/R/funs.fs.R
+++ b/selectiveInference/R/funs.fs.R
@@ -54,6 +54,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
   offset_pos_maxZ = matrix(Inf, p, buf) # upper bounds for selective maxZ
   offset_neg_maxZ = matrix(Inf, p, buf) # lower bounds for selective maxZ
   scale_maxZ = matrix(0, p, buf) # lower bounds for selective maxZ
+  realized_maxZ = matrix(0, p, buf) # realized maxZ values at each step
 
   action[1] = i_hit
   df[1] = 0
@@ -63,7 +64,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
   # Variables needed to compute truncation limits for
   # selective maxZ test
 
-  realized_maxZ = c(sign_hit * score[i_hit])
+  realized_maxZ[1] = c(sign_hit * score[i_hit])
   offset_pos_maxZ[,1] = Inf
   offset_neg_maxZ[,1] = Inf
   scale_maxZ[,1] = working_scale
@@ -131,6 +132,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
       offset_pos_maxZ = cbind(offset_pos_maxZ, matrix(0, p, buf))
       offset_neg_maxZ = cbind(offset_neg_maxZ, matrix(0, p, buf))
       scale_maxZ = cbind(scale_maxZ, matrix(0, p, buf))
+      realized_maxZ = cbind(realized_maxZ, matrix(0, p, buf))
     }
 
     # Key quantities for the next entry
@@ -157,7 +159,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
       # keep track of necessary quantities for selective maxZ
 
       offset_shift = t(X_inactive) %*% (y - working_resid_maxZ)
-      realized_Z_scaled = realized_maxZ * prev_scale
+      realized_Z_scaled = realized_maxZ[k-1] * prev_scale
       offset_pos_maxZ[I,k] = realized_Z_scaled +
offset_shift offset_neg_maxZ[I,k] = realized_Z_scaled - offset_shift scale_maxZ[I,k] = working_scale @@ -178,7 +180,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, Gamma_maxZ[zi+Seq(1,p-r),] = t(X_inactive_resid); zi = zi+p-r # update maxZ variable - realized_maxZ = sign_hit * working_score[i_hit] + realized_maxZ[k] = sign_hit * working_score[i_hit] # Gamma matrix! @@ -264,7 +266,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, intercept=intercept,normalize=normalize,call=this.call, offset_pos_maxZ=offset_pos_maxZ,offset_neg_maxZ=offset_neg_maxZ, - scale_maxZ=scale_maxZ,Gamma_maxZ=Gamma_maxZ) + scale_maxZ=scale_maxZ,Gamma_maxZ=Gamma_maxZ,realized_maxZ=realized_maxZ) class(out) = "fs" return(out) } @@ -435,7 +437,8 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic # selected maxZ tests -fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL) { +fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, + ndraw=8000, burnin=2000) { this.call = match.call() @@ -492,7 +495,7 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL) { # cur_X is used to enforce conditioning on # the ever_active sufficient_statistics - cur_X = obj$x[,inactive] + cur_X = obj$x[,inactive,drop=FALSE] # now we condition on solution up to now # this is equivalent to finding vector of @@ -502,11 +505,11 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL) { if (j > 1) { cur_fitted = predict(obj, s=j) cur_fitted = cur_fitted - mean(cur_fitted) - cur_offset = t(cur_X) %*% cur_fitted + cur_offset = as.numeric(t(cur_X) %*% cur_fitted) } else { cur_fitted = 0 - cur_offset = 0 + cur_offset = rep(0, length(inactive)) } final_upper = collapsed_pos - cur_offset @@ -526,7 +529,46 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL) { # IMPORTANT: after sampling Y_star, we have to add back cur_fitted - pv = c(pv, runif(1)) + # if n > p, we actually just draw cur_adjusted_X %*% Y_star + # because this has a simple box constraint + # with a generically non-degenerate covariance + + + if (n > p) { + library(tmvtnorm) + + if (length(inactive) > 1) { + cov = (cur_adjusted_X %*% t(cur_adjusted_X)) + cov = cov * rep(sigma^2, nrow(cov), ncol(cov)) + } else { + cov = sigma^2 * sum(cur_adjusted_X^2) + } + + truncated_noise = rtmvnorm(n=ndraw, + mean=cur_offset, + sigma=cov, + lower=-collapsed_neg, + upper=collapsed_pos, + algorithm="gibbs", + burn.in.samples=burnin) + + if (length(inactive) > 1) { + sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) + } + else { + sample_maxZ = truncated_noise / cur_scale + } + } else { + # RUBBISH for now!!!! 
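
Aside: a self-contained sketch of the sampling scheme implemented just above, using
tmvtnorm; every name and number here is invented for illustration. Draw from a
box-truncated Gaussian and form a Monte Carlo p-value for a max-|Z| statistic:

library(tmvtnorm)
set.seed(1)
cov_mat <- matrix(c(1, 0.3, 0.3, 1), 2, 2)
draws <- rtmvnorm(n = 2000, mean = c(0, 0), sigma = cov_mat,
                  lower = c(-2, -2), upper = c(2, 2),
                  algorithm = "gibbs", burn.in.samples = 500)
sample_max <- apply(abs(draws), 1, max)  # max-|Z| per draw
observed <- 1.5                          # placeholder observed statistic
mean(sample_max > observed)              # one-sided Monte Carlo p-value
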
+ sample_maxZ = abs(rnorm(ndraw)) + } + + observed_maxZ = obj$realized_maxZ[j] + + pval = sum(sample_maxZ > observed_maxZ) / ndraw + pval = 2 * min(pval, 1 - pval) + + pv = c(pv, pval) } khat = forwardStop(pv,alpha) From 197236a63415ef0e017ddaa0adcf096328b3f792 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Tue, 1 Dec 2015 23:14:37 -0800 Subject: [PATCH 091/396] Setting up cv simulation --- forLater/josh/sim.cv.R | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R index b8d27cb5..041cf8fd 100644 --- a/forLater/josh/sim.cv.R +++ b/forLater/josh/sim.cv.R @@ -6,12 +6,12 @@ source("../../selectiveInference/R/funs.common.R") set.seed(1) niters <- 500 -n <- 50 -p <- 100 -maxsteps <- 10 -sparsity <- 5 -snr <- 1 -nfolds <- 5 +n <- 40 +p <- 80 +maxsteps <- 8 +sparsity <- 4 +snr <- 2 +nfolds <- 4 instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { @@ -25,8 +25,10 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { } fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) - pvals <- groupfsInf(fit, sigma = 1, verbose=T) - return(list(variable = fit$action, pvals = pvals$pv)) + fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps")) + pvals <- groupfsInf(fit, verbose=T) + pv2 <- groupfsInf(fit2, verbose=T) + return(list(variable = fit$action, pvals = pvals$pv, var2 = fit2$action, pvals2 = pv2$pv)) } time <- system.time({ @@ -35,12 +37,14 @@ time <- system.time({ pvals <- do.call(c, list(output[2,])) vars <- do.call(c, list(output[1,])) +vars2 <- do.call(c, list(output[3,])) +pvals2 <- do.call(c, list(output[4,])) -save(pvals, vars, file = paste0( +save(pvals, vars, vars2, pvals2, file = paste0( "results_cv_n", n, "_p", p, "_sparsity", sparsity, "_snr", snr, - ".RData")) + "_comparison.RData")) print(time) From 16235ac8501bf9d2834b86543dc7e92a4918caa2 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 2 Dec 2015 15:41:12 -0800 Subject: [PATCH 092/396] cv simulation known sigma --- forLater/josh/selectiveInference/R/cv.R | 6 +++--- forLater/josh/sim.aicstop.R | 5 ++--- forLater/josh/sim.cv.R | 16 ++++++++-------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/forLater/josh/selectiveInference/R/cv.R b/forLater/josh/selectiveInference/R/cv.R index 0a4661b4..8714f2e2 100644 --- a/forLater/josh/selectiveInference/R/cv.R +++ b/forLater/josh/selectiveInference/R/cv.R @@ -54,7 +54,7 @@ cvRSSquad <- function(x, folds, active.sets) { return(Q) } -cvfs <- function(x, y, index = 1:ncol(x), maxsteps, nfolds = 10) { +cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, nfolds = 10) { n <- nrow(x) if (maxsteps >= n*(1-1/nfolds)) { @@ -75,7 +75,7 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, nfolds = 10) { # Flatten list or something? 
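
Aside: the fold loop below assumes the rows have already been assigned to nfolds
groups. A generic way to build such an assignment (not the package's own fold
constructor; illustrative only):

set.seed(1)
n <- 23; nfolds <- 5
fold_id <- sample(rep(1:nfolds, length.out = n))
folds <- split(1:n, fold_id)
sapply(folds, length)  # fold sizes differ by at most one
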
for (f in 1:nfolds) { fold <- folds[[f]] - fit <- groupfs(X[-fold,], Y[-fold], index, maxsteps=maxsteps) + fit <- groupfs(X[-fold,], Y[-fold], index=index, maxsteps=maxsteps, sigma=sigma) fit$fold <- fold ## projections[[f]] <- lapply(fit$projections, function(step.projs) { ## lapply(step.projs, function(proj) { @@ -103,7 +103,7 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, nfolds = 10) { RSSquads <- lapply(RSSquads, function(quad) quad - quadstar) RSSquads[[sstar]] <- NULL # remove the all zeroes case - fit <- groupfs(X, Y, index, maxsteps=sstar) + fit <- groupfs(X, Y, index=index, maxsteps=sstar, sigma=sigma) fit$cvobj <- cvobj fit$cvquad <- RSSquads diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R index f2aa653a..409ab0f2 100644 --- a/forLater/josh/sim.aicstop.R +++ b/forLater/josh/sim.aicstop.R @@ -1,19 +1,18 @@ library(intervals) source("funs.sims.R") -#source("selectiveInference/R/cv.R") source("../../selectiveInference/R/funs.groupfs.R") source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) known <- FALSE -niters <- 200 +niters <- 500 n <- 50 p <- 150 G <- 75 maxsteps <- 10 sparsity <- 4 -snr <- 2 +snr <- 3 rho <- 0 aicstop <- 1 diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R index 041cf8fd..810d14bf 100644 --- a/forLater/josh/sim.cv.R +++ b/forLater/josh/sim.cv.R @@ -5,13 +5,13 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 500 -n <- 40 -p <- 80 -maxsteps <- 8 -sparsity <- 4 +niters <- 4 +n <- 50 +p <- 100 +maxsteps <- 10 +sparsity <- 5 snr <- 2 -nfolds <- 4 +nfolds <- 5 instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { @@ -24,8 +24,8 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { y <- y + x %*% beta } - fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) - fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps")) + fit <- cvfs(x, y, maxsteps=maxsteps, sigma=1, nfolds=nfolds) + fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps"), sigma = 1) pvals <- groupfsInf(fit, verbose=T) pv2 <- groupfsInf(fit2, verbose=T) return(list(variable = fit$action, pvals = pvals$pv, var2 = fit2$action, pvals2 = pv2$pv)) From 711bb22ebbccad1e3e3917179a7aff130a27d5e0 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 4 Dec 2015 16:41:47 -0800 Subject: [PATCH 093/396] Adding more checkpoints for F roots --- selectiveInference/R/funs.quadratic.R | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 003adc6b..236b39c1 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -205,16 +205,23 @@ TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { roots <- unique(thetas[which(modinds & angleinds)]) troots <- tan(roots)^2/C - if (length(troots) == 0) { - # Polyroot didn't catch any roots - # ad-hoc check: - checkpoints <- c(0, tol, tol2, - seq(from = sqrt(tol2), to = 1, length.out = 50), - seq(from = 1.2, to=50, length.out = 20), - 100, 1000, 10000) - } else { - checkpoints <- roots_to_checkpoints(troots) - } + checkpoints <- c() + if (length(troots) > 0) checkpoints <- roots_to_checkpoints(troots) + checkpoints <- sort( + c(checkpoints, (0, tol, tol2, + seq(from = sqrt(tol2), to = 1, length.out = 50), + seq(from = 1.2, to=50, length.out = 20), + 100, 1000, 10000))) + ## if (length(troots) 
== 0) { + ## # Polyroot didn't catch any roots + ## # ad-hoc check: + ## checkpoints <- c(0, tol, tol2, + ## seq(from = sqrt(tol2), to = 1, length.out = 50), + ## seq(from = 1.2, to=50, length.out = 20), + ## 100, 1000, 10000) + ## } else { + ## checkpoints <- roots_to_checkpoints(troots) + ## } signs <- sign(I(checkpoints)) diffs <- c(0, diff(signs)) From e61a3fbc1f9508ba9d9a53e214b45b8d9f7f4afa Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Fri, 4 Dec 2015 16:44:51 -0800 Subject: [PATCH 094/396] Fixed syntax error --- forLater/josh/sim.cv.R | 6 +++--- selectiveInference/R/funs.quadratic.R | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R index 810d14bf..dc64493a 100644 --- a/forLater/josh/sim.cv.R +++ b/forLater/josh/sim.cv.R @@ -5,7 +5,7 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 4 +niters <- 400 n <- 50 p <- 100 maxsteps <- 10 @@ -24,8 +24,8 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { y <- y + x %*% beta } - fit <- cvfs(x, y, maxsteps=maxsteps, sigma=1, nfolds=nfolds) - fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps"), sigma = 1) + fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) + fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps")) pvals <- groupfsInf(fit, verbose=T) pv2 <- groupfsInf(fit2, verbose=T) return(list(variable = fit$action, pvals = pvals$pv, var2 = fit2$action, pvals2 = pv2$pv)) diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 236b39c1..23ff63f5 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -208,10 +208,10 @@ TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { checkpoints <- c() if (length(troots) > 0) checkpoints <- roots_to_checkpoints(troots) checkpoints <- sort( - c(checkpoints, (0, tol, tol2, + c(checkpoints, 0, tol, tol2, seq(from = sqrt(tol2), to = 1, length.out = 50), seq(from = 1.2, to=50, length.out = 20), - 100, 1000, 10000))) + 100, 1000, 10000)) ## if (length(troots) == 0) { ## # Polyroot didn't catch any roots ## # ad-hoc check: From a34b4a4a33d459153ec2ba210a8caf595ba5d286 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Mon, 7 Dec 2015 18:32:43 -0800 Subject: [PATCH 095/396] Fixing intercept, center, normalize for cvfs --- forLater/josh/selectiveInference/R/cv.R | 18 ++++++++++++++---- selectiveInference/R/funs.groupfs.R | 11 +++++------ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/forLater/josh/selectiveInference/R/cv.R b/forLater/josh/selectiveInference/R/cv.R index 8714f2e2..dfd63951 100644 --- a/forLater/josh/selectiveInference/R/cv.R +++ b/forLater/josh/selectiveInference/R/cv.R @@ -54,7 +54,7 @@ cvRSSquad <- function(x, folds, active.sets) { return(Q) } -cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, nfolds = 10) { +cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TRUE, center = TRUE, normalize = TRUE, nfolds = 5) { n <- nrow(x) if (maxsteps >= n*(1-1/nfolds)) { @@ -69,13 +69,23 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, nfolds = 10) { active.sets <- list(1:nfolds) cvobj <- list(1:nfolds) cv_perm <- sample(1:n) - Y <- y[cv_perm] - mean(y) + Y <- y[cv_perm] X <- x[cv_perm, ] + # Initialize copies of data for loop + by <- mean(Y) + if (intercept) Y <- Y - by + + # Center and scale design matrix + xscaled <- scaleGroups(X, index, 
center, normalize) + xm <- xscaled$xm + xs <- xscaled$xs + X <- xscaled$x + # Flatten list or something? for (f in 1:nfolds) { fold <- folds[[f]] - fit <- groupfs(X[-fold,], Y[-fold], index=index, maxsteps=maxsteps, sigma=sigma) + fit <- groupfs(X[-fold,], Y[-fold], index=index, maxsteps=maxsteps, sigma=sigma, intercept=FALSE, center=FALSE, normalize=FALSE) fit$fold <- fold ## projections[[f]] <- lapply(fit$projections, function(step.projs) { ## lapply(step.projs, function(proj) { @@ -103,7 +113,7 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, nfolds = 10) { RSSquads <- lapply(RSSquads, function(quad) quad - quadstar) RSSquads[[sstar]] <- NULL # remove the all zeroes case - fit <- groupfs(X, Y, index=index, maxsteps=sstar, sigma=sigma) + fit <- groupfs(X, Y, index=index, maxsteps=sstar, sigma=sigma, intercept=intercept, center=center, normalize=normalize) fit$cvobj <- cvobj fit$cvquad <- RSSquads diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R index a5f46c8c..532cd026 100644 --- a/selectiveInference/R/funs.groupfs.R +++ b/selectiveInference/R/funs.groupfs.R @@ -49,10 +49,9 @@ groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE y.update <- y if (intercept) y.update <- y - by y.last <- y.update - x.update <- x # Center and scale design matrix - xscaled <- scaleGroups(x.update, index, center, normalize) + xscaled <- scaleGroups(x, index, center, normalize) xm <- xscaled$xm xs <- xscaled$xs x.update <- xscaled$x @@ -434,7 +433,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { Uh <- ulist[[step]] Zg <- zlist[[step+1]] Zh <- zlist[[step]] - + if (type == "TC") { penh <- 0 etag <- etalist[[step+1]] @@ -444,9 +443,9 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { if (AICs[step] < AICs[step+1]) { coeffs <- lapply(coeffs, function(coeff) -coeff) } - + intstep <- quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) - + } else { penh <- 1 Vdg <- vdlist[[step+1]] @@ -458,7 +457,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { if (AICs[step] < AICs[step+1]) { coeffs <- lapply(coeffs, function(coeff) -coeff) } - + intstep <- TF_roots(R, C, coeffs) } From f2715b47af25b60946f0368e215866bca3c9f155 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 9 Dec 2015 11:52:56 -0800 Subject: [PATCH 096/396] Just running simulations --- forLater/josh/sim.cv.R | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R index dc64493a..847a52f5 100644 --- a/forLater/josh/sim.cv.R +++ b/forLater/josh/sim.cv.R @@ -5,12 +5,12 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 400 -n <- 50 -p <- 100 +niters <- 500 +n <- 100 +p <- 200 maxsteps <- 10 sparsity <- 5 -snr <- 2 +snr <- 1 nfolds <- 5 instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { @@ -24,11 +24,14 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { y <- y + x %*% beta } - fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) - fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps")) + fit <- cvfs(x, y, maxsteps=maxsteps, sigma=1, nfolds=nfolds) + fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps"), sigma = 1) pvals <- groupfsInf(fit, verbose=T) pv2 <- groupfsInf(fit2, verbose=T) - return(list(variable = fit$action, pvals = pvals$pv, var2 = fit2$action, pvals2 = pv2$pv)) + Y <- y - mean(y) + cols <- which(1:p %in% 
fit$action) + pv3 <- summary(lm(Y~x[, cols]-1))$coefficients[,4] + return(list(variable = fit$action, pvals = pvals$pv, var2 = fit2$action, pvals2 = pv2$pv, pvals3 = pv3[order(fit$action)])) } time <- system.time({ @@ -39,12 +42,13 @@ pvals <- do.call(c, list(output[2,])) vars <- do.call(c, list(output[1,])) vars2 <- do.call(c, list(output[3,])) pvals2 <- do.call(c, list(output[4,])) +pvals3 <- do.call(c, list(output[5,])) -save(pvals, vars, vars2, pvals2, file = paste0( +save(pvals, vars, vars2, pvals2, pvals3, file = paste0( "results_cv_n", n, "_p", p, "_sparsity", sparsity, "_snr", snr, - "_comparison.RData")) + "_comparison_TC.RData")) print(time) From b9c4d0179b31af6c544033d361ae3fe88bb94297 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 9 Dec 2015 15:01:00 -0800 Subject: [PATCH 097/396] Data splitting simulation --- forLater/josh/sim.datasplit.R | 64 +++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 forLater/josh/sim.datasplit.R diff --git a/forLater/josh/sim.datasplit.R b/forLater/josh/sim.datasplit.R new file mode 100644 index 00000000..9d59cd65 --- /dev/null +++ b/forLater/josh/sim.datasplit.R @@ -0,0 +1,64 @@ +library(intervals) +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +niters <- 1000 +n <- 100 +p <- 50 +maxsteps <- 20 +sparsity <- 5 +snr <- 1 +train <- 1:(7*n/10) +test <- setdiff(1:n, train) +index <- 1:p + +instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { + + x <- matrix(rnorm(n*p), nrow=n) + y <- rnorm(n) + + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + + ytr <- y[train] + xtr <- x[train, ] + yte <- y[test] + xte <- x[test, ] + + #log(length(train)) + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = log(length(train))) + fit <- groupfs(x, y, index, maxsteps=maxsteps, aicstop=1, k = log(n)) + + trcols <- which(1:p %in% trfit$action) + tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4] + names(tepv) <- as.character(sort(trfit$action)) + pv <- groupfsInf(fit) + return(list(vars = fit$action, pvals = pv$pv, + splitvars = sort(trfit$action), splitpvals = tepv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, nfolds)) +}) + +vars <- do.call(c, list(output[1,])) +pvals <- do.call(c, list(output[2,])) +splitvars <- do.call(c, list(output[3,])) +splitpvals <- do.call(c, list(output[4,])) + + +save(vars, pvals, splitvars, splitpvals, file = paste0( + "results_datasplit_n", n, + "_p", p, + "_sparsity", sparsity, + "_snr", as.character(snr), + "_bic.RData")) + +print(time) + From 01b51b2c1d44dfafc9914a31b9c5547dd96d522a Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 9 Dec 2015 15:36:45 -0800 Subject: [PATCH 098/396] Fixing cv comparison simulation --- forLater/josh/sim.cv.R | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R index 847a52f5..32601405 100644 --- a/forLater/josh/sim.cv.R +++ b/forLater/josh/sim.cv.R @@ -5,7 +5,7 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 500 +niters <- 50 n <- 100 p <- 200 maxsteps <- 10 @@ -25,30 +25,36 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { } fit <- 
cvfs(x, y, maxsteps=maxsteps, sigma=1, nfolds=nfolds) - fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps"), sigma = 1) + vars <- fit$action + #fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps"), sigma = 1) pvals <- groupfsInf(fit, verbose=T) - pv2 <- groupfsInf(fit2, verbose=T) + fit$cvobj <- NULL + nocvpv <- groupfsInf(fit, verbose=T) Y <- y - mean(y) - cols <- which(1:p %in% fit$action) - pv3 <- summary(lm(Y~x[, cols]-1))$coefficients[,4] - return(list(variable = fit$action, pvals = pvals$pv, var2 = fit2$action, pvals2 = pv2$pv, pvals3 = pv3[order(fit$action)])) + cols <- which(1:p %in% vars) + noselpv <- summary(lm(Y~x[, cols]-1))$coefficients[,4] + names(noselpv) <- as.character(sort(vars)) + return(list(vars = vars, pvals = pvals$pv, + nocvvars = vars, nocvpvals = nocvpv$pv, + noselvars = sort(vars), noselpvals = noselpv)) } time <- system.time({ output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, nfolds)) }) -pvals <- do.call(c, list(output[2,])) vars <- do.call(c, list(output[1,])) -vars2 <- do.call(c, list(output[3,])) -pvals2 <- do.call(c, list(output[4,])) -pvals3 <- do.call(c, list(output[5,])) +pvals <- do.call(c, list(output[2,])) +nocvvars <- do.call(c, list(output[3,])) +nocvpvals <- do.call(c, list(output[4,])) +noselvars <- do.call(c, list(output[5,])) +noselpvals <- do.call(c, list(output[6,])) -save(pvals, vars, vars2, pvals2, pvals3, file = paste0( +save(vars, pvals, nocvvars, nocvpvals, noselvars, noselpvals, file = paste0( "results_cv_n", n, "_p", p, "_sparsity", sparsity, "_snr", snr, - "_comparison_TC.RData")) + "_comparison.RData")) print(time) From dcbad8661fe316c304830e85b2009ba3cfefb684 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Wed, 9 Dec 2015 16:34:17 -0800 Subject: [PATCH 099/396] Fixed typo in TF cv code --- forLater/josh/sim.cv.R | 20 ++++++++++++-------- selectiveInference/R/funs.groupfs.R | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R index 32601405..6287d20a 100644 --- a/forLater/josh/sim.cv.R +++ b/forLater/josh/sim.cv.R @@ -7,27 +7,31 @@ source("../../selectiveInference/R/funs.common.R") set.seed(1) niters <- 50 n <- 100 -p <- 200 +p <- 50 maxsteps <- 10 sparsity <- 5 -snr <- 1 +snr <- 2 +rho <- 0.1 nfolds <- 5 -instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { +instance <- function(n, p, sparsity, snr, maxsteps, nfolds, rho) { x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } y <- rnorm(n) if (sparsity > 0) { beta <- rep(0, p) - beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + beta[1:sparsity] <- snr * sqrt(2*log(p)/n) * sample(c(-1,1), sparsity, replace=T) y <- y + x %*% beta } - fit <- cvfs(x, y, maxsteps=maxsteps, sigma=1, nfolds=nfolds) + fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) vars <- fit$action - #fit2 <- groupfs(x, y, index = 1:p, maxsteps = attr(fit, "maxsteps"), sigma = 1) - pvals <- groupfsInf(fit, verbose=T) + pvals <- groupfsInf(fit, sigma=1, verbose=T) fit$cvobj <- NULL nocvpv <- groupfsInf(fit, verbose=T) Y <- y - mean(y) @@ -40,7 +44,7 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { } time <- system.time({ - output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, nfolds)) + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, nfolds, rho)) }) vars <- do.call(c, list(output[1,])) diff --git a/selectiveInference/R/funs.groupfs.R 
b/selectiveInference/R/funs.groupfs.R
index 532cd026..b2c04472 100644
--- a/selectiveInference/R/funs.groupfs.R
+++ b/selectiveInference/R/funs.groupfs.R
@@ -378,7 +378,7 @@ groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) {
                   x0 <- zcvquad %*% Z
                   x1 <- 2*R*zcvquad %*% Vdelta
                   x2 <- 2*R*zcvquad %*% V2
-                  x12 <- 2*R*vdcvquad %*% V2
+                  x12 <- 2*R^2*vdcvquad %*% V2
                   x11 <- R^2*vdcvquad %*% Vdelta
                   x22 <- R^2*v2cvquad %*% V2
                   TF_roots(R, C, coeffs = list(x0=x0, x1=x1, x2=x2, x12=x12, x11=x11, x22=x22))

From aacba41a8c386d2e549879047770feaaed19d0bc Mon Sep 17 00:00:00 2001
From: Joshua Loftus
Date: Wed, 9 Dec 2015 17:14:54 -0800
Subject: [PATCH 100/396] Setting up simulation

---
 forLater/josh/sim.cv.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R
index 6287d20a..807cde2f 100644
--- a/forLater/josh/sim.cv.R
+++ b/forLater/josh/sim.cv.R
@@ -8,7 +8,7 @@ set.seed(1)
 niters <- 50
 n <- 100
 p <- 50
-maxsteps <- 10
+maxsteps <- 8
 sparsity <- 5
 snr <- 2
 rho <- 0.1
@@ -31,7 +31,7 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds, rho) {

     fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds)
     vars <- fit$action
-    pvals <- groupfsInf(fit, sigma=1, verbose=T)
+    pvals <- groupfsInf(fit, verbose=T)
     fit$cvobj <- NULL
     nocvpv <- groupfsInf(fit, verbose=T)
     Y <- y - mean(y)

From 3cc3318a868edb7892e465bb615aea8612d9f9ee Mon Sep 17 00:00:00 2001
From: Joshua Loftus
Date: Thu, 10 Dec 2015 06:14:31 -0800
Subject: [PATCH 101/396] Organizing simulations

---
 forLater/josh/sim.aicstop.R   | 22 +++++++++-------
 forLater/josh/sim.cv.R        | 23 +++++++++++-----
 forLater/josh/sim.datasplit.R | 49 +++++++++++++++++++++++------------
 forLater/josh/sim.groupfs.R   | 21 ++++++++-------
 4 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/forLater/josh/sim.aicstop.R b/forLater/josh/sim.aicstop.R
index 409ab0f2..af150ecd 100644
--- a/forLater/josh/sim.aicstop.R
+++ b/forLater/josh/sim.aicstop.R
@@ -38,15 +38,17 @@ stopped <- do.call(c, list(output[3,]))
 pvals <- do.call(c, list(output[2,]))
 vars <- do.call(c, list(output[1,]))

-save(pvals, vars, stopped, file = paste0(
-    "results_aic_n", n,
-    "_p", p,
-    "_g", G,
-    "_rho", gsub(pattern = ".", replacement = "", x = rho, fixed = T),
-    "_maxsteps", maxsteps,
-    "_sparsity", sparsity,
-    "_snr", round(snr),
-    "_known", known,
-    ".RData"))
+save(pvals, vars, stopped,
+     file = paste0(
+         "results/aic",
+         "_", ifelse(known, "TC", "TF"),
+         "_n", n,
+         "_p", p,
+         "_g", G,
+         "_rho", gsub(".", "pt", rho, fixed=T),
+         "_maxsteps", maxsteps,
+         "_sparsity", sparsity,
+         "_snr", round(snr),
+         ".RData"))

 print(time)

diff --git a/forLater/josh/sim.cv.R b/forLater/josh/sim.cv.R
index 807cde2f..7d20e8c7 100644
--- a/forLater/josh/sim.cv.R
+++ b/forLater/josh/sim.cv.R
@@ -6,6 +6,7 @@ source("../../selectiveInference/R/funs.common.R")

 set.seed(1)
 niters <- 50
+known <- FALSE
 n <- 100
 p <- 50
 maxsteps <- 8
@@ -29,7 +30,11 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds, rho) {
         y <- y + x %*% beta
     }

-    fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds)
+    if (known) {
+        fit <- cvfs(x, y, maxsteps=maxsteps, sigma = 1, nfolds=nfolds)
+    } else {
+        fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds)
+    }
     vars <- fit$action
     pvals <- groupfsInf(fit, verbose=T)
     fit$cvobj <- NULL
@@ -54,11 +59,15 @@ nocvpvals <- do.call(c, list(output[4,]))
 noselvars <- do.call(c, list(output[5,]))
 noselpvals <- do.call(c, list(output[6,]))

-save(vars, pvals, nocvvars, nocvpvals, noselvars, noselpvals, file = paste0(
-    "results_cv_n", n,
-    
"_p", p, - "_sparsity", sparsity, - "_snr", snr, - "_comparison.RData")) +save(vars, pvals, nocvvars, nocvpvals, noselvars, noselpvals, + file = paste0("results/cv", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_rho", gsub(".", "pt", rho, fixed=T), + "_sparsity", sparsity, + "_maxsteps", maxsteps, + "_snr", snr, + ".RData")) print(time) diff --git a/forLater/josh/sim.datasplit.R b/forLater/josh/sim.datasplit.R index 9d59cd65..ef3de310 100644 --- a/forLater/josh/sim.datasplit.R +++ b/forLater/josh/sim.datasplit.R @@ -5,19 +5,26 @@ source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") set.seed(1) -niters <- 1000 +niters <- 500 +known <- FALSE n <- 100 -p <- 50 +p <- 200 maxsteps <- 20 sparsity <- 5 snr <- 1 -train <- 1:(7*n/10) +rho <- 0.1 +ratio <- 0.75 +train <- 1:(ratio*n) test <- setdiff(1:n, train) index <- 1:p -instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { +instance <- function(n, p, sparsity, snr, maxsteps, rho) { x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } y <- rnorm(n) if (sparsity > 0) { @@ -31,34 +38,44 @@ instance <- function(n, p, sparsity, snr, maxsteps, nfolds) { yte <- y[test] xte <- x[test, ] - #log(length(train)) - trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = log(length(train))) - fit <- groupfs(x, y, index, maxsteps=maxsteps, aicstop=1, k = log(n)) + if (known) { + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p)) + fit <- groupfs(x, y, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p)) + } else { + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = log(length(train))) + fit <- groupfs(x, y, index, maxsteps=maxsteps, aicstop=1, k = log(n)) + } trcols <- which(1:p %in% trfit$action) tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4] names(tepv) <- as.character(sort(trfit$action)) pv <- groupfsInf(fit) + trpv <- groupfsInf(trfit) return(list(vars = fit$action, pvals = pv$pv, - splitvars = sort(trfit$action), splitpvals = tepv)) + splitvars = sort(trfit$action), splitpvals = tepv), + trpvals = trpv$pv) } time <- system.time({ - output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, nfolds)) + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, rho)) }) vars <- do.call(c, list(output[1,])) pvals <- do.call(c, list(output[2,])) splitvars <- do.call(c, list(output[3,])) splitpvals <- do.call(c, list(output[4,])) +trpvals <- do.call(c, list(output[5,])) - -save(vars, pvals, splitvars, splitpvals, file = paste0( - "results_datasplit_n", n, - "_p", p, - "_sparsity", sparsity, - "_snr", as.character(snr), - "_bic.RData")) +save(vars, pvals, splitvars, splitpvals, trpvals, + file = paste0("results/datasplit", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_rho", gsub(".", "pt", rho, fixed=T), + "_sparsity", sparsity, + "_ratio", gsub(".", "pt", round(ratio, 2), fixed=T), + "_snr", as.character(snr), + "_bic.RData")) print(time) diff --git a/forLater/josh/sim.groupfs.R b/forLater/josh/sim.groupfs.R index 9e606319..5f053e5d 100644 --- a/forLater/josh/sim.groupfs.R +++ b/forLater/josh/sim.groupfs.R @@ -37,15 +37,16 @@ time <- system.time({ pvals <- do.call(c, list(output[2,])) vars <- do.call(c, list(output[1,])) -save(pvals, vars, file = paste0( - "results_n", n, - "_p", p, - "_g", G, - "_rho", gsub(pattern = ".", replacement = "", x = rho, fixed = T), - "_maxsteps", maxsteps, - 
"_sparsity", sparsity, - "_snr", round(snr), - "_known", known, - ".RData")) +save(pvals, vars, + file = paste0("results/", + ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_g", G, + "_rho", gsub(".", "pt", rho, fixed=T), + "_maxsteps", maxsteps, + "_sparsity", sparsity, + "_snr", round(snr), + ".RData")) print(time) From a4f89ea017da8074ffd296d0b511305e4bfe942a Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Thu, 10 Dec 2015 06:30:54 -0800 Subject: [PATCH 102/396] Data carving simulation --- forLater/josh/sim.carve.R | 81 +++++++++++++++++++++++++++++++++++ forLater/josh/sim.datasplit.R | 8 ++-- 2 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 forLater/josh/sim.carve.R diff --git a/forLater/josh/sim.carve.R b/forLater/josh/sim.carve.R new file mode 100644 index 00000000..2881109f --- /dev/null +++ b/forLater/josh/sim.carve.R @@ -0,0 +1,81 @@ +library(intervals) +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") + +set.seed(1) +niters <- 400 +known <- FALSE +n <- 100 +p <- 50 +maxsteps <- 20 +sparsity <- 10 +snr <- 1 +rho <- 0.1 +ratio <- 0.75 +train <- 1:(ratio*n) +test <- setdiff(1:n, train) +index <- 1:p + +instance <- function(n, p, sparsity, snr, maxsteps, rho) { + + x <- matrix(rnorm(n*p), nrow=n) + if (rho != 0) { + z <- matrix(rep(t(rnorm(n)), p), nrow = n) + x <- sqrt(1-rho)*x + sqrt(rho)*z + } + y <- rnorm(n) + + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + + ytr <- y[train] + xtr <- x[train, ] + yte <- y[test] + xte <- x[test, ] + + if (known) { + trfit <- cvfs(xtr, ytr, maxsteps=maxsteps, sigma = 1, nfolds=nfolds) + fit <- cvfs(x, y, maxsteps=maxsteps, sigma = 1, nfolds=nfolds) + } else { + trfit <- cvfs(xtr, ytr, maxsteps=maxsteps, nfolds=nfolds) + fit <- cvfs(x, y, maxsteps=maxsteps, nfolds=nfolds) + } + + trcols <- which(1:p %in% trfit$action) + tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4] + names(tepv) <- as.character(sort(trfit$action)) + pv <- groupfsInf(fit) + trpv <- groupfsInf(trfit) + return(list(vars = fit$action, pvals = pv$pv, + splitvars = sort(trfit$action), splitpvals = tepv, + trpvals = trpv$pv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, rho)) +}) + +vars <- do.call(c, list(output[1,])) +pvals <- do.call(c, list(output[2,])) +splitvars <- do.call(c, list(output[3,])) +splitpvals <- do.call(c, list(output[4,])) +trpvals <- do.call(c, list(output[5,])) + +save(vars, pvals, splitvars, splitpvals, trpvals, + file = paste0("results/carvecv", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_rho", gsub(".", "pt", rho, fixed=T), + "_sparsity", sparsity, + "_ratio", gsub(".", "pt", round(ratio, 2), fixed=T), + "_snr", as.character(snr), + ".RData")) + +print(time) + diff --git a/forLater/josh/sim.datasplit.R b/forLater/josh/sim.datasplit.R index ef3de310..e732ce79 100644 --- a/forLater/josh/sim.datasplit.R +++ b/forLater/josh/sim.datasplit.R @@ -8,9 +8,9 @@ set.seed(1) niters <- 500 known <- FALSE n <- 100 -p <- 200 +p <- 100 maxsteps <- 20 -sparsity <- 5 +sparsity <- 10 snr <- 1 rho <- 0.1 ratio <- 0.75 @@ -52,8 +52,8 @@ instance <- function(n, p, sparsity, snr, maxsteps, rho) { pv <- groupfsInf(fit) trpv <- groupfsInf(trfit) return(list(vars = fit$action, pvals = pv$pv, - splitvars = sort(trfit$action), splitpvals = tepv), - trpvals 
= trpv$pv)
+                splitvars = sort(trfit$action), splitpvals = tepv,
+                trpvals = trpv$pv))
 }

 time <- system.time({

From 859a6d4d43316eec012f3c55d87cd1abf9e679dc Mon Sep 17 00:00:00 2001
From: Joshua Loftus
Date: Fri, 11 Dec 2015 10:54:54 -0800
Subject: [PATCH 103/396] Need to tune simulations better

---
 forLater/josh/sim.carve.R     |  1 +
 forLater/josh/sim.datasplit.R | 23 +++++++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/forLater/josh/sim.carve.R b/forLater/josh/sim.carve.R
index 2881109f..7d1ae599 100644
--- a/forLater/josh/sim.carve.R
+++ b/forLater/josh/sim.carve.R
@@ -17,6 +17,7 @@ ratio <- 0.75
 train <- 1:(ratio*n)
 test <- setdiff(1:n, train)
 index <- 1:p
+nfolds <- 5

 instance <- function(n, p, sparsity, snr, maxsteps, rho) {

diff --git a/forLater/josh/sim.datasplit.R b/forLater/josh/sim.datasplit.R
index e732ce79..be077a1d 100644
--- a/forLater/josh/sim.datasplit.R
+++ b/forLater/josh/sim.datasplit.R
@@ -5,17 +5,20 @@ source("../../selectiveInference/R/funs.quadratic.R")
 source("../../selectiveInference/R/funs.common.R")

 set.seed(1)
-niters <- 500
+niters <- 400
 known <- FALSE
 n <- 100
 p <- 100
-maxsteps <- 20
-sparsity <- 10
+maxsteps <- 10
+sparsity <- 5
 snr <- 1
 rho <- 0.1
-ratio <- 0.75
+ratio <- 0.7
+ratio2 <- 0.85
 train <- 1:(ratio*n)
 test <- setdiff(1:n, train)
+train2 <- 1:(ratio2*n)
+test2 <- setdiff(1:n, train2)
 index <- 1:p

 instance <- function(n, p, sparsity, snr, maxsteps, rho) {
@@ -37,18 +40,26 @@ instance <- function(n, p, sparsity, snr, maxsteps, rho) {
   xtr <- x[train, ]
   yte <- y[test]
   xte <- x[test, ]
+
+  ytr2 <- y[train2]
+  xtr2 <- x[train2, ]
+  yte2 <- y[test2]
+  xte2 <- x[test2, ]

   if (known) {
     trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p))
-    fit <- groupfs(x, y, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p))
+    fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p))
   } else {
     trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = log(length(train)))
-    fit <- groupfs(x, y, index, maxsteps=maxsteps, aicstop=1, k = log(n))
+    fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, aicstop=1, k = log(length(train2)))
   }

   trcols <- which(1:p %in% trfit$action)
+  tr2cols <- which(1:p %in% fit$action)
   tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4]
+  tepv2 <- summary(lm(yte2~xte2[, tr2cols]-1))$coefficients[,4]
   names(tepv) <- as.character(sort(trfit$action))
+  names(tepv2) <- as.character(sort(fit$action))
   pv <- groupfsInf(fit)
   trpv <- groupfsInf(trfit)
   return(list(vars = fit$action, pvals = pv$pv,

From f115ab2ae797700261f3c10c75252df890104627 Mon Sep 17 00:00:00 2001
From: Joshua Loftus
Date: Wed, 23 Dec 2015 16:37:33 -0800
Subject: [PATCH 104/396] Setting up new data splitting simulation

---
 forLater/josh/sim.splitcv.R | 77 +++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 forLater/josh/sim.splitcv.R

diff --git a/forLater/josh/sim.splitcv.R b/forLater/josh/sim.splitcv.R
new file mode 100644
index 00000000..cc5428ed
--- /dev/null
+++ b/forLater/josh/sim.splitcv.R
@@ -0,0 +1,77 @@
+library(intervals)
+source("selectiveInference/R/cv.R")
+source("../../selectiveInference/R/funs.groupfs.R")
+source("../../selectiveInference/R/funs.quadratic.R")
+source("../../selectiveInference/R/funs.common.R")
+
+set.seed(1)
+niters <- 500
+known <- FALSE
+n <- 100
+p <- 200
+maxsteps <- 20
+sparsity <- 5
+snr <- 1
+rho <- 0.1
+ratio <- 0.75
+train <- 1:(ratio*n)
+test <- setdiff(1:n, train)
+index <- 1:p
+
+instance <- 
function(n, p, sparsity, snr, maxsteps, rho) {
+
+    x <- matrix(rnorm(n*p), nrow=n)
+    if (rho != 0) {
+        z <- matrix(rep(t(rnorm(n)), p), nrow = n)
+        x <- sqrt(1-rho)*x + sqrt(rho)*z
+    }
+    y <- rnorm(n)
+
+    if (sparsity > 0) {
+        beta <- rep(0, p)
+        beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T)
+        y <- y + x %*% beta
+    }
+
+    ytr <- y[train]
+    xtr <- x[train, ]
+    yte <- y[test]
+    xte <- x[test, ]
+
+    if (known) {
+        trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p))
+        fit <- groupfs(x, y, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p))
+    } else {
+        trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = log(length(train)))
+        fit <- groupfs(x, y, index, maxsteps=maxsteps, aicstop=1, k = log(n))
+    }
+
+    trcols <- which(1:p %in% trfit$action)
+    tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4]
+    names(tepv) <- as.character(sort(trfit$action))
+#    pv <- groupfsInf(fit)
+#    trpv <- groupfsInf(trfit)
+    return(list(vars = fit$action, splitvars = sort(trfit$action), splitpvals = tepv))
+}
+
+time <- system.time({
+    output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps, rho))
+})
+
+vars <- do.call(c, list(output[1,]))
+splitvars <- do.call(c, list(output[2,]))
+splitpvals <- do.call(c, list(output[3,]))
+
+save(vars, splitvars, splitpvals,
+     file = paste0("results/datasplit",
+         "_", ifelse(known, "TC", "TF"),
+         "_n", n,
+         "_p", p,
+         "_rho", gsub(".", "pt", rho, fixed=T),
+         "_sparsity", sparsity,
+         "_ratio", gsub(".", "pt", round(ratio, 2), fixed=T),
+         "_snr", as.character(snr),
+         "_bic.RData"))
+
+print(time)
+

From 00396d05fe7cb5464582fb7792d2bb06014648a9 Mon Sep 17 00:00:00 2001
From: tibshirani
Date: Sat, 26 Dec 2015 11:46:22 -0500
Subject: [PATCH 105/396] rob

---
 selectiveInference/NAMESPACE   |  1 +
 selectiveInference/R/funs.fs.R | 13 +++++++------
 tests/test.fs.R                | 14 ++++++++++++++
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE
index 8dd9120c..5b65949c 100644
--- a/selectiveInference/NAMESPACE
+++ b/selectiveInference/NAMESPACE
@@ -34,4 +34,5 @@ import(intervals)
 importFrom("graphics", abline, axis, matplot)
 importFrom("stats", dnorm, lsfit, pexp, pnorm, predict,
            qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq)
+importFrom("stats", "coef", "df", "lm", "pf")

diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R
index a6a9c3ff..b5ee511b 100644
--- a/selectiveInference/R/funs.fs.R
+++ b/selectiveInference/R/funs.fs.R
@@ -106,8 +106,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
   }

   # Key quantities for the next entry
-
-  X_inactive_resid = X_inactive - X_active %*% backsolve(R,t(Q_active)%*%X_inactive)
+  keepLs=backsolve(R,t(Q_active)%*%X_inactive)
+  X_inactive_resid = X_inactive - X_active %*% keepLs
   working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2)))
   score = as.numeric(t(working_x)%*%y)

@@ -189,7 +189,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
     # Record the least squares solution. Note that
     # we have already computed this
     bls = rep(0,p)
-    bls[A] = a
+    if(length(keepLs)>0) bls[A] = keepLs
   }

   if (verbose) cat("\n")
@@ -448,6 +448,7 @@ print.fsInf <- function(x, tailarea=TRUE, ...) {
   invisible()
 }

+
 plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...)
{
   if (x$completepath) {
     k = length(x$action)+1
@@ -457,10 +458,10 @@ plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) {
     beta = x$beta
   }
   p = nrow(beta)
-  
+
   xx = 1:k
   xlab = "Step"
-  
+
   if (omit.zeros) {
     good.inds = matrix(FALSE,p,k)
     good.inds[beta!=0] = TRUE
@@ -475,7 +476,7 @@ plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) {
   abline(h=0,lwd=2)
   matplot(xx,t(beta),type="l",lty=1,add=TRUE)
   if (breaks) abline(v=xx,lty=2)
-  if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) 
+  if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0)
   invisible()
 }

diff --git a/tests/test.fs.R b/tests/test.fs.R
index f811676e..a3d1f916 100644
--- a/tests/test.fs.R
+++ b/tests/test.fs.R
@@ -171,3 +171,17 @@ out3 = fsInf(obj,sigma=sigma,k=k,type="all")
 out3

 out4 = fsInf(obj,sigma=sigma,k=k,type="all",bits=200)
+##plot
+
+
+ set.seed(33)
+ n = 50
+ p = 10
+ sigma = 1
+ x = matrix(rnorm(n*p),n,p)
+ beta = c(3,2,rep(0,p-2))
+ y = x%*%beta + sigma*rnorm(n)
+
+ # run forward stepwise, plot results
+ fsfit = fs(x,y)
+ plot(fsfit)

From b238822a88e45175188ef334fb1dc4424ff01a9e Mon Sep 17 00:00:00 2001
From: tibs
Date: Fri, 1 Jan 2016 15:50:39 -0800
Subject: [PATCH 106/396] rob

---
 forLater/robnotes.pdf          | Bin 0 -> 150705 bytes
 forLater/robnotes.tex          | 287 +++++++++++++++++++++++++++++++++
 selectiveInference/DESCRIPTION |   6 +-
 tests/test.fs.R                |  18 ++-
 4 files changed, 304 insertions(+), 7 deletions(-)
 create mode 100644 forLater/robnotes.pdf
 create mode 100644 forLater/robnotes.tex

diff --git a/forLater/robnotes.pdf b/forLater/robnotes.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..b814a8966b6b046faee5289895535978310ff7ce
GIT binary patch
literal 150705
[150705 bytes of base85-encoded binary data for forLater/robnotes.pdf omitted]
zv+g~{JdT&pgUFy9%EqrE?Mrn^VzZpVm!Do1`3$+-nzE84dG1^WO2jC@@FhT92@qO7 zh?Tb5YTAsg!6F-BwqL4zXEIbVES4~bfHO*bqe*rYU3 z7!w8);6J@Vc^in8+0=4lq%+AeGLxv@sIpxhhpVxccvfI;YdMDqd~o5{05%!@vQr~m zQ1PW=cK?PG7#*&gjttA5MAmy@R=Os*We$e^cFs_{MknU>mLb?#4P~lGpKj*WYsm?<{Yv3#5 zY`%(#lV$u_#|r7H*l<9`?TpqLk!+yJ_4-Vfr})%8QLp-nH;=FTuqhzN3?M4DOhgQf z6BmF*t)~8#?cXs4=JX_k#US_!0ou2{76QvFEu9@)<&cO|9$ID|3H5}4f?EBW-OW|G zE_sV6yPx0eiRJY`duNPaD;$h7Et5R}mHxv0YFtlBYu0G1Z`8UMb899m=}NoVjan|Y zyC7PphQ~V&yCuyMmihV)T%kFN6;6MSqW9Bktx~h~It4?G!OpHBplv?+So%(2mVVyp zSdG*~wmF+_a~KV#Yg(EPjIXicdqN-9CeOWa2w(Ttku4(>&Vyq5A9Sqg2o_*U!S!IiL^Odw1UV;~F?<{aG{Cv9&TZ}hF zF~c!HQe^kk45y*S<$7dA@a1fxv%sBd5!#9$Z8xdT@i*`uYuho`_HINgm|NjYOVUpg_U0=IXI;`GMqKdfIq*L&NPv>Qhrxl7&LB5r^vbaXa zz-gc&y^v+*jM*l9n33Ibd2d&s=)GE=y{Gh;&zDn>zWRpU5tj(l^t+5EoZpc9`y!qT zN(mRo!_x%@IY4tdq6yZ3IO}PJ6m03j{@Ryv5qRLr8Dk z|2M)zb`&iXQ3wG((j+9J>eAI6UMfLYR5lWX+^Lo!n_ZE0<}}Ob92iWJ#!I|-^UV~5 zyk%2}U&V@4gAn3JlAX2*^@^L1ycHx^4S}xZ)DDH`CJB54gbkEVL)1U$WPe`%frQPI z#-Y^_%M1-*aAIv3kyHi%5#`k zJ4N;Ya3{>9D3zQ-Vtp=wKZldAJv7pw=)x(`OPjRPFZaZjE$)9*AX{y0`}I8TcV7Qr z91z}b&0RaDRtT294~-HDEYyvSk@eGha4>_}P79;!JJivPfcU6glV@Xn9N3Fct~h#+ zm2E}2iv~jND?*)Hq&_47d)wi{%lB#Y&$#d>?-ineWZLYIT}O^6qn;vGi5VYcq(+6X zrwNOwHXs_Ze5JH(vPY)T-F(c=^FvU!Fv6XWWLjxugFQn{o*^Qx|?zMbI`qSn5 zob`#dccvbzr|qPBWAhJ0Iq`V8t8A{U`?5#fpo2a~7i?6({g0|cz_o{E91 zsYMHcHp><3M*}8w$|f*{a}=AXzCdzywmq9AkPz}`-a(XhmFbiVGjRhs7P|RsG`z{0 zj;d3E8WnHD@a=J?7j(b838$ac(!Z4RxKDycDie#nHf!C%2d|ZgVGEhz*zyN+$|L{c znzTH<*r%9FJLkCE+99WjbD~OJO?ewWJg{VNH~d1U=7erH*Gm_X3H zwk(RreiQP8cE|c`0P}hLDe=zu-Ec^{6y0V0?pcTIpw%F9Jk|SNF4-?jD}#&OU?wCH zsOLz`0%emu2e*}Z+oI9((xv`i;p=J#5qIwkiGQT9b>Lq^~?Hd;9Q`Rbd!0g6iD#3vax6k`Re^_ z@DvULY`$7Y;FPK>72Mnomm0Jof}R{Fmi4fM>C$s#ZNP3qsmg>Oxy>n~ z5&Db$i~PDp%~J{J-4;#%e3U||K0?HP91EAJB3k}Zlx|LC26v(pBYmYP%J@cAgpmY$ z>qk5E6C%+mn@_c;K{?>FR7Ed-leEqmw=+zhmPWH9ZB*jUcG9OX>B@IAiOtdQ+L@p{>=hv6_72DI_L) zPb={8E~5*YsS|-Ht`X`F+qrxa|H-2-&)G1&X>)ewv7CY|*-Qarby3n~D(Qy>-B~k&UWTddOlz%3UO4TpwrAmn4V|81zaiLyBca-85R|Fr z>)P35R-5Nj$h{j`x}{JDE?AXkCQcM!&PBH$E%LS|T)-}vR@`Yurals`4Q>!xtSg^d zvXW6%9}RXA>waySdcxD5FvQD-+cRP)%!akhwOqsNP!P4%9C z#E7&)V(MI9Cj6_J4ofI$MnP7vXDTNVz= z(E(Fg)M}yapF8J&P>$gv42%TV78Ag#nQ`~GzY-bZq<1uwARfYj-JPuHW{*pqy@~`L zHS+lDZtX6k2xHvJr!l`jhEX6C8H)TdnPKLAhgNKza;fRwLs9~&nj0YWx-WJ){((-t zE*pNZ2-HP!(i*52Puop7Q4Y`JK1xoa32Etgf3j!zP!vzLS{Vk+KQn^EW<{^0&Y8I2 zV;Z0wqz?TsWYUj5gh1M~7s4kgRR>Hd=Z6Vf%|TyP6>e`z=BM~>!mvd%P; zBU9BA;k2c_$U_LUod)G?PHyC9Bxu-Ha|jazrP4dbO|41TktqDJo=x(2#Z}~| z_xPpl0DIpAljW{OsbDMqImRq;LMB z2_-r+wSXGr5f^yRMly6xz&V|}6+ zjU~))R_~Xl=gZ|rG)BNQSyBmntblkcvOyq~ zDh`Eb?4h(&qVCMCr6d>;M(7x9l$~eHOpWhz4wE{-J7xgH2w-u1zO))iVv<^iwq!%1 zIdTgaz(=Qfj#aUBM?BtHbRYk~xxe7w^;B7H`{jy?_NtoLU3pUzSMg%vu>QHgVX*Sp zsIMVueCaa%0(o-YkVjELi#JQ2x7O&~LPa8dueRd$P8ITMG7)V^@yw_={KtRMWx z_jpHaCL~$)_U`UbU{}Vukr>JzC#JrAB?DzM$d+0p4t)GI*jwT=K!j}#wwDM29=Vr81kwj&t*EbW$e1&$1jcIIDwpk7oAvH&{DNi0?JzAjWo1adIq@9Kv(= zZ6`rN*L=aE6ur$wIM{2X{?Qq^mFJ0n;h`N@ zHbTgC#E*RMa!A)RjDqobV8c8KD~=3D6nIyud%H}b7>R}+)c7uZGF0S zHrm%_;NN~+wW^b~1sT?C0%dZe%Rreb)+;U-e4eCI_fUS@cG3r(MU(arVP5I4zPs{8 z3;svenY8YjZ$WVr;QLK6Z=b*hsTb*smJp4z#%8>gVGMdrlIe6O|&k>)VIXYTh&8pk+L-I));OvzD=@(g=l19qtJzUH%RB9gf8%@B zu7@P5v6+rgllmLjjjTzaG1(@%Fy+{zXSW|ct^qkN3nHaZv`1d=DW@(k~} ziE-MR0Q(fRX|h=wXvH8+lJK z-Zl~cHSDa2C&cLbquPtGqRH=y2g7w|^`7~+xvSDi=T)gF#X}*5C>yfGk&!(XKX6t&`%4KI#z+JfJ zn5K9o0Be){xbK@ni!riC6<)4;PEya@!4v*}oUE~bii?ra1mn=M*>DmM2U1&I$I0Ji~d3(fS>+J zu%v>CJ&H>W1U+tktDt_x-L`e?*G>&YX7lWRua!q4shPlW_K zLG#}9Z$!iP_|IRsqpNBi{rYjz4=Fc9OAk$P`rL+GPFuGY{<=}Ezg)Ds3OX7h0|+@% z{xsJZ-{>|jH&5&dUy43U-;@o&q@j_tA&R%x{=~jUEou!VA@>f`d|}QgaP*{9+^8c> 
z^69u{tVa=s=Z&rZvV8^=C2ViY6;iR$H3P~YZ!{IDHG$I$IttEDZk7rr85?B+G-UEpH_dQWfRqEUT600U9@Fdh7e-l4 z;-qsK5muY@DYflsRtIQb9O+9wDYV^L{1-RomFR#_hfz3X%dOea>Cw=0v4`FCWzLuF z_X(dE6|eQ}g{g_s1!^+yM|X~!;J-r0NX_C3Hy+LdMNokw0VU335~y8)otb5HAL><~ zYw4n>_JOA+>0B(IqyQuDdc=^+4wGnE7Uzm)md{SwWiP+Qb zQbvVE3l!+7VPN}&Ct;Y1J%oB-(Lkhsg_gOyi^Kh^Ie=IU8gHlL+#Zl1XS?_o`NuG^kSK@Qs$hnD> zr-`0VqgU0%wg#in*us&OOe45wlU_B zf4&Z7l@#K(NU>OVv_jVdI<@duME4JNWR6^DHH7mXiK4g8>71%WVcGjrj0u%IG8=KP zkcs2lf{fJb@pJWietVTpHKgQU$r4vDxmvt}d(oEVv?xkUEb%a-+7PUT`mFZQ`Sxb* z0nP2a6iJYSoum;{9Zqtip<*d(u8@fRE)&v{`k7@IX(n|B00tHSrksPTxN#aMlm zP*q_jyjB87ytwa0pXZW!`|08#B55AC3dAzYnq}C^+Nx_yK&qienX%#P9wo75eGJ8x zx3_j`D`R&&Q$jKAAia&y2I) zRR~X{IxCHR?zr7-4YOy9bap~0*JG&~Lc{+F;8pwM`H3PGTeksiln9(JlgD$()wB*Y ze3m;ys3tngkqj+2JkYLBXtik%?H^=qRTX|(v5gKD^MTzIIxc(Z<_!JJ1sX6RSVHNR z{(EL_EL|ia;JJ)JQ(xAES3yCP=3{~K)9n~FC*Ks@j!#Au2RnwU$E(4>k+D_^6jN1T z8E|UwqlUQrV!eDW(sjOTF`%v3r7YFxXCm{{bIw3gJXNvXrt|*EHn4|p6;FZwZ2*=K z__p*%Aih^QUf1hmbwIMRzdyQ0cD3AknB{KaaRhM>k=-QnEWm5xh5M0tV@Q_F-vm3% z?1fEm;A?BUnk{XzTgzY#2OZrNmyW{DIOsyRDxY}k!N7yf%dK=7oq` z?iU1K$WZj#62eQ;A=wv(S41IX3R4#kht%lt`Q;5!{*YflvgDmLYi|7r7AyPtC|&u3 zg7LL)2&E*bPQIRPAixu*@$|wyzvTVc**}dUcB^-_)47@}DX%6ND<2|h8M#_NW!+;9 z8tjX5Yt{Y`jN6{Hj7l0kJp#mvCOta-ljfBA%%FeqX;hw%?YHE<;Ea*}@-t4UMq{X? zm3-3DMwJ$x@>lW^vc4#+#dF(-L)ls(;nmPVmp@@X5-32&g8g|kP&Ww40eHB~-uPh$ zR1H}>Nf!Z;r{k8F$91jLH72;CO4%Pnp3YnmgS!_?>Cd)dA2N`cJv_ciN}MrE*;7Ou zOq|US=s5$H#Vq~L`OiE5YGaqxjBJ|iLc2@BTels*PT+-`RKVY_K=gH~ z4Z){GU`%bblr;Pbcs`IVo0iNo<)z*{b zIq|n8`_cTntEeYu@*I(Pe+rShFP)sHeeell7iRx9gu$C71ZE@Ea2^H9V&GuhG0^4J zFP13ocfZ{%?G?Y}NjzwBZ+)1{iGIUcWtT51&~r1_Ioy%X;erOgg%(P#R!pkZeBS4# zX(N{~H@!#t-oW?;9(krTbrpJXsa_I+d!5mTo%L29u{ zLOpEFnR}B}XR$hw`U__mkGn7rLi}kn7ZeLs zJQSl=#7uEvG`4EAx_z(8ix~HUH*Jux8REFV1QGso0`VHH@$2e zl-zs-o1Qan6?d55Oa3bMrCi%>?;0yWiy#I7ZKqz8NrZqyoOc!0Mr){k5>c9&`lnPv z4HSO0U0X~Kt0>rG3T(hf8gm--NZ+cu(s>I3wqa{5#L4Zi3UMytDv52{(|x)a8${Ff z-?k@gfP;qxbGT?)Wu;j;RQ%T5<*F4N>`21MC;GfX9oKba^#K?yK_n=%iO-#5+v~d& zw=$||m=XGe1L^^|jb3}a2VF$r?ApKS8n&2^Pij@~=SzsrT+y5XMt)_Tf+16iR}o;L z4l)BY=pVH6z%OeAe29WYiHDrcUEttm*3G@gUNS$UPXLD;4pK?3f6WvoCVSRC&$|{@ zf!t&yh32Rajup{&USCpXPt@W~m(a`vhCR&rCEZpzxxw!p6rc34RO6bjM};dNdQVk2 zsoY6={8rGT*rO;n$8!F;3z8@K?ds!f=me)@^@JU}hCgdVw-D!nbmd_Ur2FN=?KbK7 z@i_x^&KncuUGF2Mv9jeyF^;YY=8F$u<5QsmB8WR^TP~yQ(=Jsl{?+WHX%EKoV#3u` z$LJ6xWg8z@p4`;K^nh;no|}wrD{qb;PiG$IsJ@9al15ZYGf@#G^q>7YI<~hxhpwRT zRQ|b%NIfonA%v}80qLteBvvX#)Z43{H=MOy@#lezLS2a=RH_)YWhUA?K)?arx273t z@yx483iiDy^sRLt9`%+Xm*bJ=?0~fEX*O=E^VZzc_a)j%foQnw^*-P)A#4IA)J!MFLj(bZkPc4)u;B zNLc{@i!8*2XlBH!Hq6}}liqCaxezyzMmoQIrL+GwbDyZExmp|CsMuWzd|>=r)`pO0R%>H05`uCSKl5L1KgFalC|$(P<*aJ0)R#a`BYOaE{V-$9Ly)$XOC5) z!mrPB4Ub{csri1e<|!v$Bse`O z;EH-{nd|c!1GriZc4912VIICDM*Tuh$7!fmFT*byoyeX(rm6OH43ddN;Y+SK-RiBWchp1hRGO zlV{6#56b%o*Vz{{71Fpd|gL(-dI#ug-7aoxjPuX)tj zVc%;n{6K%rsotByy!aDF_VCgkbTYn|mMF_!5m69sFN5f{OcAx;q%y+WFYRKK<5~YT zam1!HkL)WOkn9N7+(N@7`8=Y`$0HBUt1&1N!3{?fs_)*fy#PqNR5L2V?0{4{9rx0f zv8_av$M?i%6$RR_ZXbn+C_LB~X)rqYGZpG}F~yClTBlIl^RN*di(}YEvYgH)E0sz_ z?$r#&MkkZS=MzeyC49b>?A7hTiW|80UmKBwXB^|5M%1Bc4z+H>2+Cb1Chv+Er1Qe` zAHK%R0=#Cm^dQ-yKsN}D3*CdC{Jzw%+jJ@g$Nf*qxZ-MvH5d88xm0;8;ZdAO#h{-L;rP-tZ>$@?U>_*)roHxfTY z(oxDCT6*oU%k7sv*Uz@&0a#!D>4rd<*Zzn%>I8CPyHsg?$O_YD=pW}Jh5hK}<*LNJ zdh%o2@*-M<1a)FRHYIGWJSvp`rmS}v{!QMjUg$MVc}rQZm_uzE9>yD_V-PdebP8gh zXb~mNc<+4ci&0EaMxDTnX5TLZn13ZPe~}mpb7sNO=G^ zI(Yb;;!wud(Qf?}xq5EjhBF!qVry^)7XpUQxEkxKuE6--`Xglu6pH>+m`FxjiP|vJ z- zVPC8zC*6(Mqw}L`L|PFoKRCq>F;_snq0N8!>zTf(8Qrm~2HWfM*ZfeQ*3#y_W0O>) zTStG+e_Ap`29J!yKJ_#LvrUdWdR56$2ff5KCL)94L$Bp?G&b#Y{U_b#J|bsYPL-i# 
zpixcDWaz1D$&IophBCEo9vrXbo3-17#p^V706k4Uu>O;m^5CXw52Eb4Ik%|t@39k< z2XPthY|~(1nYgfH*#2hP?p2gz__Zo6R;WJPmeQDT$<^+k3S{XOl@!F+74$1{j*IFh^+W^W=NdHygrVsnc7zWD3megT9W*G+jK)ps`o)^w`2o-M;gwbji%`j= zIH4ijuS<2fWf45gaH*c?}4%|F}-u8Kcx#Wm&XQy@m2{pbNusL>GucD(=(!u zc7lymtxh7G|6ow$*$RB|#cue$eAG2S-cWu!q>1W1C?6TSc&5eu4{`VwL$S zU>qRd9nBuXar=DwM2SF1^6!!@5FqU0Zcj3$v`c7{>cHTo9gNn-#|DxOi~-Ht{C%1t z4I^(hp1K#F*t+j5kp5qEZHEgYLmP8CH;`@NHg7vilQ?vg(s-bCY13~G+hX%f`^l^& zJau?00*WCM%@kagh~90+Ae*fWsp>PG3c^4I*9;`Hylkxo5@!x&ytbpPdD)f%rI1z- z=~fqyXQz})EY0@` zp%{Y%N+9=Of+SuCKi&IncJNAlu>OF9xsSWtS$d!=lSZ-^6rDzIg21cLv?0RifRU@L zOQs6WPx|QkLi`TMB#`Bq@>QQlj&IA-r1*28y3%WldHvYLU}Mqg9+W%@_j{Py#~YRQ zajIHo@nCBM*r>!GOsUUE=Xbn?z$QKL_zK0Q)Y8r%;4G;`%|hv^SO>#i^fAM%V&9=% z_OuzUT(ri&Cei3srdUF*;X|;^q z1krLit)9;UR-~*&yjTIglGl)d-!cr!;Ponb94%BPUOcV!Z z!2B!$48<`-unvOSruW;S2LPv_k}Y<X{Q=3z|}YspXw9tx)t3 zAJ#NqxOX@n9T_QL>>Q8)A?NtC)I*UGHsU$YjCU132~}tl*ZFB{&OlKai0lo2eXHwu zIu1K@4h~p^Yt~L)8S9~+IwhT;1T1wess#TM8D*}g`lNU$yV7Vtpq~{uq@xZ>=kO4x znVP$fBFh7Z2ukYMY->J7P7tuIRxY)DK&J>lLUl>vtXs6Euce~8DQU9xr>I}3$@icV zqMI&Dybz0@Ywb+s+>AwT*F?Wpv$I?!5$Bs{mbx~(WSDEwhfzjsKeGn2kIgr&(t-dn zU4r6ZKCuQCTqL&po49eOV_g%vuEx}MR90Gr~&Eb8=Vy3=64 zI3JSF*ZSZ-+gv~Lexm+?%lYfj5vE>-*hq5%`C40)cM~okqT$@Mzg~SX*08UcgJ!SV z&_>y$8)@&8-8KHwm$k=hpeKgoXF`T}g>pXT=(1XBB?>d-)5QL3H7i-p97H!5;dzY4 zhJy9_89Y&7?IqRl;VEAf##^s+B^74^Z`sz7$7+s?csF~BL>^P)`cMT_kAEAY&tK=C zHfA%;E^`66<^@Nrf1c-^5Lr0>t%2z^&X|}aVA0nEaV*Lm<`3?E5<=ye30d%FS$4bl z2PV8UT4$#-z8+EDMCt%3nad<+85eSxZ9@SmwpO&!e)CoR8!|MXS@@bJXCcyS286JmOIwY06`~p#A~N%y~N$6`NOad6;Ix z<)uywO$i+wlXaQF;GCHUkt}Q1Z9+@6H9-lJ(dO$TWn_WWG&$zyUBl9Q5GTT_ zwxI9|IbDRp>~i$CR#O_*n><2c!z0;TT?2wn5A!~54zpg`%95db=*^0XSe}$2QLubOg#1LewZvZMunr` zOXlHd5F>?NZE2u+h0#b}JjE-E`fmwj8RwHI+cP!r=u2NWE<4$K>Zt-Fxs z;}BBK{25qB>C-N|*o*G7SUFslf5nB9%3+}v93c$Q2;-BOfIeJkoh)rJhQ_|UwfidL zqKI0H&zD$Q<)De@)FejTPcuJ%8Wq%zpa(Y#C!`9RkH+s9BrLRn<`_ zWw3Z$D$YY=Id})(6XwYaqJ^(w9*)+cH-;xWm8AmHnG8OVl6N1R0ON`rl3-ZsOo!&D zXjVYriRr}9n=>gXWgF`$09(_>u(1Mcr%+-FAj3Qq!o|lN;D2ZYHqJCo?nEP;bD-Z~ z*`L4c0o6umz)X>$Pk-lAF?2pknh&I9i7-*EU$CC_FBh{WZ`a3Q&6X?fX^tn zmN5WwNZi#?Mg^AJhtRDnRpSe~Y$ce2j4iu16Yg%Xi7rVcH2pn0N*?OI6uiShC2eb! z>Udp=Q>oz?$zD>F4v;=OF|FYX$ma_kY8oJs2!$PeSj1dnxYDnYJ5_jX_6>XW&A$rUzX?7X&Q`1BOn>gxM>B4)T&`nNRJf!y0a4j@*JZflTklV7| zcJn_NJEs`U!bZ)OZQHh8UAA4-W!tuG8(p?-+vu{*uWU@8OipGl{^Xp?eY2B&@g{Gw z*Rxh&mP-55ObJ_c_Zy-%RH-YMRP1$ju0p%MJ{i$A@y=eV-LEx43qOhvcKKq3MI>S5 z?yu}dXGtB%GQo1M<#T5sd?w9@`$n;Q<4?-{2gU>143v+FiNC#uRw*dnU*d$=-A%r5AfK3OF7 zQux^tUa9X$!Pi`5lza1E80tPQcl_CP0ejPwsGnPrqrGZ{XYFd30k{xk-#D1vUonYa z6VZL(y^Bgi>q=}vgKEHQvaN+LezopVO!x}czf+-r(js0^IHx8ldSeH)tA&NjV_W|z%ym19-)VnIT$TMR!?G~&&ZC% z?9@m0v9PD+VR6P>0kkV@{}=G0p?NT+tu7p-_n`u{|%zK*f^R0`|1DRLvyn;GySjY0L|d4Dcd+4vNVM*>2TXj zBJRO1Wo*+xBJj*?l9Iv2Y?AKooRZ-YY$WvL)Qdi6A9)XX&foqu0PUu~tRJ1$0PEe= zp7Zi$wso?8Cg%vcY2aB1BS;vK;*`|mHy}YGOkSk%NO_4e@X$@6Z;qm+bHNpCCb^08}WnR}G?REhHk_69{n7lyzVj zCddd_o?Nu)g(Yk&Q+WTae4yJ%RqQcS?LBLqx~0-x=IMDKmS2kv_)o>8G!N zvY240z;533$+1Ff_8laI5D?tm-Q8HdG2JqnQmIgB`|1j zfvW@h01!Y%kY8O}yX$?hLipcg*;<_@!3t=qh7n!@nn0%U`ujj< zu5=9I-^}dH6nR-6#sUN=IYGyz%R#3JpugqjMh3Ajpxh1J12?`($M3qP59z140!j>? zpOGNjVc!7{QX;v;81bt&6L%Wsw)h4&2s{3uGUcFa_ngr6Rixpqpvx2JB#d9~<3-S4 zgxTm|2z$Wy4~}=vAVl&&5#gciV}SeLd2tB>^7Hl&q@eot5*eiZ_laQt5&_VjpCT`A z%+A3;d;cl}`hXvoPflUTKp=y1a6AKqh9I7TAC<2KxQ0(y0ghel3t%RoUsi}V0$;}o^C~K;$_8)IcYVG|Nlb_VoVZZ^qZ6QD0fIh6K)Z*7FMU;? z**|^lKMj?cLO;K7Z}Wrh5z?4|pk6V<$EH3o5!?Dq2VP&snSj5&VbRx$(ujdZ{fq6P zj(~sj`3e5af! 
zJG|~H>W=FdRl@;`=wAP<(jszt;~_#rR!KxQ1-7pTZuzhOVQ5rDgwlkEX{-O1k^H$B zIV5mXgp9zM27mt;@tg($`5AlRa>ZzkuZx;89{YC6?BI(d`c!cCHy%Gs_A`B3D+l|P_?f>&+4|V|NkRh^X_Jb9`2ZV6uFQ^9;KGR1` z3Ds|CZf1}0JqHF16#gY}>n=7XaNBqGyPbzuU}ual_1Dfw-)(+66v$nGW&z{$AYDk1 zjaBdlV-@-M)_N;Nh*G>V!xPs!#eM9;9w^Hah4o!lCU$gGNh9nRk(O~b1G-e%6r%p= zY&Yj6)g^xi;~JG|pSDug*zB)=XNAn;homfT#7ca8K`zOMTJwl6pg?t| zB=H)mEzeK_fL`dAt%HdLc3FNQC5zwyy~AY&2Ur{_DVr$6%vZCtIXC75N7cBk0Q~Ca zCwg8nUh)8&im>CA`g_35uPA+sm58tBF56YUKYISzUyGY?k*NhFH2`} zmDIUd)j#&JEsAg?7;WKT=l9BpxyJvLqT>p1th(Kamr>pBM^7$cMS|1j z$L>|=EE}LE?q3x75j&c-ie@*g`kSAFb?s3@;|Tu78E9r}*A>VDAhu=mf+O#E0>Y9k@m`{#Q@?Idko}dN zTaUzI23f5K)ae$d8@gtfR3ZB}Trk*xIN<$pMED`hEzx(S)cdK#e}AriVO0T%Czup> zLhx62bB`nH=vigFw;HZ!cdn1X24tj1->s0mtVie5q~X(alwHTX1$q{NSZt2B$)0rT zEi<&$&4E>hy&9eSR|wdW5tT@E?&i6W_H`dKN$rW=_Y$A}eq`jv)3t;_956ILV~q{J z3y(AI=D+e{GC}#JH57YGeaw?8j(nuI+EH=iT&C+ZsXFDS zXs-w#6p(z5Lf&}xH>&8?ib!(>(Tni(3c^Y2h722Jr_z;8_ucKZ>2}WIV@09ggQTGsd9zcM`wx>VGy=ZnKNl@K z!2zAPC~N0_;R=%lA>LGn!L`V)wVohUBtyc-9Kzpm!Ex7PkoRgKxJuKVX)f;z(WB3Q z;ezi%a7{`IWsbbQ?)L2}8R*ELtKSv@bra0n#2kbX};&*UlMY;XP= zod4>2=m?zD!8hYugfnhkb^0sbpy6&f>ANTOZsjhswmB2F>PkUS5nI^-pU#CEyz!Cl}ac!~du9sZ9I| zT(W4)Z#6x)+Pk>`&qi0e5kWq6=X7f3IRp_1@YMF6GwW>}*O;}fmL|rS;CDRBWQ3uH z{*d7e3G3MUzL=MQiu`g=Kq`aGg^Ysb&ms02RH*_m9BjplXw15@;HuZWy=8dk@wiw^ zazBf+1sgvYWbPyE zQS362;Z&3a3JX%JFf@AVp|b5WcL&?SO+F;boCpzBHa43xc`mdeWA?We z)#pCvx?t^l3>LX-W(NHx6T7Y zA(xE7CseW>2E*&xk?JxVF%RoVqu*ewe8R|ta+E{J{UF|*NXK5J0N7*U`qD}F99G#tf;{c=! zY^w(=Jmrn_y0LR2f@U%LKF-VxyC@NlG`zOv$Skrke>HxtYb=#lbSB9hPbAJnL<MpwT1x9U^qzR22e8}l)0Ys$NSHX zovbWaW1lhXVO{;UE9`T?V1$rm{`{@}nq9k%c`i{ir1vY`zAI)Ku7t&0xQYzPi$37Q zI7-a^=wF3=V6#1`NL-C$pCwb*5#nG}OVVcn+k;o0?_Okvf8k^LUqwhfj?k&a<0OVsk8MlT3Q z4s1MdoB5e78+8ga*5w%Oc!as;PYa9Nw9UDD!=7|JW*v@Z&i_VfeTjP@s6zn!%Wl^1 znErY~HTLL4wOl!7SFq5~&O!3>>>kVN5#vyjSklS(OR_*kuAP`wZ2v>N_WBm+fyS%1 z-#ukxeU(U+NuG-Q_L&1B|NLl9eWl^PQafDJrN0baRK{Oov_b2tivh*S^j^7oK4cAg z)H3d1GWLazz)M_YuDQFr(6l0K;o3-Pdmdqx|HOz&K+&VS1b@;eb^@(F`S@% zodT6}Dl^Ag88#cb?;!NJfWa?5#?dy&#V17<8@KQx%7~mB(|XCE|amQJ<%c;hu=u_oEB->IH;x74pDUYY9@rzl{E0e-S%MCvE! z6HQoFk;PmhozO)|ps|3yJ}=&S9x1X9&y#{rh3lQQ{E|jJsHx7}zpD}WR)yD3W zi)=gvBg9G55p8St?Y=g# zqPqadnlm8MjNr{Uxbhj$LA?tZJEe{=_r5iG`t!w z!DBF`_nWt3+sjO20kL@6$$r66N64%WcA>t6sI+?8ndBl>^MqR1suoCH~$W^pTk##WV@^FnVPSh)uA5z-9niJU_IH8OTcH*7aJL!)*Sj<1$xt7Tnc5v&)OxCe>u$J=e%Y7(0`C8LqW z_v!t9O0YdZpVSrCYqj-8rir@}0an7_3LZi4PA*rO%pXg+Y}GHZkDE>OHRMoi3X^#5 zbGWp_uKWZ40%{$jr< zqb|leLvfYe(cvw8LqcyUWm!g(b>#{=SsiB{MRSvM68Rlvvbul7JQO#LY{yKMqFds`+JK3CAX=CK! 
z1XU4%@!QWh?1Pxi)3dK^q;C>Dzs~E|7v5)XerQ2`8JHW7aE3D19P(*XunO(d#g(;N z^iqa1qfe_BWnmsI^5OzU_qT(%dfuh?&$GeC3Fo#gBgn8Eg>gd7^ipp&JrajIL0P2+Lt)r_t`hG5yrg2PD{SAxm% zijPMBCFy3l%a`jdoxc?IYGVENX>{_v9u%O{m&|c`j=sqNLbYBv--qu*xBbeOFKSh# zeV8;R=cs*xQ~8j3(dl_wS<)*$PK`|aO^&g_!S}458mF*IyHEEvewx zyDu->Y=-Em{B_ZA!_`}VoQ_{^^|AKHA~4UWc@dfNDWF^s-PVI5bZ^ozbUti1SvA12 zn!XbbG0eslk%YUUI98V@F5abotHq(h+PCb0WF2_|xMiH4L_h$R7jr>ZyXuY);p5lK zG9$?nT+;Q6vfiEjuh2RH;@FH1I{kwSvFB3!(Iq=yXO$e7?f1c1IaUHEHm(IpMqw+$ z4;$sG=T#Kii&@8I%EY^!gGM%;3R&U9Emz!_>}&hROZ67YnVol14LA4%Elz|NwXr}X zK>ir&M`^NcukAVYmJWPMBY%spP7EtmHHJB~$^9~_sgaf^iP_k}6!FtuJSodj?>S(3 z@E?MdVR>psH)-#Ufyw0Hbu4nKUqA9(_Cc6e;fqrw9;)&JHD^jr)9gWU<72|9E-%!_ zqFc$6j~xFt3Bo>CN9t-7-Q4^XLaRoQ(RH?Ba10bIcYh2eAEsHTKZ?>PNpClZQ%tHD z^?Bj^wKy@`LqvNd6mh$p8k=h8Lj#*S9~bi*e-y3%xBr5inx5V*DI&4)IreNcZ%6al zLK>^X*`J{6gJz1S}BU6h~hwye(2padVU5VFujS9^=JKji3*bM7kxN-H7_>6U1CEIG|#$k=dAD@LV!y3#Uo;HQb$qwIaO z_uawW@wyWD*>eoZ2*lQwyquqWoqlJW4e*LA*8xV<^9V+TF6wWDU|in&(BYyBU^P_z zleQ+3q``_APi(b9LZ1Pi$9dRBc4Zzjo;;RMKK17s{J!x6rCg7UW3hVhE=9k&<@l3` z(<3ttyhOD)7ZF}Rw~=*3nYj$zipbQl>N*bpiqDvXzUs zKh4fuip>#P@tlXTCqYu&0jegb!ToDZy2@#S%i~gQ-e7o8CX|t^p(~zMV1-P&34(FD zHMcl@sH0?(EY+yG*(%`7(3Iez%8`i7ut2pBn3~B84Qn2?q50~bke=M`ls6ejkj#sS zTBPq?@xh9rtJP#_{&4DWjCXCBXSd`jsBwZBFBY>3eiit<>KRQFkV&IA)c8X8M-edwrxk5T!5tdWd&pbmj6Nw9dWEtd0|YhC@#~SGv01D?E@teCmEv0< zx#)u{lS-u=$l^xnDhp|HnF>(+nQ)f?L>V23%3_)ZhqabHyYhGfRW8?y{kcD@1SMVh z{y3a}x7`7K{0wU893=7pZ_?SHusi;zugi1ZYgv%IJT@__VM_wbnh#bN3#TaB3Rbm< zT)L{628Yyqf`-nL@PBiy!o0nQCb-Tyx9`XyTlu0H*q3|+Jg0ip`H5$q?5=6~&;i}A z(PT*ZW(BGG-`O=%a1ezm%C9KC^R>uO*+u7%&V7upb?8^#y<93i_>ZdtB3WGUZ21hJ z_zXn2(61-kA%%Ib{D}MAL^;&aD@pxTEU+U3GP2a?6`dC2(W~}Do$RUZ zt6kH6I(m1A35bfDrN5=9XNC*>^ZNcU4-^FR^-(VHq~d)qVeYOok3%gbNoV|?mtWDJoVV+>?JN>L{M`Oc#U{VK<5Ha=Um-}@u1O{gNF74D!|5NC41iAxpIWwg<)z+}DfnR*^4C-2o6nyb( z+983kOD}arJ>%8!dG^If+qJ_IoiFzp^y#veinRvW*>9Ns_yh)AUjjy=Pw1-E7X#xS zgygLD#4NpZrlq~#rgA?D{Tq8%xlwrSDe;U>DT@kQdE7cCTZ#32Ev4u!D=gE(vCcEa zr3`zv;;kyHytp>g1lT@Tzm-3y)CQ9@HqnHS(WRtZ9l`umotxPMhP()Ri`{POCerK7 zzEZR5%;WhCp}a2|lIBY>qdc-@cVu4fN&cws!ce-da0O%NfhVsiZ^?S>_Mcq`$-H>f z&0=#gi#qIq_%Qe+ekedn!s=&Fo}q(MA>X>vpt-!N%S>%IG)rLBLD<2Qb7Wxz-w8QZS3wbU zehAZc9;;Mb`zwJ3xWn|oW#|_hUC>V63x(*AF zQijKTh|8ryPr8;pasaZHUj?}4Ii`6QmgnltUZJNhApK~jZ)fv4;l_r4cOj8R) zvaRh;Xeps0rN5dvLmZv=(WiEJ{i>0iBI0j%c8^T0vPyJ5YGI{%2laux<|pRf+grpK>7(sG%(H)s(Od!f#6W{P z8?80bCW`!lwL4r|yj$1So3jvORd@I_;SCr*|P{1 z!lP7(s|ct+_xord2Im_pR0(*rjx${S_5(Dt%r^C|P#-rZSn8g+o04B}aV@M10%I zJ9;G1rKMQbiW8XF?N-bmrk$?n^Z=MtnX}ciucjf8Fd-#kjiqwT1Ab&xHh&p)-UD^& zTF)oILcN`%{JURR#n0hVdakGh3VehZ4;Db*N}q4Aa9=)~2hm{cCl9OvnXhWxix^gW zvL`I3%hGU63V&`Wos80mv0gNk+0)%s@(vQ&T8eOf${nW5Sc}(ZJ!^qWs1oOCIjw z78BQ+`GwT@d%%>ZDhdE`ff`e9I4xkpt{yhShT{(ewJTu$hx!j(F29FYA2tD)&nRhJ zrf%a25?geH$@wqbW7#3QCeqH~ju#GL@50{o%?jd!VWhY7)wTRkH)`%I-t2ZRfsQq> zwXjXCC3lm?Y0vYc6)(CG8k9$MY_U~pk0w$ps~tS z&(|nqhAAq#>)Y&DBirB{%&Tg=%p*qNRqEpJEs_{AI2IwwCH6Aa_tI@g{yb6R|2)J| z`6(i)q(a!6&6ljU)^s8ig=m7OL3o;wnwOe9NZzNIpOeR`mIiE2$sYu`<{a?L%(vLe z?1PPHq?LcQuZNL=ercp8BiM3%DJol<`yb)BV-AXnMW$%N#8*4{H_X4Pza%U($DAED zq~XOp9lJ7n#t7TUUBk+4e6#wE$q#YsucT8_@>r=Wv^dau-)I(LdQq;G*HcPccYf%) z;X>o8WVGeLEwP!y_sRE6{mCau{I%9eB{VKZJpsyFpwJL*BfA566Sx6m_Lt$;l*Y0@ zIyA8hh$1HzSx9pJemo22%A7F$v=*!0<5Bj4M=2JD@+zv&{>d+&wH+u~?!wruM9Ma~ z)!73dU~Pf)F2V}}yT=*eZW@h(SX4aQBOW~On=hUqYy));CHPXlGyS2`8`ff)s|S;Iifp-w8j$Uity_Ss4}X^9GEn22 zQs?W7kubsoM9oH{WL+irINJr#BQ-7|&rW#heKy|R*v5$b2}|YeGGITU=N$-d1@{Ap05%o*8O$N+!eQq!^+BVvxOOM$(wtmB79RU9ngQYixK zUrJQWQnN^FXx@!R*}EYI3crd!Y{&r5WLs1Ob-S*1haAifTJ8)B9k=PrC{|o;!@k27 zMDL@$fSkEW(3wpV!Jf6h@$BRLF_wAtXRh-Q$TcXCT(m%%!1$4ajxVkrX>ko)+eU;w 
zoF!(d=OB6?TC(vd3U0iDGKRldGp`yeWw%il!XQq+`0%a~?Ru1XUk&DQdMpb|y_hhC?!knS@Y1%~S(A1*Ocp*d zdxif-Zn{84?UbN^i@^=dM7E|lJJqv@806KRfgOly4PMLC^dR|c|2V?TUBHt4^WRu2{MDXePH+HhL1CT9#&5k{>g0LS;5I@hb<qzWgS%9B>wdLJfvli?L<@zsy>*bIk7xFgre zqJaB>RC3X3bDD1o!}N$;n{rPXK#RyYjL|x?Yn>)(LG3LM>eW+;MuX$bF88=%bvt_Q z@HVfx5c)60Zcs(-lsF$ewB0>E$Lz0CfY~QKcB3XhpJWV4l-t{5h zGlH5FSSar*2E za!w&&?^0iS_qZWsG`wYn04!-C1h(1^eM0^+52OcMz82C}E0@Mt0YazJkr5`_z@;s5 zof>P;U$J?1E_B{!4f8v?cr5$9c1x5%V$?r%rSiW9BN!zPmn=X%yw%pUd%zfA%5dK1jTC$eR6%s ztC2h_C8g=1EL;xr+9?eIJ#}39ep*`RqS;ftpt<4j=Gi@_(k&}bixpjr(56m;<>`-s z;O-LFpi$m$Vj})l=hbn?wXfb1;O8#sgt95nKagu*;dcK4&f@qFa25+27w3QDSS&;= zOzi);R>8u;#PomhtpB-I5%U7BsI&PO9Olq2aPRMbRVsMF(nq(qb%P*+AYpH62k?V< z2XYGru3cw4UXHc^KUF(bwS_Mk&b7?G*I_}TTCqaI(*s~s=LUeu{=xBSSR@riQ$q+Q z)+S~a)+Q1~#d7VA4UivF@q&3Uo-R%9wTJI1!D(!ZE10B6+^(Q2G`uU|M!QxZ3=SYz zN=8^rMpo8<)XdD&zA%K@ZXicvcUsl}5K_Uv0|aM5g2X3#XU8_ChUYL&54#dTE!6M< zq2b}t-*tyT$h-wjsTB?Uf&*(SC>D_S7FIT}iq1?eAe~-ch~VsfB~?|VjHt=s;gCfQ zxv)jSO`$~a{QX0dD<}mpF2Foppff;ys1Q^*o4`M_G01@+1$LIl_iZ{e16wOYYuHdg z&^9%)fzJ#DsCLh0g62Tu|60@H1O-P>%|Dx(-}azh{r*;<28JeY;Wq+h{-|2(Kdx(Q z%gY0^3u`-5lRF>=cBZgkR8o_yonD=bpfT4oez-WBom>I4xLvr~>p0l~aNki|SVYu? zAWRVCulbKUvbi;Qy1H1p)i(ZR1iE48=wMIpfgLW ziN|cc?J4Bf^rYi9tdzj~_@ZFae!LX0`MF6vsCVI8M`kxj5S@Re4!>VN>Yt3l28O^H zTA7_7vH=+C&rtqrLPiB|yI9LU=Cld{46GKZ*&n-f zKlVjG{-8-NO)XzCa$o7+KN7Y!=BAfVgrM_OT^&Jxg$IC}@t%KHRbgLWXlQ|Es<*Db z>(obAu$l=-o-tZ~QWhq~7AH|HN^Op}kytHY(3mSu?R{&EG{XkZ0KMR2v zv&#@Jf7%1Qp3-5UHp+fnsSWKf44%F3G`YEeM`ve8^`Hv=1|vcChl4l&F%z8Y6UY5G z9|G*b@PGi7oI*0WJBYt-E6F1OC6C)L>_(vWlf6Sa0%r_=6WIYJzY0V|WA~H2{Yz71 z6n_(`&xd>q*#J>w3P2qx+#rIyCI18qexrR8B?ycNL_qczKO!DLcq)Dc3lc1U6S)B; zzY~HTEWbnE1ti=Ng54Y3Lk2oAIdX8c00-?LoYeXs+5C+Ee%$htiLPdeSmfiFvCgWPLeq#kJa{hn{ zg`AwI``_AM6OIo&<97ZKAHCMNJ2-!15{?fa=p%_*yiw{Yj?wkt(FZ>NGy+Y0A17ch zi(mQ+DW=+fgA0+ry5ANwu781Q2@R}aXY>423DOnt#-1Y{0X3`#yY1_HDT?8J4?`X} zzY?-XEZ)WAh@L=s-L3y9sH|&!lYZ@{H3p+Ng6T>l_;(H?_1mzAn(?wkz2+8t=nLF^^aITN67L!v9^J#HZ-)YSukP)d>~(2!d&xQoG$UPk%eEFq4Bn!Q~U>Rzr6$0Djz<1Oz; z8PC6nDK&B7j8_#`8qSl%ztU%zhAoFLz|rTFjxtT=4ig63S&#-73iUk-XScUz3s3$0 zNz>$tMQ(XxjcM=S>L=ds40pVHgmYI)aMST#KoN+K;NM@_O5)E9E9ssK6{Hq6_L9tL z{gE9RSbR5dKyeO^Q1%T5Z1Ubl}B9~v?52S^e)8c@|$NGT)0K`h8aj#+S)YPQED)+Ouzb7`aMdu zDi-qkl?sF#y>W(^9M`be=x|W}&N*AVwfk!8F<1q79u>SfD4ioy^V~biq?VB99(YC`Ve+QuPeAJxubNQc&O6529D)M-qg(zJesl`ps9*Y!GJnRd?NQw zy~`Yk`L?qM4r(fK&IDGw=1RXT8%SzdToO_@K5Gz9!0}3Cu`}x5iQcU&10E*2IS1Y` z%qiR?M@!wueQju;qSimOfqd{9&J$WAQ^=xu1F=_eL6#d9>P{Bu1=#pRanzx#SWh7W zam$7d6$?%3b8V&p;FJm_z@a6oS^`JWXFGlLWT%?;-sZ3ZC}V9s_UuO`lv8p+vD7vk<+T zI@IJ`uw++y(4wn{{UE8mYB3_%#hi=mi(i@g33+1*y>W&#_+p7V&9_9UN0N~*?W)ma zFExI!PKx{ad>OJLqB$J*_6KtmMurSep)&r5_5zK(_7;Aclkdz%o_B{bXX3|7L^{UC za&o2fP_>Ek>>3x)7=`<X1H?AdZ?a!R!EzP*K zJQlX${(>5odYoci+RIY&FS{Ox2n@Y1xk#!X@E00r* zwG-DEDI5RensSlTuhB#!LeUZ@9^?q99`y>P!sGJ656#pJbM~c5F4L5W3ETWQ0@~kL zNe!H?U5@f60jQ+RFCrce?M{c$Db;8g)D$Z_$nK4Y?dOkn2;VU1)n8mY0$WLoeWKHj zi)PJBeqsb?Vhs6yhi@EOjl|3PMCn1M(!s3HUv@HpL6X&$B;|6K)5Di1%s(eFNfkRV8{`iNclfHN<0b7pJysd30C22y1v4VLA;#Z$sEBZk=%!08 zdp14DC9g!EH`KU%4&ms2l&$AI&9AaV;3%tyosz~<;3+(%au$bA#6;72jiV|H>bc0- zZ}&}gF&AqOYGyvXt4)C5_&ROZMYt{NstwSQtfW-f^kh={0fNaCz*cg$9xE{EzW{t^ z5_cUmJnFFOO6FZATSz8aWlkK2hGl%qE(NFj%Nlqh+fueO=aD!)=rc=h<;S&#D(;m_)#KtaT*1|;xZ#MEs)j>6%gaaFlb0D0IbuTM zb;?Gnlk3fefV~q2eIgj{YgojlarAlAH8hIUEuWO+ZjCus&-a|Q670u?w2i-om6YBi zCZ#NPxy8rzHosi`WbH4H2QqX+!Mn`12o-0Wu8XynmHG6*B%PS+-!cJ4&&*2%BfJfe z2(68K1qtqfg1^RaHGU4eqFKy`&k8>qk=(`ZYO%nn83>%PJ*zY(O%lmrKk2=-xjif;BNfCbEK9;gv<0Its%xaDb-1I_BC&%#t)51GM7 
zKy~dI*s|)k>Qk(Cw-nqSJw{MlB40Zsi((HQHhWNdF&drveA;cR|Ky-sG*gX#xBH5JOS?%J3Vfy<&KlC3jXOXhUq)?0Qlt9CBtyi>*Tt3f`PkH9;m z@Jnb=2-hpP`h3LUnF3v4ecD)~Vyi}pkDq=V?isDX^&TxUG1iaEkDZn}o}9UIl?o@< z@yY5}XMQWeIdWKe8Gc=Cii2l#lc-{Gmb-eQq6Yk=Qo6tZp_gYX=tMOe(3q#B^# zrfGErA07UTWvv|PI<>WS;tVt^%u+hXbFQ}~TIi|>It&wMusCSm4D zBHhlu`&&-w5z=TWtWZmWV{JRPxJMO(%6}G83~nm-XM2S{SZE?fnKQCnGvmv4l@qNa z?k}X%!GGgB(`FT^5TA?g!l(YK3}qLdD;96olto!OIAemK6jQmBYsO>h%(pkL;MnF2Ysu>L|MYx#v}h8d}xT^gc(DQF)VPdOlJc zjqJ3p)l=;vpkn5B0M%K%)xylm+?{ z*C@csO4Hj(@*($!j3H7|4)BqmOP8x_(OMk|H=e5Jnw6d6Lbr%CgURJAQ`xm>;9d3= zTHBrEY&u^lx^wz;l-Es(R*vy5D3}peG!QnOE@F0z#9Mr-sCXN@Xyl z%lJOn*^`3hHmYQgiODuwnmtVQ6Krn_*8W%VPfL=$oq~jREA5J;h~2LJr`#ypz8^Y% z#x=GA`TTE~Q|9+TaOS*;ZLH}E0B^{;x+?)2!Nt-x*p!{v&Mh4-lU`2bXQ{{Ef+3{H zsHAas1012o%P_C=E?)A@b1}B@?Un~vX8VPo>oGZozX7dYP7KM)dAoPGXkM_D^bkq* zRsLq6FZ-m2lKxhxxo<}aZ5ol@Py?*&`9ZY#k0k zQ0%tegn2`jSD^4<<4sXC!P>oR`K3JY4H?;ef2QhnA|59pwILTmi=s?C4wh})QBJ~# zPAn!;42qv($Nse~;ElJ(4IaEr6`lpp>lrh8$LfF2!TVGxi^2jHT$m)+Mg1M=^aTLB z_EMLLvuT%uTTR9Cr9W)PiruHw&S)mbNw%229d<5;L4476jRoIo7m*dh@dq^J+ATBu ziFC&-^C-hI)zY&?b8mSp_Y1RCQKOZ}oEyGJ+Ik^!kR>)uo>DLV9 z7HvsGlJ;nX1#w-IXzFhLkZ^OX43|hcFpTTQX}o2H*xx)Y`mh(FA~(ujJC5i|A!Z2@ ztqQSHs6iZ`fu;sntyXuxDi#DFt(o!|XCunZF3clyEQaG@ zqK^$bbl-mlXl32pmR^OA7Sfh-{nBLDp&<+Yi@T$%LACzddjIex@A!!rWqDe;$S2u0 zql!iziBKk7*=D)oH>1=1tJ5oL&h2%&+jfwS?eU0aQp*Q09;$~Wlze3LmprSOkZ>2X z$_|~8mCH65_#~b0w`e~y6#HLj(N?E*)zH+1d4<;H^rqM&2T|L8U2J4YP9U5IzYcfZvB?o}Y!7!tFWP`3C88EuK9 zqU&}89%Bga+7gm0)@PVnS$b8CQ{OhJs^1OQr$w?g6o&$u6XhKs6!RGh0BJ9C9Ck!@ z*kizWMi6wBluGG+pO8CQt`P9{^*}pwP$PLT^rIV__0$?fZ7i*rWOp}@n+K!$`OaxZ z5}JpeU>QWVkA{bC#D)>ei}9x93?KFfPqPS(TGo5*eZZv7fGLh!oTj$S;RxWZwV^|- ztdDsXh}pp|anQ>sLbUUePh9F9-p=HJv|(RF1#Z_+bo)q6@SxQu+M7UHH{lxx8|00} z2K$00HKRlkNrK0}9A_5R@MEqaD^F(sNIJ*n749%9Ofc-_oCLUZVPB#Z;rAXq%(BKE z)2zYFh)I=rw8}HjufJ8|HodJ#Hz8``wyG?x`g$N7)>6+nc@K7GsooL_zRLH8WR(K> zu(CWjgg8VH^jdjT>w-9@ocX3V|1O>+YEE6AHHp0XCAIqgP9jV%tgaVDhi7;9l%86v z`E!25eSlwL3k7vx0H-hECi@Xu-=`88>T3Vg&Pk+lG#62-iuI=pHt=9_&3(7_^*N#u z+z!`u0Dcl`8ugb@Jc-_6*@rSG7V>?{OC0d*NrpuZrdUDYyPFJZHjsSk^F*m)__p_Y zu{T|qR_i>ka8<6D<$g;D(F1^(k%PW6hBA|r@kTXJ#4YAb71fHj=ivHqs#R}pm`i6u z^G%Y66V~V{71T{47D8@C*$yvEIkWyHc8X3vUr{&H=oCqx^Ki6{5s72H$V!XmniM{N z82k&*@a%bJiq3;>0-8_;qW>v$4xJz-4bJR$$&uX~*Q6l~f0+EnA2-vS+*vW{wP~>+ zPM0064mGgnTEMfet9*X`Y{4a05k7L%?$-V2SWO1W~pxhZMD_JMJJW#I7Y(aX+kd z7|&D95KgQ}Z@X;iX7j?_YEpSN--5OeuE?RceUL<#Jf7~U%n0Pp#6$2mAQV)KZnqZ4 z7Bnqz`&DF5-0o!%$)i^hy=*uG);cj99y=IE()4 z=a-*ZYnj-cP(>^HyW)i}{(~hNzj*y|zz1amY49i8vBLoBQAbW9QGfZLkiu0e_8*(C z%?O!H#q1BwkAlD*XEa-slfmf8Gxf;ANKdS7D~Z+kc|1tv*Y;y0_*VqLxplO0H1y$i zJhUBrl78vZ+XquKVb5RfNt#k30!v<^FnYV)wa{j*CvVg>nPOIodsfUk7C5icoTESl zSFL<@KfMpqZFF*SIY;QcMK}z-GMP=vBf;iW)hLHVg13n9s(aRk&R!}wxGyh<^$(m^ zlOfWgFWQOj0yb+Lr;qE8DOCK?dkx#jr32Mw!_hmu*cno8(D4hcxPnB^ zoG;Z(geNmi6GgPhI`6;6?Q5W-C1OFL_n(Q^nP zECanN+j%=r{9uws*_P51jPpzQ1|0_Kl06u9I7}@mJnWA`v0FjfqX-4+e0)AcmA44! 
zjIQY7MqQwV<|ZcuG<49gzn5;UwQ#O!Tts1d=}+Rj-|T@o{jH*=WS}cp+=u0Reu&^NIYN4-2+~n%$d^Rq#__A z$I}sZuf3~bkXA1d#narf4}DayJu=^AQ)>n}Yz9xI{@y2MTxV(8=Cn<39Bt33MPX>H zi)vB~Ebx`IHPH#1&kzdrwCBCE*D&D+v2C7q@W;Lfwyq;4;34Q*bV7QnB>7l8Mx~Ie zVyTxb{OeoM@Uv(vI-~ZLMs$TPd-}sLO63_7QuPuKR$9JPv##E2%r|S>S<&}{xi)6E zNk?&NkJhm;tW=52{~yNAAv_l*TC=fj+jeqd+qP|-*tTukwr$(SiT#E5pY+o`xPuONWfhO;k*f%EV(_k<#21E;M8s@|dc|fHCUe3W*#7uLd{T_Zgld zt9LMF60YCWaP_cFM4L9H_!SFi)Y}|6lTq(`=Mg&%mT5d^pH8N0anEAY6mb&bBwY4) ztzxQwi@{}ALm;GHaLZP4?#nY-H5IL1@T^NHYgSN=RMo?yz?p?=T+l)Y%2Bj_;VnxV zW;7ti{nt*2_lHy4qnvab^#ulgeM0c0T@joMJ+^yw#T^4jG49g6N8*1DNUOzu=Vj0N_6hZL%8ko$bU1i&~pn6&7L$#~i}GNT%m)m0Ht3Z% z)Z?@~L1p4SnIWPR>ec{BQV51C7>b}$r+I)I-E#`hrXKWIp5Nzj8Tl}Bd zgwX)h#QNF`tq11>DsbNfTL<(E$f0(2&Z_;7AqHcyevjxMx)cO~i3|I#Aw2cbE7gI~ z^4v0}GnBp@t#R;4y7R_MALr+EY#~>Bd`Y9Ao(7pojhLAFs}2=Qg5-Op!TMg;z`R+F z*!;s&G8{s`@~A|TQAgX<;f|;cT;1f{X}>3WXhxtY27R+P5Eb&(t@3pB*O>O57XEiL zaaQgBQUR_{vW~w}d>$h8DiKKJxY%AI`aWU*3BJFJzcgIiimzPe);h{MpO`xjiCi4y zvz{KKefN@`X(S5>H1?{0UF)LS@l^s|(s>*X?aV)hHt&btHgRm1O6s;5a0E^hwW-Ly zmdkMr=>I&hzUz#)I4u$$>e}8zSg!cENPuxG#B1)ttZ6%USH0{^bv!aNKR4)mt4lUf za(i$-zCF~(fF%7TVSC!LU~0;eWs!$))^(^1{ouV@Eog}-C4f7jt5BdN^q99YJQ z8Mzt;E_X@MjTpDnZzEo3lWnY$H&b_`N|f)+?0{IOKoP!p7#)baRd98V8#ouYFbf5k zdcG6Dz_mMPNxb$<$!R?f4ux#u?ZiPw@QuiJjA#j^mC_sxH;!$q=?_%8{he%NrEZ2f zZSt6qfHcb@@)hF0tJm2x(Moe>+cyv!{A)?gB9S7I4iJ4?#DP54xI|w_dlVL*X(ti^Fb}KXX($z;UlU%$Z{NS3o>jbtK4Y#q!T2= zU9-z^oRa~60<@*VWyUF7?=pU8d%)39m(=DA_nc&|K5GhR?4nnl6otQuOi`8xVb3eW zn%;T@O}~AJqjCw)^=gkKg;Pk3Eub}pDeMoZ3_3@MPQGcv~X-=Q4sjB%Jw#7k*VWbDbG#CeAMeb1!6!t@bTUYZ{_sY<~?C zkJ^!NHp%WS%JK@%?u+7j2-P>O65saE+J9q2fdgimMLB;lm7oZqK2JBBIdY}Fko{dLwf?e+6r0?ED za32ia$eKGf8iJZ%=Fyrsv<`7kKH#K?{8vr)JlF%P`bG7XsaYz#*mIDF@*Rwu)U`h(L;+DcvMpnYf(O(--r^nN`x)GX51{cPh&DxndAuO$6}=|` zg5D_D0@TBCOen>}z6yKtiK1mm{Fv8I;jcFC1Ycui{nhbmWe07fvXrwd1z=(((1@uS zV-4#5lDdq5;_$BF3Qkv#gS`!il3E!=*7e$DE!~myx|F^Q9=u0(`ngq+uJK-}?PSu` zR!-=wrD}b;$Di+lf)Flki@pOUw2gYnebi%g7qKYFGBYPBv2Ia;l74j_38<^h3yr1- zkF>cR!JqwHI+b$PEbLRDpW4i%$8$4Wlqu^cncz&1*ehYu96$CqbdV1F=eGn23B~+M z?vpb&Yfub7OqS0<_=ifzYmgZXX3cR4;ev}LJe#Wow`o|1L$=}7Fdx6T3aEM|5lkQk7=C|uXmm~Hl<+d^$s0_@bS%B zTT-X5O*K^YCqb{E6WFT|Vi$Hv*^3$9OYH=^fL(pYkB>wAWc;s;AWl!i(Fw)bIi5{h zr~xGCG@;|4EgYF^kt_K_PCeg{inRV+Lw(89ZdPJ+G>*!*%(lK2QA)TGCGEPU_VL_n z?;P697<2(eHk$Ln9edjK9EDfff3FMjVr^KR`_1a!E#$qQ)`MjAN04T5_QV(kiK~1a*xkKOO`KZApgkx=JR336D(; zJ~z?je|-K|tsY2Hy=6$9PlsJ-lJ0`Ny@GL}XCft_lomwN`~@L5zo#YV9;9jqOmsTm z!T@~3rM!lTly^+<;bGiYx={F(;C^w8-BT~SXcCtvwJwOjOpqI#xkK)kUaP7xhR{3V zojoTPMO|m4*g zlK``XZ3}$Rs~N0A@6k(aW?V*Pd2g04m`(kHWBdQ@MRq4xy?7Hk!!U;WlJWgkUm2@- z3GroY(%CUIJhg5N(*jf6c$@09-Xpa z4`O!Qga#J7R<+)3Te(?VgKb1NdtwA{KJCP-eG)2_6x>yBj|wI716TuieJa1?9MWK6 zsjW(?3SC_G<(Gq7nk69ud*={gFXHJ`mo2(4B~ia@3Y)oClYUQHl9=@fNkWl}d9(=? 
zJY*NQoM6VrVomt>Xpm3jH8-G3H7xGzzmDBADR>*&RIW_rKV}!8dBxb@#dAOuyQ43o zdNA|sdCvRwCwxmt*|M3L$aSVa^5O7Tgr=`L{+4a8a{&IEoUlWSVc&b6FTDJL=K82G z7jmaro~F|146b97ay+&7(L=|W8MxwFugj4fhiUV(j4Rs?Rj$AWt*OLUkbatf!N=WfNs6$Xw-&ctX$AuP+3pIC3QZjh`(J0Jkc9xS7 zD95(okV_q^nE~V>{)`GLKS9Z+ z)Da4ubTsVKA#@Ujk(czh_D(Fpos!S{cHB-iqLT~?@JSj=(L1`U#MSuFDhO`vjp1$+X`ej~uyoN(mrT{i;lXWR8h^AEoCK?C3Iy?nsN zF_x_e#6vP;>d!WSAAPl59Nm`_7?LmX`P3@)FCxh&T_}=r3b8R&&(VK$P#t9JBH9vk ze%N%vN|2f(mn32kqS|^Md*Q4^uVYMk07OqXb;Li_S1A2dDyq6wTsV>#*Xb80NnmR2 z8i2*rt{;SQ-Veo9GzAfC2Y7_kuxi8k2sh)327@zf1*RD_YL*>#U|-#Y*-HM{nM*|2 zk)Q*vgTi?aHtOrL?4pl;JjGCg`z233l91{-0aUn>hnF6~tfQ)#RJ>On*sBbVz+ep= z`uENY)S~O=;98Ora)e$<+~~Zp42AZDF|nR>WEJSj-ss*yU{mkoXlbapUsf!}#Shm2F_{dB#BJC#Wx%BJp5T^4@}>UYXP zqGNtmZUs6R3%Y~*dGv=K(-s+ME2>Nb&ll&TyJk4r6_+n~QnOeF&z~98x7$ikz%2|Gaw&e(VKs@H{b`5}B&@^Ld`)c~&lCJ5t{Kg05X@R5 zFR#!;bS?8Vh>-nR24NCL&R@ErntxMJj(bNlY9khO9%1Ulk;275HCW6(jL@cH7v_zT z3+}(0`GX5rkNR6@lAv zFp1+FeC9Qumk}d`Lcu&t!dLzAJd#FrXc&8sXHTxDw%O|S0dI4R`v4jYQ=xR`kY0l3 zMh8ts)fP%Hjgi;#IjP(*IISVuOgD1*De_(%CF#9<9B~VzGJebs)LW8$y$`4inu_~= zq2X?E1sjRz9X$jl1EpfoT=0x#lx^;H4m}e(8q&LiQbB#s-JI1`FSHNvg6iJ1)KNRv_8_H!SCwCO$NjWkXbQ((WmJ{H52C!k%z4- z$0K7c!HnRBC*NhF&5A^0tB5zjzuRt~nGo#pxX=`?AN6CH&apoINP#XKv~nY&=9coC z?t|AnBbrFUI{dP3@ygoG84y^pID<+gz568vM}O9u7xF8bjIo0YYp;_}?b(>AEcHtM z^6Gjph^PtV`q_FKIJMU#^K}I($0ch%fb`6Uj<)A&N*|UoiiYU08Rl~;Ll1kNPY3QL z&Uc%PYV#0j4iJAU3WP@E*(MbZOTGX;e$mSGPZJJvd=@WgaRhqjrONWq?pml;T@UzJu#-tU0+tF z#*S5fUUNR1C?#Q~_vL<8>d*0N``16+Di_!52LhJrk|bHoFReQxn>WLxOvI)69f@J= z%3=q1cUfIMt7KVR!n-U))b>l3P=P)#SqjRg97L@=1NXc!6eoI~U1Sx+o4>B!+sqgy zkHpCKqJLj~3bd>2)Mo8d)b+V_$?L+4&2y;*j&L*y*wQ1!8s1Zqop|0uqZ@=oxoGFS z+vS7X(sTdD^)XdiM#-bxI{8xWQ%P{a_r~h->_K!;~s1LKAT_J4OX5K;@*KhE*)|ri*)W#&hrb~|e0_nZd{b#?Z3PEL48H)B z4Q!g(!{p^hpRr))Q3x^?p`BZT!})b0Ay{24EqS*of3eMk+}e__$p@g~oG_WY?D%Er zB&3#69X77N|Kk1PTWc?(Jvs3RQOB>ppgj^Gww;FNGW~jP#Cw0Vo(`wOjABfFe=M*?gPZX_go@eK zje^DVe2Y2+bx;=Ojb@!hW^Zc|Op3qPz7D=$pX`{_nEFAk7%uEG( zbeJyxaaW&2Oda*daMUKqRey;c*IY=y&czBp;iBwCUzu%S~mdL z-oJ=f*ORuH!tG1OyEvrMNt#a1H~P#V91`bk*QlgV0!vbE@&v}J7MY`u80bL!gNLc* z(Vw1=RTWp^Wd~zy3ODO+uIlA%aZyGX8^XDz5XsM4`~$N^ zaGbR{=l;%yW0gNhqUzm`ml?yi>1gnRkSsRHY$Ag+AYhKJaw5j&`CBY1Dcz)5ii?av ztxD%D?`)Wr!`V1m^36R<@-BOO@z;5MbWNXIOfdhjED?(n_%xXflT`{ zQqs@vwuL6l`+^9Iqp@+MLbztu2M7&0q&y64U&|r_ktYCv4U+JO_BrF)r?7XztoZ@4 zQt1;UkIEFP>q=y+4!o7^cWaeVi9gXCooA++qBnB)b9`B9BminNVsLqjVH&=w_(^!D z?Rc=a!P-4b$s@?dU}H%Nxw}!u!?J{ZK0Y>AC%F1<_`?@Z-F6_BxM32{)D^!6cS@MD zb%h8m_6$0+M5X0NE~}B&#Gm7z%lUm+gZ4sY#AnGwjSv5HJv}&_-67dXz1>B&(XV=n zP^x+HYj&pjnaafkqM~TT@YgonaxYs z(^@4flb{C1-VzKmxJ3qb0RIokyW1JD9y2)-?`{>(&78rusVpuDCxE`>Hzh^RzKLH+3Mq6)3>po<%Nl|w9M*T2LB2_%t6 zHt#Ieeng!nAP0JsP{L}F(JA&x6u!VWX%;_LB?~Oq%i-QgGwe`@&(S8BYu#b|$iSF5 zNEJaix?Sa=;5?P(q8&iUS$-T;eYiR5i)7(B%78__<%u4*QpBt0a;s{1p3e&hx{3U; zh@(lro9jYm{oe}3TK>aBMCL2pd?|s z$P+jjfiVdka}?*Rs+C8_Rw)*p7WNo8xN-2eTKj*`WRm;!4pW<_N1%Miq)bpIq^a{z z_bv5|v1frEIosqBok1TCi!7r9E{v5*d4S*ht{fC;d8C=4YY_>Zrh_KD#)@#}R6swf z>}KS!VX@r^9m>_sd)G{=H-p z_Bx|?iAH)udW1|MgsAT*2%M>H4=YmBco+_*Z(-&f`aaETCVwiVU!R&{Y!TfEPIeuH zCZ~P9wb9dn18SI{w!WY~1uuA8+9gGV4?U~hB!>{CId;u2P0<_h5$hce##VUVO@pR2 z2$(tF0vQvS;}i4#bf53L=lFy30pD>oFXoP7NLd6izze2TJ z&0haMCVdG&pLOJfU5}=FIDmBzG`8yPAC1I#em^`e(aWQ?4_G<6>mpJ$vMn2rHqox~ zWDNM2MiUb2q$d83;OQ}wK|UV0Z|#U=%@Xb8wTuppzy{52ll#o$x5t0gJ|=L4Xgs{# z^UgZClmZj|UPV;Mvj^p}A;!!W7h9Wk+Byu+3Peic-J(+V7uouF{HF1*<)nsP4@y@1 zcY{Tx4f9WKRCxI6sc+k?A-+Np8Y!_MOdRYiKDo624EN#yKGFo5$LvqGJZ-c~f;*cdn6wb#gl0$4;j|kSXF8~M zF&>fBqoKX~n(mc0j!{Lp|BMy-tO!Zr6yevID7S#XjC_pPV$Ba(UtM~|<1-gm_ti#& zaUo3oMdJNOS)fH0vY4~18x$XG0K>NoHJ-4xy@pf0me6XbbFq3%&bR>a?A}h^XUBsf 
z@XMl5&;suS%8K9L>kefULzANH1?8~mk5o}0z|EUOl^@1GA#9Lf&xLCi$LZTOj3Q9o z&S*8Ox$z=Yc zw{$kFrD3pF6kBW`dr%iCVlM}ZS=Y5w>Szwb!!MJ5LB_}v0_UEi1HLMe&tQ>5aAM&=+df2~sH_^JW%KgYzxdipqYwoiOZk2{#*%%>U^Y7BnAKge@^{PP@?XL6 zQgtFjF^^x}dAJRlXSzc->$W6$DroQ)C!z}+r*-%q+tVoYPo!&|ku4D<-XkBFaxbd{ z?ze4rRFTgcYlrhH;^J|-L*AkvoLqyGOrFn&>v>2lSi`WnP>+GyyQe+$YM2Ia*!7(U zLtQGIJ{Gr(q_(B0X&Li1c9?9(HQzzU{XO#et-h-*4i7`(U|4WEtWDXs$X=tTrJ0~d za)>O^02n(7j$d_Qn2ohY8LWuiVoDQwd=GiDN1nBN^st(zfp-0*{TYeCO*~s#IVv)l zxCp7Qh?sl#qa$`V8&qiF!U&b*fR^s+%!Aa%AYRc<8_(vOVf<;EimUCjo^7KyjsZ3x zRT|~L-us^qr(Q3qMD3rw*=GfpS-LtxLe5DSzzx>tz7VSQTv*eZu~L$?cLKp@mpsNVAF>hqj0iy^%zftt zj>V3l6_gA7Q%tPX?1MZVtR=HuMH+`{NDLr396@*QETHe_=%f&2>r=C z&64R84awegIZ6GV5~~>{3rcxQu;Ac%G0@q4{^<#q#Fh8_l1KKh+{CiH!rACI>(HfW z`GXaLM>-?a@}4e^g06VYkH(uN(t&Bcd+Exh7CymZQDi>15qYmi2sW4e=5rsRW2P>~ z8H$q5KU8OkhI&7o5qJF25;3$bB)VenBy#M&4?}Pxm1fU0Zb!sa>4wcVK9tvgUu6%v zMX97Yv|5pH2U{khTuP}2hStOg!=2$QSk^bYEiuBnobcmk=Z8VfEHNTMBRJYvheABM zADv%_T*^Ph6(U^|Y7aL%`A8|_`Y>3I9X4WcMEe>(A2PW;zEOwBw0Tb}WCZizf<=rH z_ow*e^h1-90cl}z9Q1K~5ro*aR0IWGuZZ*_Hmz|Geg^ZtBRB*198CoT_7?@!=n z+c;oR@nghFf)zrOk;DLk#wT?iiOH~ECuMGXp-$TPCT0)dA-TM|nzNE2Ahv8PpIAv! zpCh3W36R`|I6U0L=DFmX7c&z)U`lQlRR*sKTQgT#_&oN)jXvD1UBBrNOG=>H^K2}m zf`J%MMSO$?rN1`DAD(e2Ar#rr6-Q?^t;l<$GP}ax_bc`&lIQ%;lZky92 zazbR&F1C60#07kJ&E=}y8dfAz7bjXjo^;=t=ZNS;b0GXI>}vl*Zt@WqcY~zh*p$Pr zkYM%0vh2WlS&t~_K7yAS-ojXWJn=a=Q)qG^*q}&Un#xY-+PLp&iKlSYR8bOJowIv$h7oHxICc(0e|=%jgInuCG2+d*-1QKu z7;UF%@iGD*&_Y@Tpoz(?^UuYl5VGUOZem{SV2|RtV*c}@DuoAifV45WBuoNC2Nxrl zd2>(y7@f;>AvQKdY;zc(U93b11KQvue&T<0X{3Q#rw*uehul@iOzb29c>1E4r<#Nr~!AyC2!ympl&hkN^P&sYA<_y(m{5RyhYXSNP+hH(I|5H}gTe`;3kww`!1BmV_mp6rwNMq*jjkvuen`&^8q(5y;IA*Wj2fhxU-c*k;$5 zeC}KAdc?7R&|l?h>eCrKLIFBesfsR~ni0CvuxQvNz+@#tG$|CPC>!DzxBm!Sq zNnq9rr=XRV-W(T0xHux%Q!Tiq*SUTP3iHko zv8%nTm#v_w6gGTvm}A<{*`{E#Hx{)ve@$Ny@3mz!EGxu>H&tS;kkgXDdnMnEc2tKV zq#ZKiJBzot))@2GY%BZ3ww=Ua_69pRPj_bRzATk`jZ=1XIY)wGlZacZ&1i5YCO9^m z>qL?=tearya&GZVTXgJAYzy_?5a=?vKg;3|2E|Mc3=! zNE|!WZiEW5Ew8h*ZRHUmqzF*A=s$v@0*K}nqn^;KqaxUg2Ym1&{;;E{Q@ls?4UxP8#zbg8O2(i@JK_eHWD-ZN!Ti*qb|p& zNS4&pBL ztP2Wv3EoUL7+qwBcHaJ{Nd-3s!l7H3j1MJaS4+9C#z`C6Z#jKIL= zkMYJlJAQZ3!T0zt)a(Sw40{0wTCur3vCvuOf#u71wi-o8j9!ScJX7YY&(5*6{o5fl zgUUjJPD74vp6x&7^1Rz;)s6YIOC-VH@&Ndto8jj59BL0Mgb|^jLD976evSdD19h09 zYVwHPVOt_ZXLc9&d6cDZv}t1)z`tgE3jA7{2=Vt4Zm-q7iq7pXBdsUi_7O$~hL>FG zlauKQM{->qTHD@q3T_J$M-&2{h*>Xhh$xD?f$=975U50H&kd4DCc`OO%K#FrI_0k? 
zCWPx7Bvf8e>99MWkw|@q4)U5-EHBlIot6_wPpM=dIvAXP(bzhjy;{O9JEoE&lcD(s z+FtR!I)C)54et)BZ!M@NW}6@0me36n#vd~VZwM)>zk9Sl&X#1mYm!-$il zSXlN)H~)(r{PMJ;fM}^1u&lH^;0g1UZaR0Dy4V!qa!nRjl)~o;3KHR4gY9}2Df}|k z;0%HJicP|fG{+)rWpO#v7)5$2pgXoMSa)rQ&gJ&#th46O5EfTT{D;#37{Wly4UZt$ zrB3=_y+hB_e$DCXal_adup<3stO5KLZb!j_`R~kfj2{9@9oSdGZ{}B4TEY9`?*=3J zv(Q~p7Mj@aaxE`&Jv^^&`-;GFee|G;zxyoImK&OPLssNL7{}J;o((-!;E23uLQ|WU zT6a31c9jX^)b$Rt)lGPlb&Lq=_J-1j_1kQbAZmq@hB$pKNr!fVW)-PDO+^oUN#dA{ z@(cYS{^<_3zwf~VKo`FmOyEm|ygZ@}6pn+Y&$?(f$pi3th>>q~@h>_6_>H~+^eS@Y zoZNy^9y_m~%5=`x=D;6{qNwj$mtjXUTZ75t@6gs1cpvUo=twt{P^I513WRco7nRK=n65!3~NFrP4vq;1a!Y zJ@SG(6W)f#^r+DW6K>Dw9o^R4Buq(`+f2vW)U!SU{j#V3$4gJ?Qz&OTIQcIv%}-*n zxAUI>gOca>=!<3?RjbC4bm5^IP;P~z;N@!TVJ33eZ1kQkiA;y2d{>o}{o5sqCzq4g zxKtn-XAt)JqHX>rCu5G^T1}Xq96z6Kiv;{^yZP3@qPKl#Di!T=VY7%MiYDHRb2+N$ zH32#Q{x5A_Y+5N1`gbpJdni-D@+wNNEq&5#nmtNaZpg9j(`dT|csK(|@pz_-DNq?k z{HW0MYy*VO`@i3XfYENR*yv=g87BwZ8uF+mxGO5&LOfV;1Z+wvI#dbYtVrxEwJ!d% z1b!PEYJ0I20ELV4?k9x&v9E=i{3t020^|(6gM)2a-<3~W%&a|4MUW$>Q0zM4AB zOg9xBfG2q&wB*iP3=5B!N(&S6vVqO?l2(;-RXPl-){-4aNn@!TsYZL)U^-!6O`A8; zP^OIQ)`i$P#c#*^{oBY(#tq{zI=8%TS~meBhGSCv5=m!=zj?_pc#4F(g!XcOz<6lj zG{j1H+=1BM9hV5D0|iUahe8>~6p?irRNw`$fHC%m&_)KWY2}=<2KT!LJSZYOxs2Pq zA@3hCSNED4W{_zyA7?ntW4Hpl_t2PxCaFUJ&|C@O-gYAbdk=N>OwSlG8SoJB(Q}Qv ze)S6=41?vL2tYQQ3fOZPL>i61oY6E7H-v5UXzkAW2E^lYZonpYb7ewi_YtApZ*1R& zJpi4M9RU4w0)`>G+t&|rPsS?3zAvao*jR9zJU8Rs&>Z?mC%s0+D}#XOmztB^^)fGF z2%p2UNZHvOtc5x%F{C3It^BZ_!%Rg@4h!Lz%cV?UF^OUy-AWm%moNp3ej*1#z>rd( z2#mzHOE6(ARHU6g$IkmrKCZNZ+q^<#9g#;4t zP-GzU^{*aJxY4gDs{vo)Wk_wBC?D2=7&c=VQi7CJ&+@@pfBTFWeCFYFMN_ zz4tQ&&7`{V?qTiKA!fM4@ zkr$Y7g$mc+ANEuvEg$aLQ?ZoJa9gkS#v<>^J{ZT*4n)h!sWxh=5p7g{#}J4HeAjFO zdXMVh_k|_K3Uuzn+!MVO1K^^S8Ly!vaC4$UD9-}a3@e(T56_8Ai85f<4>I9$z5+sP zP3QrP?n*-N{%Lr*L4y;8`;p5k<{1*yCg^EkW!jp0yCC!{&qgMn;8deE&Do3LG1^dm z{I!k67;Q0lI@11Zgb{@_G1;zFddcm1Si?_#zVv`#ypv@cmn=D*>o^@Q~K>F^& zhSUB@E)Bn4HItaSe*}IlUwpQp7X^~eDHgg=nyvfu)4g8JOUj8xJw}#Op-lM@qFsqo`;CyQobjXDX{5i-S4A1v2(USs1dO57N3{qIrYVQj6eb{AkBd6UupNy*$*<+1LnS+=I7=RX z_<`(e?4rofh;q&tGfOdA0EhZJ0%U|{~fC>JEvhH(7=1p%q1`H9iiU#^f+S-US zA5h5qujfvjJfQ`8=2*ehsuTou1)Eh3VVJD&$jzcoEUor9tfI7GkAteTEpQrRtIC`( z@HObfs&f;KRqFlSZQ;iX{ow`$MxDcK{*r;cz zYigumDBS(~TR;Ecs|K@41ep7`$MqY+Lc=P0BD1ieZi46X(Q?7}jI%DYX&M{;VS-cu zFSEN+g_Mrhz5zYq->2!O@j=QG(2)Y<`6+54FLz!MZ3i*S|KkMG^((+BXZzqB;+5kG za|Xt|JyC7QF%35psr1dohX1k>-e16+H(|hiT@Rxm3evgVA?i@*?#m(+9##`r&Ljb(3xMv}Jcg|l-tSR;z@`4S!wx88 z=pGa#60&04S9~8B(H9ePDRpD##9H*j-=MDT>?VVCns4(*EIz}%;11WLtB1?i$i2Lm zIN8}ogwJ=uOlrob$249CiZmiy%K_Y`w%ow-1#t@BD1xwMSh_4tt1h!Mog zba5i}B-&U`)E3SNmgje_&=RwS&#?^>JbQHca66$at>$WF9c{7g+cXXn+1&IHuy}Yo zEDPpHxWsPkja_vdmc(z2d422;PJBb1nr_oNL9K>3OWrSI!r9W`%4qvm3$fP!Iv@7( zxV+{2l&wE)ig@E&+CrgXHq50()<>LBh1YbG8NB~2{#8Po-_4)Q&W6Z=nb^7j?tnfh z_t;U<7)ghw>Eu;1>}f;vW78IPo+$Y9FN1^2ASVt6jdLph%m9)c` zaI!B)oEuT&8mAuUERIGTWe=Z9c^+xEs>n8_XOPat0P3eGd!#o3Bs`_XuDf4+1_&NX- zS8C%bdCbLS;Ezl`BlirBRnR!xabbd)xBE^3+f;|TiTRM{QW_sPJ0Rl_Ivk1D3mQ+| zUMu73i}={#YxJAHIMQY7dvzqfatgt8sRc<8+n1h1x*eXv$;1@^dj`U~TLmD#KvpgB z>~*rO3C_Akg6KX>X#B|qiBiU7)^DE(4=m>w03TB@z_xmuM%eRq1_R1^(%wY}TuEjB z4O0c`wad*k_j^Uhs^AmaOd!gEHLhJ}K21DW#XL6SQ0MC`6}d{}N2cS^j!!~=FANNE zFj1l-9MQnjKxQraQN+o-aV<+%rX&Z>?%)N;GZ=NqbB-%E?>=QAk@^S(rxUeLw17v{ zdwn^=4p-&sZDo@DmOf(?6Fe`&MPqqBc8H7M>g=M(vueZ6#dX$jfZ4qG0u-xy+{nFO zZDR00`{M0d_oiU?^jK>w)|ezJ(olD%FkbkH z<1(t!NGjl7yURZw;kE5dB9fn|qSrFzyM?Y%n~n!r82`OwoOD}?XKTcX%wMrvw%ENr zQ|D^nkfU)iDKPUs08W|hG|B+&;f*<)Ov*q#9Hvb>eaT)ydaX$Zwg_+H2Z!3L9x<;1 zcewKkm23L*c=Q@KhM!3Qx0B8_l1#LY*M~)Qu_NPMg(B+kwJZ1LxSDyW0*4S++d3V) zfZ9g$o>pBly`;^qgnG)DP26sq{&YV9xn&~$1)xcHyh$inqj}6@WQ-mPiMxwcu`cB? 
z8=S>BI1smk2Bx39+7iBH8XooDvr5kskpb*R1W<5>TXU;bfaLYMMhy4IQ^cMt+(U5* zbSvjeiLUyf_|~twNR1 z&G2V%INNHk*KWpUMJK>8B7jn38R5~_ZGAFR6D_m_r^=cpsJNkAFqB&o|J0aj2QHqS zhQ?)V??&KnjpOJo``n#};cs{A?hWZ3qR3~RJJFZ&Fp!#*s6c)7qTs4@MSeq4*dK|R z(#O|j4#i$wjC@(KNXHAPS->eIBH?E7gEgn1KMCYcTS z){3kbV~-jt$I2F)*~K}X*Wl%dYg@lVx|o0KPSlBC>y&}QS0L2zVeU%Ou`{NJ64ZUGc~SKilZaT01TSga4BT*_FkOt@VlI$^ z6NQ#XOjgKYM@om>W$OUHF24>X)hC$V-hjz^Ej{i5|I16H^$;fU2 zpau1wL~ahZ85_d2zkZb+RT{diB#?iAUEF;-5h%P;_|hY5fKlrFaQ$h~gf5tlYc0&# ztG2(0Hk3~={oLZSIrrs8rY=#rx_yxHG1|8TeK#qHg31u8C5#jZVMN^$=WnCGAo|r4 zDWueWc(`g_qY%OxZS8bv)k&bjXIiHxRsfIt8~EbJ$a)NNcCnaVtq&`&g%$nrf{5_8 z_~~6RfiY0R23gix9Wwx(H9KK|dL#;x)O#)+v7L}u#^9M{_5P*DAJptKv(8E0r|9pe zjzahR-odR(QeA7(K1keK;weW6`)lb~$gQ1*>;EyT5ahER>V!PfiQAR4YjD~Cs!*mx zzse`4yWr6LVyU37V_|o)I#CI8*Q!u`$PU;rtvrSahAPG)oWe@IL<5_oixjzrk^4jLQ_ z+(ASxM{~lX_iN&kWo64qyLyMP#~;bqB6Bj|CX`7j-;F_0|DERRd=U^7eU?j7t9aF? z6cfj{xQ_=I?V_l75YL@RntQ-WZMe;v)#gsg2H?V|w72VY+!n%2I+%AXZ#Z!H3`vUg zUD@9tuXt5U8KH2rS*_onf`eb8j{VEIv>U-jBgrY5Ui3HE z$79ZO{?Fa#uZ{1%*IL$jr<=js-lx7z26oHe&Qqc|j0KB0CNNGyL`A%Ug1X^xL?94w zj{reGTXSNM0ZO0)RE%>} zWJnO;FTsKhe>nWZBOtMaY|m9{Z~DbZPFT4NHP~={eLK3#EHSsab$b) z**S2>b|FpBzXc469k^&9?{ZiLz|H|blQEFl*aa4L$B*T@>x(Fd2tgvih#=m$OlW71 z2Zu1hf`;Yw{Xs_jsTlo;W0sV6j1LMPQ?j8L-{*X<9--mD@!T9(&f=mjy z8R!E`V+1e@K4oLU_kxbVf=m+*A_=|G$H4*<@TMk+O&(0|#m-0vnK>Ll&w}@iUJQmE zv8Y3#Rzl@F2pt-Q>6VxvfFGU86mJU>RJj zkw;GGh-%b&{+Y2&^s1Wo^vPhTPr^)eG+-nUkU$dSAfO6Xpex|Evu}L;`DN&j@T9x; z_rSpV_T*yJfyY3wmGv={kNcrlH{eboV5kn-mG#g4s9*67A0MCycr4(C0PW3zhTq5^ zqA=~BNWv_{_6Hy=P(t%~M4+$luWz$QCVeD{jNWsBGymsw4S9tthX|KRm%<$HP=2z_!1#KePo%NMHBEZ@;|1{vBVxvH#8T+dlcbAC%X*wf$RY z{webNs}zi9d(HRD;*nvhF*2-P!NY$IaqTZ>Ao#mu>Pj^-wtDE_It8^b>^3o2bN$ON zj+<`5L7*6s1`lHUeGc_q4vTnrV`r#HFM*l8?+zc3jR^HcxI5ezctdcDnjWtIT^Ph) zv+Q5)0|UwIa0%18!wf99>J2(@w?KM8UFIDm(Y$nZY^en5f0 zfCqZM082+W_V11givoBRe~bP=B7y*3mA{cFKY&-~AM~Kp`x}Xg0C++Fpob_H{~$2{ zFZgdH_~7?&JvjVh{hKE*Kbm0`S`W3qItw{}+5n z?(h%fdq@TQ7ko(N_)lpMwh!0!ztj(z;QxXTnOyz_g#o;Xf9iY)g|LQ0|5f=zE`<9( z;raL0(&ii|717=uT zqIUa@&#hU!&iGb4_S_iRz-Zxjx%z%uEE;Y-*=?UYXCH$AJJeR>E*0inXID{tt$gB3 z_dX?$`u?)T;n0~a?d@t|Rh-+h3GQh9s8<0cQ_Oi5S<@6xFc93dk2oG5dJs&Volic_ z8NH`hiRZ-j-H#S1@-|xKkGY!mTCULJclkicL}^4pi+c8Wc$~#sem7Z#eAc_PXazo; zGELSwvgYuX&wiK9YfD4t625nA%ECM<&PGYWi%O#WCTgYad4fX;Ljs0i*5Q)Miih&C zNUMblmu7;^y{5fLy^s!$*jD#-PQBMICL3v;AxD^wHEB{kJ*-zKj21bko&`U=V9RRo zHk$nFt3mMcr)84H7BPJDr?jn|{_9qmb?yOs%X$nSmpq1)r;b#-Cc@qJHaP4_4o|@u zjssoWBQ6jJN>k({=ElZAsok2kq#Q*YPU<&g^;xv8=VGnY=v2{+Ntx-ntpbf&ID=*; zbz9$Y*fXkk+xy+IFT?x4ZIG1dm)*-%SLTXl5*1=S$Bd1+YDNiL8*wF58e0=r{Q5X- zO(i_$7h7ol$&1RKlaco)2POsFKuMl5D_wa4Tn+oT#75bgacT#Xc`-uNreY1fB)`N< zRNFYveSSW}4h+8c?u`H}g!z}vJmp%U%)W8%Kp~gzv;~@`czjQ;)9(MpYS_E?!*4Gn zqQiy~_oD=*r_o2PGeis8Mm`Fe^nLBzlG6?yUx2Pb4YF_Spm_0zuVtVU2++E3*4xid z(g9v3?5iLeYB8liYyYU%qd!ij*d+yY$0_haAYJ7s zD!oWE2{|zY3^z6dQwWfFD2bsmH?II@v zBlPk( z3H}jXt&Bl>=>)mQM1~>sCz#sFrudU8Zg(2NUW&+4kBoLI3t5IBN}o~R6no0ALQbj( z2kkyqVlm;hIb{)ye30lCt70a~_!D|VV8PL+mh>ymZ=;Z96ZySlk_kMS!33)kkZ03@ zTyfpF>1K18IY#W67+Jay&z?;ZXHe986?BhkT=DLL?WNI!m%chXqgI1kVy~;m_7Hpa z`q!jh3}q&)VQ`rn84gSOl}n$v>GN^6{HLc%-2+{+=fFuRP`Uc1mA3z7W=n&;BJ zUpG{;qw3{y*t5T+mWLVlVPUjakfhV#LFh4xkj^hT?0`lspZ_8nS2%&~)%fA^!x7(3@vZShG4+Ol)v^B)N?k`i9oYycs4^nh_vUq!mvIG;NA) z%$v-K<6vnOJaU?YuX{w_t_eX=>2hODtqC&hWRG;shGVwq73@t-F@}8?nz)_CWZTA9 zp!Z*~WNX`h6=UW$`$W2{N5T`qBw{OCq->knl6Io)nAH+VWg>&mc@qno1yghuo>R07 zy|0|~$C%mDZzP@NdPd_j!m$(-6^u$;5ecH-a*DvG2-48S9W!QNsYEA^quJ_b^7EO# za9~D}*6SR#t66)r2X_src`2(7i)?oN^1_^1hKfL2l=jqJNx+KM|CdAu4x|X9Wy$`N zBi0C2EB_sCqW;>+nd=AP^;CtKH_TN1FZIq?bxxxOsca{ZavW#)Xw}PLzG}Z>Igt(% 
z#WjC5@(!C{$Oi39=1j(Hks^_P6l{rUC|AM850=M$Gd#pp{t`4@^PCh*F5b@yNT#g) zmhwdMs{}^Mjhm0;4`IwHrTI-+M6|PEXq5EY$5+oBEEbLV#xO8 za@DE}RsP8I!bL=nl9Npmy9qhduUBjB<4?_mNl#dlDZh^iEScMJ=;1c8RlPIG?j;Zw z(wXBn?q+*hxi6=}AmAd74&{)`|F%>xjEipct8D10OM}LEltTk}tn7$~XYooJH>@Xo z@;FmkyDN;Pl{$IQqiXB>aH|(A6~8My)J~za#?tu(_k7Y_kg#)vBPK_x6DQBPm94!| zE}YI9K^^-=MnIgu2r+*&@5w^_ERDfk^4yvJ0RKl#`!gQ(NqldLcWV;qazNO%MM9N~ zTOaXDjW3)kDFYkT#Qnl7xlH>RH*u0Uo{zsVIuTrq)}X?uf=+E~c=R@taFEWzD0W%O ziNZv;rgFS$`?HiB!^J)fPdzauue|ctAuJ^J2=g&wFzqLJ@BS1tYgs$-^JBA(!+mD7 zpddA`MU}?=J}Xt)IjZVnkHA(Z(b4f8hqP(QV==d!$v96AYD`vT z2Sm$cl{iF$@gOCz;H}yu^)^VyBe$0!gBAF84Gjbee(1B8V2NmDQTk--*sJ6kh@N@KIpdr!j^tjF3n2*cIEUs_ETK5OT75n1YszJNw=S`EYBP4^t0~H1CMSiA^nvfR zgq*`{E$T5?(;W+1HwJGKG@bH34I;n_=55ot>%fedzB>OZQK5>La%}yqdkNuuD(N`v zxK!1|ZzrWb!DOB~#l) zQ=h3Upbj01S2Y@wh@&V`o*z55S-$%3hR`m?|yoNqNsH* z-IfbwB2hkD+&683&3^#lyeyL@z0AwEV_dCrm+5xuU2C`>N$kB_Lo{ZQ(Jp>qFyjMs zO!h(Gb>2m{5Zr_fuEYu1V-wD~rufcCg+&6d)-xE9H`*RhXJ5anOZ6y7u#S z#CM`Dcu6Z3s)n72L37EE2*fwW_&lx#-yRQbn#W@pd`LQCdHNJ3CqSkNOh|!0b4h}$ zu$VHKCL&VDZ9X{9gjQXt?zV#YjrJRvPkjiBOJCF%+$v#1hCVAs7-TfeIMJPGGdKXo z77Uq>NpYNVq`xRJ3)z3`$dA7vx!hicg%Vy%GlFv-4EU-9i_o&81%+(<^RmCVi-Zp)sA`>JX)H=9WablyG%% z!~zoxPo-}?UW|W1#fR@F_hbcTm_K~P*NEIM7y^qAP^InGRNyAlkbAc#B}Q%~B=zHk zsmI7L8@fk8L*M;E_ESH)n(hUJC^@08M$YMsa*X1q;I|2>5nbi!Mf6<^0dW)`QNE)4 zKQZw|ZhA)2)&C)W{;3`@|B!=@jU-la4_p951|0m}&p>Cno-nwJ-X5u&$?}5K+kkoo zsS0D!>>jTSf3>fqkvApn1sFR3Z;LN+PAJDSUbLQ?7q@K&!=P=Wy5w}eq_y`rATW!N z!}OUi6+NR2t`_xnOp8A^#_hrDWGaW=*S3dmGZP*?bE7dB0MJLM9?tO)V!mR1nSVYI zb7~YAnL`{+8xcO^v;A0%1v9g=AOEW*T!dh)$zQwUHLRH%pe7Wd{dnsKuW=+wCk=&h zVxYgKjw`f;{8oBPsc#gg$P4MZ+E(jxHJ9y+oq)GwItx{WUV(^6E?^Mw?SbK!#u6?2mbwW&3^Qy~jjHw%5OPbSt`x%;6-ef1=IoCDq4Ns*i;1D%nh`BCMm7a{j4dX*kTy z;p=8SIrG_7Z+lMt1c^z7l$L^T@a>Wr|I*-;-Q5`pG0XP>6r4Q?XswdSYyx_ba`I(V zCs5n6o3}0g94qpMo^s_NBOA(~yY9mullW{*B6=ncM(Qshex_s_7k0t^+Ab-5cwU`V z?oG*}jq= zDyt!z8ugtd^}@2cZ*%!7i2rL%VUNuWJWpgCJ(QUypV%XEd_=HFXuaAa|t)k1Zc4mql0wapTnGd6>EV%Ft5L# zdY)~9EDMADOEcvJWS*YK@aPcc$m1igpl=1Fpo2&CeKe0tIa~E zG~m?p5oxy14X!UbCyTs%GW3QUHVl;2H>xc;RtkcJdq^>d!8=t8rjjX{Izl6E-0oEz zTj~UJvEf%I$$WtZ`Ng<4VKSTffhzKrA-R-U-)VJ$2wLN?g&^lrTlK&0 zz!rVaJrgtgEu?dVMD~)+cRz#THe#DhFB3x#M?EDoSgW!&(EPw@Dm>5JE6V#9Yer_c z2X4dxDQ9{D_Gfa*pOT2#1Tl^D`R?Hrd*$b}xyvqDy;27*P=cD_^*woo={b^>{)27NSBx*>MLo@e;0LUM&;6{KAD0G5>x4(bui)My*I!(Eu^uh8VW zy2pl(MVu5WatqW-9v7*|w*t@3iOw=~USAbB+zU$t#Z+RsX|1ypD^=faY-v$26VzFn zFvn^l0{miIw>Bv3Ej#4wFxT@QOMjfn30Z5T_id65HydrP+qg%6ck+USWv7%Ko$>0+ ztba{<)XWWM9y!y;IR0%Lv$S{eSzc~v2j(;;3VVYlV9s6tfbpod;0AP#piftgRa@%R zz}Ak?i@e!aBSXJ190%wZPhw6S+6=JYaTVzqcbs4p%+G*=4_N!*F$?eKo4-hL=JM2uLb=-q*D+0((@blf7+BYu|WJd?wU&uA8~dqo`M_aowMX^yl zn${CUZ996aC&ou{pp{qxuWp(@HPQ3w8+Qe!d9A)skJR0fs)p7!Ifk596Y6>F!y zo!r=6x+LdYOE;yPhs5WAqTgxNu)Vfr)~X6^8Tu$qfZeF{!ahkYU|a?zEh}uZ@$jnn z9Bn!N9ZPqlwd)xYODCNiawrpFRABTJ^L30E_>~s!6D77MUvc}_%dYOu8bjCi(La9Z zCP|tgMDhwjqlwB%;e3*~=}+B4vd|J#X0ZM|GA-e3fwD20PiZ0N;<-oU`b&b2J-xb- ziSm!gE^*wwPP$PT<=qPPb-B*s*ny~GVjfU1hKbIkx0gaKYFL|$r)Hb(49_;_sve{H zt@)EVKmgW&U3{h;zg&j&77I;~Y2ETJrHw+>~f2SrMKSj^Xu)_74zVC`CtpHm|U^8LxW z8`3jjtfyxQ#s@qEd0!@tG&BMgscAQQcId4sNTUjT=pP@d?nnzwyx|dIQ(e0zos{Op z3=QHGZ1ZxA8|K@eD;J+&=HlbPUwt z!dMU{BHQSTa`!4IRZd%F^ba;>9&?`aV?4x%L~L`4qanZW?A`N*SQy`yHGlx-HNFVd z$y)Vigk3pol6=*yLHRWwO5xt)uIb95F?fk@hvWf{%u2gAD46ONt9ahyELy|=v2c_~ z;3)sg@$szRbjE3Z5Kn+^ItJ$uxhxAoXJq`VzHyTeSr#u=@)+_pz7{#nlY)AG0uwA_ zJM3eZVeOOHPZgta8=NqA72x7WH7B%0TT}bmglJL)n@I7yBy2#HErBN~@|jo^$m*1;z(99I4| zdLEzIbnVNDyTWNbQMyJUWNnU^O*>IoacODtddpnJEI!H(pM}g<93gI!MPi2j1K7)# z{BYzQHo4`Zrum$Gg^2SRmRn*-j{B^VY$w=n_o_A5hu|iUY?Pe7QxzXba5;`MR^jF` 
zM@QS%QKr$`7FcjCvQMIy@}%0df57-hL$(&%rAQ7HtQ|wd8cnNAAUWOSlS$+EvQSUM zY5yUlT9|Wr&0+Ay+MCE3jS@x|FKi~p$%fcAP;zY&l$-&qmZ@k+7bXm^ALf}~BrY$< zCc>Qgek1$nYy&G|JF6$>*)7?NuA2 zukU%%1V8w-YaydID*C15m$YbM$JDO#kz~!tm04~DCz9&-bS0@n5W>Yo!HHC{%NjG@ z4wT4x{l}Oa)9=Vv;~9U7IY?$TS&8m(U!F5(J2}&ALaTyXSLlB#b7W{q3uF3Jlxvh} z62D2)o;bv=&s&%tea|$yBw4Bx(VJRFtQU_&dTH02)`ZMdZaSEY6+W+<$hE=-;iyXnG)I06;GU%eg(yb5A#gNd#`b`oK-qzMW_@Tjd!Lw z-Dga`83>}51H;P-zwP?%e%)-&HJ#~!88x9ffKnglFs;Qd7Zc*qK;?Q}>H;3VSAxH=jf`w5N-22yv9mb;6qI zN*`Sgrl;==jM^Y|LsxlZuRiohPP~i^fwt=4;O1kE9RX5B%ILW~6iTQjN!xb0*;$nx zDmkd!3s7l+9)s^sl&p83jG)pr`J-ghS$4m;sFol?P(p{bICT@)SmSs)r^t%Fqsqyj znQr|IaDMmJV^fZGTUnYnGJkPUv~aCWh#*)M%Mm^6Z1?mwd5lc&-JoA3r+c-;if;(S z<7a<9wr}svcx6uN<*CNc+XecnS`3EN;fdMz>teCx(ErldR2am{!2@G=a_$_-klaoMR)CWl?HtgW=zl|W74tssKyU%HQbrKMZDZXE~&}%n7K2g z41j>o0ZI_(+JUZ3pU|L3Cvnptoqf9)8eyN~Pwmy8@0!bFFFQVCQ{_7z4Li`IYD?@2 zM;1boj9v3p85#=3k8+D)0Yq;_RZW2(nRm!v6GYv$VpfBW6)-m1DQ`!>Of!WR5Q0tJ zZXARGUuQyGm1! zh1F%B{H@@@7(Wkp$T$~EHlw0ym;An(OM+fY6@DZ$^jvZ=Kg_dbLcGte_9p9s$9CYW zTXA}}I;_OfG=0ONCdH>=6?H+R{$*Tf=>pURlV2N_gr+b0ru5EdQY&mS!)G|aPCW1` zq6i-$fwlqCX1-8nA6y!Pk3>y~LskZw-gP;219FFEGv+U@bnklN=e zaE~K#w74;IvE~@3vZWmz5rxl7Sq50GaZ%2{;OhHz8w1q5GLP@8#yZj57lDl*z_sUsqD3rks^Fl(mRwrpFI2J7;(Xc=ss>vk5J9F#lU;vD~%l{KWpDw2d{VXyd*m!CnOW4z}qei7I3XkVdnKRurL6M_sjdM z+RmWwdAin3kZ>2s?+;$1_`A9n*T*ZWsQxlwsj$QM&`36?ns3wB4RD>m74+nv0u?y+ zu10jC|4xx2e3~XZ7MEgwi93Z22nHC^;Xd-!l-Fi#TobAv&MfjR9=oLd&j+~q@N%13^J%oW? zJO*&vsUjy~QB|Rgv^vW-nYrY=^Kqc~&dlCeHZ#^or#IN7y2C@$z~?kpiQ-BWL$7Qf zO)$!z{+e5RziAMklW9?oEr55yb-Sme#RKxfsmpg0`r6rn|FRt|YIbT4M4YTe^cw0e zN||7}U|Tr2TFYBZw`lEf+!G0$XHO1eB6U z6cMB?-2FMN2RF$0V)Dk+Zu@v?n6b#J)UC@scrj$?2Oru4d=k&9@2~ z0$4XO6z|Y}rdLwx3(-zCN|&frn1Xk2X45@bCt9`+2)e1?A2fS4ozHajVt#fv(>ZHF z9_+D$iqT7FD)?@6r0BRKEotMRM!J5=@|%1_eXoEvtv#{DOt97O3JY?eitk6l@9>O) zV7$)2#$mWtC|(&8Wt*3qe= zDAFgZj7BQ462~hfFCoR`@He*UIRxXPh409ItCKox8jNkb*b9gy2^aY zC}P*$>kS^p3Qa3)=(rTD!lgLE(t7w;Z&wo-SB1FxG8F*;_-uM_9Lkga%FoRmBvKK$ zvp3$*5!WsFIOiB0ErI@y&K)4#<$H)DNrq?Q*F8W+ls{d09!}qB>HZ}l{D$k5T36>f zOM%gvYSw2}y*EeFlhm@JMXUE+*Cmm?k}GdSP>QeHTCkaFDi~tcsq(OsKbKyAh~FPA z9D2F*WcEYo)xAbDuhD`4(a${WZ`OQi3i}k!HP|JKQ^SviR#x_L=xys$o{)Z8rERi; zImS*3KT*3vM~Krcymr%6|GH5Cdu5Ia-fw1!X;4%4`VLVvZtFTJ;{3HXBCCzBMfI{^ z4<#F5DvVH{ZW{SP1E_@4bv$y4bc%YIf#f}q5#1c}ceT{XaHZEHafFw6lewqwf1PNKuj=ac2T+DYLmo*KuCA0vSFVt3t$mEv}zc)XZ ztCl|OfA6{zN8H<3*=5Sc7)`~}Oh458n7&A@8TDxL&7hz4KJDNv80**!wSh9gyCY&e zft@%Ws_a;cAt#!-@e*WNbBxNrd7}|XYf{P_S~kzb5XHm$Xd7s;GX7PA?NEd-RL`~W z$W(>$E4~-MS401^aakkTCzyMnE0v@~DF6>iZjYPcB;L=7g^+<+)lDgB2!i-$pR3b! z8Phe!8jzj#HjgI=4POt5eoP^B(E5B{Nr%urd!sEz>ab3o34cIoEOLj@-^y87Z2(Qt zb~<>6{iN=!1Q+K5gsI!2kvS00mEkZoP2+7QF8CQ{IInfQA@FQ3XK`ps!YifLHQl>s z8xck)QJ|j?aA3jP5!Zl0Aj|e60y7kTtpwMG%&9VAx_vLU{q<8i9-ZTpt5@)cRTsVf z=Nbs}rWX#EQU(##c5_OJD6BdR3^!X|KbtCKhDmrPLfG}#WiGZex=R)>J)?8|nDI_N z&ftdR_;czY)4d^7*j2YUD_`v&=VX(zt$FdhzGQVI1rwE zTk1}kLur~Hl}R6#qr33D$H`mnX5yODnjV;YMj<7MbQgAm8c{w^ZGcBk!>MZ>dd(yt zNPSA@dqYD-yr?-wm82Ki)sy36!=u*5kWo4PCHP)^4!sO8<3wlaMoc=AN zVTGhRI6sJ?)V?&hYJewbjjGe2f2Iv=J#Hv0GPd97*WZ%KY%E4$q(w5vT);x!{g&C6VxO^)b)&6XDWf-$G&!SFlUpp|B z=ssX*waIo}aDnG_(b6(bBkGZBIMipyJfz0ZQ+u(sr-t@kHHu*ch7Rbv^f>4+cQZKub=p#*itrkOYa^J~? 
[GIT binary patch: base85-encoded literal data for forLater/robnotes.pdf omitted]
literal 0
HcmV?d00001

diff --git a/forLater/robnotes.tex b/forLater/robnotes.tex
new file mode 100644
index 00000000..dd6ff47b
--- /dev/null
+++ b/forLater/robnotes.tex
@@ -0,0 +1,287 @@
+\documentclass{article}
+
+\usepackage[round]{natbib}
+\usepackage{amsmath,amssymb,amsthm,bm,enumerate,mathrsfs,mathtools}
+\usepackage{latexsym,color,verbatim,multirow}
+\usepackage{graphicx}
+\usepackage{caption}
+\usepackage{subcaption}
+
+
+\def\be {{\bf 1}}
+
+
+\newcommand{\real}{\mathbb{R}}
+\begin{document}
+\title{Post-selection inference for generalized regression}
+\author{Jonathan Taylor and Robert Tibshirani}
+
+\maketitle
+
+\begin{abstract}
+\end{abstract}
+
+
+
+
+\section{Introduction}
+
+\begin{itemize}
+\item Data $(x_i, y_i), i=1,2,\ldots,N$ with $x_i=(x_{i1},x_{i2},\ldots,x_{ip})$.
+Let $X= \{x_{ij}\}$ be the data matrix.
+
+
+\item Generalized regression model with linear predictor $\eta=\beta_0+X\beta$ and log-likelihood
+$\ell(\beta_0,\beta)$. Consider the objective function
+\begin{eqnarray}
+J(\beta_0,\beta)=-\ell(\beta_0,\beta) +\lambda\cdot \sum_1^p |\beta_j|
+\label{eqn:obj}
+\end{eqnarray}
+\item Let $\hat\beta_0, \hat\beta$ be the minimizers of $J(\beta_0, \beta)$. We wish to carry out post-selection inference
+for any functional $\gamma^T\beta$.
+\item Leading example: logistic regression. $\pi=E(Y|x)$; $\log \pi/(1-\pi)=\beta_0+X\beta$;
+$\ell(\beta_0,\beta)=\sum [y_i \log(\pi_i)+(1-y_i)\log(1-\pi_i)]$.
+
+\item Background: Gaussian case.
Selected model $M$ with sign vector $s$,
+the KKT conditions state that $\{\hat M,\hat s \} = (M,s)$ if and only if there exist $\beta$ and $u$ satisfying
+\begin{eqnarray}
+X_M^T(X_M\beta- y)+\lambda s&=&0 \cr
+X_{-M}^T(X_M\beta- y)+\lambda u&=&0 \cr
+{\rm sign}(\beta)&=&s \cr
+||u||_\infty &<& 1
+\end{eqnarray}
+This allows us to write the set of responses $y$ that yield the same $M$ and $s$ in the polyhedral form
+\begin{equation}
+\Bigl\{ \begin{pmatrix} A_0(M,s) \cr
+        A_1(M,s)
+        \end{pmatrix}
+        y <
+        \begin{pmatrix} b_0(M,s) \cr
+        b_1(M,s)
+        \end{pmatrix}
+  \Bigr\}
+  \end{equation}
+
+
+
+
+\item A convenient strategy for minimizing (\ref{eqn:obj}) is to express the usual
+Newton-Raphson update as an iteratively reweighted least squares (IRLS) step, and then
+replace the weighted least squares step by a penalized weighted least squares
+procedure.
+
+We define $u= \partial\ell/\partial\eta$,
+$W=-\partial^2 \ell/\partial\eta\,\partial\eta^T$ and $z=\eta+W^{-1}u$.
+Then a one-term Taylor series expansion of $-\ell(\beta)$ has the form
+\begin{eqnarray}
+(z-\eta)^T W(z-\eta)
+\label{taylor}
+\end{eqnarray}
+up to constants. Hence to minimize (\ref{eqn:obj}) we use the following
+ procedure:
+\begin{enumerate}
+\item Fix $s$ and initialize $\hat\beta=0$.
+\item Compute $\eta, W$ and $z$ based on the current value of $\hat\beta$.
+\item Minimize $(z-\beta_0-X\beta)^TW (z-\beta_0-X\beta)+\lambda\cdot\sum|\beta_j|$.
+\item Repeat steps (2) and (3) until $\hat\beta_0, \hat\beta$ don't change.
+\end{enumerate}
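A small numerical sketch of steps (1)-(4) above, not part of the patch itself: it assumes the glmnet package, drops the intercept for simplicity, and uses ad hoc names (irls_lasso is not a package function). Each reweighted step is an ordinary lasso on square-root-weighted data; since glmnet's binomial solver uses the same quadratic majorization, the fixed point should agree with glmnet(x, y, family="binomial") at the same lambda, up to convergence tolerance.

library(glmnet)

# penalized IRLS for the logistic lasso (no intercept); lambda is on
# glmnet's scale, i.e. the log-likelihood is divided by n
irls_lasso <- function(x, y, lambda, maxit = 50, tol = 1e-8) {
    beta <- rep(0, ncol(x))
    for (it in 1:maxit) {
        eta   <- drop(x %*% beta)
        pihat <- 1 / (1 + exp(-eta))
        w     <- pmax(pihat * (1 - pihat), 1e-6)  # W = -d^2 loglik / d eta^2
        z     <- eta + (y - pihat) / w            # working response z = eta + W^{-1} u
        # weighted lasso step: minimize (1/2n) sum_i w_i (z_i - x_i'b)^2 + lambda ||b||_1,
        # solved as an unweighted lasso on sqrt(w)-scaled data
        fit <- glmnet(sqrt(w) * x, sqrt(w) * z, lambda = lambda,
                      standardize = FALSE, intercept = FALSE)
        beta_new <- drop(as.matrix(coef(fit)))[-1]
        delta <- max(abs(beta_new - beta))
        beta <- beta_new
        if (delta < tol) break
    }
    beta
}

# e.g.: set.seed(1); x = matrix(rnorm(2000), 200); y = rbinom(200, 1, 1/(1+exp(-x[,1])))
# compare irls_lasso(x, y, 0.05) against
# glmnet(x, y, family="binomial", lambda=0.05, standardize=FALSE, intercept=FALSE)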
+
+\item KKT
+$$ -X_M^T W(z-\beta_0-X_M\beta)+\lambda s=0$$
+\item
+$$\hat\beta=(X_M^TW X_M)^{-1}(X_M^TW z-\lambda s) \;(active) $$
+$$-X_{-M}^T W (z-X_M\beta)+\lambda u =0, ||u||_\infty <1 \;(inactive) $$
+\begin{eqnarray}
+u&=&X_{-M}^T W P_MW^{-1}(X_M^T)^+s+ \frac{1}{\lambda} X_{-M}^T W(I-P_M)z
+\end{eqnarray}
+
+\item For active variables,
+${\rm diag}(s)\beta>0$ implies
+$D(X_M^TWX_M)^{-1}(X_M^TW z-\lambda s)>0$,
+where $D={\rm diag}(s)$.
+
+Hence
+$A_1=-D(X_M^TWX_M)^{-1}X_M^TW, b_1=-D(X_M^TWX_M)^{-1}\lambda s$
+
+For inactive variables,
+$A_0=\frac{1}{\lambda}
+\begin{pmatrix}
+  X_{-M}^T W \\
+  -X_{-M}^T W
+ \end{pmatrix}
+ $, $b_0= \begin{pmatrix}
+   \be+X_{-M}^TWX_M\hat\beta/\lambda \\
+   \be-X_{-M}^TWX_M\hat\beta/\lambda
+  \end{pmatrix}
+ $
+
+ Finally, let $A=\begin{pmatrix} A_1\\ A_0 \end{pmatrix}
+, b=(b_1,b_0)$
+
+\item Idea: take $z\sim N(\mu,W^{-1})$ and apply the polyhedral lemma to the region $A z \leq b$.
+Potential problem: $A, b, z$ depend on $\hat\beta$, and the region $A z \leq b$ does not correspond to the selection event.
+\item Logistic regression: KKT
+$$z=X\beta+\frac{y-\hat p}{\hat p(1-\hat p)}$$
+$$ -X_M^T W(z- X_M\beta)+\lambda s=0$$
+\item
+$$\hat\beta=(X_M^TW X_M)^{-1}(X_M^TW z-\lambda s) \;(active) $$
+$$-X_{-M}^T W (z-X_M\beta)+\lambda u =0, ||u||_\infty <1 \;(inactive) $$
+\begin{eqnarray}
+u&=&X_{-M}^T W P_MW^{-1}(X_M^T)^+s+ \frac{1}{\lambda} X_{-M}^T W(I-P_M)z
+\end{eqnarray}
+
+\item For active variables,
+${\rm diag}(s)\beta>0$ implies
+$D(X_M^TWX_M)^{-1}(X_M^TW z-\lambda s)>0$,
+where $D={\rm diag}(s)$.
+
+Hence
+$A_1=-D(X_M^TWX_M)^{-1}X_M^TW, b_1=-D(X_M^TWX_M)^{-1}\lambda s$
+
+For inactive variables,
+$A_0=\frac{1}{\lambda}
+\begin{pmatrix}
+  X_{-M}^T W \\
+  -X_{-M}^T W
+ \end{pmatrix}
+ $, $b_0= \begin{pmatrix}
+   \be+X_{-M}^TWX_M\hat\beta/\lambda \\
+   \be-X_{-M}^TWX_M\hat\beta/\lambda
+  \end{pmatrix}
+ $
+
+ Finally, let $A=\begin{pmatrix} A_1\\ A_0 \end{pmatrix}
+, b=(b_1,b_0)$
+
+\item Idea: take $z\sim N(\mu,W^{-1})$ and apply the polyhedral lemma to the region $A z \leq b$.
+\end{itemize}
+
+
+\section{Jon's notes}
+We are conditioning on the active set and signs.
+Let $\hat{\beta}=\hat{\beta}_{\lambda}$ be the LASSO solution. We are going to
+fix the model $M$ and signs $s_M$, so $\hat{\beta}$ is a function of
+$M, X_M^Ty, X_M, s_M$.
+Also, let
+$$
+\begin{aligned}
+\hat{\pi} &= \pi(X\hat{\beta}_{\lambda}) \\
+W &= \text{diag}(\hat{\pi}(1-\hat{\pi}))
+\end{aligned}
+$$
+Let
+$$
+z = X_M\hat{\beta} + \frac{y - \hat{\pi}}{\hat{\pi}(1 - \hat{\pi})}
+$$
+
+The KKT conditions can then be written as
+$$
+X^T(y - \hat{\pi}) =
+X^TW(z - X_M\hat{\beta}) = \lambda u
+$$
+where $u \in \partial (\| \cdot \|_1)(\hat{\beta})$ so
+$$
+u_M = s_M, \quad \|u_{-M}\|_{\infty} < 1.
+$$
+
+By construction, we have that
+$$
+\bar{\beta}=(X_M^TWX_M)^{-1}(X_M^TWz) = \hat{\beta} + \lambda (X_M^TWX_M)^{-1} s_M.
+$$
+
+This is, up to some remainder, the unpenalized logistic regression estimator.
+The remainder, after rescaling, goes to 0 in probability ($p$ fixed) before selection. So, under suitable assumptions about the selective likelihood ratio,
+Lemma 1 of the randomized response paper applies, and you can
+use this for inference about $\beta_M$.
+
+Let's look at the inactive block. By construction,
+$$
+\begin{aligned}
+X_{-M}^TW(z - X_M\hat{\beta}) &= X_{-M}^T(y - \hat{\pi}) \\
+& \approx X_{-M}^T(y - \pi) - X_{-M}^TWX_M(\hat{\beta} - \beta_M) \\
+&= X_{-M}^T(y - \pi) - X_{-M}^TWX_M(\bar{\beta} - \beta_M) + \lambda X_{-M}^TWX_M (X_M^TWX_M)^{-1}s_M\\
+\end{aligned}
+$$
+with the remainder also going to 0 in probability after appropriate rescaling.
+
+So, while $z$ is not normally distributed, the KKT conditions are
+an affine function of $z$, and these affine functionals
+are asymptotically normally distributed. Further, the variances
+from Rob's normal approximation work as plug-in variance estimators
+(Section 4.3 of http://arxiv.org/pdf/1507.06739v3.pdf) under the
+{\bf selected model.}
+
+Since our variance calculations only hold under the selected model, we might be losing some power using the
+polyhedral lemma.
+
+\subsection{Selected is the same as full?}
+
+\newcommand{\E}{E}
+
+An asymptotic variance calculation under the pairs model $(y_i,X_i) \overset{IID}{\sim} F$:
+$$
+\text{Cov}_F \left(X_{-M}^T(y-\pi) - \E_F(X_{-M}^TWX_M)\E_F(X_M^TWX_M)^{-1}X_M^T(y-\pi),\;
+\E_F(X_M^TWX_M)^{-1}X_M^T(y-\pi)\right) = 0
+$$
+yields that the
+randomness in the inactive block is (asymptotically) independent
+of $\bar{\beta}$. This assumes
+that the selected model is correct, or, more precisely, that $\hat{\pi}$ is a
+good estimate of $P_F(y=1|X)$ so that
+$$
+\frac{1}{n} X^TWX \approx \text{Cov}_F((y-P_F(y=1|X)) \cdot X)
+$$
+($X$ on the RHS should be thought of as a random vector).
+This might not be true if the link is misspecified or the selected model is poor...
+
+I think then the inactive blocks are not needed.
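The display for $\bar{\beta}$ above is easy to check numerically. A sketch in R, again not from the notes themselves: it assumes glmnet with ad hoc names and a non-empty active set, and since glmnet divides the log-likelihood by n, its penalty corresponds to n * lambda in the unscaled notation used here.

library(glmnet)

set.seed(2)
n <- 200; p <- 10
x <- matrix(rnorm(n * p), n, p)
y <- rbinom(n, 1, 1 / (1 + exp(-(x[, 1] - x[, 2]))))

lam  <- 0.05                                  # glmnet scale
fit  <- glmnet(x, y, family = "binomial", lambda = lam,
               standardize = FALSE, intercept = FALSE)
beta <- drop(as.matrix(coef(fit)))[-1]
M    <- which(beta != 0); sM <- sign(beta[M])

eta   <- drop(x %*% beta)                     # = X_M hat{beta}_M (inactive coords are 0)
pihat <- 1 / (1 + exp(-eta))
w     <- pihat * (1 - pihat)
z     <- eta + (y - pihat) / w                # working response
xm    <- x[, M, drop = FALSE]
H     <- crossprod(xm, w * xm)                # X_M^T W X_M

# two expressions for bar{beta}, equal up to glmnet's convergence tolerance:
bar1 <- drop(solve(H, crossprod(xm, w * z)))  # (X_M^T W X_M)^{-1} X_M^T W z
bar2 <- beta[M] + n * lam * drop(solve(H, sM))# hat{beta}_M + lambda (X_M^T W X_M)^{-1} s_M
max(abs(bar1 - bar2))                         # ~ 0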
+
+\section{Current favorite version}
+
+$$
+\hat{\beta} = \hat{\beta}_{\lambda} = \text{argmin}_{\beta}\; \ell(\beta) + \lambda \|\beta\|_1
+$$
+
+$$
+M = \{j: \hat{\beta}_j \neq 0\}, \quad s_M = \text{sign}(\hat{\beta}[M])
+$$
+
+$$
+\begin{aligned}
+\bar{\beta}_M &= \hat{\beta}[M] - \left(\nabla^2 \ell(\hat{\beta})[M,M]\right)^{-1} \nabla \ell(\hat{\beta})_M \\
+&= \hat{\beta}_M + \lambda \left(\nabla^2 \ell(\hat{\beta})[M,M]\right)^{-1} s_M \\
+&= \hat{\beta}_M + \lambda \nabla^2 \ell_M(\hat{\beta}_M)^{-1} s_M
+\end{aligned}
+$$
+where $\ell_M: \mathbb{R}^M \rightarrow \mathbb{R}$ is the objective function
+of the selected model and
+$$
+\nabla^2 \ell(\hat{\beta})[M,M] = \frac{\partial^2}{\partial \beta_i \partial \beta_j} \ell(\beta) \biggr|_{\hat{\beta}}, \qquad i,j \in M
+$$
+is an $|M| \times |M|$ matrix.
+
+If $\ell$ is a negative log-likelihood, then under the selected model,
+$$
+\bar{\beta}_M \approx N\left(\beta_M^*, \nabla^2 \ell_M(\hat{\beta}_M)^{-1}\right),
+$$
+subject to the affine constraints
+$$
+\left\{\text{diag}(s_M)\left[\bar{\beta}_M - \lambda \nabla^2 \ell_M(\hat{\beta}_M)^{-1} s_M \right] \geq 0 \right\}.
+$$
+
+We apply the polyhedral lemma to $\bar{\beta}_M$, with $M, s_M$ and $\nabla^2 \ell_M (\hat{\beta}_M)$ fixed.
+
+For logistic regression, these should match your active block KKT conditions exactly, where
+$$
+\bar{\beta}_M = (X_M^TWX_M)^{-1}X_M^TWz
+$$
+with
+$$
+z = X_M\hat{\beta}_M + \frac{y - \hat{\pi}}{\hat{\pi}(1-\hat{\pi})}
+= X_M\hat{\beta}_M + W^{-1}(y - \hat{\pi}).
+$$
+
+
+
+\end{document}
+
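For a single coordinate, the polyhedral lemma step reduces to a truncated Gaussian pivot. A minimal sketch of that pivot in R (illustrative only, not the package's implementation; a careful implementation needs numerically stable tail evaluations):

# survival function of N(mean, sd^2) truncated to (vlo, vup); this is the
# pivot applied to a coordinate of bar{beta}_M given {A bar{beta} <= b}
tnorm_surv <- function(x, mean, sd, vlo, vup) {
    (pnorm(vup, mean, sd) - pnorm(x, mean, sd)) /
        (pnorm(vup, mean, sd) - pnorm(vlo, mean, sd))
}

# toy check that the pivot is Uniform(0,1) under the null, given selection:
# condition a standard normal on exceeding a sign-constraint-style cutoff
set.seed(3)
vlo <- 0.5
zs  <- rnorm(1e5)
zs  <- zs[zs > vlo]
pv  <- tnorm_surv(zs, mean = 0, sd = 1, vlo = vlo, vup = Inf)
plot((1:length(pv)) / length(pv), sort(pv)); abline(0, 1)  # should track the diagonal

This is the same sorted-p-values-versus-uniform check used in the test scripts.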
diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION
index ce951165..4fc82743 100644
--- a/selectiveInference/DESCRIPTION
+++ b/selectiveInference/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: selectiveInference
 Type: Package
-Title: Tools for Selective Inference
+Title: Tools for Post-Selection Inference
 Version: 1.1.2
-Date: 2015-09-01
+Date: 2015-12-17
 Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid
 Maintainer: Rob Tibshirani
@@ -11,7 +11,7 @@ Depends:
     intervals
 Suggests:
     Rmpfr
-Description: New tools for inference after selection, for use
+Description: New tools for post-selection inference, for use
     with forward stepwise regression, least angle regression,
     the lasso, and the many means problem.
 License: GPL-2
diff --git a/tests/test.fs.R b/tests/test.fs.R
index a3d1f916..b80e7473 100644
--- a/tests/test.fs.R
+++ b/tests/test.fs.R
@@ -1,6 +1,9 @@
 library(selectiveInference)
 #library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib")
 
+options(error=dump.frames)
+
+
 library(lars)
 
 set.seed(0)
@@ -113,8 +116,8 @@ x=matrix(rnorm(n*p),n,p)
 #x=scale(x,T,T)/sqrt(n-1) #try with and without standardization
 
 beta=c(5,4,3,2,1,rep(0,p-5))
-
-nsim=100
+beta=rep(0,p)
+nsim=500
 seeds=sample(1:9999,size=nsim)
 pv=rep(NA,nsim)
 ci=matrix(NA,nsim,2)
@@ -134,7 +137,10 @@ for(ii in 1:nsim){
     btrue[ii]=lsfit(x[,oo],mu)$coef[2]
     ci[ii,]=junk$ci[1,]
 }
-
+plot((1:nsim)/nsim,sort(pv))
+ abline(0,1)
+
+
 sum(ci[,1]> btrue)
 sum(ci[,2]< btrue)
@@ -173,7 +179,11 @@ out3
 
 ##plot
-
+ library(selectiveInference)
+
+options(error=dump.frames)
+
+
 set.seed(33)
 n = 50
 p = 10

From 98595c96fe1c0bf6989d670e5bda717771e9b056 Mon Sep 17 00:00:00 2001
From: tibs
Date: Tue, 9 Feb 2016 09:10:39 -0800
Subject: [PATCH 107/396] doc changes

---
 forLater/robnotes.pdf                   | Bin 150705 -> 0 bytes
 forLater/robnotes.tex                   | 287 ------------------------
 selectiveInference/DESCRIPTION          |   4 +-
 selectiveInference/R/funs.fixed.R       |  26 ++-
 selectiveInference/man/fixedLassoInf.Rd |  56 ++++-
 tests/test.fixed.R                      |  72 ++++++
 6 files changed, 139 insertions(+), 306 deletions(-)
 delete mode 100644 forLater/robnotes.pdf
 delete mode 100644 forLater/robnotes.tex

diff --git a/forLater/robnotes.pdf b/forLater/robnotes.pdf
deleted file mode 100644
index b814a8966b6b046faee5289895535978310ff7ce..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 150705
[base85 literal data for the deleted forLater/robnotes.pdf omitted]
z&CiZWU9+{c=4baKd~vEWMx_0T!BJ==aB}krtO!7Ud_Y}<@Lc~G8N3syupbIieJ>!9 z_P)Om_rLV=`~v+o=IMlMe{4!4gaMYe_WmD63<0tS9x(L7C|ocE7^&}u4w5$xEiXiv zM+;&NB~C_%GPN@Er+ZpY3LuPy%wJAgI{34di+=_&GEgH35kRJ{5@!jqyO0GPZca#Z zFt0}UA4N=Hae8q<3>kmG)43`J(5>T*}kI+Xq06f+ANXP~#GgkF;OXR_QvtSDkBo93RzHFVw&l;~ z1iW{6{K>VIyVDEQkIg4FM?-J1OrvUN;6*SX;1XSSf>>Ny}tpnkk zEr%Eys9Csp1Nv3i00&V7K5#0h4=luQ?Hljk#sLV6KySuYR&kI^YL3{?$Ea0C#SZ}j z6af(hkT(NRR@#hnr{?ZMM|NgMvP;L15s({)ya1^mL<;yEw9Y?I2ZV=K$Az*N=@|6! z{#AOg8}1(m4-lZy2~?HWLJ!iLwf9!~Jk2kNZ}kMf`|APV z+xhkT@uQaXTmAg|$v4LG5`)XoDhF=JZ+tx8mxa9Jje$f zj^|bt&fIDQbcbTEiu@Dvx0e)2FbmRy)zv><7*HPt>Pp0)mKzfi(EDxf5i-E@$JQF) zFIs@JNGHJD(H8)#I7)x5CloZGzhkd(Z+sdA0K^Y?1dzY#FFpgn+=t!JC>aDm!4Gi3 zVcQot3K)Qn-=JUi$_8wRApHHUt)S97^luMCp1fQcOaB(kf+pe6kIdL+qa_C?d=6cq zVxqMkj1q^3_ldlXEt;@99Y-Z&4o$x7k#5f=sfN1J0ei~#(9@z>XFD8S7l}gY^TO-J zIrR2DT{TFfk@7blRLimDC;HvF$CC-0_rH!iFQj9i<;a!1(WHYbGReXW5#QsP%6-)} zRW=@6=KDWrf}~2;U4^qB*$Tl3RhfG@#VVOY#tCmVIc)z$sKH0>p!_gVqTvdjP1=&K z4dTny5ERNsI^{h#SI&zrx|G8n)V$9J}B$CYcfoGb` ze%@mDOX3(ur;ChO2T_Gw=-E;?%=wU?P*(Z9TaPM4wuCOu6Qp%_f)q8UsfDmVp>A$U zWue=_PYNPCfIhW6eeR9P>_Oke<2*WD6RArrq&fF(%#M?W_#zDtgm2zpCZ%f9>r%LTH%4@W8(cl~4nR0itRrnRU%_(2KDYTIB$Y z3Xb!Si|f+rX+r(FAV>B9?<;PqH`UO@zR1pR*%w0yDqJW7y(pucWpS}MF%Z>2kb{8) z50^ZWAR{b6s%fX?2~gGZBhZ%8ArEDj?7M|v?Nj;+yQj?WPysr&p5!zdZ>8zb1JJPn z(>@+OnFDuieRK?OYM0wrU5J*@55zdwtvUR@a-X8wh`6g)51R^jTrI;(DJNaxk*vGN zbWf{WpuvEkkwNF{H(^I`&w|r?K%H1n7x_sPp~~R8C6wZCx{`=4=eOcD_4$_E1&(<2 znanNxzvW1nVhA>TUy6swGM(MiD5OrF=U>8MvYkgv)Q~j;6A|DZ@L||CI6zfJW!;x} z2{MaL{bjrbpYm=WY&>XC8dsAv`-h4fUin-i5$kK=J@r^f-zAAZ(2yiY-H*11mB)li zi%Pk&x!sUD;w$Z>Hfx6QparOVZ!Nw{Spl`na*ZX;Vj#4i4Rl!Fi!J1jXpIh%w7jMllhkFHD2hK}N?O_3h<_bEf&f%_L?GKAB5(uHHVxf?t_ff2Oi-ih z#1qHJ%c?_{U;@d5QM-BW5_9QZ7B|Vb%E#jvG0;ExHBORv=(UZt)?L%TaNth`lPY$Uc8C&?i`cCHj`O`a3`e58<%CN}F`N0wAO* z^PicUrAJ&yQ@Y4Qykf^9y_?5Wi0V@3XB9ICM1g3T~?9Md9 zMJjL}M`OlK#*NEyR8xO@Wlv3KsHP`fhJZ0cK~{-PwIZVi%<=1q2B?5ZHKmG(9k z{OC?L2Ng$I%l9;^5)bqfXiTE>%dxo>n0MxYSJ295_oMI8Fy04-d_G{XhB`zH| zc+sVF=wh5*gM(V1%!%t`EsHIKNd9^DPFx-ZC&^+XG{DZ!pfhKK7$(L&X>oIPxJicN zo!hlmO94+3*3?re3q)Z6@<^x^ueZsIoZUFoZ9Z3aYJ?K=%X%Ml%Wt9AlA_c*q3C-) z6=vKQu z?D0uZe#%-gVFCHoDV$gfyfq`lJD@*=j#@!1+Spax44D zYfzahX>CY>Lskc28=L*|4z(CfK`>1rW_ALGTj>H=Nhi1{0iF?SDH`0pVJ+quHGVGM zltfZ*%Af6ykPwrZzfBGF*W`F88f;rLee{gZdi4(?P+G{t3O;PI3VhTxf2pxDN6AtW z9#jG)fgo>BzVAb_UNRIuruX^s_%0i$FaBSI*#i=K75bAER;U}MwWjgyAre}fX-X^(7jz^ z(BOc-Zfpk+O`ylDu?%$Cw`+G6UH+7+RXPV1JGgY%Yfa@NA8NVQTP*F3|Gc3*WW=_# z(z7~xUsNK`e3^<(!feko4OfrbKa2(}tk=#G;`|xNXRdzhB-{O(o77yrC3hgqc>0rb&HpwNS;8|M(Ah|`#92^WIIch)v zH}W!i=uf*G&d47cwaVM@X>Be1JVz4K6K4#qC;j*bG(|x04D0Te3VC#07Pb&U&@C3n zm19R1Ojv0@sI^8-#w~3(pl@6wqFCc;I7Fb#PnD!exZlthGPHa)|FU-wv3U99MwaJB z+su5|KW6c&K`@fMs77&NOBjy0Y+Wq1y9!^Mv)W!VsAaLH6QykCtiCX7Fyp2)dk+Bb z58e9cs3=K##YKwjyOxnc=Wl0_7b?lvdava7Uw`5rb%nCG=7cZ89K#q_h48hIM)yt& zCg#+MPqg5)8Po?U``#anBx+N1za1{r183QXJJ7d%Bp)%x*M+6(L{eB)%Dnb$W)4uF z?j_>4lrlkulM6Pc_w+ANl}4MSD84dHPhFfVLR=4#`5wG?F8Q>)-pqCTTGVox4P2+s z&vAufs-P~%#vjSU@iu&I9mlchuhl~aTeDE&n}R87D~UCS$(@~Xc>xJpZCZ=M-_$Oq z@bH@tk@JH)EC&y*br+lna8xy=(JR{!1zx!WTxC`n5QdQ0KVnV=CU5k$ZE@$EM8TFl)A*5t;+^lEyKTPZ6s#-4kTQ=fe=UR zg3U^f*LxJ0`wI^d|dQIq|?!g&+T8Rn6%9-FDQdA3-GZB+7M7LUy^_Y7KAkSX|3%S9v#^ zQ&FCs{&Aw_2ksX~rnresrSjLAJI)HwIF&qSPwVCobjFpm25-bW0&dzG<{5$Dv7^%w zvbY0VN~udd9xY22{(95>e1YiPEpsb$y^c$~4Nh(v5Ly6O;YvxO`>|t{eXUpU{CPSv zL&fTO+|-X-?gSB!b8#P1<%0WWSEJbl|L4}dhA7xGPRut@WA#^Y>IQhf>5Rb zFg{B%*AE-Xq1156uO89?GxgHk07$Em##1wxK_TIH$n0Up*pP|NM~zooCf8CKFU;NV z#bdIS^&r+&YGXA&bO$lju27zY)Jfp^m`x|vhcSnQV{(&;D^3x1bT&5v_TJpTM9Q%u~{<)OB^9}`p;&3OCs 
ziMH(xi{Dghn};$WR_{TPR)GkiU{Fa5<3XPXbq%9#6R1hMW_MZehSGsGU$z+(K?;}Cv zH_9DM<&`@1xmK#-OiC{5{hlJ%y&+jue23jCyJ1Of$gtH??Mey8un3~x93Rhb<%ye$ zlMSzm+cbr+Gr0%zTukh{k56PTc+sGXN$=&B@jSXC^bcni-TOr8$gkuz$mA2x;QI|E zV#?M+FYO(wErw6>0qNxtE6=5krT~WyZN{$gbSB&$nr^Ez`V6zm{RZz;cyolr@h8;1 z{I-~a^u14baaAVx1VrZJf%~lN5}{r<7;uSQiK~Xdg{%i;=I&ol2@(Zn~vKHs_gT6 zcvymt7Ve@Iq$GG_Ig5Hm^vu(T7{Yqdv;>2*77cbG7_9dPCSgHv8m_6qwTh?diy9b% z7CEw6vv8=T)4G~BJJ6Z}ICY2FKl-H^EO9C zW>xeAJmuN#m}$RGF;1&G&|PgYY@8X?=rX01jMo~Da8^no{)w-G z^WkRq+WQ9!5A>m0!8}WMjU(Z|@1l4eKIy&1>i}YDcNAm zXeI`=r8?Opi_QzA+=t;XAA4^8nKB{D(MG{u0a0N!qxmM+L#8m=L{XN(S|p`STf%n^h)7Iz=Y7A0^51 z`Wc3bc|0G`x09OT+g*}&(Ev#WvvVh+xf7upo|lJefJ-N!2Naz_nuE;u##aaAK%T7^ z4U{AEf2ZFG-3_}R2xQ*lAo8cmu~H#JVYAqgmN#ZrG|`Bc6V+PA{@Gc!Dgx{(wb7C! zC%87%OST32+uvPd@i$6 zcyh>pJrg`ZcTNmf_s>%mO%%NXRlmSu`w|dXVp=z_o3s|z-Hm_jRB1*i)HynPsOD&K zU`mdTSv7ae{h(jQ=f4HI=iP?0W&jjN69`UuO#Ko%urd|nM-2WjLC*JX zi>uFKTsVrsVGvS<+V5ecBtd9k$17Ie(L=Ou9plG{m_m$S;oT!TXYAhRl!`!!m+nfp zYG#lfRLetQk1y2{x~u3e*W+5{w4Y@M95?io1vE5nK*orsAnzSEgJ~mIM>~@Xa;(7J zPWiH?){g28x=0cot(1eA)y`UkKofRX#|v?GAh#t}M`rzfqU{uTHe}aFc*e2tB<}1X z3;S;hpHt=&f+iv_3ylRi50}R3{o~sTE|)6GcXG(phgtd3?7&jlWnFD` z7X4bLSXSURu;R(h5zdl5kaA`(os82yPI%JIF?RDw<7D)D?q}}J8f|$jmj~TZB_cja zG#M)_04+^ZPhj%-Xuj&ioUk^pA_CG3H=7Scj&wR7Kg^~iFO>fn6C}Sx%=e6NXn%?n zE-zBe$}LN?Dv`v_dc(RhK+&LYp+_enUvrpuFtdU&biE4idXU-=k&8u-tT()b82Dj0 zsH(eTlm0qZZ0S!Dt^q^^+E;>D#vkWDrC8H4e~{a_`+g;sQVML28B~*P;`mihQQJ z;mw`1Cj-H2dM{fILCZiNd?yJ4XRh?OZiaGWi4T=cx7mV#zcyInCx8?%k5_& z8Wwf0eYDy2RNIp}yk>oPS)$+;Fbzt-)@b1+Yv@h?a~%9MN>F6&3pkM7PkR#N2+V6o z)O(!$7=i!buFYoBx+Q}z5^!OBd5rwg25AOzO0VC0RDg}mnP!jRNa56dD-__09m-s7 z^<)9)e{}nohlchv%2Jyk9qmGcgQDcGGVH6LZ!;B~nIj_|B6_#1mKxV^J=p8whOlT^ zcm@oub`d7!Q2fIFd;bJs6sW{l%#)QM=E=w;fVtDcp1-%Ni+2&}<0AtC(pZ7-Wa3rN zS4C^=fb(2Co=g^5u8F0+yJyswF6O-QNOsG(Ot7|>5PZP_Hk}RI<-Ey?x@HXz*(md$fBLb>d=* zjftP36KNf#{Axpp%(HN8#G7(s(glHuUWI`;YP>7HHauu&uFOa0NdDXPwSw_GLF@I` zgS?4PDW9{u=OumbZ_tso+K{6h=+L_f;2V@>(gd$9VSedm;W^Tj^IL}RrYxuCL(1@s zDQfMS_Dm>%U>P72XG#o#t$Xa>$u!wP-P5$1WKr3>`@}nibk`*+R?L{AuD{%~Zif=^ zHeUR!E|}~pFZR7D4M~qYg?$67_V>xWB6Ev5HanIA(QHHeXxX?p+^Vrt=!-cS3{-TA z)9V}{D3mp$F=gpjMBqUrjM@GAOYB4mUd`M2G{t&YTlDlFFvf zC7j{lZR04Hr^ye-!BLb(w#~?LODLp582$L?9U#ybqFuB8%qlvLG4%#e*GIr3gOfXq^$l8lz6ayP z;0!5|;Yix}6w6DH3*cT*PSgh!zkAv7oscN+5+a%tVuZo9tm)zf~-5j;Gq1ED);< zO;d}o8ft-kX>8({vo>a=eIN6GKXQf9onze2`};o6hE0RYYr=%7!?AEf?BF%Y z6j-viUO=iU*oD)Q4g!_+Q>N2KTB=;SDn3k*>I`clyIpmI&`7$Ht8C$tVcZl&@+}YLUgulA-B!5eVz&1*OzW`<2zk8y8fW_&9~Mh8 zuWLb9n4=L_ja(9612*(t`DYO;Np!L&dRSUE`X#NsAA6|aWSnZmTe#tOYJm$swX6MO zl-|30R*1|}G5f5OcpWoX^;6d46>A5E*3mtZmg}FWWUw4P8oLhkJYQjHeN&`e18avQ z!jiT(JQ8T?HM>-=rk8vhAsKvqPPyk!YlreZC%X~@UyMq3kQJByNRYHoA#HvC2GT`2 z@D?V)Devq1hCUCX(!{h@nKPrq`U(mNJ~=gLON{|Hc&mpyM8n>;Ou76!nl#U1x~ z3RFs+iJAhWwL9Pt{W?vr^)u|mHDX89O7vXf?a|_r{R7V2aO$td9tAI8JNJNqB4_S$-D+6a=~JP#(ov^S8EShYav@ z>|9j4@zDNdLDYi__ul4H@V$)^o=toswVv3cnNH6XI_J4Wo}fcJ%cPF3y-Y^Vp+wfM zv{V>nVgT=f@#dl52}iq@+nz+h%8fs;;Ntd!D+-f$9P{0GQl(q(2TVf>kSyEX*n#`G zqrDKpJPpz;34{?tE3$n+i6^D?0~nVgBKG>_Y=%j9RH@InJM+09z$lB}?HuL77HXiK zEdVTkz=mic!1k>PN%1DRQh^vL{T;pEEBY9lVqc>lDRK5)1R_Agu#ma}zDR%B!}gJl z6_psd*}1LJ1g5jKijq7cRg@4c2#YMiPq$;Og4;UX5c!NO+^(@j`qMtj5prhwD*9XF z{ap0WJ?=5`Ra~Zdp$sR|AGb2Dc%MqFL$+wJ)3Y{O3UY#nbd!kQz`LwC5p)p^rpn>dj*)>)Bmz00Z)Jn#k&)wO}seBqBzmxz=& zoN;SuwDYs|D`ut>-pS{zFUo;?n!DaZP>%9kTs+IUpxa~pnExy+NWW1%t|p0Na<#+q zFYv?ApTqxucg{lp-*)G0jQ?TZV*3vl*MG{l*x6Z_|JS>7S8(Pm8?@@3z%v3!u=&ks zU8L^Uorr$@B#e|1y+x0L1t}NGO0nfZY6fce%psf*|<7u0fiB%IyG> z_UJ>PaSE6>M-l!TYlL=>U+)n5^~Rw7!6Bd=zU{!oI0O)7Bt(~1*XlwH!(6M!hM#lhcLh&sCG~v)7`(X7zUr8|Xz+8g9 
zQilUZAZKfWIzIqS`zzvZ^guvuEbRO=1oWv|XliMHKmgmO;pSA80n4}q^!@&-{NURI z_@aRW8~}a)xoLf!<(FThGpj2m*_wxhc?=$~23U~?hv3(d4S_xnI0EoDQ00rz2SaRJcULh|kAM8_v9hXmNA{HmTEMgnvZ?67a=$MLm1`oTD9nQEY?vS=$L0tt2g z_f_u8C=W99&~lTr)thMrAJjS6^~2NuaH&cTklD?azg}k%NiUonx z2aj@sh=KsXp9P2jFQs*p3+c?Ajo-^ZC-+zlre{aihTQj|3O0mh3ETeN|L6+XDF7fI zOf|InncLSZ=Z=ri2W1%nqRwxfN8i<3xSK(+;+JLj8Rzc_5FOBZ2o40`=lkQy*qu#J z#Xi;lX!lWz>V##1K_Nla@SF05Mot3y3gGVaa0kfl5e(`NfPlw`$1ndEeDCv?-!FMj z?!C7PQ~&IC^;1k-B#i|d|0^tV^WnRE==#;X?rR52wXc_{tlthC6G;Ev>qo}_i+^&H zZtnLL^~>?S7xs6S{CD>K_ZDlsos<1*K;sAh3m(ojnA82obPKMD>cN#mdB2qk=#x$b z?_pH5Qm`toCiYk4PyF5{Nqq1+`oxVY-R+;$P22B4{8p{!+oT41skY_W=fiz80DnY?IgjP@>Ye)xwb)w=tR=)jRh%>f_%81`U ze{Vnj7lfnVHo*7afuB_cfWB*IFSP!jOVF>d?Sou6!C$bS0Ei#aAGUG606$RVANU70 zV&}}!;pO@J)oVLY7Y9cGuOC>TUqC_v+_7Gw2LRjEeqfJHRJ60&nqT3cWqc{MAsf-7 zT9S_9Z;u}r4MBWd1gWkFg|=ezy6w)rkwvI+@SCj3Z@C@@+RM@gM^8EY4DDIS`b70f zCZJP<(FBR@-0+g#$p&l*=>=U6yCpTFFdy+H>$bag?~FW_wk>})UjF%a7C)heiXhBa zg3{Qb$l2t^I9-1pPaexgB%|2IN`!n--1Bm;&Mq+(vH7Lcxp|Z6WJkbbu2d1BEQXGG zQb5qb=yud}qNFEvaal`j)&!ln&Ox!!UGawAojFR9y2E$dk-MH3){N5#9<2Keiurv9 zi+oRE6Jd!MU*QLnzvEh#G#ckSX>R+M>uQ6}^((S$4+kTooPgq9t1za%P%hrE0Oq~3 zX_e9yT9g(_P;lP!Vs|3&v5i!8n0J=DJYI+W3K2~e$&4>TPx7S5LO6C{tw=zlw){jX z)V+J)9=+|_-Y@*Rhq0>!7J>6!IO!;a(lwUtC5lpQk_ZP}@87GoXu0a*sDtw$$?_nF^E+$c#GZe8jgh@=_AX9xR%8)$=n*|F2bnQd3cT87Mrf5P=-xYwd~Mqz zO`|-^t%Qjdg`WNtQV=Gp6Z+x)`6Ok{WLLxGD!2YwU-@TbZb#C)3!QsQZ{39ESRU+B z`du96{BZ6~P?iXd3CsEFdP?M;d&D|V$@*R zZ%oP_>He@M3%fO?*5WES61oY1j(e_Mf-Q-#ho;EN!6*R?tS=$oa_SsGB}SU3QQBdo zN`SZRX1609f2@KoyVB2QaZ~XYp-yKGw*lTY9^wrop&MnO*Hvt&Fi%Romk;i3C3o+T zu*d*fJVfW^Bk)0AGB)FuuDP>qlcW=VZqE*xyX1BGb>KUSk6&`coIJ;L-fwmRGc)3k zsRyg^=C!(PTqHx6yLUA_>23n!tD%3L+mMJv%osC>!F{3h%q9=+s=o71N_d3`h|v4i z4~*(u7_5;CXUm58G`I8;pdORD`jK4J*+*{fhhdu;_i0sotV-qNtTkRM!$MK8|g)o_oF$i`OcMEYPIPGzW$da~nop#SF)_2z&;E3d4C`;4c{L|1{zrd(r;O=On( z@pO?!*fGf(@Nv^nt(1Fu%vte_K7$tgkOfSMZB?0oA1Xfo7!tOz*5HfXn+&>ivkx)I zu(L4J#08h!(pra|HYOP5kK#<4Ia}6E19Ii;fOEw<-Q3)6iIWe2hKlCL*}$ z)@)@n={Slo3_W9iO$l>R1^K0~xwdeDhIhM~^6x#$v1Rb?YlxD=DzUvVIPhXp5GB?l z@?1hkX`M_{kO?l`?xfogWS*GIK7gxX;7j{84@|K&tfRhvsxVN@_tfZ}*`$k0vA&X^ zN0qzdyKKLgx&I|CV;$b;kKi_(W}`|`F=oj0t+DiWgQH)1@)a8Gn0q@EeGJmAO?SD? zI_H8QJXgqRFdYP*6giLmqMcgBAz>~yH(Qxhd!5dNPFpaqGsa(a$EsUKwl;uCx0(`Bci zul_zrQDMCZw{dRx<}AhH>&|*PkVx#wsovKit+u*VaB(xq5|NJe z`B}uD7;g>Qby%UQp=h{m?&{uct?S<+M3e=+AlBQfz9-#MQOSw$dwF_0I2Z2Zb{8$x zK-vasCToBf-2t}T+)h0ym_K)X8MT=u%_pMb4 z6Mu3V-Y});WYFtc0>qjaEHcG5&(>UFKrY3p=aZ7_>t z{?f1XtcJ(%PjZ5)l6;==PNlBxE3e6S6Aya8Ox`w&X%oJ#jMnr@q2FCH2QptF0=Y;N z1+^WYmgPn9aAgI#2tUJ)nvRcOXWMGOx}lqanK{Ld-%0JU5k%=3`W_)>UfI(-@UzJ! 
z&&Q265GaoHP>J+f*O->3rgHHvGyCM56cWb27?v!kazfFY&BQBL@PK=tB_Wb1l;2T= z)=z~6jv=SldTqSJLh2vW5zOC!D$QY%(w^|D#6%r0Etgo#g=^w7tIV8?A$LtpYd8}H zEqk`hkrEHTIAXc#&HOY4`7AImZqL-xcbK`n2*r1$wi%)h2s~cna2-fZ|Tb@ZCeWwH8Mc zwE-=iN}ZItJ$~M-4nA+Odd)jdq|_C^0L7$9@3NLmAL5@edb4O0)_az{_~QjR^vEe- zUY^D7_wQ^pUV^j{xe4t~dQ?p~g)Oscb=QSVxr>|G!-%U)-+CHKbwOI@RKz3V$}kf2Io9WPN}BFpVmX-}%Y%k%8BT8e~_K@$UXD@miDfD7&E;=s+qipZ?zG zP?!|VXLcoZ$gCH$)!i`D6OgD3B?3O!u1dB|tl zo|HiH`VzX^H)|PWRCg{MgNC%R1`#fwwqPD8uO~puN~GMCC7&N#1X@IL<}x)*U*{k{ z)@RQT0>fTj?|pQ^eGuVoGb?J0)Hzq9YAWNi1UgC<0ka>32Pmn6V30dAaq(he zbeV<ej$N3nc(F7(8IrBvr$c1go{zq0K48bTzAC32?jKKE7b!hGH- zm1}8|AnbgIP;CC*D|*q?0QwL()Xf;fdrY^7t2};6a*B6bs_w0+ddRhBLWRy`vndlx zOp^{t%^s9Bxm&9)-x0LU~hRm(NR8^f@4!--aj@oCr$9gpC#tEva7qq% zwKOd###LsUwR|3!pK*X!uY|jNnO-+vktIGgWNMRJf_86eZ71m}#2^1F{_#}XO%5Ew zUn^AchUT=l5Qty&r^a@|*)717XZ7=MXUwu#CPo}(n&mPUA<7UOc`c+zgD@#2TGG9lO=i@_aiC&Qtia%oV zm)3H8)1c2gC_7sRaoQDAN-sXZy5?339OC4C91i{~&Ay9AaqR3JxRk@=qPr0Yzl|c{ z(`dPHO)VloW&LGJOkConkdty7P^OEN9+<)90zfP(IoV_jVYq*bbxE+?BHkv(=3^{< zrfUd*zpbI^COD<6f4xkh~ZTV=9DAG-al6%s4>s-RzSVwmH+G zTvy}hKTBsGUTG8+hhEdKe_ru% z`*j3(h@Gf+j}nUAw4_MpbWF?86}anpBjfsl;o0pwG{)`v1J3y4XM|?j=!^U@h zMWCWa9xPsa41Vy8^U4e#seV&;q!P93u|p*9 zX0;F^M6!;@6uDMIGoLFm>U862igmdzrn{7tQ=S`w@cZDa%Ek=TK-{wNyGD?uMYGZ> zo-6Y2kr>s^zEaT5{ZDad92SI{={N_O8U%&ct&fwCHN&aA|4&bVccK5X?Ay2)QNbz5 zv=_>7FhXFo$wD1gRv(&}*C__)bE(I+7MI+hAn$s*h(iqwBi7)9LC(>IXsz&4YE9U% zi;*76JKB)n(KAxAkUv^vI6aj2VS6r8k<$$Pm@(7f?mUmO-hy%LMY-}hUyAs!AJ>dX z=_Nzmts7c_$*b_I#IF^`X|m)oo6~H1gLcR&9sNhv;-?92)D)oQL}56ZGId6qJJzf1 zR}f8V4Fd)lk95>1p)kXQq}JyAz1uAdO;D?)QZu(uBV zIK;VUJ7Q|byxpAWj`d@`>srMskJ_+mv*j&Og`UTaA5Ua2-~m^~32R$;nG>LzJkWV7 z1923aq-94eet0T$^DK_xeDTta=m~y92qU-|C5+lPLe79fivrF8AGHlxL&Et?kOj^C z7z~4Iq0mxcL`KOB&7(wYiBI;e@6qU#tORR0kDTfI9*3Y9qTBO`wSysWwn%Z_3tV`v z+rSLX)a4wjT_3|fT!+%iw^0ts}y z?+~kn6*b;n1Y$CssSMB49TBiskC|Zuz_))|3MyGWgivb$lu@Grs)w$3t8}o614Y<9 z^@@pvhDMrp&P-g^m%n|cF7RYuz{Ck@yf1qJAt518!iRb=VHZEN*^#P%&8gtcTAOzIs~b(uyqPjRfFu!F(<(_C*dYikzSE6oEIX(a@eY_r=NNbY6EKhlni`cKZO1o*q}JkmXr0UOqznKwYUulJ2J3()SV!jbY~DJ%zv-v=C~b_KF$=e@`!{_iP!W#q90+HA9u40|k%^w4Xj5=r3aB1oLAAT!zd(E~ zFoKv30##esbLTU}qGX;+_?L+W`!g}PJ%)f8slPu)Y_^QWJ-B`Ppi?jdWoommkCmby z=F^&04+wRMI>;^S2i21U%Q(In=<~G$%r)}7aP3{k8`>>nG#T3uhMo6JcblN{tb8wTSm_K?GNxX8g$*s?Ycs#BC-h=ZXqf#|TlL#8b3dL9Ir70?@b zd-*1H81eAT&o1VWPZw6Uwl=GaVTnX2v^^9E%+g=hNT0$M#th4GNe-B8?Z=j0j2#W& zI8F=ty@1&0+B&H2EK}IvlAYSiBKDdhQP-GvkXfn5R?8f#I}#b?Kn;pfEPOLXwAm!p1d)#h zH)<26dWDVeD+vgIuu1>Doup-^J#Ki_?z9+hufP0KRoB(K6FgKwh{*_Z*tpNm4rr|t@-vL7mq1_% z;(hVoymXexUpEcVrm7u!O{diI5`VXW_Klg8c7v;#Dq#o91gkpudv*!P5g(Ca^Q6;5 zg{Y=T(TxmwC-i)r)Q0kri5Y~m^~Nj(x`&FG7@mbr`w4z}RTi#6)7wVSu3?!vgQHVF zkQC1o(D@=t1=W)3x)~}Z@kWxU&pP5>}%-4<|CKJnMqQDjJ^7hKTWJK0yHd z<{kSQ>(A$eC(^t^F`c7mSKH(C*~sNa>vg)=fWN(lUql-iZjEmWtXOd5yN0$Fy}j*` zub`tOHXug(CuLvM^5;EOl?>t1TaO?LMb^J#UGyDYCDavb1c&aXPxf}(vmp{AKJ zH01^qzo~&DuEl(x3YTO$vt$yZ`EBQ|&B{0K*&%3>ktK$kY>Ekqm+})zJSag9dUwUm zT_9Z|Vh>~kmw*!GctS(&k=ukk=2}q;mOPq;uAWz8s2@>(j=bFZ5&j$NVhLx96^3VGC=JZG! 
z+|1K^u*Vss&=5=Jnl*Mtr79%gP!((0l+mLtgAli-*^JxP_p9ip)3+4i51s@6KRUa$ z(mq5;NA)X=`b}tTawtT@xtGfI=!2XAR5{Z8uf~6wL%Ii&m^ZqbmwK^azjarnO`9Wk zzZCK4$qiEwDhl=w{AeVX0|Sii&MPUd6pWON3hY;w0(;}aLo8D(9cKVHd3)J_h#;-s z&i1GJ6TVNulawyDkDaID8I<1OtB_D4dmJj?xG<>KU8aPm!g~eZR8gZHQIc7Z$5lwbcwzeat_j`D9o^%Ip!QuTT~tkL+qZ`G;5V zH9FpKF_Ca@wTmDA%6*SN;;WVqk8IqI&_%%bINiy?hLot-Tf-|b{k4PE?3HENyj;1h znrdZdwViX#+;KS?Z-w18Z(%|(HbjZn$&_)^J@iak){K6~Xg^dgWr)2(>^IG1jK0S% zmli{NWMtjRWzNy}_Qh9w$VsrK2?&?@@PC+US&FS(^+yg{-uSik*ObV2Hbw1l%LV#$ zE`f1j>yG*r`3{g86V=FC13OMTF#cX@)#cuuN#0|X=m40eli()nKhpacD_nhEISePE z;x-))POdBdNOTIwH|B5Lk3_*S>mD})-rKe|;EVpP}qbyt>&_p)(*3W8V? zP_9R>vd%gTuYaA>Xcj4&)#vl9Px{LU&VGKUdGULB$+dktj#^I1v?FGMGhPNLjG3(_ zM4z5jhl8JO7XNK8KQ6`@#hN8|P+N_Vpkq0Z;0YM@>JfrkBz#H2dir+uz0Kp>vApV zevW^iTvfBSK?LG37Gur?m|=r7%r4`SH3;Oy2)PKrqv&1oJj6L!#5baYD&B#nOfcI+ zdS}QkM3*MwZKoTKUWj!wMD|9ExZM%euRPPo4r1~ z8=P2ab7_~Mc}PM5^3J!h>)>=Zz3ci=lyOe}#(hdX(UUIKZwL=MqxcJt{2C0Ot z+*E*S%J~E~XEt#ABqVP2&x3JzGg|yJDcgbR;j-@gV1%O-w|~k=KV;Upb{VB$V|tE# zfAzsi!+cw<=fyRFR2;=XIM>d9d=$;_UNLz_2^h57@?_O;J2Y5(~w=y@k!KrC3LL%c}C@&pNjm^ zN3pLHqWA=2tE=JX z(j?f@wk(<|g1}Q@87SqyWP<(ZWz<-tLLE1?%s(?ka8X|6AM{30=}prVPrORq5a0M! zoFO>civk3IkSlT*-J3aS;f}>^0xkL|zc$JAoz^aY8$$vDufIT%Sq%AZ-ekBEf>(a$ z8nyFTW6l@Ab_dn5_R5u(=w*W>G?AX4k@fCS!Bb1oL^_DQi$qFrvdV@;#D{7gUp;FdVDM!7z_*3X9znDksDy?^IvILT~X9ISTATB37 zO=DSFg0X!USpe_%$tT7&P^6CVLgUvhLLFYYehAphe?&wCx5&y)H9o38>(xUZd!vFZRO!NeAJo16o$sVQ_0WM^BNdiCgwBmUbRkmB%NP(n{QSt zt3A6kxsIme@B*M;JF5G+P0wSWGicaKX@-(4h8sibeYT~o{-WqAW4$%AXasMG3m*Evgj>|Fs7`9j?L-hML% znj8OyK1)ZsBS*m1!w2Ven`AyvU=ADGLt2a+If&^fJfCMeEG$N`fErM8Cs|5+K4`?g z9U23cB1~?p{ahl=M>L0*K%xJG`Xe)7MgH=&5|LB)BQyn3<4DdbKluWY`)K?BKVTT< z{|5}i%*4w4zts`{rRZ3h+5Qh3^MBJY05&!N%m2b*{y$Q5FW?GUn+p&yhjIas@PVEG z^T9E-{r5}Txw?i5_~}F6U~EC(Xt!iFzP|tTUhHI4q`&@i@9=bf^$3lUPz;tE8=8S4 zvoX7r85NlwgGY)_Of)nEV`6TYV_{|{jf*eWX#lLf5{)>60-%Qm{%DLEf7OMWNilPAVh>( z-}q|R#Khw26Z@De4pa<4_Y04V41AGy^^1can;*b3hm8aMlNo5WLy7}h4NT)Qf?VJ<|Hz07`-^}UBCHmRUwJSaR2~t7^zGG+3@p$_#+Lz9lgody zKgbRc`YM01TL|A01h6nzYGA)x^dFtv#QMVE?ncz^#NcN_Hs&wEN1rLf777vE?!J$HwOmMCRr?Ho$iw94e+K1e+EtBxYRF*)IDnNszCx zDUlfvE-57>EF~_`5fD&2ZYIkYvcU2n!Bc6%IpV!1JfH51%L~xrE3*I}MoHc)As8cN z9xc?~)fvdc{YUG8FER)!YTwYn8n!+db7P(8Tj;kA9P>A9@9XaT0Dj)!8=sF1&se)p9m;6_an$GGJ;jX~Q0Adc%2mndL(%=HdyG7*t zoqXIyeUpFol|uK&@HKm76re~Jk2n0&{ zo$6&E#Vx%5#9aJ6rGHyI{@uO%?SJ_F^ZUy|YL97o$^V?>i~aR`=Fg6?Lg1^_h1eAN z_~|gV-d!7Z{mZrt^xZt~K!gg5nfY6p>;Tcj7#>%>{+YK3s9#`d_#tn#Lus{xOsCT1 z#LfI=lleU#@r1L4)ru>~P8=r0obALUJ^Y>TzRjs${yaW-+G*_lD^U6>>h#mt?W;fc zS@CU4VAwL^Lg9Nw#)8WYban>yB;ZCE?m7f?rb9IDcgY1d-1#;!7+C6nMrT2HQvUqBr#4CI;yjvK|Pl=tr;$2;$rqY4r2#Cvb=B z_&1VsR|D-=@HPeKH_ugi+IP;=srr|s##MTc|7?5VtT!zD9p69ex7f#(?T69u6SP~8 z{`V|>2QwD<3DN8Hr}gtw+OHq(%@*?I70T4N3vws)m@whBu<;Gk7?Mf-2ezlw{9AAA zi(n4N+{pNWappC#?x%41clsl{@1u6}p0M||kDcMSuIH92@Va-ebK{3_Q+)JQ@%ItNgYfZpk6(|CD2{s!%i!P6Od91{ z1997Bj~vu&w2f;5<_FKWZeqGkg9PEsd*)hGziNS$%SwYx@Y@1WVq1?%ZCBl(_sqvY z@~RxEK_cWOJ%0Y1KX$(2*ViWfn^nRpKHvST#7J~Ze8g^xXOnW>)g zIPa$tb1Zck2hve0)YCYG^Mu>;O4`YU;ek^n{`camEgRN?j!Bnj+4#>%3I=m-O%4*- zAJ}?0+gVVd0xkN9le@DqnhPR9-@krMmU}ng|H@b@ux6opsA#RKKP>t~Pq&m-4;jsM z#Zb-yMk#h_ILef6agP|MT^VAZ!>AHJIj7(gWed~gcnF~suGIiRi};T)uOzan4g*lc zTA}ZWH(5)N%yeYEHEO>kbqNTt@%QRE_-PwnF7c9y3-16l8A;mIu~*Gz$Ad+g75qaB z9GH1Q#ngxnQnA2fQRgEgoWD#W7TMh%(%8(E{Z)hMHao==dJPlkTl=-)69WNP7V)}; zFaG)NzE~~lmn)Nr&&_)krQ&H`>-p>x>=5)Dj*a%qpVEpyhg_(GuM~$-ALhvN9GTTx zx02HeS1kkgzMO`rX0LW-Kdz-(B>nzFRb= z_7Oplsc%Za@1JL9#1$9_?oP1Dp>t=&Q{z>jbnXv5kTT5B#H#XdP~A@kpQ>}?A=%Rf zEel|oenR2nSU>+We~3TNGPS^VhWI)R7jp!CEbG=uIa5|O0W*%}{w+HXFg`3q$j_-* 
zv+g~{JdT&pgUFy9%EqrE?Mrn^VzZpVm!Do1`3$+-nzE84dG1^WO2jC@@FhT92@qO7 zh?Tb5YTAsg!6F-BwqL4zXEIbVES4~bfHO*bqe*rYU3 z7!w8);6J@Vc^in8+0=4lq%+AeGLxv@sIpxhhpVxccvfI;YdMDqd~o5{05%!@vQr~m zQ1PW=cK?PG7#*&gjttA5MAmy@R=Os*We$e^cFs_{MknU>mLb?#4P~lGpKj*WYsm?<{Yv3#5 zY`%(#lV$u_#|r7H*l<9`?TpqLk!+yJ_4-Vfr})%8QLp-nH;=FTuqhzN3?M4DOhgQf z6BmF*t)~8#?cXs4=JX_k#US_!0ou2{76QvFEu9@)<&cO|9$ID|3H5}4f?EBW-OW|G zE_sV6yPx0eiRJY`duNPaD;$h7Et5R}mHxv0YFtlBYu0G1Z`8UMb899m=}NoVjan|Y zyC7PphQ~V&yCuyMmihV)T%kFN6;6MSqW9Bktx~h~It4?G!OpHBplv?+So%(2mVVyp zSdG*~wmF+_a~KV#Yg(EPjIXicdqN-9CeOWa2w(Ttku4(>&Vyq5A9Sqg2o_*U!S!IiL^Odw1UV;~F?<{aG{Cv9&TZ}hF zF~c!HQe^kk45y*S<$7dA@a1fxv%sBd5!#9$Z8xdT@i*`uYuho`_HINgm|NjYOVUpg_U0=IXI;`GMqKdfIq*L&NPv>Qhrxl7&LB5r^vbaXa zz-gc&y^v+*jM*l9n33Ibd2d&s=)GE=y{Gh;&zDn>zWRpU5tj(l^t+5EoZpc9`y!qT zN(mRo!_x%@IY4tdq6yZ3IO}PJ6m03j{@Ryv5qRLr8Dk z|2M)zb`&iXQ3wG((j+9J>eAI6UMfLYR5lWX+^Lo!n_ZE0<}}Ob92iWJ#!I|-^UV~5 zyk%2}U&V@4gAn3JlAX2*^@^L1ycHx^4S}xZ)DDH`CJB54gbkEVL)1U$WPe`%frQPI z#-Y^_%M1-*aAIv3kyHi%5#`k zJ4N;Ya3{>9D3zQ-Vtp=wKZldAJv7pw=)x(`OPjRPFZaZjE$)9*AX{y0`}I8TcV7Qr z91z}b&0RaDRtT294~-HDEYyvSk@eGha4>_}P79;!JJivPfcU6glV@Xn9N3Fct~h#+ zm2E}2iv~jND?*)Hq&_47d)wi{%lB#Y&$#d>?-ineWZLYIT}O^6qn;vGi5VYcq(+6X zrwNOwHXs_Ze5JH(vPY)T-F(c=^FvU!Fv6XWWLjxugFQn{o*^Qx|?zMbI`qSn5 zob`#dccvbzr|qPBWAhJ0Iq`V8t8A{U`?5#fpo2a~7i?6({g0|cz_o{E91 zsYMHcHp><3M*}8w$|f*{a}=AXzCdzywmq9AkPz}`-a(XhmFbiVGjRhs7P|RsG`z{0 zj;d3E8WnHD@a=J?7j(b838$ac(!Z4RxKDycDie#nHf!C%2d|ZgVGEhz*zyN+$|L{c znzTH<*r%9FJLkCE+99WjbD~OJO?ewWJg{VNH~d1U=7erH*Gm_X3H zwk(RreiQP8cE|c`0P}hLDe=zu-Ec^{6y0V0?pcTIpw%F9Jk|SNF4-?jD}#&OU?wCH zsOLz`0%emu2e*}Z+oI9((xv`i;p=J#5qIwkiGQT9b>Lq^~?Hd;9Q`Rbd!0g6iD#3vax6k`Re^_ z@DvULY`$7Y;FPK>72Mnomm0Jof}R{Fmi4fM>C$s#ZNP3qsmg>Oxy>n~ z5&Db$i~PDp%~J{J-4;#%e3U||K0?HP91EAJB3k}Zlx|LC26v(pBYmYP%J@cAgpmY$ z>qk5E6C%+mn@_c;K{?>FR7Ed-leEqmw=+zhmPWH9ZB*jUcG9OX>B@IAiOtdQ+L@p{>=hv6_72DI_L) zPb={8E~5*YsS|-Ht`X`F+qrxa|H-2-&)G1&X>)ewv7CY|*-Qarby3n~D(Qy>-B~k&UWTddOlz%3UO4TpwrAmn4V|81zaiLyBca-85R|Fr z>)P35R-5Nj$h{j`x}{JDE?AXkCQcM!&PBH$E%LS|T)-}vR@`Yurals`4Q>!xtSg^d zvXW6%9}RXA>waySdcxD5FvQD-+cRP)%!akhwOqsNP!P4%9C z#E7&)V(MI9Cj6_J4ofI$MnP7vXDTNVz= z(E(Fg)M}yapF8J&P>$gv42%TV78Ag#nQ`~GzY-bZq<1uwARfYj-JPuHW{*pqy@~`L zHS+lDZtX6k2xHvJr!l`jhEX6C8H)TdnPKLAhgNKza;fRwLs9~&nj0YWx-WJ){((-t zE*pNZ2-HP!(i*52Puop7Q4Y`JK1xoa32Etgf3j!zP!vzLS{Vk+KQn^EW<{^0&Y8I2 zV;Z0wqz?TsWYUj5gh1M~7s4kgRR>Hd=Z6Vf%|TyP6>e`z=BM~>!mvd%P; zBU9BA;k2c_$U_LUod)G?PHyC9Bxu-Ha|jazrP4dbO|41TktqDJo=x(2#Z}~| z_xPpl0DIpAljW{OsbDMqImRq;LMB z2_-r+wSXGr5f^yRMly6xz&V|}6+ zjU~))R_~Xl=gZ|rG)BNQSyBmntblkcvOyq~ zDh`Eb?4h(&qVCMCr6d>;M(7x9l$~eHOpWhz4wE{-J7xgH2w-u1zO))iVv<^iwq!%1 zIdTgaz(=Qfj#aUBM?BtHbRYk~xxe7w^;B7H`{jy?_NtoLU3pUzSMg%vu>QHgVX*Sp zsIMVueCaa%0(o-YkVjELi#JQ2x7O&~LPa8dueRd$P8ITMG7)V^@yw_={KtRMWx z_jpHaCL~$)_U`UbU{}Vukr>JzC#JrAB?DzM$d+0p4t)GI*jwT=K!j}#wwDM29=Vr81kwj&t*EbW$e1&$1jcIIDwpk7oAvH&{DNi0?JzAjWo1adIq@9Kv(= zZ6`rN*L=aE6ur$wIM{2X{?Qq^mFJ0n;h`N@ zHbTgC#E*RMa!A)RjDqobV8c8KD~=3D6nIyud%H}b7>R}+)c7uZGF0S zHrm%_;NN~+wW^b~1sT?C0%dZe%Rreb)+;U-e4eCI_fUS@cG3r(MU(arVP5I4zPs{8 z3;svenY8YjZ$WVr;QLK6Z=b*hsTb*smJp4z#%8>gVGMdrlIe6O|&k>)VIXYTh&8pk+L-I));OvzD=@(g=l19qtJzUH%RB9gf8%@B zu7@P5v6+rgllmLjjjTzaG1(@%Fy+{zXSW|ct^qkN3nHaZv`1d=DW@(k~} ziE-MR0Q(fRX|h=wXvH8+lJK z-Zl~cHSDa2C&cLbquPtGqRH=y2g7w|^`7~+xvSDi=T)gF#X}*5C>yfGk&!(XKX6t&`%4KI#z+JfJ zn5K9o0Be){xbK@ni!riC6<)4;PEya@!4v*}oUE~bii?ra1mn=M*>DmM2U1&I$I0Ji~d3(fS>+J zu%v>CJ&H>W1U+tktDt_x-L`e?*G>&YX7lWRua!q4shPlW_K zLG#}9Z$!iP_|IRsqpNBi{rYjz4=Fc9OAk$P`rL+GPFuGY{<=}Ezg)Ds3OX7h0|+@% z{xsJZ-{>|jH&5&dUy43U-;@o&q@j_tA&R%x{=~jUEou!VA@>f`d|}QgaP*{9+^8c> 
z^69u{tVa=s=Z&rZvV8^=C2ViY6;iR$H3P~YZ!{IDHG$I$IttEDZk7rr85?B+G-UEpH_dQWfRqEUT600U9@Fdh7e-l4 z;-qsK5muY@DYflsRtIQb9O+9wDYV^L{1-RomFR#_hfz3X%dOea>Cw=0v4`FCWzLuF z_X(dE6|eQ}g{g_s1!^+yM|X~!;J-r0NX_C3Hy+LdMNokw0VU335~y8)otb5HAL><~ zYw4n>_JOA+>0B(IqyQuDdc=^+4wGnE7Uzm)md{SwWiP+Qb zQbvVE3l!+7VPN}&Ct;Y1J%oB-(Lkhsg_gOyi^Kh^Ie=IU8gHlL+#Zl1XS?_o`NuG^kSK@Qs$hnD> zr-`0VqgU0%wg#in*us&OOe45wlU_B zf4&Z7l@#K(NU>OVv_jVdI<@duME4JNWR6^DHH7mXiK4g8>71%WVcGjrj0u%IG8=KP zkcs2lf{fJb@pJWietVTpHKgQU$r4vDxmvt}d(oEVv?xkUEb%a-+7PUT`mFZQ`Sxb* z0nP2a6iJYSoum;{9Zqtip<*d(u8@fRE)&v{`k7@IX(n|B00tHSrksPTxN#aMlm zP*q_jyjB87ytwa0pXZW!`|08#B55AC3dAzYnq}C^+Nx_yK&qienX%#P9wo75eGJ8x zx3_j`D`R&&Q$jKAAia&y2I) zRR~X{IxCHR?zr7-4YOy9bap~0*JG&~Lc{+F;8pwM`H3PGTeksiln9(JlgD$()wB*Y ze3m;ys3tngkqj+2JkYLBXtik%?H^=qRTX|(v5gKD^MTzIIxc(Z<_!JJ1sX6RSVHNR z{(EL_EL|ia;JJ)JQ(xAES3yCP=3{~K)9n~FC*Ks@j!#Au2RnwU$E(4>k+D_^6jN1T z8E|UwqlUQrV!eDW(sjOTF`%v3r7YFxXCm{{bIw3gJXNvXrt|*EHn4|p6;FZwZ2*=K z__p*%Aih^QUf1hmbwIMRzdyQ0cD3AknB{KaaRhM>k=-QnEWm5xh5M0tV@Q_F-vm3% z?1fEm;A?BUnk{XzTgzY#2OZrNmyW{DIOsyRDxY}k!N7yf%dK=7oq` z?iU1K$WZj#62eQ;A=wv(S41IX3R4#kht%lt`Q;5!{*YflvgDmLYi|7r7AyPtC|&u3 zg7LL)2&E*bPQIRPAixu*@$|wyzvTVc**}dUcB^-_)47@}DX%6ND<2|h8M#_NW!+;9 z8tjX5Yt{Y`jN6{Hj7l0kJp#mvCOta-ljfBA%%FeqX;hw%?YHE<;Ea*}@-t4UMq{X? zm3-3DMwJ$x@>lW^vc4#+#dF(-L)ls(;nmPVmp@@X5-32&g8g|kP&Ww40eHB~-uPh$ zR1H}>Nf!Z;r{k8F$91jLH72;CO4%Pnp3YnmgS!_?>Cd)dA2N`cJv_ciN}MrE*;7Ou zOq|US=s5$H#Vq~L`OiE5YGaqxjBJ|iLc2@BTels*PT+-`RKVY_K=gH~ z4Z){GU`%bblr;Pbcs`IVo0iNo<)z*{b zIq|n8`_cTntEeYu@*I(Pe+rShFP)sHeeell7iRx9gu$C71ZE@Ea2^H9V&GuhG0^4J zFP13ocfZ{%?G?Y}NjzwBZ+)1{iGIUcWtT51&~r1_Ioy%X;erOgg%(P#R!pkZeBS4# zX(N{~H@!#t-oW?;9(krTbrpJXsa_I+d!5mTo%L29u{ zLOpEFnR}B}XR$hw`U__mkGn7rLi}kn7ZeLs zJQSl=#7uEvG`4EAx_z(8ix~HUH*Jux8REFV1QGso0`VHH@$2e zl-zs-o1Qan6?d55Oa3bMrCi%>?;0yWiy#I7ZKqz8NrZqyoOc!0Mr){k5>c9&`lnPv z4HSO0U0X~Kt0>rG3T(hf8gm--NZ+cu(s>I3wqa{5#L4Zi3UMytDv52{(|x)a8${Ff z-?k@gfP;qxbGT?)Wu;j;RQ%T5<*F4N>`21MC;GfX9oKba^#K?yK_n=%iO-#5+v~d& zw=$||m=XGe1L^^|jb3}a2VF$r?ApKS8n&2^Pij@~=SzsrT+y5XMt)_Tf+16iR}o;L z4l)BY=pVH6z%OeAe29WYiHDrcUEttm*3G@gUNS$UPXLD;4pK?3f6WvoCVSRC&$|{@ zf!t&yh32Rajup{&USCpXPt@W~m(a`vhCR&rCEZpzxxw!p6rc34RO6bjM};dNdQVk2 zsoY6={8rGT*rO;n$8!F;3z8@K?ds!f=me)@^@JU}hCgdVw-D!nbmd_Ur2FN=?KbK7 z@i_x^&KncuUGF2Mv9jeyF^;YY=8F$u<5QsmB8WR^TP~yQ(=Jsl{?+WHX%EKoV#3u` z$LJ6xWg8z@p4`;K^nh;no|}wrD{qb;PiG$IsJ@9al15ZYGf@#G^q>7YI<~hxhpwRT zRQ|b%NIfonA%v}80qLteBvvX#)Z43{H=MOy@#lezLS2a=RH_)YWhUA?K)?arx273t z@yx483iiDy^sRLt9`%+Xm*bJ=?0~fEX*O=E^VZzc_a)j%foQnw^*-P)A#4IA)J!MFLj(bZkPc4)u;B zNLc{@i!8*2XlBH!Hq6}}liqCaxezyzMmoQIrL+GwbDyZExmp|CsMuWzd|>=r)`pO0R%>H05`uCSKl5L1KgFalC|$(P<*aJ0)R#a`BYOaE{V-$9Ly)$XOC5) z!mrPB4Ub{csri1e<|!v$Bse`O z;EH-{nd|c!1GriZc4912VIICDM*Tuh$7!fmFT*byoyeX(rm6OH43ddN;Y+SK-RiBWchp1hRGO zlV{6#56b%o*Vz{{71Fpd|gL(-dI#ug-7aoxjPuX)tj zVc%;n{6K%rsotByy!aDF_VCgkbTYn|mMF_!5m69sFN5f{OcAx;q%y+WFYRKK<5~YT zam1!HkL)WOkn9N7+(N@7`8=Y`$0HBUt1&1N!3{?fs_)*fy#PqNR5L2V?0{4{9rx0f zv8_av$M?i%6$RR_ZXbn+C_LB~X)rqYGZpG}F~yClTBlIl^RN*di(}YEvYgH)E0sz_ z?$r#&MkkZS=MzeyC49b>?A7hTiW|80UmKBwXB^|5M%1Bc4z+H>2+Cb1Chv+Er1Qe` zAHK%R0=#Cm^dQ-yKsN}D3*CdC{Jzw%+jJ@g$Nf*qxZ-MvH5d88xm0;8;ZdAO#h{-L;rP-tZ>$@?U>_*)roHxfTY z(oxDCT6*oU%k7sv*Uz@&0a#!D>4rd<*Zzn%>I8CPyHsg?$O_YD=pW}Jh5hK}<*LNJ zdh%o2@*-M<1a)FRHYIGWJSvp`rmS}v{!QMjUg$MVc}rQZm_uzE9>yD_V-PdebP8gh zXb~mNc<+4ci&0EaMxDTnX5TLZn13ZPe~}mpb7sNO=G^ zI(Yb;;!wud(Qf?}xq5EjhBF!qVry^)7XpUQxEkxKuE6--`Xglu6pH>+m`FxjiP|vJ z- zVPC8zC*6(Mqw}L`L|PFoKRCq>F;_snq0N8!>zTf(8Qrm~2HWfM*ZfeQ*3#y_W0O>) zTStG+e_Ap`29J!yKJ_#LvrUdWdR56$2ff5KCL)94L$Bp?G&b#Y{U_b#J|bsYPL-i# 
zpixcDWaz1D$&IophBCEo9vrXbo3-17#p^V706k4Uu>O;m^5CXw52Eb4Ik%|t@39k< z2XPthY|~(1nYgfH*#2hP?p2gz__Zo6R;WJPmeQDT$<^+k3S{XOl@!F+74$1{j*IFh^+W^W=NdHygrVsnc7zWD3megT9W*G+jK)ps`o)^w`2o-M;gwbji%`j= zIH4ijuS<2fWf45gaH*c?}4%|F}-u8Kcx#Wm&XQy@m2{pbNusL>GucD(=(!u zc7lymtxh7G|6ow$*$RB|#cue$eAG2S-cWu!q>1W1C?6TSc&5eu4{`VwL$S zU>qRd9nBuXar=DwM2SF1^6!!@5FqU0Zcj3$v`c7{>cHTo9gNn-#|DxOi~-Ht{C%1t z4I^(hp1K#F*t+j5kp5qEZHEgYLmP8CH;`@NHg7vilQ?vg(s-bCY13~G+hX%f`^l^& zJau?00*WCM%@kagh~90+Ae*fWsp>PG3c^4I*9;`Hylkxo5@!x&ytbpPdD)f%rI1z- z=~fqyXQz})EY0@` zp%{Y%N+9=Of+SuCKi&IncJNAlu>OF9xsSWtS$d!=lSZ-^6rDzIg21cLv?0RifRU@L zOQs6WPx|QkLi`TMB#`Bq@>QQlj&IA-r1*28y3%WldHvYLU}Mqg9+W%@_j{Py#~YRQ zajIHo@nCBM*r>!GOsUUE=Xbn?z$QKL_zK0Q)Y8r%;4G;`%|hv^SO>#i^fAM%V&9=% z_OuzUT(ri&Cei3srdUF*;X|;^q z1krLit)9;UR-~*&yjTIglGl)d-!cr!;Ponb94%BPUOcV!Z z!2B!$48<`-unvOSruW;S2LPv_k}Y<X{Q=3z|}YspXw9tx)t3 zAJ#NqxOX@n9T_QL>>Q8)A?NtC)I*UGHsU$YjCU132~}tl*ZFB{&OlKai0lo2eXHwu zIu1K@4h~p^Yt~L)8S9~+IwhT;1T1wess#TM8D*}g`lNU$yV7Vtpq~{uq@xZ>=kO4x znVP$fBFh7Z2ukYMY->J7P7tuIRxY)DK&J>lLUl>vtXs6Euce~8DQU9xr>I}3$@icV zqMI&Dybz0@Ywb+s+>AwT*F?Wpv$I?!5$Bs{mbx~(WSDEwhfzjsKeGn2kIgr&(t-dn zU4r6ZKCuQCTqL&po49eOV_g%vuEx}MR90Gr~&Eb8=Vy3=64 zI3JSF*ZSZ-+gv~Lexm+?%lYfj5vE>-*hq5%`C40)cM~okqT$@Mzg~SX*08UcgJ!SV z&_>y$8)@&8-8KHwm$k=hpeKgoXF`T}g>pXT=(1XBB?>d-)5QL3H7i-p97H!5;dzY4 zhJy9_89Y&7?IqRl;VEAf##^s+B^74^Z`sz7$7+s?csF~BL>^P)`cMT_kAEAY&tK=C zHfA%;E^`66<^@Nrf1c-^5Lr0>t%2z^&X|}aVA0nEaV*Lm<`3?E5<=ye30d%FS$4bl z2PV8UT4$#-z8+EDMCt%3nad<+85eSxZ9@SmwpO&!e)CoR8!|MXS@@bJXCcyS286JmOIwY06`~p#A~N%y~N$6`NOad6;Ix z<)uywO$i+wlXaQF;GCHUkt}Q1Z9+@6H9-lJ(dO$TWn_WWG&$zyUBl9Q5GTT_ zwxI9|IbDRp>~i$CR#O_*n><2c!z0;TT?2wn5A!~54zpg`%95db=*^0XSe}$2QLubOg#1LewZvZMunr` zOXlHd5F>?NZE2u+h0#b}JjE-E`fmwj8RwHI+cP!r=u2NWE<4$K>Zt-Fxs z;}BBK{25qB>C-N|*o*G7SUFslf5nB9%3+}v93c$Q2;-BOfIeJkoh)rJhQ_|UwfidL zqKI0H&zD$Q<)De@)FejTPcuJ%8Wq%zpa(Y#C!`9RkH+s9BrLRn<`_ zWw3Z$D$YY=Id})(6XwYaqJ^(w9*)+cH-;xWm8AmHnG8OVl6N1R0ON`rl3-ZsOo!&D zXjVYriRr}9n=>gXWgF`$09(_>u(1Mcr%+-FAj3Qq!o|lN;D2ZYHqJCo?nEP;bD-Z~ z*`L4c0o6umz)X>$Pk-lAF?2pknh&I9i7-*EU$CC_FBh{WZ`a3Q&6X?fX^tn zmN5WwNZi#?Mg^AJhtRDnRpSe~Y$ce2j4iu16Yg%Xi7rVcH2pn0N*?OI6uiShC2eb! z>Udp=Q>oz?$zD>F4v;=OF|FYX$ma_kY8oJs2!$PeSj1dnxYDnYJ5_jX_6>XW&A$rUzX?7X&Q`1BOn>gxM>B4)T&`nNRJf!y0a4j@*JZflTklV7| zcJn_NJEs`U!bZ)OZQHh8UAA4-W!tuG8(p?-+vu{*uWU@8OipGl{^Xp?eY2B&@g{Gw z*Rxh&mP-55ObJ_c_Zy-%RH-YMRP1$ju0p%MJ{i$A@y=eV-LEx43qOhvcKKq3MI>S5 z?yu}dXGtB%GQo1M<#T5sd?w9@`$n;Q<4?-{2gU>143v+FiNC#uRw*dnU*d$=-A%r5AfK3OF7 zQux^tUa9X$!Pi`5lza1E80tPQcl_CP0ejPwsGnPrqrGZ{XYFd30k{xk-#D1vUonYa z6VZL(y^Bgi>q=}vgKEHQvaN+LezopVO!x}czf+-r(js0^IHx8ldSeH)tA&NjV_W|z%ym19-)VnIT$TMR!?G~&&ZC% z?9@m0v9PD+VR6P>0kkV@{}=G0p?NT+tu7p-_n`u{|%zK*f^R0`|1DRLvyn;GySjY0L|d4Dcd+4vNVM*>2TXj zBJRO1Wo*+xBJj*?l9Iv2Y?AKooRZ-YY$WvL)Qdi6A9)XX&foqu0PUu~tRJ1$0PEe= zp7Zi$wso?8Cg%vcY2aB1BS;vK;*`|mHy}YGOkSk%NO_4e@X$@6Z;qm+bHNpCCb^08}WnR}G?REhHk_69{n7lyzVj zCddd_o?Nu)g(Yk&Q+WTae4yJ%RqQcS?LBLqx~0-x=IMDKmS2kv_)o>8G!N zvY240z;533$+1Ff_8laI5D?tm-Q8HdG2JqnQmIgB`|1j zfvW@h01!Y%kY8O}yX$?hLipcg*;<_@!3t=qh7n!@nn0%U`ujj< zu5=9I-^}dH6nR-6#sUN=IYGyz%R#3JpugqjMh3Ajpxh1J12?`($M3qP59z140!j>? zpOGNjVc!7{QX;v;81bt&6L%Wsw)h4&2s{3uGUcFa_ngr6Rixpqpvx2JB#d9~<3-S4 zgxTm|2z$Wy4~}=vAVl&&5#gciV}SeLd2tB>^7Hl&q@eot5*eiZ_laQt5&_VjpCT`A z%+A3;d;cl}`hXvoPflUTKp=y1a6AKqh9I7TAC<2KxQ0(y0ghel3t%RoUsi}V0$;}o^C~K;$_8)IcYVG|Nlb_VoVZZ^qZ6QD0fIh6K)Z*7FMU;? z**|^lKMj?cLO;K7Z}Wrh5z?4|pk6V<$EH3o5!?Dq2VP&snSj5&VbRx$(ujdZ{fq6P zj(~sj`3e5af! 
zJG|~H>W=FdRl@;`=wAP<(jszt;~_#rR!KxQ1-7pTZuzhOVQ5rDgwlkEX{-O1k^H$B zIV5mXgp9zM27mt;@tg($`5AlRa>ZzkuZx;89{YC6?BI(d`c!cCHy%Gs_A`B3D+l|P_?f>&+4|V|NkRh^X_Jb9`2ZV6uFQ^9;KGR1` z3Ds|CZf1}0JqHF16#gY}>n=7XaNBqGyPbzuU}ual_1Dfw-)(+66v$nGW&z{$AYDk1 zjaBdlV-@-M)_N;Nh*G>V!xPs!#eM9;9w^Hah4o!lCU$gGNh9nRk(O~b1G-e%6r%p= zY&Yj6)g^xi;~JG|pSDug*zB)=XNAn;homfT#7ca8K`zOMTJwl6pg?t| zB=H)mEzeK_fL`dAt%HdLc3FNQC5zwyy~AY&2Ur{_DVr$6%vZCtIXC75N7cBk0Q~Ca zCwg8nUh)8&im>CA`g_35uPA+sm58tBF56YUKYISzUyGY?k*NhFH2`} zmDIUd)j#&JEsAg?7;WKT=l9BpxyJvLqT>p1th(Kamr>pBM^7$cMS|1j z$L>|=EE}LE?q3x75j&c-ie@*g`kSAFb?s3@;|Tu78E9r}*A>VDAhu=mf+O#E0>Y9k@m`{#Q@?Idko}dN zTaUzI23f5K)ae$d8@gtfR3ZB}Trk*xIN<$pMED`hEzx(S)cdK#e}AriVO0T%Czup> zLhx62bB`nH=vigFw;HZ!cdn1X24tj1->s0mtVie5q~X(alwHTX1$q{NSZt2B$)0rT zEi<&$&4E>hy&9eSR|wdW5tT@E?&i6W_H`dKN$rW=_Y$A}eq`jv)3t;_956ILV~q{J z3y(AI=D+e{GC}#JH57YGeaw?8j(nuI+EH=iT&C+ZsXFDS zXs-w#6p(z5Lf&}xH>&8?ib!(>(Tni(3c^Y2h722Jr_z;8_ucKZ>2}WIV@09ggQTGsd9zcM`wx>VGy=ZnKNl@K z!2zAPC~N0_;R=%lA>LGn!L`V)wVohUBtyc-9Kzpm!Ex7PkoRgKxJuKVX)f;z(WB3Q z;ezi%a7{`IWsbbQ?)L2}8R*ELtKSv@bra0n#2kbX};&*UlMY;XP= zod4>2=m?zD!8hYugfnhkb^0sbpy6&f>ANTOZsjhswmB2F>PkUS5nI^-pU#CEyz!Cl}ac!~du9sZ9I| zT(W4)Z#6x)+Pk>`&qi0e5kWq6=X7f3IRp_1@YMF6GwW>}*O;}fmL|rS;CDRBWQ3uH z{*d7e3G3MUzL=MQiu`g=Kq`aGg^Ysb&ms02RH*_m9BjplXw15@;HuZWy=8dk@wiw^ zazBf+1sgvYWbPyE zQS362;Z&3a3JX%JFf@AVp|b5WcL&?SO+F;boCpzBHa43xc`mdeWA?We z)#pCvx?t^l3>LX-W(NHx6T7Y zA(xE7CseW>2E*&xk?JxVF%RoVqu*ewe8R|ta+E{J{UF|*NXK5J0N7*U`qD}F99G#tf;{c=! zY^w(=Jmrn_y0LR2f@U%LKF-VxyC@NlG`zOv$Skrke>HxtYb=#lbSB9hPbAJnL<MpwT1x9U^qzR22e8}l)0Ys$NSHX zovbWaW1lhXVO{;UE9`T?V1$rm{`{@}nq9k%c`i{ir1vY`zAI)Ku7t&0xQYzPi$37Q zI7-a^=wF3=V6#1`NL-C$pCwb*5#nG}OVVcn+k;o0?_Okvf8k^LUqwhfj?k&a<0OVsk8MlT3Q z4s1MdoB5e78+8ga*5w%Oc!as;PYa9Nw9UDD!=7|JW*v@Z&i_VfeTjP@s6zn!%Wl^1 znErY~HTLL4wOl!7SFq5~&O!3>>>kVN5#vyjSklS(OR_*kuAP`wZ2v>N_WBm+fyS%1 z-#ukxeU(U+NuG-Q_L&1B|NLl9eWl^PQafDJrN0baRK{Oov_b2tivh*S^j^7oK4cAg z)H3d1GWLazz)M_YuDQFr(6l0K;o3-Pdmdqx|HOz&K+&VS1b@;eb^@(F`S@% zodT6}Dl^Ag88#cb?;!NJfWa?5#?dy&#V17<8@KQx%7~mB(|XCE|amQJ<%c;hu=u_oEB->IH;x74pDUYY9@rzl{E0e-S%MCvE! z6HQoFk;PmhozO)|ps|3yJ}=&S9x1X9&y#{rh3lQQ{E|jJsHx7}zpD}WR)yD3W zi)=gvBg9G55p8St?Y=g# zqPqadnlm8MjNr{Uxbhj$LA?tZJEe{=_r5iG`t!w z!DBF`_nWt3+sjO20kL@6$$r66N64%WcA>t6sI+?8ndBl>^MqR1suoCH~$W^pTk##WV@^FnVPSh)uA5z-9niJU_IH8OTcH*7aJL!)*Sj<1$xt7Tnc5v&)OxCe>u$J=e%Y7(0`C8LqW z_v!t9O0YdZpVSrCYqj-8rir@}0an7_3LZi4PA*rO%pXg+Y}GHZkDE>OHRMoi3X^#5 zbGWp_uKWZ40%{$jr< zqb|leLvfYe(cvw8LqcyUWm!g(b>#{=SsiB{MRSvM68Rlvvbul7JQO#LY{yKMqFds`+JK3CAX=CK! 
z1XU4%@!QWh?1Pxi)3dK^q;C>Dzs~E|7v5)XerQ2`8JHW7aE3D19P(*XunO(d#g(;N z^iqa1qfe_BWnmsI^5OzU_qT(%dfuh?&$GeC3Fo#gBgn8Eg>gd7^ipp&JrajIL0P2+Lt)r_t`hG5yrg2PD{SAxm% zijPMBCFy3l%a`jdoxc?IYGVENX>{_v9u%O{m&|c`j=sqNLbYBv--qu*xBbeOFKSh# zeV8;R=cs*xQ~8j3(dl_wS<)*$PK`|aO^&g_!S}458mF*IyHEEvewx zyDu->Y=-Em{B_ZA!_`}VoQ_{^^|AKHA~4UWc@dfNDWF^s-PVI5bZ^ozbUti1SvA12 zn!XbbG0eslk%YUUI98V@F5abotHq(h+PCb0WF2_|xMiH4L_h$R7jr>ZyXuY);p5lK zG9$?nT+;Q6vfiEjuh2RH;@FH1I{kwSvFB3!(Iq=yXO$e7?f1c1IaUHEHm(IpMqw+$ z4;$sG=T#Kii&@8I%EY^!gGM%;3R&U9Emz!_>}&hROZ67YnVol14LA4%Elz|NwXr}X zK>ir&M`^NcukAVYmJWPMBY%spP7EtmHHJB~$^9~_sgaf^iP_k}6!FtuJSodj?>S(3 z@E?MdVR>psH)-#Ufyw0Hbu4nKUqA9(_Cc6e;fqrw9;)&JHD^jr)9gWU<72|9E-%!_ zqFc$6j~xFt3Bo>CN9t-7-Q4^XLaRoQ(RH?Ba10bIcYh2eAEsHTKZ?>PNpClZQ%tHD z^?Bj^wKy@`LqvNd6mh$p8k=h8Lj#*S9~bi*e-y3%xBr5inx5V*DI&4)IreNcZ%6al zLK>^X*`J{6gJz1S}BU6h~hwye(2padVU5VFujS9^=JKji3*bM7kxN-H7_>6U1CEIG|#$k=dAD@LV!y3#Uo;HQb$qwIaO z_uawW@wyWD*>eoZ2*lQwyquqWoqlJW4e*LA*8xV<^9V+TF6wWDU|in&(BYyBU^P_z zleQ+3q``_APi(b9LZ1Pi$9dRBc4Zzjo;;RMKK17s{J!x6rCg7UW3hVhE=9k&<@l3` z(<3ttyhOD)7ZF}Rw~=*3nYj$zipbQl>N*bpiqDvXzUs zKh4fuip>#P@tlXTCqYu&0jegb!ToDZy2@#S%i~gQ-e7o8CX|t^p(~zMV1-P&34(FD zHMcl@sH0?(EY+yG*(%`7(3Iez%8`i7ut2pBn3~B84Qn2?q50~bke=M`ls6ejkj#sS zTBPq?@xh9rtJP#_{&4DWjCXCBXSd`jsBwZBFBY>3eiit<>KRQFkV&IA)c8X8M-edwrxk5T!5tdWd&pbmj6Nw9dWEtd0|YhC@#~SGv01D?E@teCmEv0< zx#)u{lS-u=$l^xnDhp|HnF>(+nQ)f?L>V23%3_)ZhqabHyYhGfRW8?y{kcD@1SMVh z{y3a}x7`7K{0wU893=7pZ_?SHusi;zugi1ZYgv%IJT@__VM_wbnh#bN3#TaB3Rbm< zT)L{628Yyqf`-nL@PBiy!o0nQCb-Tyx9`XyTlu0H*q3|+Jg0ip`H5$q?5=6~&;i}A z(PT*ZW(BGG-`O=%a1ezm%C9KC^R>uO*+u7%&V7upb?8^#y<93i_>ZdtB3WGUZ21hJ z_zXn2(61-kA%%Ib{D}MAL^;&aD@pxTEU+U3GP2a?6`dC2(W~}Do$RUZ zt6kH6I(m1A35bfDrN5=9XNC*>^ZNcU4-^FR^-(VHq~d)qVeYOok3%gbNoV|?mtWDJoVV+>?JN>L{M`Oc#U{VK<5Ha=Um-}@u1O{gNF74D!|5NC41iAxpIWwg<)z+}DfnR*^4C-2o6nyb( z+983kOD}arJ>%8!dG^If+qJ_IoiFzp^y#veinRvW*>9Ns_yh)AUjjy=Pw1-E7X#xS zgygLD#4NpZrlq~#rgA?D{Tq8%xlwrSDe;U>DT@kQdE7cCTZ#32Ev4u!D=gE(vCcEa zr3`zv;;kyHytp>g1lT@Tzm-3y)CQ9@HqnHS(WRtZ9l`umotxPMhP()Ri`{POCerK7 zzEZR5%;WhCp}a2|lIBY>qdc-@cVu4fN&cws!ce-da0O%NfhVsiZ^?S>_Mcq`$-H>f z&0=#gi#qIq_%Qe+ekedn!s=&Fo}q(MA>X>vpt-!N%S>%IG)rLBLD<2Qb7Wxz-w8QZS3wbU zehAZc9;;Mb`zwJ3xWn|oW#|_hUC>V63x(*AF zQijKTh|8ryPr8;pasaZHUj?}4Ii`6QmgnltUZJNhApK~jZ)fv4;l_r4cOj8R) zvaRh;Xeps0rN5dvLmZv=(WiEJ{i>0iBI0j%c8^T0vPyJ5YGI{%2laux<|pRf+grpK>7(sG%(H)s(Od!f#6W{P z8?80bCW`!lwL4r|yj$1So3jvORd@I_;SCr*|P{1 z!lP7(s|ct+_xord2Im_pR0(*rjx${S_5(Dt%r^C|P#-rZSn8g+o04B}aV@M10%I zJ9;G1rKMQbiW8XF?N-bmrk$?n^Z=MtnX}ciucjf8Fd-#kjiqwT1Ab&xHh&p)-UD^& zTF)oILcN`%{JURR#n0hVdakGh3VehZ4;Db*N}q4Aa9=)~2hm{cCl9OvnXhWxix^gW zvL`I3%hGU63V&`Wos80mv0gNk+0)%s@(vQ&T8eOf${nW5Sc}(ZJ!^qWs1oOCIjw z78BQ+`GwT@d%%>ZDhdE`ff`e9I4xkpt{yhShT{(ewJTu$hx!j(F29FYA2tD)&nRhJ zrf%a25?geH$@wqbW7#3QCeqH~ju#GL@50{o%?jd!VWhY7)wTRkH)`%I-t2ZRfsQq> zwXjXCC3lm?Y0vYc6)(CG8k9$MY_U~pk0w$ps~tS z&(|nqhAAq#>)Y&DBirB{%&Tg=%p*qNRqEpJEs_{AI2IwwCH6Aa_tI@g{yb6R|2)J| z`6(i)q(a!6&6ljU)^s8ig=m7OL3o;wnwOe9NZzNIpOeR`mIiE2$sYu`<{a?L%(vLe z?1PPHq?LcQuZNL=ercp8BiM3%DJol<`yb)BV-AXnMW$%N#8*4{H_X4Pza%U($DAED zq~XOp9lJ7n#t7TUUBk+4e6#wE$q#YsucT8_@>r=Wv^dau-)I(LdQq;G*HcPccYf%) z;X>o8WVGeLEwP!y_sRE6{mCau{I%9eB{VKZJpsyFpwJL*BfA566Sx6m_Lt$;l*Y0@ zIyA8hh$1HzSx9pJemo22%A7F$v=*!0<5Bj4M=2JD@+zv&{>d+&wH+u~?!wruM9Ma~ z)!73dU~Pf)F2V}}yT=*eZW@h(SX4aQBOW~On=hUqYy));CHPXlGyS2`8`ff)s|S;Iifp-w8j$Uity_Ss4}X^9GEn22 zQs?W7kubsoM9oH{WL+irINJr#BQ-7|&rW#heKy|R*v5$b2}|YeGGITU=N$-d1@{Ap05%o*8O$N+!eQq!^+BVvxOOM$(wtmB79RU9ngQYixK zUrJQWQnN^FXx@!R*}EYI3crd!Y{&r5WLs1Ob-S*1haAifTJ8)B9k=PrC{|o;!@k27 zMDL@$fSkEW(3wpV!Jf6h@$BRLF_wAtXRh-Q$TcXCT(m%%!1$4ajxVkrX>ko)+eU;w 
zoF!(d=OB6?TC(vd3U0iDGKRldGp`yeWw%il!XQq+`0%a~?Ru1XUk&DQdMpb|y_hhC?!knS@Y1%~S(A1*Ocp*d zdxif-Zn{84?UbN^i@^=dM7E|lJJqv@806KRfgOly4PMLC^dR|c|2V?TUBHt4^WRu2{MDXePH+HhL1CT9#&5k{>g0LS;5I@hb<qzWgS%9B>wdLJfvli?L<@zsy>*bIk7xFgre zqJaB>RC3X3bDD1o!}N$;n{rPXK#RyYjL|x?Yn>)(LG3LM>eW+;MuX$bF88=%bvt_Q z@HVfx5c)60Zcs(-lsF$ewB0>E$Lz0CfY~QKcB3XhpJWV4l-t{5h zGlH5FSSar*2E za!w&&?^0iS_qZWsG`wYn04!-C1h(1^eM0^+52OcMz82C}E0@Mt0YazJkr5`_z@;s5 zof>P;U$J?1E_B{!4f8v?cr5$9c1x5%V$?r%rSiW9BN!zPmn=X%yw%pUd%zfA%5dK1jTC$eR6%s ztC2h_C8g=1EL;xr+9?eIJ#}39ep*`RqS;ftpt<4j=Gi@_(k&}bixpjr(56m;<>`-s z;O-LFpi$m$Vj})l=hbn?wXfb1;O8#sgt95nKagu*;dcK4&f@qFa25+27w3QDSS&;= zOzi);R>8u;#PomhtpB-I5%U7BsI&PO9Olq2aPRMbRVsMF(nq(qb%P*+AYpH62k?V< z2XYGru3cw4UXHc^KUF(bwS_Mk&b7?G*I_}TTCqaI(*s~s=LUeu{=xBSSR@riQ$q+Q z)+S~a)+Q1~#d7VA4UivF@q&3Uo-R%9wTJI1!D(!ZE10B6+^(Q2G`uU|M!QxZ3=SYz zN=8^rMpo8<)XdD&zA%K@ZXicvcUsl}5K_Uv0|aM5g2X3#XU8_ChUYL&54#dTE!6M< zq2b}t-*tyT$h-wjsTB?Uf&*(SC>D_S7FIT}iq1?eAe~-ch~VsfB~?|VjHt=s;gCfQ zxv)jSO`$~a{QX0dD<}mpF2Foppff;ys1Q^*o4`M_G01@+1$LIl_iZ{e16wOYYuHdg z&^9%)fzJ#DsCLh0g62Tu|60@H1O-P>%|Dx(-}azh{r*;<28JeY;Wq+h{-|2(Kdx(Q z%gY0^3u`-5lRF>=cBZgkR8o_yonD=bpfT4oez-WBom>I4xLvr~>p0l~aNki|SVYu? zAWRVCulbKUvbi;Qy1H1p)i(ZR1iE48=wMIpfgLW ziN|cc?J4Bf^rYi9tdzj~_@ZFae!LX0`MF6vsCVI8M`kxj5S@Re4!>VN>Yt3l28O^H zTA7_7vH=+C&rtqrLPiB|yI9LU=Cld{46GKZ*&n-f zKlVjG{-8-NO)XzCa$o7+KN7Y!=BAfVgrM_OT^&Jxg$IC}@t%KHRbgLWXlQ|Es<*Db z>(obAu$l=-o-tZ~QWhq~7AH|HN^Op}kytHY(3mSu?R{&EG{XkZ0KMR2v zv&#@Jf7%1Qp3-5UHp+fnsSWKf44%F3G`YEeM`ve8^`Hv=1|vcChl4l&F%z8Y6UY5G z9|G*b@PGi7oI*0WJBYt-E6F1OC6C)L>_(vWlf6Sa0%r_=6WIYJzY0V|WA~H2{Yz71 z6n_(`&xd>q*#J>w3P2qx+#rIyCI18qexrR8B?ycNL_qczKO!DLcq)Dc3lc1U6S)B; zzY~HTEWbnE1ti=Ng54Y3Lk2oAIdX8c00-?LoYeXs+5C+Ee%$htiLPdeSmfiFvCgWPLeq#kJa{hn{ zg`AwI``_AM6OIo&<97ZKAHCMNJ2-!15{?fa=p%_*yiw{Yj?wkt(FZ>NGy+Y0A17ch zi(mQ+DW=+fgA0+ry5ANwu781Q2@R}aXY>423DOnt#-1Y{0X3`#yY1_HDT?8J4?`X} zzY?-XEZ)WAh@L=s-L3y9sH|&!lYZ@{H3p+Ng6T>l_;(H?_1mzAn(?wkz2+8t=nLF^^aITN67L!v9^J#HZ-)YSukP)d>~(2!d&xQoG$UPk%eEFq4Bn!Q~U>Rzr6$0Djz<1Oz; z8PC6nDK&B7j8_#`8qSl%ztU%zhAoFLz|rTFjxtT=4ig63S&#-73iUk-XScUz3s3$0 zNz>$tMQ(XxjcM=S>L=ds40pVHgmYI)aMST#KoN+K;NM@_O5)E9E9ssK6{Hq6_L9tL z{gE9RSbR5dKyeO^Q1%T5Z1Ubl}B9~v?52S^e)8c@|$NGT)0K`h8aj#+S)YPQED)+Ouzb7`aMdu zDi-qkl?sF#y>W(^9M`be=x|W}&N*AVwfk!8F<1q79u>SfD4ioy^V~biq?VB99(YC`Ve+QuPeAJxubNQc&O6529D)M-qg(zJesl`ps9*Y!GJnRd?NQw zy~`Yk`L?qM4r(fK&IDGw=1RXT8%SzdToO_@K5Gz9!0}3Cu`}x5iQcU&10E*2IS1Y` z%qiR?M@!wueQju;qSimOfqd{9&J$WAQ^=xu1F=_eL6#d9>P{Bu1=#pRanzx#SWh7W zam$7d6$?%3b8V&p;FJm_z@a6oS^`JWXFGlLWT%?;-sZ3ZC}V9s_UuO`lv8p+vD7vk<+T zI@IJ`uw++y(4wn{{UE8mYB3_%#hi=mi(i@g33+1*y>W&#_+p7V&9_9UN0N~*?W)ma zFExI!PKx{ad>OJLqB$J*_6KtmMurSep)&r5_5zK(_7;Aclkdz%o_B{bXX3|7L^{UC za&o2fP_>Ek>>3x)7=`<X1H?AdZ?a!R!EzP*K zJQlX${(>5odYoci+RIY&FS{Ox2n@Y1xk#!X@E00r* zwG-DEDI5RensSlTuhB#!LeUZ@9^?q99`y>P!sGJ656#pJbM~c5F4L5W3ETWQ0@~kL zNe!H?U5@f60jQ+RFCrce?M{c$Db;8g)D$Z_$nK4Y?dOkn2;VU1)n8mY0$WLoeWKHj zi)PJBeqsb?Vhs6yhi@EOjl|3PMCn1M(!s3HUv@HpL6X&$B;|6K)5Di1%s(eFNfkRV8{`iNclfHN<0b7pJysd30C22y1v4VLA;#Z$sEBZk=%!08 zdp14DC9g!EH`KU%4&ms2l&$AI&9AaV;3%tyosz~<;3+(%au$bA#6;72jiV|H>bc0- zZ}&}gF&AqOYGyvXt4)C5_&ROZMYt{NstwSQtfW-f^kh={0fNaCz*cg$9xE{EzW{t^ z5_cUmJnFFOO6FZATSz8aWlkK2hGl%qE(NFj%Nlqh+fueO=aD!)=rc=h<;S&#D(;m_)#KtaT*1|;xZ#MEs)j>6%gaaFlb0D0IbuTM zb;?Gnlk3fefV~q2eIgj{YgojlarAlAH8hIUEuWO+ZjCus&-a|Q670u?w2i-om6YBi zCZ#NPxy8rzHosi`WbH4H2QqX+!Mn`12o-0Wu8XynmHG6*B%PS+-!cJ4&&*2%BfJfe z2(68K1qtqfg1^RaHGU4eqFKy`&k8>qk=(`ZYO%nn83>%PJ*zY(O%lmrKk2=-xjif;BNfCbEK9;gv<0Its%xaDb-1I_BC&%#t)51GM7 
zKy~dI*s|)k>Qk(Cw-nqSJw{MlB40Zsi((HQHhWNdF&drveA;cR|Ky-sG*gX#xBH5JOS?%J3Vfy<&KlC3jXOXhUq)?0Qlt9CBtyi>*Tt3f`PkH9;m z@Jnb=2-hpP`h3LUnF3v4ecD)~Vyi}pkDq=V?isDX^&TxUG1iaEkDZn}o}9UIl?o@< z@yY5}XMQWeIdWKe8Gc=Cii2l#lc-{Gmb-eQq6Yk=Qo6tZp_gYX=tMOe(3q#B^# zrfGErA07UTWvv|PI<>WS;tVt^%u+hXbFQ}~TIi|>It&wMusCSm4D zBHhlu`&&-w5z=TWtWZmWV{JRPxJMO(%6}G83~nm-XM2S{SZE?fnKQCnGvmv4l@qNa z?k}X%!GGgB(`FT^5TA?g!l(YK3}qLdD;96olto!OIAemK6jQmBYsO>h%(pkL;MnF2Ysu>L|MYx#v}h8d}xT^gc(DQF)VPdOlJc zjqJ3p)l=;vpkn5B0M%K%)xylm+?{ z*C@csO4Hj(@*($!j3H7|4)BqmOP8x_(OMk|H=e5Jnw6d6Lbr%CgURJAQ`xm>;9d3= zTHBrEY&u^lx^wz;l-Es(R*vy5D3}peG!QnOE@F0z#9Mr-sCXN@Xyl z%lJOn*^`3hHmYQgiODuwnmtVQ6Krn_*8W%VPfL=$oq~jREA5J;h~2LJr`#ypz8^Y% z#x=GA`TTE~Q|9+TaOS*;ZLH}E0B^{;x+?)2!Nt-x*p!{v&Mh4-lU`2bXQ{{Ef+3{H zsHAas1012o%P_C=E?)A@b1}B@?Un~vX8VPo>oGZozX7dYP7KM)dAoPGXkM_D^bkq* zRsLq6FZ-m2lKxhxxo<}aZ5ol@Py?*&`9ZY#k0k zQ0%tegn2`jSD^4<<4sXC!P>oR`K3JY4H?;ef2QhnA|59pwILTmi=s?C4wh})QBJ~# zPAn!;42qv($Nse~;ElJ(4IaEr6`lpp>lrh8$LfF2!TVGxi^2jHT$m)+Mg1M=^aTLB z_EMLLvuT%uTTR9Cr9W)PiruHw&S)mbNw%229d<5;L4476jRoIo7m*dh@dq^J+ATBu ziFC&-^C-hI)zY&?b8mSp_Y1RCQKOZ}oEyGJ+Ik^!kR>)uo>DLV9 z7HvsGlJ;nX1#w-IXzFhLkZ^OX43|hcFpTTQX}o2H*xx)Y`mh(FA~(ujJC5i|A!Z2@ ztqQSHs6iZ`fu;sntyXuxDi#DFt(o!|XCunZF3clyEQaG@ zqK^$bbl-mlXl32pmR^OA7Sfh-{nBLDp&<+Yi@T$%LACzddjIex@A!!rWqDe;$S2u0 zql!iziBKk7*=D)oH>1=1tJ5oL&h2%&+jfwS?eU0aQp*Q09;$~Wlze3LmprSOkZ>2X z$_|~8mCH65_#~b0w`e~y6#HLj(N?E*)zH+1d4<;H^rqM&2T|L8U2J4YP9U5IzYcfZvB?o}Y!7!tFWP`3C88EuK9 zqU&}89%Bga+7gm0)@PVnS$b8CQ{OhJs^1OQr$w?g6o&$u6XhKs6!RGh0BJ9C9Ck!@ z*kizWMi6wBluGG+pO8CQt`P9{^*}pwP$PLT^rIV__0$?fZ7i*rWOp}@n+K!$`OaxZ z5}JpeU>QWVkA{bC#D)>ei}9x93?KFfPqPS(TGo5*eZZv7fGLh!oTj$S;RxWZwV^|- ztdDsXh}pp|anQ>sLbUUePh9F9-p=HJv|(RF1#Z_+bo)q6@SxQu+M7UHH{lxx8|00} z2K$00HKRlkNrK0}9A_5R@MEqaD^F(sNIJ*n749%9Ofc-_oCLUZVPB#Z;rAXq%(BKE z)2zYFh)I=rw8}HjufJ8|HodJ#Hz8``wyG?x`g$N7)>6+nc@K7GsooL_zRLH8WR(K> zu(CWjgg8VH^jdjT>w-9@ocX3V|1O>+YEE6AHHp0XCAIqgP9jV%tgaVDhi7;9l%86v z`E!25eSlwL3k7vx0H-hECi@Xu-=`88>T3Vg&Pk+lG#62-iuI=pHt=9_&3(7_^*N#u z+z!`u0Dcl`8ugb@Jc-_6*@rSG7V>?{OC0d*NrpuZrdUDYyPFJZHjsSk^F*m)__p_Y zu{T|qR_i>ka8<6D<$g;D(F1^(k%PW6hBA|r@kTXJ#4YAb71fHj=ivHqs#R}pm`i6u z^G%Y66V~V{71T{47D8@C*$yvEIkWyHc8X3vUr{&H=oCqx^Ki6{5s72H$V!XmniM{N z82k&*@a%bJiq3;>0-8_;qW>v$4xJz-4bJR$$&uX~*Q6l~f0+EnA2-vS+*vW{wP~>+ zPM0064mGgnTEMfet9*X`Y{4a05k7L%?$-V2SWO1W~pxhZMD_JMJJW#I7Y(aX+kd z7|&D95KgQ}Z@X;iX7j?_YEpSN--5OeuE?RceUL<#Jf7~U%n0Pp#6$2mAQV)KZnqZ4 z7Bnqz`&DF5-0o!%$)i^hy=*uG);cj99y=IE()4 z=a-*ZYnj-cP(>^HyW)i}{(~hNzj*y|zz1amY49i8vBLoBQAbW9QGfZLkiu0e_8*(C z%?O!H#q1BwkAlD*XEa-slfmf8Gxf;ANKdS7D~Z+kc|1tv*Y;y0_*VqLxplO0H1y$i zJhUBrl78vZ+XquKVb5RfNt#k30!v<^FnYV)wa{j*CvVg>nPOIodsfUk7C5icoTESl zSFL<@KfMpqZFF*SIY;QcMK}z-GMP=vBf;iW)hLHVg13n9s(aRk&R!}wxGyh<^$(m^ zlOfWgFWQOj0yb+Lr;qE8DOCK?dkx#jr32Mw!_hmu*cno8(D4hcxPnB^ zoG;Z(geNmi6GgPhI`6;6?Q5W-C1OFL_n(Q^nP zECanN+j%=r{9uws*_P51jPpzQ1|0_Kl06u9I7}@mJnWA`v0FjfqX-4+e0)AcmA44! 
zjIQY7MqQwV<|ZcuG<49gzn5;UwQ#O!Tts1d=}+Rj-|T@o{jH*=WS}cp+=u0Reu&^NIYN4-2+~n%$d^Rq#__A z$I}sZuf3~bkXA1d#narf4}DayJu=^AQ)>n}Yz9xI{@y2MTxV(8=Cn<39Bt33MPX>H zi)vB~Ebx`IHPH#1&kzdrwCBCE*D&D+v2C7q@W;Lfwyq;4;34Q*bV7QnB>7l8Mx~Ie zVyTxb{OeoM@Uv(vI-~ZLMs$TPd-}sLO63_7QuPuKR$9JPv##E2%r|S>S<&}{xi)6E zNk?&NkJhm;tW=52{~yNAAv_l*TC=fj+jeqd+qP|-*tTukwr$(SiT#E5pY+o`xPuONWfhO;k*f%EV(_k<#21E;M8s@|dc|fHCUe3W*#7uLd{T_Zgld zt9LMF60YCWaP_cFM4L9H_!SFi)Y}|6lTq(`=Mg&%mT5d^pH8N0anEAY6mb&bBwY4) ztzxQwi@{}ALm;GHaLZP4?#nY-H5IL1@T^NHYgSN=RMo?yz?p?=T+l)Y%2Bj_;VnxV zW;7ti{nt*2_lHy4qnvab^#ulgeM0c0T@joMJ+^yw#T^4jG49g6N8*1DNUOzu=Vj0N_6hZL%8ko$bU1i&~pn6&7L$#~i}GNT%m)m0Ht3Z% z)Z?@~L1p4SnIWPR>ec{BQV51C7>b}$r+I)I-E#`hrXKWIp5Nzj8Tl}Bd zgwX)h#QNF`tq11>DsbNfTL<(E$f0(2&Z_;7AqHcyevjxMx)cO~i3|I#Aw2cbE7gI~ z^4v0}GnBp@t#R;4y7R_MALr+EY#~>Bd`Y9Ao(7pojhLAFs}2=Qg5-Op!TMg;z`R+F z*!;s&G8{s`@~A|TQAgX<;f|;cT;1f{X}>3WXhxtY27R+P5Eb&(t@3pB*O>O57XEiL zaaQgBQUR_{vW~w}d>$h8DiKKJxY%AI`aWU*3BJFJzcgIiimzPe);h{MpO`xjiCi4y zvz{KKefN@`X(S5>H1?{0UF)LS@l^s|(s>*X?aV)hHt&btHgRm1O6s;5a0E^hwW-Ly zmdkMr=>I&hzUz#)I4u$$>e}8zSg!cENPuxG#B1)ttZ6%USH0{^bv!aNKR4)mt4lUf za(i$-zCF~(fF%7TVSC!LU~0;eWs!$))^(^1{ouV@Eog}-C4f7jt5BdN^q99YJQ z8Mzt;E_X@MjTpDnZzEo3lWnY$H&b_`N|f)+?0{IOKoP!p7#)baRd98V8#ouYFbf5k zdcG6Dz_mMPNxb$<$!R?f4ux#u?ZiPw@QuiJjA#j^mC_sxH;!$q=?_%8{he%NrEZ2f zZSt6qfHcb@@)hF0tJm2x(Moe>+cyv!{A)?gB9S7I4iJ4?#DP54xI|w_dlVL*X(ti^Fb}KXX($z;UlU%$Z{NS3o>jbtK4Y#q!T2= zU9-z^oRa~60<@*VWyUF7?=pU8d%)39m(=DA_nc&|K5GhR?4nnl6otQuOi`8xVb3eW zn%;T@O}~AJqjCw)^=gkKg;Pk3Eub}pDeMoZ3_3@MPQGcv~X-=Q4sjB%Jw#7k*VWbDbG#CeAMeb1!6!t@bTUYZ{_sY<~?C zkJ^!NHp%WS%JK@%?u+7j2-P>O65saE+J9q2fdgimMLB;lm7oZqK2JBBIdY}Fko{dLwf?e+6r0?ED za32ia$eKGf8iJZ%=Fyrsv<`7kKH#K?{8vr)JlF%P`bG7XsaYz#*mIDF@*Rwu)U`h(L;+DcvMpnYf(O(--r^nN`x)GX51{cPh&DxndAuO$6}=|` zg5D_D0@TBCOen>}z6yKtiK1mm{Fv8I;jcFC1Ycui{nhbmWe07fvXrwd1z=(((1@uS zV-4#5lDdq5;_$BF3Qkv#gS`!il3E!=*7e$DE!~myx|F^Q9=u0(`ngq+uJK-}?PSu` zR!-=wrD}b;$Di+lf)Flki@pOUw2gYnebi%g7qKYFGBYPBv2Ia;l74j_38<^h3yr1- zkF>cR!JqwHI+b$PEbLRDpW4i%$8$4Wlqu^cncz&1*ehYu96$CqbdV1F=eGn23B~+M z?vpb&Yfub7OqS0<_=ifzYmgZXX3cR4;ev}LJe#Wow`o|1L$=}7Fdx6T3aEM|5lkQk7=C|uXmm~Hl<+d^$s0_@bS%B zTT-X5O*K^YCqb{E6WFT|Vi$Hv*^3$9OYH=^fL(pYkB>wAWc;s;AWl!i(Fw)bIi5{h zr~xGCG@;|4EgYF^kt_K_PCeg{inRV+Lw(89ZdPJ+G>*!*%(lK2QA)TGCGEPU_VL_n z?;P697<2(eHk$Ln9edjK9EDfff3FMjVr^KR`_1a!E#$qQ)`MjAN04T5_QV(kiK~1a*xkKOO`KZApgkx=JR336D(; zJ~z?je|-K|tsY2Hy=6$9PlsJ-lJ0`Ny@GL}XCft_lomwN`~@L5zo#YV9;9jqOmsTm z!T@~3rM!lTly^+<;bGiYx={F(;C^w8-BT~SXcCtvwJwOjOpqI#xkK)kUaP7xhR{3V zojoTPMO|m4*g zlK``XZ3}$Rs~N0A@6k(aW?V*Pd2g04m`(kHWBdQ@MRq4xy?7Hk!!U;WlJWgkUm2@- z3GroY(%CUIJhg5N(*jf6c$@09-Xpa z4`O!Qga#J7R<+)3Te(?VgKb1NdtwA{KJCP-eG)2_6x>yBj|wI716TuieJa1?9MWK6 zsjW(?3SC_G<(Gq7nk69ud*={gFXHJ`mo2(4B~ia@3Y)oClYUQHl9=@fNkWl}d9(=? 
zJY*NQoM6VrVomt>Xpm3jH8-G3H7xGzzmDBADR>*&RIW_rKV}!8dBxb@#dAOuyQ43o zdNA|sdCvRwCwxmt*|M3L$aSVa^5O7Tgr=`L{+4a8a{&IEoUlWSVc&b6FTDJL=K82G z7jmaro~F|146b97ay+&7(L=|W8MxwFugj4fhiUV(j4Rs?Rj$AWt*OLUkbatf!N=WfNs6$Xw-&ctX$AuP+3pIC3QZjh`(J0Jkc9xS7 zD95(okV_q^nE~V>{)`GLKS9Z+ z)Da4ubTsVKA#@Ujk(czh_D(Fpos!S{cHB-iqLT~?@JSj=(L1`U#MSuFDhO`vjp1$+X`ej~uyoN(mrT{i;lXWR8h^AEoCK?C3Iy?nsN zF_x_e#6vP;>d!WSAAPl59Nm`_7?LmX`P3@)FCxh&T_}=r3b8R&&(VK$P#t9JBH9vk ze%N%vN|2f(mn32kqS|^Md*Q4^uVYMk07OqXb;Li_S1A2dDyq6wTsV>#*Xb80NnmR2 z8i2*rt{;SQ-Veo9GzAfC2Y7_kuxi8k2sh)327@zf1*RD_YL*>#U|-#Y*-HM{nM*|2 zk)Q*vgTi?aHtOrL?4pl;JjGCg`z233l91{-0aUn>hnF6~tfQ)#RJ>On*sBbVz+ep= z`uENY)S~O=;98Ora)e$<+~~Zp42AZDF|nR>WEJSj-ss*yU{mkoXlbapUsf!}#Shm2F_{dB#BJC#Wx%BJp5T^4@}>UYXP zqGNtmZUs6R3%Y~*dGv=K(-s+ME2>Nb&ll&TyJk4r6_+n~QnOeF&z~98x7$ikz%2|Gaw&e(VKs@H{b`5}B&@^Ld`)c~&lCJ5t{Kg05X@R5 zFR#!;bS?8Vh>-nR24NCL&R@ErntxMJj(bNlY9khO9%1Ulk;275HCW6(jL@cH7v_zT z3+}(0`GX5rkNR6@lAv zFp1+FeC9Qumk}d`Lcu&t!dLzAJd#FrXc&8sXHTxDw%O|S0dI4R`v4jYQ=xR`kY0l3 zMh8ts)fP%Hjgi;#IjP(*IISVuOgD1*De_(%CF#9<9B~VzGJebs)LW8$y$`4inu_~= zq2X?E1sjRz9X$jl1EpfoT=0x#lx^;H4m}e(8q&LiQbB#s-JI1`FSHNvg6iJ1)KNRv_8_H!SCwCO$NjWkXbQ((WmJ{H52C!k%z4- z$0K7c!HnRBC*NhF&5A^0tB5zjzuRt~nGo#pxX=`?AN6CH&apoINP#XKv~nY&=9coC z?t|AnBbrFUI{dP3@ygoG84y^pID<+gz568vM}O9u7xF8bjIo0YYp;_}?b(>AEcHtM z^6Gjph^PtV`q_FKIJMU#^K}I($0ch%fb`6Uj<)A&N*|UoiiYU08Rl~;Ll1kNPY3QL z&Uc%PYV#0j4iJAU3WP@E*(MbZOTGX;e$mSGPZJJvd=@WgaRhqjrONWq?pml;T@UzJu#-tU0+tF z#*S5fUUNR1C?#Q~_vL<8>d*0N``16+Di_!52LhJrk|bHoFReQxn>WLxOvI)69f@J= z%3=q1cUfIMt7KVR!n-U))b>l3P=P)#SqjRg97L@=1NXc!6eoI~U1Sx+o4>B!+sqgy zkHpCKqJLj~3bd>2)Mo8d)b+V_$?L+4&2y;*j&L*y*wQ1!8s1Zqop|0uqZ@=oxoGFS z+vS7X(sTdD^)XdiM#-bxI{8xWQ%P{a_r~h->_K!;~s1LKAT_J4OX5K;@*KhE*)|ri*)W#&hrb~|e0_nZd{b#?Z3PEL48H)B z4Q!g(!{p^hpRr))Q3x^?p`BZT!})b0Ay{24EqS*of3eMk+}e__$p@g~oG_WY?D%Er zB&3#69X77N|Kk1PTWc?(Jvs3RQOB>ppgj^Gww;FNGW~jP#Cw0Vo(`wOjABfFe=M*?gPZX_go@eK zje^DVe2Y2+bx;=Ojb@!hW^Zc|Op3qPz7D=$pX`{_nEFAk7%uEG( zbeJyxaaW&2Oda*daMUKqRey;c*IY=y&czBp;iBwCUzu%S~mdL z-oJ=f*ORuH!tG1OyEvrMNt#a1H~P#V91`bk*QlgV0!vbE@&v}J7MY`u80bL!gNLc* z(Vw1=RTWp^Wd~zy3ODO+uIlA%aZyGX8^XDz5XsM4`~$N^ zaGbR{=l;%yW0gNhqUzm`ml?yi>1gnRkSsRHY$Ag+AYhKJaw5j&`CBY1Dcz)5ii?av ztxD%D?`)Wr!`V1m^36R<@-BOO@z;5MbWNXIOfdhjED?(n_%xXflT`{ zQqs@vwuL6l`+^9Iqp@+MLbztu2M7&0q&y64U&|r_ktYCv4U+JO_BrF)r?7XztoZ@4 zQt1;UkIEFP>q=y+4!o7^cWaeVi9gXCooA++qBnB)b9`B9BminNVsLqjVH&=w_(^!D z?Rc=a!P-4b$s@?dU}H%Nxw}!u!?J{ZK0Y>AC%F1<_`?@Z-F6_BxM32{)D^!6cS@MD zb%h8m_6$0+M5X0NE~}B&#Gm7z%lUm+gZ4sY#AnGwjSv5HJv}&_-67dXz1>B&(XV=n zP^x+HYj&pjnaafkqM~TT@YgonaxYs z(^@4flb{C1-VzKmxJ3qb0RIokyW1JD9y2)-?`{>(&78rusVpuDCxE`>Hzh^RzKLH+3Mq6)3>po<%Nl|w9M*T2LB2_%t6 zHt#Ieeng!nAP0JsP{L}F(JA&x6u!VWX%;_LB?~Oq%i-QgGwe`@&(S8BYu#b|$iSF5 zNEJaix?Sa=;5?P(q8&iUS$-T;eYiR5i)7(B%78__<%u4*QpBt0a;s{1p3e&hx{3U; zh@(lro9jYm{oe}3TK>aBMCL2pd?|s z$P+jjfiVdka}?*Rs+C8_Rw)*p7WNo8xN-2eTKj*`WRm;!4pW<_N1%Miq)bpIq^a{z z_bv5|v1frEIosqBok1TCi!7r9E{v5*d4S*ht{fC;d8C=4YY_>Zrh_KD#)@#}R6swf z>}KS!VX@r^9m>_sd)G{=H-p z_Bx|?iAH)udW1|MgsAT*2%M>H4=YmBco+_*Z(-&f`aaETCVwiVU!R&{Y!TfEPIeuH zCZ~P9wb9dn18SI{w!WY~1uuA8+9gGV4?U~hB!>{CId;u2P0<_h5$hce##VUVO@pR2 z2$(tF0vQvS;}i4#bf53L=lFy30pD>oFXoP7NLd6izze2TJ z&0haMCVdG&pLOJfU5}=FIDmBzG`8yPAC1I#em^`e(aWQ?4_G<6>mpJ$vMn2rHqox~ zWDNM2MiUb2q$d83;OQ}wK|UV0Z|#U=%@Xb8wTuppzy{52ll#o$x5t0gJ|=L4Xgs{# z^UgZClmZj|UPV;Mvj^p}A;!!W7h9Wk+Byu+3Peic-J(+V7uouF{HF1*<)nsP4@y@1 zcY{Tx4f9WKRCxI6sc+k?A-+Np8Y!_MOdRYiKDo624EN#yKGFo5$LvqGJZ-c~f;*cdn6wb#gl0$4;j|kSXF8~M zF&>fBqoKX~n(mc0j!{Lp|BMy-tO!Zr6yevID7S#XjC_pPV$Ba(UtM~|<1-gm_ti#& zaUo3oMdJNOS)fH0vY4~18x$XG0K>NoHJ-4xy@pf0me6XbbFq3%&bR>a?A}h^XUBsf 
[GIT binary patch data omitted]
zaG~@o5LIz_Li$oNGaZC`?Kp&#U$djBmk&*5LEMQ6)R2n5_ zIv9m6Cn*IANQGB69d0f;+B|8)Jb)dg{rf~`9P6!u4j}|6#YfiQ6HkH~6iU4;AYYhA z-C#M&*d$`}PC23y8gCxWtFfYh#P{>u`}tl-1uvxO{I%J$sSelm@YV{uYa_I=Q+9+u zO|e|l?aX<^l-N8=l3CUUwvU!#RWeO-ZOa3;r7c57c`3d1k$uV&@vlVBh*CZ-V+^-2^E@Ia#E$LfoRs#8J683tN-+9#r7m_}`De7pC zrg5Tuc>(o;Nts)Y4b9Btk^>Qm zSb3pIs>L1$sb8`%Uxa(6Gk23yG1<`sQ3A}{8KJiUS!471IBOEnVd>h@>;;Wuj;K7= z7bB?~J~`?R`O1029C1{h*FI@6m~L{&olZwDMlx;GwqX!5uyr`CruSWM0u&xn7;gfR zR7<85yb5s#AGilxcNY`Qm5$#ssMw~cvv4EKv2Z%6RWX3-F~<-UG9Em}Co7$}?-V6i zeQ}Awb(u%fI+5wT9FG;Gk~MA9c!M_Y1^R82lR-yDa(dL?IyI&nAhRb}^|X!;I7c-4 z>T<`9?p(b=Bch5WYU<41!lE-Y!>&uib%Se!ne58u1v`EG9Q{1stf zVc*S?rlw*Uw@a!pCxxzhd+w&R*I+F`mDx- z|48yymj9;8^5P>~fs>OT35kLLVph@!XIrb6LostFMzhtS30OBYzV`lu=aQf5Y&Np%Q2{q5%YKB= z$^ENsiF72X?{*BEZ+UJ!+6(96zYoU-pRa4&HS?^NYZ{W>lwb^mK< zr|y2()iy{(`Rm261vmV%U+q_TwfTyA#)MBxN)30Ytc;D}U*5d>;r3sf>P_BUpZ-=- zTtLV}#BC{pIo2zSqLrqC-^BgKj1GX z)2k=`TC+6LV&?Jbv0u(tnxU#@-ZeRYzMRavKc6nY+x_F`JCj{rcUaYx)(PWh^cjs^U*D*X>wo=htyvN-&*!)7x$vJYWyz23PL6A@cp+q-*;z41c>CFBN37dd^1Z#X zv^S~r@+um6d99^2RUPAiZP1d)qhu+%mSrdF#=kho0ZE1I<_N~1+ z=UY+bMa8{IS=+W0yZo7X zRAB#`tGEO>sF2H5aZC1$wVzX=$dQLL_bVxBXvBz!9#!u@?VRty(I^T$3{&Vvb0@F% zf~`wRWw#tV#hQ?x>fAIlbKjqHp63`Fd>KB8EC|@@s`KvX+!tNbGCG#M=-Bnbu!ntL ztw^+zz|7Y2?MkZKI#;}ijd~-nHN0PPqxX^da`H#!o#>i#L!4bnpX*V8Oy?qvPLC;_ zDX+qhA9-@L)alH?0P*{?G;`uf~T{qfG#Ui;R@ia&8bSL#`3yF2Si{_)bI z0=%);c|6aTDhss*%JcYZ|6x}6AvFs#(uzwGi%KerfYD@VYG4GMQ&LrR^>^a}069qq Ag8%>k diff --git a/forLater/robnotes.tex b/forLater/robnotes.tex deleted file mode 100644 index dd6ff47b..00000000 --- a/forLater/robnotes.tex +++ /dev/null @@ -1,287 +0,0 @@ -\documentclass{article} - -\usepackage[round]{natbib} -\usepackage{amsmath,amssymb,amsthm,bm,enumerate,mathrsfs,mathtools} -\usepackage{latexsym,color,verbatim,multirow} -\usepackage{graphicx} -\usepackage{caption} -\usepackage{subcaption} - - -\def\be {{\bf 1}} - - -\newcommand{\real}{\mathbb{R}} -\begin{document} -\title{Post-selection inference for generalized regression} -\author{Jonathan Taylor and Robert Tibshirani} - -\maketitle - -\begin{abstract} -\end{abstract} - - - - -\section{Introduction} - -\begin{itemize} -\item Data $(x_i, y_i), i=1,2,\ldots N$ with $x_i=(x_{i1},x_{i2},\ldots x_{ip})$. -Let $X= \{x_{ij}\}$ be the data matrix. - - -\item Generalized regression model with linear predictor $\eta=\beta_0+X\beta$ and log-likelihood -$\ell(\beta_0,\beta)$. Consider the objective function -\begin{eqnarray} -J(\beta_0,\beta)=-\ell(\beta_0,\beta) +\lambda\cdot \sum_1^p |\beta_j| -\label{eqn:obj}) -\end{eqnarray} -\item Let $\hat\beta_0, \hat\beta_1$ be the minimizers of $J(\beta_0, \beta)$. We wish to carry out post-selection inference -for any functional $\gamma^T\beta$. -\item Leading example: logistic regression. $\pi=E(Y|x)$; $\log \pi/(1-\pi)=\beta_0+X\beta$. -$\ell(\beta_0,\beta)=\sum [y_i \log(\pi_i)+(1-y_i)\log(1-\pi_i)]$. - -\item Background: Gaussian case. 
-Selected model $M$ with sign vector $s$:
-the KKT conditions state that $(\hat M,\hat s) = (M,s)$ if and only if there exist $\beta$ and $u$ satisfying
-\begin{eqnarray}
-X_M^T(X_M\beta - y) +\lambda s&=&0 \cr
-X_{-M}^T(X_M\beta - y)+ \lambda u&=&0 \cr
-{\rm sign}(\beta)&=&s \cr
-||u||_\infty &<& 1
-\end{eqnarray}
-This allows us to write the set of responses $y$ that yield the same $M$ and $s$ in the polyhedral form
-\begin{equation}
-\Bigl\{ \begin{pmatrix} A_0(M,s) \cr
- A_1(M,s)
- \end{pmatrix}
- y <
- \begin{pmatrix} b_0(M,s) \cr
- b_1(M,s)
- \end{pmatrix}
- \Bigr\}
- \end{equation}
-
-
-
-
-\item A convenient strategy for minimizing (\ref{eqn:obj}) is to express the usual
-Newton-Raphson update as an iteratively reweighted least squares (IRLS) step, and then
-replace the weighted least squares step by an $\ell_1$-penalized weighted least squares
-procedure.
-
-We define $u= \partial\ell/\partial\eta$,
-$W=-\partial^2 \ell/\partial\eta\,\partial\eta^T$ and $z=\eta+W^{-1}u$.
-Then a second-order Taylor expansion of $-\ell(\beta)$ has, up to constants, the form
-\begin{eqnarray}
-(z-\eta)^T W(z-\eta)
-\label{taylor}
-\end{eqnarray}
-Hence to minimize (\ref{eqn:obj}) we use the following
- procedure:
-\begin{enumerate}
-\item Fix $s$ and initialize $\hat\beta=0$
-\item Compute $\eta, W$ and $z$ based on the current value of $\hat\beta$
-\item Minimize $(z-\beta_0-X\beta)^TW (z-\beta_0-X\beta)+\lambda\cdot\sum|\beta_j|$
-\item Repeat steps (2) and (3) until $\hat\beta_0, \hat\beta$ don't change.
-\end{enumerate}
-
-\item KKT
-$$
-X_M^T W(z- \beta_0-X_M\beta)+\lambda s=0$$
-\item
-$$\hat\beta=(X_M^TW X_M)^{-1}(X_M^TW z-\lambda s) \;(active) $$
-$$-X_{-M}^T W (z-X_M\beta)+\lambda u =0, ||u||_\infty <1 \;(inactive) $$
-\begin{eqnarray}
-u&=&X_{-M}^T W P_MW^{-1}(X_M^T)^+s+ \frac{1}{\lambda} X_{-M}^T W(I-P_M)z
-\end{eqnarray}
-
-\item For active variables,
-${\rm diag}(s)\beta>0$ implies
-$D(X_M^TWX_M)^{-1}(X_M^TW z-\lambda s)>0$,
-where $D={\rm diag}(s)$.
-
-Hence
-$A_1=-D(X_M^TWX_M)^{-1}X_M^TW, b_1=-D(X_M^TWX_M)^{-1}\lambda s$
-
-For inactive variables,
-$A_0=\frac{1}{\lambda}
-\begin{pmatrix}
- X_{-M}^T W \\
- -X_{-M}^T W
- \end{pmatrix}
- $, $b_0= \begin{pmatrix}
- \be+X_{-M}^TWX_M\hat\beta/\lambda \\
- \be-X_{-M}^TWX_M\hat\beta/\lambda
- \end{pmatrix}
- $
-
- Finally, let $A=\begin{pmatrix} A_1\\ A_0 \end{pmatrix}
-, b=(b_1,b_0)$
-
-\item Idea: take $z\sim N(\mu,W^{-1})$ and apply the polyhedral lemma to the region $A z \leq b$.
-Potential problem: $A, b, z$ depend on $\hat\beta$, and the region $A z \leq b$ does not correspond to the selection event $\{\hat M = M, \hat s = s\}$.
-\item Logistic regression: KKT
-$$z=X\beta+\frac{y-\hat p}{\hat p(1-\hat p)}$$
-$$
-X_M^T W(z- X_M\beta)+\lambda s=0$$
-\item
-$$\hat\beta=(X_M^TW X_M)^{-1}(X_M^TW z-\lambda s) \;(active) $$
-$$-X_{-M}^T W (z-X_M\beta)+\lambda u =0, ||u||_\infty <1 \;(inactive) $$
-\begin{eqnarray}
-u&=&X_{-M}^T W P_MW^{-1}(X_M^T)^+s+ \frac{1}{\lambda} X_{-M}^T W(I-P_M)z
-\end{eqnarray}
-
-\item For active variables,
-${\rm diag}(s)\beta>0$ implies
-$D(X_M^TWX_M)^{-1}(X_M^TW z-\lambda s)>0$.
-
-Hence
-$A_1=-D(X_M^TWX_M)^{-1}X_M^TW, b_1=-D(X_M^TWX_M)^{-1}\lambda s$
-
-For inactive variables,
-$A_0=\frac{1}{\lambda}
-\begin{pmatrix}
- X_{-M}^T W \\
- -X_{-M}^T W
- \end{pmatrix}
- $, $b_0= \begin{pmatrix}
- \be+X_{-M}^TWX_M\hat\beta/\lambda \\
- \be-X_{-M}^TWX_M\hat\beta/\lambda
- \end{pmatrix}
- $
-
- Finally, let $A=\begin{pmatrix} A_1\\ A_0 \end{pmatrix}
-, b=(b_1,b_0)$
-
-\item Idea: take $z\sim N(\mu,W^{-1})$ and apply the polyhedral lemma to the region $A z \leq b$
-\end{itemize}
-
-
-\section{Jon's notes}
-We are conditioning on the active set and signs.
-Let $\hat{\beta}=\hat{\beta}_{\lambda}$ be the LASSO solution. We are going to
-fix the model $M$ and signs $s_M$; the solution is then a function of
-$M, X_M^Ty, X_M, s_M$.
-Also, let
-$$
-\begin{aligned}
-\hat{\pi} &= \pi(X\hat{\beta}_{\lambda}) \\
-W &= \text{diag}(\hat{\pi}(1-\hat{\pi}))
-\end{aligned}
-$$
-Let
-$$
-z = X_M\hat{\beta} + \frac{y - \hat{\pi}}{\hat{\pi}(1 - \hat{\pi})}
-$$
-
-The KKT conditions can then be written as
-$$
-X^T(y - \hat{\pi}) = X^TW(z - X_M\hat{\beta}) = \lambda u
-$$
-where $u \in \partial (\| \cdot \|_1)(\hat{\beta})$ so
-$$
-u_M = s_M, \quad \|u_{-M}\|_{\infty} < 1.
-$$
-
-By construction, we have that
-$$
-\bar{\beta}=(X_M^TWX_M)^{-1}(X_M^TWz) = \hat{\beta} + \lambda (X_M^TWX_M)^{-1} s_M.
-$$
-
-This is, up to some remainder, the unpenalized logistic regression estimator.
-The remainder, after rescaling, goes to 0 in probability ($p$ fixed) before selection. So, under suitable assumptions about the selective likelihood ratio,
-Lemma 1 of the randomized response paper applies, and you can
-use this for inference about $\beta_M$.
-
-Let's look at the inactive block. By construction,
-$$
-\begin{aligned}
-X_{-M}^TW(z - X_M\hat{\beta}) &= X_{-M}^T(y - \hat{\pi}) \\
-& \approx X_{-M}^T(y - \pi) - X_{-M}^TWX_M(\hat{\beta} - \beta_M) \\
-&= X_{-M}^T(y - \pi) - X_{-M}^TWX_M(\bar{\beta} - \beta_M) + \lambda X_{-M}^TWX_M (X_M^TWX_M)^{-1}s_M\\
-\end{aligned}
-$$
-with the remainder also going to 0 in probability after appropriate rescaling.
-
-So, while $z$ is not normally distributed, the KKT conditions are
-an affine function of $z$, and these affine functionals are
-asymptotically normally distributed. Further, the variances
-from Rob's normal approximation work as plug-in variance estimators
-(Section 4.3 of http://arxiv.org/pdf/1507.06739v3.pdf) under the
-{\bf selected model.}
-
-Since our variance calculations only hold under the selected model, we might be losing some power using the
-polyhedral lemma.
-
-\subsection{Selected is the same as full?}
-
-\newcommand{\E}{E}
-
-An asymptotic variance calculation under the pairs model $(y_i,X_i) \overset{IID}{\sim} F$:
-$$
-\text{Cov}_F \left(X_{-M}^T(y-\pi) - \E_F(X_{-M}^TWX_M)\E_F(X_M^TWX_M)^{-1}X_M^T(y-\pi),
-\E_F(X_M^TWX_M)^{-1}X_M^T(y-\pi)\right) = 0
-$$
-yields that the
-randomness in the inactive block is (asymptotically) independent
-of $\bar{\beta}$. This assumes
-that the selected model is correct, or, more precisely, that $\hat{\pi}$ is a
-good estimate of $P_F(y=1|X)$ so that
-$$
-\frac{1}{n} X^TWX \approx \text{Cov}_F((y-P_F(y=1|X)) \cdot X)
-$$
-($X$ on the RHS should be thought of as a random vector).
-This might not be true if the link is misspecified or the selected model is poor...
-
-I think then the inactive blocks are not needed.
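A quick R sketch (editor's addition, not part of the original notes) of the one-step estimator $\bar\beta$ above for the logistic lasso. It assumes glmnet as the solver; because glmnet scales the log-likelihood by 1/n, its penalty corresponds to lambda/n in the notation here, the intercept is ignored for simplicity, and all object names are illustrative:

    library(glmnet)
    set.seed(1)
    n = 100; p = 10
    x = scale(matrix(rnorm(n * p), n, p))
    y = rbinom(n, 1, 1 / (1 + exp(-x[, 1])))
    lam = 0.05                                # glmnet's lambda = (notes' lambda)/n
    fit = glmnet(x, y, family = "binomial", standardize = FALSE)
    # recent glmnet versions need x and y supplied again when exact=TRUE
    b = as.numeric(coef(fit, s = lam, exact = TRUE, x = x, y = y))
    eta = as.numeric(cbind(1, x) %*% b)       # linear predictor (with intercept)
    pihat = 1 / (1 + exp(-eta))               # fitted probabilities \hat\pi
    w = pihat * (1 - pihat)                   # diagonal of W
    bhat = b[-1]                              # \hat\beta (intercept dropped)
    M = which(bhat != 0); sM = sign(bhat[M])
    xM = x[, M, drop = FALSE]
    H = crossprod(xM * w, xM)                 # X_M^T W X_M
    bbar = bhat[M] + n * lam * solve(H, sM)   # \bar\beta = \hat\beta_M + lambda H^{-1} s_M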
-
-\section{Current favorite version}
-
-$$
-\hat{\beta} = \hat{\beta}_{\lambda} = \text{argmin}_{\beta} \; \ell(\beta) + \lambda \|\beta\|_1
-$$
-
-$$
-M = \{j: \hat{\beta}_j \neq 0\}, \quad s_M = \text{sign}(\hat{\beta}[M])$$
-
-$$
-\begin{aligned}
-\bar{\beta}_M &= \hat{\beta}[M] - \left(\nabla^2 \ell(\hat{\beta})[M,M]\right)^{-1} \nabla \ell(\hat{\beta})_M \\
-&= \hat{\beta}_M + \lambda \left(\nabla^2 \ell(\hat{\beta})[M,M]\right)^{-1} s_M \\
-&= \hat{\beta}_M + \lambda \nabla^2 \ell_M(\hat{\beta}_M)^{-1} s_M
-\end{aligned}
-$$
-where $\ell_M: \mathbb{R}^M \rightarrow \mathbb{R}$ is the objective function
-of the selected model and
-$$
-\nabla^2 \ell(\hat{\beta})[M,M] = \frac{\partial^2}{\partial \beta_i \partial \beta_j} \ell(\beta) \biggl|_{\hat{\beta}}, \qquad i,j \in M
-$$
-is an $|M| \times |M|$ matrix.
-
-If $\ell$ is a negative log-likelihood, then under the selected model,
-$$
-\bar{\beta}_M \approx N\left(\beta_M^*, \nabla^2 \ell_M(\hat{\beta}_M)^{-1}\right),
-$$
-subject to the affine constraints
-$$
-\left\{\text{diag}(s_M)\left[\bar{\beta}_M - \lambda \nabla^2 \ell_M(\hat{\beta}_M)^{-1} s_M \right] \geq 0 \right\}.
-$$
-
-We apply the polyhedral lemma to $\bar{\beta}_M$, with $M, s_M$ and $\nabla^2 \ell_M (\hat{\beta}_M)$ fixed.
-
-For logistic regression, these should match the active block KKT conditions above exactly, where
-$$
-\bar{\beta}_M = (X_M^TWX_M)^{-1}X_M^TWz
-$$
-with
-$$
-z = X_M\hat{\beta}_M + \frac{y - \hat{\pi}}{\hat{\pi}(1-\hat{\pi})}
-= X_M\hat{\beta}_M + W^{-1}(y - \hat{\pi}).
-$$
-
-
-
-\end{document}
-
diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION
index 4fc82743..6bac8a08 100644
--- a/selectiveInference/DESCRIPTION
+++ b/selectiveInference/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: selectiveInference
 Type: Package
 Title: Tools for Post-Selection Inference
-Version: 1.1.2
-Date: 2015-12-17
+Version: 1.1.3
+Date: 2016-02-8
 Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid
 Maintainer: Rob Tibshirani

diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R
index ac005450..ada155b7 100644
--- a/selectiveInference/R/funs.fixed.R
+++ b/selectiveInference/R/funs.fixed.R
@@ -122,9 +122,11 @@ fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha=
   return(out)
 }
 
-##############################
+#############################
+
-fixedLasso.poly <- function(x, y, beta, lambda, a) {
+fixedLasso.poly=
+function(x, y, beta, lambda, a) {
   xa = x[,a,drop=F]
   xac = x[,!a,drop=F]
   xai = pinv(crossprod(xa))
@@ -134,16 +136,22 @@ fixedLasso.poly <- function(x, y, beta, lambda, a) {
   if (length(za)==1) dz = matrix(za,1,1)
   P = diag(1,nrow(xa)) - xa %*% xap
-  G = -rbind(1/lambda * t(xac) %*% P,
-             -1/lambda * t(xac) %*% P,
-             -dz %*% xap)
-  u = -c(1 - t(xac) %*% t(xap) %*% za,
-         1 + t(xac) %*% t(xap) %*% za,
-         -lambda * dz %*% xai %*% za)
+  #NOTE: inactive constraints not needed below!
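# (Editor's note, not part of the patch.) A sketch of why dropping the
# inactive constraints is justified here: they involve the response only
# through (I - P_M) y, while the statistic being tested and the active sign
# constraints involve y only through X_M^+ y. Under the Gaussian model with
# spherical covariance these two pieces are independent, so conditioning on
# the inactive rows would leave the truncation interval for contrasts of
# X_M^+ y unchanged; omitting them gives the same conditional law with a
# much shorter constraint list.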
+
+  G = -rbind(
+    # 1/lambda * t(xac) %*% P,
+    # -1/lambda * t(xac) %*% P,
+    -dz %*% xap
+  )
+  lambda2=lambda
+  if(length(lambda)>1) lambda2=lambda[a]
+  u = -c(
+    # 1 - t(xac) %*% t(xap) %*% za,
+    # 1 + t(xac) %*% t(xap) %*% za,
+    -lambda2 * dz %*% xai %*% za)
   return(list(G=G,u=u))
 }
 
-
 # Moore-Penrose pseudo inverse for symmetric matrices
 pinv <- function(A, tol=.Machine$double.eps) {
diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd
index 8c7afce1..ac1b9a00 100644
--- a/selectiveInference/man/fixedLassoInf.Rd
+++ b/selectiveInference/man/fixedLassoInf.Rd
@@ -15,7 +15,7 @@ fixedLassoInf(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha=0.1,
 }
 \arguments{
 \item{x}{
-Matrix of predictors (n by p)
+Matrix of predictors (n by p);
 }
 \item{y}{
 Vector of outcomes (length n)
@@ -89,6 +89,14 @@ if the observed statistic is too close to either end of the truncation interval
 desired coverage cannot be computed, and default to +/- Inf. The output
 \code{tailarea} gives the achieved Gaussian tail areas for the reported
 intervals---these should be close to alpha/2, and can be used for error-checking purposes.
+
+Important: Before running glmnet (or some other lasso solver) x should be centered, that is x <- scale(x,TRUE,FALSE).
+In addition, if standardization of the predictors is desired, x should be scaled as well: x <- scale(x,TRUE,TRUE).
+Then when running glmnet, set standardize=FALSE. See the example below.
+
+The penalty.factor facility in glmnet, allowing a different penalty lambda for each predictor,
+is not yet implemented in fixedLassoInf. However, you can finesse this; see the example below. One caveat: using this approach, a penalty factor of zero (forcing a predictor in)
+is not allowed.
 }
 \value{
 \item{type}{Type of coefficients tested (partial or full)}
@@ -118,27 +126,59 @@ set.seed(43)
 n = 50
 p = 10
 sigma = 1
+
 x = matrix(rnorm(n*p),n,p)
+x=scale(x,TRUE,TRUE)
+
 beta = c(3,2,rep(0,p-2))
 y = x\%*\%beta + sigma*rnorm(n)
 
 # first run glmnet
-gfit = glmnet(x,y)
+gfit = glmnet(x,y,standardize=FALSE)
 
 # extract coef for a given lambda; note the 1/n factor!
 # (and we don't save the intercept term)
-lambda = .1
+lambda = .8
 beta = coef(gfit, s=lambda/n, exact=TRUE)[-1]
 
 # compute fixed lambda p-values and selection intervals
 out = fixedLassoInf(x,y,beta,lambda,sigma=sigma)
 out
 
-## NOT RUN
+
 ## as above, but use lar function instead to get initial
 ## lasso fit (should get same results)
-# lfit = lar(x,y)
-# beta = coef(lfit,s=lambda,mode="lambda")
-# out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma)
-# out2
+ lfit = lar(x,y,normalize=FALSE)
+ beta = coef(lfit,s=lambda,mode="lambda")
+ out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma)
+ out2
+
+## mimic different penalty factors by first scaling x
+ set.seed(43)
+n = 50
+p = 10
+sigma = 1
+
+x = matrix(rnorm(n*p),n,p)
+x=scale(x,TRUE,TRUE)
+
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+pf=c(rep(1,7),rep(.1,3)) #define penalty factors
+pf=p*pf/sum(pf) # penalty factors should be rescaled so they sum to p
+xs=scale(x,FALSE,pf) #scale cols of x by penalty factors
+# first run glmnet
+gfit = glmnet(xs,y,standardize=FALSE)
+
+# extract coef for a given lambda; note the 1/n factor!
+# (and we don't save the intercept term)
+lambda = .8
+beta = coef(gfit, s=lambda/n, exact=TRUE)[-1]
+
+# compute fixed lambda p-values and selection intervals
+out = fixedLassoInf(xs,y,beta,lambda,sigma=sigma)
+
+#rescale conf points to undo the penalty factor
+out$ci=t(scale(t(out$ci),FALSE,pf[out$vars]))
+out
}
diff --git a/tests/test.fixed.R b/tests/test.fixed.R
index 5c558875..e2c5931c 100644
--- a/tests/test.fixed.R
+++ b/tests/test.fixed.R
@@ -208,6 +208,35 @@ fixedLassoInf(x,y,beta,lambda,sigma=sigma)
 beta=coef(fit,s=lambda,mode="lambda")
 fixedLassoInf(x,y,beta,lambda,sigma=sigma)
 
+# now try penalty factors
+ set.seed(43)
+ n = 50
+ p = 10
+ sigma = 1
+
+ x = matrix(rnorm(n*p),n,p)
+ x=scale(x,T,T)
+
+ beta = c(3,2,rep(0,p-2))
+ y = x%*%beta + sigma*rnorm(n)
+
+ pf=c(rep(1,7),rep(.1,p-7))
+ pf=p*pf/sum(pf) # penalty factors should be rescaled so they sum to p
+ xs=scale(x,F,pf) #scale cols of x
+ # first run glmnet
+ gfit = glmnet(xs,y,standardize=F)
+
+ # extract coef for a given lambda; note the 1/n factor!
+ # (and we don't save the intercept term)
+ lambda = .8
+ beta = coef(gfit, s=lambda/n, exact=TRUE)[-1]
+
+ # compute fixed lambda p-values and selection intervals
+ out = fixedLassoInf(xs,y,beta,lambda,sigma=sigma)
+ #rescale conf points to undo the penalty factor
+ out$ci=t(scale(t(out$ci),F,pf[out$vars]))
+
+
 ###
 x=state.x77[,-4]
 y=state.x77[,4]
@@ -364,3 +393,46 @@ for(i in 1:100){
 p <- pval[, -(1:2)]
 mean(p[p < 1] < 0.05)
+
+
+#test from Chong
+
+
+library(selectiveInference)
+
+library(glmnet);library(MASS);#library(grplasso);library(gvlma);library(grpreg)
+library(penalized)
+load("fooXY.RData")
+
+#d=read.csv("DesignMatrixX_and_y.csv");dim(d); head(d)
+
+#source("temp.R")
+n=length(Y)
+p=ncol(X)
+#X=scale(X,T,F)
+X=X+.01*matrix(rnorm(n*p),n,p) # I added noise to avoid collinearity
+#X=scale(X,T,T)/sqrt(n-1)
+
+Y=Y-mean(Y)
+
+#X=as.matrix(d[,1:192]); ### design matrix, no intercept
+#Y=d$y; ### Response variable Y.
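# (Editor's note, not part of the test.) The code below checks the lasso KKT
# conditions at the solutions returned by glmnet and penalized(): with the
# unscaled penalty lam2 = n*lam (glmnet divides its loss by n), the
# subgradient g = t(X) %*% (Y - X %*% beta) / lam2 should equal sign(beta_j)
# on the active set and lie within [-1, 1] elsewhere.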
+fit = glmnet(x=X, y=Y, family="gaussian",alpha = 1, thresh = 1e-9, standardize=F)
+set.seed(39)
+lam= fit$lambda[30];
+#lam= fit$lambda[15]## Try getting coefficient at this lambda
+beta = coef(fit, s=lam, exact=TRUE)[-1];length(beta);table(beta!=0)
+
+ aa=penalized(Y~X,lambda1=lam*n,model="linear",standardize=F)
+ b=coef(aa,which="all")[-1]
+
+lam2=n*lam
+
+g=t(X)%*%(Y-X%*%beta)/lam2
+
+g[beta!=0]
+
+g=t(X)%*%(Y-X%*%b)/lam2
+out = fixedLassoInf(X,Y,beta,lam*n)
+
+

From f6dab3ef78586db01096900659a7768852a881ce Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Tue, 15 Mar 2016 13:34:41 -0700
Subject: [PATCH 108/396] C code for sampling truncated whitened Gaussian

---
 selectiveInference/src/truncnorm.c | 175 +++++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 selectiveInference/src/truncnorm.c

diff --git a/selectiveInference/src/truncnorm.c b/selectiveInference/src/truncnorm.c
new file mode 100644
index 00000000..b436b1f9
--- /dev/null
+++ b/selectiveInference/src/truncnorm.c
@@ -0,0 +1,175 @@
+#include <R.h>
+#include <Rmath.h>
+
+// Take a Gibbs hit and run step along a given direction
+
+// Assumes the covariance is identity
+
+void gibbs_step(double *state,     /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/
+		double *direction, /* direction we will take Gibbs step */
+		double *U,         /* A %*% state - b */
+		double *alpha,     /* A %*% direction */
+		int nconstraint,   /* number of rows of A */
+		int nstate)        /* dimension of state */
+{
+
+  int istate;
+  double value = 0;
+
+  /* Compute V=\eta^Ty */
+
+  for (istate = 0; istate < nstate; istate++) {
+    value += direction[istate] * state[istate];
+  }
+
+  /* Compute upper and lower bounds */
+
+  double lower_bound = -1e12;
+  double upper_bound = 1e12;
+  double bound_val = 0;
+  double tol=1.e-7;
+  int iconstraint;
+
+  for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) {
+    bound_val = -U[iconstraint] / alpha[iconstraint] + value;
+
+    if ((alpha[iconstraint] > tol) &&
+	(bound_val < upper_bound)) {
+      upper_bound = bound_val;
+    }
+    else if ((alpha[iconstraint] < -tol) &&
+	(bound_val > lower_bound)) {
+      lower_bound = bound_val;
+    }
+
+  }
+
+  /* Ensure constraints are satisfied */
+  if (lower_bound > value) {
+    lower_bound = value - tol;
+  }
+  else if (upper_bound < value) {
+    upper_bound = value + tol;
+  }
+
+  /* Check to see if constraints are satisfied */
+
+  /* if (lower_bound > upper_bound) {
+
+  }*/
+
+  /* Now, take a step */
+
+  double tnorm; /* the 1D gaussian variable */
+  double cdfU, cdfL, unif; /* temp variables */
+
+  if (upper_bound < -10) {
+
+    /* use Exp approximation */
+    /* the approximation is that */
+    /* Z | lower_bound < Z < upper_bound */
+    /* is fabs(upper_bound) * (upper_bound - Z) = E approx Exp(1) */
+    /* so Z = upper_bound - E / fabs(upper_bound) */
+    /* and the truncation of the exponential is */
+    /* E < fabs(upper_bound - lower_bound) * fabs(upper_bound) = D */
+
+    /* this has distribution function (1 - exp(-x)) / (1 - exp(-D)) */
+    /* so to draw from this distribution */
+    /* we set E = - log(1 - U * (1 - exp(-D))) where U is Unif(0,1) */
+    /* and Z (= tnorm below) is as stated */
+
+    unif = runif(0., 1.) * (1 - exp(-fabs((lower_bound - upper_bound) * upper_bound)));
+    tnorm = (upper_bound + log(1 - unif) / fabs(upper_bound));
+  }
+  else if (lower_bound > 10) {
+
+    /* here Z = lower_bound + E / fabs(lower_bound) (though lower_bound is positive) */
+    /* and D = fabs((upper_bound - lower_bound) * lower_bound) */
+
+    unif = runif(0., 1.)
* (1 - exp(-fabs((upper_bound - lower_bound) * lower_bound))); + tnorm = (lower_bound - log(1 - unif) / lower_bound); + } + else if (lower_bound < 0) { + cdfL = pnorm(lower_bound, 0., 1., 1, 0); /* Ryan */ + cdfU = pnorm(upper_bound, 0., 1., 1, 0); /* Ryan */ + unif = runif(0., 1.) * (cdfU - cdfL) + cdfL; /* Ryan */ + if (unif < 0.5) { + tnorm = qnorm(unif, 0., 1., 1, 0); /* Ryan */ + } + else { + tnorm = -qnorm(1-unif, 0., 1., 1, 0); /* Ryan */ + } + } + else { + cdfL = pnorm(-lower_bound, 0., 1., 1, 0); /* Ryan */ + cdfU = pnorm(-upper_bound, 0., 1., 1, 0); /* Ryan */ + unif = runif(0., 1.) * (cdfL - cdfU) + cdfU; + if (unif < 0.5) { + tnorm = -qnorm(unif, 0., 1., 1, 0); /* Ryan */ + } + else { + tnorm = qnorm(1-unif, 0., 1., 1, 0); /* Ryan */ + } + } + + /* Now update the state and U */ + + double delta = tnorm - value; + + for (istate = 0; istate < nstate; istate++) { + state[istate] += delta * direction[istate]; + } + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + U[iconstraint] += U[iconstraint] + delta * alpha[iconstraint] ; + } + + /* End of gibbs_step */ + +} + +void sample_truncnorm_white(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *U, /* A %*% state - b */ + double *directions, /* possible steps for sampler to take */ + /* assumed to be stored as list of vectors of dimension nstate */ + double *alphas, /* The matrix A %*% directions */ + double *output, /* array in which to store samples */ + /* assumed will stored as list of vectors of dimension nstate */ + int nconstraint, /* number of rows of A */ + int ndirection, /* the possible number of directions to choose from */ + /* `directions` should have size nstate*ndirection */ + int nstate, /* dimension of state */ + int burnin, /* number of burnin steps */ + int ndraw) /* total number of samples to return */ +{ + + int iter_count; + int which_direction; + + double *direction, *alpha; + + for (iter_count = 0; iter_count < burnin + ndraw; iter_count++) { + + which_direction = (int) floor(runif(0., 1.) * ndirection); /* Ryan */ + direction = directions + nstate * which_direction; /* Ryan */ + alpha = alpha + nconstraint * which_direction; /* Ryan */ + + /* take a step, which implicitly updates `state` and `U` */ + + gibbs_step(state, + direction, + U, + alpha, + nconstraint, + nstate); + + /* Store result if after burnin */ + + int istate; + if (iter_count >= burnin) { + for (istate = 0; istate < nstate; istate++) { + *output = state[istate]; + output++; + } + } + } +} From 477750c259d5205b59bd249587f4c6557ba9cf04 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Mar 2016 13:12:45 -0700 Subject: [PATCH 109/396] utility file for sampling from affine constraints --- selectiveInference/R/funs.constraints.R | 142 ++++++++++++++++++++++++ selectiveInference/R/funs.fs.R | 3 +- 2 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 selectiveInference/R/funs.constraints.R diff --git a/selectiveInference/R/funs.constraints.R b/selectiveInference/R/funs.constraints.R new file mode 100644 index 00000000..6c16b500 --- /dev/null +++ b/selectiveInference/R/funs.constraints.R @@ -0,0 +1,142 @@ +# +# Some utilities for affine constraints +# + +# +# compute the square-root and inverse square-root of a non-negative +# definite matrix +# + +factor_covariance = function(S, rank=NA) { + if (!is.na(rank)) { + rank = nrow(S) + } + svd_X = svd(S, nu = rank, nv=rank) + sqrt_cov = t(sqrt(svd_X$d[1:rank]) * t(svd_X$u[,1:rank])) + sqrt_inv = t((1. 
/ sqrt(svd_X$d[1:rank])) * t(svd_X$u[,1:rank])) + + return(list(sqrt_cov=sqrt_cov, sqrt_inv=sqrt_inv)) +} + +# +# from a constraint, return an equivalent +# constraint and a whitening and inverse +# whitening map +# + +# law is Z \sim N(mean, covariance) subject to constraints linear_part %*% Z \leq offset + +whiten_constraint = function(linear_part, offset, mean, covariance) { + + factor_cov = factor_covariance(covariance) + sqrt_cov = factor_cov$sqrt_cov + sqrt_inv = factor_cov$sqrt_inv + + new_A = A %*% sqrt_cov + new_b = offset - linear_part %*% mean + + # rescale rows to have length 1 + + scaling = sqrt(apply(new_A^2, sum, 1)) + new_A = new_A / scaling + new_b = new_b / scaling + + # TODO: check these functions will behave when Z is a matrix. + + inverse_map = function(Z) { + return(sqrt_cov %*% Z + mu) + } + + forward_map = function(W) { + return(sqrt_inv %*% (W - mu)) + } + + return(list(linear_part=new_A, + offset=new_b, + inverse_map=inverse_map, + forward_map=forward_map)) +} + +# +# sample from the law +# +# Z \sim N(mean, covariance) subject to constraints linear_part %*% Z \leq offset + +sample_from_constraints = function(linear_part, + offset, + covariance, + mean, + initial_point, # point must be feasible for constraints + ndraw=8000, + burnin=2000, + accept_reject_params=NA) #TODO: implement accept reject check +{ + + whitened_con = whiten_constraint(linear_part, + offset, + covariance, + mean) + white_initial = whitened_con$forward_map(initial_point) + +# # try 100 draws of accept reject +# # if we get more than 50 good draws, then just return a smaller sample +# # of size (burnin+ndraw)/5 + +# if accept_reject_params: +# use_hit_and_run = False +# num_trial, min_accept, num_draw = accept_reject_params + +# def _accept_reject(sample_size, linear_part, offset): +# Z_sample = np.random.standard_normal((100, linear_part.shape[1])) +# constraint_satisfied = (np.dot(Z_sample, linear_part.T) - +# offset[None,:]).max(1) < 0 +# return Z_sample[constraint_satisfied] + +# Z_sample = _accept_reject(100, +# white_con.linear_part, +# white_con.offset) + +# if Z_sample.shape[0] >= min_accept: +# while True: +# Z_sample = np.vstack([Z_sample, +# _accept_reject(num_draw / 5, +# white_con.linear_part, +# white_con.offset)]) +# if Z_sample.shape[0] > num_draw: +# break +# white_samples = Z_sample +# else: +# use_hit_and_run = True +# else: +# use_hit_and_run = True + + use_hit_and_run = TRUE + + if (use_hit_and_run) { + + white_linear = whitened_con$linear_part + white_offset = whitened_con$offset + + nstate = length(white_initial) + directions = rbind(diag(rep(1, ndim)), + matrix(rnorm(ndim^2), nstate, nstate)) + directions = apply(directions, function(x) { x/sqrt(sum(x^2)) }, 1) # normalize rows to have length 1 + alphas = white_linear %*% directions + U = white_linear %*% white_initial - white_offset + Z_sample = matrix(rep(NA, ndraw*ndim), ndraw, nstate) + + .C("sample_truncnorm_white", + white_initial, + U, + directions, + alphas, + output, + nrow(white_linear), + nstate, + burnin, + ndraw) + + Z = t(whitened_con$inverse_map(t(output))) + return(Z) +} + diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 1dd0a7a6..8c4fb548 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -533,7 +533,6 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, # because this has a simple box constraint # with a generically non-degenerate covariance - if (n > p) { library(tmvtnorm) @@ -550,7 +549,7 @@ fsInf_maxZ <- 
function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, lower=-collapsed_neg, upper=collapsed_pos, algorithm="gibbs", - burn.in.samples=burnin) + burn.in.samples=burnin) if (length(inactive) > 1) { sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) From c53ff76de582fe05581ab6645aff6495585be3cc Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Mar 2016 18:25:07 -0700 Subject: [PATCH 110/396] syntax error in constraints --- selectiveInference/R/funs.constraints.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.constraints.R b/selectiveInference/R/funs.constraints.R index 6c16b500..5f6e5546 100644 --- a/selectiveInference/R/funs.constraints.R +++ b/selectiveInference/R/funs.constraints.R @@ -135,7 +135,8 @@ sample_from_constraints = function(linear_part, nstate, burnin, ndraw) - + + } Z = t(whitened_con$inverse_map(t(output))) return(Z) } From 607bccdeb4cb7e509a4d627c8efb6d6439e28037 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Mar 2016 19:32:55 -0700 Subject: [PATCH 111/396] transposed matrices so pointer arithmetic correct in C code --- selectiveInference/R/funs.constraints.R | 10 +++++----- selectiveInference/src/truncnorm.c | 25 +++++++++++++------------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/selectiveInference/R/funs.constraints.R b/selectiveInference/R/funs.constraints.R index 5f6e5546..52e4fa5b 100644 --- a/selectiveInference/R/funs.constraints.R +++ b/selectiveInference/R/funs.constraints.R @@ -123,21 +123,21 @@ sample_from_constraints = function(linear_part, directions = apply(directions, function(x) { x/sqrt(sum(x^2)) }, 1) # normalize rows to have length 1 alphas = white_linear %*% directions U = white_linear %*% white_initial - white_offset - Z_sample = matrix(rep(NA, ndraw*ndim), ndraw, nstate) + Z_sample = matrix(rep(NA, ndraw*ndim), nstate, ndraw) .C("sample_truncnorm_white", white_initial, U, - directions, - alphas, - output, + t(directions), + t(alphas), + Z_sample, nrow(white_linear), nstate, burnin, ndraw) } - Z = t(whitened_con$inverse_map(t(output))) + Z = t(whitened_con$inverse_map(Z_sample)) return(Z) } diff --git a/selectiveInference/src/truncnorm.c b/selectiveInference/src/truncnorm.c index b436b1f9..b7584c41 100644 --- a/selectiveInference/src/truncnorm.c +++ b/selectiveInference/src/truncnorm.c @@ -45,6 +45,7 @@ void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhe } /* Ensure constraints are satisfied */ + if (lower_bound > value) { lower_bound = value - tol; } @@ -90,25 +91,25 @@ void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhe tnorm = (lower_bound - log(1 - unif) / lower_bound); } else if (lower_bound < 0) { - cdfL = pnorm(lower_bound, 0., 1., 1, 0); /* Ryan */ - cdfU = pnorm(upper_bound, 0., 1., 1, 0); /* Ryan */ - unif = runif(0., 1.) * (cdfU - cdfL) + cdfL; /* Ryan */ + cdfL = pnorm(lower_bound, 0., 1., 1, 0); + cdfU = pnorm(upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) * (cdfU - cdfL) + cdfL; if (unif < 0.5) { - tnorm = qnorm(unif, 0., 1., 1, 0); /* Ryan */ + tnorm = qnorm(unif, 0., 1., 1, 0); } else { - tnorm = -qnorm(1-unif, 0., 1., 1, 0); /* Ryan */ + tnorm = -qnorm(1-unif, 0., 1., 1, 0); } } else { - cdfL = pnorm(-lower_bound, 0., 1., 1, 0); /* Ryan */ - cdfU = pnorm(-upper_bound, 0., 1., 1, 0); /* Ryan */ + cdfL = pnorm(-lower_bound, 0., 1., 1, 0); + cdfU = pnorm(-upper_bound, 0., 1., 1, 0); unif = runif(0., 1.) 
* (cdfL - cdfU) + cdfU; if (unif < 0.5) { - tnorm = -qnorm(unif, 0., 1., 1, 0); /* Ryan */ + tnorm = -qnorm(unif, 0., 1., 1, 0); } else { - tnorm = qnorm(1-unif, 0., 1., 1, 0); /* Ryan */ + tnorm = qnorm(1-unif, 0., 1., 1, 0); } } @@ -149,9 +150,9 @@ void sample_truncnorm_white(double *state, /* state has law N(0,I) constrai for (iter_count = 0; iter_count < burnin + ndraw; iter_count++) { - which_direction = (int) floor(runif(0., 1.) * ndirection); /* Ryan */ - direction = directions + nstate * which_direction; /* Ryan */ - alpha = alpha + nconstraint * which_direction; /* Ryan */ + which_direction = (int) floor(runif(0., 1.) * ndirection); + direction = ((double *) directions) + nstate * which_direction; + alpha = ((double *) alphas) + nconstraint * which_direction; /* take a step, which implicitly updates `state` and `U` */ From bacacb0ea783a899e96ed7f602a34d0effec3495 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Mar 2016 21:34:37 -0700 Subject: [PATCH 112/396] sampler runs now -- needs a test to confirm results are OK --- selectiveInference/R/funs.constraints.R | 63 +++++++++++++++---------- selectiveInference/src/truncnorm.c | 36 +++++++++----- 2 files changed, 61 insertions(+), 38 deletions(-) diff --git a/selectiveInference/R/funs.constraints.R b/selectiveInference/R/funs.constraints.R index 52e4fa5b..b65cdcc1 100644 --- a/selectiveInference/R/funs.constraints.R +++ b/selectiveInference/R/funs.constraints.R @@ -8,10 +8,10 @@ # factor_covariance = function(S, rank=NA) { - if (!is.na(rank)) { + if (is.na(rank)) { rank = nrow(S) } - svd_X = svd(S, nu = rank, nv=rank) + svd_X = svd(S, nu=rank, nv=rank) sqrt_cov = t(sqrt(svd_X$d[1:rank]) * t(svd_X$u[,1:rank])) sqrt_inv = t((1. / sqrt(svd_X$d[1:rank])) * t(svd_X$u[,1:rank])) @@ -32,23 +32,23 @@ whiten_constraint = function(linear_part, offset, mean, covariance) { sqrt_cov = factor_cov$sqrt_cov sqrt_inv = factor_cov$sqrt_inv - new_A = A %*% sqrt_cov + new_A = linear_part %*% sqrt_cov new_b = offset - linear_part %*% mean # rescale rows to have length 1 - scaling = sqrt(apply(new_A^2, sum, 1)) + scaling = sqrt(apply(new_A^2, 1, sum)) new_A = new_A / scaling new_b = new_b / scaling # TODO: check these functions will behave when Z is a matrix. 
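+  # Editor's note (hedged sketch of the intent, not in the original source):
+  # write covariance = C %*% t(C) with C = sqrt_cov. If W ~ N(mean, covariance)
+  # satisfies
+  #   linear_part %*% W <= offset,
+  # then Z with W = C %*% Z + mean is standard normal and satisfies
+  #   (linear_part %*% C) %*% Z <= offset - linear_part %*% mean,
+  # which is exactly new_A %*% Z <= new_b after the row rescaling above;
+  # inverse_map below sends such Z back to the original coordinates.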
inverse_map = function(Z) { - return(sqrt_cov %*% Z + mu) + return(sqrt_cov %*% Z + mean) } forward_map = function(W) { - return(sqrt_inv %*% (W - mu)) + return(sqrt_inv %*% (W - mean)) } return(list(linear_part=new_A, @@ -64,8 +64,8 @@ whiten_constraint = function(linear_part, offset, mean, covariance) { sample_from_constraints = function(linear_part, offset, - covariance, mean, + covariance, initial_point, # point must be feasible for constraints ndraw=8000, burnin=2000, @@ -74,8 +74,8 @@ sample_from_constraints = function(linear_part, whitened_con = whiten_constraint(linear_part, offset, - covariance, - mean) + mean, + covariance) white_initial = whitened_con$forward_map(initial_point) # # try 100 draws of accept reject @@ -118,25 +118,36 @@ sample_from_constraints = function(linear_part, white_offset = whitened_con$offset nstate = length(white_initial) - directions = rbind(diag(rep(1, ndim)), - matrix(rnorm(ndim^2), nstate, nstate)) - directions = apply(directions, function(x) { x/sqrt(sum(x^2)) }, 1) # normalize rows to have length 1 - alphas = white_linear %*% directions + nconstraint = nrow(white_linear) + + directions = rbind(diag(rep(1, nstate)), + matrix(rnorm(nstate^2), nstate, nstate)) + + # normalize rows to have length 1 + + scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) + directions = directions / scaling + ndirection = nrow(directions) + + alphas = directions %*% t(white_linear) U = white_linear %*% white_initial - white_offset - Z_sample = matrix(rep(NA, ndraw*ndim), nstate, ndraw) - - .C("sample_truncnorm_white", - white_initial, - U, - t(directions), - t(alphas), - Z_sample, - nrow(white_linear), - nstate, - burnin, - ndraw) - + Z_sample = matrix(rep(0, nstate * ndraw), nstate, ndraw) + + result = .C("sample_truncnorm_white", + as.numeric(white_initial), + as.numeric(U), + as.numeric(t(directions)), + as.numeric(t(alphas)), + output=Z_sample, + as.integer(nconstraint), + as.integer(ndirection), + as.integer(nstate), + as.integer(burnin), + as.integer(ndraw), + package="selectiveInference") + Z_sample = result$output } + Z = t(whitened_con$inverse_map(Z_sample)) return(Z) } diff --git a/selectiveInference/src/truncnorm.c b/selectiveInference/src/truncnorm.c index b7584c41..cca61d93 100644 --- a/selectiveInference/src/truncnorm.c +++ b/selectiveInference/src/truncnorm.c @@ -31,6 +31,7 @@ void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhe int iconstraint; for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + bound_val = -U[iconstraint] / alpha[iconstraint] + value; if ((alpha[iconstraint] > tol) && @@ -121,7 +122,7 @@ void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhe state[istate] += delta * direction[istate]; } for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { - U[iconstraint] += U[iconstraint] + delta * alpha[iconstraint] ; + U[iconstraint] += delta * alpha[iconstraint] ; } /* End of gibbs_step */ @@ -131,21 +132,30 @@ void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhe void sample_truncnorm_white(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ double *U, /* A %*% state - b */ double *directions, /* possible steps for sampler to take */ - /* assumed to be stored as list of vectors of dimension nstate */ + /* assumed to be stored as list of columns of dimension nstate */ + /* has shape (nstate, ndirection) */ double *alphas, /* The matrix A %*% directions */ + /* has shape (nconstraint, ndirection) */ 
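+			    /* Editor's note (sketch): R stores matrices
+			       column-major, so the R caller passes
+			       t(directions) and t(alphas); column j of each
+			       is then the contiguous block starting at
+			       directions + nstate*j and alphas + nconstraint*j,
+			       which is what the pointer arithmetic below
+			       relies on. */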
double *output, /* array in which to store samples */ /* assumed will stored as list of vectors of dimension nstate */ - int nconstraint, /* number of rows of A */ - int ndirection, /* the possible number of directions to choose from */ + /* has shape (nstate, ndraw) */ + int *pnconstraint, /* number of rows of A */ + int *pndirection, /* the possible number of directions to choose from */ /* `directions` should have size nstate*ndirection */ - int nstate, /* dimension of state */ - int burnin, /* number of burnin steps */ - int ndraw) /* total number of samples to return */ + int *pnstate, /* dimension of state */ + int *pburnin, /* number of burnin steps */ + int *pndraw) /* total number of samples to return */ { int iter_count; int which_direction; + int nconstraint = *pnconstraint; + int ndirection = *pndirection; + int nstate = *pnstate; + int burnin = *pburnin; + int ndraw = *pndraw; + double *direction, *alpha; for (iter_count = 0; iter_count < burnin + ndraw; iter_count++) { @@ -156,10 +166,10 @@ void sample_truncnorm_white(double *state, /* state has law N(0,I) constrai /* take a step, which implicitly updates `state` and `U` */ - gibbs_step(state, - direction, - U, - alpha, + gibbs_step(state, + direction, + U, + alpha, nconstraint, nstate); @@ -169,8 +179,10 @@ void sample_truncnorm_white(double *state, /* state has law N(0,I) constrai if (iter_count >= burnin) { for (istate = 0; istate < nstate; istate++) { *output = state[istate]; - output++; + output++; } } } + } + From 0e45d9f4eb0faf56150b36e1d0eb65317b33a461 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Mar 2016 21:43:52 -0700 Subject: [PATCH 113/396] fixed code so it calls new sampler -- should be ready to test... --- selectiveInference/R/funs.fs.R | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 8c4fb548..41d138c3 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -558,8 +558,22 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, sample_maxZ = truncated_noise / cur_scale } } else { - # RUBBISH for now!!!! - sample_maxZ = abs(rnorm(ndraw)) + + linear_part = rbind(t(cur_adjusted_X), -t(cur_adjusted_X)) + offset = c(final_upper, -final_lower) + covariance = diag(rep(sigma^2, nrow(cor_adjusted_X))) + mean = rep(0, nrow(cur_adjusted_X)) + initial_point = y + truncated_y = sample_from_constraints(linear_part, + offset, + mean, + covariance, + initial_point, + burnin=burnin, + ndraw=ndraw) + + truncated_noise = truncated_y %*% cur_adjusted_X + sample_maxZ = apply(abs(1. 
/ cur_scale * truncated_noise), 1, max) } observed_maxZ = obj$realized_maxZ[j] From 54e5bafb4ede56537bc98a8aeee282b4a9cb36b1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Mar 2016 22:02:04 -0700 Subject: [PATCH 114/396] print method for maxZ inference --- selectiveInference/R/funs.fs.R | 82 ++++++++++++---------------------- 1 file changed, 29 insertions(+), 53 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 41d138c3..786bc45e 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -437,8 +437,8 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic # selected maxZ tests -fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, - ndraw=8000, burnin=2000) { +fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, + ndraw=8000, burnin=2000) { this.call = match.call() @@ -586,14 +586,22 @@ fsInf_maxZ <- function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, khat = forwardStop(pv,alpha) - out = list(pv=pv,khat=khat, - call=this.call) + out = list(pv=pv, + k=k, + khat=khat, + sigma=sigma, + call=this.call, + vars=vars, + sign=sign, + alpha=alpha, + realized_maxZ=obj$realized_maxZ) class(out) = "fsInf_Zmax" return(out) } ############################## +Print methods ############################## @@ -610,66 +618,34 @@ print.fs <- function(x, ...) { invisible() } -print.fsInf <- function(x, tailarea=TRUE, ...) { +print.fsInf_Zmax <- function(obj) { + cat("\nCall:\n") - dput(x$call) + dput(obj$call) cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", - x$sigma)) - - if (x$type == "active") { - cat(sprintf("\nSequential testing results with alpha = %0.3f\n",x$alpha)) - tab = cbind(1:length(x$pv),x$vars, - round(x$sign*x$vmat%*%x$y,3), - round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), - round(x$pv,3),round(x$ci,3)) - colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", - "LowConfPt", "UpConfPt") - if (tailarea) { - tab = cbind(tab,round(x$tailarea,3)) - colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") - } - rownames(tab) = rep("",nrow(tab)) - print(tab) + obj$sigma)) - cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",x$khat)) - } + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",obj$alpha)) - else if (x$type == "all") { - cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$k,x$alpha)) - tab = cbind(x$vars, - round(x$sign*x$vmat%*%x$y,3), - round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), - round(x$pv,3),round(x$ci,3)) - colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") - if (tailarea) { - tab = cbind(tab,round(x$tailarea,3)) - colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") - } - rownames(tab) = rep("",nrow(tab)) - print(tab) - } + tab = cbind(1:length(obj$pv),obj$vars, + round(obj$sign*obj$realized_maxZ, 3), + round(obj$pv,3)) + colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value") + rownames(tab) = rep("",nrow(tab)) + print(tab) - else if (x$type == "aic") { - cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$khat,x$alpha)) - tab = cbind(x$vars, - round(x$sign*x$vmat%*%x$y,3), - round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), - round(x$pv,3),round(x$ci,3)) - colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") - if (tailarea) { - tab = cbind(tab,round(x$tailarea,3)) - 
colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") - } - rownames(tab) = rep("",nrow(tab)) - print(tab) - - cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",obj$khat)) } invisible() } +############################## + +Plot methods + +############################## plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { if (x$completepath) { From 832b6c245075b71a7268d463c3af2a24b66e12ff Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Mar 2016 23:13:59 -0700 Subject: [PATCH 115/396] maxZ now runs and prints output, see tests/test.fs_maxZ.R --- selectiveInference/NAMESPACE | 4 +- selectiveInference/R/funs.constraints.R | 68 +++++++----- selectiveInference/R/funs.fs.R | 132 ++++++++++++++++++------ tests/test.fs_maxZ.R | 23 +++++ 4 files changed, 165 insertions(+), 62 deletions(-) create mode 100644 tests/test.fs_maxZ.R diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 64a02112..cc7e76f3 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -10,7 +10,8 @@ export(lar,fs, estimateSigma, manyMeans,print.manyMeans, groupfs,groupfsInf, - scaleGroups,factorDesign + scaleGroups,factorDesign, + sample_from_constraints ) S3method("coef", "lar") @@ -23,6 +24,7 @@ S3method("predict", "fs") S3method("print", "fs") S3method("plot", "fs") S3method("print", "fsInf") +S3method("print", "fsInf_maxZ") S3method("print", "fixedLassoInf") S3method("print", "manyMeans") S3method("print", "groupfs") diff --git a/selectiveInference/R/funs.constraints.R b/selectiveInference/R/funs.constraints.R index b65cdcc1..e2bc0417 100644 --- a/selectiveInference/R/funs.constraints.R +++ b/selectiveInference/R/funs.constraints.R @@ -117,35 +117,47 @@ sample_from_constraints = function(linear_part, white_linear = whitened_con$linear_part white_offset = whitened_con$offset + # Inf cannot be used in C code + # In theory, these rows can be dropped + + rows_to_keep = white_offset < Inf + white_linear = white_linear[rows_to_keep,] + white_offset = white_offset[rows_to_keep,] + nstate = length(white_initial) - nconstraint = nrow(white_linear) - - directions = rbind(diag(rep(1, nstate)), - matrix(rnorm(nstate^2), nstate, nstate)) - - # normalize rows to have length 1 - - scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) - directions = directions / scaling - ndirection = nrow(directions) - - alphas = directions %*% t(white_linear) - U = white_linear %*% white_initial - white_offset - Z_sample = matrix(rep(0, nstate * ndraw), nstate, ndraw) - - result = .C("sample_truncnorm_white", - as.numeric(white_initial), - as.numeric(U), - as.numeric(t(directions)), - as.numeric(t(alphas)), - output=Z_sample, - as.integer(nconstraint), - as.integer(ndirection), - as.integer(nstate), - as.integer(burnin), - as.integer(ndraw), - package="selectiveInference") - Z_sample = result$output + if (sum(rows_to_keep) > 0) { + nconstraint = nrow(white_linear) + + directions = rbind(diag(rep(1, nstate)), + matrix(rnorm(nstate^2), nstate, nstate)) + + # normalize rows to have length 1 + + scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) + directions = directions / scaling + ndirection = nrow(directions) + + alphas = directions %*% t(white_linear) + U = white_linear %*% white_initial - white_offset + Z_sample = matrix(rep(0, nstate * ndraw), nstate, ndraw) + + result = .C("sample_truncnorm_white", + 
as.numeric(white_initial), + as.numeric(U), + as.numeric(t(directions)), + as.numeric(t(alphas)), + output=Z_sample, + as.integer(nconstraint), + as.integer(ndirection), + as.integer(nstate), + as.integer(burnin), + as.integer(ndraw), + package="selectiveInference") + Z_sample = result$output + } + else { + Z_sample = matrix(rnorm(nstate * ndraw), nstate, ndraw) + } } Z = t(whitened_con$inverse_map(Z_sample)) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 786bc45e..dedc0d39 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -29,9 +29,9 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Find the first variable to enter and its sign working_scale = sqrt(colSums(x^2)) working_x = scale(x,center=F,scale=working_scale) - score = t(working_x)%*%y - i_hit = which.max(abs(score)) # Hitting coordinate - sign_hit = Sign(score[i_hit]) # Sign + working_score = t(working_x)%*%y + i_hit = which.max(abs(working_score)) # Hitting coordinate + sign_hit = Sign(working_score[i_hit]) # Sign signs = sign_hit # later signs will be appended to `signs` if (verbose) { @@ -54,7 +54,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, offset_pos_maxZ = matrix(Inf, p, buf) # upper bounds for selective maxZ offset_neg_maxZ = matrix(Inf, p, buf) # lower bounds for selective maxZ scale_maxZ = matrix(0, p, buf) # lower bounds for selective maxZ - realized_maxZ = matrix(0, p, buf) # lower bounds for selective maxZ + realized_maxZ = numeric(buf) # lower bounds for selective maxZ action[1] = i_hit df[1] = 0 @@ -64,7 +64,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Variables needed to compute truncation limits for # selective maxZ test - realized_maxZ[1] = c(sign_hit * score[i_hit]) + realized_maxZ[1] = c(sign_hit * working_score[i_hit]) offset_pos_maxZ[,1] = Inf offset_neg_maxZ[,1] = Inf scale_maxZ[,1] = working_scale @@ -132,15 +132,18 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, offset_pos_maxZ = cbind(offset_pos_maxZ, matrix(0, p, buf)) offset_neg_maxZ = cbind(offset_neg_maxZ, matrix(0, p, buf)) scale_maxZ = cbind(scale_maxZ, matrix(0, p, buf)) - realized_maxZ = cbind(realized_maxZ, matrix(0, p, buf)) + realized_maxZ = c(realized_maxZ, numeric(buf)) } # Key quantities for the next entry keepLs=backsolve(R,t(Q_active)%*%X_inactive) + + prev_scale = working_scale[-i_hit] # this variable used later for maxZ X_inactive_resid = X_inactive - X_active %*% keepLs - working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) - score = as.numeric(t(working_x)%*%y) + working_scale = sqrt(colSums(X_inactive_resid^2)) # this variable used later for maxZ + working_x = scale(X_inactive_resid,center=F,scale=working_scale) + working_score = as.numeric(t(working_x)%*%y) beta_cur = backsolve(R,t(Q_active)%*%y) # must be computed before the break # so we have it if we have @@ -180,7 +183,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # update maxZ variable realized_maxZ[k] = sign_hit * working_score[i_hit] - + # Gamma matrix! 
if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) @@ -451,6 +454,7 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, y = obj$y p = ncol(x) n = nrow(x) + pv = c() if (is.null(sigma)) { # TODO we need a sampler on a unit sphere @@ -487,10 +491,10 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, collapsed_neg = apply(obj$offset_neg_maxZ[inactive,1:j,drop=FALSE], 1, min) cur_scale = obj$scale_maxZ[,j][inactive] - # the matrix cur_adjusted_X is used to compute + # the matrix cur_adjusted_Xt is used to compute (always as length(y) columns) # the maxZ or maxT for the sampled variables - - cur_adjusted_X = obj$Gamma_maxZ[zi + Seq(1,p-j+1),]; zi = zi+p-j+1 + # + cur_adjusted_Xt = obj$Gamma_maxZ[zi + Seq(1,p-j+1),]; zi = zi+p-j+1 # Xt for transpose # cur_X is used to enforce conditioning on # the ever_active sufficient_statistics @@ -517,8 +521,8 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, # now, we sample from Y_star, a centered Gaussian with covariance sigma^2 I # subject to the constraint - # t(cur_adjusted_X) %*% Y_star < final_upper - # -t(cur_adjusted_X) %*% Y_star < -final_lower + # t(cur_adjusted_Xt) %*% Y_star < final_upper + # -t(cur_adjusted_Xt) %*% Y_star < -final_lower # really, we want the covariance of Y_star to be \sigma^2 (I - cur_P) # where P is projection on the j-1 previous variables @@ -529,7 +533,7 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, # IMPORTANT: after sampling Y_star, we have to add back cur_fitted - # if n > p, we actually just draw cur_adjusted_X %*% Y_star + # if n > p, we actually just draw cur_adjusted_Xt %*% Y_star # because this has a simple box constraint # with a generically non-degenerate covariance @@ -537,10 +541,10 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, library(tmvtnorm) if (length(inactive) > 1) { - cov = (cur_adjusted_X %*% t(cur_adjusted_X)) + cov = (cur_adjusted_Xt %*% t(cur_adjusted_Xt)) cov = cov * rep(sigma^2, nrow(cov), ncol(cov)) } else { - cov = sigma^2 * sum(cur_adjusted_X^2) + cov = sigma^2 * sum(cur_adjusted_Xt^2) } truncated_noise = rtmvnorm(n=ndraw, @@ -559,11 +563,12 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, } } else { - linear_part = rbind(t(cur_adjusted_X), -t(cur_adjusted_X)) + linear_part = rbind(cur_adjusted_Xt, -cur_adjusted_Xt) offset = c(final_upper, -final_lower) - covariance = diag(rep(sigma^2, nrow(cor_adjusted_X))) - mean = rep(0, nrow(cur_adjusted_X)) + covariance = diag(rep(sigma^2, length(y))) + mean = rep(0, length(y)) initial_point = y + truncated_y = sample_from_constraints(linear_part, offset, mean, @@ -572,7 +577,7 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, burnin=burnin, ndraw=ndraw) - truncated_noise = truncated_y %*% cur_adjusted_X + truncated_noise = truncated_y %*% t(cur_adjusted_Xt) sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) } @@ -592,17 +597,17 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, sigma=sigma, call=this.call, vars=vars, - sign=sign, + sign=obj$sign, alpha=alpha, realized_maxZ=obj$realized_maxZ) - class(out) = "fsInf_Zmax" + class(out) = "fsInf_maxZ" return(out) } ############################## - -Print methods - +# +# Print methods +# ############################## print.fs <- function(x, ...) { @@ -618,7 +623,68 @@ print.fs <- function(x, ...) 
{ invisible() } -print.fsInf_Zmax <- function(obj) { +print.fsInf <- function(x, tailarea=TRUE, ...) { + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + if (x$type == "active") { + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",x$alpha)) + tab = cbind(1:length(x$pv),x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", + "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",x$khat)) + } + + else if (x$type == "all") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$k,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + } + + else if (x$type == "aic") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$khat,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) + } + + invisible() +} + + +print.fsInf_maxZ <- function(obj) { cat("\nCall:\n") dput(obj$call) @@ -628,23 +694,23 @@ print.fsInf_Zmax <- function(obj) { cat(sprintf("\nSequential testing results with alpha = %0.3f\n",obj$alpha)) - tab = cbind(1:length(obj$pv),obj$vars, + tab = cbind(1:length(obj$pv), + obj$vars, round(obj$sign*obj$realized_maxZ, 3), round(obj$pv,3)) - colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value") + colnames(tab) = c("Step", "Var", "Z-score", "P-value") rownames(tab) = rep("",nrow(tab)) print(tab) cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",obj$khat)) - } invisible() } ############################## - -Plot methods - +# +# Plot methods +# ############################## plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) 
{ diff --git a/tests/test.fs_maxZ.R b/tests/test.fs_maxZ.R new file mode 100644 index 00000000..dc7e6e28 --- /dev/null +++ b/tests/test.fs_maxZ.R @@ -0,0 +1,23 @@ +library(selectiveInference) +options(error=dump.frames) + +set.seed(0) +n = 100 +p = 120 +s = 3 +size = 5 + +sigma = 1.5 +x = matrix(rnorm(n*p),n,p) +#x = scale(x,T,F)/sqrt(n-1) + +b = c(sample(c(-1,1),s,replace=T)*rep(size,s),rep(0,p-s)) +mu = x%*%b +y = mu + sigma*rnorm(n) + +obj = fs(x,y,verb=T,intercept=T,norm=T, maxsteps=20) + + +# Sequential inference +out = fsInf_maxZ(obj,sigma=sigma,k=20, ndraw=5000, burnin=1000) +print(out) From e48e922d8b0b9834119fad71b24f4e93a8abc2f8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Mar 2016 23:38:32 -0700 Subject: [PATCH 116/396] removing tmvtnorm dependency --- selectiveInference/R/funs.fs.R | 61 ++++++++++------------------------ 1 file changed, 18 insertions(+), 43 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index dedc0d39..7b040125 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -533,53 +533,28 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, # IMPORTANT: after sampling Y_star, we have to add back cur_fitted - # if n > p, we actually just draw cur_adjusted_Xt %*% Y_star + # if n > p, we could actually just draw cur_adjusted_Xt %*% Y_star # because this has a simple box constraint # with a generically non-degenerate covariance - if (n > p) { - library(tmvtnorm) - - if (length(inactive) > 1) { - cov = (cur_adjusted_Xt %*% t(cur_adjusted_Xt)) - cov = cov * rep(sigma^2, nrow(cov), ncol(cov)) - } else { - cov = sigma^2 * sum(cur_adjusted_Xt^2) - } - - truncated_noise = rtmvnorm(n=ndraw, - mean=cur_offset, - sigma=cov, - lower=-collapsed_neg, - upper=collapsed_pos, - algorithm="gibbs", - burn.in.samples=burnin) - - if (length(inactive) > 1) { - sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) - } - else { - sample_maxZ = truncated_noise / cur_scale - } - } else { + # but `tmvtnorm` seems to give poor results for its sampler - linear_part = rbind(cur_adjusted_Xt, -cur_adjusted_Xt) - offset = c(final_upper, -final_lower) - covariance = diag(rep(sigma^2, length(y))) - mean = rep(0, length(y)) - initial_point = y - - truncated_y = sample_from_constraints(linear_part, - offset, - mean, - covariance, - initial_point, - burnin=burnin, - ndraw=ndraw) - - truncated_noise = truncated_y %*% t(cur_adjusted_Xt) - sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) - } + linear_part = rbind(cur_adjusted_Xt, -cur_adjusted_Xt) + offset = c(final_upper, -final_lower) + covariance = diag(rep(sigma^2, length(y))) + mean = rep(0, length(y)) + initial_point = y + + truncated_y = sample_from_constraints(linear_part, + offset, + mean, + covariance, + initial_point, + burnin=burnin, + ndraw=ndraw) + + truncated_noise = truncated_y %*% t(cur_adjusted_Xt) + sample_maxZ = apply(abs(1. 
/ cur_scale * truncated_noise), 1, max) observed_maxZ = obj$realized_maxZ[j] From ee560b57738840fcda72445337329144eb17e5cd Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 19 Mar 2016 08:13:19 -0700 Subject: [PATCH 117/396] BF: last step was failing because of how R treats numeric different than matrix --- selectiveInference/R/funs.fs.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 7b040125..a86f96d0 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -553,9 +553,13 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, burnin=burnin, ndraw=ndraw) - truncated_noise = truncated_y %*% t(cur_adjusted_Xt) - sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) - + if (j < p) { + truncated_noise = truncated_y %*% t(cur_adjusted_Xt) + sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) + } + else { + sample_maxZ = abs(truncated_y %*% cur_adjusted_Xt) + } observed_maxZ = obj$realized_maxZ[j] pval = sum(sample_maxZ > observed_maxZ) / ndraw From 338fac0e7f9745d355f483c896bd9a10dddc14d3 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 19 Mar 2016 10:50:07 -0700 Subject: [PATCH 118/396] using drop=FALSE instead of if statement --- selectiveInference/R/funs.fs.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index a86f96d0..1bb08b9e 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -494,7 +494,7 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, # the matrix cur_adjusted_Xt is used to compute (always as length(y) columns) # the maxZ or maxT for the sampled variables # - cur_adjusted_Xt = obj$Gamma_maxZ[zi + Seq(1,p-j+1),]; zi = zi+p-j+1 # Xt for transpose + cur_adjusted_Xt = obj$Gamma_maxZ[zi + Seq(1,p-j+1),,drop=FALSE]; zi = zi+p-j+1 # Xt for transpose # cur_X is used to enforce conditioning on # the ever_active sufficient_statistics @@ -537,8 +537,6 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, # because this has a simple box constraint # with a generically non-degenerate covariance - # but `tmvtnorm` seems to give poor results for its sampler - linear_part = rbind(cur_adjusted_Xt, -cur_adjusted_Xt) offset = c(final_upper, -final_lower) covariance = diag(rep(sigma^2, length(y))) @@ -553,13 +551,9 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, burnin=burnin, ndraw=ndraw) - if (j < p) { - truncated_noise = truncated_y %*% t(cur_adjusted_Xt) - sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) - } - else { - sample_maxZ = abs(truncated_y %*% cur_adjusted_Xt) - } + truncated_noise = truncated_y %*% t(cur_adjusted_Xt) + sample_maxZ = apply(abs(1. 
/ cur_scale * truncated_noise), 1, max) + observed_maxZ = obj$realized_maxZ[j] pval = sum(sample_maxZ > observed_maxZ) / ndraw From 45406e6fbcd210cd7c7ce8e2d8f6539622499212 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 19 Mar 2016 19:54:34 -0700 Subject: [PATCH 119/396] BF: broadcasting error, also scale_maxZ was wrong variable after resizing --- selectiveInference/R/funs.fs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 1bb08b9e..7906be63 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -230,7 +230,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, offset_pos_maxZ = offset_pos_maxZ[,Seq(1,k-1),drop=FALSE] offset_neg_maxZ = offset_neg_maxZ[,Seq(1,k-1),drop=FALSE] - scale_maxZ = offset_pos_maxZ[,Seq(1,k-1),drop=FALSE] + scale_maxZ = scale_maxZ[,Seq(1,k-1),drop=FALSE] Gamma_maxZ = Gamma_maxZ[Seq(1,zi),,drop=FALSE] # If we reached the maximum number of steps @@ -552,7 +552,7 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, ndraw=ndraw) truncated_noise = truncated_y %*% t(cur_adjusted_Xt) - sample_maxZ = apply(abs(1. / cur_scale * truncated_noise), 1, max) + sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) observed_maxZ = obj$realized_maxZ[j] From c6e959c5f8b7c22c35dd0148f3ecf192d9e346f2 Mon Sep 17 00:00:00 2001 From: tibs Date: Sat, 19 Mar 2016 23:30:56 -0700 Subject: [PATCH 120/396] rob added Rd file for fsInf_maxZ --- selectiveInference/R/funs.fs.R | 4 ++-- selectiveInference/R/funs.quadratic.R | 3 ++- tests/test.fs_maxZ.R | 30 +++++++++++++++++++++------ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 7906be63..a4cc45fc 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -568,11 +568,11 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, k=k, khat=khat, sigma=sigma, - call=this.call, vars=vars, sign=obj$sign, alpha=alpha, - realized_maxZ=obj$realized_maxZ) + realized_maxZ=obj$realized_maxZ, + call=this.call) class(out) = "fsInf_maxZ" return(out) } diff --git a/selectiveInference/R/funs.quadratic.R b/selectiveInference/R/funs.quadratic.R index 23ff63f5..799352b5 100644 --- a/selectiveInference/R/funs.quadratic.R +++ b/selectiveInference/R/funs.quadratic.R @@ -1,4 +1,3 @@ - truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { n <- nrow(obj$x) @@ -177,6 +176,7 @@ TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { # Numerically solve for roots of TF slice using # hybrid polyroot/uniroot approach + TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { x11 <- coeffs$x11 @@ -254,3 +254,4 @@ TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { if (I(0) < 0) stop("Infeasible constraint!") return(Intervals(c(-Inf,0))) } + diff --git a/tests/test.fs_maxZ.R b/tests/test.fs_maxZ.R index dc7e6e28..107948d7 100644 --- a/tests/test.fs_maxZ.R +++ b/tests/test.fs_maxZ.R @@ -2,22 +2,40 @@ library(selectiveInference) options(error=dump.frames) set.seed(0) -n = 100 -p = 120 +n = 20 +p = 5 s = 3 size = 5 sigma = 1.5 x = matrix(rnorm(n*p),n,p) -#x = scale(x,T,F)/sqrt(n-1) + b = c(sample(c(-1,1),s,replace=T)*rep(size,s),rep(0,p-s)) +b=rep(0,p) mu = x%*%b +nsim=200 +pv=matrix(NA,nsim,p) +for(ii in 1:nsim){ + cat(ii) y = mu + sigma*rnorm(n) -obj = fs(x,y,verb=T,intercept=T,norm=T, maxsteps=20) +obj = 
fs(x,y,verb=T,intercept=T,norm=T, maxsteps=p) # Sequential inference -out = fsInf_maxZ(obj,sigma=sigma,k=20, ndraw=5000, burnin=1000) -print(out) +out = fsInf_maxZ(obj,sigma=sigma, ndraw=5000, burnin=1000) +pv[ii,]=out$pv +} + + + apply(pv,2,quantile) + [,1] [,2] [,3] [,4] [,5] +0% 0 0.0000 0.0000 0.0000 0.0000 +25% 0 0.0000 0.0055 0.1215 0.2123 +50% 0 0.0000 0.1826 0.4028 0.4824 +75% 0 0.0875 0.5638 0.7002 0.7517 +100% 0 0.9860 0.9916 0.9996 0.9984 + + +plot((1:nsim)/nsim,sort(pv[,4])) From 1470e608f6cdd7e052198e04e66d910ca52d3ccb Mon Sep 17 00:00:00 2001 From: tibs Date: Sun, 20 Mar 2016 12:39:42 -0700 Subject: [PATCH 121/396] rob --- selectiveInference/R/funs.fs.R | 6 +++--- tests/test.fs_maxZ.R | 15 +++++---------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index a4cc45fc..de3ebf56 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -440,8 +440,8 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic # selected maxZ tests -fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, - ndraw=8000, burnin=2000) { +fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL, + ndraw=8000, burnin=2000, verbose=FALSE) { this.call = match.call() @@ -474,7 +474,7 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, verbose=FALSE, k=NULL, vars = obj$action[1:k] zi = 0 for (j in 1:k) { - + if(verbose) cat(c("Step=",j),fill=T) # the inactive set here does not # include the variable at the j-th step # so, at j==1, the inactive set is every variable diff --git a/tests/test.fs_maxZ.R b/tests/test.fs_maxZ.R index 107948d7..11e89aa2 100644 --- a/tests/test.fs_maxZ.R +++ b/tests/test.fs_maxZ.R @@ -29,13 +29,8 @@ pv[ii,]=out$pv } - apply(pv,2,quantile) - [,1] [,2] [,3] [,4] [,5] -0% 0 0.0000 0.0000 0.0000 0.0000 -25% 0 0.0000 0.0055 0.1215 0.2123 -50% 0 0.0000 0.1826 0.4028 0.4824 -75% 0 0.0875 0.5638 0.7002 0.7517 -100% 0 0.9860 0.9916 0.9996 0.9984 - - -plot((1:nsim)/nsim,sort(pv[,4])) +par(mfrow=c(3,3)) +for(j in 1:p){ +plot((1:nsim)/nsim,sort(pv[,j])) +abline(0,1) +} From f9681b461b9ec6d1f3eb21b4929b564b510852f3 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 21 Mar 2016 11:01:55 -0700 Subject: [PATCH 122/396] sample a smaller dimensional gaussian when possible --- selectiveInference/R/funs.fs.R | 59 +++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index de3ebf56..7e6f2faf 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -533,27 +533,50 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL, # IMPORTANT: after sampling Y_star, we have to add back cur_fitted - # if n > p, we could actually just draw cur_adjusted_Xt %*% Y_star + # if n >= p, we could actually just draw cur_adjusted_Xt %*% Y_star # because this has a simple box constraint # with a generically non-degenerate covariance - linear_part = rbind(cur_adjusted_Xt, -cur_adjusted_Xt) - offset = c(final_upper, -final_lower) - covariance = diag(rep(sigma^2, length(y))) - mean = rep(0, length(y)) - initial_point = y - - truncated_y = sample_from_constraints(linear_part, - offset, - mean, - covariance, - initial_point, - burnin=burnin, - ndraw=ndraw) - - truncated_noise = truncated_y %*% t(cur_adjusted_Xt) - sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) - + if (nrow(cur_adjusted_Xt) > length(y)) { + linear_part = 
rbind(cur_adjusted_Xt, -cur_adjusted_Xt)
+            offset = c(final_upper, -final_lower)
+            covariance = diag(rep(sigma^2, length(y)))
+            mean = rep(0, length(y))
+            initial_point = y
+
+            truncated_y = sample_from_constraints(linear_part,
+                                                  offset,
+                                                  mean,
+                                                  covariance,
+                                                  initial_point,
+                                                  burnin=burnin,
+                                                  ndraw=ndraw)
+
+            truncated_noise = truncated_y %*% t(cur_adjusted_Xt)
+            sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max)
+        } else if (nrow(cur_adjusted_Xt) > 1) { # sample from a smaller dimensional gaussian
+            linear_part = rbind(diag(rep(1, nrow(cur_adjusted_Xt))),
+                                diag(rep(-1, nrow(cur_adjusted_Xt))))
+            covariance = sigma^2 * (cur_adjusted_Xt %*% t(cur_adjusted_Xt))
+            offset = c(final_upper, -final_lower)
+            mean = rep(0, nrow(cur_adjusted_Xt))
+            initial_point = cur_adjusted_Xt %*% y
+
+            truncated_noise = sample_from_constraints(linear_part,
+                                                      offset,
+                                                      mean,
+                                                      covariance,
+                                                      initial_point,
+                                                      burnin=burnin,
+                                                      ndraw=ndraw)
+            sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max)
+
+        } else { # problem is just a univariate gaussian
+            # this should work as long as cdfL - cdfU is not tiny
+            cdfL = pnorm(final_lower)
+            cdfU = pnorm(final_upper)
+            sample_maxZ = qnorm(runif(ndraw) * (cdfU - cdfL) + cdfL)
+        }
         observed_maxZ = obj$realized_maxZ[j]
 
         pval = sum(sample_maxZ > observed_maxZ) / ndraw

From fdb26b14ab27f1725585d6fea8e2340bd609680c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Mon, 21 Mar 2016 11:48:29 -0700
Subject: [PATCH 123/396] BF in univariate sampler -- needed abs value

---
 selectiveInference/R/funs.fs.R | 37 +++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R
index 7e6f2faf..64031062 100644
--- a/selectiveInference/R/funs.fs.R
+++ b/selectiveInference/R/funs.fs.R
@@ -512,7 +512,7 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL,
             cur_offset = as.numeric(t(cur_X) %*% cur_fitted)
         }
         else {
-            cur_fitted = 0
+            cur_fitted = rep(0, length(y))
             cur_offset = rep(0, length(inactive))
         }
 
@@ -541,12 +541,12 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL,
             linear_part = rbind(cur_adjusted_Xt, -cur_adjusted_Xt)
             offset = c(final_upper, -final_lower)
             covariance = diag(rep(sigma^2, length(y)))
-            mean = rep(0, length(y))
+            mean_param = cur_fitted # rep(0, length(y))
             initial_point = y
 
             truncated_y = sample_from_constraints(linear_part,
                                                   offset,
-                                                  mean,
+                                                  mean_param,
                                                   covariance,
                                                   initial_point,
                                                   burnin=burnin,
                                                   ndraw=ndraw)
@@ -559,29 +559,42 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL,
                                 diag(rep(-1, nrow(cur_adjusted_Xt))))
             covariance = sigma^2 * (cur_adjusted_Xt %*% t(cur_adjusted_Xt))
             offset = c(final_upper, -final_lower)
-            mean = rep(0, nrow(cur_adjusted_Xt))
+            mean_param = cur_adjusted_Xt %*% cur_fitted # rep(0, nrow(cur_adjusted_Xt))
             initial_point = cur_adjusted_Xt %*% y
 
             truncated_noise = sample_from_constraints(linear_part,
                                                       offset,
-                                                      mean,
+                                                      mean_param,
                                                       covariance,
                                                       initial_point,
                                                       burnin=burnin,
                                                       ndraw=ndraw)
             sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max)
 
-        } else { # problem is just a univariate gaussian
-            # this should work as long as cdfL - cdfU is not tiny
-            cdfL = pnorm(final_lower)
-            cdfU = pnorm(final_upper)
-            sample_maxZ = qnorm(runif(ndraw) * (cdfU - cdfL) + cdfL)
+        } else { # just a univariate truncated Gaussian
+            # but we need the law of the absolute value of it
+            # we are sampling here, but could probably
+            # do this without sampling
+            mean_param = sum(as.numeric(cur_adjusted_Xt) * as.numeric(cur_fitted))
+            scaling = 
sigma * sqrt(sum(cur_adjusted_Xt^2)) + L = (final_lower - mean_param) / scaling + U = (final_upper - mean_param) / scaling + if (L > 6) { # use Exp approximation + Z = rexp(ndraw) / L + L + } else if (U < -6) { + Z = rexp(ndraw) / U + U + } else { + Z = qnorm(runif(ndraw) * (pnorm(U) - pnorm(L)) + pnorm(L)) + } + print('delta P') + print(pnorm(U) - pnorm(L)) + sample_maxZ = abs(Z * scaling + mean_param) + } - observed_maxZ = obj$realized_maxZ[j] + observed_maxZ = obj$realized_maxZ[j] pval = sum(sample_maxZ > observed_maxZ) / ndraw pval = 2 * min(pval, 1 - pval) - pv = c(pv, pval) } From 2b22bbf965d631dad012daed0318295cf8e115d5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 21 Mar 2016 12:11:04 -0700 Subject: [PATCH 124/396] moved logic for 1D constraint sampling to constraints file, renamed mean to mean_param --- selectiveInference/R/funs.constraints.R | 102 ++++++++++++++---------- selectiveInference/R/funs.fs.R | 44 ++++------ 2 files changed, 77 insertions(+), 69 deletions(-) diff --git a/selectiveInference/R/funs.constraints.R b/selectiveInference/R/funs.constraints.R index e2bc0417..3bf6b77e 100644 --- a/selectiveInference/R/funs.constraints.R +++ b/selectiveInference/R/funs.constraints.R @@ -24,16 +24,16 @@ factor_covariance = function(S, rank=NA) { # whitening map # -# law is Z \sim N(mean, covariance) subject to constraints linear_part %*% Z \leq offset +# law is Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset -whiten_constraint = function(linear_part, offset, mean, covariance) { +whiten_constraint = function(linear_part, offset, mean_param, covariance) { factor_cov = factor_covariance(covariance) sqrt_cov = factor_cov$sqrt_cov sqrt_inv = factor_cov$sqrt_inv new_A = linear_part %*% sqrt_cov - new_b = offset - linear_part %*% mean + new_b = offset - linear_part %*% mean_param # rescale rows to have length 1 @@ -44,11 +44,13 @@ whiten_constraint = function(linear_part, offset, mean, covariance) { # TODO: check these functions will behave when Z is a matrix. 
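+  # Editor's note (hedged sketch, not in the original file): when Z has one
+  # sample per column (nstate x ndraw), R recycles as.numeric(mean_param)
+  # down each column, e.g.
+  #   matrix(0, 2, 3) + c(10, 20)   # adds (10, 20) to every column
+  # which is exactly the columnwise shift the "broadcasting" comments
+  # below rely on.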
inverse_map = function(Z) { - return(sqrt_cov %*% Z + mean) + # broadcasting here + # the columns of Z are same length as mean_param + return(sqrt_cov %*% Z + as.numeric(mean_param)) } forward_map = function(W) { - return(sqrt_inv %*% (W - mean)) + return(sqrt_inv %*% (W - mean_param)) } return(list(linear_part=new_A, @@ -60,11 +62,11 @@ whiten_constraint = function(linear_part, offset, mean, covariance) { # # sample from the law # -# Z \sim N(mean, covariance) subject to constraints linear_part %*% Z \leq offset +# Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset sample_from_constraints = function(linear_part, offset, - mean, + mean_param, covariance, initial_point, # point must be feasible for constraints ndraw=8000, @@ -74,7 +76,7 @@ sample_from_constraints = function(linear_part, whitened_con = whiten_constraint(linear_part, offset, - mean, + mean_param, covariance) white_initial = whitened_con$forward_map(initial_point) @@ -121,42 +123,60 @@ sample_from_constraints = function(linear_part, # In theory, these rows can be dropped rows_to_keep = white_offset < Inf - white_linear = white_linear[rows_to_keep,] - white_offset = white_offset[rows_to_keep,] + white_linear = white_linear[rows_to_keep,,drop=FALSE] + white_offset = white_offset[rows_to_keep] nstate = length(white_initial) if (sum(rows_to_keep) > 0) { - nconstraint = nrow(white_linear) - - directions = rbind(diag(rep(1, nstate)), - matrix(rnorm(nstate^2), nstate, nstate)) - - # normalize rows to have length 1 - - scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) - directions = directions / scaling - ndirection = nrow(directions) - - alphas = directions %*% t(white_linear) - U = white_linear %*% white_initial - white_offset - Z_sample = matrix(rep(0, nstate * ndraw), nstate, ndraw) - - result = .C("sample_truncnorm_white", - as.numeric(white_initial), - as.numeric(U), - as.numeric(t(directions)), - as.numeric(t(alphas)), - output=Z_sample, - as.integer(nconstraint), - as.integer(ndirection), - as.integer(nstate), - as.integer(burnin), - as.integer(ndraw), - package="selectiveInference") - Z_sample = result$output - } - else { - Z_sample = matrix(rnorm(nstate * ndraw), nstate, ndraw) + if (ncol(white_linear) > 1) { + nconstraint = nrow(white_linear) + + directions = rbind(diag(rep(1, nstate)), + matrix(rnorm(nstate^2), nstate, nstate)) + + # normalize rows to have length 1 + + scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) + directions = directions / scaling + ndirection = nrow(directions) + + alphas = directions %*% t(white_linear) + U = white_linear %*% white_initial - white_offset + Z_sample = matrix(rep(0, nstate * ndraw), nstate, ndraw) + + result = .C("sample_truncnorm_white", + as.numeric(white_initial), + as.numeric(U), + as.numeric(t(directions)), + as.numeric(t(alphas)), + output=Z_sample, + as.integer(nconstraint), + as.integer(ndirection), + as.integer(nstate), + as.integer(burnin), + as.integer(ndraw), + package="selectiveInference") + Z_sample = result$output + } else { # the distribution is univariate + # we can just work out upper and lower limits + + white_linear = as.numeric(white_linear) + pos = (white_linear * white_offset) >= 0 + neg = (white_linear * white_offset) <= 0 + if (sum(pos) > 0) { + U = min((white_offset / white_linear)[pos]) + } else { + U = Inf + } + if (sum(neg) < 0) { + L = max((white_offset / white_linear)[neg]) + } else { + L = -Inf + } + Z_sample = matrix(qnorm((pnorm(U) - pnorm(L)) * runif(ndraw) + pnorm(L)), 1, 
ndraw) + } + } else { + Z_sample = matrix(rnorm(nstate * ndraw), nstate, ndraw) } } diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 64031062..cb63da2c 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -554,14 +554,21 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL, truncated_noise = truncated_y %*% t(cur_adjusted_Xt) sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) - } else if (nrow(cur_adjusted_Xt) > 1) { # sample from a smaller dimensional gaussian - linear_part = rbind(diag(rep(1, nrow(cur_adjusted_Xt))), - diag(rep(-1, nrow(cur_adjusted_Xt)))) - covariance = sigma^2 * (cur_adjusted_Xt %*% t(cur_adjusted_Xt)) - offset = c(final_upper, -final_lower) - mean_param = cur_adjusted_Xt %*% cur_fitted # rep(0, nrow(cur_adjusted_Xt)) - initial_point = cur_adjusted_Xt %*% y - + } else { # sample from a smaller dimensional gaussian + if (nrow(cur_adjusted_Xt) > 1) { + linear_part = rbind(diag(rep(1, nrow(cur_adjusted_Xt))), + diag(rep(-1, nrow(cur_adjusted_Xt)))) + covariance = sigma^2 * (cur_adjusted_Xt %*% t(cur_adjusted_Xt)) + offset = c(final_upper, -final_lower) + mean_param = cur_adjusted_Xt %*% cur_fitted # rep(0, nrow(cur_adjusted_Xt)) + initial_point = cur_adjusted_Xt %*% y + } else { + mean_param = as.numeric(sum(as.numeric(cur_adjusted_Xt) * as.numeric(cur_fitted))) + covariance = matrix(sigma^2 * sum(cur_adjusted_Xt^2)) + linear_part = matrix(c(1,-1), 2, 1) + offset = c(final_upper, -final_lower) + initial_point = as.numeric(sum(as.numeric(cur_adjusted_Xt) * as.numeric(y))) + } truncated_noise = sample_from_constraints(linear_part, offset, mean_param, @@ -571,26 +578,7 @@ fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL, ndraw=ndraw) sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) - } else { # just a univariated truncated Gaussian - # but we need the law of the absolute value of it - # we are sampling here, but could probably - # do this without sampling - mean_param = sum(as.numeric(cur_adjusted_Xt) * as.numeric(cur_fitted)) - scaling = sigma * sqrt(sum(cur_adjusted_Xt^2)) - L = (final_lower - mean_param) / scaling - U = (final_upper - mean_param) / scaling - if (L > 6) { # use Exp approximation - Z = rexp(ndraw) / L + L - } else if (U < -6) { - Z = rexp(ndraw) / U + U - } else { - Z = qnorm(runif(ndraw) * (pnorm(U) - pnorm(L)) + pnorm(L)) - } - print('delta P') - print(pnorm(U) - pnorm(L)) - sample_maxZ = abs(Z * scaling + mean_param) - - } + } observed_maxZ = obj$realized_maxZ[j] pval = sum(sample_maxZ > observed_maxZ) / ndraw From f8b1ba89ed8a2307e5749184b48d1174a29b3903 Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 25 Mar 2016 09:49:38 -0700 Subject: [PATCH 125/396] rob added binom and Cox families to fixedLassoInf --- selectiveInference/DESCRIPTION | 3 +- selectiveInference/NAMESPACE | 4 + selectiveInference/R/funs.fixed.R | 25 +++- selectiveInference/R/funs.fixedCox.R | 111 ++++++++++++++++++ selectiveInference/R/funs.fixedLogit.R | 145 ++++++++++++++++++++++++ selectiveInference/R/funs.inf.R | 53 +++++++++ selectiveInference/man/fixedLassoInf.Rd | 64 ++++++++++- tests/test.fixed.R | 98 ++++++++++++++-- 8 files changed, 485 insertions(+), 18 deletions(-) create mode 100644 selectiveInference/R/funs.fixedCox.R create mode 100644 selectiveInference/R/funs.fixedLogit.R diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index 9e283ecc..af80d99a 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -9,7 +9,8 
@@ Maintainer: Rob Tibshirani Depends: glmnet, intervals, - tmvtnorm + tmvtnorm, + survival Suggests: Rmpfr Description: New tools for post-selection inference, for use diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index cc7e76f3..4d283980 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -6,6 +6,8 @@ export(lar,fs, print.larInf,print.fsInf, plot.lar,plot.fs, fixedLassoInf,print.fixedLassoInf, + # fixedLogitLassoInf,print.fixedLogitLassoInf, + # fixedCoxLassoInf,print.fixedCoxLassoInf, forwardStop, estimateSigma, manyMeans,print.manyMeans, @@ -26,6 +28,8 @@ S3method("plot", "fs") S3method("print", "fsInf") S3method("print", "fsInf_maxZ") S3method("print", "fixedLassoInf") +S3method("print", "fixedLogitLassoInf") +S3method("print", "fixedCoxLassoInf") S3method("print", "manyMeans") S3method("print", "groupfs") S3method("print", "groupfsInf") diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index ada155b7..8948a10f 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -2,12 +2,32 @@ # for the solution of # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 -fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha=0.1, +fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, +sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE) { - + + family = match.arg(family) this.call = match.call() type = match.arg(type) + + if(family=="binomial") { + if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } +else if(family=="cox") { + if(type!="partial") stop("Only type= partial allowed with Cox family") + out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + +else{ + + + checkargs.xy(x,y) if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") @@ -121,6 +141,7 @@ fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha= class(out) = "fixedLassoInf" return(out) } +} ############################# diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R new file mode 100644 index 00000000..82eeb613 --- /dev/null +++ b/selectiveInference/R/funs.fixedCox.R @@ -0,0 +1,111 @@ +fixedCoxLassoInf=function(x,y,status,beta,lambda,alpha=.1, type=c("partial"),tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + + checkargs.xy(x,y) + if(is.null(status)) stop("Must supply `status' argument") +if( sum(status==0)+sum(status==1)!=length(y)) stop("status vector must have values 0 or 1") + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, 
reverting to standard precision") + bits = NULL + } + + n=nrow(x) + p=ncol(x) + nvar=sum(beta!=0) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + + + m=beta!=0 +vars=which(m) +if(sum(m)>0){ + bhat=beta[beta!=0] #penalized coefs just for active variables +s2=sign(bhat) + + #check KKT + + aaa=coxph(Surv(y,status)~x[,m],init=bhat,iter.max=0) + res=residuals(aaa,type="score") +if(!is.matrix(res)) res=matrix(res,ncol=1) +scor=colSums(res) + g=(scor+lambda*s2)/(2*lambda) +# cat(c(g,lambda,tol.kkt),fill=T) + if (any(abs(g) > 1+tol.kkt) ) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + +MM=vcov(aaa) + + bbar=(bhat+lambda*MM%*%s2) + A1=-(mydiag(s2)) +b1= -(mydiag(s2)%*%MM)%*%s2*lambda + + temp=max(A1%*%bbar-b1) + + +# compute p-values + for(jj in 1:length(bbar)){ + vj=rep(0,length(bbar));vj[jj]=1 + + + junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) + + pv[jj] = junk$pv + vlo[jj]=junk$vlo + vup[jj]=junk$vup + sd[jj]=junk$sd + + junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha) + ci[jj,]=junk2$int + tailarea[jj,] = junk2$tailarea + + } + fit0=coxph(Surv(y,status)~x[,m]) + coef0=fit0$coef + se0=sqrt(diag(fit0$var)) + zscore0=coef0/se0 + + out = list(lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call) + class(out) = "fixedCoxLassoInf" +} +return(out) +} + + + +print.fixedCoxLassoInf <- function(x, tailarea=TRUE, ...) { + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + + diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R new file mode 100644 index 00000000..ea476b1c --- /dev/null +++ b/selectiveInference/R/funs.fixedLogit.R @@ -0,0 +1,145 @@ + +fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + + type = match.arg(type) + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + + n=length(y) + p=ncol(x) + # I assume that intcpt was used + if(length(beta)!=p+1) stop("beta must be of length ncol(x)+1, that is, it should include an intercept") + nvar=sum(beta[-1]!=0) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + +#do we need to worry about standardization? 
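+# (as the code stands, x is used on its original scale throughout; the
+# commented-out standardize() call below is the place to rescale if needed)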
+ +# obj = standardize(x,y,TRUE,FALSE) + # x = obj$x + # y = obj$y + + m=beta[-1]!=0 #active set + + bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars + s2=sign(bhat) + lam2m=diag(c(0,rep(lambda,sum(m)))) + + + xxm=cbind(1,x[,m]) + + etahat = xxm %*% bhat + prhat = as.vector(exp(etahat) / (1 + exp(etahat))) + ww=prhat*(1-prhat) + w=diag(ww) + +#check KKT + z=etahat+(y-prhat)/ww + g= t(x)%*%w%*%(z-etahat)/lambda + if (any(abs(g) > 1+tol.kkt) ) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + vars = which(abs(beta[-1]) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[-1][vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + #constraints for active variables + MM=solve(t(xxm)%*%w%*%xxm) + + bbar=(bhat-lam2m%*%MM%*%s2) + + + A1=-(mydiag(s2))[-1,-1] + b1= ((mydiag(s2)%*%MM)%*%s2*lambda)[-1] + + tol.poly = 0.01 + if (max(A1 %*% bbar[-1] - b1) > tol.poly) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + + + for(jj in 1:sum(m)){ + vj=c(rep(0,sum(m)));vj[jj]=1 +# compute p-values + junk=mypoly.pval.lee(bbar[-1],A1,b1,vj,MM[-1,-1]) + pv[jj] = junk$pv + + + vlo[jj]=junk$vlo + vup[jj]=junk$vup + sd[jj]=junk$sd + # junk2=mypoly.int.lee(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) + junk2=mypoly.int.lee(bbar[-1],vj,vlo[jj],vup[jj],sd[jj],alpha=.1) + + ci[jj,]=junk2$int + tailarea[jj,] = junk2$tailarea + } + + fit0=glm(y~x[,m],family="binomial") + sfit0=summary(fit0) + coef0=fit0$coef[-1] + se0=sqrt(diag(sfit0$cov.scaled)[-1]) + zscore0=coef0/se0 + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call) + class(out) = "fixedLogitLassoInf" + return(out) + + } + + + +print.fixedLogitLassoInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + + + diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R index 6a487086..423b4c3e 100644 --- a/selectiveInference/R/funs.inf.R +++ b/selectiveInference/R/funs.inf.R @@ -244,3 +244,56 @@ aicStop <- function(x, y, action, df, sigma, mult=2, ntimes=2) { return(list(khat=khat,G=G,u=u,aic=aic,stopped=(i0])) + sd=sqrt(vv) + pv = tnorm.surv(temp,0,sd,vlo,vup,bits) + return(list(pv=pv,vlo=vlo,vup=vup,sd=sd)) +} + + + +mypoly.int.lee= + function(y,eta,vlo,vup,sd, alpha, gridrange=c(-100,100),gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { + # compute sel intervals from poly lemmma, full version from Lee et al for full matrix Sigma + + temp = sum(eta*y) + + xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) + fun = function(x) { tnorm.surv(temp,x,sd,vlo,vup,bits) } + + int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) + tailarea = c(fun(int[1]),1-fun(int[2])) + + if (flip) { + int = -int[2:1] + tailarea = tailarea[2:1] + } + + return(list(int=int,tailarea=tailarea)) +} + + + +mydiag=function(x){ + if(length(x)==1) out=x + if(length(x)>1) out=diag(x) + return(out) + } + diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index ac1b9a00..3c029f7c 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -9,7 +9,8 @@ Compute p-values and confidence intervals for the lasso estimate, at a fixed value of the tuning parameter lambda } \usage{ -fixedLassoInf(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha=0.1, +fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial", + "cox"),intercept=TRUE, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE) } @@ -37,11 +38,16 @@ Estimated lasso coefficients (e.g., from glmnet). This is of length p \item{lambda}{ Value of lambda used to compute beta. See the above warning } + +\item{family}{Response type: "gaussian" (default), "binomial", or + "cox" (for censored survival data) } + \item{sigma}{ Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares fit when n >= 2p, and using the standard deviation of y when n < 2p. In the latter case, the user -should use \code{\link{estimateSigma}} function for a more accurate estimate +should use \code{\link{estimateSigma}} function for a more accurate estimate. 
+Not used for family= "binomial", or "cox" } \item{alpha}{ Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) @@ -181,4 +187,56 @@ out = fixedLassoInf(xs,y,beta,lambda,sigma=sigma) #rescale conf points to undo the penalty factor out$ci=t(scale(t(out$ci),FALSE,pf[out$vars])) out -} + +#logistic model +set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + y = x%*%beta + sigma*rnorm(n) + y=1*(y>mean(y)) + # first run glmnet + gfit = glmnet(x,y,standardize=FALSE,family="binomial") + + # extract coef for a given lambda; note the 1/n factor! + # (and here we DO include the intercept term) + lambda = .8 + beta = coef(gfit, s=lambda/n, exact=TRUE) + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x,y,beta,lambda,family="binomial") + out + +#Cox model +set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + tim = as.vector(x\%*\%beta + sigma*rnorm(n)) + tim= tim-min(tim)+1 +status=sample(c(0,1),size=n,replace=T) + # first run glmnet + + + gfit = glmnet(x,Surv(tim,status),standardize=FALSE,family="cox") + + # extract coef for a given lambda; note the 1/n factor! + + lambda = 1.5 + beta = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x,tim,beta,lambda,status=status,family="cox") + out +} + \ No newline at end of file diff --git a/tests/test.fixed.R b/tests/test.fixed.R index e2c5931c..f1ce6cf0 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -1,21 +1,26 @@ library(selectiveInference) #library(selectiveInference,lib.loc="/Users/tibs/dropbox/git/R/mylib") +library(glmnet) +library(MASS) +library(scalreg) #options(error=dump.frames) #attach("/Users/tibs/dropbox/PAPERS/lasso/lasso3/.RData") ##### +#gaussian n=50 p=10 sigma=.7 beta=c(3,2,0,0,rep(0,p-4)) set.seed(43) -nsim = 100 +nsim = 200 pvals <- matrix(NA, nrow=nsim, ncol=p) x = matrix(rnorm(n*p),n,p) x = scale(x,T,T)/sqrt(n-1) mu = x%*%beta for (i in 1:nsim) { + cat(i) y=mu+sigma*rnorm(n) #y=y-mean(y) # first run glmnet @@ -30,11 +35,11 @@ pvals[i, which(beta != 0)] <- aa$pv nulls = which(!is.na(pvals[,1]) & !is.na(pvals[,2])) np = pvals[nulls,-(1:2)] mean(np[!is.na(np)] < 0.1) - +o=!is.na(np) +plot((1:sum(o))/sum(o),sort(np)) +abline(0,1) ##### -library(selectiveInference) -library(MASS) -library(scalreg) + S <- diag(10) n <- 100 @@ -55,7 +60,75 @@ for(i in 1:100){ p <- pval[, -(1:2)] mean(p[p < 1] < 0.05) -##### +##logistic + +n=50 +p=10 +beta=c(3,2,0,0,rep(0,p-4)) +beta=rep(0,p) +set.seed(3) +nsim = 200 +pvals=matrix(NA, nrow=nsim, ncol=p) +ci=array(NA,c(nsim,p,2)) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta +for (ii in 1:nsim) { + cat(ii) +y=mu+rnorm(n) +y=1*(y>mean(y)) +# first run glmnet +gfit=glmnet(x,y,standardize=F,thresh=1e-8,family="binomial") +lambda = .25 +#extract coef for a given lambda; Note the 1/n factor! 
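+# (glmnet scales its loss by 1/n while fixedLassoInf works with the
+# unscaled objective, so s = lambda/n recovers the matching solution)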
+beta = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) +# compute fixed lambda p-values and selection intervals + aa = fixedLassoInf(x,y,beta,lambda,family="binomial") + pvals[ii, which(beta[-1] != 0)] <- aa$pv + ci[ii,which(beta[-1] != 0),]=aa$ci +} + +o=!is.na(pvals) +plot((1:sum(o))/sum(o),sort(pvals)) +abline(0,1) +o=ci[,1,1]>0 | ci[,1,2]<0 +mean(o,na.rm=T) + + +## cox + +n=50 +p=10 +#beta=c(6,6,0,0,rep(0,p-4)) +beta=rep(0,p) +set.seed(3) +nsim = 200 +pvals=matrix(NA, nrow=nsim, ncol=p) +ci=array(NA,c(nsim,p,2)) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta +for (ii in 1:nsim) { + cat(ii) +tim=as.vector(mu+rnorm(n))+10 +status=sample(c(0,1),size=n,replace=T) + lambda=0.2 + y=cbind(time=tim,status=status) + gfit=glmnet(x,y,family="cox",standardize=FALSE) + b=as.numeric(coef(gfit,s=lambda/n,exact=TRUE)) + + aa= fixedLassoInf(x,tim,b,lambda,status=status,family="cox") + +pvals[ii, which(b != 0)] <- aa$pv[1:sum(!is.na(aa$pv))] + ci[ii,which(b != 0),]=aa$ci +} + +o=!is.na(pvals) +plot((1:sum(o))/sum(o),sort(pvals)) +abline(0,1) + + +#####more Gaussian a=lar(x,y) aa=larInf(a) @@ -71,13 +144,14 @@ set.seed(3) n=50 p=10 sigma=2 +nsim=100 x=matrix(rnorm(n*p),n,p) #x=scale(x,T,T)/sqrt(n-1) #try with and without standardization beta=c(5,4,3,2,1,rep(0,p-5)) -nsim=100 + seeds=sample(1:9999,size=nsim) pv=rep(NA,nsim) ci=matrix(NA,nsim,2) @@ -94,12 +168,12 @@ for(ii in 1:nsim){ bhat = predict(gfit, s=lambda/n,type="coef",exact=F)[-1] junk= fixedLassoInf(x,y,bhat,lambda,sigma=sigma) - pv[ii]=junk$pv[1] - # oo=junk$pred # for old package - oo=junk$var # for new package - btrue[ii]=lsfit(x[,oo],mu)$coef[2] - ci[ii,]=junk$ci[1,] + pvals[ii, which(bhat != 0)] <- aa$pv[1:sum(!is.na(aa$pv))] + ci[ii,which(bhat != 0),]=aa$ci + } +o=!is.na(pvals) +plot((1:sum(o))/sum(o),sort(pvals)) sum(ci[,1]> btrue) sum(ci[,2]< btrue) From 450347222c8c1e501b870d8f49bc5d328a4a588b Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 25 Mar 2016 09:55:57 -0700 Subject: [PATCH 126/396] rob --- selectiveInference/man/fixedLassoInf.Rd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 3c029f7c..00b4c81e 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -54,7 +54,7 @@ Significance level for confidence intervals (target is miscoverage alpha/2 in ea } \item{intercept}{ Was the lasso problem solved (e.g., by glmnet) with an intercept in the model? -Default is TRUE +Default is TRUE. Must be TRUE for "binomial" family. Not used for 'cox" family, where no intercept is assumed. } \item{type}{Contrast type for p-values and confidence intervals: default is "partial"---meaning that the contrasts tested are the partial population @@ -89,6 +89,7 @@ Print out progress along the way? Default is FALSE} \details{ This function computes selective p-values and confidence intervals for the lasso, given a fixed value of the tuning parameter lambda. +Three different response types are supported: gaussian, binomial and Cox. The confidence interval construction involves numerical search and can be fragile: if the observed statistic is too close to either end of the truncation interval (vlo and vup, see references), then one or possibly both endpoints of the interval of @@ -123,7 +124,11 @@ is not allowed. \references{ Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013). + Exact post-selection inference, with application to the lasso. arXiv:1311.6238. 
+ Jonathan Taylor and Robert Tibshirani (2016) Post-selection inference for L1-penalized likelihood models. +arXiv:1602.07358 + } \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} From 4f67765c0d1a7c2de310b621acbfeecdd96c5c80 Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 25 Mar 2016 11:24:13 -0700 Subject: [PATCH 127/396] rob --- selectiveInference/man/fixedLassoInf.Rd | 2 +- selectiveInference/man/selectiveInference.Rd | 53 +++++++++++++++++++- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 00b4c81e..31e47b42 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -124,8 +124,8 @@ is not allowed. \references{ Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013). - Exact post-selection inference, with application to the lasso. arXiv:1311.6238. + Jonathan Taylor and Robert Tibshirani (2016) Post-selection inference for L1-penalized likelihood models. arXiv:1602.07358 diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index 7d2ca571..fde22193 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -20,7 +20,8 @@ This package provides tools for inference after selection, in forward stepwise regression, least angle regression, the lasso, and the many normal means problem. The functions compute p-values and selection intervals that properly account for the inherent selection carried out by the procedure. These have exact finite sample -type I error and coverage under Gaussian errors. +type I error and coverage under Gaussian errors. For the logistic and Cox familes (fixedLassoInf), + the coverage is asymptotically valid This R package was developed as part of the selective inference software project in Python and R: @@ -94,7 +95,7 @@ out.aic out.fix = fsInf(fsfit,type="all",k=5) out.fix -## NOT RUN---lasso at fixed lambda +## NOT RUN---lasso at fixed lambda- Gaussian family ## first run glmnet # gfit = glmnet(x,y) @@ -107,6 +108,54 @@ out.fix # out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) # out + +#lasso at fixed lambda- logistic family +#set.seed(43) + # n = 50 + # p = 10 + # sigma = 1 + + # x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + # +# beta = c(3,2,rep(0,p-2)) + # y = x%*%beta + sigma*rnorm(n) + # y=1*(y>mean(y)) + # first run glmnet + # gfit = glmnet(x,y,standardize=FALSE,family="binomial") + + # extract coef for a given lambda; note the 1/n factor! + # (and here we DO include the intercept term) + # lambda = .8 + # beta = coef(gfit, s=lambda/n, exact=TRUE) + + # # compute fixed lambda p-values and selection intervals + # out = fixedLassoInf(x,y,beta,lambda,family="binomial") + # out + +##lasso at fixed lambda- Cox family +#set.seed(43) +# n = 50 + # p = 10 + # sigma = 1 + + # x = matrix(rnorm(n*p),n,p) + # x=scale(x,TRUE,TRUE) + + # beta = c(3,2,rep(0,p-2)) + # tim = as.vector(x\%*\%beta + sigma*rnorm(n)) + # tim= tim-min(tim)+1 +#status=sample(c(0,1),size=n,replace=T) + # first run glmnet + # gfit = glmnet(x,Surv(tim,status),standardize=FALSE,family="cox") + # extract coef for a given lambda; note the 1/n factor! 
+ + # lambda = 1.5 + # beta = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) + + # compute fixed lambda p-values and selection intervals + # out = fixedLassoInf(x,tim,beta,lambda,status=status,family="cox") + # out ## NOT RUN---many normal means # set.seed(12345) # n = 100 From 19f51a1efc5c8a52e74fd24cdd500bffa90a263d Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 25 Mar 2016 11:29:11 -0700 Subject: [PATCH 128/396] rob --- selectiveInference/man/selectiveInference.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index fde22193..851ee912 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -7,7 +7,7 @@ Tools for selective inference \description{ Functions to perform post-selection inference for forward stepwise regression, least angle regression, the lasso and the -many normal means problem +many normal means problem. The lasso function also supports logistic regression and the Cox model. } \details{ \tabular{ll}{ From f6e65a44de1eeac9cae5c8fc6330947e5928c8d9 Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 25 Mar 2016 11:30:49 -0700 Subject: [PATCH 129/396] rob --- selectiveInference/man/selectiveInference.Rd | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/selectiveInference/man/selectiveInference.Rd b/selectiveInference/man/selectiveInference.Rd index 851ee912..6e038d42 100644 --- a/selectiveInference/man/selectiveInference.Rd +++ b/selectiveInference/man/selectiveInference.Rd @@ -68,6 +68,11 @@ Exact post-selection inference, with application to the lasso. arXiv:1311.6238. Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014). Post-selection point and interval estimation of signal sizes in Gaussian samples. arXiv:1405.3340. + + +Jonathan Taylor and Robert Tibshirani (2016) Post-selection inference for L1-penalized likelihood models. +arXiv:1602.07358 + } \examples{ From 4e3d614b45023c4447c57ff43b00baaca161a8da Mon Sep 17 00:00:00 2001 From: tibs Date: Sun, 27 Mar 2016 21:26:22 -0700 Subject: [PATCH 130/396] rob --- selectiveInference/man/fsInf_maxZ.Rd | 86 ++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 selectiveInference/man/fsInf_maxZ.Rd diff --git a/selectiveInference/man/fsInf_maxZ.Rd b/selectiveInference/man/fsInf_maxZ.Rd new file mode 100644 index 00000000..89aa886e --- /dev/null +++ b/selectiveInference/man/fsInf_maxZ.Rd @@ -0,0 +1,86 @@ +\name{fsInf_maxZ} +\alias{fsInf_maxZ} +\title{ +Selective inference for forward stepwise regression +} +\description{ +Computes maxZ selective p-values and confidence intervals for forward +stepwise regression +} +\usage{ + +fsInf_maxZ(obj, sigma=NULL, alpha=0.1, k=NULL, ndraw=8000, burnin=2000,verbose=FALSE) + +} + +\arguments{ + \item{obj}{ +Object returned by \code{\link{fs}} function +} +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares fit when n >= 2p, and +using the standard deviation of y when n < 2p. In the latter case, the user +should use \code{\link{estimateSigma}} function for a more accurate estimate +} +\item{alpha}{ +Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) +} +\item{k}{ +See "type" argument below. 
Default is NULL, in which case k is taken to be
+the number of steps computed in the forward stepwise path
+}
+\item{ndraw}{Number of Monte Carlo samples generated}
+\item{burnin}{
+Number of samples discarded at the beginning of the chain
+}
+\item{verbose}{Print out progress along the way? Default is FALSE}
+}
+
+\details{
+This function computes selective maxZ p-values
+for forward stepwise regression. These p-values are independent under the null,
+so that stopping via the forwardStop rule yields guaranteed FDR control.
+}
+
+\value{
+\item{pv}{P-values for each model in the sequence}
+\item{k}{Value of k specified in call}
+\item{khat}{When type is "active", this is an estimated stopping point
+declared by \code{\link{forwardStop}}}
+\item{sigma}{Value of error standard deviation (sigma) used}
+\item{vars}{Variables in active set}
+\item{sign}{Signs of active coefficients}
+\item{alpha}{Desired coverage (alpha/2 in each tail)}
+\item{realized_maxZ}{Value of maxZ statistic computed at each step}
+\item{call}{The call to fsInf_maxZ}
+}
+
+\references{
+Will Fithian, Jonathan Taylor, Ryan Tibshirani, and Rob Tibshirani (2015).
+Selective sequential model selection. arXiv:1512.02565.
+
+
+}
+
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\seealso{\code{\link{fs}}}
+
+\examples{
+set.seed(33)
+n = 50
+p = 10
+sigma = 1
+x = matrix(rnorm(n*p),n,p)
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# run forward stepwise
+fsfit = fs(x,y)
+
+# compute sequential p-values
+# (sigma estimated from full model)
+out.seq = fsInf_maxZ(fsfit)
+out.seq
+}

From fa20cc9fd6d8521c01e5e1442d950f396d51ceab Mon Sep 17 00:00:00 2001
From: tibshirani
Date: Mon, 28 Mar 2016 15:17:24 -0700
Subject: [PATCH 131/396] test of forking

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 1a50e5b0..41b7a4e5 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@ Maintainer: Rob Tibshirani

 New tools for inference after selection, for use with forward stepwise regression, least angle regression, the lasso, and the many means problem. The package is available on [CRAN](http://cran.r-project.org/web/packages/selectiveInference/). See [this paper](http://www.pnas.org/content/112/25/7629.full) for a high level introduction to selective inference.

+test
+
 Code is in the directory selectiveInference/R.
 * funs.common.R: Basic functions used by many other functions, such as standardization.
 * funs.fixed.R: Inference for LASSO at a fixed, deterministic value of lambda.

From 2843f21d7bf241456c1ad3bb796100f0c38113e9 Mon Sep 17 00:00:00 2001
From: tibshirani
Date: Mon, 28 Mar 2016 15:21:24 -0700
Subject: [PATCH 132/396] undoing test

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 41b7a4e5..1a50e5b0 100644
--- a/README.md
+++ b/README.md
@@ -5,8 +5,6 @@ Maintainer: Rob Tibshirani

 New tools for inference after selection, for use with forward stepwise regression, least angle regression, the lasso, and the many means problem. The package is available on [CRAN](http://cran.r-project.org/web/packages/selectiveInference/). See [this paper](http://www.pnas.org/content/112/25/7629.full) for a high level introduction to selective inference.

-test
-
 Code is in the directory selectiveInference/R.
 * funs.common.R: Basic functions used by many other functions, such as standardization.
 * funs.fixed.R: Inference for LASSO at a fixed, deterministic value of lambda.
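The maxZ p-values documented above are independent under the null, which is what licenses a sequential stopping rule. A minimal sketch of pairing them with forwardStop for FDR-controlled stopping, assuming the fs, fsInf_maxZ, and forwardStop functions exported by this package:

set.seed(33)
n = 50
p = 10
sigma = 1
x = matrix(rnorm(n*p), n, p)
beta = c(3, 2, rep(0, p-2))
y = x %*% beta + sigma * rnorm(n)

# run forward stepwise and compute the sequential maxZ p-values
fsfit = fs(x, y)
out.seq = fsInf_maxZ(fsfit)

# forwardStop converts the p-value sequence into an estimated stopping
# step; per the details above, this gives FDR control at level alpha
khat = forwardStop(out.seq$pv, alpha = 0.1)
khat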
From 44a0627897f4f28a0542944c7d9c0503872b54d1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Wed, 6 Apr 2016 17:11:24 -0700
Subject: [PATCH 133/396] BF: conditioning on too much in forward stepwise
 (mathematical bug rather than code bug); also added a comment about
 one-sided pvalues

---
 selectiveInference/R/funs.fs.R          | 3 ++-
 selectiveInference/man/fixedLassoInf.Rd | 2 +-
 selectiveInference/man/fsInf.Rd         | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R
index cb63da2c..b75923d3 100644
--- a/selectiveInference/R/funs.fs.R
+++ b/selectiveInference/R/funs.fs.R
@@ -188,7 +188,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
       if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n))

       working_x = t(sign_score*t(working_x))
-      Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r
+      #Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r
+      Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-r-1
       Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1
       Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1

diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd
index 31e47b42..26b276fc 100644
--- a/selectiveInference/man/fixedLassoInf.Rd
+++ b/selectiveInference/man/fixedLassoInf.Rd
@@ -108,7 +108,7 @@ is not allowed.
 \value{
 \item{type}{Type of coefficients tested (partial or full)}
 \item{lambda}{Value of tuning parameter lambda used}
-\item{pv}{P-values for active variables}
+\item{pv}{One-sided P-values for active variables, using the fact that we have conditioned on the sign.}
 \item{ci}{Confidence intervals}
 \item{tailarea}{Realized tail areas (lower and upper) for each confidence interval}
 \item{vlo}{Lower truncation limits for statistics}
diff --git a/selectiveInference/man/fsInf.Rd b/selectiveInference/man/fsInf.Rd
index b86584d9..613bc5a8 100644
--- a/selectiveInference/man/fsInf.Rd
+++ b/selectiveInference/man/fsInf.Rd
@@ -82,7 +82,7 @@ to alpha/2, and can be used for error-checking purposes.
\item{khat}{When type is "active", this is an estimated stopping point declared by \code{\link{forwardStop}}; when type is "aic", this is the value chosen by the modified AIC scheme} -\item{pv}{P-values for active variables} +\item{pv}{One sided P-values for active variables, uses the sign that a variable entered the model with.} \item{ci}{Confidence intervals} \item{tailarea}{Realized tail areas (lower and upper) for each confidence interval} \item{vlo}{Lower truncation limits for statistics} From f6e9a442d6d12326767c37dc4ebc51782c09d5f8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 6 Apr 2016 23:23:06 -0700 Subject: [PATCH 134/396] removing tmvtnorm dependency in DESCRIPTION --- selectiveInference/DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index af80d99a..58232f11 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -9,7 +9,6 @@ Maintainer: Rob Tibshirani Depends: glmnet, intervals, - tmvtnorm, survival Suggests: Rmpfr From 235dfc988cd2ed964cdfa7fc8670a89cedefbea9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 7 Apr 2016 00:41:57 -0700 Subject: [PATCH 135/396] a few questions about logit and cox, edited examples for logit and cox --- selectiveInference/R/funs.fixedCox.R | 6 ++++++ selectiveInference/R/funs.fixedLogit.R | 1 + selectiveInference/man/fixedLassoInf.Rd | 8 ++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index 82eeb613..f0419b86 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -50,6 +50,11 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda # compute p-values + +# JT: are we sure the signs of these are correctly handled? +# two sided p-values numerically agree with python but +# the one sided p-values are a bit off + for(jj in 1:length(bbar)){ vj=rep(0,length(bbar));vj[jj]=1 @@ -66,6 +71,7 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda tailarea[jj,] = junk2$tailarea } + # JT: these don't seem to be the real one-step estimators fit0=coxph(Surv(y,status)~x[,m]) coef0=fit0$coef se0=sqrt(diag(fit0$var)) diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index ea476b1c..e1510abc 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -98,6 +98,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet tailarea[jj,] = junk2$tailarea } + # JT: these are not the one step estimators but they are close fit0=glm(y~x[,m],family="binomial") sfit0=summary(fit0) coef0=fit0$coef[-1] diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 26b276fc..ab8c71ec 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -211,10 +211,10 @@ set.seed(43) # extract coef for a given lambda; note the 1/n factor! # (and here we DO include the intercept term) lambda = .8 - beta = coef(gfit, s=lambda/n, exact=TRUE) + beta_hat = coef(gfit, s=lambda/n, exact=TRUE) # compute fixed lambda p-values and selection intervals - out = fixedLassoInf(x,y,beta,lambda,family="binomial") + out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial") out #Cox model @@ -238,10 +238,10 @@ status=sample(c(0,1),size=n,replace=T) # extract coef for a given lambda; note the 1/n factor! 
lambda = 1.5 - beta = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) + beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) # compute fixed lambda p-values and selection intervals - out = fixedLassoInf(x,tim,beta,lambda,status=status,family="cox") + out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox") out } \ No newline at end of file From 5bb52cd6d51ebfb23d94ce1c9d6a35b8c002a3e3 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 7 Apr 2016 00:42:52 -0700 Subject: [PATCH 136/396] another renaming of beta to beta_hat in examples --- selectiveInference/man/fixedLassoInf.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index ab8c71ec..43cf2c3a 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -184,10 +184,10 @@ gfit = glmnet(xs,y,standardize=FALSE) # extract coef for a given lambda; note the 1/n factor! # (and we don't save the intercept term) lambda = .8 -beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +beta_hat = coef(gfit, s=lambda/n, exact=TRUE)[-1] # compute fixed lambda p-values and selection intervals -out = fixedLassoInf(xs,y,beta,lambda,sigma=sigma) +out = fixedLassoInf(xs,y,beta_hat,lambda,sigma=sigma) #rescale conf points to undo the penalty factor out$ci=t(scale(t(out$ci),FALSE,pf[out$vars])) From 3ad8c5f974cd4705cc0843bceaf027dd7bb146c5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 7 Apr 2016 22:35:38 -0700 Subject: [PATCH 137/396] adding coef0 and sd to output for use in summary --- selectiveInference/R/funs.fixed.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 8948a10f..4263c0b2 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -126,7 +126,7 @@ else{ pv[j] = a$pv vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) - vmat[j,] = vj * mj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) a = poly.int(y,G,u,vj,sigma,alpha,gridrange=gridrange, flip=(sign[j]==-1),bits=bits) @@ -137,6 +137,8 @@ else{ out = list(type=type,lambda=lambda,pv=pv,ci=ci, tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, vars=vars,sign=sign,sigma=sigma,alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, call=this.call) class(out) = "fixedLassoInf" return(out) @@ -197,8 +199,8 @@ print.fixedLassoInf <- function(x, tailarea=TRUE, ...) { cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) cat("",fill=T) tab = cbind(x$vars, - round(x$sign*x$vmat%*%x$y,3), - round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$coef0,3), + round(x$coef0 / x$sd,3), round(x$pv,3),round(x$ci,3)) colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") if (tailarea) { From 89034879ed92a4729414e31b6cda20fa3c2f2dd4 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 8 Apr 2016 11:21:38 -0700 Subject: [PATCH 138/396] BF: fixed a bug in onestep estimator, reporting onestep estimator and its estimated covariance. 
everything except p-values match the python code now --- selectiveInference/R/funs.fixedLogit.R | 37 ++++++++++++++------------ 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index e1510abc..08a2dc5a 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -45,7 +45,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet #check KKT z=etahat+(y-prhat)/ww - g= t(x)%*%w%*%(z-etahat)/lambda + g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) @@ -63,15 +63,18 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet #constraints for active variables MM=solve(t(xxm)%*%w%*%xxm) - - bbar=(bhat-lam2m%*%MM%*%s2) - + gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized + # at exact LASSO solution it should be s2[-1] + dbeta = MM %*% gm + + # bbar=(bhat+lam2m%*%MM%*%s2) # JT: this is wrong, shouldn't use sign of intercept anywhere... + bbar = bhat - dbeta - A1=-(mydiag(s2))[-1,-1] - b1= ((mydiag(s2)%*%MM)%*%s2*lambda)[-1] - - tol.poly = 0.01 - if (max(A1 %*% bbar[-1] - b1) > tol.poly) + A1=-(mydiag(s2))[-1,] + b1= (s2 * dbeta)[-1] + + tol.poly = 0.01 + if (max((A1 %*% bbar)[-1] - b1) > tol.poly) stop(paste("Polyhedral constraints not satisfied; you must recompute beta", "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", "and check whether the specified value of lambda is too small", @@ -82,17 +85,16 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet for(jj in 1:sum(m)){ - vj=c(rep(0,sum(m)));vj[jj]=1 -# compute p-values - junk=mypoly.pval.lee(bbar[-1],A1,b1,vj,MM[-1,-1]) + vj=c(rep(0,sum(m)+1));vj[jj+1]=1 + # compute p-values + junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) pv[jj] = junk$pv - vlo[jj]=junk$vlo vup[jj]=junk$vup sd[jj]=junk$sd # junk2=mypoly.int.lee(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) - junk2=mypoly.int.lee(bbar[-1],vj,vlo[jj],vup[jj],sd[jj],alpha=.1) + junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea @@ -101,14 +103,15 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet # JT: these are not the one step estimators but they are close fit0=glm(y~x[,m],family="binomial") sfit0=summary(fit0) - coef0=fit0$coef[-1] - se0=sqrt(diag(sfit0$cov.scaled)[-1]) + coef0=bbar[-1] #fit0$coef[-1] + se0=sqrt(diag(MM)[-1]) # sfit0$cov.scaled)[-1]) zscore0=coef0/se0 out = list(type=type,lambda=lambda,pv=pv,ci=ci, tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, - call=this.call) + call=this.call, + info.matrix=MM) # info.matrix is output just for debugging purposes at the moment class(out) = "fixedLogitLassoInf" return(out) From c9d184b42f6f6477b69338c281906959c1060407 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 8 Apr 2016 22:08:02 -0700 Subject: [PATCH 139/396] need to use sign in poly pval for correct one-sided pvalues --- selectiveInference/R/funs.fixedCox.R | 2 +- selectiveInference/R/funs.fixedLogit.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index f0419b86..c23b5a35 100644 --- 
a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -56,7 +56,7 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda # the one sided p-values are a bit off for(jj in 1:length(bbar)){ - vj=rep(0,length(bbar));vj[jj]=1 + vj=rep(0,length(bbar));vj[jj]=s2[jj] junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 08a2dc5a..a16c41dd 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -85,7 +85,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet for(jj in 1:sum(m)){ - vj=c(rep(0,sum(m)+1));vj[jj+1]=1 + vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] # compute p-values junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) pv[jj] = junk$pv From 79a55636c7331a8c4be978984f614112bc419b41 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Sat, 21 May 2016 15:43:52 -0700 Subject: [PATCH 140/396] old datasplit simulation --- forLater/josh/sim.datasplit.R | 41 ++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/forLater/josh/sim.datasplit.R b/forLater/josh/sim.datasplit.R index be077a1d..0e19335d 100644 --- a/forLater/josh/sim.datasplit.R +++ b/forLater/josh/sim.datasplit.R @@ -4,30 +4,31 @@ source("../../selectiveInference/R/funs.groupfs.R") source("../../selectiveInference/R/funs.quadratic.R") source("../../selectiveInference/R/funs.common.R") -set.seed(1) -niters <- 400 +set.seed(19) +niters <- 500 known <- FALSE -n <- 100 +n <- 50 p <- 100 -maxsteps <- 10 +maxsteps <- 8 sparsity <- 5 -snr <- 1 +snr <- 2 rho <- 0.1 -ratio <- 0.7 -ratio2 <- 0.85 +ratio <- 0.6 +ratio2 <- 0.8 train <- 1:(ratio*n) test <- setdiff(1:n, train) train2 <- 1:(ratio2*n) -test <- setdiff(1:n, train2) +test2 <- setdiff(1:n, train2) index <- 1:p -instance <- function(n, p, sparsity, snr, maxsteps, rho) { - - x <- matrix(rnorm(n*p), nrow=n) +x <- matrix(rnorm(n*p), nrow=n) if (rho != 0) { z <- matrix(rep(t(rnorm(n)), p), nrow = n) x <- sqrt(1-rho)*x + sqrt(rho)*z } + +instance <- function(n, p, sparsity, snr, maxsteps, rho) { + y <- rnorm(n) if (sparsity > 0) { @@ -47,11 +48,11 @@ instance <- function(n, p, sparsity, snr, maxsteps, rho) { xte2 <- x[test2, ] if (known) { - trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p)) - fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = 2*log(p)) + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = log(length(train))) + fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, sigma=1, aicstop=1, k = log(length(train2))) } else { - trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = log(length(train))) - fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, aicstop=1, k = log(length(train2))) + trfit <- groupfs(xtr, ytr, index, maxsteps=maxsteps, aicstop=1, k = 2*log(p)) + fit <- groupfs(xtr2, ytr2, index, maxsteps=maxsteps, aicstop=1, k = 2*log(p)) } trcols <- which(1:p %in% trfit$action) @@ -59,11 +60,12 @@ instance <- function(n, p, sparsity, snr, maxsteps, rho) { tepv <- summary(lm(yte~xte[, trcols]-1))$coefficients[,4] tepv2 <- summary(lm(yte2~xte2[, tr2cols]-1))$coefficients[,4] names(tepv) <- as.character(sort(trfit$action)) - names(tepv2) <- as.character(sort(trfit$action)) + names(tepv2) <- as.character(sort(fit$action)) pv <- groupfsInf(fit) trpv <- groupfsInf(trfit) return(list(vars = fit$action, pvals = pv$pv, splitvars = sort(trfit$action), splitpvals = tepv, + splitvars2 = 
sort(fit$action), splitpvals2 = tepv2, trpvals = trpv$pv)) } @@ -75,9 +77,12 @@ vars <- do.call(c, list(output[1,])) pvals <- do.call(c, list(output[2,])) splitvars <- do.call(c, list(output[3,])) splitpvals <- do.call(c, list(output[4,])) -trpvals <- do.call(c, list(output[5,])) +splitvars2 <- do.call(c, list(output[5,])) +splitpvals2 <- do.call(c, list(output[6,])) +trpvals <- do.call(c, list(output[7,])) -save(vars, pvals, splitvars, splitpvals, trpvals, +save(vars, pvals, splitvars, splitpvals, + splitvars2, splitpvals2, trpvals, file = paste0("results/datasplit", "_", ifelse(known, "TC", "TF"), "_n", n, From ad2bdb1c199c298658d49d0210264c8e18fc3b94 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jun 2016 11:30:51 -0700 Subject: [PATCH 141/396] selectiveInference/R/funs.fixedCox.R --- selectiveInference/R/funs.fixedCox.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index c23b5a35..17c2603d 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -29,7 +29,9 @@ s2=sign(bhat) #check KKT - aaa=coxph(Surv(y,status)~x[,m],init=bhat,iter.max=0) + aaa=coxph(Surv(y,status)~x[,m],init=bhat,iter.max=0) # this gives the Cox model at exactly bhat + # so when we compute gradient and score + # we are evaluating at the LASSO solution res=residuals(aaa,type="score") if(!is.matrix(res)) res=matrix(res,ncol=1) scor=colSums(res) @@ -39,11 +41,11 @@ scor=colSums(res) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) - +# Hessian of partial likelihood at the LASSO solution MM=vcov(aaa) - bbar=(bhat+lambda*MM%*%s2) - A1=-(mydiag(s2)) +bbar=(bhat+lambda*MM%*%s2) +A1=-(mydiag(s2)) b1= -(mydiag(s2)%*%MM)%*%s2*lambda temp=max(A1%*%bbar-b1) From 1a777033bdac83eb7eb474853d693000460d5f0d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jun 2016 11:31:41 -0700 Subject: [PATCH 142/396] added comments to Cox --- selectiveInference/R/funs.fixedCox.R | 1 + 1 file changed, 1 insertion(+) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index 17c2603d..43bd080f 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -32,6 +32,7 @@ s2=sign(bhat) aaa=coxph(Surv(y,status)~x[,m],init=bhat,iter.max=0) # this gives the Cox model at exactly bhat # so when we compute gradient and score # we are evaluating at the LASSO solution + # naming of variables could be improved... 
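 # KKT check: at an exact lasso solution the partial-likelihood score for
 # each active variable equals lambda times its sign, so abs(g) computed
 # below should not exceed 1 (up to tol.kkt)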
res=residuals(aaa,type="score") if(!is.matrix(res)) res=matrix(res,ncol=1) scor=colSums(res) From 05347b62d9a688930a19e0187205f7fe86915df2 Mon Sep 17 00:00:00 2001 From: tibshirani Date: Wed, 8 Jun 2016 16:38:11 -0700 Subject: [PATCH 143/396] rob added logistic+cox features --- selectiveInference/NAMESPACE | 13 +- selectiveInference/R/funs.constraints.R | 186 --------------- selectiveInference/R/funs.fixedCox.R | 2 +- selectiveInference/R/funs.fixedLogit.R | 2 +- selectiveInference/R/funs.fs.R | 286 +----------------------- selectiveInference/man/fixedLassoInf.Rd | 11 +- selectiveInference/man/fsInf_maxZ.Rd | 86 ------- selectiveInference/src/symbols.rds | Bin 274 -> 367 bytes 8 files changed, 28 insertions(+), 558 deletions(-) delete mode 100644 selectiveInference/R/funs.constraints.R delete mode 100644 selectiveInference/man/fsInf_maxZ.Rd diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 4d283980..4a764f92 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -1,19 +1,18 @@ export(lar,fs, - larInf,fsInf,fsInf_maxZ, + larInf,fsInf, coef.lar,coef.fs, predict.lar,predict.fs, print.lar,print.fs, print.larInf,print.fsInf, plot.lar,plot.fs, fixedLassoInf,print.fixedLassoInf, - # fixedLogitLassoInf,print.fixedLogitLassoInf, - # fixedCoxLassoInf,print.fixedCoxLassoInf, +# fixedLogitLassoInf,print.fixedLogitLassoInf, +# fixedCoxLassoInf,print.fixedCoxLassoInf, forwardStop, estimateSigma, manyMeans,print.manyMeans, groupfs,groupfsInf, - scaleGroups,factorDesign, - sample_from_constraints + scaleGroups,factorDesign ) S3method("coef", "lar") @@ -26,7 +25,6 @@ S3method("predict", "fs") S3method("print", "fs") S3method("plot", "fs") S3method("print", "fsInf") -S3method("print", "fsInf_maxZ") S3method("print", "fixedLassoInf") S3method("print", "fixedLogitLassoInf") S3method("print", "fixedCoxLassoInf") @@ -37,8 +35,9 @@ S3method("print", "groupfsInf") useDynLib("selectiveInference") import(glmnet) import(intervals) +import(survival) importFrom("graphics", abline, axis, matplot) -importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, +importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) importFrom("stats", "coef", "df", "lm", "pf") diff --git a/selectiveInference/R/funs.constraints.R b/selectiveInference/R/funs.constraints.R deleted file mode 100644 index 3bf6b77e..00000000 --- a/selectiveInference/R/funs.constraints.R +++ /dev/null @@ -1,186 +0,0 @@ -# -# Some utilities for affine constraints -# - -# -# compute the square-root and inverse square-root of a non-negative -# definite matrix -# - -factor_covariance = function(S, rank=NA) { - if (is.na(rank)) { - rank = nrow(S) - } - svd_X = svd(S, nu=rank, nv=rank) - sqrt_cov = t(sqrt(svd_X$d[1:rank]) * t(svd_X$u[,1:rank])) - sqrt_inv = t((1. 
/ sqrt(svd_X$d[1:rank])) * t(svd_X$u[,1:rank])) - - return(list(sqrt_cov=sqrt_cov, sqrt_inv=sqrt_inv)) -} - -# -# from a constraint, return an equivalent -# constraint and a whitening and inverse -# whitening map -# - -# law is Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset - -whiten_constraint = function(linear_part, offset, mean_param, covariance) { - - factor_cov = factor_covariance(covariance) - sqrt_cov = factor_cov$sqrt_cov - sqrt_inv = factor_cov$sqrt_inv - - new_A = linear_part %*% sqrt_cov - new_b = offset - linear_part %*% mean_param - - # rescale rows to have length 1 - - scaling = sqrt(apply(new_A^2, 1, sum)) - new_A = new_A / scaling - new_b = new_b / scaling - - # TODO: check these functions will behave when Z is a matrix. - - inverse_map = function(Z) { - # broadcasting here - # the columns of Z are same length as mean_param - return(sqrt_cov %*% Z + as.numeric(mean_param)) - } - - forward_map = function(W) { - return(sqrt_inv %*% (W - mean_param)) - } - - return(list(linear_part=new_A, - offset=new_b, - inverse_map=inverse_map, - forward_map=forward_map)) -} - -# -# sample from the law -# -# Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset - -sample_from_constraints = function(linear_part, - offset, - mean_param, - covariance, - initial_point, # point must be feasible for constraints - ndraw=8000, - burnin=2000, - accept_reject_params=NA) #TODO: implement accept reject check -{ - - whitened_con = whiten_constraint(linear_part, - offset, - mean_param, - covariance) - white_initial = whitened_con$forward_map(initial_point) - -# # try 100 draws of accept reject -# # if we get more than 50 good draws, then just return a smaller sample -# # of size (burnin+ndraw)/5 - -# if accept_reject_params: -# use_hit_and_run = False -# num_trial, min_accept, num_draw = accept_reject_params - -# def _accept_reject(sample_size, linear_part, offset): -# Z_sample = np.random.standard_normal((100, linear_part.shape[1])) -# constraint_satisfied = (np.dot(Z_sample, linear_part.T) - -# offset[None,:]).max(1) < 0 -# return Z_sample[constraint_satisfied] - -# Z_sample = _accept_reject(100, -# white_con.linear_part, -# white_con.offset) - -# if Z_sample.shape[0] >= min_accept: -# while True: -# Z_sample = np.vstack([Z_sample, -# _accept_reject(num_draw / 5, -# white_con.linear_part, -# white_con.offset)]) -# if Z_sample.shape[0] > num_draw: -# break -# white_samples = Z_sample -# else: -# use_hit_and_run = True -# else: -# use_hit_and_run = True - - use_hit_and_run = TRUE - - if (use_hit_and_run) { - - white_linear = whitened_con$linear_part - white_offset = whitened_con$offset - - # Inf cannot be used in C code - # In theory, these rows can be dropped - - rows_to_keep = white_offset < Inf - white_linear = white_linear[rows_to_keep,,drop=FALSE] - white_offset = white_offset[rows_to_keep] - - nstate = length(white_initial) - if (sum(rows_to_keep) > 0) { - if (ncol(white_linear) > 1) { - nconstraint = nrow(white_linear) - - directions = rbind(diag(rep(1, nstate)), - matrix(rnorm(nstate^2), nstate, nstate)) - - # normalize rows to have length 1 - - scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) - directions = directions / scaling - ndirection = nrow(directions) - - alphas = directions %*% t(white_linear) - U = white_linear %*% white_initial - white_offset - Z_sample = matrix(rep(0, nstate * ndraw), nstate, ndraw) - - result = .C("sample_truncnorm_white", - as.numeric(white_initial), - as.numeric(U), - 
as.numeric(t(directions)), - as.numeric(t(alphas)), - output=Z_sample, - as.integer(nconstraint), - as.integer(ndirection), - as.integer(nstate), - as.integer(burnin), - as.integer(ndraw), - package="selectiveInference") - Z_sample = result$output - } else { # the distribution is univariate - # we can just work out upper and lower limits - - white_linear = as.numeric(white_linear) - pos = (white_linear * white_offset) >= 0 - neg = (white_linear * white_offset) <= 0 - if (sum(pos) > 0) { - U = min((white_offset / white_linear)[pos]) - } else { - U = Inf - } - if (sum(neg) < 0) { - L = max((white_offset / white_linear)[neg]) - } else { - L = -Inf - } - Z_sample = matrix(qnorm((pnorm(U) - pnorm(L)) * runif(ndraw) + pnorm(L)), 1, ndraw) - } - } else { - Z_sample = matrix(rnorm(nstate * ndraw), nstate, ndraw) - } - } - - Z = t(whitened_con$inverse_map(Z_sample)) - return(Z) -} - diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index c23b5a35..d32a7775 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -25,7 +25,7 @@ if( sum(status==0)+sum(status==1)!=length(y)) stop("status vector must have valu vars=which(m) if(sum(m)>0){ bhat=beta[beta!=0] #penalized coefs just for active variables -s2=sign(bhat) + s2=sign(bhat) #check KKT diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index a16c41dd..fb4092d7 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -74,7 +74,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet b1= (s2 * dbeta)[-1] tol.poly = 0.01 - if (max((A1 %*% bbar)[-1] - b1) > tol.poly) + if (max((A1 %*% bbar) - b1) > tol.poly) stop(paste("Polyhedral constraints not satisfied; you must recompute beta", "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", "and check whether the specified value of lambda is too small", diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index b75923d3..b5ee511b 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -27,11 +27,10 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, ##### # Find the first variable to enter and its sign - working_scale = sqrt(colSums(x^2)) - working_x = scale(x,center=F,scale=working_scale) - working_score = t(working_x)%*%y - i_hit = which.max(abs(working_score)) # Hitting coordinate - sign_hit = Sign(working_score[i_hit]) # Sign + working_x = scale(x,center=F,scale=sqrt(colSums(x^2))) + score = t(working_x)%*%y + i_hit = which.max(abs(score)) # Hitting coordinate + sign_hit = Sign(score[i_hit]) # Sign signs = sign_hit # later signs will be appended to `signs` if (verbose) { @@ -49,43 +48,19 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, df = numeric(buf) # Degrees of freedom beta = matrix(0,p,buf) # FS estimates - # Buffered objects for selective maxZ test - - offset_pos_maxZ = matrix(Inf, p, buf) # upper bounds for selective maxZ - offset_neg_maxZ = matrix(Inf, p, buf) # lower bounds for selective maxZ - scale_maxZ = matrix(0, p, buf) # lower bounds for selective maxZ - realized_maxZ = numeric(buf) # lower bounds for selective maxZ - action[1] = i_hit df[1] = 0 beta[,1] = 0 - ##### - # Variables needed to compute truncation limits for - # selective maxZ test - - realized_maxZ[1] = c(sign_hit * working_score[i_hit]) - offset_pos_maxZ[,1] = Inf - offset_neg_maxZ[,1] = Inf - scale_maxZ[,1] = working_scale - working_resid_maxZ = y - x %*% beta[,1] - # Gamma matrix! gbuf = max(2*p*3,2000) # Space for 3 steps, at least gi = 0 # index into rows of Gamma matrix - zi = 0 # index into rows of Gamma_maxZ matrix Gamma = matrix(0,gbuf,n) Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 - # Gamma_maxZ is the rbind - # of residualized X_inactive's - - Gamma_maxZ = matrix(0,gbuf,n) - Gamma_maxZ[zi+Seq(1,p),] = t(x); zi = zi+p - # nconstraint nconstraint = numeric(buf) vreg = matrix(0,buf,n) @@ -128,45 +103,23 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, beta = cbind(beta,matrix(0,p,buf)) nconstraint = c(nconstraint,numeric(buf)) vreg = rbind(vreg,matrix(0,buf,n)) - - offset_pos_maxZ = cbind(offset_pos_maxZ, matrix(0, p, buf)) - offset_neg_maxZ = cbind(offset_neg_maxZ, matrix(0, p, buf)) - scale_maxZ = cbind(scale_maxZ, matrix(0, p, buf)) - realized_maxZ = c(realized_maxZ, numeric(buf)) } # Key quantities for the next entry - keepLs=backsolve(R,t(Q_active)%*%X_inactive) - - prev_scale = working_scale[-i_hit] # this variable used later for maxZ X_inactive_resid = X_inactive - X_active %*% keepLs - working_scale = sqrt(colSums(X_inactive_resid^2)) # this variable used later for maxZ - working_x = scale(X_inactive_resid,center=F,scale=working_scale) - working_score = as.numeric(t(working_x)%*%y) + working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) + score = as.numeric(t(working_x)%*%y) - beta_cur = backsolve(R,t(Q_active)%*%y) # must be computed before the break - # so we have it if we have - # completed the path - # If the inactive set is empty, nothing will hit if (r==min(n-intercept,p)) break # Otherwise find 
the next hitting time else { - sign_score = Sign(working_score) - abs_score = sign_score * working_score + sign_score = Sign(score) + abs_score = sign_score * score i_hit = which.max(abs_score) sign_hit = sign_score[i_hit] - # keep track of necessary quantities for selective maxZ - - offset_shift = t(X_inactive) %*% (y - working_resid_maxZ) - realized_Z_scaled = realized_maxZ[k-1] * prev_scale - offset_pos_maxZ[I,k] = realized_Z_scaled + offset_shift - offset_neg_maxZ[I,k] = realized_Z_scaled - offset_shift - scale_maxZ[I,k] = working_scale - - working_resid_maxZ = y - X_active %*% beta_cur } # Record the solution @@ -174,22 +127,12 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, action[k] = I[i_hit] df[k] = r - beta[A,k] = beta_cur + beta[A,k] = backsolve(R,t(Q_active)%*%y) - # store the X_inactive_resid in Gamma_maxZ - - if (gi + p-r > nrow(Gamma_maxZ)) Gamma_maxZ = rbind(Gamma_maxZ,matrix(0,p-r,n)) - Gamma_maxZ[zi+Seq(1,p-r),] = t(X_inactive_resid); zi = zi+p-r - - # update maxZ variable - realized_maxZ[k] = sign_hit * working_score[i_hit] - # Gamma matrix! - if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) working_x = t(sign_score*t(working_x)) - #Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r - Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1 Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1 @@ -229,11 +172,6 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, nconstraint = nconstraint[Seq(1,k-1)] vreg = vreg[Seq(1,k-1),,drop=FALSE] - offset_pos_maxZ = offset_pos_maxZ[,Seq(1,k-1),drop=FALSE] - offset_neg_maxZ = offset_neg_maxZ[,Seq(1,k-1),drop=FALSE] - scale_maxZ = scale_maxZ[,Seq(1,k-1),drop=FALSE] - Gamma_maxZ = Gamma_maxZ[Seq(1,zi),,drop=FALSE] - # If we reached the maximum number of steps if (k>maxsteps) { if (verbose) { @@ -251,8 +189,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Record the least squares solution. 
Note that # we have already computed this bls = rep(0,p) - if(length(keepLs)>0) bls[A] = keepLs - + if(length(keepLs)>0) bls[A] = keepLs } if (verbose) cat("\n") @@ -268,9 +205,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, out = list(action=action,sign=signs,df=df,beta=beta, completepath=completepath,bls=bls, Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, - intercept=intercept,normalize=normalize,call=this.call, - offset_pos_maxZ=offset_pos_maxZ,offset_neg_maxZ=offset_neg_maxZ, - scale_maxZ=scale_maxZ,Gamma_maxZ=Gamma_maxZ,realized_maxZ=realized_maxZ) + intercept=intercept,normalize=normalize,call=this.call) class(out) = "fs" return(out) } @@ -437,175 +372,7 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic ############################## -############################## - -# selected maxZ tests - -fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL, - ndraw=8000, burnin=2000, verbose=FALSE) { - - this.call = match.call() - - checkargs.misc(sigma=sigma,alpha=alpha,k=k) - - if (class(obj) != "fs") stop("obj must be an object of class fs") - - k = min(k,length(obj$action)) # Round to last step - x = obj$x - y = obj$y - p = ncol(x) - n = nrow(x) - pv = c() - - if (is.null(sigma)) { - # TODO we need a sampler on a unit sphere - if (n >= 2*p) { - oo = obj$intercept - sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) - } - else { - sigma = sd(y) - warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), - "you may want to use the estimateSigma function")) - } - } - - khat = NULL - - vars = obj$action[1:k] - zi = 0 - for (j in 1:k) { - if(verbose) cat(c("Step=",j),fill=T) - # the inactive set here does not - # include the variable at the j-th step - # so, at j==1, the inactive set is every variable - # at j==2, the inactive set is everything but the first one - - if (j > 1) { - active = vars[1:(j-1)] - inactive = (1:p)[-active] - } else { - inactive = 1:p - } - - collapsed_pos = apply(obj$offset_pos_maxZ[inactive,1:j,drop=FALSE], 1, min) - collapsed_neg = apply(obj$offset_neg_maxZ[inactive,1:j,drop=FALSE], 1, min) - cur_scale = obj$scale_maxZ[,j][inactive] - - # the matrix cur_adjusted_Xt is used to compute (always as length(y) columns) - # the maxZ or maxT for the sampled variables - # - cur_adjusted_Xt = obj$Gamma_maxZ[zi + Seq(1,p-j+1),,drop=FALSE]; zi = zi+p-j+1 # Xt for transpose - - # cur_X is used to enforce conditioning on - # the ever_active sufficient_statistics - - cur_X = obj$x[,inactive,drop=FALSE] - - # now we condition on solution up to now - # this is equivalent to finding vector of - # fitted values up to now and appropriately - # adjusting the box limits - - if (j > 1) { - cur_fitted = predict(obj, s=j) - cur_fitted = cur_fitted - mean(cur_fitted) - cur_offset = as.numeric(t(cur_X) %*% cur_fitted) - } - else { - cur_fitted = rep(0, length(y)) - cur_offset = rep(0, length(inactive)) - } - - final_upper = collapsed_pos - cur_offset - final_lower = -(collapsed_neg + cur_offset) - - # now, we sample from Y_star, a centered Gaussian with covariance sigma^2 I - # subject to the constraint - # t(cur_adjusted_Xt) %*% Y_star < final_upper - # -t(cur_adjusted_Xt) %*% Y_star < -final_lower - - # really, we want the covariance of Y_star to be \sigma^2 (I - cur_P) - # where P is projection on the j-1 previous variables - # but this doesn't matter as everything we do with the samples - # will be a function of (I - cur_P) Y_star and the constraints are - # 
expressible in terms of (I - cur_P) Y_star because - # we have adjusted X - - # IMPORTANT: after sampling Y_star, we have to add back cur_fitted - - # if n >= p, we could actually just draw cur_adjusted_Xt %*% Y_star - # because this has a simple box constraint - # with a generically non-degenerate covariance - - if (nrow(cur_adjusted_Xt) > length(y)) { - linear_part = rbind(cur_adjusted_Xt, -cur_adjusted_Xt) - offset = c(final_upper, -final_lower) - covariance = diag(rep(sigma^2, length(y))) - mean_param = cur_fitted # rep(0, length(y)) - initial_point = y - - truncated_y = sample_from_constraints(linear_part, - offset, - mean_param, - covariance, - initial_point, - burnin=burnin, - ndraw=ndraw) - - truncated_noise = truncated_y %*% t(cur_adjusted_Xt) - sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) - } else { # sample from a smaller dimensional gaussian - if (nrow(cur_adjusted_Xt) > 1) { - linear_part = rbind(diag(rep(1, nrow(cur_adjusted_Xt))), - diag(rep(-1, nrow(cur_adjusted_Xt)))) - covariance = sigma^2 * (cur_adjusted_Xt %*% t(cur_adjusted_Xt)) - offset = c(final_upper, -final_lower) - mean_param = cur_adjusted_Xt %*% cur_fitted # rep(0, nrow(cur_adjusted_Xt)) - initial_point = cur_adjusted_Xt %*% y - } else { - mean_param = as.numeric(sum(as.numeric(cur_adjusted_Xt) * as.numeric(cur_fitted))) - covariance = matrix(sigma^2 * sum(cur_adjusted_Xt^2)) - linear_part = matrix(c(1,-1), 2, 1) - offset = c(final_upper, -final_lower) - initial_point = as.numeric(sum(as.numeric(cur_adjusted_Xt) * as.numeric(y))) - } - truncated_noise = sample_from_constraints(linear_part, - offset, - mean_param, - covariance, - initial_point, - burnin=burnin, - ndraw=ndraw) - sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) - - } - - observed_maxZ = obj$realized_maxZ[j] - pval = sum(sample_maxZ > observed_maxZ) / ndraw - pval = 2 * min(pval, 1 - pval) - pv = c(pv, pval) - } - - khat = forwardStop(pv,alpha) - - out = list(pv=pv, - k=k, - khat=khat, - sigma=sigma, - vars=vars, - sign=obj$sign, - alpha=alpha, - realized_maxZ=obj$realized_maxZ, - call=this.call) - class(out) = "fsInf_maxZ" - return(out) -} -############################## -# -# Print methods -# ############################## print.fs <- function(x, ...) { @@ -682,35 +449,6 @@ print.fsInf <- function(x, tailarea=TRUE, ...) { } -print.fsInf_maxZ <- function(obj) { - - cat("\nCall:\n") - dput(obj$call) - - cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", - obj$sigma)) - - cat(sprintf("\nSequential testing results with alpha = %0.3f\n",obj$alpha)) - - tab = cbind(1:length(obj$pv), - obj$vars, - round(obj$sign*obj$realized_maxZ, 3), - round(obj$pv,3)) - colnames(tab) = c("Step", "Var", "Z-score", "P-value") - rownames(tab) = rep("",nrow(tab)) - print(tab) - - cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",obj$khat)) - - invisible() -} - -############################## -# -# Plot methods -# -############################## - plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) 
{
   if (x$completepath) {
     k = length(x$action)+1
diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd
index 43cf2c3a..c5a99120 100644
--- a/selectiveInference/man/fixedLassoInf.Rd
+++ b/selectiveInference/man/fixedLassoInf.Rd
@@ -10,7 +10,7 @@ fixed value of the tuning parameter lambda
 }
 \usage{
 fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial",
-              "cox"),intercept=TRUE, sigma=NULL, alpha=0.1,
+              "cox"),intercept=TRUE, status=NULL, sigma=NULL, alpha=0.1,
               type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1,
               gridrange=c(-100,100), bits=NULL, verbose=FALSE)
 }
@@ -56,6 +56,7 @@ Significance level for confidence intervals (target is miscoverage alpha/2 in ea
 Was the lasso problem solved (e.g., by glmnet) with an intercept in the model?
 Default is TRUE. Must be TRUE for "binomial" family. Not used for "cox" family, where
 no intercept is assumed.
 }
+\item{status}{Censoring status for Cox model; 1=failure, 0=censored}
 \item{type}{Contrast type for p-values and confidence intervals: default is
 "partial"---meaning that the contrasts tested are the partial population
 regression coefficients, within the active set of predictors; the alternative is
@@ -104,6 +105,10 @@ Then when running glmnet, set standardize=F. See example below.
 The penalty.factor facility in glmnet-- allowing different penalties lambda
 for each predictor, is not yet implemented in fixedLassoInf.
 However you can finesse this--- see the example below. One caveat: using this
 approach, a penalty factor of zero (forcing a predictor in) is not allowed.
+
+Note that the coefficients and standard errors reported are unregularized.
+E.g., for the Gaussian, they are the usual least squares estimates and standard
+errors for the model fit to the active set from the lasso.
 }
 \value{  
\item{type}{Type of coefficients tested (partial or full)}
@@ -203,7 +208,7 @@ set.seed(43)
    x=scale(x,TRUE,TRUE)
 
    beta = c(3,2,rep(0,p-2))
-   y = x%*%beta + sigma*rnorm(n)
+   y = x\%*\%beta + sigma*rnorm(n)
    y=1*(y>mean(y))
    # first run glmnet
     gfit = glmnet(x,y,standardize=FALSE,family="binomial")
@@ -229,7 +234,7 @@ set.seed(43)
   beta = c(3,2,rep(0,p-2))
   tim = as.vector(x\%*\%beta + sigma*rnorm(n))
   tim= tim-min(tim)+1
-status=sample(c(0,1),size=n,replace=T)
+status=sample(c(0,1),size=n,replace=TRUE)
 
  # first run glmnet
 
diff --git a/selectiveInference/man/fsInf_maxZ.Rd b/selectiveInference/man/fsInf_maxZ.Rd
deleted file mode 100644
index 89aa886e..00000000
--- a/selectiveInference/man/fsInf_maxZ.Rd
+++ /dev/null
@@ -1,86 +0,0 @@
-\name{fsInf_maxZ}
-\alias{fsInf_maxZ}
-\title{
-Selective inference for forward stepwise regression
-}
-\description{
-Computes maxZ selective p-values and confidence intervals for forward
-stepwise regression
-}
-\usage{
-
-fsInf_maxZ(obj, sigma=NULL, alpha=0.1, k=NULL, ndraw=8000, burnin=2000,verbose=FALSE)
-
-}
-
-\arguments{
-  \item{obj}{
-Object returned by \code{\link{fs}} function
-}
-\item{sigma}{
-Estimate of error standard deviation. If NULL (default), this is estimated
-using the mean squared residual of the full least squares fit when n >= 2p, and
-using the standard deviation of y when n < 2p. In the latter case, the user
-should use \code{\link{estimateSigma}} function for a more accurate estimate
-}
-\item{alpha}{
-Significance level for confidence intervals (target is miscoverage alpha/2 in each tail)
-}
-\item{k}{
-See "type" argument below. 
Default is NULL, in which case k is taken to be the -the number of steps computed in the forward stepwise path -} -\item{ndraw}{Number of Monte Carlo samples generated} -\item{burnin}{ -Number of samples discarded at the beginning of the chain -} -\item{verbose}{Print out progress along the way? Default is FALSE} -} - -\details{ -This function computes selective maxZ p-values -for forward stepwise regression. These p-values are independent the under null, -so that stopping via the forwardStop rule yields guaranteed FDR control -} - -\value{ -\item{pv}{P-values for each model in the sequence} -\item{k}{Value of k specified in call} -\item{khat}{When type is "active", this is an estimated stopping point -declared by \code{\link{forwardStop}}} -\item{sigma}{Value of error standard deviation (sigma) used} -\item{vars}{Variables in active set} -\item{sign}{Signs of active coefficients} -\item{alpha}{Desired coverage (alpha/2 in each tail)} -\item{realized_maxZ}{Value of maxZ statistic computed at each step} -\item{call}{The call to fsInf_maxZ} -} - -\references{ -Will Fithian, Jonathan Taylor, Ryan Tibshirani, and Rob Tibshirani (2015). -Selective sequential model selection. arXiv:1512.02565.. - - -} - -\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} - -\seealso{\code{\link{fs}}} - -\examples{ -set.seed(33) -n = 50 -p = 10 -sigma = 1 -x = matrix(rnorm(n*p),n,p) -beta = c(3,2,rep(0,p-2)) -y = x\%*\%beta + sigma*rnorm(n) - -# run forward stepwise -fsfit = fs(x,y) - -# compute sequential p-values a -# (sigma estimated from full model) -out.seq = fsInf_maxZ(fsfit) -out.seq -} diff --git a/selectiveInference/src/symbols.rds b/selectiveInference/src/symbols.rds index 144e5c09f09f8470d4c91b8e736738b6e74ca38f..ef45ca37aedfc57cb07c2d5066bf881bf4359eb6 100644 GIT binary patch literal 367 zcmV-#0g(P5iwFP!000001HF{rPJ=KM#=F9l%`92sg||L|5yt%U8fGTmHD#39NNJ^I zn?Ag=Fj^R>H%>2>{?5ng$06s-p2h$G2pl*7&?)h}ssWtpGXyRe>2ri05=0r^28RO8 zjBtvUJk7bP%Q1@iCX2Bmeotwxtmv9#(pob7LR3!ut`KyESyu?HWzILv6do!>dj?Y* z!@*+@7Jm=dU2y!sVsLBm&hK{;+v)mYi|@mRiDeksi*sx-n)Q7D2Y?GpFmuF|>0`2w zTXnjf7EiZim=USDjwSfeziHK5e{b1fSiK3u!!=F|`#{onvcFvA`qG9{$5>=MXY;o5 z+E%Kl=S4;nV}{69)O>=Hl=Hl<&UB)qNFu3-u+1r7)qH^@W+Ej>X}e|l4zziu$_)8M z8h?Pr_sZ`U<;-u6yz;@WwP&x{Gu$0yn$%1y_q{k_SpQXQmz)WC#m&m>F0`bL NJ^{S|DnAbe005&lw21%! 
literal 274
zcmV+t0qy=DiwFP!000001HF=6YlAQphL2yYEp!O|A49Dh?r)3@b{B%CCA2Zl*pKbU
zj~$(fZ34X-yU_ETH}82(B<~9V@FDO#@Pi&#`0}vESPKYYj&)96HiQd$&aMSsYvGQR
za^K2sY!{@Gr@EpYOC4nz+43W+m5CH|$GVoMwvgGv+7@yX*~-(ji-@R?RyQ+pXf8LT
zd!6pRIq>?|a3eXfAxJBOS#I!=xz6X(U{z=g(_}O8nohY-ZbELfph%qMe+CHs;STB}
zae-wyTvLGi{y3IphL)NNrjS>o?;D?Z@Q%NrH)!rY8_m&??hj@`@h|o}eD#4yr^grK
YQc%2l=TeGBJ;;9Q2RKP<`Ah=<05C;^xBvhE

From 322e7f0b0ae34acea38775e413e83f57b956f3a1 Mon Sep 17 00:00:00 2001
From: tibshirani
Date: Wed, 8 Jun 2016 16:41:08 -0700
Subject: [PATCH 144/396] rob put all of maxZ stuff into a folder for later
 use

---
 forLater/estimateLambda.Rd       |  70 +++
 forLater/funs.fixed.R            | 198 ++++++++
 forLater/funs.fs.R               | 744 +++++++++++++++++++++++++++++++
 forLater/maxZ/forLater           |  86 ++++
 forLater/maxZ/funs.constraints.R | 186 ++++++++
 5 files changed, 1284 insertions(+)
 create mode 100644 forLater/estimateLambda.Rd
 create mode 100644 forLater/funs.fixed.R
 create mode 100644 forLater/funs.fs.R
 create mode 100644 forLater/maxZ/forLater
 create mode 100644 forLater/maxZ/funs.constraints.R

diff --git a/forLater/estimateLambda.Rd b/forLater/estimateLambda.Rd
new file mode 100644
index 00000000..9cb1cdb9
--- /dev/null
+++ b/forLater/estimateLambda.Rd
@@ -0,0 +1,70 @@
+\name{estimateLambda}
+\alias{estimateLambda}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Estimates the lasso tuning parameter lambda.
+}
+\description{
+Estimates the lasso tuning parameter lambda, for use in the selectiveInference
+package
+}
+\usage{
+estimateLambda(x, sigma, nsamp=1000)
+}
+\arguments{
+  \item{x}{
+Matrix of predictors (n by p)
+}
+  \item{sigma}{
+Estimate of error standard deviation
+}
+\item{nsamp}{Number of Monte Carlo samples used for the estimation.}
+}
+\details{
+This function estimates the lasso tuning parameter lambda, using the estimate
+2*E(||X^T eps||_infty), where eps ~ N(0, sigma^2 I) is a noise vector of length n.
+This estimate was proposed by Negahban et al (2012).
+}
+\value{
+\item{lambdahat}{The estimate of the lasso tuning parameter lambda}
+}
+\references{
+Negahban, S. N., Ravikumar, P., Wainwright, M. J. and Yu, B. (2012).
+A unified framework for high-dimensional analysis of M-estimators with
+decomposable regularizers. Statistical Science vol. 27, p 538-557.
+}

+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\examples{
+#NOT RUN
+#set.seed(43)
+#n=50
+#p=10
+#sigma=.7
+#x=matrix(rnorm(n*p),n,p)
+#x=scale(x,T,F)
+#beta=c(3,2,0,0,rep(0,p-4))
+#y=x\%*\%beta+sigma*rnorm(n)
+#y=y-mean(y)
+#
+#estimate lambda using the known value of sigma
+#lamhat=estimateLambda(x,sigma=.7)
+#
+#first estimate sigma
+#sigmahat=estimateSigma(x,y)$sigmahat
+#lamhat=estimateLambda(x,sigma=sigmahat)
+
+#compare to estimate from cv
+
+#out=cv.glmnet(x,y)
+#out$lambda.min*n  #remember that value from glmnet must be
+   # multiplied by n, to make it comparable.
+}
+
diff --git a/forLater/funs.fixed.R b/forLater/funs.fixed.R
new file mode 100644
index 00000000..ac005450
--- /dev/null
+++ b/forLater/funs.fixed.R
@@ -0,0 +1,198 @@
+# Lasso inference function (for fixed lambda). 
Note: here we are providing inference +# for the solution of +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 + +fixedLassoInf <- function(x, y, beta, lambda, intercept=TRUE, sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE) { + + this.call = match.call() + type = match.arg(type) + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (length(beta) != p) stop("beta must have length equal to ncol(x)") + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + vars = which(abs(beta) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Get lasso polyhedral region, of form Gy >= u + out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u + + # Check polyhedral region + tol.poly = 0.01 + if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + sign = numeric(k) + + if (type=="full" & p > n) + warning(paste("type='full' does not make sense when p > n;", + "switching to type='partial'")) + + if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + } + else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + } + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + a = poly.pval(y,G,u,vj,sigma,bits) + pv[j] = a$pv + vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj # Unstandardize (mult by norm of vj) + + a = poly.int(y,G,u,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "fixedLassoInf" + return(out) +} + +############################## + +fixedLasso.poly <- function(x, y, beta, lambda, a) { + xa = x[,a,drop=F] + xac = x[,!a,drop=F] + xai = pinv(crossprod(xa)) + xap = xai %*% t(xa) + za = sign(beta[a]) + if (length(za)>1) dz = diag(za) + if (length(za)==1) dz = matrix(za,1,1) + + P = diag(1,nrow(xa)) - xa %*% xap + G = -rbind(1/lambda * t(xac) %*% P, + -1/lambda * t(xac) %*% P, + -dz %*% xap) + u = -c(1 - t(xac) %*% t(xap) %*% za, + 1 + t(xac) %*% t(xap) %*% za, + -lambda * dz %*% xai %*% za) + + return(list(G=G,u=u)) +} + +# Moore-Penrose pseudo inverse for symmetric matrices + +pinv <- function(A, tol=.Machine$double.eps) { + e = eigen(A) + v = Re(e$vec) + d = Re(e$val) + d[d > tol] = 1/d[d > tol] + d[d < tol] = 0 + if (length(d)==1) return(v*d*v) + else return(v %*% diag(d) %*% t(v)) +} + +############################## + +print.fixedLassoInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + +#estimateLambda <- function(x, sigma, nsamp=1000){ +# checkargs.xy(x,rep(0,nrow(x))) +# if(nsamp < 10) stop("More Monte Carlo samples required for estimation") +# if (length(sigma)!=1) stop("sigma should be a number > 0") + # if (sigma<=0) stop("sigma should be a number > 0") + + # n = nrow(x) + # eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) + # lambda = 2*mean(apply(t(x)%*%eps,2,max)) + # return(lambda) +#} + diff --git a/forLater/funs.fs.R b/forLater/funs.fs.R new file mode 100644 index 00000000..b75923d3 --- /dev/null +++ b/forLater/funs.fs.R @@ -0,0 +1,744 @@ +# We compute the forward stepwise regression (FS) path given +# a response vector y and predictor matrix x. We assume +# that x has columns in general position. + +fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, + verbose=FALSE) { + + this.call = match.call() + checkargs.xy(x=x,y=y) + + # Center and scale, etc. + obj = standardize(x,y,intercept,normalize) + x = obj$x + y = obj$y + bx = obj$bx + by = obj$by + sx = obj$sx + n = nrow(x) + p = ncol(x) + + ##### + # To keep consistent with the lar function, we parametrize + # so that the first step has all zero coefficients, + # Also, an interesting note: the effective "lambda" (maximal + # correlation with the residual) may increase with stepwise! + # So we don't keep track of it + + ##### + # Find the first variable to enter and its sign + working_scale = sqrt(colSums(x^2)) + working_x = scale(x,center=F,scale=working_scale) + working_score = t(working_x)%*%y + i_hit = which.max(abs(working_score)) # Hitting coordinate + sign_hit = Sign(working_score[i_hit]) # Sign + signs = sign_hit # later signs will be appended to `signs` + + if (verbose) { + cat(sprintf("1. Adding variable %i, |A|=%i...",i_hit,1)) + } + + # Now iteratively find the new FS estimates + + # Things to keep track of, and return at the end + # JT: I guess the "buf" just saves us from making huge + # matrices we don't need? 
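+  # (a short answer, as far as this code goes: yes. The buffers start at
+  #  min(maxsteps, 500) columns and are grown only when step k overruns
+  #  them, via e.g.
+  #    buf  = length(action)
+  #    beta = cbind(beta, matrix(0, p, buf))
+  #  in the growth block inside the main loop, so storage roughly doubles
+  #  on demand instead of allocating p x maxsteps columns up front.)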
+ + buf = min(maxsteps,500) + action = numeric(buf) # Actions taken + df = numeric(buf) # Degrees of freedom + beta = matrix(0,p,buf) # FS estimates + + # Buffered objects for selective maxZ test + + offset_pos_maxZ = matrix(Inf, p, buf) # upper bounds for selective maxZ + offset_neg_maxZ = matrix(Inf, p, buf) # lower bounds for selective maxZ + scale_maxZ = matrix(0, p, buf) # lower bounds for selective maxZ + realized_maxZ = numeric(buf) # lower bounds for selective maxZ + + action[1] = i_hit + df[1] = 0 + beta[,1] = 0 + + ##### + # Variables needed to compute truncation limits for + # selective maxZ test + + realized_maxZ[1] = c(sign_hit * working_score[i_hit]) + offset_pos_maxZ[,1] = Inf + offset_neg_maxZ[,1] = Inf + scale_maxZ[,1] = working_scale + working_resid_maxZ = y - x %*% beta[,1] + + # Gamma matrix! + gbuf = max(2*p*3,2000) # Space for 3 steps, at least + gi = 0 # index into rows of Gamma matrix + zi = 0 # index into rows of Gamma_maxZ matrix + + Gamma = matrix(0,gbuf,n) + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 + + # Gamma_maxZ is the rbind + # of residualized X_inactive's + + Gamma_maxZ = matrix(0,gbuf,n) + Gamma_maxZ[zi+Seq(1,p),] = t(x); zi = zi+p + + # nconstraint + nconstraint = numeric(buf) + vreg = matrix(0,buf,n) + nconstraint[1] = gi + vreg[1,] = sign_hit*x[,i_hit] / sum(x[,i_hit]^2) + + # Other things to keep track of, but not return + r = 1 # Size of active set + A = i_hit # Active set -- JT: isn't this basically the same as action? + I = Seq(1,p)[-i_hit] # Inactive set + X_active = x[,i_hit,drop=FALSE] # Matrix X[,A] + X_inactive = x[,-i_hit,drop=FALSE] # Matrix X[,I] + k = 2 # What step are we at? + # JT Why keep track of r and k instead of just saying k=r+1? + + # Compute a skinny QR decomposition of X_active + # JT: obs was used as variable name above -- this is something different, no? + # changed it to qr_X + + qr_X = qr(X_active) + Q = qr.Q(qr_X,complete=TRUE) + Q_active = Q[,1,drop=FALSE]; + Q_inactive = Q[,-1,drop=FALSE] + R = qr.R(qr_X) + + # Throughout the algorithm, we will maintain + # the decomposition X_active = Q_active*R. 
Dimensions: + # X_active: n x r + # Q_active: n x r + # Q_inactive: n x (n-r) + # R: r x r + + while (k<=maxsteps) { + ########## + # Check if we've reached the end of the buffer + if (k > length(action)) { + buf = length(action) + action = c(action,numeric(buf)) + df = c(df,numeric(buf)) + beta = cbind(beta,matrix(0,p,buf)) + nconstraint = c(nconstraint,numeric(buf)) + vreg = rbind(vreg,matrix(0,buf,n)) + + offset_pos_maxZ = cbind(offset_pos_maxZ, matrix(0, p, buf)) + offset_neg_maxZ = cbind(offset_neg_maxZ, matrix(0, p, buf)) + scale_maxZ = cbind(scale_maxZ, matrix(0, p, buf)) + realized_maxZ = c(realized_maxZ, numeric(buf)) + } + + # Key quantities for the next entry + + keepLs=backsolve(R,t(Q_active)%*%X_inactive) + + prev_scale = working_scale[-i_hit] # this variable used later for maxZ + X_inactive_resid = X_inactive - X_active %*% keepLs + working_scale = sqrt(colSums(X_inactive_resid^2)) # this variable used later for maxZ + working_x = scale(X_inactive_resid,center=F,scale=working_scale) + working_score = as.numeric(t(working_x)%*%y) + + beta_cur = backsolve(R,t(Q_active)%*%y) # must be computed before the break + # so we have it if we have + # completed the path + + # If the inactive set is empty, nothing will hit + if (r==min(n-intercept,p)) break + + # Otherwise find the next hitting time + else { + sign_score = Sign(working_score) + abs_score = sign_score * working_score + i_hit = which.max(abs_score) + sign_hit = sign_score[i_hit] + # keep track of necessary quantities for selective maxZ + + offset_shift = t(X_inactive) %*% (y - working_resid_maxZ) + realized_Z_scaled = realized_maxZ[k-1] * prev_scale + offset_pos_maxZ[I,k] = realized_Z_scaled + offset_shift + offset_neg_maxZ[I,k] = realized_Z_scaled - offset_shift + scale_maxZ[I,k] = working_scale + + working_resid_maxZ = y - X_active %*% beta_cur + } + + # Record the solution + # what is the difference between "action" and "A"? + + action[k] = I[i_hit] + df[k] = r + beta[A,k] = beta_cur + + # store the X_inactive_resid in Gamma_maxZ + + if (gi + p-r > nrow(Gamma_maxZ)) Gamma_maxZ = rbind(Gamma_maxZ,matrix(0,p-r,n)) + Gamma_maxZ[zi+Seq(1,p-r),] = t(X_inactive_resid); zi = zi+p-r + + # update maxZ variable + realized_maxZ[k] = sign_hit * working_score[i_hit] + + # Gamma matrix! + + if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) + working_x = t(sign_score*t(working_x)) + #Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1 + + # nconstraint, regression contrast + nconstraint[k] = gi + vreg[k,] = sign_hit*X_inactive_resid[,i_hit] / sum(X_inactive_resid[,i_hit]^2) + + # Update all of the variables + r = r+1 + A = c(A,I[i_hit]) + I = I[-i_hit] + signs = c(signs,sign_hit) + X_active = cbind(X_active,X_inactive[,i_hit]) + X_inactive = X_inactive[,-i_hit,drop=FALSE] + + # Update the QR decomposition + updated_qr = updateQR(Q_active,Q_inactive,R,X_active[,r]) + Q_active = updated_qr$Q1 + + # JT: why do we store Q_inactive? Doesn't seem to be used. + Q_inactive = updated_qr$Q2 + R = updated_qr$R + + if (verbose) { + cat(sprintf("\n%i. 
Adding variable %i, |A|=%i...",k,A[r],r)) + } + + # Step counter + k = k+1 + } + + # Trim + action = action[Seq(1,k-1)] + df = df[Seq(1,k-1),drop=FALSE] + beta = beta[,Seq(1,k-1),drop=FALSE] + Gamma = Gamma[Seq(1,gi),,drop=FALSE] + nconstraint = nconstraint[Seq(1,k-1)] + vreg = vreg[Seq(1,k-1),,drop=FALSE] + + offset_pos_maxZ = offset_pos_maxZ[,Seq(1,k-1),drop=FALSE] + offset_neg_maxZ = offset_neg_maxZ[,Seq(1,k-1),drop=FALSE] + scale_maxZ = scale_maxZ[,Seq(1,k-1),drop=FALSE] + Gamma_maxZ = Gamma_maxZ[Seq(1,zi),,drop=FALSE] + + # If we reached the maximum number of steps + if (k>maxsteps) { + if (verbose) { + cat(sprintf("\nReached the maximum number of steps (%i),",maxsteps)) + cat(" skipping the rest of the path.") + } + completepath = FALSE + bls = NULL + } + + # Otherwise, note that we completed the path + else { + completepath = TRUE + + # Record the least squares solution. Note that + # we have already computed this + bls = rep(0,p) + if(length(keepLs)>0) bls[A] = keepLs + + } + + if (verbose) cat("\n") + + # Adjust for the effect of centering and scaling + if (intercept) df = df+1 + if (normalize) beta = beta/sx + if (normalize && completepath) bls = bls/sx + + # Assign column names + colnames(beta) = as.character(Seq(1,k-1)) + + out = list(action=action,sign=signs,df=df,beta=beta, + completepath=completepath,bls=bls, + Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, + intercept=intercept,normalize=normalize,call=this.call, + offset_pos_maxZ=offset_pos_maxZ,offset_neg_maxZ=offset_neg_maxZ, + scale_maxZ=scale_maxZ,Gamma_maxZ=Gamma_maxZ,realized_maxZ=realized_maxZ) + class(out) = "fs" + return(out) +} + +############################## + +# Coefficient function for fs + +coef.fs <- function(object, s, ...) { + if (object$completepath) { + k = length(object$action)+1 + beta = cbind(object$beta,object$bls) + } else { + k = length(object$action) + beta = object$beta + } + + if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) + knots = 1:k + dec = FALSE + return(coef.interpolate(beta,s,knots,dec)) +} + +# Prediction function for fs + +predict.fs <- function(object, newx, s, ...) 
{ + beta = coef.fs(object,s) + if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) + else newx = scale(newx,object$bx,FALSE) + return(newx %*% beta + object$by) +} + +############################## + +# FS inference function + +fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { + + this.call = match.call() + type = match.arg(type) + checkargs.misc(sigma=sigma,alpha=alpha,k=k, + gridrange=gridrange,mult=mult,ntimes=ntimes) + if (class(obj) != "fs") stop("obj must be an object of class fs") + if (is.null(k) && type=="active") k = length(obj$action) + if (is.null(k) && type=="all") stop("k must be specified when type = all") + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + G = obj$Gamma + nconstraint = obj$nconstraint + sx = obj$sx + + if (is.null(sigma)) { + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + khat = NULL + + if (type == "active") { + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + vreg = obj$vreg[1:k,,drop=FALSE] + sign = obj$sign[1:k] + vars = obj$action[1:k] + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + Gj = G[1:nconstraint[j],] + uj = rep(0,nconstraint[j]) + vj = vreg[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + + khat = forwardStop(pv,alpha) + } + + else { + if (type == "aic") { + out = aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) + khat = out$khat + m = out$stopped * ntimes + G = rbind(out$G,G[1:nconstraint[khat+m],]) # Take ntimes more steps past khat + u = c(out$u,rep(0,nconstraint[khat+m])) # (if we need to) + kk = khat + } + else { + G = G[1:nconstraint[k],] + u = rep(0,nconstraint[k]) + kk = k + } + + pv = vlo = vup = numeric(kk) + vmat = matrix(0,kk,n) + ci = tailarea = matrix(0,kk,2) + sign = numeric(kk) + vars = obj$action[1:kk] + xa = x[,vars] + M = pinv(crossprod(xa)) %*% t(xa) + + for (j in 1:kk) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + Gj = rbind(G,vj) + uj = c(u,0) + + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + 
flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + } + + # JT: why do we output vup, vlo? Are they used somewhere else? + + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "fsInf" + return(out) +} + +############################## + +############################## + +# selected maxZ tests + +fsInf_maxZ = function(obj, sigma=NULL, alpha=0.1, k=NULL, + ndraw=8000, burnin=2000, verbose=FALSE) { + + this.call = match.call() + + checkargs.misc(sigma=sigma,alpha=alpha,k=k) + + if (class(obj) != "fs") stop("obj must be an object of class fs") + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + pv = c() + + if (is.null(sigma)) { + # TODO we need a sampler on a unit sphere + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + khat = NULL + + vars = obj$action[1:k] + zi = 0 + for (j in 1:k) { + if(verbose) cat(c("Step=",j),fill=T) + # the inactive set here does not + # include the variable at the j-th step + # so, at j==1, the inactive set is every variable + # at j==2, the inactive set is everything but the first one + + if (j > 1) { + active = vars[1:(j-1)] + inactive = (1:p)[-active] + } else { + inactive = 1:p + } + + collapsed_pos = apply(obj$offset_pos_maxZ[inactive,1:j,drop=FALSE], 1, min) + collapsed_neg = apply(obj$offset_neg_maxZ[inactive,1:j,drop=FALSE], 1, min) + cur_scale = obj$scale_maxZ[,j][inactive] + + # the matrix cur_adjusted_Xt is used to compute (always as length(y) columns) + # the maxZ or maxT for the sampled variables + # + cur_adjusted_Xt = obj$Gamma_maxZ[zi + Seq(1,p-j+1),,drop=FALSE]; zi = zi+p-j+1 # Xt for transpose + + # cur_X is used to enforce conditioning on + # the ever_active sufficient_statistics + + cur_X = obj$x[,inactive,drop=FALSE] + + # now we condition on solution up to now + # this is equivalent to finding vector of + # fitted values up to now and appropriately + # adjusting the box limits + + if (j > 1) { + cur_fitted = predict(obj, s=j) + cur_fitted = cur_fitted - mean(cur_fitted) + cur_offset = as.numeric(t(cur_X) %*% cur_fitted) + } + else { + cur_fitted = rep(0, length(y)) + cur_offset = rep(0, length(inactive)) + } + + final_upper = collapsed_pos - cur_offset + final_lower = -(collapsed_neg + cur_offset) + + # now, we sample from Y_star, a centered Gaussian with covariance sigma^2 I + # subject to the constraint + # t(cur_adjusted_Xt) %*% Y_star < final_upper + # -t(cur_adjusted_Xt) %*% Y_star < -final_lower + + # really, we want the covariance of Y_star to be \sigma^2 (I - cur_P) + # where P is projection on the j-1 previous variables + # but this doesn't matter as everything we do with the samples + # will be a function of (I - cur_P) Y_star and the constraints are + # expressible in terms of (I - cur_P) Y_star because + # we have adjusted X + + # IMPORTANT: after sampling Y_star, we have to add back cur_fitted + + # if n >= p, we could actually just draw cur_adjusted_Xt %*% Y_star + # because this has a simple box constraint + # with a generically non-degenerate covariance + + if (nrow(cur_adjusted_Xt) > length(y)) { + linear_part = rbind(cur_adjusted_Xt, 
-cur_adjusted_Xt) + offset = c(final_upper, -final_lower) + covariance = diag(rep(sigma^2, length(y))) + mean_param = cur_fitted # rep(0, length(y)) + initial_point = y + + truncated_y = sample_from_constraints(linear_part, + offset, + mean_param, + covariance, + initial_point, + burnin=burnin, + ndraw=ndraw) + + truncated_noise = truncated_y %*% t(cur_adjusted_Xt) + sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) + } else { # sample from a smaller dimensional gaussian + if (nrow(cur_adjusted_Xt) > 1) { + linear_part = rbind(diag(rep(1, nrow(cur_adjusted_Xt))), + diag(rep(-1, nrow(cur_adjusted_Xt)))) + covariance = sigma^2 * (cur_adjusted_Xt %*% t(cur_adjusted_Xt)) + offset = c(final_upper, -final_lower) + mean_param = cur_adjusted_Xt %*% cur_fitted # rep(0, nrow(cur_adjusted_Xt)) + initial_point = cur_adjusted_Xt %*% y + } else { + mean_param = as.numeric(sum(as.numeric(cur_adjusted_Xt) * as.numeric(cur_fitted))) + covariance = matrix(sigma^2 * sum(cur_adjusted_Xt^2)) + linear_part = matrix(c(1,-1), 2, 1) + offset = c(final_upper, -final_lower) + initial_point = as.numeric(sum(as.numeric(cur_adjusted_Xt) * as.numeric(y))) + } + truncated_noise = sample_from_constraints(linear_part, + offset, + mean_param, + covariance, + initial_point, + burnin=burnin, + ndraw=ndraw) + sample_maxZ = apply(abs(t(truncated_noise) / cur_scale), 2, max) + + } + + observed_maxZ = obj$realized_maxZ[j] + pval = sum(sample_maxZ > observed_maxZ) / ndraw + pval = 2 * min(pval, 1 - pval) + pv = c(pv, pval) + } + + khat = forwardStop(pv,alpha) + + out = list(pv=pv, + k=k, + khat=khat, + sigma=sigma, + vars=vars, + sign=obj$sign, + alpha=alpha, + realized_maxZ=obj$realized_maxZ, + call=this.call) + class(out) = "fsInf_maxZ" + return(out) +} + +############################## +# +# Print methods +# +############################## + +print.fs <- function(x, ...) { + cat("\nCall:\n") + dput(x$call) + + cat("\nSequence of FS moves:\n") + nsteps = length(x$action) + tab = cbind(1:nsteps,x$action,x$sign) + colnames(tab) = c("Step","Var","Sign") + rownames(tab) = rep("",nrow(tab)) + print(tab) + invisible() +} + +print.fsInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + if (x$type == "active") { + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",x$alpha)) + tab = cbind(1:length(x$pv),x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", + "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",x$khat)) + } + + else if (x$type == "all") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$k,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + } + + else if (x$type == "aic") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$khat,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) + } + + invisible() +} + + +print.fsInf_maxZ <- function(obj) { + + cat("\nCall:\n") + dput(obj$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + obj$sigma)) + + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",obj$alpha)) + + tab = cbind(1:length(obj$pv), + obj$vars, + round(obj$sign*obj$realized_maxZ, 3), + round(obj$pv,3)) + colnames(tab) = c("Step", "Var", "Z-score", "P-value") + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",obj$khat)) + + invisible() +} + +############################## +# +# Plot methods +# +############################## + +plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { + if (x$completepath) { + k = length(x$action)+1 + beta = cbind(x$beta,x$bls) + } else { + k = length(x$action) + beta = x$beta + } + p = nrow(beta) + + xx = 1:k + xlab = "Step" + + if (omit.zeros) { + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA + } + + plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), + xlab=xlab,ylab="Coefficients",main="Forward stepwise path",...) 
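+  # (descriptive note: the empty plot() call above only sets up axes and
+  #  labels; matplot(..., add=TRUE) below overlays one path per coefficient,
+  #  and the NAs introduced by omit.zeros simply leave gaps in those lines)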
+  abline(h=0,lwd=2)
+  matplot(xx,t(beta),type="l",lty=1,add=TRUE)
+  if (breaks) abline(v=xx,lty=2)
+  if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0)
+  invisible()
+}
+
diff --git a/forLater/maxZ/forLater b/forLater/maxZ/forLater
new file mode 100644
index 00000000..89aa886e
--- /dev/null
+++ b/forLater/maxZ/forLater
@@ -0,0 +1,86 @@
+\name{fsInf_maxZ}
+\alias{fsInf_maxZ}
+\title{
+Selective inference for forward stepwise regression
+}
+\description{
+Computes maxZ selective p-values and confidence intervals for forward
+stepwise regression
+}
+\usage{
+
+fsInf_maxZ(obj, sigma=NULL, alpha=0.1, k=NULL, ndraw=8000, burnin=2000,verbose=FALSE)
+
+}
+
+\arguments{
+  \item{obj}{
+Object returned by \code{\link{fs}} function
+}
+\item{sigma}{
+Estimate of error standard deviation. If NULL (default), this is estimated
+using the mean squared residual of the full least squares fit when n >= 2p, and
+using the standard deviation of y when n < 2p. In the latter case, the user
+should use \code{\link{estimateSigma}} function for a more accurate estimate
+}
+\item{alpha}{
+Significance level for confidence intervals (target is miscoverage alpha/2 in each tail)
+}
+\item{k}{
+See "type" argument below. Default is NULL, in which case k is taken to be
+the number of steps computed in the forward stepwise path
+}
+\item{ndraw}{Number of Monte Carlo samples generated}
+\item{burnin}{
+Number of samples discarded at the beginning of the chain
+}
+\item{verbose}{Print out progress along the way? Default is FALSE}
+}
+
+\details{
+This function computes selective maxZ p-values
+for forward stepwise regression. These p-values are independent under the null,
+so that stopping via the forwardStop rule yields guaranteed FDR control.
+}
+
+\value{
+\item{pv}{P-values for each model in the sequence}
+\item{k}{Value of k specified in call}
+\item{khat}{When type is "active", this is an estimated stopping point
+declared by \code{\link{forwardStop}}}
+\item{sigma}{Value of error standard deviation (sigma) used}
+\item{vars}{Variables in active set}
+\item{sign}{Signs of active coefficients}
+\item{alpha}{Desired coverage (alpha/2 in each tail)}
+\item{realized_maxZ}{Value of maxZ statistic computed at each step}
+\item{call}{The call to fsInf_maxZ}
+}
+
+\references{
+Will Fithian, Jonathan Taylor, Ryan Tibshirani, and Rob Tibshirani (2015).
+Selective sequential model selection. arXiv:1512.02565.
+
+
+}
+
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\seealso{\code{\link{fs}}}
+
+\examples{
+set.seed(33)
+n = 50
+p = 10
+sigma = 1
+x = matrix(rnorm(n*p),n,p)
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# run forward stepwise
+fsfit = fs(x,y)
+
+# compute sequential p-values
+# (sigma estimated from full model)
+out.seq = fsInf_maxZ(fsfit)
+out.seq
+}
diff --git a/forLater/maxZ/funs.constraints.R b/forLater/maxZ/funs.constraints.R
new file mode 100644
index 00000000..3bf6b77e
--- /dev/null
+++ b/forLater/maxZ/funs.constraints.R
@@ -0,0 +1,186 @@
+#
+# Some utilities for affine constraints
+#
+
+#
+# compute the square-root and inverse square-root of a non-negative
+# definite matrix
+#
+
+factor_covariance = function(S, rank=NA) {
+  if (is.na(rank)) {
+    rank = nrow(S)
+  }
+  svd_X = svd(S, nu=rank, nv=rank)
+  sqrt_cov = t(sqrt(svd_X$d[1:rank]) * t(svd_X$u[,1:rank]))
+  sqrt_inv = t((1. 
/ sqrt(svd_X$d[1:rank])) * t(svd_X$u[,1:rank])) + + return(list(sqrt_cov=sqrt_cov, sqrt_inv=sqrt_inv)) +} + +# +# from a constraint, return an equivalent +# constraint and a whitening and inverse +# whitening map +# + +# law is Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset + +whiten_constraint = function(linear_part, offset, mean_param, covariance) { + + factor_cov = factor_covariance(covariance) + sqrt_cov = factor_cov$sqrt_cov + sqrt_inv = factor_cov$sqrt_inv + + new_A = linear_part %*% sqrt_cov + new_b = offset - linear_part %*% mean_param + + # rescale rows to have length 1 + + scaling = sqrt(apply(new_A^2, 1, sum)) + new_A = new_A / scaling + new_b = new_b / scaling + + # TODO: check these functions will behave when Z is a matrix. + + inverse_map = function(Z) { + # broadcasting here + # the columns of Z are same length as mean_param + return(sqrt_cov %*% Z + as.numeric(mean_param)) + } + + forward_map = function(W) { + return(sqrt_inv %*% (W - mean_param)) + } + + return(list(linear_part=new_A, + offset=new_b, + inverse_map=inverse_map, + forward_map=forward_map)) +} + +# +# sample from the law +# +# Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset + +sample_from_constraints = function(linear_part, + offset, + mean_param, + covariance, + initial_point, # point must be feasible for constraints + ndraw=8000, + burnin=2000, + accept_reject_params=NA) #TODO: implement accept reject check +{ + + whitened_con = whiten_constraint(linear_part, + offset, + mean_param, + covariance) + white_initial = whitened_con$forward_map(initial_point) + +# # try 100 draws of accept reject +# # if we get more than 50 good draws, then just return a smaller sample +# # of size (burnin+ndraw)/5 + +# if accept_reject_params: +# use_hit_and_run = False +# num_trial, min_accept, num_draw = accept_reject_params + +# def _accept_reject(sample_size, linear_part, offset): +# Z_sample = np.random.standard_normal((100, linear_part.shape[1])) +# constraint_satisfied = (np.dot(Z_sample, linear_part.T) - +# offset[None,:]).max(1) < 0 +# return Z_sample[constraint_satisfied] + +# Z_sample = _accept_reject(100, +# white_con.linear_part, +# white_con.offset) + +# if Z_sample.shape[0] >= min_accept: +# while True: +# Z_sample = np.vstack([Z_sample, +# _accept_reject(num_draw / 5, +# white_con.linear_part, +# white_con.offset)]) +# if Z_sample.shape[0] > num_draw: +# break +# white_samples = Z_sample +# else: +# use_hit_and_run = True +# else: +# use_hit_and_run = True + + use_hit_and_run = TRUE + + if (use_hit_and_run) { + + white_linear = whitened_con$linear_part + white_offset = whitened_con$offset + + # Inf cannot be used in C code + # In theory, these rows can be dropped + + rows_to_keep = white_offset < Inf + white_linear = white_linear[rows_to_keep,,drop=FALSE] + white_offset = white_offset[rows_to_keep] + + nstate = length(white_initial) + if (sum(rows_to_keep) > 0) { + if (ncol(white_linear) > 1) { + nconstraint = nrow(white_linear) + + directions = rbind(diag(rep(1, nstate)), + matrix(rnorm(nstate^2), nstate, nstate)) + + # normalize rows to have length 1 + + scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) + directions = directions / scaling + ndirection = nrow(directions) + + alphas = directions %*% t(white_linear) + U = white_linear %*% white_initial - white_offset + Z_sample = matrix(rep(0, nstate * ndraw), nstate, ndraw) + + result = .C("sample_truncnorm_white", + as.numeric(white_initial), + as.numeric(U), + 
as.numeric(t(directions)), + as.numeric(t(alphas)), + output=Z_sample, + as.integer(nconstraint), + as.integer(ndirection), + as.integer(nstate), + as.integer(burnin), + as.integer(ndraw), + package="selectiveInference") + Z_sample = result$output + } else { # the distribution is univariate + # we can just work out upper and lower limits + + white_linear = as.numeric(white_linear) + pos = (white_linear * white_offset) >= 0 + neg = (white_linear * white_offset) <= 0 + if (sum(pos) > 0) { + U = min((white_offset / white_linear)[pos]) + } else { + U = Inf + } + if (sum(neg) < 0) { + L = max((white_offset / white_linear)[neg]) + } else { + L = -Inf + } + Z_sample = matrix(qnorm((pnorm(U) - pnorm(L)) * runif(ndraw) + pnorm(L)), 1, ndraw) + } + } else { + Z_sample = matrix(rnorm(nstate * ndraw), nstate, ndraw) + } + } + + Z = t(whitened_con$inverse_map(Z_sample)) + return(Z) +} + From 74e3c482077d159be98a45c41c664f1b1cd8c8f1 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Thu, 21 Jul 2016 16:53:06 -0700 Subject: [PATCH 145/396] Simulation to compare saturated and selected models in fs --- forLater/josh/sim.selectedmodel.R | 60 +++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 forLater/josh/sim.selectedmodel.R diff --git a/forLater/josh/sim.selectedmodel.R b/forLater/josh/sim.selectedmodel.R new file mode 100644 index 00000000..c278e984 --- /dev/null +++ b/forLater/josh/sim.selectedmodel.R @@ -0,0 +1,60 @@ +library(selectiveInference) +library(intervals) +setwd("~/Dropbox/work/R-software/forLater/josh") +source("selectiveInference/R/cv.R") +source("../../selectiveInference/R/funs.groupfs.R") +source("../../selectiveInference/R/funs.quadratic.R") +source("../../selectiveInference/R/funs.common.R") +source("../../selectiveInference/R/funs.fs.R") +source("../../selectiveInference/R/funs.lar.R") +source("../../selectiveInference/R/funs.inf.R") +library(MASS) +pinv = ginv + +set.seed(19) +niters <- 500 +known <- TRUE +n <- 50 +p <- 100 +maxsteps <- 8 +sparsity <- 5 +snr <- 2 +index <- 1:p + +x <- matrix(rnorm(n*p), nrow=n) + +instance <- function(n, p, sparsity, snr, maxsteps) { + y <- rnorm(n) + if (sparsity > 0) { + beta <- rep(0, p) + beta[1:sparsity] <- snr * sample(c(-1,1), sparsity, replace=T) + y <- y + x %*% beta + } + y <- y - mean(y) + fit <- groupfs(x, y, index, maxsteps=maxsteps, sigma=1, intercept=F, center=F, normalize=F) + fitfs <- fs(x, y, maxsteps=maxsteps, intercept=F, normalize=F) + if (any(fit$action != fitfs$action)) stop("Model paths did not agree") + pvfs <- fsInf(fitfs, sigma=1, k = maxsteps, type = "all") + pv <- groupfsInf(fit) + return(list(vars = fit$action, pvals = pv$pv, selpvals = pvfs$pv)) +} + +time <- system.time({ + output <- replicate(niters, instance(n, p, sparsity, snr, maxsteps)) +}) + +vars <- do.call(c, list(output[1,])) +pvals <- do.call(c, list(output[2,])) +selpvals <- do.call(c, list(output[3,])) + +save(vars, pvals, selpvals, + file = paste0("results/selected", + "_", ifelse(known, "TC", "TF"), + "_n", n, + "_p", p, + "_sparsity", sparsity, + "_snr", as.character(snr), + ".RData")) + +print(time) + From cb3ef4ab90389fe482d6f3628dea5018eafe0f7d Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Thu, 21 Jul 2016 16:56:06 -0700 Subject: [PATCH 146/396] Moving some common functions --- selectiveInference/R/funs.common.R | 50 ++++++++++-- selectiveInference/R/funs.fixed.R | 57 ++++++------- selectiveInference/R/funs.lar.R | 123 +++++++++++------------------ 3 files changed, 115 insertions(+), 115 deletions(-) diff --git 
a/selectiveInference/R/funs.common.R b/selectiveInference/R/funs.common.R index 7a4b9aaa..59457008 100644 --- a/selectiveInference/R/funs.common.R +++ b/selectiveInference/R/funs.common.R @@ -32,7 +32,7 @@ standardize <- function(x, y, intercept, normalize) { y = as.numeric(y) n = nrow(x) p = ncol(x) - + if (intercept) { bx = colMeans(x) by = mean(y) @@ -57,10 +57,10 @@ standardize <- function(x, y, intercept, normalize) { # Interpolation function to get coefficients coef.interpolate <- function(betas, s, knots, dec=TRUE) { - # Sort the s values + # Sort the s values o = order(s,dec=dec) s = s[o] - + k = length(s) mat = matrix(rep(knots,each=k),nrow=k) if (dec) b = s >= mat @@ -72,7 +72,7 @@ coef.interpolate <- function(betas, s, knots, dec=TRUE) { p = numeric(k) p[i] = 0 p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i] - + beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE])) colnames(beta) = as.character(round(s,3)) rownames(beta) = NULL @@ -100,7 +100,7 @@ checkargs.misc <- function(sigma=NULL, alpha=NULL, k=NULL, mult=NULL, ntimes=NULL, beta=NULL, lambda=NULL, tol.beta=NULL, tol.kkt=NULL, bh.q=NULL) { - + if (!is.null(sigma) && sigma <= 0) stop("sigma must be > 0") if (!is.null(lambda) && lambda < 0) stop("lambda must be >= 0") if (!is.null(alpha) && (alpha <= 0 || alpha >= 1)) stop("alpha must be between 0 and 1") @@ -144,3 +144,43 @@ estimateSigma <- function(x, y, intercept=TRUE, standardize=TRUE) { return(list(sigmahat=sigma, df=nz)) } +# Update the QR factorization, after a column has been +# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. + +updateQR <- function(Q1,Q2,R,col) { + m = nrow(Q1) + n = ncol(Q1) + k = ncol(Q2) + + a = .C("update1", + Q2=as.double(Q2), + w=as.double(t(Q2)%*%col), + m=as.integer(m), + k=as.integer(k), + dup=FALSE, + package="selectiveInference") + + Q2 = matrix(a$Q2,nrow=m) + w = c(t(Q1)%*%col,a$w) + + # Re-structure: delete a column from Q2, add one to + # Q1, and expand R + Q1 = cbind(Q1,Q2[,1]) + Q2 = Q2[,-1,drop=FALSE] + R = rbind(R,rep(0,n)) + R = cbind(R,w[Seq(1,n+1)]) + + return(list(Q1=Q1,Q2=Q2,R=R)) +} + +# Moore-Penrose pseudo inverse for symmetric matrices + +pinv <- function(A, tol=.Machine$double.eps) { + e = eigen(A) + v = Re(e$vec) + d = Re(e$val) + d[d > tol] = 1/d[d > tol] + d[d < tol] = 0 + if (length(d)==1) return(v*d*v) + else return(v %*% diag(d) %*% t(v)) +} diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 4263c0b2..e1b9230d 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -6,11 +6,11 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE) { - + family = match.arg(family) this.call = match.call() type = match.arg(type) - + if(family=="binomial") { if(type!="partial") stop("Only type= partial allowed with binomial family") out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, @@ -20,24 +20,24 @@ sigma=NULL, alpha=0.1, else if(family=="cox") { if(type!="partial") stop("Only type= partial allowed with Cox family") out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, - tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) return(out) } - + else{ - - + + checkargs.xy(x,y) if 
(missing(beta) || is.null(beta)) stop("Must supply the solution beta") - if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { warning("Package Rmpfr is not installed, reverting to standard precision") bits = NULL } - + n = nrow(x) p = ncol(x) beta = as.numeric(beta) @@ -66,14 +66,14 @@ else{ "(to within specified tolerances). You might try rerunning", "glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - + # Get lasso polyhedral region, of form Gy >= u out = fixedLasso.poly(x,y,beta,lambda,vars) G = out$G u = out$u # Check polyhedral region - tol.poly = 0.01 + tol.poly = 0.01 if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) stop(paste("Polyhedral constraints not satisfied; you must recompute beta", "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", @@ -94,9 +94,9 @@ else{ "you may want to use the estimateSigma function")) } } - + k = length(vars) - pv = vlo = vup = numeric(k) + pv = vlo = vup = numeric(k) vmat = matrix(0,k,n) ci = tailarea = matrix(0,k,2) sign = numeric(k) @@ -104,7 +104,7 @@ else{ if (type=="full" & p > n) warning(paste("type='full' does not make sense when p > n;", "switching to type='partial'")) - + if (type=="partial" || p > n) { xa = x[,vars,drop=F] M = pinv(crossprod(xa)) %*% t(xa) @@ -113,17 +113,17 @@ else{ M = pinv(crossprod(x)) %*% t(x) M = M[vars,,drop=F] } - + for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - + vj = M[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) sign[j] = sign(sum(vj*y)) vj = sign[j] * vj a = poly.pval(y,G,u,vj,sigma,bits) - pv[j] = a$pv + pv[j] = a$pv vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) @@ -133,12 +133,12 @@ else{ ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) tailarea[j,] = a$tailarea } - + out = list(type=type,lambda=lambda,pv=pv,ci=ci, tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, vars=vars,sign=sign,sigma=sigma,alpha=alpha, sd=sigma*sqrt(rowSums(vmat^2)), - coef0=vmat%*%y, + coef0=vmat%*%y, call=this.call) class(out) = "fixedLassoInf" return(out) @@ -160,7 +160,7 @@ function(x, y, beta, lambda, a) { P = diag(1,nrow(xa)) - xa %*% xap #NOTE: inactive constraints not needed below! - + G = -rbind( # 1/lambda * t(xac) %*% P, # -1/lambda * t(xac) %*% P, @@ -175,17 +175,6 @@ function(x, y, beta, lambda, a) { return(list(G=G,u=u)) } -# Moore-Penrose pseudo inverse for symmetric matrices - -pinv <- function(A, tol=.Machine$double.eps) { - e = eigen(A) - v = Re(e$vec) - d = Re(e$val) - d[d > tol] = 1/d[d > tol] - d[d < tol] = 0 - if (length(d)==1) return(v*d*v) - else return(v %*% diag(d) %*% t(v)) -} ############################## @@ -195,7 +184,7 @@ print.fixedLassoInf <- function(x, tailarea=TRUE, ...) { cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) - + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) cat("",fill=T) tab = cbind(x$vars, @@ -209,7 +198,7 @@ print.fixedLassoInf <- function(x, tailarea=TRUE, ...) 
{ } rownames(tab) = rep("",nrow(tab)) print(tab) - + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", ifelse(x$type=="partial","partial","full"))) invisible() @@ -220,10 +209,10 @@ print.fixedLassoInf <- function(x, tailarea=TRUE, ...) { # if(nsamp < 10) stop("More Monte Carlo samples required for estimation") # if (length(sigma)!=1) stop("sigma should be a number > 0") # if (sigma<=0) stop("sigma should be a number > 0") - + # n = nrow(x) # eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) # lambda = 2*mean(apply(t(x)%*%eps,2,max)) # return(lambda) #} - + diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index 26669cad..f01ee3d8 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -17,7 +17,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, this.call = match.call() checkargs.xy(x=x,y=y) - + # Center and scale, etc. obj = standardize(x,y,intercept,normalize) x = obj$x @@ -49,7 +49,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, action = numeric(buf) # Actions taken df = numeric(buf) # Degrees of freedom beta = matrix(0,p,buf) # LAR estimates - + lambda[1] = hit action[1] = ihit df[1] = 0 @@ -91,14 +91,14 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, Q1 = Q[,1,drop=FALSE]; Q2 = Q[,-1,drop=FALSE] R = qr.R(obj) - + # Throughout the algorithm, we will maintain # the decomposition X1 = Q1*R. Dimenisons: # X1: n x r # Q1: n x r # Q2: n x (n-r) # R: r x r - + while (k<=maxsteps && lambda[k-1]>=minlam) { ########## # Check if we've reached the end of the buffer @@ -118,7 +118,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, b = backsolve(R,backsolve(R,s,transpose=TRUE)) aa = as.numeric(t(X2) %*% (y - X1 %*% a)) bb = as.numeric(t(X2) %*% (X1 %*% b)) - + # If the inactive set is empty, nothing will hit if (r==min(n-intercept,p)) hit = 0 @@ -128,9 +128,9 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, hits = aa/(shits-bb) # Make sure none of the hitting times are larger - # than the current lambda + # than the current lambda hits[hits>lambda[k-1]] = 0 - + ihit = which.max(hits) hit = hits[ihit] shit = shits[ihit] @@ -138,13 +138,13 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, # Stop if the next critical point is negative if (hit<=0) break - + # Record the critical lambda and solution lambda[k] = hit action[k] = I[ihit] df[k] = r beta[A,k] = a-hit*b - + # Gamma matrix! if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) X2perp = X2 - X1 %*% backsolve(R,t(Q1)%*%X2) @@ -162,7 +162,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, crit = (t(c[,-ihit])%*%y - ratio*sum(c[,ihit]*y))/(1-ratio) mp[k] = max(max(crit[ip]),0) } - + # Update all of the variables r = r+1 A = c(A,I[ihit]) @@ -176,12 +176,12 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, Q1 = obj$Q1 Q2 = obj$Q2 R = obj$R - + if (verbose) { cat(sprintf("\n%i. 
lambda=%.3f, adding variable %i, |A|=%i...", k,hit,A[r],r)) } - + # Step counter k = k+1 } @@ -195,7 +195,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, nk = nk[Seq(1,k-1)] mp = mp[Seq(1,k-1)] vreg = vreg[Seq(1,k-1),,drop=FALSE] - + # If we reached the maximum number of steps if (k>maxsteps) { if (verbose) { @@ -215,11 +215,11 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, completepath = FALSE bls = NULL } - + # Otherwise, note that we completed the path else { completepath = TRUE - + # Record the least squares solution. Note that # we have already computed this bls = rep(0,p) @@ -227,19 +227,19 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, } if (verbose) cat("\n") - + # Adjust for the effect of centering and scaling if (intercept) df = df+1 if (normalize) beta = beta/sx if (normalize && completepath) bls = bls/sx - + # Assign column names colnames(beta) = as.character(round(lambda,3)) out = list(lambda=lambda,action=action,sign=s,df=df,beta=beta, completepath=completepath,bls=bls, Gamma=Gamma,nk=nk,vreg=vreg,mp=mp,x=x,y=y,bx=bx,by=by,sx=sx, - intercept=intercept,normalize=normalize,call=this.call) + intercept=intercept,normalize=normalize,call=this.call) class(out) = "lar" return(out) } @@ -253,7 +253,7 @@ lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, downdateQR <- function(Q1,Q2,R,col) { m = nrow(Q1) n = ncol(Q1) - + a = .C("downdate1", Q1=as.double(Q1), R=as.double(R), @@ -275,35 +275,6 @@ downdateQR <- function(Q1,Q2,R,col) { return(list(Q1=Q1,Q2=Q2,R=R)) } -# Update the QR factorization, after a column has been -# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. - -updateQR <- function(Q1,Q2,R,col) { - m = nrow(Q1) - n = ncol(Q1) - k = ncol(Q2) - - a = .C("update1", - Q2=as.double(Q2), - w=as.double(t(Q2)%*%col), - m=as.integer(m), - k=as.integer(k), - dup=FALSE, - package="selectiveInference") - - Q2 = matrix(a$Q2,nrow=m) - w = c(t(Q1)%*%col,a$w) - - # Re-structure: delete a column from Q2, add one to - # Q1, and expand R - Q1 = cbind(Q1,Q2[,1]) - Q2 = Q2[,-1,drop=FALSE] - R = rbind(R,rep(0,n)) - R = cbind(R,w[Seq(1,n+1)]) - - return(list(Q1=Q1,Q2=Q2,R=R)) -} - ############################## # Coefficient function for lar @@ -320,7 +291,7 @@ coef.lar <- function(object, s, mode=c("step","lambda"), ...) { lambda = object$lambda beta = object$beta } - + if (mode=="step") { if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) knots = 1:k @@ -330,7 +301,7 @@ coef.lar <- function(object, s, mode=c("step","lambda"), ...) 
{ knots = lambda dec = TRUE } - + return(coef.interpolate(beta,s,knots,dec)) } @@ -350,9 +321,9 @@ predict.lasso <- predict.lar # Lar inference function -larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), +larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { - + this.call = match.call() type = match.arg(type) checkargs.misc(sigma=sigma,alpha=alpha,k=k, @@ -364,7 +335,7 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai warning("Package Rmpfr is not installed, reverting to standard precision") bits = NULL } - + k = min(k,length(obj$action)) # Round to last step x = obj$x y = obj$y @@ -385,11 +356,11 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai "you may want to use the estimateSigma function")) } } - + pv.spacing = pv.modspac = pv.covtest = khat = NULL - + if (type == "active") { - pv = vlo = vup = numeric(k) + pv = vlo = vup = numeric(k) vmat = matrix(0,k,n) ci = tailarea = matrix(0,k,2) pv.spacing = pv.modspac = pv.covtest = numeric(k) @@ -399,7 +370,7 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - + Gj = G[1:nk[j],] uj = rep(0,nk[j]) vj = vreg[j,] @@ -411,12 +382,12 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, flip=(sign[j]==-1),bits=bits) - ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea - + pv.spacing[j] = spacing.pval(obj,sigma,j) pv.modspac[j] = modspac.pval(obj,sigma,j) pv.covtest[j] = covtest.pval(obj,sigma,j) @@ -424,7 +395,7 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai khat = forwardStop(pv,alpha) } - + else { if (type == "aic") { out = aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) @@ -439,22 +410,22 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai u = rep(0,nk[k]) kk = k } - - pv = vlo = vup = numeric(kk) + + pv = vlo = vup = numeric(kk) vmat = matrix(0,kk,n) ci = tailarea = matrix(0,kk,2) sign = numeric(kk) vars = obj$action[1:kk] xa = x[,vars] M = pinv(crossprod(xa)) %*% t(xa) - + for (j in 1:kk) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - + vj = M[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - sign[j] = sign(sum(vj*y)) + sign[j] = sign(sum(vj*y)) vj = sign[j] * vj Gj = rbind(G,vj) uj = c(u,0) @@ -472,7 +443,7 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai tailarea[j,] = a$tailarea } } - + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, pv.spacing=pv.spacing,pv.modspac=pv.modspac,pv.covtest=pv.covtest, @@ -488,10 +459,10 @@ spacing.pval <- function(obj, sigma, k) { v = obj$Gamma[obj$nk[k],] sd = sigma*sqrt(sum(v^2)) a = obj$mp[k] - + if (k==1) b = Inf else b = obj$lambda[k-1] - + return(tnorm.surv(obj$lambda[k],0,sd,a,b)) } @@ -505,7 +476,7 @@ modspac.pval <- function(obj, sigma, k) { warning(sprintf("Modified 
spacing p-values at step %i require %i steps of the lar path",k,k+1)) return(NA) } - + if (k==1) b = Inf else b = obj$lambda[k-1] @@ -543,7 +514,7 @@ covtest.pval <- function(obj, sigma, k) { print.lar <- function(x, ...) { cat("\nCall:\n") dput(x$call) - + cat("\nSequence of LAR moves:\n") nsteps = length(x$action) tab = cbind(1:nsteps,x$action,x$sign) @@ -566,7 +537,7 @@ print.larInf <- function(x, tailarea=TRUE, ...) { tab = cbind(1:length(x$pv),x$vars, round(x$sign*x$vmat%*%x$y,3), round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), - round(x$pv,3),round(x$ci,3),round(x$pv.spacing,3),round(x$pv.cov,3)) + round(x$pv,3),round(x$ci,3),round(x$pv.spacing,3),round(x$pv.cov,3)) colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt", "Spacing", "CovTest") if (tailarea) { @@ -609,7 +580,7 @@ print.larInf <- function(x, tailarea=TRUE, ...) { } rownames(tab) = rep("",nrow(tab)) print(tab) - + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) } @@ -618,7 +589,7 @@ print.larInf <- function(x, tailarea=TRUE, ...) { plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { - + if (x$completepath) { k = length(x$action)+1 lambda = c(x$lambda,0) @@ -629,7 +600,7 @@ plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, beta = x$beta } p = nrow(beta) - + xvar = match.arg(xvar) if (xvar=="norm") { xx = colSums(abs(beta)) @@ -656,6 +627,6 @@ plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, abline(h=0,lwd=2) matplot(xx,t(beta),type="l",lty=1,add=TRUE) if (breaks) abline(v=xx,lty=2) - if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) + if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) invisible() } From c6015aa32fd973d295760d9cfcfd4e4d78b1b9ab Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Thu, 27 Oct 2016 14:55:53 +0100 Subject: [PATCH 147/396] cvMakeFolds now has permutation built in --- forLater/josh/selectiveInference/R/cv.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/forLater/josh/selectiveInference/R/cv.R b/forLater/josh/selectiveInference/R/cv.R index dfd63951..fcc6e542 100644 --- a/forLater/josh/selectiveInference/R/cv.R +++ b/forLater/josh/selectiveInference/R/cv.R @@ -1,11 +1,18 @@ # ------------------------------------------------ # Cross-validation, preliminary -cvMakeFolds <- function(x, nfolds = 10) { - #inds <- sample(1:nrow(x), replace=FALSE) - inds <- 1:nrow(x) +cvMakeFolds <- function(x, nfolds = 5) { + inds <- sample(1:nrow(x), replace=FALSE) + #inds <- 1:nrow(x) foldsize <- floor(nrow(x)/nfolds) - lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize])) + folds <- lapply(1:nfolds, function(f) return(inds[1:foldsize+(f-1)*foldsize])) + if (nfolds*foldsize < nrow(x)) { + # remainder observations added to first several folds + for (i in 1:(nrow(x) - nfolds*foldsize)) { + folds[[i]] <- c(folds[[i]], inds[nfolds*foldsize + i]) + } + } + return(folds) } ############################################ @@ -87,6 +94,7 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TR fold <- folds[[f]] fit <- groupfs(X[-fold,], Y[-fold], index=index, maxsteps=maxsteps, sigma=sigma, intercept=FALSE, center=FALSE, normalize=FALSE) fit$fold <- fold + # Why is this commented out? 
## projections[[f]] <- lapply(fit$projections, function(step.projs) { ## lapply(step.projs, function(proj) { ## # Reduce from n by n matrix to svdu_thresh From c467ba2267a21457bd4bbf3cee61c1eba5ad5b25 Mon Sep 17 00:00:00 2001 From: Joshua Loftus Date: Thu, 27 Oct 2016 15:01:33 +0100 Subject: [PATCH 148/396] Beginning cvlar implementation --- forLater/josh/selectiveInference/R/cv.R | 45 +++++++++++++++++++ forLater/josh/sim.cvlar.R | 58 +++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 forLater/josh/sim.cvlar.R diff --git a/forLater/josh/selectiveInference/R/cv.R b/forLater/josh/selectiveInference/R/cv.R index fcc6e542..046a4623 100644 --- a/forLater/josh/selectiveInference/R/cv.R +++ b/forLater/josh/selectiveInference/R/cv.R @@ -130,3 +130,48 @@ cvfs <- function(x, y, index = 1:ncol(x), maxsteps, sigma = NULL, intercept = TR invisible(fit) } + +cvlar <- function(x, y) { # other args + folds <- cvMakeFolds(x) + models <- lapply(folds, function(fold) { + x.train <- X + y.train <- Y + x.train[fold,] <- 0 + y.train[fold] <- 0 + x.test <- X[fold,] + y.test <- Y[fold] + larpath.train <- lar(x.train, y.train, maxsteps = maxsteps, intercept = F, normalize = F) + return(lff) + }) + + active.sets <- lapply(models, function(model) model$action) + lambdas <- lapply(models, function(model) model$lambda) + lmin <- min(unlist(lambdas)) + +# Interpolate lambda grid or parametrize by steps? +# interpolation probably requires re-writing cvRSSquads for +# penalized fits in order to make sense + +# do steps for now just to have something that works? + + RSSquads <- list() + for (s in 1:maxsteps) { + initial.active <- lapply(active.sets, function(a) a[1:s]) + RSSquads[[s]] <- cvRSSquad(X, folds, initial.active) + } + + RSSs <- lapply(RSSquads, function(Q) t(Y) %*% Q %*% Y) + sstar <- which.min(RSSs) + quadstar <- RSSquads[sstar][[1]] + + RSSquads <- lapply(RSSquads, function(quad) quad - quadstar) + RSSquads[[sstar]] <- NULL # remove the all zeroes case + + fit <- lar(X, Y, maxsteps=sstar, intercept = F, normalize = F) + +# Very tall Gamma encoding all cv-model paths + Gamma <- do.call(rbind, lapply(models, function(model) return(model$Gamma))) + +# more to do here +} + diff --git a/forLater/josh/sim.cvlar.R b/forLater/josh/sim.cvlar.R new file mode 100644 index 00000000..9169cc32 --- /dev/null +++ b/forLater/josh/sim.cvlar.R @@ -0,0 +1,58 @@ +# Choices + +# RSS: least-squares or penalized beta? +# depends on final model. Go with least-squares for now + +# fixed vs lar? (lar, apparently) +# fixed probably slower, but advantage of same lambda grid? +# is same lambda grid necessary? -- doesn't lar algorithm give all possible models anyway? +# i.e. for non-knot lambda just find where it is in lar path, take corresponding model + +# groups? later + +# TODO + +# copy larInf or groupfsInf? +# larInf: add CV quadratic constraints* & break/fix p-value computation +# -------- *but can we even use the ydecomp we use for quadratic? +# groupfsInf: some ugly rewriting, no cumprojs etc, but straightforward +# -------- downside: need to implement larInf basically + +# larInf +# [ ] is.null(sigma) don't estimate it + +# plan: +# expand Gamma for [-fold] indices? +# stack all the Gammas? or iterate through them? +# work backward from poly.pval <- larInf + + +# big picture / long term +# what OOP kind of design would lend itself to easily implementing more cv things? + +# Gamma: something x n +# Gamma %*% y >= 0 + +# pass 0-padded x[-fold] and y[-fold] to lar? 
+ +library(selectiveInference) +setwd("/Users/joftius/Dropbox/work/R-software/forLater/josh") +source("selectiveInference/R/cv.R") + +set.seed(1) +n <- 100 +p <- 50 +maxsteps <- 10 +sparsity <- 3 +snr <- 2 +rho <- 0.1 +nfolds <- 5 + +x <- matrix(rnorm(n*p), nrow=n) +y <- rnorm(n) +beta <- rep(0, p) +beta[1:sparsity] <- 2* sqrt(2*log(p)/n) * sample(c(-1,1), sparsity, replace=T) +y <- y + x %*% beta +my <- mean(y) +y <- y - my + From 66454bfd8fff1a85d9b67353a6ea6eb99835466f Mon Sep 17 00:00:00 2001 From: yuvalbenjamini Date: Tue, 27 Dec 2016 13:00:12 +0200 Subject: [PATCH 149/396] bug fixes in funs.constraint.R for matrix handling and univariate sampling, and some added documentation. Aligns version of funs.constraints.R with the restrictedNormal package. --- forLater/maxZ/funs.constraints.R | 170 +++++++++++++++++++------------ 1 file changed, 107 insertions(+), 63 deletions(-) diff --git a/forLater/maxZ/funs.constraints.R b/forLater/maxZ/funs.constraints.R index 3bf6b77e..a04e34ef 100644 --- a/forLater/maxZ/funs.constraints.R +++ b/forLater/maxZ/funs.constraints.R @@ -2,11 +2,16 @@ # Some utilities for affine constraints # -# +# # compute the square-root and inverse square-root of a non-negative # definite matrix # +#' Compute the square-root and inverse square-root of a non-negative definite matrix. +#' @param S matrix +#' @param rank rank of svd +#' +#' factor_covariance = function(S, rank=NA) { if (is.na(rank)) { rank = nrow(S) @@ -26,9 +31,16 @@ factor_covariance = function(S, rank=NA) { # law is Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset +#' Transform non-iid problem into iid problem +#' @param linear_part matrix, linear part of constraints +#' @param offset vector, bias of constraints +#' @param mean_param vector of unconditional means +#' @param covariance vector of unconditional covariance +#' @return new \code{linear_part} and \code{offset} for 0-mean iid covariance problem, +#' and functions that map between the two problems. whiten_constraint = function(linear_part, offset, mean_param, covariance) { - factor_cov = factor_covariance(covariance) + factor_cov = factor_covariance(as.matrix(covariance)) sqrt_cov = factor_cov$sqrt_cov sqrt_inv = factor_cov$sqrt_inv @@ -41,7 +53,6 @@ whiten_constraint = function(linear_part, offset, mean_param, covariance) { new_A = new_A / scaling new_b = new_b / scaling - # TODO: check these functions will behave when Z is a matrix. inverse_map = function(Z) { # broadcasting here @@ -51,7 +62,7 @@ whiten_constraint = function(linear_part, offset, mean_param, covariance) { forward_map = function(W) { return(sqrt_inv %*% (W - mean_param)) - } + } return(list(linear_part=new_A, offset=new_b, @@ -59,65 +70,54 @@ whiten_constraint = function(linear_part, offset, mean_param, covariance) { forward_map=forward_map)) } -# -# sample from the law -# -# Z \sim N(mean_param, covariance) subject to constraints linear_part %*% Z \leq offset - -sample_from_constraints = function(linear_part, - offset, - mean_param, - covariance, - initial_point, # point must be feasible for constraints +#' Sample from multivariate normal distribution under affine restrictions +#' @description +#' \code{sample_from_constraints} returns a sample from the conditional +#' multivariate normal Z~ N(mean,covariance) s.t. 
A*Z <= B +#' +#' @param linear_part r x d matrix for r restrictions and d dimension of Z +#' @param offset r-dim vector of offsets +#' @param mean_param d-dim mean vector of the unconditional normal +#' @param covariance d x d covariance matrix of unconditional normal +#' @param initial_point d-dim vector that initializes the sampler (must meet restrictions) +#' @param ndraw size of sample +#' @param burnin samples to throw away before storing +#' @return Z ndraw x d matrix of samples +#' @export +#' @examples +#' +#' truncatedNorm = function(1000, c(0,0,0), identity(3), lower = -1, +#' upper = c(2,1,2), start.value = c(0,0,0)) +#' +#' constr = thresh2constraints(3, lower = c(1,1,1)) +#' +#' samp = sample_from_constraints(linear_part = constr$linear_part, +#' offset= constr$offset, +#' mean_param = c(0,0,0), +#' covariance = diag(3), +#' initial_point = c(1.5,1.5,1.5), +#' ndraw=100, +#' burnin=2000) +#' + +sample_from_constraints = function(linear_part, + offset, + mean_param, + covariance, + initial_point, ndraw=8000, - burnin=2000, - accept_reject_params=NA) #TODO: implement accept reject check + burnin=2000) { - whitened_con = whiten_constraint(linear_part, + whitened_con = whiten_constraint(linear_part, offset, - mean_param, + mean_param, covariance) - white_initial = whitened_con$forward_map(initial_point) - -# # try 100 draws of accept reject -# # if we get more than 50 good draws, then just return a smaller sample -# # of size (burnin+ndraw)/5 - -# if accept_reject_params: -# use_hit_and_run = False -# num_trial, min_accept, num_draw = accept_reject_params - -# def _accept_reject(sample_size, linear_part, offset): -# Z_sample = np.random.standard_normal((100, linear_part.shape[1])) -# constraint_satisfied = (np.dot(Z_sample, linear_part.T) - -# offset[None,:]).max(1) < 0 -# return Z_sample[constraint_satisfied] - -# Z_sample = _accept_reject(100, -# white_con.linear_part, -# white_con.offset) - -# if Z_sample.shape[0] >= min_accept: -# while True: -# Z_sample = np.vstack([Z_sample, -# _accept_reject(num_draw / 5, -# white_con.linear_part, -# white_con.offset)]) -# if Z_sample.shape[0] > num_draw: -# break -# white_samples = Z_sample -# else: -# use_hit_and_run = True -# else: -# use_hit_and_run = True - - use_hit_and_run = TRUE - - if (use_hit_and_run) { - - white_linear = whitened_con$linear_part - white_offset = whitened_con$offset + white_initial = whitened_con$forward_map(initial_point) + + + white_linear = whitened_con$linear_part + white_offset = whitened_con$offset # Inf cannot be used in C code # In theory, these rows can be dropped @@ -136,7 +136,7 @@ sample_from_constraints = function(linear_part, # normalize rows to have length 1 - scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) + scaling = apply(directions, 1, function(x) { return(sqrt(sum(x^2))) }) directions = directions / scaling ndirection = nrow(directions) @@ -177,10 +177,54 @@ sample_from_constraints = function(linear_part, } } else { Z_sample = matrix(rnorm(nstate * ndraw), nstate, ndraw) - } - } + } + + + Z = t(whitened_con$inverse_map(Z_sample)) + return(Z) +} - Z = t(whitened_con$inverse_map(Z_sample)) - return(Z) +#' Translate between coordinate thresholds and affine constraints +#' @description +#' \code{thresh2constraints} translates lower and upper constraints +#' on coordinates into linear and offset constraints (A*Z <= B). +#' lower and upper can have -Inf or Inf coordinates. 
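+#' For example (illustrative), thresh2constraints(2, lower = c(0, -Inf), upper = c(Inf, 1))
+#' returns linear_part with rows (-1, 0) and (0, 1) and offset c(0, 1),
+#' i.e. the constraints -Z_1 <= 0 and Z_2 <= 1.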
+#' @param d dimension of vector +#' @param lower 1 or d-dim lower constraints +#' @param upper 1 or d-dim upper constraints +#' @export +thresh2constraints = function(d, lower = rep(-Inf, d), upper = rep(Inf,d)){ + stopifnot(is.element(length(lower),c(1,d))) + stopifnot(is.element(length(upper),c(1,d))) + + if (length(lower) == 1){ + lower = rep(lower, d) + } + if (length(upper) == 1){ + upper = rep(upper, d) + } + + + linear_part = matrix(ncol = d, nrow = 0) + offset = numeric(0) + lower_constraints = which(lower > -Inf) + for (l in lower_constraints){ + new_vec = rep(0,d) + new_vec[l] = -1 + linear_part = rbind(linear_part, new_vec) + offset = c(offset, -lower[l]) + } + upper_constraints = which(upper < Inf) + for (u in upper_constraints){ + new_vec = rep(0,d) + new_vec[u] = 1 + linear_part = rbind(linear_part, new_vec) + offset = c(offset, upper[u]) + } + + constraints = list(linear_part = linear_part, offset = offset) + return(constraints) } + + From 41145c70307e5b491984c883a6fa74dcc59fb617 Mon Sep 17 00:00:00 2001 From: tibs Date: Fri, 13 Jan 2017 11:42:03 -0800 Subject: [PATCH 150/396] rob --- selectiveInference/DESCRIPTION | 6 ++-- selectiveInference/NAMESPACE | 2 ++ selectiveInference/R/funs.fixed.R | 2 +- selectiveInference/R/funs.fixedLogit.R | 10 +++--- tests/test.fixed.R | 44 ++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 8 deletions(-) diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index 58232f11..5b317da7 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -1,8 +1,8 @@ Package: selectiveInference Type: Package Title: Tools for Post-Selection Inference -Version: 1.1.3 -Date: 2016-02-8 +Version: 1.2.1 +Date: 2016-07-3 Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid Maintainer: Rob Tibshirani @@ -14,5 +14,5 @@ Suggests: Rmpfr Description: New tools for post-selection inference, for use with forward stepwise regression, least angle regression, the - lasso, and the many means problem. + lasso, and the many means problem. The lasso function implements Gaussian, logistic and Cox survival models. 
License: GPL-2 diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 4a764f92..1eba3384 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -40,4 +40,6 @@ importFrom("graphics", abline, axis, matplot) importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) importFrom("stats", "coef", "df", "lm", "pf") +importFrom("stats", "glm", "residuals", "vcov") + diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 4263c0b2..9abd8330 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -41,7 +41,7 @@ else{ n = nrow(x) p = ncol(x) beta = as.numeric(beta) - if (length(beta) != p) stop("beta must have length equal to ncol(x)") + if (length(beta) != p) stop("Since family='gaussian', beta must have length equal to ncol(x)") # If glmnet was run with an intercept term, center x and y if (intercept==TRUE) { diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index fb4092d7..19c972df 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -18,7 +18,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet n=length(y) p=ncol(x) # I assume that intcpt was used - if(length(beta)!=p+1) stop("beta must be of length ncol(x)+1, that is, it should include an intercept") + if(length(beta)!=p+1) stop("Since family='binomial', beta must be of length ncol(x)+1, that is, it should include an intercept") nvar=sum(beta[-1]!=0) pv=vlo=vup=sd=rep(NA, nvar) ci=tailarea=matrix(NA,nvar,2) @@ -41,11 +41,12 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet etahat = xxm %*% bhat prhat = as.vector(exp(etahat) / (1 + exp(etahat))) ww=prhat*(1-prhat) - w=diag(ww) + # w=diag(ww) #check KKT z=etahat+(y-prhat)/ww - g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda + # g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda + gg=scale(t(x),FALSE,1/ww)%*%(z-etahat)/lambda # negative gradient scaled by lambda if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) @@ -62,7 +63,8 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet "'thresh' parameter, for a more accurate convergence.")) #constraints for active variables - MM=solve(t(xxm)%*%w%*%xxm) + # MM=solve(t(xxm)%*%w%*%xxm) + MM=solve(scale(t(xxm),F,1/ww)%*%xxm) gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized # at exact LASSO solution it should be s2[-1] dbeta = MM %*% gm diff --git a/tests/test.fixed.R b/tests/test.fixed.R index f1ce6cf0..9a09072f 100644 --- a/tests/test.fixed.R +++ b/tests/test.fixed.R @@ -510,3 +510,47 @@ g=t(X)%*%(Y-X%*%b)/lam2 out = fixedLassoInf(X,Y,beta,lam*n) + + + +# + +#gaussian +n=50 +p=10 +sigma=.7 +beta=c(0,0,0,0,rep(0,p-4)) +set.seed(43) +nsim = 1000 +pvals <- matrix(NA, nrow=nsim, ncol=p) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta +for (i in 1:nsim) { + cat(i) +y=mu+sigma*rnorm(n) +#y=y-mean(y) +# first run glmnet + pf=c(rep(.001,4),rep(1,p-4)) + xs=scale(x,FALSE,pf) #scale cols of x by penalty factors + # first run glmnet + gfit = glmnet(xs,y,standardize=FALSE) + + + lambda = .8 + beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + + # compute fixed lambda p-values and selection intervals + aa = 
fixedLassoInf(xs,y,beta,lambda,sigma=sigma) + +pvals[i, which(beta != 0)] <- aa$pv +} +nulls = 1:nsim +np = pvals[nulls,-(1:4)] +mean(np[!is.na(np)] < 0.1) +o=!is.na(np) +plot((1:sum(o))/sum(o),sort(np)) +abline(0,1) +##### + + From 80db7958744f8ae746fbacc314223dd416fca763 Mon Sep 17 00:00:00 2001 From: tibs Date: Mon, 16 Jan 2017 09:26:23 -0800 Subject: [PATCH 151/396] rob bug from quanzao --- selectiveInference/DESCRIPTION | 2 +- selectiveInference/R/funs.fixed.R | 2 +- selectiveInference/R/funs.fixedLogit.R | 4 ++-- selectiveInference/src/symbols.rds | Bin 367 -> 367 bytes 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index 5b317da7..d697499f 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -1,7 +1,7 @@ Package: selectiveInference Type: Package Title: Tools for Post-Selection Inference -Version: 1.2.1 +Version: 1.2.2 Date: 2016-07-3 Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 4fc0f1af..b30d04ce 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -158,7 +158,7 @@ function(x, y, beta, lambda, a) { if (length(za)>1) dz = diag(za) if (length(za)==1) dz = matrix(za,1,1) - P = diag(1,nrow(xa)) - xa %*% xap +# P = diag(1,nrow(xa)) - xa %*% xap #NOTE: inactive constraints not needed below! G = -rbind( diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 19c972df..5b673546 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -45,8 +45,8 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet #check KKT z=etahat+(y-prhat)/ww - # g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda - gg=scale(t(x),FALSE,1/ww)%*%(z-etahat)/lambda # negative gradient scaled by lambda + # g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda + g=scale(t(x),FALSE,1/ww)%*%(z-etahat)/lambda # negative gradient scaled by lambda if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) diff --git a/selectiveInference/src/symbols.rds b/selectiveInference/src/symbols.rds index ef45ca37aedfc57cb07c2d5066bf881bf4359eb6..06b0e85e927ffbb186121204a3be110d68965ede 100644 GIT binary patch literal 367 zcmV-#0g(P5iwFP!000001HF`8PlGTNhRes4%`92sg}43!BMkib8)hcnHDxQak_6JIPWlgp?6ULIF7c6t)_l2M@%=qRk*KdBZ0#Ng2zV>P#g%js+AFbJLu%b;B1>Aj(q=rLtRA?|{WKk$T84 z()a@`zE^&?C?~o(^2)7UXU|@{XM1H%>2>{?5ng$06s-p2h$G2pl*7&?)h}ssWtpGXyRe>2ri05=0r^28RO8 zjBtvUJk7bP%Q1@iCX2Bmeotwxtmv9#(pob7LR3!ut`KyESyu?HWzILv6do!>dj?Y* z!@*+@7Jm=dU2y!sVsLBm&hK{;+v)mYi|@mRiDeksi*sx-n)Q7D2Y?GpFmuF|>0`2w zTXnjf7EiZim=USDjwSfeziHK5e{b1fSiK3u!!=F|`#{onvcFvA`qG9{$5>=MXY;o5 z+E%Kl=S4;nV}{69)O>=Hl=Hl<&UB)qNFu3-u+1r7)qH^@W+Ej>X}e|l4zziu$_)8M z8h?Pr_sZ`U<;-u6yz;@WwP&x{Gu$0yn$%1y_q{k_SpQXQmz)WC#m&m>F0`bL NJ^{S|DnAbe005&lw21%! 
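The funs.fixedLogit.R edits in the two patches above replace the explicit n x n weight matrix product t(x)%*%w%*%(z-etahat) with a column-scaled form. A minimal sketch of why the two are equivalent (hypothetical data; ww stands in for the logistic weights prhat*(1-prhat)):

```R
set.seed(1)
n = 5; p = 3
x = matrix(rnorm(n*p), n, p)
r = rnorm(n)     # stands in for the working residual z - etahat
ww = runif(n)    # per-observation weights

g1 = t(x) %*% diag(ww) %*% r          # original form: builds an n x n diagonal matrix
g2 = scale(t(x), FALSE, 1/ww) %*% r   # patched form: scales each column of t(x) by ww

# scale(A, FALSE, 1/ww) divides column j of A by 1/ww[j], i.e. multiplies it by ww[j],
# so both expressions compute the same weighted gradient (up to the lambda scaling)
stopifnot(isTRUE(all.equal(c(g1), c(g2))))
```

Avoiding diag(ww) matters as n grows, since the explicit diagonal matrix costs O(n^2) memory.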
From 6c7885107e15efc9a2831def0f3da9ab8b24a7d0 Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Tue, 7 Feb 2017 23:04:46 -0500 Subject: [PATCH 152/396] Committing so my changes aren't overwritten --- selectiveInference/R/funs.fs.R | 5 ++++- selectiveInference/R/funs.lar.R | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index b5ee511b..bd5c39aa 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -234,7 +234,10 @@ coef.fs <- function(object, s, ...) { predict.fs <- function(object, newx, s, ...) { beta = coef.fs(object,s) if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) - else newx = scale(newx,object$bx,FALSE) + else { + newx = matrix(newx,ncol=ncol(object$x)) + newx = scale(newx,object$bx,FALSE) + } return(newx %*% beta + object$by) } diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index 26669cad..795bbc30 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -339,7 +339,10 @@ coef.lar <- function(object, s, mode=c("step","lambda"), ...) { predict.lar <- function(object, newx, s, mode=c("step","lambda"), ...) { beta = coef.lar(object,s,mode) if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) - else newx = scale(newx,object$bx,FALSE) + else { + newx = matrix(newx,ncol=ncol(object$x)) + newx = scale(newx,object$bx,FALSE) + } return(newx %*% beta + object$by) } From 427e89c0dcca1a9e09576127b71019fedaf4d3cb Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Tue, 7 Feb 2017 23:08:11 -0500 Subject: [PATCH 153/396] Fixed a small bug in the coef functions for fs and lar: use of dec instead of decreasing in the call to coef.interpolate. --- selectiveInference/R/funs.common.R | 8 ++++---- selectiveInference/R/funs.fs.R | 4 ++-- selectiveInference/R/funs.lar.R | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/selectiveInference/R/funs.common.R b/selectiveInference/R/funs.common.R index 59457008..042e0747 100644 --- a/selectiveInference/R/funs.common.R +++ b/selectiveInference/R/funs.common.R @@ -56,14 +56,14 @@ standardize <- function(x, y, intercept, normalize) { # Interpolation function to get coefficients -coef.interpolate <- function(betas, s, knots, dec=TRUE) { +coef.interpolate <- function(beta, s, knots, decreasing=TRUE) { # Sort the s values - o = order(s,dec=dec) + o = order(s,decreasing=decreasing) s = s[o] k = length(s) mat = matrix(rep(knots,each=k),nrow=k) - if (dec) b = s >= mat + if (decreasing) b = s >= mat else b = s <= mat blo = max.col(b,ties.method="first") bhi = pmax(blo-1,1) @@ -73,7 +73,7 @@ coef.interpolate <- function(betas, s, knots, dec=TRUE) { p[i] = 0 p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i] - beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE])) + beta = t((1-p)*t(beta[,blo,drop=FALSE]) + p*t(beta[,bhi,drop=FALSE])) colnames(beta) = as.character(round(s,3)) rownames(beta) = NULL diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index bd5c39aa..7a1d784a 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -225,8 +225,8 @@ coef.fs <- function(object, s, ...) 
{ if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) knots = 1:k - dec = FALSE - return(coef.interpolate(beta,s,knots,dec)) + decreasing = FALSE + return(coef.interpolate(beta,s,knots,decreasing)) } # Prediction function for fs diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index 96df0c08..37d0ae32 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -295,14 +295,14 @@ coef.lar <- function(object, s, mode=c("step","lambda"), ...) { if (mode=="step") { if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) knots = 1:k - dec = FALSE + decreasing = FALSE } else { if (min(s)= %0.3f",min(lambda))) knots = lambda - dec = TRUE + decreasing = TRUE } - return(coef.interpolate(beta,s,knots,dec)) + return(coef.interpolate(beta,s,knots,decreasing)) } # Prediction function for lar From 4e50c4d8a77c617ded2613bf22843a91ec070d0b Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Tue, 7 Feb 2017 23:40:55 -0500 Subject: [PATCH 154/396] Fixed a small bug in fs function. Seems have been introduced when this function was modified notationally. The least squares solution was not being returned properly; now it is. --- selectiveInference/DESCRIPTION | 8 +++++--- selectiveInference/R/funs.fs.R | 9 ++++----- selectiveInference/man/factorDesign.Rd | 4 ++-- selectiveInference/man/groupfs.Rd | 5 +++-- selectiveInference/man/groupfsInf.Rd | 2 +- selectiveInference/man/predict.groupfs.Rd | 7 +------ selectiveInference/man/scaleGroups.Rd | 2 +- 7 files changed, 17 insertions(+), 20 deletions(-) diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index d697499f..d0740ecb 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -12,7 +12,9 @@ Depends: survival Suggests: Rmpfr -Description: New tools for post-selection inference, for use - with forward stepwise regression, least angle regression, the - lasso, and the many means problem. The lasso function implements Gaussian, logistic and Cox survival models. +Description: New tools for post-selection inference, for use with forward + stepwise regression, least angle regression, the lasso, and the many means + problem. The lasso function implements Gaussian, logistic and Cox survival + models. License: GPL-2 +RoxygenNote: 5.0.1 diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 7a1d784a..62e6b7c6 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -106,8 +106,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, } # Key quantities for the next entry - keepLs=backsolve(R,t(Q_active)%*%X_inactive) - X_inactive_resid = X_inactive - X_active %*% keepLs + keepLs = backsolve(R,t(Q_active)%*%y) + X_inactive_resid = X_inactive - X_active %*% backsolve(R,t(Q_active)%*%X_inactive) working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) score = as.numeric(t(working_x)%*%y) @@ -127,7 +127,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, action[k] = I[i_hit] df[k] = r - beta[A,k] = backsolve(R,t(Q_active)%*%y) + beta[A,k] = keepLs # Gamma matrix! if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) @@ -188,8 +188,7 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Record the least squares solution. 
Note that # we have already computed this - bls = rep(0,p) - if(length(keepLs)>0) bls[A] = keepLs + bls = rep(0,p); bls[A] = keepLs } if (verbose) cat("\n") diff --git a/selectiveInference/man/factorDesign.Rd b/selectiveInference/man/factorDesign.Rd index 8e061db6..7b13ae0f 100644 --- a/selectiveInference/man/factorDesign.Rd +++ b/selectiveInference/man/factorDesign.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.groupfs.R \name{factorDesign} \alias{factorDesign} @@ -23,7 +23,7 @@ When using \code{\link{groupfs}} with factor variables call this function first \dontrun{ fd = factorDesign(warpbreaks) y = rnorm(nrow(fd$x)) -fit = groupfs(fd$x, y, fd$index, maxsteps=2, intercept=FALSE) +fit = groupfs(fd$x, y, fd$index, maxsteps=2, intercept=F) pvals = groupfsInf(fit) } } diff --git a/selectiveInference/man/groupfs.Rd b/selectiveInference/man/groupfs.Rd index a57c6dc8..5c2171a2 100644 --- a/selectiveInference/man/groupfs.Rd +++ b/selectiveInference/man/groupfs.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.groupfs.R \name{groupfs} \alias{groupfs} @@ -41,7 +41,8 @@ x = matrix(rnorm(20*40), nrow=20) index = sort(rep(1:20, 2)) y = rnorm(20) + 2 * x[,1] - x[,4] fit = groupfs(x, y, index, maxsteps = 5) -pvals = groupfsInf(fit) +out = groupfsInf(fit) +out } \seealso{ \code{\link{groupfsInf}}, \code{\link{factorDesign}}. diff --git a/selectiveInference/man/groupfsInf.Rd b/selectiveInference/man/groupfsInf.Rd index 74b9a5e0..16efcaba 100644 --- a/selectiveInference/man/groupfsInf.Rd +++ b/selectiveInference/man/groupfsInf.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.groupfs.R \name{groupfsInf} \alias{groupfsInf} diff --git a/selectiveInference/man/predict.groupfs.Rd b/selectiveInference/man/predict.groupfs.Rd index 4a382c7e..4b8394b3 100644 --- a/selectiveInference/man/predict.groupfs.Rd +++ b/selectiveInference/man/predict.groupfs.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.groupfs.R \name{predict.groupfs} \alias{predict.groupfs} @@ -16,9 +16,4 @@ Make predictions or extract coefficients from a groupfs forward stepwise object. \value{ A vector of predictions or a vector of coefficients. } -\description{ -Prediction and coefficient functions for \code{\link{groupfs}}. - -Make predictions or extract coefficients from a groupfs forward stepwise object. -} diff --git a/selectiveInference/man/scaleGroups.Rd b/selectiveInference/man/scaleGroups.Rd index e5a93fab..46078905 100644 --- a/selectiveInference/man/scaleGroups.Rd +++ b/selectiveInference/man/scaleGroups.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/funs.groupfs.R \name{scaleGroups} \alias{scaleGroups} From d54ba2e65d83b54d563f2fbcbd4b600c6dab7417 Mon Sep 17 00:00:00 2001 From: Ryan Tibshirani Date: Tue, 7 Feb 2017 23:49:08 -0500 Subject: [PATCH 155/396] Created a new folder for our package versions. 
--- selectiveInference/R/funs.fs.R | 3 ++- .../selectiveInference_1.0.1.tar.gz | Bin .../selectiveInference_1.0.2.tar.gz | Bin .../selectiveInference_1.0.tar.gz | Bin 4 files changed, 2 insertions(+), 1 deletion(-) rename selectiveInference_1.0.1.tar.gz => versions/selectiveInference_1.0.1.tar.gz (100%) rename selectiveInference_1.0.2.tar.gz => versions/selectiveInference_1.0.2.tar.gz (100%) rename selectiveInference_1.0.tar.gz => versions/selectiveInference_1.0.tar.gz (100%) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 62e6b7c6..3f8a87c4 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -188,7 +188,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, # Record the least squares solution. Note that # we have already computed this - bls = rep(0,p); bls[A] = keepLs + bls = rep(0,p) + bls[A] = keepLs } if (verbose) cat("\n") diff --git a/selectiveInference_1.0.1.tar.gz b/versions/selectiveInference_1.0.1.tar.gz similarity index 100% rename from selectiveInference_1.0.1.tar.gz rename to versions/selectiveInference_1.0.1.tar.gz diff --git a/selectiveInference_1.0.2.tar.gz b/versions/selectiveInference_1.0.2.tar.gz similarity index 100% rename from selectiveInference_1.0.2.tar.gz rename to versions/selectiveInference_1.0.2.tar.gz diff --git a/selectiveInference_1.0.tar.gz b/versions/selectiveInference_1.0.tar.gz similarity index 100% rename from selectiveInference_1.0.tar.gz rename to versions/selectiveInference_1.0.tar.gz From a8701c140b99c94fe2d8135937b9cecbbd424408 Mon Sep 17 00:00:00 2001 From: Nicolas Ballarini Date: Thu, 15 Jun 2017 10:01:59 +0200 Subject: [PATCH 156/396] bug in fixedLogitLassoInf alpha for fixedLassoInf with family="Binomial" was set to .1 --- selectiveInference/R/funs.fixedLogit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 5b673546..e99f2810 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -96,7 +96,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet vup[jj]=junk$vup sd[jj]=junk$sd # junk2=mypoly.int.lee(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) - junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) + junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=alpha) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea From b38d76ebdec9fd38923d5152444433506b3ab3d0 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sun, 23 Jul 2017 11:44:06 -0700 Subject: [PATCH 157/396] exporting the general pivot, renaming it TG.pvalue --- selectiveInference/NAMESPACE | 3 ++- selectiveInference/R/funs.fixedCox.R | 2 +- selectiveInference/R/funs.fixedLogit.R | 6 ++--- selectiveInference/R/funs.inf.R | 34 ++++++++++++++------------ 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 1eba3384..f9ed6975 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -12,7 +12,8 @@ export(lar,fs, estimateSigma, manyMeans,print.manyMeans, groupfs,groupfsInf, - scaleGroups,factorDesign + scaleGroups,factorDesign, + TG.pvalue ) S3method("coef", "lar") diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index ff778d99..c5918c4b 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -62,7 +62,7 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda 
vj=rep(0,length(bbar));vj[jj]=s2[jj] - junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) + junk=TG.pvalue(bbar,A1,b1,vj,MM) pv[jj] = junk$pv vlo[jj]=junk$vlo diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 5b673546..8cd8c349 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -89,14 +89,14 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet for(jj in 1:sum(m)){ vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] # compute p-values - junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) + junk=TG.pvalue(bbar,A1,b1,vj,MM) pv[jj] = junk$pv vlo[jj]=junk$vlo vup[jj]=junk$vup sd[jj]=junk$sd - # junk2=mypoly.int.lee(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) - junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) + # junk2=TG.pvalue(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) + junk2=TG.pvalue(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R index 423b4c3e..79cb6a58 100644 --- a/selectiveInference/R/funs.inf.R +++ b/selectiveInference/R/funs.inf.R @@ -247,24 +247,26 @@ aicStop <- function(x, y, action, df, sigma, mult=2, ntimes=2) { #these next two functions are used by the binomial and Cox options of fixedLassoInf -mypoly.pval.lee= -function(y, A, b, eta, Sigma, bits=NULL) { +TG.pvalue = function(Z, A, b, eta, Sigma, bits=NULL) { + # compute pvalues from poly lemma: full version from Lee et al for full matrix Sigma - nn=length(y) - eta=as.vector(eta) - temp = sum(eta*y) - vv=as.numeric(matrix(eta,nrow=1,ncol=nn)%*%Sigma%*%eta) - cc = Sigma%*%eta/vv - - z=(diag(nn)-matrix(cc,ncol=1)%*%eta)%*%y - rho=A%*%cc + n = length(Z) + eta = as.vector(eta) + b = as.vector(b) + target_estimate = sum(eta * Z) + var_estimate = as.numeric(matrix(eta, nrow=1, ncol=n) %*% Sigma %*% eta) + cross_cov = Sigma %*% eta - vec = (b- A %*% z)/rho - vlo = suppressWarnings(max(vec[rho<0])) - vup = suppressWarnings(min(vec[rho>0])) - sd=sqrt(vv) - pv = tnorm.surv(temp,0,sd,vlo,vup,bits) - return(list(pv=pv,vlo=vlo,vup=vup,sd=sd)) + resid = (diag(n) - matrix(cross_cov / var_estimate, ncol=1) %*% eta) %*% Z + rho = A %*% cross_cov / var_estimate + vec = (b - as.numeric(A %*% resid)) / rho + + vlo = suppressWarnings(max(vec[rho < 0])) + vup = suppressWarnings(min(vec[rho > 0])) + + sd = sqrt(var_estimate) + pv = tnorm.surv(target_estimate, 0 , sd, vlo, vup, bits) + return(list(pv=pv, vlo=vlo, vup=vup, sd=sd)) } From a86da851014a2ff0425d9687436e53d4947edcfc Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sun, 23 Jul 2017 11:54:34 -0700 Subject: [PATCH 158/396] exporting interval function as well --- selectiveInference/NAMESPACE | 3 ++- selectiveInference/R/funs.fixedCox.R | 2 +- selectiveInference/R/funs.fixedLogit.R | 4 +-- selectiveInference/R/funs.inf.R | 34 +++++++++++++++++--------- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index f9ed6975..7add9dfc 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -13,7 +13,8 @@ export(lar,fs, manyMeans,print.manyMeans, groupfs,groupfsInf, scaleGroups,factorDesign, - TG.pvalue + TG.pvalue, + TG.interval ) S3method("coef", "lar") diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index c5918c4b..00690072 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -69,7 +69,7 @@ b1= 
-(mydiag(s2)%*%MM)%*%s2*lambda vup[jj]=junk$vup sd[jj]=junk$sd - junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha) + junk2=TG.interval(bbar, vj, vlo[jj], vup[jj], sd[jj], alpha) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 8cd8c349..6e3c113b 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -95,8 +95,8 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet vlo[jj]=junk$vlo vup[jj]=junk$vup sd[jj]=junk$sd - # junk2=TG.pvalue(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) - junk2=TG.pvalue(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) + # junk2=TG.interval(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) + junk2=TG.interval(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R index 79cb6a58..ad64a3d1 100644 --- a/selectiveInference/R/funs.inf.R +++ b/selectiveInference/R/funs.inf.R @@ -271,24 +271,34 @@ TG.pvalue = function(Z, A, b, eta, Sigma, bits=NULL) { -mypoly.int.lee= - function(y,eta,vlo,vup,sd, alpha, gridrange=c(-100,100),gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { +TG.interval = function(Z, eta, vlo, vup, sd, alpha, + gridrange=c(-100,100), + gridpts=100, + griddepth=2, + flip=FALSE, + bits=NULL) { + # compute sel intervals from poly lemmma, full version from Lee et al for full matrix Sigma - temp = sum(eta*y) + target_estimate = sum(eta*Z) - xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) - fun = function(x) { tnorm.surv(temp,x,sd,vlo,vup,bits) } + param_grid = seq(gridrange[1]*sd, gridrange[2]*sd, length=gridpts) - int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) - tailarea = c(fun(int[1]),1-fun(int[2])) + pivot = function(param) { + tnorm.surv(target_estimate, param, sd, vlo, vup, bits) + } - if (flip) { - int = -int[2:1] - tailarea = tailarea[2:1] - } + interval = grid.search(param_grid, pivot, alpha/2, 1-alpha/2, gridpts, griddepth) + tailarea = c(pivot(interval[1]), 1- pivot(interval[2])) + + if (flip) { + interval = -interval[2:1] + tailarea = tailarea[2:1] + } - return(list(int=int,tailarea=tailarea)) + # int is not a good variable name, synonymous with integer... + return(list(int=interval, + tailarea=tailarea)) } From 33be053585011e5a9f7c213cb90d4897f1147b60 Mon Sep 17 00:00:00 2001 From: tibshirani Date: Sun, 23 Jul 2017 13:59:54 -0700 Subject: [PATCH 159/396] Update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 1a50e5b0..962151e0 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,11 @@ Code is in the directory selectiveInference/R. * funs.inf.R: Common functions for inference with fixed, fs, lar, and manymeans (but not group). * funs.lar.R: Inference for least angle regression. * funs.max.R: Some numerical approximations. Deprecated? + +## Installation +The latest release of the package can be installed through CRAN: + +```R +install.packages("selectiveInference") +``` +Code in repo is under development and may be unstable. 
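Editor's note: patches 157 and 158 above turn the truncated-Gaussian machinery into a public API. As a minimal sketch of how the two exports fit together at this point in the history (patch 162 below later gives them a common signature), assuming a build of the package from these patches is loaded; the toy selection event is chosen purely for illustration:

```R
# Toy selection event {z : A z <= b}; the observed Z satisfies it
A = diag(5)
b = rep(1, 5)
Z = rep(0, 5)
Sigma = diag(5)                  # covariance of Z
eta = c(1, 1, 0, 0, 0)           # target is sum(eta * mu)

# p-value for sum(eta * mu) = 0, plus the truncation limits vlo/vup and sd
res = TG.pvalue(Z, A, b, eta, Sigma)
res$pv

# at this stage in the history TG.interval still takes the limits directly
TG.interval(Z, eta, res$vlo, res$vup, res$sd, alpha = 0.1)$int
```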
From 4710b937ac39e44b99f15ae83c53f7416eb2a971 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Mon, 24 Jul 2017 11:05:56 -0700
Subject: [PATCH 160/396] added man pages for TG.pvalue, TG.interval

---
 selectiveInference/R/funs.inf.R       | 10 +--
 selectiveInference/man/TG.interval.Rd | 94 +++++++++++++++++++++++++++
 selectiveInference/man/TG.pvalue.Rd   | 78 ++++++++++++++++++++++
 3 files changed, 177 insertions(+), 5 deletions(-)
 create mode 100644 selectiveInference/man/TG.interval.Rd
 create mode 100644 selectiveInference/man/TG.pvalue.Rd

diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R
index ad64a3d1..1b454fad 100644
--- a/selectiveInference/R/funs.inf.R
+++ b/selectiveInference/R/funs.inf.R
@@ -247,17 +247,17 @@ aicStop <- function(x, y, action, df, sigma, mult=2, ntimes=2) {

 #these next two functions are used by the binomial and Cox options of fixedLassoInf

-TG.pvalue = function(Z, A, b, eta, Sigma, bits=NULL) {
+TG.pvalue = function(Z, A, b, eta, Sigma, null_value=0, bits=NULL) {

 # compute pvalues from poly lemma:  full version from Lee et al for full matrix Sigma

 n = length(Z)
- eta = as.vector(eta)
+ eta = matrix(eta, ncol=1, nrow=n)
 b = as.vector(b)
 target_estimate = sum(eta * Z)
- var_estimate = as.numeric(matrix(eta, nrow=1, ncol=n) %*% Sigma %*% eta)
+ var_estimate = sum(matrix(eta, nrow=1, ncol=n) %*% (Sigma %*% eta))
 cross_cov = Sigma %*% eta

- resid = (diag(n) - matrix(cross_cov / var_estimate, ncol=1) %*% eta) %*% Z
+ resid = (diag(n) - matrix(cross_cov / var_estimate, ncol=1, nrow=n) %*% matrix(eta, nrow=1, ncol=n)) %*% Z
 rho = A %*% cross_cov / var_estimate
 vec = (b - as.numeric(A %*% resid)) / rho
@@ -265,7 +265,7 @@ TG.pvalue = function(Z, A, b, eta, Sigma, bits=NULL) {
 vup = suppressWarnings(min(vec[rho > 0]))

 sd = sqrt(var_estimate)
- pv = tnorm.surv(target_estimate, 0 , sd, vlo, vup, bits)
+ pv = tnorm.surv(target_estimate, null_value, sd, vlo, vup, bits)
 return(list(pv=pv, vlo=vlo, vup=vup, sd=sd))
 }

diff --git a/selectiveInference/man/TG.interval.Rd b/selectiveInference/man/TG.interval.Rd
new file mode 100644
index 00000000..d5426912
--- /dev/null
+++ b/selectiveInference/man/TG.interval.Rd
@@ -0,0 +1,94 @@
+\name{TG.interval}
+\alias{TG.interval}
+
+\title{
+Truncated Gaussian confidence interval.
+}
+\description{
+Compute truncated Gaussian interval of Lee et al. (2016) with
+arbitrary affine selection and covariance.
+}
+\usage{
+TG.interval(Z, eta, vlo, vup, sd, alpha,
+            gridrange=c(-100,100),
+            gridpts=100,
+            griddepth=2,
+            flip=FALSE,
+            bits=NULL)
+}
+\arguments{
+\item{Z}{
+Observed data (assumed to follow N(mu, Sigma) with sum(eta*mu)=null_value)
+}
+\item{eta}{
+Determines the target sum(eta*mu) and estimate sum(eta*Z).
+}
+\item{vlo}{
+Lower truncation limits for statistic. Can be computed with TG.pvalue.
+}
+\item{vup}{
+Upper truncation limits for statistic. Can be computed with TG.pvalue.
+}
+\item{sd}{
+Standard error of sum(eta*Z).
+}
+\item{alpha}{
+Significance level for confidence intervals (target is miscoverage alpha/2 in each tail)
+}
+\item{gridrange}{
+Grid range for constructing confidence intervals, on the standardized scale.
+}
+\item{gridpts}{
+Number of grid points used in the grid search for the interval endpoints.
+}
+\item{griddepth}{
+Number of levels of refinement used in the grid search.
+}
+\item{flip}{
+If TRUE, the interval is negated and reversed (for inference carried out on -eta).
+}
+\item{bits}{
+Number of bits to be used for p-value and confidence interval calculations. Default is
+NULL, in which case standard floating point calculations are performed.
When not NULL,
+multiple precision floating point calculations are performed with the specified number
+of bits, using the R package \code{Rmpfr} (if this package is not installed, then a
+warning is thrown, and standard floating point calculations are pursued).
+Note: standard double precision uses 53 bits
+so, e.g., a choice of 200 bits uses about 4 times double precision. The confidence
+interval computation is sometimes numerically challenging, and the extra precision can be
+helpful (though computationally more costly). In particular, extra precision might be tried
+if the values in the output columns of \code{tailarea} differ noticeably from alpha/2.
+}
+}
+
+\details{
+This function computes selective confidence intervals based on the polyhedral
+lemma of Lee et al. (2016).
+}
+
+\value{
+\item{int}{Selective confidence interval.}
+\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval.}
+}
+
+\references{
+Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2016).
+Exact post-selection inference, with application to the lasso. Annals of Statistics, 44(3), 907-927.
+
+Jonathan Taylor and Robert Tibshirani (2017) Post-selection inference for L1-penalized likelihood models.
+Canadian Journal of Statistics, xx, 1-21. (Volume still not posted)
+
+}
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\examples{
+
+A = diag(5)
+b = rep(1, 5)
+Z = rep(0, 5)
+Sigma = diag(5)
+eta = as.numeric(c(1, 1, 0, 0, 0))
+V = TG.pvalue(Z, A, b, Sigma, eta)
+TG.interval(Z, eta, V$vlo, V$vup, V$sd, 0.05)
+}
+
\ No newline at end of file
diff --git a/selectiveInference/man/TG.pvalue.Rd b/selectiveInference/man/TG.pvalue.Rd
new file mode 100644
index 00000000..5625d9d1
--- /dev/null
+++ b/selectiveInference/man/TG.pvalue.Rd
@@ -0,0 +1,78 @@
+\name{TG.pvalue}
+\alias{TG.pvalue}
+
+\title{
+Truncated Gaussian p-value.
+}
+\description{
+Compute truncated Gaussian p-value of Lee et al. (2016) with
+arbitrary affine selection and covariance.
+}
+\usage{
+TG.pvalue(Z, A, b, eta, Sigma, null_value=0, bits=NULL)
+}
+\arguments{
+\item{Z}{
+Observed data (assumed to follow N(mu, Sigma) with sum(eta*mu)=null_value)
+}
+\item{A}{
+Matrix specifying affine inequalities AZ <= b
+}
+\item{b}{
+Offsets in the affine inequalities AZ <= b.
+}
+\item{eta}{
+Determines the target sum(eta*mu) and estimate sum(eta*Z).
+}
+\item{Sigma}{
+Covariance matrix of Z.
+}
+\item{null_value}{Hypothesized value of sum(eta*mu) under the null.
+}
+\item{bits}{
+Number of bits to be used for p-value and confidence interval calculations. Default is
+NULL, in which case standard floating point calculations are performed. When not NULL,
+multiple precision floating point calculations are performed with the specified number
+of bits, using the R package \code{Rmpfr} (if this package is not installed, then a
+warning is thrown, and standard floating point calculations are pursued).
+Note: standard double precision uses 53 bits
+so, e.g., a choice of 200 bits uses about 4 times double precision. The confidence
+interval computation is sometimes numerically challenging, and the extra precision can be
+helpful (though computationally more costly). In particular, extra precision might be tried
+if the values in the output columns of \code{tailarea} differ noticeably from alpha/2.
+}
+}
+\details{
+This function computes selective p-values based on the polyhedral
+lemma of Lee et al. (2016).
+}
+
+\value{
+\item{pv}{One-sided P-values for active variables; uses the fact that we have conditioned on the sign.}
+\item{vlo}{Lower truncation limits for statistic}
+\item{vup}{Upper truncation limits for statistic}
+\item{sd}{Standard error of sum(eta*Z)}
+}
+
+\references{
+Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2016).
+Exact post-selection inference, with application to the lasso. Annals of Statistics, 44(3), 907-927.
+
+Jonathan Taylor and Robert Tibshirani (2017) Post-selection inference for L1-penalized likelihood models.
+Canadian Journal of Statistics, xx, 1-21. (Volume still not posted)
+
+}
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\examples{
+
+A = diag(5)
+b = rep(1, 5)
+Z = rep(0, 5)
+Sigma = diag(5)
+eta = as.numeric(c(1, 1, 0, 0, 0))
+TG.pvalue(Z, A, b, Sigma, eta)
+TG.pvalue(Z, A, b, Sigma, eta, null_value=1)
+
+}
+
\ No newline at end of file
From 275ebff654ce7e1ee87296219548fefaab65caee Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Mon, 24 Jul 2017 11:10:56 -0700
Subject: [PATCH 161/396] trying to get matrix multiplies to work

---
 selectiveInference/R/funs.inf.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R
index 1b454fad..0506c292 100644
--- a/selectiveInference/R/funs.inf.R
+++ b/selectiveInference/R/funs.inf.R
@@ -253,9 +253,9 @@ TG.pvalue = function(Z, A, b, eta, Sigma, null_value=0, bits=NULL) {
 n = length(Z)
 eta = matrix(eta, ncol=1, nrow=n)
 b = as.vector(b)
- target_estimate = sum(eta * Z)
- var_estimate = sum(matrix(eta, nrow=1, ncol=n) %*% (Sigma %*% eta))
- cross_cov = Sigma %*% eta
+ target_estimate = sum(as.numeric(eta) * as.numeric(Z))
+ var_estimate = sum(matrix(eta, nrow=1, ncol=n) %*% (Sigma %*% matrix(eta, ncol=1, nrow=n)))
+ cross_cov = Sigma %*% matrix(eta, ncol=1, nrow=n)

 resid = (diag(n) - matrix(cross_cov / var_estimate, ncol=1, nrow=n) %*% matrix(eta, nrow=1, ncol=n)) %*% Z
 rho = A %*% cross_cov / var_estimate

From 2a98745a4d06872f27f39f42fe7c07c91f2afd85 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Wed, 2 Aug 2017 10:33:52 -0700
Subject: [PATCH 162/396] created new TG.limits function so TG.pvalue and
 TG.interval have same signature

---
 selectiveInference/NAMESPACE           |  1 +
 selectiveInference/R/funs.fixedCox.R   |  4 +-
 selectiveInference/R/funs.fixedLogit.R |  5 +--
 selectiveInference/R/funs.inf.R        | 27 +++++++++---
 selectiveInference/man/TG.interval.Rd  | 24 +++++------
 selectiveInference/man/TG.limits.Rd    | 60 ++++++++++++++++++++++++++
 selectiveInference/man/TG.pvalue.Rd    |  2 +-
 7 files changed, 97 insertions(+), 26 deletions(-)
 create mode 100644 selectiveInference/man/TG.limits.Rd

diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE
index 7add9dfc..099fdc5c 100644
--- a/selectiveInference/NAMESPACE
+++ b/selectiveInference/NAMESPACE
@@ -14,6 +14,7 @@ export(lar,fs,
        groupfs,groupfsInf,
        scaleGroups,factorDesign,
        TG.pvalue,
+       TG.limits,
        TG.interval
 )

diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R
index 00690072..cfe764e4 100644
--- a/selectiveInference/R/funs.fixedCox.R
+++
b/selectiveInference/R/funs.fixedCox.R @@ -62,14 +62,14 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda vj=rep(0,length(bbar));vj[jj]=s2[jj] - junk=TG.pvalue(bbar,A1,b1,vj,MM) + junk=TG.pvalue(bbar, A1, b1, vj,MM) pv[jj] = junk$pv vlo[jj]=junk$vlo vup[jj]=junk$vup sd[jj]=junk$sd - junk2=TG.interval(bbar, vj, vlo[jj], vup[jj], sd[jj], alpha) + junk2=TG.interval(bbar, A1, b1, vj, MM, alpha) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index c4d00deb..19936b09 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -89,15 +89,14 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet for(jj in 1:sum(m)){ vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] # compute p-values - junk=TG.pvalue(bbar,A1,b1,vj,MM) + junk=TG.pvalue(bbar, A1, b1, vj, MM) pv[jj] = junk$pv vlo[jj]=junk$vlo vup[jj]=junk$vup sd[jj]=junk$sd - # junk2=TG.interval(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) - junk2=TG.interval(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) + junk2=TG.interval(bbar, A1, b1, vj, MM,alpha=alpha) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R index 0506c292..c3412d81 100644 --- a/selectiveInference/R/funs.inf.R +++ b/selectiveInference/R/funs.inf.R @@ -247,13 +247,19 @@ aicStop <- function(x, y, action, df, sigma, mult=2, ntimes=2) { #these next two functions are used by the binomial and Cox options of fixedLassoInf -TG.pvalue = function(Z, A, b, eta, Sigma, null_value=0, bits=NULL) { +# Compute the truncation interval and SD of the corresponding Gaussian + +TG.limits = function(Z, A, b, eta, Sigma=NULL) { + + if (is.null(Sigma)) { + Sigma = diag(rep(1, n)) + } # compute pvalues from poly lemma: full version from Lee et al for full matrix Sigma + n = length(Z) eta = matrix(eta, ncol=1, nrow=n) b = as.vector(b) - target_estimate = sum(as.numeric(eta) * as.numeric(Z)) var_estimate = sum(matrix(eta, nrow=1, ncol=n) %*% (Sigma %*% matrix(eta, ncol=1, nrow=n))) cross_cov = Sigma %*% matrix(eta, ncol=1, nrow=n) @@ -265,13 +271,19 @@ TG.pvalue = function(Z, A, b, eta, Sigma, null_value=0, bits=NULL) { vup = suppressWarnings(min(vec[rho > 0])) sd = sqrt(var_estimate) - pv = tnorm.surv(target_estimate, null_value, sd, vlo, vup, bits) - return(list(pv=pv, vlo=vlo, vup=vup, sd=sd)) + return(list(vlo=vlo, vup=vup, sd=sd)) } +TG.pvalue = function(Z, A, b, eta, Sigma=NULL, null_value=0, bits=NULL) { + + limits.info = TG.limits(Z, A, b, eta, Sigma) + target_estimate = sum(as.numeric(eta) * as.numeric(Z)) + pv = tnorm.surv(target_estimate, null_value, limits.info$sd, limits.info$vlo, limits.info$vup, bits) + return(list(pv=pv, vlo=limits.info$vlo, vup=limits.info$vup, sd=limits.info$sd)) +} -TG.interval = function(Z, eta, vlo, vup, sd, alpha, +TG.interval = function(Z, A, b, eta, Sigma=NULL, alpha=0.1, gridrange=c(-100,100), gridpts=100, griddepth=2, @@ -280,12 +292,13 @@ TG.interval = function(Z, eta, vlo, vup, sd, alpha, # compute sel intervals from poly lemmma, full version from Lee et al for full matrix Sigma + limits.info = TG.limits(Z, A, b, eta, Sigma) target_estimate = sum(as.numeric(eta) * as.numeric(Z)) - param_grid = seq(gridrange[1]*sd, gridrange[2]*sd, length=gridpts) + param_grid = seq(gridrange[1] * limits.info$sd, gridrange[2] * limits.info$sd, length=gridpts) pivot = function(param) { - tnorm.surv(target_estimate, param, sd, vlo, vup, bits) + tnorm.surv(target_estimate, param, 
limits.info$sd, limits.info$vlo, limits.info$vup, bits)
 }

 interval = grid.search(param_grid, pivot, alpha/2, 1-alpha/2, gridpts, griddepth)

diff --git a/selectiveInference/man/TG.interval.Rd b/selectiveInference/man/TG.interval.Rd
index d5426912..d9a0d6e9 100644
--- a/selectiveInference/man/TG.interval.Rd
+++ b/selectiveInference/man/TG.interval.Rd
@@ -9,7 +9,7 @@ Compute truncated Gaussian interval of Lee et al. (2016) with
 arbitrary affine selection and covariance.
 }
 \usage{
-TG.interval(Z, eta, vlo, vup, sd, alpha,
+TG.interval(Z, A, b, eta, Sigma=NULL, alpha=0.1,
             gridrange=c(-100,100),
             gridpts=100,
             griddepth=2,
@@ -20,18 +20,18 @@ TG.interval(Z, eta, vlo, vup, sd, alpha,
 \item{Z}{
 Observed data (assumed to follow N(mu, Sigma) with sum(eta*mu)=null_value)
 }
+\item{A}{
+Matrix specifying affine inequalities AZ <= b
+}
+\item{b}{
+Offsets in the affine inequalities AZ <= b.
+}
 \item{eta}{
 Determines the target sum(eta*mu) and estimate sum(eta*Z).
 }
-\item{vlo}{
-Lower truncation limits for statistic. Can be computed with TG.pvalue.
-}
-\item{vup}{
-Upper truncation limits for statistic. Can be computed with TG.pvalue.
-}
-\item{sd}{
-Standard error of sum(eta*Z).
-}
+\item{Sigma}{
+Covariance matrix of Z. Defaults to identity.
+}
 \item{alpha}{
 Significance level for confidence intervals (target is miscoverage alpha/2 in each tail)
 }
@@ -82,13 +82,11 @@ Canadian Journal of Statistics, xx, 1-21. (Volume still not posted)
 \author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}

 \examples{
-
 A = diag(5)
 b = rep(1, 5)
 Z = rep(0, 5)
 Sigma = diag(5)
 eta = as.numeric(c(1, 1, 0, 0, 0))
-V = TG.pvalue(Z, A, b, Sigma, eta)
-TG.interval(Z, eta, V$vlo, V$vup, V$sd, 0.05)
+TG.interval(Z, A, b, Sigma, eta)
 }
 
\ No newline at end of file
diff --git a/selectiveInference/man/TG.limits.Rd b/selectiveInference/man/TG.limits.Rd
new file mode 100644
index 00000000..8a71f274
--- /dev/null
+++ b/selectiveInference/man/TG.limits.Rd
@@ -0,0 +1,60 @@
+\name{TG.limits}
+\alias{TG.limits}
+
+\title{
+Truncation limits and standard deviation.
+}
+\description{
+Compute truncated limits and SD for use in computing
+p-values or confidence intervals of Lee et al. (2016).
+}
+\usage{
+TG.limits(Z, A, b, eta, Sigma)
+}
+\arguments{
+\item{Z}{
+Observed data (assumed to follow N(mu, Sigma) with sum(eta*mu)=null_value)
+}
+\item{A}{
+Matrix specifying affine inequalities AZ <= b
+}
+\item{b}{
+Offsets in the affine inequalities AZ <= b.
+}
+\item{eta}{
+Determines the target sum(eta*mu) and estimate sum(eta*Z).
+}
+\item{Sigma}{
+Covariance matrix of Z. Defaults to identity.
+}
+}
+\details{
+This function computes the limits of truncation and the implied
+standard deviation in the polyhedral lemma of Lee et al. (2016).
+}
+
+\value{
+\item{vlo}{Lower truncation limits for statistic}
+\item{vup}{Upper truncation limits for statistic}
+\item{sd}{Standard error of sum(eta*Z)}
+}
+
+\references{
+Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2016).
+Exact post-selection inference, with application to the lasso. Annals of Statistics, 44(3), 907-927.
+
+Jonathan Taylor and Robert Tibshirani (2017) Post-selection inference for L1-penalized likelihood models.
+Canadian Journal of Statistics, xx, 1-21.
(Volume still not posted) + +} +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +A = diag(5) +b = rep(1, 5) +Z = rep(0, 5) +Sigma = diag(5) +eta = as.numeric(c(1, 1, 0, 0, 0)) +TG.limits(Z, A, b, Sigma, eta) +} + \ No newline at end of file diff --git a/selectiveInference/man/TG.pvalue.Rd b/selectiveInference/man/TG.pvalue.Rd index 5625d9d1..a1ec9cef 100644 --- a/selectiveInference/man/TG.pvalue.Rd +++ b/selectiveInference/man/TG.pvalue.Rd @@ -25,7 +25,7 @@ Offsets in the affine inequalities AZ <= b. Determines the target sum(eta*mu) and estimate sum(eta*Z). } \item{Sigma}{ -Covariance matrix of Z. +Covariance matrix of Z. Defaults to identity. } \item{null_value}{Hypothesized value of sum(eta*mu) under the null. } From 9d417492db875115a772f05dfdf54401c5ce83c1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 2 Aug 2017 17:27:55 -0700 Subject: [PATCH 163/396] BF: arguments wrong in example --- selectiveInference/man/TG.interval.Rd | 2 +- selectiveInference/man/TG.limits.Rd | 2 +- selectiveInference/man/TG.pvalue.Rd | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/selectiveInference/man/TG.interval.Rd b/selectiveInference/man/TG.interval.Rd index d9a0d6e9..287f9d02 100644 --- a/selectiveInference/man/TG.interval.Rd +++ b/selectiveInference/man/TG.interval.Rd @@ -87,6 +87,6 @@ b = rep(1, 5) Z = rep(0, 5) Sigma = diag(5) eta = as.numeric(c(1, 1, 0, 0, 0)) -TG.interval(Z, A, b, Sigma, eta) +TG.interval(Z, A, b, eta, Sigma) } \ No newline at end of file diff --git a/selectiveInference/man/TG.limits.Rd b/selectiveInference/man/TG.limits.Rd index 8a71f274..dff98bec 100644 --- a/selectiveInference/man/TG.limits.Rd +++ b/selectiveInference/man/TG.limits.Rd @@ -55,6 +55,6 @@ b = rep(1, 5) Z = rep(0, 5) Sigma = diag(5) eta = as.numeric(c(1, 1, 0, 0, 0)) -TG.limits(Z, A, b, Sigma, eta) +TG.limits(Z, A, b, eta, Sigma) } \ No newline at end of file diff --git a/selectiveInference/man/TG.pvalue.Rd b/selectiveInference/man/TG.pvalue.Rd index a1ec9cef..8dd86def 100644 --- a/selectiveInference/man/TG.pvalue.Rd +++ b/selectiveInference/man/TG.pvalue.Rd @@ -71,8 +71,8 @@ b = rep(1, 5) Z = rep(0, 5) Sigma = diag(5) eta = as.numeric(c(1, 1, 0, 0, 0)) -TG.pvalue(Z, A, b, Sigma, eta) -TG.pvalue(Z, A, b, Sigma, eta, null_value=1) +TG.pvalue(Z, A, b, eta, Sigma) +TG.pvalue(Z, A, b, eta, Sigma, null_value=1) } \ No newline at end of file From fd0070a51aba038b6f2fd8887caad8cc43b056ac Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 2 Aug 2017 23:18:35 -0700 Subject: [PATCH 164/396] using TG base for all poly calcs --- selectiveInference/R/funs.fixed.R | 11 +++-- selectiveInference/R/funs.fs.R | 22 ++++++--- selectiveInference/R/funs.inf.R | 79 ++++++++++--------------------- selectiveInference/R/funs.lar.R | 22 ++++++--- 4 files changed, 66 insertions(+), 68 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index b30d04ce..ccf45e54 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -122,14 +122,19 @@ else{ vj = vj / mj # Standardize (divide by norm of vj) sign[j] = sign(sum(vj*y)) vj = sign[j] * vj - a = poly.pval(y,G,u,vj,sigma,bits) + + limits.info = TG.limits(y, -G, -u, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) - a 
= poly.int(y,G,u,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) tailarea[j,] = a$tailarea } diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index 3f8a87c4..fcd7c71e 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -299,15 +299,21 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic vj = vreg[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - a = poly.pval(y,Gj,uj,vj,sigma,bits) + + limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.base(limits.info, bits=bits) + pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea } @@ -349,15 +355,19 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic Gj = rbind(G,vj) uj = c(u,0) - a = poly.pval(y,Gj,uj,vj,sigma,bits) + limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea } diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R index c3412d81..0cec0954 100644 --- a/selectiveInference/R/funs.inf.R +++ b/selectiveInference/R/funs.inf.R @@ -1,48 +1,3 @@ -# Main p-value function - -poly.pval <- function(y, G, u, v, sigma, bits=NULL) { - z = sum(v*y) - vv = sum(v^2) - sd = sigma*sqrt(vv) - - rho = G %*% v / vv - vec = (u - G %*% y + rho*z) / rho - vlo = suppressWarnings(max(vec[rho>0])) - vup = suppressWarnings(min(vec[rho<0])) - - pv = tnorm.surv(z,0,sd,vlo,vup,bits) - return(list(pv=pv,vlo=vlo,vup=vup)) -} - -# Main confidence interval function - -poly.int <- function(y, G, u, v, sigma, alpha, gridrange=c(-100,100), - gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { - - z = sum(v*y) - vv = sum(v^2) - sd = sigma*sqrt(vv) - - rho = G %*% v / vv - vec = (u - G %*% y + rho*z) / rho - vlo = suppressWarnings(max(vec[rho>0])) - vup = suppressWarnings(min(vec[rho<0])) - - xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) - fun = function(x) { tnorm.surv(z,x,sd,vlo,vup,bits) } - - int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) - tailarea = c(fun(int[1]),1-fun(int[2])) - - if (flip) { - int = -int[2:1] - tailarea = tailarea[2:1] - } - - return(list(int=int,tailarea=tailarea)) -} - -############################## # Assuming that grid is in sorted order from smallest to largest, # and vals are 
monotonically increasing function values over the @@ -251,6 +206,8 @@ aicStop <- function(x, y, action, df, sigma, mult=2, ntimes=2) { TG.limits = function(Z, A, b, eta, Sigma=NULL) { + target_estimate = sum(as.numeric(eta) * as.numeric(Z)) + if (is.null(Sigma)) { Sigma = diag(rep(1, n)) } @@ -271,16 +228,14 @@ TG.limits = function(Z, A, b, eta, Sigma=NULL) { vup = suppressWarnings(min(vec[rho > 0])) sd = sqrt(var_estimate) - return(list(vlo=vlo, vup=vup, sd=sd)) + return(list(vlo=vlo, vup=vup, sd=sd, estimate=target_estimate)) } TG.pvalue = function(Z, A, b, eta, Sigma=NULL, null_value=0, bits=NULL) { limits.info = TG.limits(Z, A, b, eta, Sigma) - target_estimate = sum(as.numeric(eta) * as.numeric(Z)) - pv = tnorm.surv(target_estimate, null_value, limits.info$sd, limits.info$vlo, limits.info$vup, bits) - return(list(pv=pv, vlo=limits.info$vlo, vup=limits.info$vup, sd=limits.info$sd)) + return(TG.pvalue.base(limits.info, null_value=null_value, bits=bits)) } TG.interval = function(Z, A, b, eta, Sigma=NULL, alpha=0.1, @@ -290,15 +245,29 @@ TG.interval = function(Z, A, b, eta, Sigma=NULL, alpha=0.1, flip=FALSE, bits=NULL) { + limits.info = TG.limits(Z, A, b, eta, Sigma) + + return(TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + griddepth=griddepth, + flip=flip, + bits=bits)) +} + +TG.interval.base = function(limits.info, alpha=0.1, + gridrange=c(-100,100), + gridpts=100, + griddepth=2, + flip=FALSE, + bits=NULL) { + # compute sel intervals from poly lemmma, full version from Lee et al for full matrix Sigma - limits.info = TG.limits(Z, A, b, eta, Sigma) - target_estimate = sum(as.numeric(eta) * as.numeric(Z)) - param_grid = seq(gridrange[1] * limits.info$sd, gridrange[2] * limits.info$sd, length=gridpts) pivot = function(param) { - tnorm.surv(target_estimate, param, limits.info$sd, limits.info$vlo, limits.info$vup, bits) + tnorm.surv(limits.info$estimate, param, limits.info$sd, limits.info$vlo, limits.info$vup, bits) } interval = grid.search(param_grid, pivot, alpha/2, 1-alpha/2, gridpts, griddepth) @@ -314,6 +283,10 @@ TG.interval = function(Z, A, b, eta, Sigma=NULL, alpha=0.1, tailarea=tailarea)) } +TG.pvalue.base = function(limits.info, null_value=0, bits=NULL) { + pv = tnorm.surv(limits.info$estimate, null_value, limits.info$sd, limits.info$vlo, limits.info$vup, bits) + return(list(pv=pv, vlo=limits.info$vlo, vup=limits.info$vup, sd=limits.info$sd)) +} mydiag=function(x){ diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index 37d0ae32..df6c62c2 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -379,15 +379,20 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai vj = vreg[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - a = poly.pval(y,Gj,uj,vj,sigma,bits) + + limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.lowlevel(limits.info, bits=bits) pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) + a = TG.interval.lowlevel(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea @@ -433,15 +438,20 @@ larInf <- function(obj, 
sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai Gj = rbind(G,vj) uj = c(u,0) - a = poly.pval(y,Gj,uj,vj,sigma,bits) + limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) + a = TG.pvalue.lowlevel(limits.info, bits=bits) + pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, - flip=(sign[j]==-1),bits=bits) + a = TG.interval.lowlevel(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea } From 5d5d794ef8fd4c8e4e0700ded6b6e1ff7b3beaf2 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 2 Aug 2017 23:37:41 -0700 Subject: [PATCH 165/396] BF: forgot to rename lowlevel to base --- selectiveInference/R/funs.lar.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index df6c62c2..eb31b4e8 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -381,18 +381,18 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai vj = vj / mj # Standardize (divide by norm of vj) limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) - a = TG.pvalue.lowlevel(limits.info, bits=bits) + a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv sxj = sx[vars[j]] vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = TG.interval.lowlevel(limits.info, - alpha=alpha, - gridrange=gridrange, - flip=(sign[j]==-1), - bits=bits) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea @@ -439,7 +439,7 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai uj = c(u,0) limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) - a = TG.pvalue.lowlevel(limits.info, bits=bits) + a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv sxj = sx[vars[j]] @@ -447,11 +447,11 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) - a = TG.interval.lowlevel(limits.info, - alpha=alpha, - gridrange=gridrange, - flip=(sign[j]==-1), - bits=bits) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) tailarea[j,] = a$tailarea } From c8861e0a44e6edcfe06145ceb0b9355eb7d481d2 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 3 Aug 2017 11:23:08 -0700 Subject: [PATCH 166/396] added constraint description in docs, warning about constraint not being satisfied --- selectiveInference/R/funs.inf.R | 4 ++++ selectiveInference/man/TG.interval.Rd | 1 + selectiveInference/man/TG.limits.Rd | 1 + selectiveInference/man/TG.pvalue.Rd | 1 + 4 files changed, 7 insertions(+) diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R index 
0cec0954..9d7e740f 100644
--- a/selectiveInference/R/funs.inf.R
+++ b/selectiveInference/R/funs.inf.R
@@ -208,6 +208,10 @@ TG.limits = function(Z, A, b, eta, Sigma=NULL) {

 target_estimate = sum(as.numeric(eta) * as.numeric(Z))

+ if (max(A %*% as.numeric(Z) - b) > 0) {
+     warning('Constraint not satisfied. A %*% Z should be elementwise less than or equal to b')
+ }
+
 if (is.null(Sigma)) {
     Sigma = diag(rep(1, n))
 }
diff --git a/selectiveInference/man/TG.interval.Rd b/selectiveInference/man/TG.interval.Rd
index 287f9d02..c898d179 100644
--- a/selectiveInference/man/TG.interval.Rd
+++ b/selectiveInference/man/TG.interval.Rd
@@ -7,6 +7,7 @@ Truncated Gaussian confidence interval.
 \description{
 Compute truncated Gaussian interval of Lee et al. (2016) with
 arbitrary affine selection and covariance.
+Z should satisfy A %*% Z elementise less than or equal to b.
 }
 \usage{
 TG.interval(Z, A, b, eta, Sigma=NULL, alpha=0.1,
diff --git a/selectiveInference/man/TG.limits.Rd b/selectiveInference/man/TG.limits.Rd
index dff98bec..cc005fc6 100644
--- a/selectiveInference/man/TG.limits.Rd
+++ b/selectiveInference/man/TG.limits.Rd
@@ -7,6 +7,7 @@ Truncation limits and standard deviation.
 \description{
 Compute truncated limits and SD for use in computing
 p-values or confidence intervals of Lee et al. (2016).
+Z should satisfy A %*% Z elementise less than or equal to b.
 }
 \usage{
 TG.limits(Z, A, b, eta, Sigma)
diff --git a/selectiveInference/man/TG.pvalue.Rd b/selectiveInference/man/TG.pvalue.Rd
index 716e5b35..d6e54b34 100644
--- a/selectiveInference/man/TG.pvalue.Rd
+++ b/selectiveInference/man/TG.pvalue.Rd
@@ -7,6 +7,7 @@ Truncated Gaussian p-value.
 \description{
 Compute truncated Gaussian p-value of Lee et al. (2016) with
 arbitrary affine selection and covariance.
+Z should satisfy A %*% Z elementise less than or equal to b.
 }
 \usage{
 TG.pvalue(Z, A, b, eta, Sigma, null_value=0, bits=NULL)

From 4a9f1cf3607f4e676789d4f4461b2ecc91e0b646 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Thu, 3 Aug 2017 11:24:34 -0700
Subject: [PATCH 167/396] typo

---
 selectiveInference/man/TG.interval.Rd | 2 +-
 selectiveInference/man/TG.limits.Rd   | 2 +-
 selectiveInference/man/TG.pvalue.Rd   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/selectiveInference/man/TG.interval.Rd b/selectiveInference/man/TG.interval.Rd
index c898d179..50d2b938 100644
--- a/selectiveInference/man/TG.interval.Rd
+++ b/selectiveInference/man/TG.interval.Rd
@@ -7,7 +7,7 @@ Truncated Gaussian confidence interval.
 \description{
 Compute truncated Gaussian interval of Lee et al. (2016) with
 arbitrary affine selection and covariance.
-Z should satisfy A %*% Z elementise less than or equal to b.
+Z should satisfy A %*% Z elementwise less than or equal to b.
 }
 \usage{
 TG.interval(Z, A, b, eta, Sigma=NULL, alpha=0.1,
diff --git a/selectiveInference/man/TG.limits.Rd b/selectiveInference/man/TG.limits.Rd
index cc005fc6..e2df17e1 100644
--- a/selectiveInference/man/TG.limits.Rd
+++ b/selectiveInference/man/TG.limits.Rd
@@ -7,7 +7,7 @@ Truncation limits and standard deviation.
 \description{
 Compute truncated limits and SD for use in computing
 p-values or confidence intervals of Lee et al. (2016).
-Z should satisfy A %*% Z elementise less than or equal to b.
+Z should satisfy A %*% Z elementwise less than or equal to b.
}
\usage{
TG.limits(Z, A, b, eta, Sigma)
diff --git a/selectiveInference/man/TG.pvalue.Rd b/selectiveInference/man/TG.pvalue.Rd
index d6e54b34..716e5b35 100644
--- a/selectiveInference/man/TG.pvalue.Rd
+++ b/selectiveInference/man/TG.pvalue.Rd
@@ -7,7 +7,7 @@ Truncated Gaussian p-value.
 \description{
 Compute truncated Gaussian p-value of Lee et al. (2016) with
 arbitrary affine selection and covariance.
-Z should satisfy A %*% Z elementise less than or equal to b.
+Z should satisfy A %*% Z elementwise less than or equal to b.
 }
 \usage{
 TG.pvalue(Z, A, b, eta, Sigma, null_value=0, bits=NULL)

From 222fc7c4aaf65739e98c94f034d3187850527784 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Fri, 4 Aug 2017 13:35:44 -0700
Subject: [PATCH 168/396] BF: correcting the conditioning for FS

---
 selectiveInference/R/funs.fs.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R
index fcd7c71e..db0968b1 100644
--- a/selectiveInference/R/funs.fs.R
+++ b/selectiveInference/R/funs.fs.R
@@ -132,7 +132,8 @@ fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE,
 # Gamma matrix!
 if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n))
 working_x = t(sign_score*t(working_x))
- Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r
+
+ Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-r-1
 Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1
 Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1

From b5f4bfd7188f150ed7a20940df442701cc21f036 Mon Sep 17 00:00:00 2001
From: kevinbfry
Date: Tue, 8 Aug 2017 01:19:13 -0700
Subject: [PATCH 169/396] Added debiased lasso for linear regression when
 type='full' and n < p

---
 selectiveInference/R/funs.fixed.R   | 451 ++++++++++++++++++++--------
 selectiveInference/R/linear.tests.R | 172 +++++++++++
 2 files changed, 500 insertions(+), 123 deletions(-)
 create mode 100644 selectiveInference/R/linear.tests.R

diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R
index ccf45e54..3258b804 100644
--- a/selectiveInference/R/funs.fixed.R
+++ b/selectiveInference/R/funs.fixed.R
@@ -7,112 +7,170 @@ sigma=NULL, alpha=0.1,
 type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1,
 gridrange=c(-100,100), bits=NULL, verbose=FALSE) {
- family = match.arg(family)
+ family = match.arg(family)
 this.call = match.call()
 type = match.arg(type)
-
 if(family=="binomial") {
- if(type!="partial")
stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) } - - n = nrow(x) - p = ncol(x) - beta = as.numeric(beta) - if (length(beta) != p) stop("Since family='gaussian', beta must have length equal to ncol(x)") - - # If glmnet was run with an intercept term, center x and y - if (intercept==TRUE) { - obj = standardize(x,y,TRUE,FALSE) - x = obj$x - y = obj$y + else if(family=="cox") { + if(type!="partial") stop("Only type= partial allowed with Cox family") + out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) } - - # Check the KKT conditions - g = t(x)%*%(y-x%*%beta) / lambda - if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) - warning(paste("Solution beta does not satisfy the KKT conditions", - "(to within specified tolerances)")) - - vars = which(abs(beta) > tol.beta / sqrt(colSums(x^2))) - if(length(vars)==0){ + + else{ + + + + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (type == "full") { + if (p > n) { + # need intercept (if there is one) for debiased lasso + hbeta = beta + if (intercept == T) { + if (length(beta) != p + 1) { + stop("Since type='full', p > n, and intercept=TRUE, beta must have length equal to ncol(x)+1") + } + # remove intercept if included + beta = beta[-1] + } else if (length(beta) != p) { + stop("Since family='gaussian', type='full' and intercept=FALSE, beta must have length equal to ncol(x)") + } + } + } else if (length(beta) != p) { + stop("Since family='gaussian' and type='partial', beta must have length equal to ncol(x)") + } + + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) + # print(tol.coef) + vars = which(abs(beta) > tol.coef) + # print(beta) + # print(vars) + if(length(vars)==0){ cat("Empty model",fill=T) return() - } - if (any(sign(g[vars]) != sign(beta[vars]))) - warning(paste("Solution beta does not satisfy the KKT conditions", - "(to within specified tolerances). You might try rerunning", - "glmnet with a lower setting of the", - "'thresh' parameter, for a more accurate convergence.")) - - # Get lasso polyhedral region, of form Gy >= u - out = fixedLasso.poly(x,y,beta,lambda,vars) - G = out$G - u = out$u - - # Check polyhedral region - tol.poly = 0.01 - if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) - stop(paste("Polyhedral constraints not satisfied; you must recompute beta", - "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", - "and check whether the specified value of lambda is too small", - "(beyond the grid of values visited by glmnet).", - "You might also try rerunning glmnet with a lower setting of the", - "'thresh' parameter, for a more accurate convergence.")) - - # Estimate sigma - if (is.null(sigma)) { - if (n >= 2*p) { - oo = intercept - sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) } - else { - sigma = sd(y) - warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), - "you may want to use the estimateSigma function")) + if (any(sign(g[vars]) != sign(beta[vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Get lasso polyhedral region, of form Gy >= u + if (type == 'full' & p > n) out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) + else out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u + + # Check polyhedral region + tol.poly = 0.01 + if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + sign = numeric(k) + + if (type=="full" & p > n) { + if (intercept == T) { + pp=p+1 + Xint <- cbind(rep(1,n),x) + # indices of selected predictors + S = c(1,vars + 1) + notS = which(abs(beta) <= tol.coef) + 1 + } else { + pp=p + Xint <- x + # indices of selected predictors + S = vars + notS = which(abs(beta) <= tol.coef) + } + + + XS = Xint[,S] + hbetaS = hbeta[S] + + # Reorder so that active set S is first + Xordered = Xint[,c(S,notS,recursive=T)] + + hsigma <- 1/n*(t(Xordered)%*%Xordered) + hsigmaS <- 1/n*(t(XS)%*%XS) # hsigma[S,S] + hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) + + # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R + htheta <- InverseLinfty(hsigma, n, verbose=FALSE) + + FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) + ithetasigma = (diag(pp)-(htheta%*%hsigma)) + + M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) + # vector which is offset for testing debiased beta's + null_value <- (((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) + if (intercept == T) { + M = M[-1,] # remove intercept row + null_value = null_value[-1] # remove intercept element + } + } else if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + null_value = rep(0,k) + } else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + null_value = rep(0,k) } - } - - k = length(vars) - pv = vlo = vup = numeric(k) - vmat = matrix(0,k,n) - ci = tailarea = matrix(0,k,2) - sign = numeric(k) - - if 
(type=="full" & p > n) - warning(paste("type='full' does not make sense when p > n;", - "switching to type='partial'")) - - if (type=="partial" || p > n) { - xa = x[,vars,drop=F] - M = pinv(crossprod(xa)) %*% t(xa) - } - else { - M = pinv(crossprod(x)) %*% t(x) - M = M[vars,,drop=F] - } for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) @@ -124,7 +182,7 @@ else{ vj = sign[j] * vj limits.info = TG.limits(y, -G, -u, vj, Sigma=diag(rep(sigma^2, n))) - a = TG.pvalue.base(limits.info, bits=bits) + a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) pv[j] = a$pv vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) @@ -135,7 +193,7 @@ else{ gridrange=gridrange, flip=(sign[j]==-1), bits=bits) - ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + ci[j,] = (a$int-null_value[j]) * mj # Unstandardize (mult by norm of vj) tailarea[j,] = a$tailarea } @@ -154,31 +212,178 @@ else{ fixedLasso.poly= -function(x, y, beta, lambda, a) { - xa = x[,a,drop=F] - xac = x[,!a,drop=F] - xai = pinv(crossprod(xa)) - xap = xai %*% t(xa) - za = sign(beta[a]) - if (length(za)>1) dz = diag(za) - if (length(za)==1) dz = matrix(za,1,1) - -# P = diag(1,nrow(xa)) - xa %*% xap - #NOTE: inactive constraints not needed below! - - G = -rbind( - # 1/lambda * t(xac) %*% P, - # -1/lambda * t(xac) %*% P, - -dz %*% xap + function(x, y, beta, lambda, a, inactive = FALSE) { + xa = x[,a,drop=F] + xac = x[,!a,drop=F] + xai = pinv(crossprod(xa)) + xap = xai %*% t(xa) + za = sign(beta[a]) + if (length(za)>1) dz = diag(za) + if (length(za)==1) dz = matrix(za,1,1) + + if (inactive) { + P = diag(1,nrow(xa)) - xa %*% xap + + G = -rbind( + 1/lambda * t(xac) %*% P, + -1/lambda * t(xac) %*% P, + -dz %*% xap ) - lambda2=lambda - if(length(lambda)>1) lambda2=lambda[a] - u = -c( - # 1 - t(xac) %*% t(xap) %*% za, - # 1 + t(xac) %*% t(xap) %*% za, - -lambda2 * dz %*% xai %*% za) - - return(list(G=G,u=u)) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + 1 - t(xac) %*% t(xap) %*% za, + 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + } else { + G = -rbind( + # 1/lambda * t(xac) %*% P, + # -1/lambda * t(xac) %*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + # 1 - t(xac) %*% t(xap) %*% za, + # 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + } + + return(list(G=G,u=u)) + } + +############################## + +### Functions borrowed from lasso_inference.R + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { + isgiven <- 1; + if (is.null(mu)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + mu.stop <- 0; + try.no <- 1; + incr <- 0; + while ((mu.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + mu.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + mu <- mu*resol; + } else { + incr <- 0; + mu <- mu/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + mu <- mu*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ 
+ mu.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + mu <- mu/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + mu <- mu*resol; + beta <- last.beta; + mu.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + mu0 <- rho/(1+rho); + beta <- rep(0,p); + + if (mu >= mu0){ + beta[i] <- (1-mu0)/sigma[i,i]; + returnlist <- list("optsol" = beta, "iter" = 0); + return(returnlist); + } + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-mu0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + if (iter>10) + vs <- -sigma.tilde%*%beta; + } + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } } ############################## diff --git a/selectiveInference/R/linear.tests.R b/selectiveInference/R/linear.tests.R new file mode 100644 index 00000000..a37f76f6 --- /dev/null +++ b/selectiveInference/R/linear.tests.R @@ -0,0 +1,172 @@ +robs.test <- function() { + n <- 100 + p <- 200 + + set.seed(11332) + + y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response + X <- matrix(rnorm(p*n),ncol = p) # p rand N(0,1) predictors + + X=scale(X,T,T)/sqrt(n-1) + lambda=1 + sigma = estimateSigma(X,y)$sigmahat + + las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=T) + hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=T)) + + + return(fixedLassoInf(X,y,hbeta[-1],lambda,family="gaussian",type="partial",intercept=T,sigma=sigma)$pv) +} + + +## Tests partial inf for X and y randomly generated from N(0,1) +nullTest <- function(X,y,lambda,intercept,type=c("full","partial")) { + n=nrow(X) + X=scale(X,T,T)/sqrt(n-1) + + sigma = estimateSigma(X,y)$sigmahat + + las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=intercept) + hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=intercept)) + + if (type=="partial" || intercept==F) hbeta = hbeta[-1] + + return(fixedLassoInf(X,y,hbeta,lambda,family="gaussian",type=type,intercept=intercept,sigma=sigma)) +} + +## Test partial inf for X and y where 10 variables are y with random additive N(0,0.5) noise +corrTest <- function(X,y,lambda,intercept,type=c("full","partial")) { + n=nrow(X) + corr.X = rep(y,10) + matrix(rnorm(n*10,0,0.5),ncol = 10) + X = cbind(corr.X,X) + X=scale(X,T,T)/sqrt(n-1) + + las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=intercept) + hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=intercept)) + + sigma = estimateSigma(X,y)$sigmahat + + if (type=="partial" || intercept==F) hbeta = hbeta[-1] + + return(fixedLassoInf(X,y,hbeta,lambda,family="gaussian",type=type,intercept=intercept,sigma=sigma)) +} + +## QQ 
plot of p-values for all null data now that bug fix is implemented +partial.qq.test <- function() { + n <- 100 + p <- 200 + + lambda=1 + + null.int.pvs <- c() + corr.int.pvs <- c() + null.pvs <- c() + corr.pvs <- c() + for(i in 1:25) { + y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response + X <- matrix(rnorm(p*n),ncol=p) # p rand N(0,1) predictors + + null <- nullTest(X,y,lambda,F,type="partial") + corr <- corrTest(X,y,lambda,F,type="partial") + null.pvs <- c(null.pvs,null$pv,recursive=T) + corr.pvs <- c(corr.pvs,corr$pv,recursive=T) + null.int <- nullTest(X,y,lambda,T,type="partial") + corr.int <- corrTest(X,y,lambda,T,type="partial") + null.int.pvs <- c(null.int.pvs,null.int$pv,recursive=T) + corr.int.pvs <- c(corr.int.pvs,corr.int$pv,recursive=T) + } + + qqplot(x=runif(length(null.pvs)),y=null.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. Null X w/o Intercept") + abline(0,1) + qqplot(x=runif(length(corr.pvs)),y=corr.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. X w/o Intercept") + abline(0,1) + qqplot(x=runif(length(null.int.pvs)),y=null.int.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. Null X w/ Intercept") + abline(0,1) + qqplot(x=runif(length(corr.int.pvs)),y=corr.int.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. X w/ Intercept") + abline(0,1) +} + +## QQ plot of p-values for all null data now that bug fix is implemented +pop.qq.test <- function() { + n <- 100 + p <- 200 + + lambda=1 + + null.int.pvs <- c() + corr.int.pvs <- c() + null.pvs <- c() + corr.pvs <- c() + for(i in 1:25) { + y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response + X <- matrix(rnorm(p*n),ncol=p) # p rand N(0,1) predictors + + null <- nullTest(X,y,lambda,F,type="full") + corr <- corrTest(X,y,lambda,F,type="full") + null.pvs <- c(null.pvs,null$pv,recursive=T) + corr.pvs <- c(corr.pvs,corr$pv,recursive=T) + null.int <- nullTest(X,y,lambda,T,type="full") + corr.int <- corrTest(X,y,lambda,T,type="full") + null.int.pvs <- c(null.int.pvs,null.int$pv,recursive=T) + corr.int.pvs <- c(corr.int.pvs,corr.int$pv,recursive=T) + } + + qqplot(x=runif(length(null.pvs)),y=null.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. Null X w/o Intercept") + abline(0,1) + qqplot(x=runif(length(corr.pvs)),y=corr.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. 10 Corr. X w/o Intercept") + abline(0,1) + qqplot(x=runif(length(null.int.pvs)),y=null.int.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. Null X w/ Intercept") + abline(0,1) + qqplot(x=runif(length(corr.int.pvs)),y=corr.int.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. 10 Corr. X w/ Intercept") + abline(0,1) +} + + + + +## QQ plot of p-values for data with correlated x now that bug fix implemented +power.partial.pval.dist <- function(n,p,intercept=T,lambda=1) { + pvs <- c() + for(i in 1:10) { + a <- powerPartialTest(n,p,intercept,lambda) + ps <- a$pv + pvs <- c(pvs,ps,recursive=T) + } + qqplot(x=runif(length(pvs)),y=pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. 
X") + abline(0,1) +} + + + + +## Tests pop inf for X and y randomly generated +nullPopTest <- function(n,p,intercept=T,lambda=1) { + y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response + X <- matrix(rnorm(p*n),ncol = p) # p rand N(0,1) predictors + + # lambda <- 1 + X=scale(X,T,T)/sqrt(n-1) + + # lambda <- 1 + las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=intercept) + hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=intercept)) + + ### perform post selection inference + + sigma = estimateSigma(X,y)$sigmahat + + if (intercept) return(fixedLassoInf(X,y,hbeta,lambda,family="gaussian",type="full",intercept=intercept,sigma=sigma)) + else return(fixedLassoInf(X,y,hbeta[-1],lambda,family="gaussian",type="full",intercept=intercept,sigma=sigma)) +} + +## QQ plot of p-values for all null data now that bug fix is implemented +null.pop.pval.dist <- function(n,p,intercept=T,lambda=1) { + pvs <- c() + for(i in 1:10) { + a <- nullPopTest(n,p,intercept,lambda) + ps <- a$pv + pvs <- c(pvs,ps,recursive=T) + } + qqplot(x=runif(length(pvs)),y=pvs,xlab="Expected",ylab="Observed",main="Pop Coef. Null X") + abline(0,1) +} \ No newline at end of file From 344f4045a5a5b519ec035928280078ac619349c9 Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Tue, 8 Aug 2017 13:47:25 -0700 Subject: [PATCH 170/396] now debias lasso only computes theta for the active set --- selectiveInference/R/funs.fixed.R | 20 +++++++++------- selectiveInference/R/linear.tests.R | 37 +---------------------------- 2 files changed, 13 insertions(+), 44 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 3258b804..ba959a7e 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -148,12 +148,15 @@ sigma=NULL, alpha=0.1, hsigma <- 1/n*(t(Xordered)%*%Xordered) hsigmaS <- 1/n*(t(XS)%*%XS) # hsigma[S,S] hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) - + # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - htheta <- InverseLinfty(hsigma, n, verbose=FALSE) + htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) + # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) - ithetasigma = (diag(pp)-(htheta%*%hsigma)) + GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S))) + ithetasigma = (GS-(htheta%*%hsigma)) + # ithetasigma = (diag(pp) - (htheta%*%hsigma)) M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) # vector which is offset for testing debiased beta's @@ -254,20 +257,21 @@ fixedLasso.poly= ############################## -### Functions borrowed from lasso_inference.R +### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { - isgiven <- 1; +InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { + # InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { + isgiven <- 1; if (is.null(mu)){ isgiven <- 0; } p <- nrow(sigma); - M <- matrix(0, p, p); + M <- matrix(0, e, p); xperc = 0; xp = round(p/10); - for (i in 1:p) { + for (i in 1:e) { if ((i %% xp)==0){ xperc = xperc+10; if (verbose) { diff --git a/selectiveInference/R/linear.tests.R b/selectiveInference/R/linear.tests.R index a37f76f6..bd65bade 100644 --- a/selectiveInference/R/linear.tests.R +++ 
b/selectiveInference/R/linear.tests.R @@ -15,7 +15,7 @@ robs.test <- function() { hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=T)) - return(fixedLassoInf(X,y,hbeta[-1],lambda,family="gaussian",type="partial",intercept=T,sigma=sigma)$pv) + return(fixedLassoInf(X,y,hbeta[-1],lambda,family="gaussian",type="partial",intercept=T,sigma=sigma)) } @@ -134,39 +134,4 @@ power.partial.pval.dist <- function(n,p,intercept=T,lambda=1) { } qqplot(x=runif(length(pvs)),y=pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. X") abline(0,1) -} - - - - -## Tests pop inf for X and y randomly generated -nullPopTest <- function(n,p,intercept=T,lambda=1) { - y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response - X <- matrix(rnorm(p*n),ncol = p) # p rand N(0,1) predictors - - # lambda <- 1 - X=scale(X,T,T)/sqrt(n-1) - - # lambda <- 1 - las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=intercept) - hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=intercept)) - - ### perform post selection inference - - sigma = estimateSigma(X,y)$sigmahat - - if (intercept) return(fixedLassoInf(X,y,hbeta,lambda,family="gaussian",type="full",intercept=intercept,sigma=sigma)) - else return(fixedLassoInf(X,y,hbeta[-1],lambda,family="gaussian",type="full",intercept=intercept,sigma=sigma)) -} - -## QQ plot of p-values for all null data now that bug fix is implemented -null.pop.pval.dist <- function(n,p,intercept=T,lambda=1) { - pvs <- c() - for(i in 1:10) { - a <- nullPopTest(n,p,intercept,lambda) - ps <- a$pv - pvs <- c(pvs,ps,recursive=T) - } - qqplot(x=runif(length(pvs)),y=pvs,xlab="Expected",ylab="Observed",main="Pop Coef. Null X") - abline(0,1) } \ No newline at end of file From 2e592f077fb84f64cbea218064ac0a672b16bab1 Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Tue, 8 Aug 2017 14:37:51 -0700 Subject: [PATCH 171/396] added optional 'add.targets' parameter to fixedLassoInference to allow user to add variables as targets for inference, even if they are not selected by the lasso --- selectiveInference/R/funs.fixed.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index ba959a7e..0c728c7e 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -2,7 +2,7 @@ # for the solution of # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 -fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, +fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE) { @@ -60,6 +60,12 @@ sigma=NULL, alpha=0.1, bits = NULL } + if (!is.null(add.targets) && (!is.vector(add.targets) + || !all(is.numeric(add.targets)) || !all(add.targets==floor(add.targets)) + || !all(add.targets >= 1 && add.targets <= p))) { + stop("'add.targets' must be a vector of integers between 1 and p") + } + # If glmnet was run with an intercept term, center x and y if (intercept==TRUE) { obj = standardize(x,y,TRUE,FALSE) @@ -117,12 +123,15 @@ sigma=NULL, alpha=0.1, } } + # add additional targets for inference if provided + if (!is.null(add.targets)) vars = sort(unique(c(vars,add.targets,recursive=T))) + k = length(vars) pv = vlo = vup = numeric(k) vmat = matrix(0,k,n) ci = tailarea = matrix(0,k,2) sign = numeric(k) - 
+ if (type=="full" & p > n) { if (intercept == T) { pp=p+1 From fcaf1d5b01f6a56bffe3456e7b74c505577adc60 Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Tue, 8 Aug 2017 14:53:48 -0700 Subject: [PATCH 172/396] fixed small bug with last commit --- selectiveInference/R/funs.fixed.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 0c728c7e..7073e2ef 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -138,15 +138,15 @@ sigma=NULL, alpha=0.1, Xint <- cbind(rep(1,n),x) # indices of selected predictors S = c(1,vars + 1) - notS = which(abs(beta) <= tol.coef) + 1 } else { pp=p Xint <- x # indices of selected predictors S = vars - notS = which(abs(beta) <= tol.coef) + # notS = which(abs(beta) <= tol.coef) } + notS = setdiff(1:pp,S) XS = Xint[,S] hbetaS = hbeta[S] From 57daf4f311ac21fa4e3fc4c81a775f112d322795 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 8 Aug 2017 16:35:12 -0700 Subject: [PATCH 173/396] travis script --- .travis.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..71ff221c --- /dev/null +++ b/.travis.yml @@ -0,0 +1,11 @@ +language: r +cache: packages +sudo: false +r: + - oldrel + - release + - devel +warnings_are_errors: true +before_install: + - tlmgr install index # for texlive and vignette? + - cd selectiveInference From 73f5829aba52405aac7d95b627d02eaa3de43eac Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 8 Aug 2017 16:42:16 -0700 Subject: [PATCH 174/396] missing mpfr library? --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 71ff221c..aa61233f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,9 @@ r: - oldrel - release - devel +addons: + apt: + packages: libmpfr-dev warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? From 3289471fc016ba4128ebaa48dcd7f0b98885916a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 8 Aug 2017 16:54:44 -0700 Subject: [PATCH 175/396] fixing Rd file --- selectiveInference/man/predict.groupfs.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/man/predict.groupfs.Rd b/selectiveInference/man/predict.groupfs.Rd index 4b8394b3..492b74fc 100644 --- a/selectiveInference/man/predict.groupfs.Rd +++ b/selectiveInference/man/predict.groupfs.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/funs.groupfs.R \name{predict.groupfs} \alias{predict.groupfs} -\title{Prediction and coefficient functions for \code{\link{groupfs}}. 
- +\title{Prediction and coefficient functions for \code{\link{groupfs}}.} +\description{ Make predictions or extract coefficients from a groupfs forward stepwise object.} \usage{ \method{predict}{groupfs}(object, newx) From 7e161740c4bb49096724a0ce36d65252be0b4407 Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Tue, 8 Aug 2017 20:33:04 -0700 Subject: [PATCH 176/396] added .travis.yml file --- .travis.yml | 27 +++++++++++++++++++++++++ selectiveInference/man/fixedLassoInf.Rd | 3 ++- 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..f4ebe356 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,27 @@ +language: r + +cache: packages + +sudo: false + +r: + + - oldrel + + - release + + - devel + +addons: + + apt: + + packages: libmpfr-dev + +warnings_are_errors: true + +before_install: + + - tlmgr install index # for texlive and vignette? + + - cd selectiveInference diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index c5a99120..d675c3fe 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -10,7 +10,7 @@ fixed value of the tuning parameter lambda } \usage{ fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial", - "cox"),intercept=TRUE, status=NULL, sigma=NULL, alpha=0.1, + "cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE) } @@ -56,6 +56,7 @@ Significance level for confidence intervals (target is miscoverage alpha/2 in ea Was the lasso problem solved (e.g., by glmnet) with an intercept in the model? Default is TRUE. Must be TRUE for "binomial" family. Not used for 'cox" family, where no intercept is assumed. } +\item{add.targets}{Optional vector of predictors to be included as targets of inference, regardless of whether or not they are selected by the lasso. 
Default is NULL.} \item{status}{Censoring status for Cox model; 1=failurem 0=censored} \item{type}{Contrast type for p-values and confidence intervals: default is "partial"---meaning that the contrasts tested are the partial population From 1535c7c1a3518ec7df2a6c51df65dd43f3e134b7 Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Tue, 8 Aug 2017 20:54:56 -0700 Subject: [PATCH 177/396] testing travisci --- selectiveInference/R/linear.tests.R | 274 ++++++++++++++-------------- 1 file changed, 137 insertions(+), 137 deletions(-) diff --git a/selectiveInference/R/linear.tests.R b/selectiveInference/R/linear.tests.R index bd65bade..e113d0a4 100644 --- a/selectiveInference/R/linear.tests.R +++ b/selectiveInference/R/linear.tests.R @@ -1,137 +1,137 @@ -robs.test <- function() { - n <- 100 - p <- 200 - - set.seed(11332) - - y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response - X <- matrix(rnorm(p*n),ncol = p) # p rand N(0,1) predictors - - X=scale(X,T,T)/sqrt(n-1) - lambda=1 - sigma = estimateSigma(X,y)$sigmahat - - las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=T) - hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=T)) - - - return(fixedLassoInf(X,y,hbeta[-1],lambda,family="gaussian",type="partial",intercept=T,sigma=sigma)) -} - - -## Tests partial inf for X and y randomly generated from N(0,1) -nullTest <- function(X,y,lambda,intercept,type=c("full","partial")) { - n=nrow(X) - X=scale(X,T,T)/sqrt(n-1) - - sigma = estimateSigma(X,y)$sigmahat - - las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=intercept) - hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=intercept)) - - if (type=="partial" || intercept==F) hbeta = hbeta[-1] - - return(fixedLassoInf(X,y,hbeta,lambda,family="gaussian",type=type,intercept=intercept,sigma=sigma)) -} - -## Test partial inf for X and y where 10 variables are y with random additive N(0,0.5) noise -corrTest <- function(X,y,lambda,intercept,type=c("full","partial")) { - n=nrow(X) - corr.X = rep(y,10) + matrix(rnorm(n*10,0,0.5),ncol = 10) - X = cbind(corr.X,X) - X=scale(X,T,T)/sqrt(n-1) - - las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=intercept) - hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=intercept)) - - sigma = estimateSigma(X,y)$sigmahat - - if (type=="partial" || intercept==F) hbeta = hbeta[-1] - - return(fixedLassoInf(X,y,hbeta,lambda,family="gaussian",type=type,intercept=intercept,sigma=sigma)) -} - -## QQ plot of p-values for all null data now that bug fix is implemented -partial.qq.test <- function() { - n <- 100 - p <- 200 - - lambda=1 - - null.int.pvs <- c() - corr.int.pvs <- c() - null.pvs <- c() - corr.pvs <- c() - for(i in 1:25) { - y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response - X <- matrix(rnorm(p*n),ncol=p) # p rand N(0,1) predictors - - null <- nullTest(X,y,lambda,F,type="partial") - corr <- corrTest(X,y,lambda,F,type="partial") - null.pvs <- c(null.pvs,null$pv,recursive=T) - corr.pvs <- c(corr.pvs,corr$pv,recursive=T) - null.int <- nullTest(X,y,lambda,T,type="partial") - corr.int <- corrTest(X,y,lambda,T,type="partial") - null.int.pvs <- c(null.int.pvs,null.int$pv,recursive=T) - corr.int.pvs <- c(corr.int.pvs,corr.int$pv,recursive=T) - } - - qqplot(x=runif(length(null.pvs)),y=null.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. Null X w/o Intercept") - abline(0,1) - qqplot(x=runif(length(corr.pvs)),y=corr.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. 
X w/o Intercept") - abline(0,1) - qqplot(x=runif(length(null.int.pvs)),y=null.int.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. Null X w/ Intercept") - abline(0,1) - qqplot(x=runif(length(corr.int.pvs)),y=corr.int.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. X w/ Intercept") - abline(0,1) -} - -## QQ plot of p-values for all null data now that bug fix is implemented -pop.qq.test <- function() { - n <- 100 - p <- 200 - - lambda=1 - - null.int.pvs <- c() - corr.int.pvs <- c() - null.pvs <- c() - corr.pvs <- c() - for(i in 1:25) { - y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response - X <- matrix(rnorm(p*n),ncol=p) # p rand N(0,1) predictors - - null <- nullTest(X,y,lambda,F,type="full") - corr <- corrTest(X,y,lambda,F,type="full") - null.pvs <- c(null.pvs,null$pv,recursive=T) - corr.pvs <- c(corr.pvs,corr$pv,recursive=T) - null.int <- nullTest(X,y,lambda,T,type="full") - corr.int <- corrTest(X,y,lambda,T,type="full") - null.int.pvs <- c(null.int.pvs,null.int$pv,recursive=T) - corr.int.pvs <- c(corr.int.pvs,corr.int$pv,recursive=T) - } - - qqplot(x=runif(length(null.pvs)),y=null.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. Null X w/o Intercept") - abline(0,1) - qqplot(x=runif(length(corr.pvs)),y=corr.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. 10 Corr. X w/o Intercept") - abline(0,1) - qqplot(x=runif(length(null.int.pvs)),y=null.int.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. Null X w/ Intercept") - abline(0,1) - qqplot(x=runif(length(corr.int.pvs)),y=corr.int.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. 10 Corr. X w/ Intercept") - abline(0,1) -} - - - - -## QQ plot of p-values for data with correlated x now that bug fix implemented -power.partial.pval.dist <- function(n,p,intercept=T,lambda=1) { - pvs <- c() - for(i in 1:10) { - a <- powerPartialTest(n,p,intercept,lambda) - ps <- a$pv - pvs <- c(pvs,ps,recursive=T) - } - qqplot(x=runif(length(pvs)),y=pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. 
X") - abline(0,1) -} \ No newline at end of file +# robs.test <- function() { +# n <- 100 +# p <- 200 +# +# set.seed(11332) +# +# y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response +# X <- matrix(rnorm(p*n),ncol = p) # p rand N(0,1) predictors +# +# X=scale(X,T,T)/sqrt(n-1) +# lambda=1 +# sigma = estimateSigma(X,y)$sigmahat +# +# las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=T) +# hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=T)) +# +# +# return(fixedLassoInf(X,y,hbeta[-1],lambda,family="gaussian",type="partial",intercept=T,sigma=sigma)) +# } +# +# +# ## Tests partial inf for X and y randomly generated from N(0,1) +# nullTest <- function(X,y,lambda,intercept,type=c("full","partial")) { +# n=nrow(X) +# X=scale(X,T,T)/sqrt(n-1) +# +# sigma = estimateSigma(X,y)$sigmahat +# +# las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=intercept) +# hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=intercept)) +# +# if (type=="partial" || intercept==F) hbeta = hbeta[-1] +# +# return(fixedLassoInf(X,y,hbeta,lambda,family="gaussian",type=type,intercept=intercept,sigma=sigma)) +# } +# +# ## Test partial inf for X and y where 10 variables are y with random additive N(0,0.5) noise +# corrTest <- function(X,y,lambda,intercept,type=c("full","partial")) { +# n=nrow(X) +# corr.X = rep(y,10) + matrix(rnorm(n*10,0,0.5),ncol = 10) +# X = cbind(corr.X,X) +# X=scale(X,T,T)/sqrt(n-1) +# +# las <- glmnet(X,y,family="gaussian",alpha=1,standardize=F,intercept=intercept) +# hbeta <- as.numeric(coef(las,x=X,y=y,s=lambda/n,exact=TRUE,intercept=intercept)) +# +# sigma = estimateSigma(X,y)$sigmahat +# +# if (type=="partial" || intercept==F) hbeta = hbeta[-1] +# +# return(fixedLassoInf(X,y,hbeta,lambda,family="gaussian",type=type,intercept=intercept,sigma=sigma)) +# } +# +# ## QQ plot of p-values for all null data now that bug fix is implemented +# partial.qq.test <- function() { +# n <- 100 +# p <- 200 +# +# lambda=1 +# +# null.int.pvs <- c() +# corr.int.pvs <- c() +# null.pvs <- c() +# corr.pvs <- c() +# for(i in 1:25) { +# y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response +# X <- matrix(rnorm(p*n),ncol=p) # p rand N(0,1) predictors +# +# null <- nullTest(X,y,lambda,F,type="partial") +# corr <- corrTest(X,y,lambda,F,type="partial") +# null.pvs <- c(null.pvs,null$pv,recursive=T) +# corr.pvs <- c(corr.pvs,corr$pv,recursive=T) +# null.int <- nullTest(X,y,lambda,T,type="partial") +# corr.int <- corrTest(X,y,lambda,T,type="partial") +# null.int.pvs <- c(null.int.pvs,null.int$pv,recursive=T) +# corr.int.pvs <- c(corr.int.pvs,corr.int$pv,recursive=T) +# } +# +# qqplot(x=runif(length(null.pvs)),y=null.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. Null X w/o Intercept") +# abline(0,1) +# qqplot(x=runif(length(corr.pvs)),y=corr.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. X w/o Intercept") +# abline(0,1) +# qqplot(x=runif(length(null.int.pvs)),y=null.int.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. Null X w/ Intercept") +# abline(0,1) +# qqplot(x=runif(length(corr.int.pvs)),y=corr.int.pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. 
X w/ Intercept") +# abline(0,1) +# } +# +# ## QQ plot of p-values for all null data now that bug fix is implemented +# pop.qq.test <- function() { +# n <- 100 +# p <- 200 +# +# lambda=1 +# +# null.int.pvs <- c() +# corr.int.pvs <- c() +# null.pvs <- c() +# corr.pvs <- c() +# for(i in 1:25) { +# y <- matrix(rnorm(n),ncol=1) # rand N(0,1) response +# X <- matrix(rnorm(p*n),ncol=p) # p rand N(0,1) predictors +# +# null <- nullTest(X,y,lambda,F,type="full") +# corr <- corrTest(X,y,lambda,F,type="full") +# null.pvs <- c(null.pvs,null$pv,recursive=T) +# corr.pvs <- c(corr.pvs,corr$pv,recursive=T) +# null.int <- nullTest(X,y,lambda,T,type="full") +# corr.int <- corrTest(X,y,lambda,T,type="full") +# null.int.pvs <- c(null.int.pvs,null.int$pv,recursive=T) +# corr.int.pvs <- c(corr.int.pvs,corr.int$pv,recursive=T) +# } +# +# qqplot(x=runif(length(null.pvs)),y=null.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. Null X w/o Intercept") +# abline(0,1) +# qqplot(x=runif(length(corr.pvs)),y=corr.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. 10 Corr. X w/o Intercept") +# abline(0,1) +# qqplot(x=runif(length(null.int.pvs)),y=null.int.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. Null X w/ Intercept") +# abline(0,1) +# qqplot(x=runif(length(corr.int.pvs)),y=corr.int.pvs,xlab="Expected",ylab="Observed",main="Pop Coef. 10 Corr. X w/ Intercept") +# abline(0,1) +# } +# +# +# +# +# ## QQ plot of p-values for data with correlated x now that bug fix implemented +# power.partial.pval.dist <- function(n,p,intercept=T,lambda=1) { +# pvs <- c() +# for(i in 1:10) { +# a <- powerPartialTest(n,p,intercept,lambda) +# ps <- a$pv +# pvs <- c(pvs,ps,recursive=T) +# } +# qqplot(x=runif(length(pvs)),y=pvs,xlab="Expected",ylab="Observed",main="Partial Coef. 10 Corr. X") +# abline(0,1) +# } \ No newline at end of file From 248ce54c01282d428a9c31b862280166e52c6eb9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 15 Aug 2017 21:24:12 -0700 Subject: [PATCH 178/396] C code for one row of M --- selectiveInference/R/funs.fixed.R | 19 +++ selectiveInference/src/debiasing_matrix.c | 151 ++++++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 selectiveInference/src/debiasing_matrix.c diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 7073e2ef..da701e57 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -334,6 +334,25 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } +InverseLinftyOneRowC <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { + + p = nrow(sigma) + theta = rep(0, p) + + val = .C("find_one_row", + Sigma=as.double(sigma), + nrow=as.integer(p), + bound=as.double(mu), + theta=as.double(theta), + maxiter=as.integer(maxiter), + row=as.integer(i-1), + coord=as.integer(i-1), + dup=FALSE, + package="selectiveInference") + + return(val$theta) +} + InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { p <- nrow(sigma); rho <- max(abs(sigma[i,-i])) / sigma[i,i]; diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c new file mode 100644 index 00000000..0a626eaa --- /dev/null +++ b/selectiveInference/src/debiasing_matrix.c @@ -0,0 +1,151 @@ +#include +#include // for fabs + +// Find an approximate row of \hat{Sigma}^{-1} + +// Problem (4) of ???? 
+
+// Update one coordinate
+
+double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */
+			int nrow,          /* How many rows in Sigma */
+			double bound,      /* feasibility parameter */
+			double *theta,     /* current value */
+			int row,           /* which row: 0-based */
+			int coord)         /* which coordinate to update: 0-based */
+{
+
+  double linear_term = 0;
+  double quadratic_term = 0;
+  double value = 0;
+  double *Sigma_ptr;
+  double *theta_ptr = theta;
+  int icol = 0;
+
+  Sigma_ptr = ((double *) Sigma + nrow * coord);
+
+  for (icol=0; icol < nrow; icol++) {
+    if (icol != coord) {
+      linear_term += (*Sigma_ptr) * (*theta_ptr);
+    }
+    else {
+      quadratic_term = (*Sigma_ptr);
+    }
+    Sigma_ptr += 1;
+    theta_ptr += 1;
+  }
+
+  if (row == coord) {
+    linear_term += 1;
+  }
+
+  // Now soft-threshold the coord entry of theta
+
+  // Objective is t \mapsto q/2 * t^2 + l * t + bound |t|
+  // with q=quadratic_term and l=linear_term
+
+  if (linear_term < -bound) {
+    value = - (-linear_term - bound) / quadratic_term;
+  }
+  else if (linear_term > bound) {
+    value = (linear_term - bound) / quadratic_term;
+  }
+
+  theta_ptr = ((double *) theta + coord);
+  *theta_ptr = value;
+  return(value);
+
+}
+
+double objective(double *Sigma, /* A covariance matrix: X^TX/n */
+		 int nrow,      /* how many rows in Sigma */
+		 int row,       /* which row: 0-based */
+		 double bound,  /* Lagrange multipler for \ell_1 */
+		 double *theta) /* current value */
+{
+  int irow, icol;
+  double value = 0;
+  double *Sigma_ptr = Sigma;
+  double *theta_row_ptr, *theta_col_ptr;
+
+  theta_row_ptr = theta;
+  theta_col_ptr = theta;
+
+  for (irow=0; irow<nrow; irow++) {
+    for (icol=0; icol<nrow; icol++) {
+      value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr);
+      Sigma_ptr += 1;
+      theta_col_ptr += 1;
+    }
+    if (irow == row) {
+      value -= (*theta_row_ptr); // the elementary basis vector term
+    }
+    value = value + bound * fabs((*theta_row_ptr)); // the \ell_1 term
+    theta_row_ptr += 1;
+    theta_col_ptr = theta;
+  }
+
+  return(value);
+}
+
+void find_one_row(double *Sigma,     /* A covariance matrix: X^TX/n */
+		  int *nrow_ptr,     /* How many rows in Sigma */
+		  double *bound_ptr, /* feasibility parameter */
+		  double *theta,     /* current value */
+		  int *maxiter_ptr,  /* how many iterations */
+		  int *row_ptr,      /* which row: 0-based */
+		  int *coord_ptr)    /* starting coordinate: 0-based */
+{
+  int maxiter = *maxiter_ptr;
+  int iter = 0;
+  int icoord = 0;
+  int row = *row_ptr;
+  double bound = *bound_ptr;
+  int nrow = *nrow_ptr;
+
+  double old_value = objective(Sigma, nrow, row, bound, theta);
+  double new_value;
+
+  for (iter=0; iter<maxiter; iter++) {
+    for (icoord=0; icoord<nrow; icoord++) {
+      update_one_coord(Sigma, nrow, bound, theta, row, icoord);
+    }
+    new_value = objective(Sigma, nrow, row, bound, theta);
+    if (((old_value - new_value) < 1.e-6 * fabs(new_value)) && (iter > 3)) {
+      break;
+    }
+    old_value = new_value;
+  }
+
+  *nrow_ptr = iter-1;
+}

From 4d6719990d1f49059e8fd672f27f784e4e3f2fbe Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Tue, 15 Aug 2017 21:51:39 -0700
Subject: [PATCH 179/396] added a warning about feasibility -- but it didn't print in gist?

---
 selectiveInference/R/funs.fixed.R         | 15 ++++++++++++---
 selectiveInference/src/debiasing_matrix.c | 15 +++++++++++----
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R
index da701e57..1bf216e2 100644
--- a/selectiveInference/R/funs.fixed.R
+++ b/selectiveInference/R/funs.fixed.R
@@ -334,13 +334,15 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold
   return(M)
 }
 
-InverseLinftyOneRowC <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) {
+InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50, threshold=1e-2 ) {
 
-  p = nrow(sigma)
+  p = nrow(Sigma)
+  basis_vector = rep(0, p)
+  basis_vector[i] = 1.
   theta = rep(0, p)
 
   val = .C("find_one_row",
-           Sigma=as.double(sigma),
+           Sigma=as.double(Sigma),
            nrow=as.integer(p),
            bound=as.double(mu),
            theta=as.double(theta),
@@ -350,6 +352,13 @@ InverseLinftyOneRowC <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) {
            dup=FALSE,
            package="selectiveInference")
 
+  # Check feasibility
+
+  if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) {
+    print("Solution for row of M does not seem to be feasible")
+    warning("Solution for row of M does not seem to be feasible")
+  }
+
   return(val$theta)
 }
 
diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c
index 0a626eaa..6309df38 100644
--- a/selectiveInference/src/debiasing_matrix.c
+++ 
+// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - e_i^T\theta + \mu \|\theta\|_1 + +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. // Update one coordinate double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ @@ -36,7 +40,7 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ } if (row == coord) { - linear_term += 1; + linear_term -= 1; } // Now soft-threshold the coord entry of theta @@ -44,11 +48,14 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| // with q=quadratic_term and l=linear_term + // With a negative linear term, solution should be + // positive + if (linear_term < -bound) { - value = - (-linear_term - bound) / quadratic_term; + value = (-linear_term - bound) / quadratic_term; } else if (linear_term > bound) { - value = (linear_term - bound) / quadratic_term; + value = -(linear_term - bound) / quadratic_term; } theta_ptr = ((double *) theta + coord); From e87d12dbcddd960e8d5856915818a251969deb49 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 15 Aug 2017 21:54:45 -0700 Subject: [PATCH 180/396] removed print statement --- selectiveInference/R/funs.fixed.R | 1 - 1 file changed, 1 deletion(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 1bf216e2..53dc61ff 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -355,7 +355,6 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50, threshold=1e-2 ) { # Check feasibility if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { - print("Solution for row of M does not seem to be feasible") warning("Solution for row of M does not seem to be feasible") } From 210d0feff9932686e8210454138e577342520e21 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 15 Aug 2017 22:00:19 -0700 Subject: [PATCH 181/396] using C for each row --- selectiveInference/R/funs.fixed.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 53dc61ff..b69dabb7 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -159,7 +159,8 @@ sigma=NULL, alpha=0.1, hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) + useC = TRUE + htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, useC=useC) # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) @@ -269,7 +270,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { +InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, useC = FALSE) { # InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { isgiven <- 1; if (is.null(mu)){ @@ -294,7 +295,11 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold incr <- 0; while ((mu.stop != 
1)&&(try.no<10)){ last.beta <- beta - output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + if (useC == FALSE) { + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + } else { + output <- InverseLinftyOneRowC(sigma, i, mu, maxiter=maxiter) + } beta <- output$optsol iter <- output$iter if (isgiven==1){ @@ -334,7 +339,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50, threshold=1e-2 ) { +InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { p = nrow(Sigma) basis_vector = rep(0, p) From c7c41e417693578173e272e2653c46b3a3edaba1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 08:32:39 -0700 Subject: [PATCH 182/396] optimized the C code a bit -- still has debug statements --- selectiveInference/R/funs.fixed.R | 20 ++- selectiveInference/src/debiasing_matrix.c | 154 +++++++++++++++------- 2 files changed, 119 insertions(+), 55 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index b69dabb7..584018fd 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -348,10 +348,12 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { val = .C("find_one_row", Sigma=as.double(Sigma), + Sigma_diag=as.double(diag(Sigma)), + Sigma_theta=as.double(rep(0, p)), nrow=as.integer(p), bound=as.double(mu), theta=as.double(theta), - maxiter=as.integer(maxiter), + maxiter=as.integer(50), row=as.integer(i-1), coord=as.integer(i-1), dup=FALSE, @@ -359,6 +361,12 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { # Check feasibility + # DEBUG statements + #print(diag(Sigma)) + #print(0.5 * sum(val$theta * (Sigma %*% val$theta)) - val$theta[i] + mu * sum(abs(val$theta))) + #print(Sigma %*% val$theta - val$Sigma_theta) + #print(val$nrow) # number of iterations + if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { warning("Solution for row of M does not seem to be feasible") } @@ -372,11 +380,11 @@ InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { mu0 <- rho/(1+rho); beta <- rep(0,p); - if (mu >= mu0){ - beta[i] <- (1-mu0)/sigma[i,i]; - returnlist <- list("optsol" = beta, "iter" = 0); - return(returnlist); - } + #if (mu >= mu0){ + # beta[i] <- (1-mu0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} diff.norm2 <- 1; last.norm2 <- 1; diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 6309df38..7dfeb1ec 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ b/selectiveInference/src/debiasing_matrix.c @@ -11,34 +11,72 @@ // Therefore we don't have to negate the answer to get theta. 
// Update one coordinate -double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int row, /* which row: 0-based */ - int coord) /* which coordinate to update: 0-based */ +double objective(double *Sigma, /* A covariance matrix: X^TX/n */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ { + int irow, icol; + double value = 0; + double *Sigma_ptr = Sigma; + double *theta_row_ptr, *theta_col_ptr; + theta_row_ptr = theta; + theta_col_ptr = theta; + + for (irow=0; irow 1.e-6 * (fabs(value) + fabs(old_value))) { // Update the linear term + + delta = value - old_value; + Sigma_ptr = ((double *) Sigma + coord * nrow); + Sigma_theta_ptr = ((double *) Sigma_theta); + + for (icol=0; icol before) { + fprintf(stderr, "not a descent step!!!!!!!!!!!!!!!!!!!!!\n"); + } - theta_row_ptr = theta; - theta_col_ptr = theta; - for (irow=0; irow 3)) { + if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 5)) { break; } + + fprintf(stderr, "%f %f value\n", old_value, new_value); old_value = new_value; } From f6ae738bad048aca71a22c94ba4fe8cdb9b1f3d7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 08:50:20 -0700 Subject: [PATCH 183/396] removing debug statements, making objective computation faster --- selectiveInference/R/funs.fixed.R | 6 --- selectiveInference/src/debiasing_matrix.c | 46 +++++------------------ 2 files changed, 9 insertions(+), 43 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 584018fd..aa8eac21 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -361,12 +361,6 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { # Check feasibility - # DEBUG statements - #print(diag(Sigma)) - #print(0.5 * sum(val$theta * (Sigma %*% val$theta)) - val$theta[i] + mu * sum(abs(val$theta))) - #print(Sigma %*% val$theta - val$Sigma_theta) - #print(val$nrow) # number of iterations - if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { warning("Solution for row of M does not seem to be feasible") } diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 7dfeb1ec..0849f38b 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ b/selectiveInference/src/debiasing_matrix.c @@ -27,15 +27,16 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ for (irow=0; irow before) { - fprintf(stderr, "not a descent step!!!!!!!!!!!!!!!!!!!!!\n"); - } - + theta_ptr = ((double *) theta + coord); + *theta_ptr = value; } - Sigma_ptr = ((double *) Sigma + coord * nrow); - Sigma_theta_ptr = ((double *) Sigma_theta); - for (icol=0; icol 5)) { + if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { break; } - fprintf(stderr, "%f %f value\n", old_value, new_value); old_value = new_value; } From 1a8e2ad351c08f7eab632d6f7184651d1640e89e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 09:23:01 -0700 Subject: [PATCH 184/396] slightly lower tolerance --- selectiveInference/src/debiasing_matrix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 0849f38b..420ecb99 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ 
b/selectiveInference/src/debiasing_matrix.c @@ -141,7 +141,7 @@ void find_one_row(double *Sigma, /* A covariance matrix: X^TX/n */ bound, theta); double new_value; - double tol=1.e-10; + double tol=1.e-6; for (iter=0; iter Date: Wed, 16 Aug 2017 10:16:24 -0700 Subject: [PATCH 185/396] using active sets --- selectiveInference/R/funs.fixed.R | 2 + selectiveInference/src/debiasing_matrix.c | 144 ++++++++++++++++++++-- 2 files changed, 133 insertions(+), 13 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index aa8eac21..a30fc0bc 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -350,6 +350,8 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { Sigma=as.double(Sigma), Sigma_diag=as.double(diag(Sigma)), Sigma_theta=as.double(rep(0, p)), + ever_active=as.integer(i), + nactive_ptr=as.integer(1), nrow=as.integer(p), bound=as.double(mu), theta=as.double(theta), diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 420ecb99..e3ef4156 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ b/selectiveInference/src/debiasing_matrix.c @@ -12,6 +12,8 @@ // Update one coordinate double objective(double *Sigma, /* A covariance matrix: X^TX/n */ + int *ever_active, /* Ever active set */ + int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in Sigma */ int row, /* which row: 0-based */ double bound, /* Lagrange multipler for \ell_1 */ @@ -21,33 +23,106 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ double value = 0; double *Sigma_ptr = Sigma; double *theta_row_ptr, *theta_col_ptr; + int *active_row_ptr, *active_col_ptr; + int active_row, active_col; + int nactive = *nactive_ptr; theta_row_ptr = theta; theta_col_ptr = theta; + for (irow=0; irow 0) && (fabs(gradient + bound) > (1. + tol) * bound)) { + fail += 1; + } + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > (1. + tol) * bound)) { + fail += 1; } } - if (irow == row) { - value -= (*theta_row_ptr); // the elementary basis vector term + else { + if (fabs(gradient) > (1. 
+ tol) * bound) { + fail += 1; + } } - value = value + bound * fabs((*theta_row_ptr)); // the \ell_1 term - theta_row_ptr++; } - return(value); + return(fail == 0); } double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_diag, /* Diagonal entries of Sigma */ double *Sigma_theta, /* Sigma times theta */ + int *ever_active, /* Ever active set */ + int *nactive_ptr, /* Size of ever active set */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ @@ -67,6 +142,8 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n double *quadratic_ptr = ((double *) Sigma_diag + coord); double quadratic_term = *quadratic_ptr; + int *ever_active_ptr; + Sigma_theta_ptr = ((double *) Sigma_theta + coord); linear_term = *Sigma_theta_ptr; @@ -97,7 +174,17 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n value = -(linear_term - bound) / quadratic_term; } - if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { // Update the linear term + // Add to active set if necessary + + if ((value != 0) && (is_active(coord, ever_active, nactive_ptr) == 0)) { + ever_active_ptr = ((int *) ever_active + *nactive_ptr); + *ever_active_ptr = coord; + *nactive_ptr += 1; + } + + // Update the linear term + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { delta = value - old_value; Sigma_ptr = ((double *) Sigma + coord * nrow); @@ -121,6 +208,8 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n void find_one_row(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_diag, /* Diagonal entry of covariance matrix */ double *Sigma_theta, /* Sigma times theta */ + int *ever_active, /* Ever active set */ + int *nactive_ptr, /* Size of ever active set */ int *nrow_ptr, /* How many rows in Sigma */ double *bound_ptr, /* feasibility parameter */ double *theta, /* current value */ @@ -135,13 +224,17 @@ void find_one_row(double *Sigma, /* A covariance matrix: X^TX/n */ double bound = *bound_ptr; int nrow = *nrow_ptr; + fprintf(stderr, "starting now\n"); + double old_value = objective(Sigma, + ever_active, + nactive_ptr, nrow, row, bound, theta); double new_value; - double tol=1.e-6; + double tol=1.e-5; for (iter=0; iter 0)) { + fprintf(stderr, "ending in objective value check\n"); break; } From 096b0b8f9f92bb360f8d4f59bc9cbd6f5e4437c1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 17:02:26 -0700 Subject: [PATCH 186/396] begun the Rcpp extension --- selectiveInference/src/Makevars | 14 ++++++ selectiveInference/src/Rcpp-debias.cpp | 45 +++++++++++++++++++ .../src/{debiasing_matrix.c => debias.c} | 6 +-- selectiveInference/src/debias.h | 10 +++++ 4 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 selectiveInference/src/Makevars create mode 100644 selectiveInference/src/Rcpp-debias.cpp rename selectiveInference/src/{debiasing_matrix.c => debias.c} (97%) create mode 100644 selectiveInference/src/debias.h diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars new file mode 100644 index 00000000..191c7742 --- /dev/null +++ b/selectiveInference/src/Makevars @@ -0,0 +1,14 @@ +PKG_CFLAGS=-DR_PACKAGE=1 -I. +PKG_CPPFLAGS=-DR_PACKAGE=1 -I. +PKG_LIBS=-L. 
-ldebias + +$(SHLIB): Rcpp-debias.o RcppExports.o + +Rcpp-debias.o: debias.ts +RcppExports.o: debias.ts + +debias.ts: + gcc -shared -fPIC selectiveInference/src/debiasing_matrix.c -o debias.so + +clean: + rm -f debias.so diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp new file mode 100644 index 00000000..b154dcdd --- /dev/null +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -0,0 +1,45 @@ +#include // need to include the main Rcpp header file +#include // where find_one_row is defined + +// [[Rcpp::export]] +Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, + int row, // 0-based + double bound, + int maxiter) { + + int nrow = Sigma.nrow(); // number of features + int nactive = 1; + Rcpp::IntegerVector ever_active(1); + Rcpp::NumericVector Sigma_diag(nrow); + Rcpp::NumericVector Sigma_theta(nrow); + +} + Rcpp::NumericVector xUL, + int maxEval, double absErr, double tol, int vectorInterface, unsigned norm) { + + count = 0; /* Zero count */ + fun = f; + + Rcpp::NumericVector integral(fDim); + Rcpp::NumericVector errVals(fDim); + int retCode; + + // Rcpp::Rcout<<"Call Integrator" < Date: Wed, 16 Aug 2017 17:03:47 -0700 Subject: [PATCH 187/396] trying to save some copy time -- R doesn't seem to care about extra arguments to .C --- selectiveInference/R/funs.fixed.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index a30fc0bc..73a05355 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -346,19 +346,22 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { basis_vector[i] = 1. theta = rep(0, p) + Sigma_ = as.double(Sigma) + Sigma_diag_ = as.double(diag(Sigma)) + Sigma_theta_ = as.double(rep(0, p)) val = .C("find_one_row", - Sigma=as.double(Sigma), - Sigma_diag=as.double(diag(Sigma)), - Sigma_theta=as.double(rep(0, p)), - ever_active=as.integer(i), + Sigma=Sigma_, + Sigma_diag=Sigma_diag_, + Sigma_theta=Sigma_theta_, + ever_active=as.integer(i-1), nactive_ptr=as.integer(1), nrow=as.integer(p), bound=as.double(mu), theta=as.double(theta), maxiter=as.integer(50), row=as.integer(i-1), - coord=as.integer(i-1), - dup=FALSE, + DUP=FALSE, + NAOK=TRUE, package="selectiveInference") # Check feasibility From 5bf80181a3a407747c7ff44fafbc54928b3f7ac7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 17:44:22 -0700 Subject: [PATCH 188/396] don't lose this --- selectiveInference/src/Rcpp-debias.cpp | 64 +++++++++++++------------- selectiveInference/src/debias.c | 20 ++++---- selectiveInference/src/debias.h | 28 +++++++---- 3 files changed, 61 insertions(+), 51 deletions(-) diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index b154dcdd..384c105b 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -1,5 +1,5 @@ #include // need to include the main Rcpp header file -#include // where find_one_row is defined +#include // where find_one_row_void is defined // [[Rcpp::export]] Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, @@ -8,38 +8,40 @@ Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, int maxiter) { int nrow = Sigma.nrow(); // number of features - int nactive = 1; + + // Active set + + int irow; + Rcpp::IntegerVector nactive(1); // An array so we can easily modify it Rcpp::IntegerVector ever_active(1); + int *ever_active_p = ever_active.begin(); + *ever_active_p = row; + + // 
Extract the diagonal Rcpp::NumericVector Sigma_diag(nrow); - Rcpp::NumericVector Sigma_theta(nrow); + double *sigma_p = Sigma_diag.begin(); -} - Rcpp::NumericVector xUL, - int maxEval, double absErr, double tol, int vectorInterface, unsigned norm) { - - count = 0; /* Zero count */ - fun = f; - - Rcpp::NumericVector integral(fDim); - Rcpp::NumericVector errVals(fDim); - int retCode; - - // Rcpp::Rcout<<"Call Integrator" < Date: Wed, 16 Aug 2017 17:52:02 -0700 Subject: [PATCH 189/396] segfaulting on the test_big.R :( --- selectiveInference/DESCRIPTION | 2 ++ selectiveInference/NAMESPACE | 2 +- selectiveInference/R/funs.fixed.R | 51 ++++++++++++++++--------------- selectiveInference/src/Makevars | 16 +++------- 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index d0740ecb..d9026221 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -18,3 +18,5 @@ Description: New tools for post-selection inference, for use with forward models. License: GPL-2 RoxygenNote: 5.0.1 +LinkingTo: Rcpp +Imports: Rcpp diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 099fdc5c..ab2a1118 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -44,5 +44,5 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") - +importFrom("Rcpp", "sourceCpp") diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index a30fc0bc..f4b5a26d 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -341,34 +341,37 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { - p = nrow(Sigma) - basis_vector = rep(0, p) - basis_vector[i] = 1. - theta = rep(0, p) + theta = find_one_row(Sigma, i, mu, maxiter) + return(theta) +} +# p = nrow(Sigma) +# basis_vector = rep(0, p) +# basis_vector[i] = 1. 
+# theta = rep(0, p) - val = .C("find_one_row", - Sigma=as.double(Sigma), - Sigma_diag=as.double(diag(Sigma)), - Sigma_theta=as.double(rep(0, p)), - ever_active=as.integer(i), - nactive_ptr=as.integer(1), - nrow=as.integer(p), - bound=as.double(mu), - theta=as.double(theta), - maxiter=as.integer(50), - row=as.integer(i-1), - coord=as.integer(i-1), - dup=FALSE, - package="selectiveInference") +# val = .C("find_one_row", +# Sigma=as.double(Sigma), +# Sigma_diag=as.double(diag(Sigma)), +# Sigma_theta=as.double(rep(0, p)), +# ever_active=as.integer(i), +# nactive_ptr=as.integer(1), +# nrow=as.integer(p), +# bound=as.double(mu), +# theta=as.double(theta), +# maxiter=as.integer(50), +# row=as.integer(i-1), +# coord=as.integer(i-1), +# dup=FALSE, +# package="selectiveInference") - # Check feasibility +# # Check feasibility - if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { - warning("Solution for row of M does not seem to be feasible") - } +# if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { +# warning("Solution for row of M does not seem to be feasible") +# } - return(val$theta) -} +# return(val$theta) +# } InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { p <- nrow(sigma); diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars index 191c7742..c68d0bd5 100644 --- a/selectiveInference/src/Makevars +++ b/selectiveInference/src/Makevars @@ -1,14 +1,8 @@ -PKG_CFLAGS=-DR_PACKAGE=1 -I. -PKG_CPPFLAGS=-DR_PACKAGE=1 -I. -PKG_LIBS=-L. -ldebias +PKG_CFLAGS= -I. +PKG_CPPFLAGS= -I. +PKG_LIBS=-L. -$(SHLIB): Rcpp-debias.o RcppExports.o - -Rcpp-debias.o: debias.ts -RcppExports.o: debias.ts - -debias.ts: - gcc -shared -fPIC selectiveInference/src/debiasing_matrix.c -o debias.so +$(SHLIB): Rcpp-debias.o RcppExports.o debias.o clean: - rm -f debias.so + rm -f *o From 4caf22548fb079f0c42c256fe1b68d78f401e3d8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 18:24:14 -0700 Subject: [PATCH 190/396] changes to funs.fixed.R that will be overwritten --- selectiveInference/R/funs.fixed.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index a30fc0bc..0bbc1190 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -346,9 +346,12 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { basis_vector[i] = 1. theta = rep(0, p) + Sigma_ = as.double(Sigma) + Sigma_diag_ = as.double(diag(Sigma)) + val = .C("find_one_row", - Sigma=as.double(Sigma), - Sigma_diag=as.double(diag(Sigma)), + Sigma=Sigma_, + Sigma_diag=Sigma_diag_, Sigma_theta=as.double(rep(0, p)), ever_active=as.integer(i), nactive_ptr=as.integer(1), From f53af23587372422cfcbe4e5d9105e2a384edbc6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 18:41:19 -0700 Subject: [PATCH 191/396] success -- about 2ms now -- python still faster (as timed through rpy2) :) --- README.md | 9 +++++++ selectiveInference/R/funs.fixed.R | 35 ++++++-------------------- selectiveInference/src/Rcpp-debias.cpp | 8 +++--- selectiveInference/src/debias.c | 13 +++------- selectiveInference/src/debias.h | 8 +++--- 5 files changed, 29 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 962151e0..cf855e11 100644 --- a/README.md +++ b/README.md @@ -21,3 +21,12 @@ The latest release of the package can be installed through CRAN: install.packages("selectiveInference") ``` Code in repo is under development and may be unstable. 
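A hedged usage sketch for this stage of the series (it assumes the package was installed with the compiled code; `InverseLinftyOneRowC` is internal, hence the `:::` access, and the dimensions and choice of mu are illustrative):

```R
# Compute one row of the approximate inverse and verify feasibility.
set.seed(1)
n <- 100; p <- 50
X <- matrix(rnorm(n * p), n, p)
Sigma <- crossprod(X) / n
mu <- sqrt(log(p) / n)
theta <- selectiveInference:::InverseLinftyOneRowC(Sigma, i = 1, mu = mu)
e1 <- c(1, rep(0, p - 1))
max(abs(Sigma %*% theta - e1))  # should not exceed roughly mu
```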
+ +## For development + +You will have to run + +```R +library(Rcpp) +Rcpp::compileAttributes('selectiveInference') +``` \ No newline at end of file diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index f4b5a26d..7314511c 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -295,6 +295,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold incr <- 0; while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta + useC = TRUE if (useC == FALSE) { output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) } else { @@ -341,37 +342,17 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { - theta = find_one_row(Sigma, i, mu, maxiter) - return(theta) -} -# p = nrow(Sigma) -# basis_vector = rep(0, p) -# basis_vector[i] = 1. -# theta = rep(0, p) + theta = find_one_row(Sigma, i-1, mu, maxiter) -# val = .C("find_one_row", -# Sigma=as.double(Sigma), -# Sigma_diag=as.double(diag(Sigma)), -# Sigma_theta=as.double(rep(0, p)), -# ever_active=as.integer(i), -# nactive_ptr=as.integer(1), -# nrow=as.integer(p), -# bound=as.double(mu), -# theta=as.double(theta), -# maxiter=as.integer(50), -# row=as.integer(i-1), -# coord=as.integer(i-1), -# dup=FALSE, -# package="selectiveInference") + # Check feasibility -# # Check feasibility + if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { + warning("Solution for row of M does not seem to be feasible") + } -# if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { -# warning("Solution for row of M does not seem to be feasible") -# } + return(theta) -# return(val$theta) -# } +} InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { p <- nrow(sigma); diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 384c105b..d8cd7294 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -37,11 +37,11 @@ Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, (double *) Sigma_theta.begin(), (int *) ever_active.begin(), (int *) nactive.begin(), - (int *) &nrow, - (double *) &bound, + nrow, + bound, (double *) theta.begin(), - (int *) &maxiter, - (int *) &row); + maxiter, + row); return theta; } diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 7932b2ee..f8ff6e3d 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -210,19 +210,15 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_theta, /* Sigma times theta */ int *ever_active, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ - int *nrow_ptr, /* How many rows in Sigma */ - double *bound_ptr, /* feasibility parameter */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ double *theta, /* current value */ - int *maxiter_ptr, /* how many iterations */ - int *row_ptr) /* which coordinate to update: 0-based */ + int maxiter, /* how many iterations */ + int row) /* which coordinate to update: 0-based */ { - int maxiter = *maxiter_ptr; int iter = 0; int icoord = 0; - int row = *row_ptr; - double bound = *bound_ptr; - int nrow = *nrow_ptr; fprintf(stderr, "starting now\n"); @@ -299,6 +295,5 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ old_value = new_value; } - *nrow_ptr = iter-1; } diff --git 
a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 28d2a882..107d9317 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -8,11 +8,11 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_theta, /* Sigma times theta */ int *ever_active, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ - int *nrow_ptr, /* How many rows in Sigma */ - double *bound_ptr, /* feasibility parameter */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ double *theta, /* current value */ - int *maxiter_ptr, /* how many iterations */ - int *row_ptr); /* which coordinate to update: 0-based */ + int maxiter, /* how many iterations */ + int row); /* which coordinate to update: 0-based */ #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ From 7920c8c5fd8564e5410104ac993303ceddb1205b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:27:06 -0700 Subject: [PATCH 192/396] BF: column major, removing print statements --- selectiveInference/src/debias.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index f8ff6e3d..fd9be790 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,4 +1,3 @@ -#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -41,10 +40,8 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ active_col_ptr = ((int *) ever_active + icol); active_col = *active_col_ptr; theta_col_ptr = ((double *) theta + active_col); - - fprintf(stderr, "%d %d \n", active_row, active_col); - Sigma_ptr = ((double *) Sigma + nrow * active_row + active_col); + Sigma_ptr = ((double *) Sigma + nrow * active_col + active_row); // Matrices are column-major order value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr); } @@ -220,8 +217,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ int iter = 0; int icoord = 0; - fprintf(stderr, "starting now\n"); - double old_value = objective(Sigma, ever_active, nactive_ptr, @@ -252,7 +247,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ nrow, row, bound) == 1) { - fprintf(stderr, "ending in first KKT check\n"); break; } @@ -275,7 +269,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ nrow, row, bound) == 1) { - fprintf(stderr, "ending in second KKT check\n"); break; } @@ -288,7 +281,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ theta); if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { - fprintf(stderr, "ending in objective value check\n"); break; } From 6756ab2935c0dd93ab4bde228db31e09066b14a6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:28:14 -0700 Subject: [PATCH 193/396] changing name of function, evaluation of feasibility in Rcpp --- Makefile | 4 ++++ README.md | 5 ++--- selectiveInference/R/funs.fixed.R | 8 +++++--- selectiveInference/src/Rcpp-debias.cpp | 25 ++++++++++++++++++++----- 4 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..dfe6002f --- /dev/null +++ b/Makefile @@ -0,0 +1,4 @@ +Rcpp: + rm selectiveInference/src/RcppExports.cpp + rm selectiveInference/R/RcppExports.R + Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" \ No newline at end of 
file diff --git a/README.md b/README.md index cf855e11..0a608557 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,6 @@ Code in repo is under development and may be unstable. You will have to run -```R -library(Rcpp) -Rcpp::compileAttributes('selectiveInference') +``` +make Rcpp ``` \ No newline at end of file diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 7314511c..f2b98386 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -342,11 +342,13 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { - theta = find_one_row(Sigma, i-1, mu, maxiter) + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter) # C function uses 0-based indexing + theta = result$theta + feasible_val = result$feasible_val - # Check feasibility + # Check feasibility - if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { + if (feasible_val > 1.01 * mu) { warning("Solution for row of M does not seem to be feasible") } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index d8cd7294..48f484f5 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -2,10 +2,10 @@ #include // where find_one_row_void is defined // [[Rcpp::export]] -Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, - int row, // 0-based - double bound, - int maxiter) { +Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, + int row, // 0-based + double bound, + int maxiter) { int nrow = Sigma.nrow(); // number of features @@ -42,6 +42,21 @@ Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, (double *) theta.begin(), maxiter, row); + + // Check whether feasible + + double feasible_val = 0; + double val; + for (irow=0; irow feasible_val) { + feasible_val = fabs(val); + } + } - return theta; + return(Rcpp::List::create(Rcpp::Named("theta") = theta, + Rcpp::Named("feasible_val") = feasible_val)); } From 0e584354e49c2fb68039761a0d6b040c1e37c1b9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:37:00 -0700 Subject: [PATCH 194/396] RF: renamed C function, removing R solver for one row --- selectiveInference/R/funs.fixed.R | 82 ++------------------------ selectiveInference/src/Rcpp-debias.cpp | 25 ++++---- selectiveInference/src/debias.c | 22 +++---- selectiveInference/src/debias.h | 20 +++---- 4 files changed, 40 insertions(+), 109 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index f2b98386..d0bc8615 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -159,8 +159,8 @@ sigma=NULL, alpha=0.1, hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - useC = TRUE - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, useC=useC) + + htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) @@ -270,7 +270,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, useC = FALSE) { +InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { # InverseLinfty 
<- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { isgiven <- 1; if (is.null(mu)){ @@ -295,12 +295,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold incr <- 0; while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - useC = TRUE - if (useC == FALSE) { - output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) - } else { - output <- InverseLinftyOneRowC(sigma, i, mu, maxiter=maxiter) - } + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter) beta <- output$optsol iter <- output$iter if (isgiven==1){ @@ -340,7 +335,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50) { result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter) # C function uses 0-based indexing theta = result$theta @@ -352,73 +347,8 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { warning("Solution for row of M does not seem to be feasible") } - return(theta) - -} + return(result) -InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { - p <- nrow(sigma); - rho <- max(abs(sigma[i,-i])) / sigma[i,i]; - mu0 <- rho/(1+rho); - beta <- rep(0,p); - - #if (mu >= mu0){ - # beta[i] <- (1-mu0)/sigma[i,i]; - # returnlist <- list("optsol" = beta, "iter" = 0); - # return(returnlist); - #} - - diff.norm2 <- 1; - last.norm2 <- 1; - iter <- 1; - iter.old <- 1; - beta[i] <- (1-mu0)/sigma[i,i]; - beta.old <- beta; - sigma.tilde <- sigma; - diag(sigma.tilde) <- 0; - vs <- -sigma.tilde%*%beta; - - while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ - - for (j in 1:p){ - oldval <- beta[j]; - v <- vs[j]; - if (j==i) - v <- v+1; - beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; - if (oldval != beta[j]){ - vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; - } - } - - iter <- iter + 1; - if (iter==2*iter.old){ - d <- beta - beta.old; - diff.norm2 <- sqrt(sum(d*d)); - last.norm2 <-sqrt(sum(beta*beta)); - iter.old <- iter; - beta.old <- beta; - if (iter>10) - vs <- -sigma.tilde%*%beta; - } - } - - returnlist <- list("optsol" = beta, "iter" = iter) - return(returnlist) -} - -SoftThreshold <- function( x, lambda ) { - # - # Standard soft thresholding - # - if (x>lambda){ - return (x-lambda);} - else { - if (x< (-lambda)){ - return (x+lambda);} - else { - return (0); } - } } ############################## diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 48f484f5..bb8b5cd1 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -32,16 +32,16 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Now call our C function - find_one_row_void((double *) Sigma.begin(), - (double *) Sigma_diag.begin(), - (double *) Sigma_theta.begin(), - (int *) ever_active.begin(), - (int *) nactive.begin(), - nrow, - bound, - (double *) theta.begin(), - maxiter, - row); + int iter = find_one_row_((double *) Sigma.begin(), + (double *) Sigma_diag.begin(), + (double *) Sigma_theta.begin(), + (int *) ever_active.begin(), + (int *) nactive.begin(), + nrow, + bound, + (double *) theta.begin(), + maxiter, + row); // Check whether feasible @@ -57,6 +57,7 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, } } - return(Rcpp::List::create(Rcpp::Named("theta") = theta, - Rcpp::Named("feasible_val") = feasible_val)); + 
return(Rcpp::List::create(Rcpp::Named("optsol") = theta, + Rcpp::Named("feasible_val") = feasible_val, + Rcpp::Named("iter") = iter)); } diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index fd9be790..d2f671df 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -202,16 +202,16 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n } -void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ - double *Sigma_diag, /* Diagonal entry of covariance matrix */ - double *Sigma_theta, /* Sigma times theta */ - int *ever_active, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row) /* which coordinate to update: 0-based */ +int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ + double *Sigma_diag, /* Diagonal entry of covariance matrix */ + double *Sigma_theta, /* Sigma times theta */ + int *ever_active, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + int row) /* which coordinate to update: 0-based */ { int iter = 0; @@ -286,6 +286,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ old_value = new_value; } - + return(iter); } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 107d9317..48f9753f 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -3,16 +3,16 @@ extern "C" { #endif /* __cplusplus */ -void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ - double *Sigma_diag, /* Diagonal entry of covariance matrix */ - double *Sigma_theta, /* Sigma times theta */ - int *ever_active, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row); /* which coordinate to update: 0-based */ +int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ + double *Sigma_diag, /* Diagonal entry of covariance matrix */ + double *Sigma_theta, /* Sigma times theta */ + int *ever_active, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + int row); /* which coordinate to update: 0-based */ #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ From 06143059f7732e98bd30f18976775a7a5954ba9b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:40:55 -0700 Subject: [PATCH 195/396] need to make Rcpp exports --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index aa61233f..a3fdd1f7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,8 +7,9 @@ r: - devel addons: apt: - packages: libmpfr-dev + packages: libmpfr-dev warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? 
+ - make Rcpp - cd selectiveInference From ca60bc90d631c8723c031b6d7f8a2c748816a82a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:44:51 -0700 Subject: [PATCH 196/396] ignore first two lines if status not OK? --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index dfe6002f..5c55bb82 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ Rcpp: - rm selectiveInference/src/RcppExports.cpp - rm selectiveInference/R/RcppExports.R + - rm -f selectiveInference/src/RcppExports.cpp + - rm -f selectiveInference/R/RcppExports.R Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" \ No newline at end of file From 60f6cdea4e355d70b305bd61062371dedc4b7e06 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:50:24 -0700 Subject: [PATCH 197/396] install Rcpp earlier --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a3fdd1f7..21270e09 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,5 +11,6 @@ addons: warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? + - R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")' - make Rcpp - cd selectiveInference From c1aef63975841d882e0a68ec4e84382b0b4c7cd6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 17 Aug 2017 00:26:42 -0700 Subject: [PATCH 198/396] using Rcpp for matrixcomps as well -- update1 and downdate1 -- examples now OK on Rcmd check --- selectiveInference/R/funs.common.R | 8 +---- selectiveInference/R/funs.lar.R | 9 +----- selectiveInference/src/Rcpp-matrixcomps.cpp | 34 ++++++++++++++++++++ selectiveInference/src/debias.h | 1 + selectiveInference/src/matrixcomps.c | 13 +++----- selectiveInference/src/matrixcomps.h | 12 +++++++ selectiveInference/src/symbols.rds | Bin 367 -> 0 bytes 7 files changed, 53 insertions(+), 24 deletions(-) create mode 100644 selectiveInference/src/Rcpp-matrixcomps.cpp create mode 100644 selectiveInference/src/matrixcomps.h delete mode 100644 selectiveInference/src/symbols.rds diff --git a/selectiveInference/R/funs.common.R b/selectiveInference/R/funs.common.R index 042e0747..678d736c 100644 --- a/selectiveInference/R/funs.common.R +++ b/selectiveInference/R/funs.common.R @@ -152,13 +152,7 @@ updateQR <- function(Q1,Q2,R,col) { n = ncol(Q1) k = ncol(Q2) - a = .C("update1", - Q2=as.double(Q2), - w=as.double(t(Q2)%*%col), - m=as.integer(m), - k=as.integer(k), - dup=FALSE, - package="selectiveInference") + a = update1_(as.matrix(Q2), t(Q2)%*%col, m, k) # Rcpp call Q2 = matrix(a$Q2,nrow=m) w = c(t(Q1)%*%col,a$w) diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index eb31b4e8..a01b2d32 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -254,14 +254,7 @@ downdateQR <- function(Q1,Q2,R,col) { m = nrow(Q1) n = ncol(Q1) - a = .C("downdate1", - Q1=as.double(Q1), - R=as.double(R), - col=as.integer(col-1), - m=as.integer(m), - n=as.integer(n), - dup=FALSE, - package="selectiveInference") + a = downdate1_(as.matrix(Q1), R, col, m, n) # Rcpp call Q1 = matrix(a$Q1,nrow=m) R = matrix(a$R,nrow=n) diff --git a/selectiveInference/src/Rcpp-matrixcomps.cpp b/selectiveInference/src/Rcpp-matrixcomps.cpp new file mode 100644 index 00000000..045b4b75 --- /dev/null +++ b/selectiveInference/src/Rcpp-matrixcomps.cpp @@ -0,0 +1,34 @@ +#include // need to include the main Rcpp header file +#include // where update1, downdate1 are defined + +// [[Rcpp::export]] +Rcpp::List 
update1_(Rcpp::NumericMatrix Q2,
+ Rcpp::NumericVector w,
+ int m,
+ int k) {
+
+ update1(Q2.begin(),
+ w.begin(),
+ m,
+ k);
+
+ return(Rcpp::List::create(Rcpp::Named("Q2") = Q2,
+ Rcpp::Named("w") = w));
+}
+
+// [[Rcpp::export]]
+Rcpp::List downdate1_(Rcpp::NumericMatrix Q1,
+ Rcpp::NumericMatrix R,
+ int j0,
+ int m,
+ int n) {
+
+ downdate1(Q1.begin(),
+ R.begin(),
+ j0,
+ m,
+ n);
+
+ return(Rcpp::List::create(Rcpp::Named("Q1") = Q1,
+ Rcpp::Named("R") = R));
+}
diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h
index 48f9753f..ed14cf5a 100644
--- a/selectiveInference/src/debias.h
+++ b/selectiveInference/src/debias.h
@@ -13,6 +13,7 @@ int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */
 double *theta, /* current value */
 int maxiter, /* how many iterations */
 int row); /* which coordinate to update: 0-based */
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
diff --git a/selectiveInference/src/matrixcomps.c b/selectiveInference/src/matrixcomps.c
index 4a516a1e..bec35060 100644
--- a/selectiveInference/src/matrixcomps.c
+++ b/selectiveInference/src/matrixcomps.c
@@ -53,11 +53,8 @@ void colrot(double *A, int j1, int j2, int m, int n, int i1, int i2, double c, d
 // where Q1 is m x n and R is n x n. The other part of
 // the Q matrix, Q2 m x (m-n), isn't needed so it isn't
 // passed for efficiency
-void downdate1(double *Q1, double *R, int *j0p, int *mp, int *np) {
- int j0,m,n,j;
- j0 = *j0p;
- m = *mp;
- n = *np;
+void downdate1(double *Q1, double *R, int j0, int m, int n) {
+ int j;
 double c,s;
 for (j=j0+1; j=1; j--) {
diff --git a/selectiveInference/src/matrixcomps.h b/selectiveInference/src/matrixcomps.h
new file mode 100644
index 00000000..8632e3e3
--- /dev/null
+++ b/selectiveInference/src/matrixcomps.h
@@ -0,0 +1,12 @@
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+void update1(double *Q2, double *w, int m, int k);
+
+void downdate1(double *Q1, double *R, int j0, int m, int n);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
diff --git a/selectiveInference/src/symbols.rds b/selectiveInference/src/symbols.rds
deleted file mode 100644
index 06b0e85e927ffbb186121204a3be110d68965ede..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded binary payload omitted -- symbols.rds is a deleted binary file]

From: Jonathan Taylor
Date: Thu, 17 Aug 2017 12:20:11 -0700
Subject: [PATCH 199/396] using a warm start if available
---
 selectiveInference/R/funs.fixed.R | 28 ++++++++++++++++++----
 selectiveInference/src/Rcpp-debias.cpp | 33 ++++++++++----------------
 selectiveInference/src/debias.h | 15 ++++++++----
 3 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R
index d0bc8615..592d61d6 100644
--- a/selectiveInference/R/funs.fixed.R
+++ b/selectiveInference/R/funs.fixed.R
@@ -293,10 +293,13 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold
 mu.stop <- 0;
 try.no <- 1;
 incr <- 0;
+
+ output = NULL
+
 while ((mu.stop != 1)&&(try.no<10)){
 last.beta <- beta
- output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter)
+ output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start
- beta <- output$optsol
+ beta <- output$soln
 iter <- output$iter
 if (isgiven==1){
 mu.stop <- 1
@@ -335,15 +338,26 @@ InverseLinfty <- function(sigma, n,
e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50) { +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { + + # If soln_result is not Null, it is used as a warm start. + # It should be a list + # with entries "soln" and "Sigma_soln" + + if (is.null(soln_result)) { + soln = rep(0, nrow(Sigma)) + Sigma_soln = rep(0, nrow(Sigma)) + } + else { + soln = soln_result$soln + Sigma_soln = soln_result$Sigma_soln + } - result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter) # C function uses 0-based indexing - theta = result$theta - feasible_val = result$feasible_val + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, Sigma_soln) # C function uses 0-based indexing # Check feasibility - if (feasible_val > 1.01 * mu) { + if (!result$kkt_check) { warning("Solution for row of M does not seem to be feasible") } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index bb8b5cd1..b529c4ae 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -5,7 +5,9 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, int row, // 0-based double bound, - int maxiter) { + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector Sigma_theta) { int nrow = Sigma.nrow(); // number of features @@ -25,11 +27,6 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, sigma_p[irow] = Sigma(irow, irow); } - // The solution and its product with Sigma - - Rcpp::NumericVector theta(nrow); - Rcpp::NumericVector Sigma_theta(nrow); - // Now call our C function int iter = find_one_row_((double *) Sigma.begin(), @@ -45,19 +42,15 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Check whether feasible - double feasible_val = 0; - double val; - for (irow=0; irow feasible_val) { - feasible_val = fabs(val); - } - } + int kkt_check = check_KKT(theta.begin(), + Sigma_theta.begin(), + nrow, + row, + bound); + + return(Rcpp::List::create(Rcpp::Named("soln") = theta, + Rcpp::Named("Sigma_soln") = Sigma_theta, + Rcpp::Named("iter") = iter, + Rcpp::Named("kkt_check") = kkt_check)); - return(Rcpp::List::create(Rcpp::Named("optsol") = theta, - Rcpp::Named("feasible_val") = feasible_val, - Rcpp::Named("iter") = iter)); } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index ed14cf5a..958df0e1 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -8,11 +8,18 @@ int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_theta, /* Sigma times theta */ int *ever_active, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row); /* which coordinate to update: 0-based */ + int maxiter, /* how many iterations */ + int row); /* which coordinate to update: 0-based */ + +int check_KKT(double *theta, /* current theta */ + double *Sigma_theta, /* Sigma times theta */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ + double bound); /* Lagrange multipler for \ell_1 */ + #ifdef __cplusplus } /* extern "C" */ From a8805b8e0685773ca49f44832cff6a6d1db88906 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 17 Aug 2017 17:24:20 -0700 Subject: 
[PATCH 200/396] trying to find segfault --- selectiveInference/R/funs.fixed.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 592d61d6..4724f085 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -298,6 +298,10 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta + if (!is.null(output)) { + print(c('at least second try', try.no)) + } + print(output) output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start beta <- output$soln iter <- output$iter From abf03513c56d897078132d8922b1d03c95ff43db Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 08:14:31 -0700 Subject: [PATCH 201/396] BF: order of arguments to is_active, renamed the function as well --- selectiveInference/src/debias.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index d2f671df..5701b4c1 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,3 +1,4 @@ +#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -41,6 +42,7 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ active_col = *active_col_ptr; theta_col_ptr = ((double *) theta + active_col); + fprintf(stderr, "%d %d %d\n", active_row, active_col, nactive); Sigma_ptr = ((double *) Sigma + nrow * active_col + active_row); // Matrices are column-major order value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr); @@ -54,9 +56,11 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ return(value); } -int is_active(int coord, - int *nactive_ptr, - int *ever_active) { +// Check if active and add it to active list if necessary + +int update_ever_active(int coord, + int *ever_active, + int *nactive_ptr) { int iactive; int active_var; int nactive = *nactive_ptr; @@ -65,9 +69,17 @@ int is_active(int coord, for (iactive=0; iactive Date: Fri, 18 Aug 2017 08:24:49 -0700 Subject: [PATCH 202/396] removing debug statements --- selectiveInference/R/funs.fixed.R | 4 --- selectiveInference/src/debias.c | 50 ++++++++++++++++++------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 4724f085..592d61d6 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -298,10 +298,6 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - if (!is.null(output)) { - print(c('at least second try', try.no)) - } - print(output) output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start beta <- output$soln iter <- output$iter diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 5701b4c1..08c46ff6 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,4 +1,3 @@ -#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -42,7 +41,6 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ active_col = *active_col_ptr; theta_col_ptr = ((double *) theta + active_col); - fprintf(stderr, "%d %d %d\n", active_row, active_col, nactive); Sigma_ptr = ((double *) Sigma + nrow * active_col + 
active_row); // Matrices are column-major order value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr); @@ -70,11 +68,12 @@ int update_ever_active(int coord, active_var = (*ever_active_ptr); if (active_var == coord) { - fprintf(stderr, "%d %d before\n", *nactive_ptr); + // Add it to the active set and increment the + // number of active variables + ever_active_ptr = ((int *) ever_active + *nactive_ptr); *ever_active_ptr = coord; *nactive_ptr += 1; - fprintf(stderr, "%d %d after\n", *nactive_ptr, coord); return(1); } @@ -126,7 +125,6 @@ int check_KKT(double *theta, /* current theta */ return(fail == 0); } - double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_diag, /* Diagonal entries of Sigma */ double *Sigma_theta, /* Sigma times theta */ @@ -136,7 +134,8 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n double bound, /* feasibility parameter */ double *theta, /* current value */ int row, /* which row: 0-based */ - int coord) /* which coordinate to update: 0-based */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this part of ever_active */ { double delta; @@ -185,7 +184,9 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n // Add to active set if necessary - update_ever_active(coord, ever_active, nactive_ptr); + if (!is_active) { + update_ever_active(coord, ever_active, nactive_ptr); + } // Update the linear term @@ -224,6 +225,8 @@ int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ int iter = 0; int icoord = 0; + int iactive = 0; + int *active_ptr; double old_value = objective(Sigma, ever_active, @@ -237,19 +240,24 @@ int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ for (iter=0; iter Date: Fri, 18 Aug 2017 08:34:01 -0700 Subject: [PATCH 203/396] not using the C code --- selectiveInference/R/funs.fixed.R | 1 + 1 file changed, 1 insertion(+) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 0bbc1190..3c99951a 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -295,6 +295,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold incr <- 0; while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta + useC = FALSE if (useC == FALSE) { output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) } else { From de103f7532e3c889efc9bb7391affce454de8c58 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 08:36:20 -0700 Subject: [PATCH 204/396] removing useC --- selectiveInference/R/funs.fixed.R | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 3c99951a..fe54457f 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -159,8 +159,7 @@ sigma=NULL, alpha=0.1, hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - useC = TRUE - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, useC=useC) + htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) @@ -270,7 +269,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, 
threshold=1e-2, verbose = TRUE, useC = FALSE) { +InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { # InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { isgiven <- 1; if (is.null(mu)){ @@ -295,12 +294,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold incr <- 0; while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - useC = FALSE - if (useC == FALSE) { - output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) - } else { - output <- InverseLinftyOneRowC(sigma, i, mu, maxiter=maxiter) - } + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) beta <- output$optsol iter <- output$iter if (isgiven==1){ From d4875262900a90970daf16fe6e96368c6b1a50b0 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 08:43:12 -0700 Subject: [PATCH 205/396] BF: percent signs in example --- selectiveInference/man/fixedLassoInf.Rd | 35 +++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index d675c3fe..36335249 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -145,7 +145,7 @@ p = 10 sigma = 1 x = matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE) +x = scale(x,TRUE,TRUE) beta = c(3,2,rep(0,p-2)) y = x\%*\%beta + sigma*rnorm(n) @@ -165,10 +165,10 @@ out ## as above, but use lar function instead to get initial ## lasso fit (should get same results) - lfit = lar(x,y,normalize=FALSE) - beta = coef(lfit,s=lambda,mode="lambda") - out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) - out2 +lfit = lar(x,y,normalize=FALSE) +beta = coef(lfit,s=lambda,mode="lambda") +out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out2 ## mimic different penalty factors by first scaling x set.seed(43) @@ -249,5 +249,30 @@ status=sample(c(0,1),size=n,replace=TRUE) # compute fixed lambda p-values and selection intervals out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox") out + +# Debiased lasso or "full" + +n = 50 +p = 100 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x = scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# first run glmnet +gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE) + +# extract coef for a given lambda; note the 1/n factor! 
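+# (glmnet minimizes (1/(2n))||y - x b||^2 + s ||b||_1, while fixedLassoInf
+# assumes the (1/2)||y - x b||^2 + lambda ||b||_1 scaling, hence s = lambda/n)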
+# (and we don't save the intercept term) +lambda = 2.8 +beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + +# compute fixed lambda p-values and selection intervals +out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) +out + } \ No newline at end of file From 7360386212cb047287211545c3eca4f8e8b477f4 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 09:02:24 -0700 Subject: [PATCH 206/396] smaller changes in mu per step in line search --- selectiveInference/R/funs.fixed.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 592d61d6..db5b9150 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -270,8 +270,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { - # InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { +InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { isgiven <- 1; if (is.null(mu)){ isgiven <- 0; From a9a8bbbd4208086426a29ea3cd7dbaaacb807426 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 09:40:54 -0700 Subject: [PATCH 207/396] renaming Sigma_theta to gradient, making sure we loop through active set --- selectiveInference/src/debias.c | 35 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 08c46ff6..5b1592e2 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -65,6 +65,7 @@ int update_ever_active(int coord, int *ever_active_ptr = ever_active; for (iactive=0; iactive Date: Fri, 18 Aug 2017 09:50:51 -0700 Subject: [PATCH 208/396] more renaming --- selectiveInference/src/debias.c | 36 ++++++++++++++++----------------- selectiveInference/src/debias.h | 14 ++++++------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 5b1592e2..e043934b 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -11,7 +11,7 @@ // Update one coordinate double objective(double *Sigma, /* A covariance matrix: X^TX/n */ - int *ever_active, /* Ever active set: 0-based */ + int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in Sigma */ int row, /* which row: 0-based */ @@ -31,13 +31,13 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ for (irow=0; irow Date: Fri, 18 Aug 2017 10:11:38 -0700 Subject: [PATCH 209/396] BF: forgot to rename in Rcpp file, passing active and ever_active from R --- selectiveInference/R/funs.fixed.R | 14 ++++++++++---- selectiveInference/src/Rcpp-debias.cpp | 23 ++++++++++++----------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index db5b9150..460cd0b3 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -341,18 +341,24 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { # If soln_result is not Null, it is used as a warm start. 
# It should be a list - # with entries "soln" and "Sigma_soln" + # with entries "soln", "gradient", "ever_active", "nactive" if (is.null(soln_result)) { soln = rep(0, nrow(Sigma)) - Sigma_soln = rep(0, nrow(Sigma)) + gradient = rep(0, nrow(Sigma)) + ever_active = rep(0, nrow(Sigma)) + ever_active[1] = i-1 # 0-based + ever_active = as.integer(ever_active) + nactive = as.integer(1) } else { soln = soln_result$soln - Sigma_soln = soln_result$Sigma_soln + gradient = soln_result$gradient + ever_active = as.integer(soln_result$ever_active) + nactive = as.integer(soln_result$nactive) } - result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, Sigma_soln) # C function uses 0-based indexing + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index b529c4ae..b6c80d2d 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -7,31 +7,30 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, - Rcpp::NumericVector Sigma_theta) { + Rcpp::NumericVector gradient, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive + ) { int nrow = Sigma.nrow(); // number of features // Active set int irow; - Rcpp::IntegerVector nactive(1); // An array so we can easily modify it - Rcpp::IntegerVector ever_active(1); - int *ever_active_p = ever_active.begin(); - *ever_active_p = row; // Extract the diagonal Rcpp::NumericVector Sigma_diag(nrow); - double *sigma_p = Sigma_diag.begin(); + double *sigma_diag_p = Sigma_diag.begin(); for (irow=0; irow Date: Fri, 18 Aug 2017 10:46:39 -0700 Subject: [PATCH 210/396] more renaming --- selectiveInference/src/debias.c | 44 +++++++++++++++++++-------------- selectiveInference/src/debias.h | 18 +++++++------- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index e043934b..1b841e0a 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -10,7 +10,7 @@ // Therefore we don't have to negate the answer to get theta. 
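// (Per coordinate the minimization is exact: writing c_j for the gradient
// of the smooth part at theta minus the diagonal contribution
// Sigma_jj * theta_j, the update is the soft-thresholding step
//   theta_j = -sign(c_j) * max(|c_j| - mu, 0) / Sigma_jj,
// which update_one_coord below computes -- a sketch of the arithmetic,
// not a quote from the source.)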
// Update one coordinate -double objective(double *Sigma, /* A covariance matrix: X^TX/n */ +double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in Sigma */ @@ -20,7 +20,7 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ { int irow, icol; double value = 0; - double *Sigma_ptr = Sigma; + double *Sigma_ptr_tmp = Sigma_ptr; double *theta_row_ptr, *theta_col_ptr; int *active_row_ptr, *active_col_ptr; int active_row, active_col; @@ -41,9 +41,9 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ active_col = *active_col_ptr; theta_col_ptr = ((double *) theta + active_col); - Sigma_ptr = ((double *) Sigma + nrow * active_col + active_row); // Matrices are column-major order + Sigma_ptr_tmp = ((double *) Sigma_ptr + nrow * active_col + active_row); // Matrices are column-major order - value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr); + value += 0.5 * (*Sigma_ptr_tmp) * (*theta_row_ptr) * (*theta_col_ptr); } value = value + bound * fabs((*theta_row_ptr)); // the \ell_1 term } @@ -126,8 +126,8 @@ int check_KKT(double *theta, /* current theta */ return(fail == 0); } -double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ - double *Sigma_diag, /* Diagonal entries of Sigma */ +double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ double *gradient_ptr, /* Sigma times theta */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ @@ -143,12 +143,12 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n double linear_term = 0; double value = 0; double old_value; - double *Sigma_ptr; + double *Sigma_ptr_tmp; double *gradient_ptr_tmp; double *theta_ptr; int icol = 0; - double *quadratic_ptr = ((double *) Sigma_diag + coord); + double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); double quadratic_term = *quadratic_ptr; // int *ever_active_ptr_tmp; @@ -194,13 +194,13 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { delta = value - old_value; - Sigma_ptr = ((double *) Sigma + coord * nrow); + Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow); gradient_ptr_tmp = ((double *) gradient_ptr); for (icol=0; icol Date: Fri, 18 Aug 2017 11:28:31 -0700 Subject: [PATCH 211/396] trying to get arbitrary linfunc --- selectiveInference/R/funs.fixed.R | 12 +++- selectiveInference/src/Rcpp-debias.cpp | 2 + selectiveInference/src/debias.c | 87 +++++++++++++++----------- selectiveInference/src/debias.h | 3 +- 4 files changed, 65 insertions(+), 39 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 460cd0b3..0b6c0181 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -297,6 +297,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta + #print(c("trying ", try.no)) output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start beta <- output$soln iter <- output$iter @@ -358,7 +359,16 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { nactive = as.integer(soln_result$nactive) } - result = find_one_row_debiasingM(Sigma, i-1, mu, 
maxiter, soln, gradient, ever_active, nactive) # C function uses 0-based indexing + linear_func = rep(0, p) + linear_func[i] = -1 + linear_func = as.numeric(linear_func) + + #print(c(soln, "soln")) + #print(c(gradient, "grad")) + #print(c(ever_active, "ever_active")) + #print(c(nactive, "nactive")) + #print(c(linear_func, "linear_func")) + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index b6c80d2d..83dd09c8 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -7,6 +7,7 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, + Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive @@ -29,6 +30,7 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Now call our C function int iter = find_one_row_((double *) Sigma.begin(), + (double *) linear_func.begin(), (double *) Sigma_diag.begin(), (double *) gradient.begin(), (int *) ever_active.begin(), diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 1b841e0a..0b34436b 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,3 +1,4 @@ +#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -11,16 +12,18 @@ // Update one coordinate double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ - double bound, /* Lagrange multipler for \ell_1 */ - double *theta) /* current value */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ { int irow, icol; double value = 0; double *Sigma_ptr_tmp = Sigma_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; double *theta_row_ptr, *theta_col_ptr; int *active_row_ptr, *active_col_ptr; int active_row, active_col; @@ -45,12 +48,15 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ value += 0.5 * (*Sigma_ptr_tmp) * (*theta_row_ptr) * (*theta_col_ptr); } - value = value + bound * fabs((*theta_row_ptr)); // the \ell_1 term - } + value += bound * fabs((*theta_row_ptr)); // the \ell_1 term + + // The linear term in the objective - theta_row_ptr = ((double *) theta + row); - value -= (*theta_row_ptr); // the elementary basis vector term + value += (*linear_func_ptr_tmp) * (*theta_row_ptr); + linear_func_ptr_tmp++; + } + return(value); } @@ -66,19 +72,22 @@ int update_ever_active(int coord, for (iactive=0; iactive Date: Fri, 18 Aug 2017 12:09:13 -0700 Subject: [PATCH 212/396] can solve arbitrary LASSO now --- selectiveInference/R/funs.fixed.R | 11 +++++---- selectiveInference/src/Rcpp-debias.cpp | 1 + selectiveInference/src/debias.c | 32 +++++++------------------- selectiveInference/src/debias.h | 6 ++--- 4 files changed, 18 insertions(+), 32 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 0b6c0181..06c7a7d1 100644 --- a/selectiveInference/R/funs.fixed.R 
+++ b/selectiveInference/R/funs.fixed.R @@ -346,28 +346,29 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { if (is.null(soln_result)) { soln = rep(0, nrow(Sigma)) - gradient = rep(0, nrow(Sigma)) ever_active = rep(0, nrow(Sigma)) ever_active[1] = i-1 # 0-based ever_active = as.integer(ever_active) nactive = as.integer(1) + linear_func = rep(0, p) + linear_func[i] = -1 + linear_func = as.numeric(linear_func) + gradient = 1. * linear_func } else { soln = soln_result$soln gradient = soln_result$gradient ever_active = as.integer(soln_result$ever_active) nactive = as.integer(soln_result$nactive) + linear_func = soln_result$linear_func } - linear_func = rep(0, p) - linear_func[i] = -1 - linear_func = as.numeric(linear_func) - #print(c(soln, "soln")) #print(c(gradient, "grad")) #print(c(ever_active, "ever_active")) #print(c(nactive, "nactive")) #print(c(linear_func, "linear_func")) + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 83dd09c8..be02abd8 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -51,6 +51,7 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, + Rcpp::Named("linear_func") = linear_func, Rcpp::Named("iter") = iter, Rcpp::Named("kkt_check") = kkt_check, Rcpp::Named("ever_active") = ever_active, diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 0b34436b..b0669f8e 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -5,7 +5,8 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - e_i^T\theta + \mu \|\theta\|_1 +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 +// where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf // Therefore we don't have to negate the answer to get theta. 
@@ -16,7 +17,6 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ double bound, /* Lagrange multipler for \ell_1 */ double *theta) /* current value */ { @@ -52,8 +52,8 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ // The linear term in the objective + linear_func_ptr_tmp = ((double *) linear_func_ptr + active_row); value += (*linear_func_ptr_tmp) * (*theta_row_ptr); - linear_func_ptr_tmp++; } @@ -74,7 +74,6 @@ int update_ever_active(int coord, ever_active_ptr_tmp = ((int *) ever_active_ptr + iactive); active_var = *ever_active_ptr_tmp; if (active_var == coord) { - // fprintf(stderr, "blah %d %d %d\n", coord, active_var, *nactive_ptr); return(1); } } @@ -92,11 +91,10 @@ int update_ever_active(int coord, return(0); } -int check_KKT(double *theta, /* current theta */ +int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Sigma times theta */ - int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ - double bound) /* Lagrange multipler for \ell_1 */ + int nrow, /* how many rows in Sigma */ + double bound) /* Lagrange multipler for \ell_1 */ { // First check inactive @@ -113,9 +111,6 @@ int check_KKT(double *theta, /* current theta */ // Compute this coordinate of the gradient gradient = *gradient_ptr_tmp; - if (row == irow) { - gradient -= 1; - } if (*theta_ptr != 0) { // these coordinates of gradients should be equal to -bound if ((*theta_ptr > 0) && (fabs(gradient + bound) > (1. + tol) * bound)) { @@ -144,7 +139,6 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int row, /* which row: 0-based */ int coord, /* which coordinate to update: 0-based */ int is_active) /* Is this part of ever_active */ { @@ -161,8 +155,6 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); double quadratic_term = *quadratic_ptr; - // int *ever_active_ptr_tmp; - gradient_ptr_tmp = ((double *) gradient_ptr + coord); linear_term = *gradient_ptr_tmp; @@ -172,9 +164,8 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T // The coord entry of gradient_ptr term has a diagonal term in it: // Sigma[coord, coord] * theta[coord] // This removes it. 
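// (note: as of this patch the running gradient is initialized to
// linear_func on the R side, so the explicit linear_func term removed
// below is already included in linear_term)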
- linear_term -= quadratic_term * old_value; - linear_term += *((double *) linear_func_ptr + coord); + linear_term -= quadratic_term * old_value; // Now soft-threshold the coord entry of theta @@ -229,8 +220,7 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row) /* which coordinate to update: 0-based */ + int maxiter) { int iter = 0; @@ -243,7 +233,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ ever_active_ptr, nactive_ptr, nrow, - row, bound, theta); double new_value; @@ -265,7 +254,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ nrow, bound, theta, - row, *active_ptr, 1); active_ptr++; @@ -276,7 +264,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ if (check_KKT(theta, gradient_ptr, nrow, - row, bound) == 1) { break; } @@ -294,7 +281,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ nrow, bound, theta, - row, icoord, 0); } @@ -304,7 +290,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ if (check_KKT(theta, gradient_ptr, nrow, - row, bound) == 1) { break; } @@ -314,7 +299,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ ever_active_ptr, nactive_ptr, nrow, - row, bound, theta); diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index c20195c4..bb9f6d67 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -12,13 +12,13 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row); /* which coordinate to update: 0-based */ + int maxiter) //, /* how many iterations */ +// int row); /* which coordinate to update: 0-based */ int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ + // int row, /* which row: 0-based */ double bound); /* Lagrange multipler for \ell_1 */ From d248a8f88c2e4e7e12a94369ae02edc95838a3f8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 12:17:44 -0700 Subject: [PATCH 213/396] BF: updating active set --- selectiveInference/src/debias.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 1b841e0a..107ae5d4 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -69,17 +69,20 @@ int update_ever_active(int coord, active_var = (*ever_active_ptr_tmp); if (active_var == coord) { - // Add it to the active set and increment the - // number of active variables - - ever_active_ptr_tmp = ((int *) ever_active_ptr + *nactive_ptr); - *ever_active_ptr_tmp = coord; - *nactive_ptr += 1; - return(1); } } + // If we have not returned yet, this variable + // was not in ever_active + + // Add it to the active set and increment the + // number of active variables + + ever_active_ptr_tmp = ((int *) ever_active_ptr + *nactive_ptr); + *ever_active_ptr_tmp = coord; + *nactive_ptr += 1; + return(0); } From aa515c972c8e1f918d6572338eae25efc28af16f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 12:24:22 -0700 
Subject: [PATCH 214/396] cosmetic change --- selectiveInference/src/debias.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 107ae5d4..532dbef6 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -68,7 +68,6 @@ int update_ever_active(int coord, ever_active_ptr_tmp = ((int *) ever_active_ptr + iactive); active_var = (*ever_active_ptr_tmp); if (active_var == coord) { - return(1); } } @@ -188,7 +187,7 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T // Add to active set if necessary - if (!is_active) { + if (is_active == 0) { update_ever_active(coord, ever_active_ptr, nactive_ptr); } From 5d5e8f3c4a0d1c0ed70097d3480f778a208c36f9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 12:30:59 -0700 Subject: [PATCH 215/396] BF: fixing function signatures --- selectiveInference/src/debias.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index bb9f6d67..730b1fbc 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -12,13 +12,11 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter) //, /* how many iterations */ -// int row); /* which coordinate to update: 0-based */ + int maxiter); int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ - // int row, /* which row: 0-based */ double bound); /* Lagrange multipler for \ell_1 */ From d1debcd5bcd7932fdff213b882d83a8b47c1a875 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 12:31:30 -0700 Subject: [PATCH 216/396] BF: fixing function signatures --- selectiveInference/src/Rcpp-debias.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index be02abd8..db6e77e0 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -38,15 +38,13 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, nrow, bound, (double *) theta.begin(), - maxiter, - row); + maxiter); // Check whether feasible int kkt_check = check_KKT(theta.begin(), gradient.begin(), nrow, - row, bound); return(Rcpp::List::create(Rcpp::Named("soln") = theta, From 15762af11f53be618e21c83af9db194db8f9a537 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:12:02 -0700 Subject: [PATCH 217/396] BF: KKT wrong for active conditions, still have debug statements --- selectiveInference/R/funs.fixed.R | 8 +--- selectiveInference/src/Rcpp-debias.cpp | 1 - selectiveInference/src/debias.c | 52 ++++++++++++++++++-------- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 06c7a7d1..e6e0c5f9 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -363,13 +363,7 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { linear_func = soln_result$linear_func } - #print(c(soln, "soln")) - #print(c(gradient, "grad")) - #print(c(ever_active, "ever_active")) - #print(c(nactive, "nactive")) - #print(c(linear_func, "linear_func")) - - result = 
find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing + result = find_one_row_debiasingM(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index db6e77e0..1095c290 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -3,7 +3,6 @@ // [[Rcpp::export]] Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, - int row, // 0-based double bound, int maxiter, Rcpp::NumericVector theta, diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index b0669f8e..6af42c43 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -100,34 +100,52 @@ int check_KKT(double *theta, /* current theta */ int irow; int fail = 0; - double tol = 1.e-4; - double *theta_ptr, *gradient_ptr_tmp; + double tol = 1.e-5; + double *theta_ptr = theta; + double *gradient_ptr_tmp = gradient_ptr; double gradient; for (irow=0; irow 0) && (fabs(gradient + bound) > (1. + tol) * bound)) { - fail += 1; + fprintf(stderr, "how does it look %d %f %f\n", irow, *theta_ptr, gradient); + + // Compute this coordinate of the gradient + + if (fabs(*theta_ptr) > tol) { // these coordinates of gradients should be equal to \pm bound + fprintf(stderr, "active %f %f\n", fabs(fabs(gradient) - bound), bound); + if (fabs(fabs(gradient) - bound) > tol * bound) { + fprintf(stderr, "here1 %d %f %f\n", irow, *theta_ptr, gradient); + return(0); + // fail += 1; } - else if ((*theta_ptr < 0) && (fabs(gradient - bound) > (1. + tol) * bound)) { - fail += 1; + else if ((*theta_ptr > 0) && (gradient > 0)) { + fprintf(stderr, "here2 %d %f %f\n", irow, *theta_ptr, gradient); + return(0); + // fail += 1; + } + else if ((*theta_ptr < 0) && (gradient < 0)) { + fprintf(stderr, "here3 %d %f %f\n", irow, *theta_ptr, gradient); + return(0); + // fail += 1; } } else { + fprintf(stderr, "before4 %d %f %f\n", irow, *theta_ptr, gradient); if (fabs(gradient) > (1. 
+ tol) * bound) { - fail += 1; + fprintf(stderr, "here4 %d %f %f\n", irow, *theta_ptr, gradient); + return(0); + // fail += 1; } } + theta_ptr++; + gradient_ptr_tmp++; } + fprintf(stderr, "OK now\n"); return(fail == 0); + } double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ @@ -236,7 +254,7 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ bound, theta); double new_value; - double tol=1.e-5; + double tol=1.e-8; for (iter=0; iter 0)) { - break; - } +/* if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { */ +/* break; */ +/* } */ old_value = new_value; } From 9872b45ae05b79358c17effeaf7b7b6cca395fe7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:19:21 -0700 Subject: [PATCH 218/396] removing debug statements --- selectiveInference/src/debias.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 6af42c43..c9809e3e 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,4 +1,3 @@ -#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -109,41 +108,28 @@ int check_KKT(double *theta, /* current theta */ gradient = *gradient_ptr_tmp; - fprintf(stderr, "how does it look %d %f %f\n", irow, *theta_ptr, gradient); - // Compute this coordinate of the gradient if (fabs(*theta_ptr) > tol) { // these coordinates of gradients should be equal to \pm bound - fprintf(stderr, "active %f %f\n", fabs(fabs(gradient) - bound), bound); if (fabs(fabs(gradient) - bound) > tol * bound) { - fprintf(stderr, "here1 %d %f %f\n", irow, *theta_ptr, gradient); return(0); - // fail += 1; } else if ((*theta_ptr > 0) && (gradient > 0)) { - fprintf(stderr, "here2 %d %f %f\n", irow, *theta_ptr, gradient); return(0); - // fail += 1; } else if ((*theta_ptr < 0) && (gradient < 0)) { - fprintf(stderr, "here3 %d %f %f\n", irow, *theta_ptr, gradient); return(0); - // fail += 1; } } else { - fprintf(stderr, "before4 %d %f %f\n", irow, *theta_ptr, gradient); if (fabs(gradient) > (1. 
+ tol) * bound) { - fprintf(stderr, "here4 %d %f %f\n", irow, *theta_ptr, gradient); return(0); - // fail += 1; } } theta_ptr++; gradient_ptr_tmp++; } - fprintf(stderr, "OK now\n"); return(fail == 0); } @@ -283,7 +269,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ gradient_ptr, nrow, bound) == 1) { - fprintf(stderr, "here5 \n"); break; } @@ -310,7 +295,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ gradient_ptr, nrow, bound) == 1) { - fprintf(stderr, "here6 \n"); break; } @@ -322,9 +306,9 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ bound, theta); -/* if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { */ -/* break; */ -/* } */ + if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { + break; + } old_value = new_value; } From be84df2690dc8a62f543693d80ff44dd2a751418 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:23:09 -0700 Subject: [PATCH 219/396] BF: KKT condition was wrong --- selectiveInference/src/debias.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 532dbef6..0d71e493 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -95,7 +95,7 @@ int check_KKT(double *theta, /* current theta */ int irow; int fail = 0; - double tol = 1.e-4; + double tol = 1.e-6; double *theta_ptr, *gradient_ptr_tmp; double gradient; @@ -111,10 +111,10 @@ int check_KKT(double *theta, /* current theta */ } if (*theta_ptr != 0) { // these coordinates of gradients should be equal to -bound - if ((*theta_ptr > 0) && (fabs(gradient + bound) > (1. + tol) * bound)) { + if ((*theta_ptr > 0) && (fabs(gradient + bound) > tol * bound)) { fail += 1; } - else if ((*theta_ptr < 0) && (fabs(gradient - bound) > (1. 
+ tol) * bound)) { + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { fail += 1; } } From 0de59af0c8f90410ae621d981b5458e3b38e1b99 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:28:35 -0700 Subject: [PATCH 220/396] lenient tolerance --- selectiveInference/src/debias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index c9809e3e..52daea94 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -99,7 +99,7 @@ int check_KKT(double *theta, /* current theta */ int irow; int fail = 0; - double tol = 1.e-5; + double tol = 1.e-4; double *theta_ptr = theta; double *gradient_ptr_tmp = gradient_ptr; double gradient; From 190c0a0c1a491a265b203c258247b288e9bbaff7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:35:18 -0700 Subject: [PATCH 221/396] added a few targets to makefile --- Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5c55bb82..91a89a4d 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,14 @@ Rcpp: - rm -f selectiveInference/src/RcppExports.cpp - rm -f selectiveInference/R/RcppExports.R - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" \ No newline at end of file + Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" + +install: Rcpp + R CMD install selectiveInference + +build: + R CMD build selectiveInference + +check: Rcpp build + R CMD build selectiveInference + R CMD check selectiveInference_1.2.2.tar.gz # fix this to be a script variable \ No newline at end of file From 2c4cf3a19487cdcfcc7ad9912894c3238d35130b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:43:41 -0700 Subject: [PATCH 222/396] not checking objective --- selectiveInference/src/debias.c | 71 +++++++++++++++------------------ 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 52daea94..92e2fb44 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -90,34 +90,32 @@ int update_ever_active(int coord, return(0); } -int check_KKT(double *theta, /* current theta */ +int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Sigma times theta */ - int nrow, /* how many rows in Sigma */ - double bound) /* Lagrange multipler for \ell_1 */ + int nrow, /* how many rows in Sigma */ + double bound) /* Lagrange multipler for \ell_1 */ { // First check inactive int irow; int fail = 0; - double tol = 1.e-4; - double *theta_ptr = theta; - double *gradient_ptr_tmp = gradient_ptr; + double tol = 1.e-6; + double *theta_ptr, *gradient_ptr_tmp; double gradient; for (irow=0; irow tol) { // these coordinates of gradients should be equal to \pm bound - if (fabs(fabs(gradient) - bound) > tol * bound) { - return(0); - } - else if ((*theta_ptr > 0) && (gradient > 0)) { + gradient = *gradient_ptr_tmp; + + if (*theta_ptr != 0) { // these coordinates of gradients should be equal to -bound + if ((*theta_ptr > 0) && (fabs(gradient + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr < 0) && (gradient < 0)) { + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { return(0); } } @@ -126,12 +124,9 @@ int check_KKT(double *theta, /* current theta */ return(0); } } - theta_ptr++; - gradient_ptr_tmp++; } - return(fail == 0); - + return(1); } double update_one_coord(double *Sigma_ptr, /* 
A covariance matrix: X^TX/n */ @@ -232,13 +227,13 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int iactive = 0; int *active_ptr; - double old_value = objective(Sigma_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - bound, - theta); +/* double old_value = objective(Sigma_ptr, */ +/* linear_func_ptr, */ +/* ever_active_ptr, */ +/* nactive_ptr, */ +/* nrow, */ +/* bound, */ +/* theta); */ double new_value; double tol=1.e-8; @@ -298,19 +293,19 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ break; } - new_value = objective(Sigma_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - bound, - theta); - - if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { - break; - } - - old_value = new_value; +/* new_value = objective(Sigma_ptr, */ +/* linear_func_ptr, */ +/* ever_active_ptr, */ +/* nactive_ptr, */ +/* nrow, */ +/* bound, */ +/* theta); */ + +/* if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { */ +/* break; */ +/* } */ + +// old_value = new_value; } return(iter); } From 9ebcaabfce17a0cb7e4d48506b0695678ec495d8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 18:00:52 -0700 Subject: [PATCH 223/396] making Rcpp in Makevars now --- selectiveInference/src/Makevars | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars index c68d0bd5..cde36c98 100644 --- a/selectiveInference/src/Makevars +++ b/selectiveInference/src/Makevars @@ -2,7 +2,10 @@ PKG_CFLAGS= -I. PKG_CPPFLAGS= -I. PKG_LIBS=-L. -$(SHLIB): Rcpp-debias.o RcppExports.o debias.o +$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o debias.o clean: rm -f *o + +Rcpp: + Rscript -e "library(Rcpp); Rcpp::compileAttributes('..')" \ No newline at end of file From 735c4d6742b8782c7f1e8af328c0ab3bf3fca2e5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 18:01:44 -0700 Subject: [PATCH 224/396] trying without making Rcpp --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 21270e09..1120a443 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,5 +12,5 @@ warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? 
- R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")' - - make Rcpp + #- make Rcpp - cd selectiveInference From 0865d09eebbbf1e2f8533f1badf09bb0ba04acd6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 18:26:02 -0700 Subject: [PATCH 225/396] making new quadratic_program C file --- selectiveInference/src/Rcpp-debias.cpp | 20 +- selectiveInference/src/debias.h | 21 +- selectiveInference/src/quadratic_program.c | 319 +++++++++++++++++++++ 3 files changed, 339 insertions(+), 21 deletions(-) create mode 100644 selectiveInference/src/quadratic_program.c diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 1095c290..c1b5e91c 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -28,16 +28,16 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Now call our C function - int iter = find_one_row_((double *) Sigma.begin(), - (double *) linear_func.begin(), - (double *) Sigma_diag.begin(), - (double *) gradient.begin(), - (int *) ever_active.begin(), - (int *) nactive.begin(), - nrow, - bound, - (double *) theta.begin(), - maxiter); + int iter = solve_qp((double *) Sigma.begin(), + (double *) linear_func.begin(), + (double *) Sigma_diag.begin(), + (double *) gradient.begin(), + (int *) ever_active.begin(), + (int *) nactive.begin(), + nrow, + bound, + (double *) theta.begin(), + maxiter); // Check whether feasible diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 730b1fbc..1284659e 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -3,23 +3,22 @@ extern "C" { #endif /* __cplusplus */ -int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *linear_func_ptr, /* Linear term in objective */ - double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ - double *gradient_ptr, /* Current gradient of quadratic loss */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter); +int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ + double *gradient_ptr, /* Current gradient of quadratic loss */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter); int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ double bound); /* Lagrange multipler for \ell_1 */ - #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c new file mode 100644 index 00000000..cf4d650f --- /dev/null +++ b/selectiveInference/src/quadratic_program.c @@ -0,0 +1,319 @@ +#include // for fabs + +// Find an approximate row of \hat{Sigma}^{-1} + +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf + +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 +// where l is `linear_func` below + +// This is the "negative" of the problem as in 
https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. +// Update one coordinate + +double objective_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in Sigma */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ +{ + int irow, icol; + double value = 0; + double *Sigma_ptr_tmp = Sigma_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; + double *theta_row_ptr, *theta_col_ptr; + int *active_row_ptr, *active_col_ptr; + int active_row, active_col; + int nactive = *nactive_ptr; + + theta_row_ptr = theta; + theta_col_ptr = theta; + + for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + } + else { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +double update_one_coord_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ + double *gradient_ptr, /* Sigma times theta */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this part of ever_active */ +{ + + double delta; + double linear_term = 0; + double value = 0; + double old_value; + double *Sigma_ptr_tmp; + double *gradient_ptr_tmp; + double *theta_ptr; + int icol = 0; + + double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); + double quadratic_term = *quadratic_ptr; + + gradient_ptr_tmp = ((double *) gradient_ptr + coord); + linear_term = *gradient_ptr_tmp; + + theta_ptr = ((double *) theta + coord); + old_value = *theta_ptr; + + // The coord entry of gradient_ptr term has a diagonal term in it: + // Sigma[coord, coord] * theta[coord] + // This removes it. 
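+  // (In closed form, the update computed below is the soft-thresholding
+  //  map: for the one-dimensional objective q/2 * t^2 + l * t + bound * |t|
+  //  the minimizer is t* = -sign(l) * max(|l| - bound, 0) / q, with
+  //  q = quadratic_term and l = linear_term after the correction below.)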
+ + linear_term -= quadratic_term * old_value; + + // Now soft-threshold the coord entry of theta + + // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| + // with q=quadratic_term and l=linear_term + + // With a negative linear term, solution should be + // positive + + if (linear_term < -bound) { + value = (-linear_term - bound) / quadratic_term; + } + else if (linear_term > bound) { + value = -(linear_term - bound) / quadratic_term; + } + + // Add to active set if necessary + + if (is_active == 0) { + update_ever_active_qp(coord, ever_active_ptr, nactive_ptr); + } + + // Update the linear term + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { + + delta = value - old_value; + Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow); + gradient_ptr_tmp = ((double *) gradient_ptr); + + for (icol=0; icol 0)) { + break; + } + old_value = new_value; + } + } + return(iter); +} + From bfad5057a3b91d80ae3477e74f66e59f24e8db3d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 18:27:05 -0700 Subject: [PATCH 226/396] need to make Rcpp for travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1120a443..21270e09 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,5 +12,5 @@ warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? - R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")' - #- make Rcpp + - make Rcpp - cd selectiveInference From 1237c78d74ca87edec5b9e6496288618eca6d7d5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 22:29:06 -0700 Subject: [PATCH 227/396] both solvers working --- selectiveInference/R/funs.fixed.R | 11 ++-- selectiveInference/src/Rcpp-debias.cpp | 75 +++++++++++++++++++--- selectiveInference/src/debias.c | 62 ++++++++++-------- selectiveInference/src/debias.h | 18 ++++++ selectiveInference/src/quadratic_program.c | 9 +-- 5 files changed, 123 insertions(+), 52 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index df87b5e7..de609e79 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -297,7 +297,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - print(c("#######################trying ", try.no)) + #print(c("#######################trying ", try.no)) output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start beta <- output$soln iter <- output$iter @@ -365,11 +365,10 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { linear_func = soln_result$linear_func } - result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing - result2 = find_one_row_debiasingM(Sigma, i, mu, maxiter, soln, gradient, ever_active, nactive) # C function uses 0-based indexing - - print('close?') - print(c(sqrt(sum((result$soln-result2$soln)^2)/sum(result$soln^2)), sqrt(sum(result$soln^2)), result2$nactive)) + result = find_one_row_debiasingM(Sigma, i, mu, maxiter, 0 * soln, gradient, ever_active, nactive) # C function uses 0-based indexing + #result1 = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) + #print("close?") + #print(c(sqrt(sum((result1$soln-result$soln)^2)/sum(result$soln^2)), sum(result$soln^2))) # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp 
b/selectiveInference/src/Rcpp-debias.cpp index c1b5e91c..ce338549 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -2,15 +2,15 @@ #include // where find_one_row_void is defined // [[Rcpp::export]] -Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, - double bound, - int maxiter, - Rcpp::NumericVector theta, - Rcpp::NumericVector linear_func, - Rcpp::NumericVector gradient, - Rcpp::IntegerVector ever_active, - Rcpp::IntegerVector nactive - ) { +Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, + double bound, + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector linear_func, + Rcpp::NumericVector gradient, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive + ) { int nrow = Sigma.nrow(); // number of features @@ -41,14 +41,69 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Check whether feasible + int kkt_check = check_KKT_qp(theta.begin(), + gradient.begin(), + nrow, + bound); + + return(Rcpp::List::create(Rcpp::Named("soln") = theta, + Rcpp::Named("gradient") = gradient, + Rcpp::Named("linear_func") = linear_func, + Rcpp::Named("iter") = iter, + Rcpp::Named("kkt_check") = kkt_check, + Rcpp::Named("ever_active") = ever_active, + Rcpp::Named("nactive") = nactive)); + +} + +// [[Rcpp::export]] +Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, + int row, // 0-based + double bound, + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector gradient, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive + ) { + + int nrow = Sigma.nrow(); // number of features + + // Active set + + int irow; + + // Extract the diagonal + Rcpp::NumericVector Sigma_diag(nrow); + double *sigma_diag_p = Sigma_diag.begin(); + + for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { - fail += 1; + return(0); } else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { - fail += 1; + return(0); } } else { if (fabs(gradient) > (1. 
+ tol) * bound) { - fail += 1; + return(0); } } } - return(fail == 0); + return(1); } double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ @@ -230,16 +229,21 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int icoord = 0; int iactive = 0; int *active_ptr; + double old_value, new_value, tol=1.e-5; + + int check_objective = 1; + + if (check_objective) { + + old_value = objective(Sigma_ptr, + ever_active_ptr, + nactive_ptr, + nrow, + row, + bound, + theta); + } - double old_value = objective(Sigma_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - row, - bound, - theta); - double new_value; - double tol=1.e-5; for (iter=0; iter 0)) { + break; + } - if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { - break; + old_value = new_value; } - - old_value = new_value; } return(iter); } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 1284659e..5e9d621d 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -14,11 +14,29 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double *theta, /* current value */ int maxiter); +int check_KKT_qp(double *theta, /* current theta */ + double *gradient_ptr, /* Current gradient of quadratic loss */ + int nrow, /* how many rows in Sigma */ + double bound); /* Lagrange multipler for \ell_1 */ + +int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *Sigma_ptr_ptr, /* Diagonal entry of covariance matrix */ + double *gradient_ptr, /* Current gradient of quadratic loss */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + int row); /* which coordinate to update: 0-based */ + int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ double bound); /* Lagrange multipler for \ell_1 */ + #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index c9233982..50d65148 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -1,5 +1,4 @@ #include // for fabs -#include // Find an approximate row of \hat{Sigma}^{-1} @@ -82,8 +81,6 @@ int update_ever_active_qp(int coord, // Add it to the active set and increment the // number of active variables - fprintf(stderr, "adding %d\n", coord); - ever_active_ptr_tmp = ((int *) ever_active_ptr + *nactive_ptr); *ever_active_ptr_tmp = coord; *nactive_ptr += 1; @@ -99,7 +96,6 @@ int check_KKT_qp(double *theta, /* current theta */ // First check inactive int irow; - int fail = 0; double tol = 1.e-6; double *theta_ptr, *gradient_ptr_tmp; double gradient; @@ -228,13 +224,11 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int iactive = 0; int *active_ptr; - int check_objective = 0; + int check_objective = 1; double old_value, new_value; double tol=1.e-8; - fprintf(stderr, "%d nactive start\n", *nactive_ptr); - if (check_objective) { old_value = objective_qp(Sigma_ptr, @@ -250,7 +244,6 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ for (iter=0; iter Date: Sun, 20 Aug 2017 15:16:24 -0700 Subject: [PATCH 228/396] passing tol as 
argument; both solvers agree --- selectiveInference/R/funs.fixed.R | 18 ++++-- selectiveInference/src/Rcpp-debias.cpp | 22 ++++++-- selectiveInference/src/debias.c | 64 +++++++++++++--------- selectiveInference/src/debias.h | 18 ++++-- selectiveInference/src/quadratic_program.c | 21 ++++--- 5 files changed, 90 insertions(+), 53 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index de609e79..c8f53526 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -338,7 +338,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6) { # If soln_result is not Null, it is used as a warm start. # It should be a list @@ -365,10 +365,18 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { linear_func = soln_result$linear_func } - result = find_one_row_debiasingM(Sigma, i, mu, maxiter, 0 * soln, gradient, ever_active, nactive) # C function uses 0-based indexing - #result1 = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) - #print("close?") - #print(c(sqrt(sum((result1$soln-result$soln)^2)/sum(result$soln^2)), sum(result$soln^2))) + soln1 = rep(0, p) + gradient1 = rep(0, p) + ever_active1 = rep(0, p) + ever_active1[1] = i-1 + nactive1 = as.integer(1) + result1 = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln1, gradient1, ever_active1, nactive1, kkt_tol, objective_tol) # C function uses 0-based indexing + result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) + print("close?") + print(c(sqrt(sum((result1$soln-result$soln)^2)/sum(result$soln^2)), sum(result$soln^2))) + print(c(result1$iter, result$iter, sum(result1$soln^2))) + + #result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index ce338549..a78ae81a 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -9,7 +9,9 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, - Rcpp::IntegerVector nactive + Rcpp::IntegerVector nactive, + double kkt_tol, + double objective_tol ) { int nrow = Sigma.nrow(); // number of features @@ -37,14 +39,17 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, nrow, bound, (double *) theta.begin(), - maxiter); + maxiter, + kkt_tol, + objective_tol); // Check whether feasible int kkt_check = check_KKT_qp(theta.begin(), gradient.begin(), nrow, - bound); + bound, + kkt_tol); return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, @@ -64,7 +69,9 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, Rcpp::NumericVector theta, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, - Rcpp::IntegerVector nactive + Rcpp::IntegerVector nactive, + double kkt_tol, + double objective_tol ) { int nrow = Sigma.nrow(); // number of features @@ -92,7 +99,9 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, bound, (double *) theta.begin(), maxiter, - row); + row, + kkt_tol, + objective_tol); // Check 
whether feasible @@ -100,7 +109,8 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, gradient.begin(), nrow, row, - bound); + bound, + kkt_tol); return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index e083ccca..4d360565 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,3 +1,4 @@ +#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -67,6 +68,7 @@ int update_ever_active(int coord, for (iactive=0; iactive 0) && (fabs(gradient + bound) > tol * bound)) { @@ -122,6 +127,7 @@ int check_KKT(double *theta, /* current theta */ return(0); } } + } return(1); @@ -129,15 +135,15 @@ int check_KKT(double *theta, /* current theta */ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ - double *gradient_ptr, /* Sigma times theta */ + double *gradient_ptr, /* Sigma times theta */ int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int row, /* which row: 0-based */ - int coord, /* which coordinate to update: 0-based */ - int is_active) /* Is this part of ever_active */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int row, /* which row: 0-based */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this part of ever_active */ { double delta; @@ -215,21 +221,23 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ - double *gradient_ptr, /* Sigma times theta */ + double *gradient_ptr, /* Sigma times theta */ int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row) /* which coordinate to update: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + int row, /* which coordinate to update: 0-based */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol) /* precision for checking relative decrease in objective value */ { int iter = 0; int icoord = 0; int iactive = 0; int *active_ptr; - double old_value, new_value, tol=1.e-5; + double old_value, new_value; int check_objective = 1; @@ -272,7 +280,8 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ gradient_ptr, nrow, row, - bound) == 1) { + bound, + kkt_tol) == 1) { break; } @@ -299,7 +308,8 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ gradient_ptr, nrow, row, - bound) == 1) { + bound, + kkt_tol) == 1) { break; } @@ -312,7 +322,7 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ bound, theta); - if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { + if (((old_value - new_value) < objective_tol * 
fabs(new_value)) && (iter > 0)) { break; } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 5e9d621d..5510adf8 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -12,15 +12,19 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter); + int maxiter, /* how many iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol); /* precision for checking relative decrease in objective value */ + int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ - double bound); /* Lagrange multipler for \ell_1 */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol); /* precision for checking KKT conditions */ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *Sigma_ptr_ptr, /* Diagonal entry of covariance matrix */ + double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ double *gradient_ptr, /* Current gradient of quadratic loss */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ @@ -28,14 +32,16 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double bound, /* feasibility parameter */ double *theta, /* current value */ int maxiter, /* how many iterations */ - int row); /* which coordinate to update: 0-based */ + int row, /* which coordinate to update: 0-based */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol); /* precision for checking relative decrease in objective value */ int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ int row, /* which row: 0-based */ - double bound); /* Lagrange multipler for \ell_1 */ - + double bound, /* Lagrange multipler for \ell_1 */ + double kkt_tol); /* precision for checking KKT conditions */ #ifdef __cplusplus } /* extern "C" */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index 50d65148..0cefac40 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -88,15 +88,15 @@ int update_ever_active_qp(int coord, return(0); } -int check_KKT_qp(double *theta, /* current theta */ +int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* Sigma times theta */ - int nrow, /* how many rows in Sigma */ - double bound) /* Lagrange multipler for \ell_1 */ + int nrow, /* how many rows in Sigma */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol) /* precision for checking KKT conditions */ { // First check inactive int irow; - double tol = 1.e-6; double *theta_ptr, *gradient_ptr_tmp; double gradient; @@ -216,7 +216,9 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter) + int maxiter, /* max number of iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol) /* precision for checking relative decrease in objective value */ { int iter = 0; @@ -227,7 +229,6 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int check_objective = 1; double 
old_value, new_value; - double tol=1.e-8; if (check_objective) { @@ -268,7 +269,8 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ if (check_KKT_qp(theta, gradient_ptr, nrow, - bound) == 1) { + bound, + kkt_tol) == 1) { break; } @@ -294,7 +296,8 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ if (check_KKT_qp(theta, gradient_ptr, nrow, - bound) == 1) { + bound, + kkt_tol) == 1) { break; } @@ -307,7 +310,7 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ bound, theta); - if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { + if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { break; } old_value = new_value; From 8c956f84949acc42e65af548bd1c6b18ca31ddac Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sun, 20 Aug 2017 15:46:37 -0700 Subject: [PATCH 229/396] making a choice for solver --- selectiveInference/R/funs.fixed.R | 38 +++++++++++++++---------------- selectiveInference/src/debias.c | 2 -- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index c8f53526..2ddc3d51 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -26,8 +26,6 @@ sigma=NULL, alpha=0.1, else{ - - checkargs.xy(x,y) if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") @@ -338,7 +336,8 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6) { +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6, + use_QP=TRUE) { # If soln_result is not Null, it is used as a warm start. # It should be a list @@ -352,31 +351,30 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt ever_active[1] = i-1 # 0-based ever_active = as.integer(ever_active) nactive = as.integer(1) - linear_func = rep(0, p) - linear_func[i] = -1 - linear_func = as.numeric(linear_func) - gradient = 1. * linear_func + if (use_QP) { + linear_func = rep(0, p) + linear_func[i] = -1 + linear_func = as.numeric(linear_func) + gradient = 1. 
* linear_func + } else { + gradient = rep(0, p) + } } else { soln = soln_result$soln gradient = soln_result$gradient ever_active = as.integer(soln_result$ever_active) nactive = as.integer(soln_result$nactive) - linear_func = soln_result$linear_func + if (use_QP) { + linear_func = soln_result$linear_func + } } - soln1 = rep(0, p) - gradient1 = rep(0, p) - ever_active1 = rep(0, p) - ever_active1[1] = i-1 - nactive1 = as.integer(1) - result1 = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln1, gradient1, ever_active1, nactive1, kkt_tol, objective_tol) # C function uses 0-based indexing - result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) - print("close?") - print(c(sqrt(sum((result1$soln-result$soln)^2)/sum(result$soln^2)), sum(result$soln^2))) - print(c(result1$iter, result$iter, sum(result1$soln^2))) - - #result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 0-based indexing + if (use_QP) { + result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) + } else { + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 0-based indexing + } # Check feasibility diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 4d360565..df9d0ead 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,4 +1,3 @@ -#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -68,7 +67,6 @@ int update_ever_active(int coord, for (iactive=0; iactive Date: Sun, 20 Aug 2017 16:05:30 -0700 Subject: [PATCH 230/396] RF: ever_active is now 1-based indices to be more R like --- selectiveInference/R/funs.fixed.R | 4 +-- selectiveInference/src/debias.c | 42 +++++++++++----------- selectiveInference/src/quadratic_program.c | 18 +++++----- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 2ddc3d51..ae9fdc63 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -348,7 +348,7 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt if (is.null(soln_result)) { soln = rep(0, p) ever_active = rep(0, p) - ever_active[1] = i-1 # 0-based + ever_active[1] = i # 1-based ever_active = as.integer(ever_active) nactive = as.integer(1) if (use_QP) { @@ -373,7 +373,7 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt if (use_QP) { result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) } else { - result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 0-based indexing + result = find_one_row_debiasingM(Sigma, i, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 1-based indexing for active set } # Check feasibility diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index df9d0ead..3e3efb5e 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -11,12 +11,12 @@ // Update one coordinate double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - 
int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ - double bound, /* Lagrange multipler for \ell_1 */ - double *theta) /* current value */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 1-based */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ { int irow, icol; double value = 0; @@ -32,13 +32,13 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ for (irow=0; irow Date: Sun, 20 Aug 2017 16:39:12 -0700 Subject: [PATCH 231/396] added an argument to limit the number of times the linesearch runs --- selectiveInference/R/funs.fixed.R | 15 +++++++-------- selectiveInference/man/fixedLassoInf.Rd | 10 +++++++--- selectiveInference/src/Rcpp-debias.cpp | 3 +++ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index ae9fdc63..0ca60a3c 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -3,9 +3,9 @@ # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL, -sigma=NULL, alpha=0.1, - type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE) { + sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=5) { family = match.arg(family) this.call = match.call() @@ -158,7 +158,7 @@ sigma=NULL, alpha=0.1, # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) + htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, max.try=linesearch.try) # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) @@ -268,7 +268,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { +InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, max.try=10) { isgiven <- 1; if (is.null(mu)){ isgiven <- 0; @@ -293,13 +293,12 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold output = NULL - while ((mu.stop != 1)&&(try.no<10)){ + while ((mu.stop != 1) && (try.no // need to include the main Rcpp header file #include // where find_one_row_void is defined +// Below, the gradient should be equal to Sigma * theta + linear_func!! +// No check is done on this. 
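+// (With the cold start used on the R side -- theta = 0 -- the invariant
+// reduces to gradient = linear_func, which is how funs.fixed.R seeds the
+// gradient vector before the first call.)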
+ // [[Rcpp::export]] Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, From acbf82cf120bdfbf2c55c2ff3898425f44d234c6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sun, 20 Aug 2017 16:43:30 -0700 Subject: [PATCH 232/396] making it 10 tries as before --- selectiveInference/R/funs.fixed.R | 2 +- selectiveInference/man/fixedLassoInf.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 0ca60a3c..b03fd4dd 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -5,7 +5,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=5) { + gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) { family = match.arg(family) this.call = match.call() diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 2f8517dc..bd740f86 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -12,7 +12,7 @@ fixed value of the tuning parameter lambda fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial", "cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=5) + gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) } \arguments{ \item{x}{ From 7fa51202acc293c10b2b90da726212035a02b6f5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 28 Aug 2017 21:40:52 -0700 Subject: [PATCH 233/396] NF: stop early if active set gets too big --- selectiveInference/R/funs.fixed.R | 29 ++++++++++++++++------ selectiveInference/src/Rcpp-debias.cpp | 22 +++++++++++----- selectiveInference/src/debias.c | 11 +++++++- selectiveInference/src/debias.h | 7 +++--- selectiveInference/src/quadratic_program.c | 11 +++++++- 5 files changed, 61 insertions(+), 19 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 455304ac..082a1f69 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -271,7 +271,9 @@ fixedLasso.poly= ## Approximates inverse covariance matrix theta InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, max.try=10) { - isgiven <- 1; + max_active = max(50, 2 * sqrt(n)) + + isgiven <- 1; if (is.null(mu)){ isgiven <- 0; } @@ -295,17 +297,19 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold output = NULL + last.beta = NULL + while ((mu.stop != 1) && (try.no= max_active); + return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, Rcpp::Named("linear_func") = linear_func, Rcpp::Named("iter") = iter, Rcpp::Named("kkt_check") = kkt_check, Rcpp::Named("ever_active") = ever_active, - Rcpp::Named("nactive") = nactive)); + Rcpp::Named("nactive") = nactive, + Rcpp::Named("max_active_check") = max_active_check)); } @@ -74,7 +79,8 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, - double objective_tol + double objective_tol, + int max_active ) { int nrow = Sigma.nrow(); // number of features @@ -104,7 +110,8 @@ Rcpp::List 
find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, maxiter, row, kkt_tol, - objective_tol); + objective_tol, + max_active); // Check whether feasible @@ -115,11 +122,14 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, bound, kkt_tol); + int max_active_check = (*(nactive.begin()) >= max_active); + return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, Rcpp::Named("iter") = iter, Rcpp::Named("kkt_check") = kkt_check, Rcpp::Named("ever_active") = ever_active, - Rcpp::Named("nactive") = nactive)); + Rcpp::Named("nactive") = nactive, + Rcpp::Named("max_active_check") = max_active_check)); } diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 3e3efb5e..2881d175 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -228,7 +228,8 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int maxiter, /* how many iterations */ int row, /* which coordinate to solve: 1-based */ double kkt_tol, /* precision for checking KKT conditions */ - double objective_tol) /* precision for checking relative decrease in objective value */ + double objective_tol, /* precision for checking relative decrease in objective value */ + int max_active) /* Upper limit for size of active set -- otherwise break */ { int iter = 0; @@ -311,6 +312,14 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ break; } + // Check size of active set + + if (*nactive_ptr >= max_active) { + break; + } + + // Check relative decrease of objective + if (check_objective) { new_value = objective(Sigma_ptr, ever_active_ptr, diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 5510adf8..f76eb692 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -14,8 +14,8 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double *theta, /* current value */ int maxiter, /* how many iterations */ double kkt_tol, /* precision for checking KKT conditions */ - double objective_tol); /* precision for checking relative decrease in objective value */ - + double objective_tol, /* precision for checking relative decrease in objective value */ + int max_active); /* Upper limit for size of active set -- otherwise break */ int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ @@ -34,7 +34,8 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int maxiter, /* how many iterations */ int row, /* which coordinate to update: 0-based */ double kkt_tol, /* precision for checking KKT conditions */ - double objective_tol); /* precision for checking relative decrease in objective value */ + double objective_tol, /* precision for checking relative decrease in objective value */ + int max_active); /* Upper limit for size of active set -- otherwise break */ int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index ba14d022..4afbd329 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -220,7 +220,8 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double *theta, /* current value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ - double objective_tol) /* precision for checking relative 
decrease in objective value */ + double objective_tol, /* precision for checking relative decrease in objective value */ + int max_active) /* Upper limit for size of active set -- otherwise break */ { int iter = 0; @@ -303,6 +304,14 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ break; } + // Check size of active set + + if (*nactive_ptr >= max_active) { + break; + } + + // Check relative decrease of objective + if (check_objective) { new_value = objective_qp(Sigma_ptr, linear_func_ptr, From 0c1f9ea5283d2d2d75b86eaf5f6258fdf19c7ce2 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 29 Aug 2017 17:55:44 -0700 Subject: [PATCH 234/396] renaming function, but all linesearch logic into a row-wise function --- selectiveInference/R/funs.fixed.R | 235 ++++++++++++++++-------------- 1 file changed, 129 insertions(+), 106 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 082a1f69..33f2cc33 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -158,10 +158,8 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, max.try=linesearch.try) + htheta = debiasing_matrix(hsigma, n, 1:length(S), verbose=FALSE, max_try=linesearch.try, warn_kkt=TRUE) - # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) - FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S))) ithetasigma = (GS-(htheta%*%hsigma)) @@ -269,137 +267,162 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, max.try=10) { - max_active = max(50, 2 * sqrt(n)) +debiasing_matrix = function(Sigma, + nsample, + rows, + verbose=FALSE, + mu=NULL, # starting value of mu + linesearch=TRUE, # do a linesearch? + resol=1.2, # multiplicative factor for linesearch + max_active=NULL, # how big can active set get? + max_try=10, # how many steps in linesearch? + warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? 
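+                            # (warn_kkt reports the kkt_check flag returned by the C solver)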
+ max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + objective_tol=1.e-4 # tolerance for relative decrease in objective + ) { + + + if (is.null(max_active)) { + max_active = max(50, 0.3 * nsample) + } + + p = nrow(Sigma); + M = matrix(0, length(rows), p); - isgiven <- 1; - if (is.null(mu)){ - isgiven <- 0; + if (is.null(mu)) { + mu = (1/sqrt(nsample)) * qnorm(1-(0.1/(p^2))) } - - p <- nrow(sigma); - M <- matrix(0, e, p); + xperc = 0; xp = round(p/10); - for (i in 1:e) { - if ((i %% xp)==0){ + idx = 1; + for (row in rows) { + + if ((idx %% xp)==0){ xperc = xperc+10; if (verbose) { print(paste(xperc,"% done",sep="")); } } - if (isgiven==0){ - mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); - } - mu.stop <- 0; - try.no <- 1; - incr <- 0; - output = NULL + output = debiasing_row(Sigma, + row, + mu, + linesearch=linesearch, + resol=resol, + max_active=max_active, + max_try=max_try, + warn_kkt=FALSE, + max_iter=max_iter, + kkt_tol=kkt_tol, + objective_tol=objective_tol) + + if (warn_kkt && (!output$kkt_check)) { + warning("Solution for row of M does not seem to be feasible") + } + + M[idx,] = output$soln; + idx = idx + 1; + } + return(M) +} - last.beta = NULL +debiasing_row = function (Sigma, + row, + mu, + linesearch=TRUE, # do a linesearch? + resol=1.2, # multiplicative factor for linesearch + max_active=NULL, # how big can active set get? + max_try=10, # how many steps in linesearch? + warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + objective_tol=1.e-4 # tolerance for relative decrease in objective + ) { - while ((mu.stop != 1) && (try.no 1){ - if ((incr == 1)&&(iter == (maxiter+1))){ - mu <- mu*resol; - } - if ((incr == 1)&&(iter < (maxiter+1))){ - mu.stop <- 1; - } - if ((incr == 0)&&(iter < (maxiter+1))){ - mu <- mu/resol; - } - if ((incr == 0) && (iter == (maxiter+1))){ - mu <- mu*resol; - beta <- last.beta; - mu.stop <- 1; - } - } - if (output$max_active_check) { - mu.stop <- 1; - beta <- last.beta; - } - } - try.no <- try.no+1 - last.beta <- beta - } - M[i,] <- beta; - } - return(M) -} + soln = rep(0, p) + ever_active = rep(0, p) + ever_active[1] = row # 1-based + ever_active = as.integer(ever_active) + nactive = as.integer(1) -InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6, - use_QP=TRUE, max_active=NULL) { + linear_func = rep(0, p) + linear_func[row] = -1 + linear_func = as.numeric(linear_func) + gradient = 1. * linear_func - # If soln_result is not Null, it is used as a warm start. - # It should be a list - # with entries "soln", "gradient", "ever_active", "nactive" + counter_idx = 1; + incr = 0; - p = nrow(Sigma) + last_output = NULL - if (is.null(max_active)) { - max_active = 50 # arbitrary? - } + while (counter_idx < max_try) { - if (is.null(soln_result)) { - soln = rep(0, p) - ever_active = rep(0, p) - ever_active[1] = i # 1-based - ever_active = as.integer(ever_active) - nactive = as.integer(1) - if (use_QP) { - linear_func = rep(0, p) - linear_func[i] = -1 - linear_func = as.numeric(linear_func) - gradient = 1. 
* linear_func - } else { - gradient = rep(0, p) - } - } - else { - soln = soln_result$soln - gradient = soln_result$gradient - ever_active = as.integer(soln_result$ever_active) - nactive = as.integer(soln_result$nactive) - if (use_QP) { - linear_func = soln_result$linear_func - } - } + result = solve_QP(Sigma, mu, max_iter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) - if (use_QP) { - result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) - } else { - result = find_one_row_debiasingM(Sigma, i, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) # C function uses 1-based indexing for active set - } + iter = result$iter + + # Logic for whether we should continue the line search + + if (!linesearch) { + break + } + + if (counter_idx == 1){ + if (iter == (max_iter+1)){ + incr = 1; # was the original problem feasible? 1 if not + } else { + incr = 0; # original problem was feasible + } + } + + if (incr == 1) { # trying to find a feasible point + if ((iter < (max_iter+1)) && (counter_idx > 1)) { + break; # we've found a feasible point and solved the problem + } + mu = mu * resol; + } else { # trying to drop the bound parameter further + if ((iter == (max_iter + 1)) && (counter_idx > 1)) { + result = last_output; # problem seems infeasible because we didn't solve it + break; # so we revert to previously found solution + } + mu = mu / resol; + } + + # If the active set has grown to a certain size + # then we stop, presuming problem has become + # infeasible. + + # We revert to the previous solution + + if (result$max_active_check) { + result = last_output; + break; + } + + counter_idx = counter_idx + 1 + last_output = list(soln=result$soln, + kkt_check=result$kkt_check) + } # Check feasibility - if (!result$kkt_check) { + if (warn_kkt && (!result$kkt_check)) { warning("Solution for row of M does not seem to be feasible") } - return(result) + return(list(soln=result$soln, + kkt_check=result$kkt_check)) } From e2924290bc5a1a595fce93f1722f18d27f7dccc8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 29 Aug 2017 17:57:10 -0700 Subject: [PATCH 235/396] camel case function name (ew... 
but consistent) --- selectiveInference/R/funs.fixed.R | 50 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 33f2cc33..3d28109b 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -158,7 +158,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - htheta = debiasing_matrix(hsigma, n, 1:length(S), verbose=FALSE, max_try=linesearch.try, warn_kkt=TRUE) + htheta = debiasingMatrix(hsigma, n, 1:length(S), verbose=FALSE, max_try=linesearch.try, warn_kkt=TRUE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S))) @@ -268,7 +268,7 @@ fixedLasso.poly= ## Approximates inverse covariance matrix theta -debiasing_matrix = function(Sigma, +debiasingMatrix = function(Sigma, nsample, rows, verbose=FALSE, @@ -306,17 +306,17 @@ debiasing_matrix = function(Sigma, print(paste(xperc,"% done",sep="")); } } - output = debiasing_row(Sigma, - row, - mu, - linesearch=linesearch, - resol=resol, - max_active=max_active, - max_try=max_try, - warn_kkt=FALSE, - max_iter=max_iter, - kkt_tol=kkt_tol, - objective_tol=objective_tol) + output = debiasingRow(Sigma, + row, + mu, + linesearch=linesearch, + resol=resol, + max_active=max_active, + max_try=max_try, + warn_kkt=FALSE, + max_iter=max_iter, + kkt_tol=kkt_tol, + objective_tol=objective_tol) if (warn_kkt && (!output$kkt_check)) { warning("Solution for row of M does not seem to be feasible") @@ -328,18 +328,18 @@ debiasing_matrix = function(Sigma, return(M) } -debiasing_row = function (Sigma, - row, - mu, - linesearch=TRUE, # do a linesearch? - resol=1.2, # multiplicative factor for linesearch - max_active=NULL, # how big can active set get? - max_try=10, # how many steps in linesearch? - warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? - max_iter=100, # how many iterations for each optimization problem - kkt_tol=1.e-4, # tolerance for the KKT conditions - objective_tol=1.e-4 # tolerance for relative decrease in objective - ) { +debiasingRow = function (Sigma, + row, + mu, + linesearch=TRUE, # do a linesearch? + resol=1.2, # multiplicative factor for linesearch + max_active=NULL, # how big can active set get? + max_try=10, # how many steps in linesearch? + warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? 
+ max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + objective_tol=1.e-4 # tolerance for relative decrease in objective + ) { p = nrow(Sigma) From ec07564ccba45c7959074ea905422e3c744b86d4 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 29 Aug 2017 23:52:54 -0700 Subject: [PATCH 236/396] new export debiasingMatrix --- selectiveInference/NAMESPACE | 5 +- selectiveInference/R/funs.fixed.R | 57 +++++++----- selectiveInference/man/debiasingMatrix.Rd | 106 ++++++++++++++++++++++ 3 files changed, 144 insertions(+), 24 deletions(-) create mode 100644 selectiveInference/man/debiasingMatrix.Rd diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index ab2a1118..d72d56a9 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -6,8 +6,6 @@ export(lar,fs, print.larInf,print.fsInf, plot.lar,plot.fs, fixedLassoInf,print.fixedLassoInf, -# fixedLogitLassoInf,print.fixedLogitLassoInf, -# fixedCoxLassoInf,print.fixedCoxLassoInf, forwardStop, estimateSigma, manyMeans,print.manyMeans, @@ -15,7 +13,8 @@ export(lar,fs, scaleGroups,factorDesign, TG.pvalue, TG.limits, - TG.interval + TG.interval, + debiasingMatrix ) S3method("coef", "lar") diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 3d28109b..0fca05fb 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -269,19 +269,19 @@ fixedLasso.poly= ## Approximates inverse covariance matrix theta debiasingMatrix = function(Sigma, - nsample, - rows, - verbose=FALSE, - mu=NULL, # starting value of mu - linesearch=TRUE, # do a linesearch? - resol=1.2, # multiplicative factor for linesearch - max_active=NULL, # how big can active set get? - max_try=10, # how many steps in linesearch? - warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? - max_iter=100, # how many iterations for each optimization problem - kkt_tol=1.e-4, # tolerance for the KKT conditions - objective_tol=1.e-4 # tolerance for relative decrease in objective - ) { + nsample, + rows, + verbose=FALSE, + mu=NULL, # starting value of mu + linesearch=TRUE, # do a linesearch? + scaling_factor=1.5, # multiplicative factor for linesearch + max_active=NULL, # how big can active set get? + max_try=10, # how many steps in linesearch? + warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + objective_tol=1.e-8 # tolerance for relative decrease in objective + ) { if (is.null(max_active)) { @@ -310,7 +310,7 @@ debiasingMatrix = function(Sigma, row, mu, linesearch=linesearch, - resol=resol, + scaling_factor=scaling_factor, max_active=max_active, max_try=max_try, warn_kkt=FALSE, @@ -322,7 +322,12 @@ debiasingMatrix = function(Sigma, warning("Solution for row of M does not seem to be feasible") } - M[idx,] = output$soln; + if (!is.null(output$soln)) { + M[idx,] = output$soln; + } else { + stop(paste("Unable to approximate inverse row ", row)); + } + idx = idx + 1; } return(M) @@ -332,13 +337,13 @@ debiasingRow = function (Sigma, row, mu, linesearch=TRUE, # do a linesearch? - resol=1.2, # multiplicative factor for linesearch + scaling_factor=1.2, # multiplicative factor for linesearch max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? 
- max_iter=100, # how many iterations for each optimization problem + max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions - objective_tol=1.e-4 # tolerance for relative decrease in objective + objective_tol=1.e-8 # tolerance for relative decrease in objective ) { p = nrow(Sigma) @@ -368,7 +373,17 @@ debiasingRow = function (Sigma, while (counter_idx < max_try) { - result = solve_QP(Sigma, mu, max_iter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + result = solve_QP(Sigma, + mu, + max_iter, + soln, + linear_func, + gradient, + ever_active, + nactive, + kkt_tol, + objective_tol, + max_active) iter = result$iter @@ -390,13 +405,13 @@ debiasingRow = function (Sigma, if ((iter < (max_iter+1)) && (counter_idx > 1)) { break; # we've found a feasible point and solved the problem } - mu = mu * resol; + mu = mu * scaling_factor; } else { # trying to drop the bound parameter further if ((iter == (max_iter + 1)) && (counter_idx > 1)) { result = last_output; # problem seems infeasible because we didn't solve it break; # so we revert to previously found solution } - mu = mu / resol; + mu = mu / scaling_factor; } # If the active set has grown to a certain size diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd new file mode 100644 index 00000000..973c0b41 --- /dev/null +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -0,0 +1,106 @@ +\name{debiasingMatrix} +\alias{debiasingMatrix} +\title{ +Find an approximate inverse of a non-negative definite matrix. +} +\description{ +Find some rows of an approximate inverse of a non-negative definite +symmetric matrix by solving optimization problem described +in Javanmard and Montanari (2013). +} +\usage{ +debiasingMatrix(Sigma, + nsample, + rows, + verbose=FALSE, + mu=NULL, + linesearch=TRUE, + scaling_factor=1.5, + max_active=NULL, + max_try=10, + warn_kkt=FALSE, + max_iter=100, + kkt_tol=1.e-4, + objective_tol=1.e-8) +} +\arguments{ +\item{Sigma}{ +A symmetric non-negative definite matrix, often a cross-covariance matrix. +} +\item{nsample}{ +Number of samples used in forming the cross-covariance matrix. +Used for default value of the bound parameter mu. +} +\item{rows}{ +Which rows of the approximate inverse to compute. +} +\item{verbose}{ +Print out progress as rows are being computed. +} +\item{mu}{ +Initial bound parameter for each row. Will be changed +if linesearch is TRUE. +} +\item{linesearch}{ +Run a line search to find as small as possible a bound parameter for each row? +} +\item{scaling_factor}{ +In the linesearch, the bound parameter is either multiplied or divided by this +factor at each step. +} +\item{max_active}{ +How large an active set to consider in solving the problem with coordinate descent. +Defaults to max(50, 0.3*nsample). +} +\item{max_try}{ +How many tries in the linesearch. +} +\item{warn_kkt}{ +Warn if the problem does not seem to be feasible after running the coordinate +descent algorithm. +} +\item{max_iter}{ +How many full iterations to run of the coordinate descent for each +value of the bound parameter. +} +\item{kkt_tol}{ +Tolerance value for assessing whether KKT conditions for solving the +dual problem and feasibility of the original problem. +} +\item{objective_tol}{ +Tolerance value for assessing convergence of the problem using relative +decrease of the objective. 
+} +} +\details{ +This function computes an approximate inverse +as described in Javanmard and Montanari (2013), specifically +display (4). The problem is solved by considering a dual +problem which has an objective similar to a LASSO problem and is solvable +by coordinate descent. For some values of mu the original +problem may not be feasible, in which case the dual problem has no solution. +An attempt to detect this is made by stopping when the active set grows quite +large, determined by max_active. +} + +\value{ +\item{M}{Rows of approximate inverse of Sigma.} +} + +\references{ +Adel Javanmard and Andrea Montanari (2013). +Confidence Intervals and Hypothesis Testing for High-Dimensional Regression. Arxiv: 1306.3171 +} +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ + +set.seed(10) +n = 50 +p = 100 +X = matrix(rnorm(n * p), n, p) +S = t(X) %*% X / n +M = debiasingMatrix(S, n, c(1,3,5)) + +} + \ No newline at end of file From 3c0ef8d50d185672c792c3be1fb307b48c394a1e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 30 Aug 2017 00:00:16 -0700 Subject: [PATCH 237/396] minor edit --- selectiveInference/R/funs.fixed.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 0fca05fb..083b3c6e 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -264,9 +264,8 @@ fixedLasso.poly= ############################## -### Functions borrowed and slightly modified from lasso_inference.R - ## Approximates inverse covariance matrix theta +## using coordinate descent debiasingMatrix = function(Sigma, From d94d217a493e66620f84a04617a0150f94b5d038 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 30 Aug 2017 00:01:44 -0700 Subject: [PATCH 238/396] BF: latex percent --- selectiveInference/man/debiasingMatrix.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd index 973c0b41..28fc47bb 100644 --- a/selectiveInference/man/debiasingMatrix.Rd +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -99,7 +99,7 @@ set.seed(10) n = 50 p = 100 X = matrix(rnorm(n * p), n, p) -S = t(X) %*% X / n +S = t(X) \%*\% X / n M = debiasingMatrix(S, n, c(1,3,5)) } From 3e9d62dd4af0721f14b887697c6b4295e2887874 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 30 Aug 2017 00:05:23 -0700 Subject: [PATCH 239/396] a little more doc --- selectiveInference/man/debiasingMatrix.Rd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd index 28fc47bb..533a12c6 100644 --- a/selectiveInference/man/debiasingMatrix.Rd +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -6,7 +6,9 @@ Find an approximate inverse of a non-negative definite matrix. } \description{ Find some rows of an approximate inverse of a non-negative definite +symmetric matrix by solving the optimization problem described -in Javanmard and Montanari (2013). +in Javanmard and Montanari (2013). Can be used for an approximate +Newton step from some consistent estimator (such as the LASSO) +to find a debiased solution.
} \usage{ debiasingMatrix(Sigma, From 3d8b2481a99831e24c65bee34fb6291486353531 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 31 Aug 2017 14:05:21 -0700 Subject: [PATCH 240/396] exporting function to construct polyhedral constraints of the LASSO --- selectiveInference/NAMESPACE | 3 +- selectiveInference/R/funs.fixed.R | 64 +++++++++---------- selectiveInference/man/fixedLassoPoly.Rd | 80 ++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 36 deletions(-) create mode 100644 selectiveInference/man/fixedLassoPoly.Rd diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index d72d56a9..fe0237ef 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -14,7 +14,8 @@ export(lar,fs, TG.pvalue, TG.limits, TG.interval, - debiasingMatrix + debiasingMatrix, + fixedLassoPoly ) S3method("coef", "lar") diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 083b3c6e..c65731d6 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -93,14 +93,14 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co "'thresh' parameter, for a more accurate convergence.")) - # Get lasso polyhedral region, of form Gy >= u + # Get lasso polyhedral region, of form Ay <= b - if (type == 'full' & p > n) out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) - else out = fixedLasso.poly(x,y,beta,lambda,vars) - G = out$G - u = out$u + if (type == 'full' & p > n) out = fixedLassoPoly(x,y,lambda,beta,vars,inactive=TRUE) + else out = fixedLassoPoly(x,y,lambda,beta,vars) + A = out$A + b = out$b # Check polyhedral region tol.poly = 0.01 - if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + if (max(A %*% y - b) > tol.poly * sqrt(sum(y^2))) stop(paste("Polyhedral constraints not satisfied; you must recompute beta", "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", "and check whether the specified value of lambda is too small", @@ -191,7 +191,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co sign[j] = sign(sum(vj*y)) vj = sign[j] * vj - limits.info = TG.limits(y, -G, -u, vj, Sigma=diag(rep(sigma^2, n))) + limits.info = TG.limits(y, A, b, vj, Sigma=diag(rep(sigma^2, n))) a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) pv[j] = a$pv vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) @@ -221,45 +221,39 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co ############################# -fixedLasso.poly= - function(x, y, beta, lambda, a, inactive = FALSE) { - xa = x[,a,drop=F] - xac = x[,!a,drop=F] - xai = pinv(crossprod(xa)) - xap = xai %*% t(xa) - za = sign(beta[a]) +fixedLassoPoly = + function(X, y, lambda, beta, active, inactive = FALSE) { + Xa = X[,active,drop=F] + Xac = X[,!active,drop=F] + Xai = pinv(crossprod(Xa)) + Xap = Xai %*% t(Xa) + + za = sign(beta[active]) if (length(za)>1) dz = diag(za) if (length(za)==1) dz = matrix(za,1,1) - if (inactive) { - P = diag(1,nrow(xa)) - xa %*% xap + if (inactive) { # should we include the inactive constraints?
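# NOTE (editorial sketch, not part of the original source): the rows of A and
# entries of b built in this branch are the lasso KKT conditions of Lee et al.
# (2016), rearranged into the form A %*% y <= b. Writing R = I - Xa %*% Xap for
# the residual-forming matrix computed just below, the inactive constraints say,
# elementwise,
#   | t(Xac) %*% ( R %*% y / lambda + t(Xap) %*% za ) | <= 1,
# and the active constraints say the signs of the refitted coefficients agree
# with za:
#   dz %*% Xap %*% y >= lambda * dz %*% Xai %*% za.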
R = diag(1,nrow(Xa)) - Xa %*% Xap # R is residual forming matrix of selected model - G = -rbind( - 1/lambda * t(xac) %*% P, - -1/lambda * t(xac) %*% P, - -dz %*% xap + A = rbind( + 1/lambda * t(Xac) %*% R, + -1/lambda * t(Xac) %*% R, + -dz %*% Xap ) lambda2=lambda - if(length(lambda)>1) lambda2=lambda[a] - u = -c( - 1 - t(xac) %*% t(xap) %*% za, - 1 + t(xac) %*% t(xap) %*% za, - -lambda2 * dz %*% xai %*% za) + if(length(lambda)>1) lambda2=lambda[active] + b = c( + 1 - t(Xac) %*% t(Xap) %*% za, + 1 + t(Xac) %*% t(Xap) %*% za, + -lambda2 * dz %*% Xai %*% za) } else { - G = -rbind( - # 1/lambda * t(xac) %*% P, - # -1/lambda * t(xac) %*% P, - -dz %*% xap - ) + A = -dz %*% Xap lambda2=lambda - if(length(lambda)>1) lambda2=lambda[a] - u = -c( - # 1 - t(xac) %*% t(xap) %*% za, - # 1 + t(xac) %*% t(xap) %*% za, - -lambda2 * dz %*% xai %*% za) + if(length(lambda)>1) lambda2=lambda[active] + b = -lambda2 * dz %*% Xai %*% za } - return(list(G=G,u=u)) + return(list(A=A, b=b)) } ############################## diff --git a/selectiveInference/man/fixedLassoPoly.Rd b/selectiveInference/man/fixedLassoPoly.Rd new file mode 100644 index 00000000..61b1f30a --- /dev/null +++ b/selectiveInference/man/fixedLassoPoly.Rd @@ -0,0 +1,80 @@ +\name{fixedLassoPoly} +\alias{fixedLassoPoly} + +\title{ +Compute polyhedral constraints for a LASSO problem with +a fixed value of lambda. +} +\description{ +Compute the polyhedral representation of the selection region of Lee et al. (2016). +By construction, y should satisfy A \%*\% y elementwise less than or equal to b. +} +\usage{ +fixedLassoPoly(X, y, lambda, beta, active, inactive = FALSE) +} +\arguments{ +\item{X}{ +Design matrix of the LASSO problem. +} +\item{y}{ +Response of the LASSO problem. +} +\item{lambda}{ +Value of the regularization parameter. +} +\item{beta}{ +Solution of the LASSO problem with regularization parameter set to lambda. +} +\item{active}{ +Active set of the LASSO problem as a boolean vector. Should correspond +to the non-zeros of beta. +} +\item{inactive}{ +Form the inactive constraints as well? +} +} +\details{ +This function computes +the polyhedral representation of the selection region of Lee et al. (2016). +} + +\value{ +\item{A}{Linear part of the affine inequalities.} +\item{b}{RHS offset of the affine inequalities.} +} + +\references{ +Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2016). +Exact post-selection inference, with application to the lasso. Annals of Statistics, 44(3), 907-927. + +Jonathan Taylor and Robert Tibshirani (2017) Post-selection inference for L1-penalized likelihood models. +Canadian Journal of Statistics, xx, 1-21. (Volume still not posted) +} +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ + +set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x = scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# first run glmnet +gfit = glmnet(x,y,standardize=FALSE) + +# extract coef for a given lambda; note the 1/n factor!
+# (and we don't save the intercept term) +lambda = .8 +beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +active = (beta != 0) + +fixedLassoPoly(x, y, lambda, beta, active) + +} + \ No newline at end of file From d935d943aa29682ffc7131147e8b7e69ccb6c28c Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 31 Aug 2017 15:26:04 -0700 Subject: [PATCH 241/396] removing unused C code --- selectiveInference/src/Rcpp-debias.cpp | 65 ---- selectiveInference/src/debias.c | 229 +++++++------- selectiveInference/src/debias.h | 21 -- selectiveInference/src/quadratic_program.c | 332 --------------------- 4 files changed, 110 insertions(+), 537 deletions(-) delete mode 100644 selectiveInference/src/quadratic_program.c diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 47bbacaf..6420e515 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -68,68 +68,3 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::Named("max_active_check") = max_active_check)); } - -// [[Rcpp::export]] -Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, - int row, // 0-based - double bound, - int maxiter, - Rcpp::NumericVector theta, - Rcpp::NumericVector gradient, - Rcpp::IntegerVector ever_active, - Rcpp::IntegerVector nactive, - double kkt_tol, - double objective_tol, - int max_active - ) { - - int nrow = Sigma.nrow(); // number of features - - // Active set - - int irow; - - // Extract the diagonal - Rcpp::NumericVector Sigma_diag(nrow); - double *sigma_diag_p = Sigma_diag.begin(); - - for (irow=0; irow= max_active); - - return(Rcpp::List::create(Rcpp::Named("soln") = theta, - Rcpp::Named("gradient") = gradient, - Rcpp::Named("iter") = iter, - Rcpp::Named("kkt_check") = kkt_check, - Rcpp::Named("ever_active") = ever_active, - Rcpp::Named("nactive") = nactive, - Rcpp::Named("max_active_check") = max_active_check)); - -} diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 2881d175..4afbd329 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -4,23 +4,25 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - e_i^T\theta + \mu \|\theta\|_1 +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 +// where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf // Therefore we don't have to negate the answer to get theta. 
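// Editorial sketch (added note, not from the original source): each pass of
// the coordinate descent below minimizes the objective over a single
// coordinate t, holding the others fixed:
//
//     q/2 * t^2 + l * t + mu * |t|
//
// where q is the coord-th diagonal entry of Sigma and l is the current
// gradient with the diagonal contribution q * theta[coord] removed. The
// minimizer is the soft-thresholding rule
//
//     t = -sign(l) * max(|l| - mu, 0) / q
//
// which is exactly the case analysis carried out in update_one_coord_qp.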
// Update one coordinate -double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - int *ever_active_ptr, /* Ever active set: 1-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* how many rows in Sigma */ - int row, /* which row: 1-based */ - double bound, /* Lagrange multipler for \ell_1 */ - double *theta) /* current value */ +double objective_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in Sigma */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ { int irow, icol; double value = 0; double *Sigma_ptr_tmp = Sigma_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; double *theta_row_ptr, *theta_col_ptr; int *active_row_ptr, *active_col_ptr; int active_row, active_col; @@ -32,34 +34,36 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { return(0); @@ -126,23 +123,22 @@ int check_KKT(double *theta, /* current theta */ return(0); } } - } return(1); } -double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ - double *gradient_ptr, /* Sigma times theta */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int row, /* which row: 1-based */ - int coord, /* which coordinate to update: 0-based */ - int is_active) /* Is this part of ever_active */ +double update_one_coord_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ + double *gradient_ptr, /* Sigma times theta */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this coord in ever_active */ { double delta; @@ -169,10 +165,6 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T linear_term -= quadratic_term * old_value; - if (row - 1 == coord) { // Row is 1-based - linear_term -= 1; - } - // Now soft-threshold the coord entry of theta // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| @@ -191,7 +183,7 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T // Add to active set if necessary if ((is_active == 0) && (value != 0)) { - update_ever_active(coord, ever_active_ptr, nactive_ptr); + update_ever_active_qp(coord, ever_active_ptr, nactive_ptr); } // Update the linear term @@ -217,38 +209,40 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T } -int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ - double *gradient_ptr, /* Sigma times theta */ - int *ever_active_ptr, /* Ever active set: 1-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ - 
int row, /* which coordinate to solve: 1-based */ - double kkt_tol, /* precision for checking KKT conditions */ - double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active) /* Upper limit for size of active set -- otherwise break */ +int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ + double *gradient_ptr, /* Sigma times theta */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* max number of iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol, /* precision for checking relative decrease in objective value */ + int max_active) /* Upper limit for size of active set -- otherwise break */ { int iter = 0; int icoord = 0; int iactive = 0; int *active_ptr; - double old_value, new_value; int check_objective = 1; + double old_value, new_value; + if (check_objective) { - old_value = objective(Sigma_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - row, - bound, - theta); + old_value = objective_qp(Sigma_ptr, + linear_func_ptr, + ever_active_ptr, + nactive_ptr, + nrow, + bound, + theta); + } @@ -259,28 +253,27 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ active_ptr = (int *) ever_active_ptr; for (iactive=0; iactive < *nactive_ptr; iactive++) { - update_one_coord(Sigma_ptr, - Sigma_diag_ptr, - gradient_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - bound, - theta, - row, - *active_ptr-1, // Ever active set is 1-based - 1); + update_one_coord_qp(Sigma_ptr, + linear_func_ptr, + Sigma_diag_ptr, + gradient_ptr, + ever_active_ptr, + nactive_ptr, + nrow, + bound, + theta, + *active_ptr - 1, // Ever-active is 1-based + 1); active_ptr++; } // Check KKT - if (check_KKT(theta, - gradient_ptr, - nrow, - row, - bound, - kkt_tol) == 1) { + if (check_KKT_qp(theta, + gradient_ptr, + nrow, + bound, + kkt_tol) == 1) { break; } @@ -288,27 +281,26 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ for (icoord=0; icoord 0)) { + new_value = objective_qp(Sigma_ptr, + linear_func_ptr, + ever_active_ptr, + nactive_ptr, + nrow, + bound, + theta); + + if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { break; } - old_value = new_value; } } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index f76eb692..7cab5e3c 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -23,27 +23,6 @@ int check_KKT_qp(double *theta, /* current theta */ double bound, /* Lagrange multipler for \ell_1 */ double tol); /* precision for checking KKT conditions */ -int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ - double *gradient_ptr, /* Current gradient of quadratic loss */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row, /* which coordinate to update: 0-based */ - double kkt_tol, /* precision for checking KKT conditions */ - double objective_tol, /* precision for 
checking relative decrease in objective value */ - int max_active); /* Upper limit for size of active set -- otherwise break */ - -int check_KKT(double *theta, /* current theta */ - double *gradient_ptr, /* Current gradient of quadratic loss */ - int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ - double bound, /* Lagrange multipler for \ell_1 */ - double kkt_tol); /* precision for checking KKT conditions */ - #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c deleted file mode 100644 index 4afbd329..00000000 --- a/selectiveInference/src/quadratic_program.c +++ /dev/null @@ -1,332 +0,0 @@ -#include // for fabs - -// Find an approximate row of \hat{Sigma}^{-1} - -// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf - -// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 -// where l is `linear_func` below - -// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf -// Therefore we don't have to negate the answer to get theta. -// Update one coordinate - -double objective_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *linear_func_ptr, /* Linear term in objective */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* how many rows in Sigma */ - double bound, /* Lagrange multipler for \ell_1 */ - double *theta) /* current value */ -{ - int irow, icol; - double value = 0; - double *Sigma_ptr_tmp = Sigma_ptr; - double *linear_func_ptr_tmp = linear_func_ptr; - double *theta_row_ptr, *theta_col_ptr; - int *active_row_ptr, *active_col_ptr; - int active_row, active_col; - int nactive = *nactive_ptr; - - theta_row_ptr = theta; - theta_col_ptr = theta; - - for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { - return(0); - } - else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { - return(0); - } - } - else { - if (fabs(gradient) > (1. + tol) * bound) { - return(0); - } - } - } - - return(1); -} - -double update_one_coord_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *linear_func_ptr, /* Linear term in objective */ - double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ - double *gradient_ptr, /* Sigma times theta */ - int *ever_active_ptr, /* Ever active set: 1-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int coord, /* which coordinate to update: 0-based */ - int is_active) /* Is this coord in ever_active */ -{ - - double delta; - double linear_term = 0; - double value = 0; - double old_value; - double *Sigma_ptr_tmp; - double *gradient_ptr_tmp; - double *theta_ptr; - int icol = 0; - - double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); - double quadratic_term = *quadratic_ptr; - - gradient_ptr_tmp = ((double *) gradient_ptr + coord); - linear_term = *gradient_ptr_tmp; - - theta_ptr = ((double *) theta + coord); - old_value = *theta_ptr; - - // The coord entry of gradient_ptr term has a diagonal term in it: - // Sigma[coord, coord] * theta[coord] - // This removes it. 
- - linear_term -= quadratic_term * old_value; - - // Now soft-threshold the coord entry of theta - - // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| - // with q=quadratic_term and l=linear_term - - // With a negative linear term, solution should be - // positive - - if (linear_term < -bound) { - value = (-linear_term - bound) / quadratic_term; - } - else if (linear_term > bound) { - value = -(linear_term - bound) / quadratic_term; - } - - // Add to active set if necessary - - if ((is_active == 0) && (value != 0)) { - update_ever_active_qp(coord, ever_active_ptr, nactive_ptr); - } - - // Update the linear term - - if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { - - delta = value - old_value; - Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow); - gradient_ptr_tmp = ((double *) gradient_ptr); - - for (icol=0; icol= max_active) { - break; - } - - // Check relative decrease of objective - - if (check_objective) { - new_value = objective_qp(Sigma_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - bound, - theta); - - if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { - break; - } - old_value = new_value; - } - } - return(iter); -} - From 7004a0b894a36cc28c879cd100ce99ba3cba118d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 31 Aug 2017 15:31:16 -0700 Subject: [PATCH 242/396] moving file back --- selectiveInference/src/Makevars | 2 +- selectiveInference/src/{debias.c => quadratic_program.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename selectiveInference/src/{debias.c => quadratic_program.c} (100%) diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars index cde36c98..b77ed954 100644 --- a/selectiveInference/src/Makevars +++ b/selectiveInference/src/Makevars @@ -2,7 +2,7 @@ PKG_CFLAGS= -I. PKG_CPPFLAGS= -I. PKG_LIBS=-L. 
-$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o debias.o +$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o quadratic_program.o clean: rm -f *o diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/quadratic_program.c similarity index 100% rename from selectiveInference/src/debias.c rename to selectiveInference/src/quadratic_program.c From 5e671a4652b5fbb1dcd0ebc1269b38d5a92a8f3b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 31 Aug 2017 17:15:22 -0700 Subject: [PATCH 243/396] Rcpp exports --- selectiveInference/R/RcppExports.R | 15 +++++++ selectiveInference/src/RcppExports.cpp | 57 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 selectiveInference/R/RcppExports.R create mode 100644 selectiveInference/src/RcppExports.cpp diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R new file mode 100644 index 00000000..e04c446a --- /dev/null +++ b/selectiveInference/R/RcppExports.R @@ -0,0 +1,15 @@ +# This file was generated by Rcpp::compileAttributes +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { + .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) +} + +update1_ <- function(Q2, w, m, k) { + .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) +} + +downdate1_ <- function(Q1, R, j0, m, n) { + .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) +} + diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp new file mode 100644 index 00000000..feefe32e --- /dev/null +++ b/selectiveInference/src/RcppExports.cpp @@ -0,0 +1,57 @@ +// This file was generated by Rcpp::compileAttributes +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// solve_QP +Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); +RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); + Rcpp::traits::input_parameter< double >::type bound(boundSEXP); + Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); + Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); + Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); + Rcpp::traits::input_parameter< int >::type 
max_active(max_activeSEXP); + __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return __result; +END_RCPP +} +// update1_ +Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); +RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); + Rcpp::traits::input_parameter< int >::type m(mSEXP); + Rcpp::traits::input_parameter< int >::type k(kSEXP); + __result = Rcpp::wrap(update1_(Q2, w, m, k)); + return __result; +END_RCPP +} +// downdate1_ +Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); +RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); + Rcpp::traits::input_parameter< int >::type j0(j0SEXP); + Rcpp::traits::input_parameter< int >::type m(mSEXP); + Rcpp::traits::input_parameter< int >::type n(nSEXP); + __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return __result; +END_RCPP +} From f4f448ec06c44ea44cee27484c16c6c42aa24d96 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 11 Sep 2017 11:08:06 -0700 Subject: [PATCH 244/396] unexporting fixedLassoPoly --- .../man => forLater}/fixedLassoPoly.Rd | 1 + selectiveInference/NAMESPACE | 3 +- selectiveInference/R/RcppExports.R | 8 ++-- selectiveInference/src/RcppExports.cpp | 44 ++++++++++++------- 4 files changed, 34 insertions(+), 22 deletions(-) rename {selectiveInference/man => forLater}/fixedLassoPoly.Rd (97%) diff --git a/selectiveInference/man/fixedLassoPoly.Rd b/forLater/fixedLassoPoly.Rd similarity index 97% rename from selectiveInference/man/fixedLassoPoly.Rd rename to forLater/fixedLassoPoly.Rd index 61b1f30a..88c54468 100644 --- a/selectiveInference/man/fixedLassoPoly.Rd +++ b/forLater/fixedLassoPoly.Rd @@ -75,6 +75,7 @@ beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] active = (beta != 0) fixedLassoPoly(x, y, lambda, beta, active) +fixedLassoPoly(x, y, lambda, beta, active, inactive=TRUE) } \ No newline at end of file diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index fe0237ef..d72d56a9 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -14,8 +14,7 @@ export(lar,fs, TG.pvalue, TG.limits, TG.interval, - debiasingMatrix, - fixedLassoPoly + debiasingMatrix ) S3method("coef", "lar") diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index e04c446a..622d1d20 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,15 +1,15 @@ -# This file was generated by Rcpp::compileAttributes +# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + 
.Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) + .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index feefe32e..461b0e58 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// This file was generated by Rcpp::compileAttributes +// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,36 +22,48 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - __result = Rcpp::wrap(update1_(Q2, w, m, k)); - 
return __result; + rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); + return rcpp_result_gen; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type n(nSEXP); - __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return __result; + rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return rcpp_result_gen; END_RCPP } + +static const R_CallMethodDef CallEntries[] = { + {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, + {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, + {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_selectiveInference(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} From f0010f3a1bdb1d06c361dd4b1be9c83aeb69151f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 11 Sep 2017 11:16:57 -0700 Subject: [PATCH 245/396] changing signs in arguments to TG.limits --- selectiveInference/R/funs.fs.R | 12 ++++++------ selectiveInference/R/funs.lar.R | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/selectiveInference/R/funs.fs.R b/selectiveInference/R/funs.fs.R index db0968b1..669cf146 100644 --- a/selectiveInference/R/funs.fs.R +++ b/selectiveInference/R/funs.fs.R @@ -295,13 +295,13 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - Gj = G[1:nconstraint[j],] - uj = rep(0,nconstraint[j]) + Aj = -G[1:nconstraint[j],] + bj = -rep(0,nconstraint[j]) vj = vreg[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) + limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n))) a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv @@ -353,10 +353,10 @@ fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic vj = vj / mj # Standardize (divide by norm of vj) sign[j] = sign(sum(vj*y)) vj = sign[j] * vj - Gj = rbind(G,vj) - uj = c(u,0) + Aj = -rbind(G,vj) + bj = -c(u,0) - limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) + limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n))) a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv sxj = sx[vars[j]] diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index a01b2d32..0fad04fc 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -367,13 +367,13 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai for (j in 1:k) { if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - Gj = G[1:nk[j],] - uj = rep(0,nk[j]) + Aj = 
-G[1:nk[j],] + bj = -rep(0,nk[j]) vj = vreg[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) + limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n))) a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv sxj = sx[vars[j]] @@ -428,10 +428,10 @@ larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","ai vj = vj / mj # Standardize (divide by norm of vj) sign[j] = sign(sum(vj*y)) vj = sign[j] * vj - Gj = rbind(G,vj) - uj = c(u,0) + Aj = -rbind(G,vj) + bj = -c(u,0) - limits.info = TG.limits(y, -Gj, -uj, vj, Sigma=diag(rep(sigma^2, n))) + limits.info = TG.limits(y, Aj, bj, vj, Sigma=diag(rep(sigma^2, n))) a = TG.pvalue.base(limits.info, bits=bits) pv[j] = a$pv From 7040ad440f72fc0784c118c00c9c705945c65001 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 22 Sep 2017 15:41:08 -0700 Subject: [PATCH 246/396] copy of CRAN version 1.2.3 in selectiveInference-currentCRAN --- selectiveInference-currentCRAN/DESCRIPTION | 18 + selectiveInference-currentCRAN/MD5 | 35 + selectiveInference-currentCRAN/NAMESPACE | 45 + .../R/funs.common.R | 186 ++++ selectiveInference-currentCRAN/R/funs.fixed.R | 218 +++++ .../R/funs.fixedCox.R | 120 +++ .../R/funs.fixedLogit.R | 151 ++++ selectiveInference-currentCRAN/R/funs.fs.R | 482 +++++++++++ .../R/funs.groupfs.R | 794 ++++++++++++++++++ selectiveInference-currentCRAN/R/funs.inf.R | 299 +++++++ selectiveInference-currentCRAN/R/funs.lar.R | 632 ++++++++++++++ .../R/funs.manymeans.R | 196 +++++ selectiveInference-currentCRAN/R/funs.max.R | 84 ++ .../R/funs.quadratic.R | 257 ++++++ .../man/estimateSigma.Rd | 64 ++ .../man/factorDesign.Rd | 30 + .../man/fixedLassoInf.Rd | 252 ++++++ .../man/forwardStop.Rd | 55 ++ selectiveInference-currentCRAN/man/fs.Rd | 95 +++ selectiveInference-currentCRAN/man/fsInf.Rd | 135 +++ selectiveInference-currentCRAN/man/groupfs.Rd | 49 ++ .../man/groupfsInf.Rd | 31 + selectiveInference-currentCRAN/man/lar.Rd | 95 +++ selectiveInference-currentCRAN/man/larInf.Rd | 140 +++ .../man/manyMeans.Rd | 60 ++ selectiveInference-currentCRAN/man/plot.fs.Rd | 42 + .../man/plot.lar.Rd | 46 + .../man/predict.fs.Rd | 49 ++ .../man/predict.groupfs.Rd | 24 + .../man/predict.lar.Rd | 52 ++ .../man/scaleGroups.Rd | 28 + .../man/selectiveInference-internal.Rd | 24 + .../man/selectiveInference.Rd | 205 +++++ .../src/matrixcomps.c | 266 ++++++ .../src/selinf_init.c | 24 + .../src/truncnorm.c | 188 +++++ 36 files changed, 5471 insertions(+) create mode 100644 selectiveInference-currentCRAN/DESCRIPTION create mode 100644 selectiveInference-currentCRAN/MD5 create mode 100644 selectiveInference-currentCRAN/NAMESPACE create mode 100644 selectiveInference-currentCRAN/R/funs.common.R create mode 100644 selectiveInference-currentCRAN/R/funs.fixed.R create mode 100644 selectiveInference-currentCRAN/R/funs.fixedCox.R create mode 100644 selectiveInference-currentCRAN/R/funs.fixedLogit.R create mode 100644 selectiveInference-currentCRAN/R/funs.fs.R create mode 100644 selectiveInference-currentCRAN/R/funs.groupfs.R create mode 100644 selectiveInference-currentCRAN/R/funs.inf.R create mode 100644 selectiveInference-currentCRAN/R/funs.lar.R create mode 100644 selectiveInference-currentCRAN/R/funs.manymeans.R create mode 100644 selectiveInference-currentCRAN/R/funs.max.R create mode 100644 selectiveInference-currentCRAN/R/funs.quadratic.R create mode 100644 selectiveInference-currentCRAN/man/estimateSigma.Rd create mode 100644 
selectiveInference-currentCRAN/man/factorDesign.Rd create mode 100644 selectiveInference-currentCRAN/man/fixedLassoInf.Rd create mode 100644 selectiveInference-currentCRAN/man/forwardStop.Rd create mode 100644 selectiveInference-currentCRAN/man/fs.Rd create mode 100644 selectiveInference-currentCRAN/man/fsInf.Rd create mode 100644 selectiveInference-currentCRAN/man/groupfs.Rd create mode 100644 selectiveInference-currentCRAN/man/groupfsInf.Rd create mode 100644 selectiveInference-currentCRAN/man/lar.Rd create mode 100644 selectiveInference-currentCRAN/man/larInf.Rd create mode 100644 selectiveInference-currentCRAN/man/manyMeans.Rd create mode 100644 selectiveInference-currentCRAN/man/plot.fs.Rd create mode 100644 selectiveInference-currentCRAN/man/plot.lar.Rd create mode 100644 selectiveInference-currentCRAN/man/predict.fs.Rd create mode 100644 selectiveInference-currentCRAN/man/predict.groupfs.Rd create mode 100644 selectiveInference-currentCRAN/man/predict.lar.Rd create mode 100644 selectiveInference-currentCRAN/man/scaleGroups.Rd create mode 100644 selectiveInference-currentCRAN/man/selectiveInference-internal.Rd create mode 100644 selectiveInference-currentCRAN/man/selectiveInference.Rd create mode 100644 selectiveInference-currentCRAN/src/matrixcomps.c create mode 100644 selectiveInference-currentCRAN/src/selinf_init.c create mode 100644 selectiveInference-currentCRAN/src/truncnorm.c diff --git a/selectiveInference-currentCRAN/DESCRIPTION b/selectiveInference-currentCRAN/DESCRIPTION new file mode 100644 index 00000000..0eb38f7a --- /dev/null +++ b/selectiveInference-currentCRAN/DESCRIPTION @@ -0,0 +1,18 @@ +Package: selectiveInference +Type: Package +Title: Tools for Post-Selection Inference +Version: 1.2.3 +Date: 2017-09-18 +Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, + Joshua Loftus, Stephen Reid +Maintainer: Rob Tibshirani +Depends: glmnet, intervals, survival +Suggests: Rmpfr +Description: New tools for post-selection inference, for use + with forward stepwise regression, least angle regression, the + lasso, and the many means problem. The lasso function implements Gaussian, logistic and Cox survival models. 
+License: GPL-2 +NeedsCompilation: yes +Packaged: 2017-09-19 23:49:11 UTC; tibs +Repository: CRAN +Date/Publication: 2017-09-20 03:14:10 UTC diff --git a/selectiveInference-currentCRAN/MD5 b/selectiveInference-currentCRAN/MD5 new file mode 100644 index 00000000..66755467 --- /dev/null +++ b/selectiveInference-currentCRAN/MD5 @@ -0,0 +1,35 @@ +89792835188231f03f117ab143c2fe46 *DESCRIPTION +5b8e448cf043849e190d2b71898eaad9 *NAMESPACE +9c5c032cb17908e6dea15a0b89d649a9 *R/funs.common.R +cf1d1199cf6cacb1d54fce08388d20cc *R/funs.fixed.R +faf5eed09c13d3e80270d82305f0b348 *R/funs.fixedCox.R +25e9f2957b4cbac8b11a283c69533f64 *R/funs.fixedLogit.R +c7af51c32236ef56a6ed0a525f52dce4 *R/funs.fs.R +fc41d0af77330bde0395f438c117c7d8 *R/funs.groupfs.R +632c61c8fc3da59cde6b337f7d4341a4 *R/funs.inf.R +dba7bfb08c9184569d97c14a0575c5a1 *R/funs.lar.R +ed45e9aa5e6383ff9888b35af9b30e9e *R/funs.manymeans.R +bd535e32d32e9cd0e723a5f9f00d9eef *R/funs.max.R +6daca48218e58720c1570784706c199a *R/funs.quadratic.R +d1db3866e82ad6e33baef9da4d994833 *man/estimateSigma.Rd +1747e0899ef985469ae560fb828755cb *man/factorDesign.Rd +1028942deac2fd45aaf2e49d94aa6dac *man/fixedLassoInf.Rd +60e2065f446f1d6dc11c77a5534580bc *man/forwardStop.Rd +2e6f87cd38e1f4b4cb60bfc8299dc1f4 *man/fs.Rd +1483067f07f71b2d996138877e4f48ef *man/fsInf.Rd +7d5ca8ce0ff81cf5f0e87cadffa29229 *man/groupfs.Rd +5ccec019c69b4832438b79830649e730 *man/groupfsInf.Rd +61bdaa3e5ac7bbe02d55f42530edf956 *man/lar.Rd +b25bc2d93c0b266dbec45d82a5d05004 *man/larInf.Rd +4da84515659e7a70fb7375dc2c791b4b *man/manyMeans.Rd +c7c96850986be5e1203cca414a410a32 *man/plot.fs.Rd +3dc4100747d7e72276a75c8e6beba37c *man/plot.lar.Rd +192e0031a10ace23df79a314cf90c648 *man/predict.fs.Rd +588230513bd05fd139c75d45f94a7cd6 *man/predict.groupfs.Rd +0b477548ac30e902eca27163a947e2ca *man/predict.lar.Rd +b275e61a2976d14595dc9dfea646675e *man/scaleGroups.Rd +0c21e5414145f4841c3897c995dad4c2 *man/selectiveInference-internal.Rd +b0bbe4ffe6e958215a85bb15fc43ab01 *man/selectiveInference.Rd +4313aa781953d7f1f6e75383e938e1c7 *src/matrixcomps.c +d7f4c478a9de5716b2da338ae6da2ea5 *src/selinf_init.c +11b2e6c34bc1ed181b407fc658a3b0af *src/truncnorm.c diff --git a/selectiveInference-currentCRAN/NAMESPACE b/selectiveInference-currentCRAN/NAMESPACE new file mode 100644 index 00000000..cf2f3b84 --- /dev/null +++ b/selectiveInference-currentCRAN/NAMESPACE @@ -0,0 +1,45 @@ +export(lar,fs, + larInf,fsInf, + coef.lar,coef.fs, + predict.lar,predict.fs, + print.lar,print.fs, + print.larInf,print.fsInf, + plot.lar,plot.fs, + fixedLassoInf,print.fixedLassoInf, +# fixedLogitLassoInf,print.fixedLogitLassoInf, +# fixedCoxLassoInf,print.fixedCoxLassoInf, + forwardStop, + estimateSigma, + manyMeans,print.manyMeans, + groupfs,groupfsInf, + scaleGroups,factorDesign + ) + +S3method("coef", "lar") +S3method("predict", "lar") +S3method("print", "lar") +S3method("plot", "lar") +S3method("print", "larInf") +S3method("coef", "fs") +S3method("predict", "fs") +S3method("print", "fs") +S3method("plot", "fs") +S3method("print", "fsInf") +S3method("print", "fixedLassoInf") +S3method("print", "fixedLogitLassoInf") +S3method("print", "fixedCoxLassoInf") +S3method("print", "manyMeans") +S3method("print", "groupfs") +S3method("print", "groupfsInf") + +useDynLib("selectiveInference",.registration=TRUE) +import(glmnet) +import(intervals) +import(survival) +importFrom("graphics", abline, axis, matplot) +importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, + qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) +importFrom("stats", "coef", 
"df", "lm", "pf") +importFrom("stats", "glm", "residuals", "vcov") + + diff --git a/selectiveInference-currentCRAN/R/funs.common.R b/selectiveInference-currentCRAN/R/funs.common.R new file mode 100644 index 00000000..59457008 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.common.R @@ -0,0 +1,186 @@ +# Special linear time order function, works only when x +# is a scrambled vector of integers. + +Order <- function(x) { + n = length(x) + o = numeric(n) + o[x] = Seq(1,n) + return(o) +} + +# Returns a sequence of integers from a to b if a <= b, +# otherwise nothing. You have no idea how important this +# function is... + +Seq <- function(a, b, ...) { + if (a<=b) return(seq(a,b,...)) + else return(numeric(0)) +} + +# Returns the sign of x, with Sign(0)=1. + +Sign <- function(x) { + return(-1+2*(x>=0)) +} + +############################## + +# Centering and scaling convenience function + +standardize <- function(x, y, intercept, normalize) { + x = as.matrix(x) + y = as.numeric(y) + n = nrow(x) + p = ncol(x) + + if (intercept) { + bx = colMeans(x) + by = mean(y) + x = scale(x,bx,FALSE) + y = y-mean(y) + } else { + bx = rep(0,p) + by = 0 + } + if (normalize) { + sx = sqrt(colSums(x^2)) + x = scale(x,FALSE,sx) + } else { + sx = rep(1,p) + } + + return(list(x=x,y=y,bx=bx,by=by,sx=sx)) +} + +############################## + +# Interpolation function to get coefficients + +coef.interpolate <- function(betas, s, knots, dec=TRUE) { + # Sort the s values + o = order(s,dec=dec) + s = s[o] + + k = length(s) + mat = matrix(rep(knots,each=k),nrow=k) + if (dec) b = s >= mat + else b = s <= mat + blo = max.col(b,ties.method="first") + bhi = pmax(blo-1,1) + + i = bhi==blo + p = numeric(k) + p[i] = 0 + p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i] + + beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE])) + colnames(beta) = as.character(round(s,3)) + rownames(beta) = NULL + + # Return in original order + o = order(o) + return(beta[,o,drop=FALSE]) +} + +############################## + +checkargs.xy <- function(x, y) { + if (missing(x)) stop("x is missing") + if (is.null(x) || !is.matrix(x)) stop("x must be a matrix") + if (missing(y)) stop("y is missing") + if (is.null(y) || !is.numeric(y)) stop("y must be numeric") + if (ncol(x) == 0) stop("There must be at least one predictor [must have ncol(x) > 0]") + if (checkcols(x)) stop("x cannot have duplicate columns") + if (length(y) == 0) stop("There must be at least one data point [must have length(y) > 0]") + if (length(y)!=nrow(x)) stop("Dimensions don't match [length(y) != nrow(x)]") +} + +checkargs.misc <- function(sigma=NULL, alpha=NULL, k=NULL, + gridrange=NULL, gridpts=NULL, griddepth=NULL, + mult=NULL, ntimes=NULL, + beta=NULL, lambda=NULL, tol.beta=NULL, tol.kkt=NULL, + bh.q=NULL) { + + if (!is.null(sigma) && sigma <= 0) stop("sigma must be > 0") + if (!is.null(lambda) && lambda < 0) stop("lambda must be >= 0") + if (!is.null(alpha) && (alpha <= 0 || alpha >= 1)) stop("alpha must be between 0 and 1") + if (!is.null(k) && length(k) != 1) stop("k must be a single number") + if (!is.null(k) && (k < 1 || k != floor(k))) stop("k must be an integer >= 1") + if (!is.null(gridrange) && (length(gridrange) != 2 || gridrange[1] > gridrange[2])) + stop("gridrange must be an interval of the form c(a,b) with a <= b") + if (!is.null(gridpts) && (gridpts < 20 || gridpts != round(gridpts))) + stop("gridpts must be an integer >= 20") + if (!is.null(griddepth) && (griddepth > 10 || griddepth != round(griddepth))) + stop("griddepth must be an 
integer <= 10") + if (!is.null(mult) && mult < 0) stop("mult must be >= 0") + if (!is.null(ntimes) && (ntimes <= 0 || ntimes != round(ntimes))) + stop("ntimes must be an integer > 0") + if (!is.null(beta) && sum(beta!=0)==0) stop("Value of lambda too large, beta is zero") + if (!is.null(lambda) && length(lambda) != 1) stop("lambda must be a single number") + if (!is.null(lambda) && lambda < 0) stop("lambda must be >=0") + if (!is.null(tol.beta) && tol.beta <= 0) stop("tol.beta must be > 0") + if (!is.null(tol.kkt) && tol.kkt <= 0) stop("tol.kkt must be > 0") +} + +# Make sure that no two columms of A are the same +# (this works with probability one). + +checkcols <- function(A) { + b = rnorm(nrow(A)) + a = sort(t(A)%*%b) + if (any(diff(a)==0)) return(TRUE) + return(FALSE) +} + +estimateSigma <- function(x, y, intercept=TRUE, standardize=TRUE) { + checkargs.xy(x,rep(0,nrow(x))) + if(nrow(x)<10) stop("Number of observations must be at least 10 to run estimateSigma") + cvfit=cv.glmnet(x,y,intercept=intercept,standardize=standardize) + lamhat=cvfit$lambda.min + fit=glmnet(x,y,standardize=standardize) + yhat=predict(fit,x,s=lamhat) + nz=sum(predict(fit,s=lamhat, type="coef")!=0) + sigma=sqrt(sum((y-yhat)^2)/(length(y)-nz-1)) + return(list(sigmahat=sigma, df=nz)) +} + +# Update the QR factorization, after a column has been +# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. + +updateQR <- function(Q1,Q2,R,col) { + m = nrow(Q1) + n = ncol(Q1) + k = ncol(Q2) + + a = .C("update1", + Q2=as.double(Q2), + w=as.double(t(Q2)%*%col), + m=as.integer(m), + k=as.integer(k), + dup=FALSE, + package="selectiveInference") + + Q2 = matrix(a$Q2,nrow=m) + w = c(t(Q1)%*%col,a$w) + + # Re-structure: delete a column from Q2, add one to + # Q1, and expand R + Q1 = cbind(Q1,Q2[,1]) + Q2 = Q2[,-1,drop=FALSE] + R = rbind(R,rep(0,n)) + R = cbind(R,w[Seq(1,n+1)]) + + return(list(Q1=Q1,Q2=Q2,R=R)) +} + +# Moore-Penrose pseudo inverse for symmetric matrices + +pinv <- function(A, tol=.Machine$double.eps) { + e = eigen(A) + v = Re(e$vec) + d = Re(e$val) + d[d > tol] = 1/d[d > tol] + d[d < tol] = 0 + if (length(d)==1) return(v*d*v) + else return(v %*% diag(d) %*% t(v)) +} diff --git a/selectiveInference-currentCRAN/R/funs.fixed.R b/selectiveInference-currentCRAN/R/funs.fixed.R new file mode 100644 index 00000000..b30d04ce --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.fixed.R @@ -0,0 +1,218 @@ +# Lasso inference function (for fixed lambda). 
Note: here we are providing inference +# for the solution of +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 + +fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, +sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE) { + + family = match.arg(family) + this.call = match.call() + type = match.arg(type) + + if(family=="binomial") { + if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } +else if(family=="cox") { + if(type!="partial") stop("Only type= partial allowed with Cox family") + out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + +else{ + + + + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (length(beta) != p) stop("Since family='gaussian', beta must have length equal to ncol(x)") + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + vars = which(abs(beta) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Get lasso polyhedral region, of form Gy >= u + out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u + + # Check polyhedral region + tol.poly = 0.01 + if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + sign = numeric(k) + + if (type=="full" & p > n) + warning(paste("type='full' does not make sense when p > n;", + "switching to type='partial'")) + + if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + } + else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + } + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + a = poly.pval(y,G,u,vj,sigma,bits) + pv[j] = a$pv + vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) + + a = poly.int(y,G,u,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call) + class(out) = "fixedLassoInf" + return(out) +} +} + +############################# + + +fixedLasso.poly= +function(x, y, beta, lambda, a) { + xa = x[,a,drop=F] + xac = x[,!a,drop=F] + xai = pinv(crossprod(xa)) + xap = xai %*% t(xa) + za = sign(beta[a]) + if (length(za)>1) dz = diag(za) + if (length(za)==1) dz = matrix(za,1,1) + +# P = diag(1,nrow(xa)) - xa %*% xap + #NOTE: inactive constraints not needed below! + + G = -rbind( + # 1/lambda * t(xac) %*% P, + # -1/lambda * t(xac) %*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + # 1 - t(xac) %*% t(xap) %*% za, + # 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + + return(list(G=G,u=u)) +} + +############################## + +print.fixedLassoInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$coef0 / x$sd,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + +#estimateLambda <- function(x, sigma, nsamp=1000){ +# checkargs.xy(x,rep(0,nrow(x))) +# if(nsamp < 10) stop("More Monte Carlo samples required for estimation") +# if (length(sigma)!=1) stop("sigma should be a number > 0") + # if (sigma<=0) stop("sigma should be a number > 0") + + # n = nrow(x) + # eps = sigma*matrix(rnorm(nsamp*n),n,nsamp) + # lambda = 2*mean(apply(t(x)%*%eps,2,max)) + # return(lambda) +#} + diff --git a/selectiveInference-currentCRAN/R/funs.fixedCox.R b/selectiveInference-currentCRAN/R/funs.fixedCox.R new file mode 100644 index 00000000..ff778d99 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.fixedCox.R @@ -0,0 +1,120 @@ +fixedCoxLassoInf=function(x,y,status,beta,lambda,alpha=.1, type=c("partial"),tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + + checkargs.xy(x,y) + if(is.null(status)) stop("Must supply `status' argument") +if( sum(status==0)+sum(status==1)!=length(y)) stop("status vector must have values 0 or 1") + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + n=nrow(x) + p=ncol(x) + nvar=sum(beta!=0) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + + + m=beta!=0 +vars=which(m) +if(sum(m)>0){ + bhat=beta[beta!=0] #penalized coefs just for active variables + s2=sign(bhat) + + #check KKT + + aaa=coxph(Surv(y,status)~x[,m],init=bhat,iter.max=0) # this gives the Cox model at exactly bhat + # so when we compute gradient and score + # we are evaluating at the LASSO solution + # naming of variables could be improved... + res=residuals(aaa,type="score") +if(!is.matrix(res)) res=matrix(res,ncol=1) +scor=colSums(res) + g=(scor+lambda*s2)/(2*lambda) +# cat(c(g,lambda,tol.kkt),fill=T) + if (any(abs(g) > 1+tol.kkt) ) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + +# Hessian of partial likelihood at the LASSO solution +MM=vcov(aaa) + +bbar=(bhat+lambda*MM%*%s2) +A1=-(mydiag(s2)) +b1= -(mydiag(s2)%*%MM)%*%s2*lambda + + temp=max(A1%*%bbar-b1) + + +# compute p-values + +# JT: are we sure the signs of these are correctly handled? 
+# two sided p-values numerically agree with python but +# the one sided p-values are a bit off + + for(jj in 1:length(bbar)){ + vj=rep(0,length(bbar));vj[jj]=s2[jj] + + + junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) + + pv[jj] = junk$pv + vlo[jj]=junk$vlo + vup[jj]=junk$vup + sd[jj]=junk$sd + + junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha) + ci[jj,]=junk2$int + tailarea[jj,] = junk2$tailarea + + } + # JT: these don't seem to be the real one-step estimators + fit0=coxph(Surv(y,status)~x[,m]) + coef0=fit0$coef + se0=sqrt(diag(fit0$var)) + zscore0=coef0/se0 + + out = list(lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call) + class(out) = "fixedCoxLassoInf" +} +return(out) +} + + + +print.fixedCoxLassoInf <- function(x, tailarea=TRUE, ...) { + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + + diff --git a/selectiveInference-currentCRAN/R/funs.fixedLogit.R b/selectiveInference-currentCRAN/R/funs.fixedLogit.R new file mode 100644 index 00000000..5b673546 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.fixedLogit.R @@ -0,0 +1,151 @@ + +fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + + type = match.arg(type) + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + checkargs.misc(beta=beta,lambda=lambda,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + + n=length(y) + p=ncol(x) + # I assume that intcpt was used + if(length(beta)!=p+1) stop("Since family='binomial', beta must be of length ncol(x)+1, that is, it should include an intercept") + nvar=sum(beta[-1]!=0) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + +#do we need to worry about standardization? 
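+# (As currently written, no: in the binomial case the design is used
+# as-is, and the standardization call below stays commented out.
+# Instead the model is linearized at the lasso solution, as in one
+# IRLS step: with etahat = xxm %*% bhat and
+# prhat = exp(etahat)/(1+exp(etahat)), the weights are
+# ww = prhat*(1-prhat) and the working response is
+# z = etahat + (y-prhat)/ww, so the KKT check and the polyhedral
+# constraints further down are those of the weighted least-squares
+# problem with response z and weights ww.)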
+ +# obj = standardize(x,y,TRUE,FALSE) + # x = obj$x + # y = obj$y + + m=beta[-1]!=0 #active set + + bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars + s2=sign(bhat) + lam2m=diag(c(0,rep(lambda,sum(m)))) + + + xxm=cbind(1,x[,m]) + + etahat = xxm %*% bhat + prhat = as.vector(exp(etahat) / (1 + exp(etahat))) + ww=prhat*(1-prhat) + # w=diag(ww) + +#check KKT + z=etahat+(y-prhat)/ww + # g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda + g=scale(t(x),FALSE,1/ww)%*%(z-etahat)/lambda # negative gradient scaled by lambda + if (any(abs(g) > 1+tol.kkt) ) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + vars = which(abs(beta[-1]) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[-1][vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + #constraints for active variables + # MM=solve(t(xxm)%*%w%*%xxm) + MM=solve(scale(t(xxm),F,1/ww)%*%xxm) + gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized + # at exact LASSO solution it should be s2[-1] + dbeta = MM %*% gm + + # bbar=(bhat+lam2m%*%MM%*%s2) # JT: this is wrong, shouldn't use sign of intercept anywhere... + bbar = bhat - dbeta + + A1=-(mydiag(s2))[-1,] + b1= (s2 * dbeta)[-1] + + tol.poly = 0.01 + if (max((A1 %*% bbar) - b1) > tol.poly) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + + + for(jj in 1:sum(m)){ + vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] + # compute p-values + junk=mypoly.pval.lee(bbar,A1,b1,vj,MM) + pv[jj] = junk$pv + + vlo[jj]=junk$vlo + vup[jj]=junk$vup + sd[jj]=junk$sd + # junk2=mypoly.int.lee(bbar[-1], A1, b1,vj,MM[-1,-1],alpha=.1) + junk2=mypoly.int.lee(bbar,vj,vlo[jj],vup[jj],sd[jj],alpha=.1) + + ci[jj,]=junk2$int + tailarea[jj,] = junk2$tailarea + } + + # JT: these are not the one step estimators but they are close + fit0=glm(y~x[,m],family="binomial") + sfit0=summary(fit0) + coef0=bbar[-1] #fit0$coef[-1] + se0=sqrt(diag(MM)[-1]) # sfit0$cov.scaled)[-1]) + zscore0=coef0/se0 + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call, + info.matrix=MM) # info.matrix is output just for debugging purposes at the moment + class(out) = "fixedLogitLassoInf" + return(out) + + } + + + +print.fixedLogitLassoInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", + ifelse(x$type=="partial","partial","full"))) + invisible() +} + + + diff --git a/selectiveInference-currentCRAN/R/funs.fs.R b/selectiveInference-currentCRAN/R/funs.fs.R new file mode 100644 index 00000000..b5ee511b --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.fs.R @@ -0,0 +1,482 @@ +# We compute the forward stepwise regression (FS) path given +# a response vector y and predictor matrix x. We assume +# that x has columns in general position. + +fs <- function(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, + verbose=FALSE) { + + this.call = match.call() + checkargs.xy(x=x,y=y) + + # Center and scale, etc. + obj = standardize(x,y,intercept,normalize) + x = obj$x + y = obj$y + bx = obj$bx + by = obj$by + sx = obj$sx + n = nrow(x) + p = ncol(x) + + ##### + # To keep consistent with the lar function, we parametrize + # so that the first step has all zero coefficients, + # Also, an interesting note: the effective "lambda" (maximal + # correlation with the residual) may increase with stepwise! + # So we don't keep track of it + + ##### + # Find the first variable to enter and its sign + working_x = scale(x,center=F,scale=sqrt(colSums(x^2))) + score = t(working_x)%*%y + i_hit = which.max(abs(score)) # Hitting coordinate + sign_hit = Sign(score[i_hit]) # Sign + signs = sign_hit # later signs will be appended to `signs` + + if (verbose) { + cat(sprintf("1. Adding variable %i, |A|=%i...",i_hit,1)) + } + + # Now iteratively find the new FS estimates + + # Things to keep track of, and return at the end + # JT: I guess the "buf" just saves us from making huge + # matrices we don't need? + + buf = min(maxsteps,500) + action = numeric(buf) # Actions taken + df = numeric(buf) # Degrees of freedom + beta = matrix(0,p,buf) # FS estimates + + action[1] = i_hit + df[1] = 0 + beta[,1] = 0 + + # Gamma matrix! + gbuf = max(2*p*3,2000) # Space for 3 steps, at least + gi = 0 # index into rows of Gamma matrix + + Gamma = matrix(0,gbuf,n) + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]+working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(sign_hit*working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-1 + Gamma[gi+1,] = t(sign_hit*working_x[,i_hit]); gi = gi+1 + + # nconstraint + nconstraint = numeric(buf) + vreg = matrix(0,buf,n) + nconstraint[1] = gi + vreg[1,] = sign_hit*x[,i_hit] / sum(x[,i_hit]^2) + + # Other things to keep track of, but not return + r = 1 # Size of active set + A = i_hit # Active set -- JT: isn't this basically the same as action? + I = Seq(1,p)[-i_hit] # Inactive set + X_active = x[,i_hit,drop=FALSE] # Matrix X[,A] + X_inactive = x[,-i_hit,drop=FALSE] # Matrix X[,I] + k = 2 # What step are we at? + # JT Why keep track of r and k instead of just saying k=r+1? + + # Compute a skinny QR decomposition of X_active + # JT: obs was used as variable name above -- this is something different, no? 
+ # changed it to qr_X + + qr_X = qr(X_active) + Q = qr.Q(qr_X,complete=TRUE) + Q_active = Q[,1,drop=FALSE]; + Q_inactive = Q[,-1,drop=FALSE] + R = qr.R(qr_X) + + # Throughout the algorithm, we will maintain + # the decomposition X_active = Q_active*R. Dimensions: + # X_active: n x r + # Q_active: n x r + # Q_inactive: n x (n-r) + # R: r x r + + while (k<=maxsteps) { + ########## + # Check if we've reached the end of the buffer + if (k > length(action)) { + buf = length(action) + action = c(action,numeric(buf)) + df = c(df,numeric(buf)) + beta = cbind(beta,matrix(0,p,buf)) + nconstraint = c(nconstraint,numeric(buf)) + vreg = rbind(vreg,matrix(0,buf,n)) + } + + # Key quantities for the next entry + keepLs=backsolve(R,t(Q_active)%*%X_inactive) + X_inactive_resid = X_inactive - X_active %*% keepLs + working_x = scale(X_inactive_resid,center=F,scale=sqrt(colSums(X_inactive_resid^2))) + score = as.numeric(t(working_x)%*%y) + + # If the inactive set is empty, nothing will hit + if (r==min(n-intercept,p)) break + + # Otherwise find the next hitting time + else { + sign_score = Sign(score) + abs_score = sign_score * score + i_hit = which.max(abs_score) + sign_hit = sign_score[i_hit] + } + + # Record the solution + # what is the difference between "action" and "A"? + + action[k] = I[i_hit] + df[k] = r + beta[A,k] = backsolve(R,t(Q_active)%*%y) + + # Gamma matrix! + if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n)) + working_x = t(sign_score*t(working_x)) + Gamma[gi+Seq(1,p-r),] = t(working_x); gi = gi+p-r + Gamma[gi+Seq(1,p-r-1),] = t(working_x[,i_hit]-working_x[,-i_hit]); gi = gi+p-r-1 + Gamma[gi+1,] = t(working_x[,i_hit]); gi = gi+1 + + # nconstraint, regression contrast + nconstraint[k] = gi + vreg[k,] = sign_hit*X_inactive_resid[,i_hit] / sum(X_inactive_resid[,i_hit]^2) + + # Update all of the variables + r = r+1 + A = c(A,I[i_hit]) + I = I[-i_hit] + signs = c(signs,sign_hit) + X_active = cbind(X_active,X_inactive[,i_hit]) + X_inactive = X_inactive[,-i_hit,drop=FALSE] + + # Update the QR decomposition + updated_qr = updateQR(Q_active,Q_inactive,R,X_active[,r]) + Q_active = updated_qr$Q1 + + # JT: why do we store Q_inactive? Doesn't seem to be used. + Q_inactive = updated_qr$Q2 + R = updated_qr$R + + if (verbose) { + cat(sprintf("\n%i. Adding variable %i, |A|=%i...",k,A[r],r)) + } + + # Step counter + k = k+1 + } + + # Trim + action = action[Seq(1,k-1)] + df = df[Seq(1,k-1),drop=FALSE] + beta = beta[,Seq(1,k-1),drop=FALSE] + Gamma = Gamma[Seq(1,gi),,drop=FALSE] + nconstraint = nconstraint[Seq(1,k-1)] + vreg = vreg[Seq(1,k-1),,drop=FALSE] + + # If we reached the maximum number of steps + if (k>maxsteps) { + if (verbose) { + cat(sprintf("\nReached the maximum number of steps (%i),",maxsteps)) + cat(" skipping the rest of the path.") + } + completepath = FALSE + bls = NULL + } + + # Otherwise, note that we completed the path + else { + completepath = TRUE + + # Record the least squares solution. 
Note that + # we have already computed this + bls = rep(0,p) + if(length(keepLs)>0) bls[A] = keepLs + } + + if (verbose) cat("\n") + + # Adjust for the effect of centering and scaling + if (intercept) df = df+1 + if (normalize) beta = beta/sx + if (normalize && completepath) bls = bls/sx + + # Assign column names + colnames(beta) = as.character(Seq(1,k-1)) + + out = list(action=action,sign=signs,df=df,beta=beta, + completepath=completepath,bls=bls, + Gamma=Gamma,nconstraint=nconstraint,vreg=vreg,x=x,y=y,bx=bx,by=by,sx=sx, + intercept=intercept,normalize=normalize,call=this.call) + class(out) = "fs" + return(out) +} + +############################## + +# Coefficient function for fs + +coef.fs <- function(object, s, ...) { + if (object$completepath) { + k = length(object$action)+1 + beta = cbind(object$beta,object$bls) + } else { + k = length(object$action) + beta = object$beta + } + + if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k)) + knots = 1:k + dec = FALSE + return(coef.interpolate(beta,s,knots,dec)) +} + +# Prediction function for fs + +predict.fs <- function(object, newx, s, ...) { + beta = coef.fs(object,s) + if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) + else newx = scale(newx,object$bx,FALSE) + return(newx %*% beta + object$by) +} + +############################## + +# FS inference function + +fsInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { + + this.call = match.call() + type = match.arg(type) + checkargs.misc(sigma=sigma,alpha=alpha,k=k, + gridrange=gridrange,mult=mult,ntimes=ntimes) + if (class(obj) != "fs") stop("obj must be an object of class fs") + if (is.null(k) && type=="active") k = length(obj$action) + if (is.null(k) && type=="all") stop("k must be specified when type = all") + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + G = obj$Gamma + nconstraint = obj$nconstraint + sx = obj$sx + + if (is.null(sigma)) { + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + khat = NULL + + if (type == "active") { + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + vreg = obj$vreg[1:k,,drop=FALSE] + sign = obj$sign[1:k] + vars = obj$action[1:k] + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + Gj = G[1:nconstraint[j],] + uj = rep(0,nconstraint[j]) + vj = vreg[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + + khat = forwardStop(pv,alpha) + } + + else { + if (type == "aic") { + out = 
aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) + khat = out$khat + m = out$stopped * ntimes + G = rbind(out$G,G[1:nconstraint[khat+m],]) # Take ntimes more steps past khat + u = c(out$u,rep(0,nconstraint[khat+m])) # (if we need to) + kk = khat + } + else { + G = G[1:nconstraint[k],] + u = rep(0,nconstraint[k]) + kk = k + } + + pv = vlo = vup = numeric(kk) + vmat = matrix(0,kk,n) + ci = tailarea = matrix(0,kk,2) + sign = numeric(kk) + vars = obj$action[1:kk] + xa = x[,vars] + M = pinv(crossprod(xa)) %*% t(xa) + + for (j in 1:kk) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + Gj = rbind(G,vj) + uj = c(u,0) + + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + } + + # JT: why do we output vup, vlo? Are they used somewhere else? + + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "fsInf" + return(out) +} + +############################## + + +############################## + +print.fs <- function(x, ...) { + cat("\nCall:\n") + dput(x$call) + + cat("\nSequence of FS moves:\n") + nsteps = length(x$action) + tab = cbind(1:nsteps,x$action,x$sign) + colnames(tab) = c("Step","Var","Sign") + rownames(tab) = rep("",nrow(tab)) + print(tab) + invisible() +} + +print.fsInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + if (x$type == "active") { + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",x$alpha)) + tab = cbind(1:length(x$pv),x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", + "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",x$khat)) + } + + else if (x$type == "all") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$k,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + } + + else if (x$type == "aic") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$khat,x$alpha)) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) + } + + invisible() +} + + +plot.fs <- function(x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) { + if (x$completepath) { + k = length(x$action)+1 + beta = cbind(x$beta,x$bls) + } else { + k = length(x$action) + beta = x$beta + } + p = nrow(beta) + + xx = 1:k + xlab = "Step" + + if (omit.zeros) { + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA + } + + plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), + xlab=xlab,ylab="Coefficients",main="Forward stepwise path",...) + abline(h=0,lwd=2) + matplot(xx,t(beta),type="l",lty=1,add=TRUE) + if (breaks) abline(v=xx,lty=2) + if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0) + invisible() +} + diff --git a/selectiveInference-currentCRAN/R/funs.groupfs.R b/selectiveInference-currentCRAN/R/funs.groupfs.R new file mode 100644 index 00000000..b2c04472 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.groupfs.R @@ -0,0 +1,794 @@ +#' Select a model with forward stepwise. +#' +#' This function implements forward selection of linear models almost identically to \code{\link[stats]{step}} with \code{direction = "forward"}. The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. +#' +#' @param x Matrix of predictors (n by p). +#' @param y Vector of outcomes (length n). +#' @param index Group membership indicator of length p. 
Check that \code{sort(unique(index)) = 1:G} where \code{G} is the number of distinct groups. +#' @param maxsteps Maximum number of steps for forward stepwise. +#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. Default is NULL corresponding to unknown sigma. When NULL, \code{link{groupfsInf}} performs truncated F inference instead of truncated \eqn{\chi}. See \code{\link[stats]{extractAIC}} for details on the AIC criterion. +#' @param k Multiplier of model size penalty, the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = 2log(p)} for RIC (best for high dimensions, when \eqn{p > n}). If \eqn{G < p} then RIC may be too restrictive and it would be better to use \code{log(G) < k < 2log(p)}. +#' @param intercept Should an intercept be included in the model? Default is TRUE. Does not count as a step. +#' @param center Should the columns of the design matrix be centered? Default is TRUE. +#' @param normalize Should the design matrix be normalized? Default is TRUE. +#' @param aicstop Early stopping if AIC increases. Default is 0 corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}. +#' @param verbose Print out progress along the way? Default is FALSE. +#' @return An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values. +#' @examples +#' x = matrix(rnorm(20*40), nrow=20) +#' index = sort(rep(1:20, 2)) +#' y = rnorm(20) + 2 * x[,1] - x[,4] +#' fit = groupfs(x, y, index, maxsteps = 5) +#' out = groupfsInf(fit) +#' out +#' @seealso \code{\link{groupfsInf}}, \code{\link{factorDesign}}. +groupfs <- function(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE, center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE) { + + if (missing(index)) stop("Missing argument: index.") + p <- ncol(x) + n <- nrow(x) + + # Group labels + labels <- unique(index) + G <- length(labels) + inactive <- labels + active <- c() + + if (missing(maxsteps) || maxsteps >= min(n, G)) maxsteps <- min(n-1, G) + checkargs.xy(x=x, y=y) + checkargs.groupfs(x, index, maxsteps) + if (maxsteps > G) stop("maxsteps is larger than number of groups") + gsizes <- sort(rle(sort(index))$lengths, decreasing = TRUE) + if (sum(gsizes[1:maxsteps]) >= nrow(x)) { + maxsteps <- max(which(cumsum(gsizes) < nrow(x))) + warning(paste("If the largest groups are included the model will be saturated/overdetermined. 
To prevent this maxsteps has been changed to", maxsteps)) + } + + # Initialize copies of data for loop + by <- mean(y) + y.update <- y + if (intercept) y.update <- y - by + y.last <- y.update + + # Center and scale design matrix + xscaled <- scaleGroups(x, index, center, normalize) + xm <- xscaled$xm + xs <- xscaled$xs + x.update <- xscaled$x + + x.begin <- x.update + y.begin <- y.update + stopped <- FALSE + # Store all projections computed along the path + terms = projections = maxprojs = aicpens = maxpens = cumprojs = vector("list", maxsteps) + + # Store other information from each step + path.info <- data.frame(imax=integer(maxsteps), df=integer(maxsteps), AIC=numeric(maxsteps), RSS=numeric(maxsteps), RSSdrop=numeric(maxsteps), chisq=numeric(maxsteps)) + + modelrank <- as.numeric(intercept) + if (is.null(sigma)) { + modelrank <- modelrank + 1 + aic.begin <- aic.last <- n*(log(2*pi) + log(mean(y.update^2))) + k * (n + modelrank) + } else { + aic.begin <- aic.last <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + if (verbose) print(paste0("Start: AIC=", round(aic.begin, 3)), quote = FALSE) + + # Begin main loop + for (step in 1:maxsteps) { + + added <- add1.groupfs(x.update, y.update, index, labels, inactive, k, sigma) + + # Group to be added + imax <- added$imax + inactive <- setdiff(inactive, imax) + active <- union(active, imax) + inactive.inds <- which(!index %in% active) + + # Rank of group + modelrank <- modelrank + added$df + + # Stop without adding if model has become saturated + if (modelrank >= n) { + stop("Saturated model. Abandon ship!") + } + + # Regress added group out of y and inactive x + P.imax <- added$maxproj %*% t(added$maxproj) + P.imax <- diag(rep(1, n)) - P.imax + y.update <- P.imax %*% y.update + x.update[, inactive.inds] <- P.imax %*% x.update[, inactive.inds] + + # Compute AIC + if (is.null(sigma)) { + added$AIC <- n * log(added$maxterm/n) - k * added$df + n*log(2*pi) + k * (n + modelrank) + } else { + added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + + projections[[step]] <- added$projections + maxprojs[[step]] <- added$maxproj + aicpens[[step]] <- added$aicpens + maxpens[[step]] <- added$maxpen + if (step == 1) cumprojs[[step]] <- P.imax + if (step > 1) cumprojs[[step]] <- P.imax %*% cumprojs[[step-1]] + terms[[step]] <- added$terms + + # Compute RSS for unadjusted chisq p-values + added$RSS <- sum(y.update^2) + scale.chisq <- 1 + + added$RSSdrop <- sum((y.last - y.update)^2) + added$chisq <- pchisq(added$RSSdrop/scale.chisq, lower.tail=FALSE, df = added$df) + y.last <- y.update + + # Projections are stored separately + step.info <- data.frame(added[-c(3:(length(added)-4))]) + path.info[step, ] <- step.info + + if (verbose) print(round(step.info, 3)) + + if (aicstop > 0 && step < maxsteps && step >= aicstop && aic.last < added$AIC) { + if (all(diff(c(aic.begin, path.info$AIC)[(step+1-aicstop):(step+1)]) > 0)) { + + if (is.null(sigma)) { + added$AIC <- n * log(added$maxterm/n) - k * added$df + n + n*log(2*pi) + k * modelrank + } else { + added$AIC <- sum(y.update^2)/sigma^2 - n + k * modelrank + } + + path.info <- path.info[1:step, ] + projections[(step+1):maxsteps] <- NULL + maxprojs[(step+1):maxsteps] <- NULL + aicpens[(step+1):maxsteps] <- NULL + maxpens[(step+1):maxsteps] <- NULL + cumprojs[(step+1):maxsteps] <- NULL + terms[(step+1):maxsteps] <- NULL + maxsteps <- step + stopped <- TRUE + break + } + } + aic.last <- added$AIC + } + + # Is there a better way of doing this? + # Use some projections already computed? 
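+    # (What the refit below does: an unpenalized least-squares fit of
+    # y.begin on the columns of x.begin belonging to the selected
+    # groups, all at once; the "-1" drops the intercept, which was
+    # already absorbed by centering when intercept=TRUE.)
+    # A guarded sketch of the same computation via qr(), shown only as
+    # documentation -- the if (FALSE) keeps it out of the execution
+    # path, and xsel is a local name introduced here for illustration:
+    if (FALSE) {
+        xsel <- x.begin[, index %in% path.info$imax, drop = FALSE]
+        stopifnot(all.equal(as.numeric(coef(lm(y.begin ~ xsel - 1))),
+                            as.numeric(qr.coef(qr(xsel), y.begin))))
+    }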
+ beta <- coef(lm(y.begin ~ x.begin[,index %in% path.info$imax]-1)) + names(beta) <- index[index %in% path.info$imax] + + # Create output object + value <- list(action = path.info$imax, L = path.info$L, AIC = path.info$AIC, projections = projections, maxprojs = maxprojs, aicpens = aicpens, maxpens = maxpens, cumprojs = cumprojs, log = path.info, index = index, y = y.begin, x = x.begin, coefficients = beta, bx = xm, by = by, sx = xs, sigma = sigma, intercept = intercept, call = match.call(), terms = terms) + + class(value) <- "groupfs" + attr(value, "center") <- center + attr(value, "normalize") <- normalize + attr(value, "labels") <- labels + attr(value, "maxsteps") <- maxsteps + attr(value, "sigma") <- sigma + attr(value, "k") <- k + attr(value, "aicstop") <- aicstop + attr(value, "stopped") <- stopped + if (is.null(attr(x, "varnames"))) { + attr(value, "varnames") <- colnames(x) + } else { + attr(value, "varnames") <- attr(x, "varnames") + } + return(value) +} + +#' Add one group to the model in \code{groupfs}. +#' +#' For internal use by \code{\link{groupfs}}. +#' +#' @param xr Design matrix at current step. +#' @param yr Response vector residual at current step. +#' @param index Group membership indicator of length p. +#' @param labels The unique elements of \code{index}. +#' @param inactive Labels of inactive groups. +#' @param k Multiplier of model size penalty, use \code{k = 2} for AIC, \code{k = log(n)} for BIC, or \code{k = log(p)} for RIC. +#' @param sigma Estimate of error standard deviation for use in AIC criterion. This determines the relative scale between RSS and the degrees of freedom penalty. See \code{\link{extractAIC}} for details. +#' @return Index \code{imax} of added group, value \code{L} of maximized negative AIC, lists of projection matrices defining quadratic model selection event. +add1.groupfs <- function(xr, yr, index, labels, inactive, k, sigma = NULL) { + + # Use characters to avoid issues where + # list() populates NULL lists in the positions + # of the active variables + ### Question for later: does this slow down lapply? + keys = as.character(inactive) + n <- nrow(xr) + + # Compute sums of squares to determine which group is added + # penalized by rank of group if k > 0 + projections = aicpens = terms = vector("list", length(keys)) + names(projections) = names(terms) = names(aicpens) = keys + for (key in keys) { + inds <- which(index == key) + xi <- xr[,inds] + ui <- svdu_thresh(xi) + dfi <- ncol(ui) + projections[[key]] <- ui + uy <- t(ui) %*% yr + if (is.null(sigma)) { + aicpens[[key]] <- exp(k*dfi/n) + terms[[key]] <- (sum(yr^2) - sum(uy^2)) * aicpens[[key]] + } else { + aicpens[[key]] <- sigma^2 * k * dfi + terms[[key]] <- (sum(yr^2) - sum(uy^2)) + aicpens[[key]] + } + } + + # Maximizer = group to be added + terms.optind <- which.min(terms) + imax <- inactive[terms.optind] + optkey <- which(keys == imax) + maxproj <- projections[[optkey]] + maxpen <- aicpens[[optkey]] + maxterm <- terms[[optkey]] + projections[[optkey]] <- NULL + aicpens[[optkey]] <- NULL + + return(list(imax=imax, df = ncol(maxproj), projections = projections, maxproj = maxproj, aicpens = aicpens, maxpen = maxpen, maxterm = maxterm, terms = terms)) +} + +# ----------------------------------------------------------- + +#' Compute selective p-values for a model fitted by \code{groupfs}. +#' +#' Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. 
These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. If the \code{sigma} to \code{\link{groupfs}} was NULL then groupfsInf uses truncated \eqn{F} statistics instead of truncated \eqn{\chi}. The \code{sigma} argument to groupfsInf allows users to override and use \eqn{\chi}, but this is not recommended unless \eqn{\sigma} can be estimated well (i.e. \eqn{n > p}). +#' +#' @param obj Object returned by \code{\link{groupfs}} function +#' @param sigma Estimate of error standard deviation. Default is NULL and in this case groupfsInf uses the value of sigma specified to \code{\link{groupfs}}. +#' @param verbose Print out progress along the way? Default is TRUE. +#' @return An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. +#' +#' \describe{ +#' \item{vars}{Labels of the active groups in the order they were included.} +#' \item{pv}{Selective p-values computed from appropriate truncated distributions.} +#' \item{sigma}{Estimate of error variance used in computing p-values.} +#' \item{TC or TF}{Observed value of truncated \eqn{\chi} or \eqn{F}.} +#' \item{df}{Rank of group of variables when it was added to the model.} +#' \item{support}{List of intervals defining the truncation region of the corresponding statistic.} +#' } +groupfsInf <- function(obj, sigma = NULL, verbose = TRUE) { + + if (!is.null(obj$cvobj) && attr(obj, "stopped")) { + stop("Cross-validation and early stopping cannot be used simultaneously.") + # This shouldn't happen in the first place! + # (it wouldn't anyway unless someone tries to trick it) + } + + n <- nrow(obj$x) + p <- ncol(obj$x) + maxsteps <- attr(obj, "maxsteps") + k <- attr(obj, "k") + index <- obj$index + x <- obj$x + y <- obj$y + Ep <- sum(index %in% obj$action) + + pvals = dfs = dfs2 = Tstats = numeric(maxsteps) + supports <- list() + + if (!is.null(sigma)) { + type <- "TC" + if (!is.null(obj$sigma)) { + cat(paste("Using specified value", sigma, "for sigma in place of the value", obj$sigma, "used by groupfs()\n")) + } + } else { + if (is.null(obj$sigma)) { + type <- "TF" + Pf <- svdu_thresh(obj$x[,which(obj$index %in% obj$action), drop = FALSE]) + dffull <- ncol(Pf) + df2 <- n - dffull - obj$intercept - 1 + Pfull <- Pf %*% t(Pf) + } else { + type <- "TC" + sigma <- obj$sigma + } + } + + # Compute p-value for each active group + for (j in 1:maxsteps) { + i <- obj$action[j] + if (verbose) { + string <- paste0("Step ", j, "/", attr(obj, "maxsteps"), ": computing P-value for group ", i) + if (!is.null(obj$cvobj)) string <- paste0(string, ", including constraints from cross-validation") + if (attr(obj, "stopped")) string <- paste0(string, ", including constraints from AICstop") + cat(paste(string, "\n")) + } + + if (type == "TC") { + # Form projection onto active set minus i + # and project x_i orthogonally + x_i <- obj$x[,which(obj$index == i), drop = FALSE] + if (length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + x_minus_i <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) + x_i <- x_i - x_minus_i %*% t(x_minus_i) %*% x_i + } + + # Project y onto what remains of x_i + Ugtilde <- svdu_thresh(x_i) + R <- t(Ugtilde) %*% obj$y + TC <- sqrt(sum(R^2)) + eta <- Ugtilde %*% R / TC + Z <- obj$y - eta * TC + dfi <- ncol(Ugtilde) + Tstats[j] <- TC + dfs[j] <- dfi + + ydecomp <- list(Z=Z, eta=eta) + + } else { + + if 
(length(obj$action) > 1) { + minus_i <- setdiff(obj$action, i) + Psub <- svdu_thresh(obj$x[,which(obj$index %in% minus_i), drop = FALSE]) + Z <- Psub %*% t(Psub) %*% obj$y + df1 <- dffull - ncol(Psub) + } else { + Z <- rep(0, n) + df1 <- dffull + obj$intercept + 1 + } + + C <- df1/df2 + R1 <- obj$y - Z + R2 <- obj$y - Pfull %*% obj$y + R1sq <- sum(R1^2) + R2sq <- sum(R2^2) + R <- sqrt(R1sq) + delta <- R1-R2 + Vdelta <- delta/sqrt(sum(delta^2)) + V2 <- R2/sqrt(R2sq) + TF <- (R1sq-R2sq)/(C*R2sq) + Tstats[j] <- TF + dfs[j] <- df1 + + ydecomp <- list(R=R, Z=Z, Vd=Vdelta, V2=V2, C=C) + + } + + intervallist <- truncationRegion(obj, ydecomp, type) + + # Additional constraints from cross-validation? + if (!is.null(obj$cvobj)) { + intervallist <- c(intervallist, do.call(c, + lapply(obj$cvobj, function(cvf) { + if (type == "TC") { + ydecomp <- list(R=R[-cvf$fold], eta=eta[-cvf$fold], Z=Z[-cvf$fold]) + } else { + ydecomp <- list(R=R, Z=Z[-cvf$fold], Vd=Vdelta[-cvf$fold], V2=V2[-cvf$fold], C=C) # C correct? + } + truncationRegion(cvf, ydecomp, type) + }))) + intervallist <- c(intervallist, + lapply(obj$cvquad, function(cvquad) { + if (type == "TC") { + etacvquad <- t(eta) %*% cvquad + A <- etacvquad %*% eta + B <- 2 * etacvquad %*% Z + C <- t(Z) %*% cvquad %*% Z + quadratic_roots(A, B, C, tol = 1e-15) + } else { + + zcvquad <- t(Z) %*% cvquad + vdcvquad <- t(Vdelta) %*% cvquad + v2cvquad <- t(V2) %*% cvquad + x0 <- zcvquad %*% Z + x1 <- 2*R*zcvquad %*% Vdelta + x2 <- 2*R*zcvquad %*% V2 + x12 <- 2*R^2*vdcvquad %*% V2 + x11 <- R^2*vdcvquad %*% Vdelta + x22 <- R^2*v2cvquad %*% V2 + TF_roots(R, C, coeffs = list(x0=x0, x1=x1, x2=x2, x12=x12, x11=x11, x22=x22)) + } + })) + } + + # Additional constraints from AIC stopping + if (attr(obj, "stopped")) { + aicintervals <- vector("list", maxsteps) + aicstop <- attr(obj, "aicstop") + if (type == "TC") { + pen0 <- k * obj$intercept + aic.begin <- aic.last <- sum(obj$y^2)/sigma^2 - n + k * obj$intercept + } else { + pen0 <- exp(k * (1+obj$intercept)/n) + aic.begin <- n*(log(2*pi) + log(mean(obj$y^2))) + k * (1 + n + obj$intercept) + } + AICs <- c(aic.begin, obj$AIC) + + ulist <- c(list(matrix(0, n, 1)), obj$maxprojs) + penlist <- c(pen0, obj$maxpens) + zlist <- vector("list", maxsteps+1) + zlist[[1]] <- zlist[[2]] <- Z + if (type == "TC") { + etalist <- vector("list", maxsteps+1) + etalist[[1]] <- etalist[[2]] <- eta + } else { + vdlist <- v2list <- vector("list", maxsteps+1) + vdlist[[1]] <- vdlist[[2]] <- Vdelta + v2list[[1]] <- v2list[[2]] <- V2 + } + if (maxsteps > 1) { + for (step in 1:(maxsteps-1)) { + cproj <- obj$cumprojs[[step]] + zlist[[step+2]] <- cproj %*% Z + if (type == "TC") { + etalist[[step+2]] <- cproj %*% eta + } else { + vdlist[[step+2]] <- cproj %*% Vdelta + v2list[[step+2]] <- cproj %*% V2 + } + } + } + + for (step in 1:maxsteps) { + # Compare AIC at s+1 to AIC at s + # roots() functions assume g indexes smaller AIC + # this is step+1 until the last step + peng <- penlist[[step+1]] + Ug <- ulist[[step+1]] + Uh <- ulist[[step]] + Zg <- zlist[[step+1]] + Zh <- zlist[[step]] + + if (type == "TC") { + penh <- 0 + etag <- etalist[[step+1]] + etah <- etalist[[step]] + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) + + if (AICs[step] < AICs[step+1]) { + coeffs <- lapply(coeffs, function(coeff) -coeff) + } + + intstep <- quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol = 1e-15) + + } else { + penh <- 1 + Vdg <- vdlist[[step+1]] + Vdh <- vdlist[[step]] + V2g <- v2list[[step+1]] + V2h <- v2list[[step]] + coeffs <- 
TF_coefficients(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) + + if (AICs[step] < AICs[step+1]) { + coeffs <- lapply(coeffs, function(coeff) -coeff) + } + + intstep <- TF_roots(R, C, coeffs) + } + + aicintervals[[step]] <- intstep + } + intervallist <- c(intervallist, aicintervals) + } + + # Compute intersection: + region <- do.call(interval_union, intervallist) + region <- interval_union(region, Intervals(c(-Inf,0))) + E <- interval_complement(region, check_valid = FALSE) + + if (length(E) == 0) { + stop(paste("Empty support at step", j)) + } + supports[[j]] <- E + + # E is now potentially a union of intervals + if (type == "TC") { + pvals[j] <- TC_surv(TC, sigma, dfi, E) + } else { + # write TF_surv function first + pvals[j] <- TF_surv(TF, df1, df2, E) + } + + } + + if (any(is.nan(pvals))) { + nanp <- which(is.nan(pvals)) + pvals[nanp] <- 0 + warning(paste0("P-value NaNs of the form 0/0 converted to 0 for group(s) ", paste(obj$action[nanp], collapse=","), ". This typically occurs for numerical reasons in the presence of a large signal-to-noise ratio.")) + } + + names(pvals) <- obj$action + out <- list(vars = obj$action, pv=pvals) + if (type == "TC") { + out$TC <- Tstats + out$sigma <- sigma + } else { + out$TF <- Tstats + out$df2 <- df2 + } + out$df <- dfs + out$support <- supports + class(out) <- "groupfsInf" + if (!is.null(attr(obj, "varnames"))) { + attr(out, "varnames") <- attr(obj, "varnames") + } + return(out) +} + +# ----------------------------------------------------------- + +TC_surv <- function(TC, sigma, df, E) { + if (length(E) == 0) { + stop("Empty TC support") + } + + # Sum truncated cdf over each part of E + denom <- do.call(sum, lapply(1:nrow(E), function(v) { + tchi_interval(E[v,1], E[v,2], sigma, df) + })) + + # Sum truncated cdf from observed value to max of + # truncation region + numer <- do.call(sum, lapply(1:nrow(E), function(v) { + lower <- E[v,1] + upper <- E[v,2] + if (upper > TC) { + # Observed value is left of this interval's right endpoint + if (lower < TC) { + # Observed value is in this interval + return(tchi_interval(TC, upper, sigma, df)) + } else { + # Observed value is not in this interval + return(tchi_interval(lower, upper, sigma, df)) + } + } else { + # Observed value is right of this entire interval + return(0) + } + })) + + # Survival function + value <- numer/denom + # Force p-value to lie in the [0,1] interval + # in case of numerical issues + value <- max(0, min(1, value)) + value +} + +tchi_interval <- function(lower, upper, sigma, df) { + a <- (lower/sigma)^2 + b <- (upper/sigma)^2 + if (b == Inf) { + integral <- pchisq(a, df, lower.tail = FALSE) + } else { + integral <- pchisq(b, df) - pchisq(a, df) + } + if ((integral < .Machine$double.eps) && (b < Inf)) { + integral <- num_int_chi(a, b, df) + } + return(integral) +} + +num_int_chi <- function(a, b, df, nsamp = 10000) { + grid <- seq(from=a, to=b, length.out=nsamp) + integrand <- dchisq(grid, df) + return((b-a)*mean(integrand)) +} + +TF_surv <- function(TF, df1, df2, E) { + if (length(E) == 0) { + stop("Empty TF support") + } + + # Sum truncated cdf over each part of E + denom <- do.call(sum, lapply(1:nrow(E), function(v) { + TF_interval(E[v,1], E[v,2], df1, df2) + })) + + # Sum truncated cdf from observed value to max of + # truncation region + numer <- do.call(sum, lapply(1:nrow(E), function(v) { + lower <- E[v,1] + upper <- E[v,2] + if (upper > TF) { + # Observed value is left of this interval's right endpoint + if (lower < TF) { + # Observed value is in this interval + 
return(TF_interval(TF, upper, df1, df2)) + } else { + # Observed value is not in this interval + return(TF_interval(lower, upper, df1, df2)) + } + } else { + # Observed value is right of this entire interval + return(0) + } + })) + + # Survival function + value <- numer/denom + # Force p-value to lie in the [0,1] interval + # in case of numerical issues + #value <- max(0, min(1, value)) + value +} + +TF_interval <- function(lower, upper, df1, df2) { + a <- lower + b <- upper + if (b == Inf) { + integral <- pf(a, df1, df2, lower.tail = FALSE) + } else { + integral <- pf(b, df1, df2) - pf(a, df1, df2) + } + if ((integral < .Machine$double.eps) && (b < Inf)) { + integral <- num_int_F(a, b, df1, df2) + } + return(integral) +} + +num_int_F <- function(a, b, df1, df2, nsamp = 10000) { + grid <- seq(from=a, to=b, length.out=nsamp) + integrand <- df(grid, df1, df2) + return((b-a)*mean(integrand)) +} + +#' Center and scale design matrix by groups +#' +#' For internal use by \code{\link{groupfs}}. +#' +#' @param x Design matrix. +#' @param index Group membership indicator of length p. +#' @param center Center groups, default is TRUE. +#' @param normalize Scale groups by Frobenius norm, default is TRUE. +#' @return +#' \describe{ +#' \item{x}{Optionally centered/scaled design matrix.} +#' \item{xm}{Means of groups in original design matrix.} +#' \item{xs}{Frobenius norms of groups in original design matrix.} +#' } +scaleGroups <- function(x, index, center = TRUE, normalize = TRUE) { + keys <- unique(index) + xm <- rep(0, ncol(x)) + xs <- rep(1, ncol(x)) + + for (j in keys) { + inds <- which(index == j) + if (center) { + xmj <- mean(x[, inds]) + xm[inds] <- xmj + x[, inds] <- x[, inds] - xmj + } + normsq <- sum(x[, inds]^2) + xsj <- sqrt(normsq) + xs[inds] <- xsj + if (xsj > 0) { + if (normalize) x[, inds] <- x[, inds] / xsj + } else { + stop(paste("Design matrix contains identically zero group of variables:", j)) + } + } + return(list(x=x, xm=xm, xs=xs)) +} + +#' Expand a data frame with factors to form a design matrix with the full binary encoding of each factor. +#' +#' When using \code{\link{groupfs}} with factor variables call this function first to create a design matrix. +#' +#' @param df Data frame containing some columns which are \code{factors}. 
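+#'   Numeric columns of \code{df} are carried over unchanged, and each
+#'   factor is expanded via \code{model.matrix} into indicator columns
+#'   for all of its levels (no reference level is dropped), so that the
+#'   dummy variables encoding one factor always form a single group.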
+#' @return List containing +#' \describe{ +#' \item{x}{Design matrix, the first columns contain any numeric variables from the original date frame.} +#' \item{index}{Group membership indicator for expanded matrix.} +#' } +#' @examples +#' \dontrun{ +#' fd = factorDesign(warpbreaks) +#' y = rnorm(nrow(fd$x)) +#' fit = groupfs(fd$x, y, fd$index, maxsteps=2, intercept=F) +#' pvals = groupfsInf(fit) +#' } +factorDesign <- function(df) { + factor.inds <- sapply(df[1,], is.factor) + factor.labels <- which(factor.inds) + nfacs <- sum(factor.inds) + nlevs <- sapply(df[1,factor.inds], function(fac) nlevels(fac)) + totnlevs <- sum(nlevs) + num.num = indcounter = ncol(df) - nfacs + x <- matrix(nrow=nrow(df), ncol = totnlevs + num.num) + colnames(x) <- 1:ncol(x) + index <- integer(ncol(x)) + varnames <- character(ncol(df)) + if (num.num > 0) { + x[,1:num.num] <- df[, !factor.inds] + varnames[1:num.num] = colnames(x)[1:num.num] <- colnames(df)[1:num.num] + index[1:num.num] <- 1:num.num + indcounter <- indcounter + num.num - 1 + } + for (j in 1:nfacs) { + submat <- model.matrix(~ df[, factor.labels[j]] - 1) + indcounter <- indcounter+1 + submatinds <- indcounter:(indcounter+nlevs[j]-1) + indcounter <- indcounter + nlevs[j] - 1 + colnames(x)[submatinds] <- paste0(colnames(df)[num.num + j], ":", 1:nlevs[j]) + varnames[num.num + j] <- colnames(df)[num.num + j] + x[,submatinds] <- submat + index[submatinds] <- num.num + j + } + attr(x, "varnames") <- varnames + return(list(x = x, index = index)) +} + +svdu_thresh <- function(x) { + svdx <- svd(x) + inds <- svdx$d > svdx$d[1] * sqrt(.Machine$double.eps) + return(svdx$u[, inds, drop = FALSE]) +} + +flatten <- function(L) { + if (is.list(L[[1]])) return(unlist(L, recursive=FALSE)) + return(L) +} + +print.groupfs <- function(x, ...) { + cat("\nSequence of added groups:\n") + nsteps = length(x$action) + action <- x$action + vnames <- attr(x, "varnames") + if (length(vnames) > 0) action <- vnames[action] + tab = data.frame(Group = action, Rank = x$log$df, RSS = round(x$log$RSS, 3), AIC = round(x$log$AIC, 3)) + rownames(tab) = 1:nsteps + print(tab) + cat("\nUse groupfsInf() to compute P-values\n") + invisible() +} + + +coef.groupfs <- function(object, ...) { + return(object$coefficients) +} + +#' @name predict.groupfs +#' @aliases predict.groupfs +#' @aliases coef.groupfs +#' +#' @title Prediction and coefficient functions for \code{\link{groupfs}}. +#' +#' Make predictions or extract coefficients from a groupfs forward stepwise object. +#' +#' @param object Object returned by a call to \code{\link{groupfs}}. +#' @param newx Matrix of x values at which the predictions are desired. If NULL, the x values from groupfs fitting are used. +#' @return A vector of predictions or a vector of coefficients. +predict.groupfs <- function(object, newx) { + beta <- coef.groupfs(object) + if (missing(newx)) { + newx = object$x + } else { + newx <- scaleGroups(newx, object$index, attr(object, "center"), attr(object, "normalize"))$x + } + return(newx[, object$index %in% object$action] %*% beta + ifelse(object$intercept, object$by, 0)) +} + +print.groupfsInf <- function(x, ...) 
{ + if (!is.null(x$sigma)) { + isTF <- FALSE + Tstat <- x$TC + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) + } else { + isTF <- TRUE + Tstat <- x$TF + } + action <- x$vars + vnames <- attr(x, "varnames") + if (length(vnames) > 0) action <- vnames[action] + tab = data.frame(Group = action, Pvalue = round(x$pv, 3), + TC = round(Tstat, 3), + df = x$df, Size = round(unlist(lapply(lapply(x$support, size), sum)), 3), + Ints = unlist(lapply(x$support, nrow)), Min =round(unlist(lapply(x$support, min)), 3), + Max = round(unlist(lapply(x$support, max)), 3)) + rownames(tab) = 1:length(x$vars) + if (isTF) names(tab)[3] <- "TF" + print(tab) + cat("\nInts is the number of intervals in the truncated chi selection region and Size is the sum of their lengths. Min and Max are the lowest and highest endpoints of the truncation region. No confidence intervals are reported by groupfsInf.\n") + invisible() +} + +checkargs.groupfs <- function(x, index, maxsteps) { + if (length(index) != ncol(x)) stop("Length of index does not match number of columns of x") + if ((round(maxsteps) != maxsteps) || (maxsteps <= 0)) stop("maxsteps must be an integer > 0") +} diff --git a/selectiveInference-currentCRAN/R/funs.inf.R b/selectiveInference-currentCRAN/R/funs.inf.R new file mode 100644 index 00000000..423b4c3e --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.inf.R @@ -0,0 +1,299 @@ +# Main p-value function + +poly.pval <- function(y, G, u, v, sigma, bits=NULL) { + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + pv = tnorm.surv(z,0,sd,vlo,vup,bits) + return(list(pv=pv,vlo=vlo,vup=vup)) +} + +# Main confidence interval function + +poly.int <- function(y, G, u, v, sigma, alpha, gridrange=c(-100,100), + gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { + + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) + fun = function(x) { tnorm.surv(z,x,sd,vlo,vup,bits) } + + int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) + tailarea = c(fun(int[1]),1-fun(int[2])) + + if (flip) { + int = -int[2:1] + tailarea = tailarea[2:1] + } + + return(list(int=int,tailarea=tailarea)) +} + +############################## + +# Assuming that grid is in sorted order from smallest to largest, +# and vals are monotonically increasing function values over the +# grid, returns the grid end points such that the corresponding +# vals are approximately equal to {val1, val2} + +grid.search <- function(grid, fun, val1, val2, gridpts=100, griddepth=2) { + n = length(grid) + vals = fun(grid) + + ii = which(vals >= val1) + jj = which(vals <= val2) + if (length(ii)==0) return(c(grid[n],Inf)) # All vals < val1 + if (length(jj)==0) return(c(-Inf,grid[1])) # All vals > val2 + # RJT: the above logic is correct ... 
but for simplicity, instead, + # we could just return c(-Inf,Inf) + + i1 = min(ii); i2 = max(jj) + if (i1==1) lo = -Inf + else lo = grid.bsearch(grid[i1-1],grid[i1],fun,val1,gridpts, + griddepth-1,below=TRUE) + if (i2==n) hi = Inf + else hi = grid.bsearch(grid[i2],grid[i2+1],fun,val2,gridpts, + griddepth-1,below=FALSE) + return(c(lo,hi)) +} + +# Repeated bin search to find the point x in the interval [left, right] +# that satisfies f(x) approx equal to val. If below=TRUE, then we seek +# x such that the above holds and f(x) <= val; else we seek f(x) >= val. + +grid.bsearch <- function(left, right, fun, val, gridpts=100, griddepth=1, below=TRUE) { + n = gridpts + depth = 1 + + while (depth <= griddepth) { + grid = seq(left,right,length=n) + vals = fun(grid) + + if (below) { + ii = which(vals >= val) + if (length(ii)==0) return(grid[n]) # All vals < val (shouldn't happen) + if ((i0=min(ii))==1) return(grid[1]) # All vals > val (shouldn't happen) + left = grid[i0-1] + right = grid[i0] + } + + else { + ii = which(vals <= val) + if (length(ii)==0) return(grid[1]) # All vals > val (shouldn't happen) + if ((i0=max(ii))==n) return(grid[n]) # All vals < val (shouldn't happen) + left = grid[i0] + right = grid[i0+1] + } + + depth = depth+1 + } + + return(ifelse(below, left, right)) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector + +tnorm.surv <- function(z, mean, sd, a, b, bits=NULL) { + z = max(min(z,b),a) + + # Check silly boundary cases + p = numeric(length(mean)) + p[mean==-Inf] = 0 + p[mean==Inf] = 1 + + # Try the multi precision floating point calculation first + o = is.finite(mean) + mm = mean[o] + pp = mpfr.tnorm.surv(z,mm,sd,a,b,bits) + + # If there are any NAs, then settle for an approximation + oo = is.na(pp) + if (any(oo)) pp[oo] = bryc.tnorm.surv(z,mm[oo],sd,a,b) + + p[o] = pp + return(p) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean cane be a vector, using +# multi precision floating point calculations thanks to the Rmpfr package + +mpfr.tnorm.surv <- function(z, mean=0, sd=1, a, b, bits=NULL) { + # If bits is not NULL, then we are supposed to be using Rmpf + # (note that this was fail if Rmpfr is not installed; but + # by the time this function is being executed, this should + # have been properly checked at a higher level; and if Rmpfr + # is not installed, bits would have been previously set to NULL) + if (!is.null(bits)) { + z = Rmpfr::mpfr((z-mean)/sd, precBits=bits) + a = Rmpfr::mpfr((a-mean)/sd, precBits=bits) + b = Rmpfr::mpfr((b-mean)/sd, precBits=bits) + return(as.numeric((Rmpfr::pnorm(b)-Rmpfr::pnorm(z))/ + (Rmpfr::pnorm(b)-Rmpfr::pnorm(a)))) + } + + # Else, just use standard floating point calculations + z = (z-mean)/sd + a = (a-mean)/sd + b = (b-mean)/sd + return((pnorm(b)-pnorm(z))/(pnorm(b)-pnorm(a))) +} + +# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector, based on +# A UNIFORM APPROXIMATION TO THE RIGHT NORMAL TAIL INTEGRAL, W Bryc +# Applied Mathematics and Computation +# Volume 127, Issues 23, 15 April 2002, Pages 365--374 +# https://math.uc.edu/~brycw/preprint/z-tail/z-tail.pdf + +bryc.tnorm.surv <- function(z, mean=0, sd=1, a, b) { + z = (z-mean)/sd + a = (a-mean)/sd + b = (b-mean)/sd + n = length(mean) + + term1 = exp(z*z) + o = a > -Inf + term1[o] = ff(a[o])*exp(-(a[o]^2-z[o]^2)/2) + term2 = rep(0,n) + oo = b < Inf + term2[oo] = ff(b[oo])*exp(-(b[oo]^2-z[oo]^2)/2) + p = (ff(z)-term2)/(term1-term2) + + # Sometimes the approximation can give wacky p-values, + # outside of [0,1] .. 
+  #p[p<0 | p>1] = NA
+  p = pmin(1,pmax(0,p))
+  return(p)
+}
+
+ff <- function(z) {
+  return((z^2+5.575192695*z+12.7743632)/
+         (z^3*sqrt(2*pi)+14.38718147*z*z+31.53531977*z+2*12.77436324))
+}
+
+# Return Prob(Z>z | Z in [a,b]), where mean can be a vector, based on
+# Riemann approximation tricks, by Max G'Sell
+
+gsell.tnorm.surv <- function(z, mean=0, sd=1, a, b) {
+  return(max.approx.frac(a/sd,b/sd,z/sd,mean/sd))
+}
+
+
+##############################
+
+forwardStop <- function(pv, alpha=.10){
+  if (alpha<0 || alpha>1) stop("alpha must be in [0,1]")
+  if (min(pv,na.rm=T)<0 || max(pv,na.rm=T)>1) stop("pvalues must be in [0,1]")
+  val=-(1/(1:length(pv)))*cumsum(log(1-pv))
+  oo = which(val <= alpha)
+  if (length(oo)==0) out=0
+  else out = oo[length(oo)]
+  return(out)
+}
+
+##############################
+
+aicStop <- function(x, y, action, df, sigma, mult=2, ntimes=2) {
+  n = length(y)
+  k = length(action)
+  aic = numeric(k)
+  G = matrix(0,nrow=0,ncol=n)
+  u = numeric(0)
+  count = 0
+
+  for (i in 1:k) {
+    A = action[1:i]
+    aic[i] = sum(lsfit(x[,A],y,intercept=F)$res^2) + mult*sigma^2*df[i]
+
+    j = action[i]
+    if (i==1) xtil = x[,j]
+    else xtil = lsfit(x[,action[1:(i-1)]],x[,j],intercept=F)$res
+    s = sign(sum(xtil*y))
+
+    if (i==1 || aic[i] <= aic[i-1]) {
+      G = rbind(G,s*xtil/sqrt(sum(xtil^2)))
+      u = c(u,sqrt(mult)*sigma)
+      count = 0
+    }
+
+    else {
+      G = rbind(G,-s*xtil/sqrt(sum(xtil^2)))
+      u = c(u,-sqrt(mult)*sigma)
+      count = count+1
+      if (count == ntimes) break
+    }
+  }
+
+  if (i < k) {
+    khat = i - ntimes
+    aic = aic[1:i]
+  }
+  else khat = k
+
+  return(list(khat=khat,G=G,u=u,aic=aic,stopped=(i<k)))
+}
+
+##############################
+
+mypoly.pval.lee=
+  function(y, A, b, eta, Sigma, bits=NULL) {
+  # compute p-values from the polyhedral lemma; full version from Lee et al
+  # for a general covariance matrix Sigma, with constraint set {y: A y <= b}
+  temp = sum(eta*y)
+  vv = as.numeric(matrix(eta,nrow=1)%*%Sigma%*%eta)
+  cc = Sigma%*%eta/vv
+  z = y - cc*temp
+  rho = A%*%cc
+  vec = (b-A%*%z)/rho
+  vlo = suppressWarnings(max(vec[rho<0]))
+  vup = suppressWarnings(min(vec[rho>0]))
+  sd=sqrt(vv)
+  pv = tnorm.surv(temp,0,sd,vlo,vup,bits)
+  return(list(pv=pv,vlo=vlo,vup=vup,sd=sd))
+}
+
+
+
+mypoly.int.lee=
+  function(y,eta,vlo,vup,sd, alpha, gridrange=c(-100,100),gridpts=100, griddepth=2, flip=FALSE, bits=NULL) {
+  # compute selection intervals from the polyhedral lemma; full version from
+  # Lee et al for a general covariance matrix Sigma
+
+  temp = sum(eta*y)
+
+  xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts)
+  fun = function(x) { tnorm.surv(temp,x,sd,vlo,vup,bits) }
+
+  int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth)
+  tailarea = c(fun(int[1]),1-fun(int[2]))
+
+  if (flip) {
+    int = -int[2:1]
+    tailarea = tailarea[2:1]
+  }
+
+  return(list(int=int,tailarea=tailarea))
+}
+
+
+
+mydiag=function(x){
+    if(length(x)==1) out=x
+    if(length(x)>1) out=diag(x)
+    return(out)
+  }
+
diff --git a/selectiveInference-currentCRAN/R/funs.lar.R b/selectiveInference-currentCRAN/R/funs.lar.R
new file mode 100644
index 00000000..f01ee3d8
--- /dev/null
+++ b/selectiveInference-currentCRAN/R/funs.lar.R
@@ -0,0 +1,632 @@
+# We compute the least angle regression (LAR) path given
+# a response vector y and predictor matrix x. We assume
+# that x has columns in general position.
+
+# NOTE: the df estimates at each lambda_k can be thought of as the df
+# for all solutions corresponding to lambda in (lambda_k,lambda_{k-1}),
+# the open interval to the *right* of the current lambda_k.
+
+# NOTE: x having columns in general position implies that the
+# centered x satisfies a modified version of the general position
+# condition, where we replace k < min(n,p) by k < min(n-1,p) in
+# the definition. This is still sufficient to imply the uniqueness
+# of the lasso solution, on the centered x
+
+lar <- function(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE,
+                verbose=FALSE) {
+
+  this.call = match.call()
+  checkargs.xy(x=x,y=y)
+
+  # Center and scale, etc.
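+  # (standardize() returns the centered/scaled x and y together with the
+  # column means bx, the mean by, and the column scales sx; these are saved
+  # in the output below so that coef() and predict() can undo the scaling)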
+ obj = standardize(x,y,intercept,normalize) + x = obj$x + y = obj$y + bx = obj$bx + by = obj$by + sx = obj$sx + n = nrow(x) + p = ncol(x) + + ##### + # Find the first variable to enter and its sign + uhat = t(x)%*%y + ihit = which.max(abs(uhat)) # Hitting coordinate + hit = abs(uhat[ihit]) # Critical lambda + s = Sign(uhat[ihit]) # Sign + + if (verbose) { + cat(sprintf("1. lambda=%.3f, adding variable %i, |A|=%i...", + hit,ihit,1)) + } + + # Now iteratively find the new LAR estimate, and + # the next critical lambda + + # Things to keep track of, and return at the end + buf = min(maxsteps,500) + lambda = numeric(buf) # Critical lambdas + action = numeric(buf) # Actions taken + df = numeric(buf) # Degrees of freedom + beta = matrix(0,p,buf) # LAR estimates + + lambda[1] = hit + action[1] = ihit + df[1] = 0 + beta[,1] = 0 + + # Gamma matrix! + gbuf = max(2*p*3,2000) # Space for 3 steps, at least + gi = 0 + Gamma = matrix(0,gbuf,n) + Gamma[gi+Seq(1,p-1),] = t(s*x[,ihit]+x[,-ihit]); gi = gi+p-1 + Gamma[gi+Seq(1,p-1),] = t(s*x[,ihit]-x[,-ihit]); gi = gi+p-1 + Gamma[gi+1,] = t(s*x[,ihit]); gi = gi+1 + + # nk, regression contrast, M plus + nk = mp = numeric(buf) + vreg = matrix(0,buf,n) + + nk[1] = gi + vreg[1,] = s*x[,ihit] / sum(x[,ihit]^2) + if (p > 1) { + c = t(as.numeric(Sign(t(x)%*%y)) * t(x)) + ratio = t(c[,-ihit])%*%c[,ihit]/sum(c[,ihit]^2) + ip = 1-ratio > 0 + crit = (t(c[,-ihit])%*%y - ratio*sum(c[,ihit]*y))/(1-ratio) + mp[1] = max(max(crit[ip]),0) + } + + # Other things to keep track of, but not return + r = 1 # Size of active set + A = ihit # Active set + I = Seq(1,p)[-ihit] # Inactive set + X1 = x[,ihit,drop=FALSE] # Matrix X[,A] + X2 = x[,-ihit,drop=FALSE] # Matrix X[,I] + k = 2 # What step are we at? + + # Compute a skinny QR decomposition of X1 + obj = qr(X1) + Q = qr.Q(obj,complete=TRUE) + Q1 = Q[,1,drop=FALSE]; + Q2 = Q[,-1,drop=FALSE] + R = qr.R(obj) + + # Throughout the algorithm, we will maintain + # the decomposition X1 = Q1*R. Dimenisons: + # X1: n x r + # Q1: n x r + # Q2: n x (n-r) + # R: r x r + + while (k<=maxsteps && lambda[k-1]>=minlam) { + ########## + # Check if we've reached the end of the buffer + if (k > length(lambda)) { + buf = length(lambda) + lambda = c(lambda,numeric(buf)) + action = c(action,numeric(buf)) + df = c(df,numeric(buf)) + beta = cbind(beta,matrix(0,p,buf)) + nk = c(nk,numeric(buf)) + mp = c(mp,numeric(buf)) + vreg = rbind(vreg,matrix(0,buf,n)) + } + + # Key quantities for the hitting times + a = backsolve(R,t(Q1)%*%y) + b = backsolve(R,backsolve(R,s,transpose=TRUE)) + aa = as.numeric(t(X2) %*% (y - X1 %*% a)) + bb = as.numeric(t(X2) %*% (X1 %*% b)) + + # If the inactive set is empty, nothing will hit + if (r==min(n-intercept,p)) hit = 0 + + # Otherwise find the next hitting time + else { + shits = Sign(aa) + hits = aa/(shits-bb) + + # Make sure none of the hitting times are larger + # than the current lambda + hits[hits>lambda[k-1]] = 0 + + ihit = which.max(hits) + hit = hits[ihit] + shit = shits[ihit] + } + + # Stop if the next critical point is negative + if (hit<=0) break + + # Record the critical lambda and solution + lambda[k] = hit + action[k] = I[ihit] + df[k] = r + beta[A,k] = a-hit*b + + # Gamma matrix! 
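+    # (the rows appended here encode this step's selection event as linear
+    # inequalities in y; larInf later applies the polyhedral lemma to the
+    # event {y : Gamma %*% y >= 0}, using the first nk[k] rows)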
+    if (gi + 2*p > nrow(Gamma)) Gamma = rbind(Gamma,matrix(0,2*p+gbuf,n))
+    X2perp = X2 - X1 %*% backsolve(R,t(Q1)%*%X2)
+    c = t(t(X2perp)/(shits-bb))
+    Gamma[gi+Seq(1,p-r),] = shits*t(X2perp); gi = gi+p-r
+    Gamma[gi+Seq(1,p-r-1),] = t(c[,ihit]-c[,-ihit]); gi = gi+p-r-1
+    Gamma[gi+1,] = t(c[,ihit]); gi = gi+1
+
+    # nk, regression contrast, M plus
+    nk[k] = gi
+    vreg[k,] = shit*X2perp[,ihit] / sum(X2perp[,ihit]^2)
+    if (ncol(c) > 1) {
+      ratio = t(c[,-ihit])%*%c[,ihit]/sum(c[,ihit]^2)
+      ip = 1-ratio > 0
+      crit = (t(c[,-ihit])%*%y - ratio*sum(c[,ihit]*y))/(1-ratio)
+      mp[k] = max(max(crit[ip]),0)
+    }
+
+    # Update all of the variables
+    r = r+1
+    A = c(A,I[ihit])
+    I = I[-ihit]
+    s = c(s,shit)
+    X1 = cbind(X1,X2[,ihit])
+    X2 = X2[,-ihit,drop=FALSE]
+
+    # Update the QR decomposition
+    obj = updateQR(Q1,Q2,R,X1[,r])
+    Q1 = obj$Q1
+    Q2 = obj$Q2
+    R = obj$R
+
+    if (verbose) {
+      cat(sprintf("\n%i. lambda=%.3f, adding variable %i, |A|=%i...",
+                  k,hit,A[r],r))
+    }
+
+    # Step counter
+    k = k+1
+  }
+
+  # Trim
+  lambda = lambda[Seq(1,k-1)]
+  action = action[Seq(1,k-1)]
+  df = df[Seq(1,k-1),drop=FALSE]
+  beta = beta[,Seq(1,k-1),drop=FALSE]
+  Gamma = Gamma[Seq(1,gi),,drop=FALSE]
+  nk = nk[Seq(1,k-1)]
+  mp = mp[Seq(1,k-1)]
+  vreg = vreg[Seq(1,k-1),,drop=FALSE]
+
+  # If we reached the maximum number of steps
+  if (k>maxsteps) {
+    if (verbose) {
+      cat(sprintf("\nReached the maximum number of steps (%i),",maxsteps))
+      cat(" skipping the rest of the path.")
+    }
+    completepath = FALSE
+    bls = NULL
+  }
+
+  # If we reached the minimum lambda
+  else if (lambda[k-1]<minlam) {
+    if (verbose) {
+      cat(sprintf("\nReached the minimum lambda (%.3f),",minlam))
+      cat(" skipping the rest of the path.")
+    }
+    completepath = FALSE
+    bls = NULL
+  }
+
+  # Otherwise, note that we completed the path
+  else {
+    completepath = TRUE
+    bls = rep(0,p)
+    bls[A] = a
+  }
+
+  # Assemble the output object
+  out = list(lambda=lambda,action=action,sign=s,df=df,beta=beta,
+    completepath=completepath,bls=bls,Gamma=Gamma,nk=nk,vreg=vreg,mp=mp,
+    x=x,y=y,bx=bx,by=by,sx=sx,intercept=intercept,normalize=normalize,
+    call=this.call)
+  class(out) = "lar"
+  return(out)
+}
+
+##############################
+
+# Coefficient function for lar
+
+coef.lar <- function(object, s, mode=c("step","lambda"), ...) {
+  mode = match.arg(mode)
+
+  if (object$completepath) {
+    k = length(object$action)+1
+    lambda = c(object$lambda,0)
+    beta = cbind(object$beta,object$bls)
+  } else {
+    k = length(object$action)
+    lambda = object$lambda
+    beta = object$beta
+  }
+
+  if (mode=="step") {
+    if (min(s)<0 || max(s)>k) stop(sprintf("s must be between 0 and %i",k))
+    knots = 1:k
+    dec = FALSE
+  } else {
+    if (min(s)<min(lambda)) stop(sprintf("s must be >= %0.3f",min(lambda)))
+    knots = lambda
+    dec = TRUE
+  }
+
+  return(coef.interpolate(beta,s,knots,dec))
+}
+
+# Prediction function for lar
+
+predict.lar <- function(object, newx, s, mode=c("step","lambda"), ...)
{ + beta = coef.lar(object,s,mode) + if (missing(newx)) newx = scale(object$x,FALSE,1/object$sx) + else newx = scale(newx,object$bx,FALSE) + return(newx %*% beta + object$by) +} + +coef.lasso <- coef.lar +predict.lasso <- predict.lar + +############################## + +# Lar inference function + +larInf <- function(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) { + + this.call = match.call() + type = match.arg(type) + checkargs.misc(sigma=sigma,alpha=alpha,k=k, + gridrange=gridrange,mult=mult,ntimes=ntimes) + if (class(obj) != "lar") stop("obj must be an object of class lar") + if (is.null(k) && type=="active") k = length(obj$action) + if (is.null(k) && type=="all") stop("k must be specified when type = all") + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + k = min(k,length(obj$action)) # Round to last step + x = obj$x + y = obj$y + p = ncol(x) + n = nrow(x) + G = obj$Gamma + nk = obj$nk + sx = obj$sx + + if (is.null(sigma)) { + if (n >= 2*p) { + oo = obj$intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + pv.spacing = pv.modspac = pv.covtest = khat = NULL + + if (type == "active") { + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + pv.spacing = pv.modspac = pv.covtest = numeric(k) + vreg = obj$vreg[1:k,,drop=FALSE] + sign = obj$sign[1:k] + vars = obj$action[1:k] + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + Gj = G[1:nk[j],] + uj = rep(0,nk[j]) + vj = vreg[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = a$vup * mj / sxj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + + pv.spacing[j] = spacing.pval(obj,sigma,j) + pv.modspac[j] = modspac.pval(obj,sigma,j) + pv.covtest[j] = covtest.pval(obj,sigma,j) + } + + khat = forwardStop(pv,alpha) + } + + else { + if (type == "aic") { + out = aicStop(x,y,obj$action[1:k],obj$df[1:k],sigma,mult,ntimes) + khat = out$khat + m = out$stopped * ntimes + G = rbind(out$G,G[1:nk[khat+m],]) # Take ntimes more steps past khat + u = c(out$u,rep(0,nk[khat+m])) # (if we need to) + kk = khat + } + else { + G = G[1:nk[k],] + u = rep(0,nk[k]) + kk = k + } + + pv = vlo = vup = numeric(kk) + vmat = matrix(0,kk,n) + ci = tailarea = matrix(0,kk,2) + sign = numeric(kk) + vars = obj$action[1:kk] + xa = x[,vars] + M = pinv(crossprod(xa)) %*% t(xa) + + for (j in 1:kk) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + Gj = rbind(G,vj) + uj = c(u,0) + + a = poly.pval(y,Gj,uj,vj,sigma,bits) + pv[j] = a$pv + sxj = sx[vars[j]] + vlo[j] = a$vlo * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vup[j] = 
a$vup * mj / sxj # Unstandardize (mult by norm of vj / sxj) + vmat[j,] = vj * mj / sxj # Unstandardize (mult by norm of vj / sxj) + + a = poly.int(y,Gj,uj,vj,sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj / sxj # Unstandardize (mult by norm of vj / sxj) + tailarea[j,] = a$tailarea + } + } + + out = list(type=type,k=k,khat=khat,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + pv.spacing=pv.spacing,pv.modspac=pv.modspac,pv.covtest=pv.covtest, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + call=this.call) + class(out) = "larInf" + return(out) +} + +############################## + +spacing.pval <- function(obj, sigma, k) { + v = obj$Gamma[obj$nk[k],] + sd = sigma*sqrt(sum(v^2)) + a = obj$mp[k] + + if (k==1) b = Inf + else b = obj$lambda[k-1] + + return(tnorm.surv(obj$lambda[k],0,sd,a,b)) +} + +modspac.pval <- function(obj, sigma, k) { + v = obj$Gamma[obj$nk[k],] + sd = sigma*sqrt(sum(v^2)) + + if (k < length(obj$action)) a = obj$lambda[k+1] + else if (obj$completepath) a = 0 + else { + warning(sprintf("Modified spacing p-values at step %i require %i steps of the lar path",k,k+1)) + return(NA) + } + + if (k==1) b = Inf + else b = obj$lambda[k-1] + + return(tnorm.surv(obj$lambda[k],0,sd,a,b)) +} + +covtest.pval <- function(obj, sigma, k) { + A = which(obj$beta[,k]!=0) + sA = sign(obj$beta[A,k]) + lam1 = obj$lambda[k] + j = obj$action[k] + + if (k < length(obj$action)) { + lam2 = obj$lambda[k+1] + sj = sign(obj$beta[j,k+1]) + } else if (obj$completepath) { + lam2 = 0 + sj = sign(obj$bls[j]) + } else { + warning(sprintf("Cov test p-values at step %i require %i steps of the lar path",k,k+1)) + return(NA) + } + + x = obj$x + if (length(A)==0) term1 = 0 + else term1 = x[,A,drop=F] %*% solve(crossprod(x[,A,drop=F]),sA) + term2 = x[,c(A,j),drop=F] %*% solve(crossprod(x[,c(A,j),drop=F]),c(sA,sj)) + c = sum((term2 - term1)^2) + t = c * lam1 * (lam1-lam2) / sigma^2 + return(1-pexp(t)) +} + +############################## + +print.lar <- function(x, ...) { + cat("\nCall:\n") + dput(x$call) + + cat("\nSequence of LAR moves:\n") + nsteps = length(x$action) + tab = cbind(1:nsteps,x$action,x$sign) + colnames(tab) = c("Step","Var","Sign") + rownames(tab) = rep("",nrow(tab)) + print(tab) + invisible() +} + +print.larInf <- function(x, tailarea=TRUE, ...) 
{ + cat("\nCall:\n") + dput(x$call) + + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", + x$sigma)) + + if (x$type == "active") { + cat(sprintf("\nSequential testing results with alpha = %0.3f\n",x$alpha)) + cat("",fill=T) + tab = cbind(1:length(x$pv),x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3),round(x$pv.spacing,3),round(x$pv.cov,3)) + colnames(tab) = c("Step", "Var", "Coef", "Z-score", "P-value", + "LowConfPt", "UpConfPt", "Spacing", "CovTest") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from ForwardStop rule = %i\n",x$khat)) + } + + else if (x$type == "all") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$k,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + } + + else if (x$type == "aic") { + cat(sprintf("\nTesting results at step = %i, with alpha = %0.3f\n",x$khat,x$alpha)) + cat("",fill=T) + tab = cbind(x$vars, + round(x$sign*x$vmat%*%x$y,3), + round(x$sign*x$vmat%*%x$y/(x$sigma*sqrt(rowSums(x$vmat^2))),3), + round(x$pv,3),round(x$ci,3)) + colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") + if (tailarea) { + tab = cbind(tab,round(x$tailarea,3)) + colnames(tab)[(ncol(tab)-1):ncol(tab)] = c("LowTailArea","UpTailArea") + } + rownames(tab) = rep("",nrow(tab)) + print(tab) + + cat(sprintf("\nEstimated stopping point from AIC rule = %i\n",x$khat)) + } + + invisible() +} + +plot.lar <- function(x, xvar=c("norm","step","lambda"), breaks=TRUE, + omit.zeros=TRUE, var.labels=TRUE, ...) { + + if (x$completepath) { + k = length(x$action)+1 + lambda = c(x$lambda,0) + beta = cbind(x$beta,x$bls) + } else { + k = length(x$action) + lambda = x$lambda + beta = x$beta + } + p = nrow(beta) + + xvar = match.arg(xvar) + if (xvar=="norm") { + xx = colSums(abs(beta)) + xlab = "L1 norm" + } else if (xvar=="step") { + xx = 1:k + xlab = "Step" + } else { + xx = lambda + xlab = "Lambda" + } + + if (omit.zeros) { + good.inds = matrix(FALSE,p,k) + good.inds[beta!=0] = TRUE + changes = t(apply(beta,1,diff))!=0 + good.inds[cbind(changes,rep(F,p))] = TRUE + good.inds[cbind(rep(F,p),changes)] = TRUE + beta[!good.inds] = NA + } + + plot(c(),c(),xlim=range(xx,na.rm=T),ylim=range(beta,na.rm=T), + xlab=xlab,ylab="Coefficients",main="Least angle regression path",...) 
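+  # (an empty plot is set up above; the coefficient paths, breakpoints, and
+  # variable labels are layered on top below)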
+  abline(h=0,lwd=2)
+  matplot(xx,t(beta),type="l",lty=1,add=TRUE)
+  if (breaks) abline(v=xx,lty=2)
+  if (var.labels) axis(4,at=beta[,k],labels=1:p,cex=0.8,adj=0)
+  invisible()
+}
diff --git a/selectiveInference-currentCRAN/R/funs.manymeans.R b/selectiveInference-currentCRAN/R/funs.manymeans.R
new file mode 100644
index 00000000..998e58d6
--- /dev/null
+++ b/selectiveInference-currentCRAN/R/funs.manymeans.R
@@ -0,0 +1,196 @@
+### functions for computing the many means estimates - Stephen Reid
+### returns
+### i) selected indices
+### ii) selection adjusted point estimates
+### iii) selection adjusted interval estimates
+### iv) selection adjusted p-values for testing whether the underlying signal is 0
+
+#########################
+##### MAIN FUNCTION #####
+#########################
+
+#### user-facing function for computing
+####   selected set
+####   point and interval estimates
+####   p-values
+#### input:
+####   - y = Vector of observations
+####   - alpha = Significance level used in CI construction
+####   - bh.q = q parameter for BH(q) procedure (default: NULL)
+####   - k = Number of largest elements to consider (default: NULL)
+####   - sigma = Estimate of standard deviation of one of the components
+#### output:
+####   * A list (of class "manyMeans") with the following components:
+####   - mu.hat = Vector of length length(y) containing the estimated signal sizes. If a sample element is not selected, then its signal size estimate is 0
+####   - selected.set = Indices into the vector y of the sample elements that were selected by our procedure (either BH(q) or top-K)
+####   - ci = Matrix with two columns and number of rows equal to the number of elements in selected.set. Provides the post-selection CI bounds for the estimated signal sizes of selected elements. CIs given in rows in the same order as encountered in selected.set
+####   - pv = Vector of p-values for the test of nullity of the signals of the selected sample elements. P-values given in the same order as selected.set
+
+manyMeans <- function(y, alpha=0.1, bh.q=NULL, k=NULL, sigma=1, verbose=FALSE) {
+  this.call = match.call()
+  if (missing(y) || is.null(y)) stop("y must be specified")
+  if (!is.numeric(y)) stop("y must be numeric")
+  if (alpha <= 0 || alpha >= 1) stop("alpha must be between 0 and 1")
+  if (is.null(bh.q) && is.null(k)) stop("You must specify either bh.q or k; they cannot both be NULL")
+  if (!is.null(bh.q) && (bh.q <= 0 || bh.q >= 1)) stop("bh.q must be between 0 and 1")
+  if (!is.null(k) && (k < 1 || k > length(y) || k != round(k))) stop("k must be an integer between 1 and length(y)")
+  if (sigma <= 0) stop("sigma must be > 0")
+
+  n = length(y)
+  if (!is.null(bh.q)) { # use BH selection procedure
+
+    if (verbose && !is.null(k)) cat("(Both bh.q and k have been specified; k is being ignored)\n")
+    k = NULL
+    ci=NULL
+    ### find the selected set and threshold
+    p.vals = 2*pnorm (abs(y)/sigma, 0, 1, lower.tail=FALSE)
+    order.p.vals = order(p.vals)
+    sorted.p.vals = p.vals[order.p.vals]
+
+    options (warn=-1) # ignore warning if max is over empty set
+    last.reject = max(which (sorted.p.vals <= bh.q*(1:n)/n))
+    options (warn=0) # reinstitute warnings
+
+    if (last.reject == -Inf){ # none rejected
+      if (verbose) cat("No sample elements selected.\n")
+      out = list(mu.hat=rep(0,n), selected.set=NULL, pv=NULL, ci=NULL, method="BH(q)",
+                 bh.q=bh.q, k=NULL, threshold=NULL, sigma=sigma, call=this.call)
+      class(out) = "manyMeans"
+      return(out)
+    }
+
+    selected.set = order.p.vals[1:n <= last.reject]
+    threshold = sigma*qnorm (last.reject/2/n, lower.tail=FALSE)
+  }
+
+  else{ # use top-k selection procedure
+
+    ### find the selected set and threshold
+    if (k == n) { # make no changes - return MLE
+      z.alpha = qnorm (alpha/2, lower.tail=FALSE)
+      cis = cbind(y - z.alpha*sigma, y + z.alpha*sigma)
+      p.vals = 2*pnorm (abs(y), 0, sigma, lower.tail=FALSE)
+
+      out = list(mu.hat=y, selected.set=1:n, pv=p.vals, ci=cis, method="top-K",
+                 bh.q=NULL, k=k, threshold=NULL, sigma=sigma, call=this.call)
+      class(out) = "manyMeans"
+      return(out)
+    }
+
+    order.abs.y = order (-abs(y))
+    sorted.abs.y = y[order.abs.y]
+
+    selected.set = order.abs.y[1:k]
+    threshold = abs(sorted.abs.y[k+1])
+  }
+
+  ### estimate their underlying signal sizes
+  mu.hat = sapply (selected.set, function(s){
+    uniroot(f=function(mu){tn.mean(mu, -threshold, threshold, sigma=sigma) - y[s]}, lower=-10000*sigma, upper=10000*sigma)$root
+  })
+
+  ### and CIs
+  right.ci = sapply (selected.set, function(s){
+    uniroot (f=function(mu){tn.cdf (y[s], mu, -threshold, threshold, sigma=sigma) - (alpha/2)}, lower=-10000*sigma, upper=10000*sigma)$root
+  })
+  left.ci = sapply (selected.set, function(s){
+    uniroot (f=function(mu){tn.cdf (y[s], mu, -threshold, threshold, sigma=sigma) - (1-alpha/2)}, lower=-10000*sigma, upper=10000*sigma)$root
+  })
+
+  ### and p-values
+  p.vals = sapply (selected.set, function(s){tn.cdf (y[s], 0, -threshold, threshold, sigma=sigma)})
+  p.vals = 2*pmin(p.vals, 1-p.vals)
+
+  ### arrange
+  order.selected.set = order (selected.set)
+  selected.set = selected.set[order.selected.set]
+  mu.hat = mu.hat[order.selected.set]
+  left.ci = left.ci[order.selected.set]
+  right.ci = right.ci[order.selected.set]
+  p.vals = p.vals[order.selected.set]
+
+  mu.hat.final = rep(0, n)
+  mu.hat.final[selected.set] = mu.hat
+
+  out = list(mu.hat=mu.hat.final, selected.set=selected.set, pv=p.vals, ci=cbind(left.ci,right.ci),
+             method=ifelse(is.null(bh.q), "top-K", "BH(q)"), sigma=sigma, bh.q=bh.q, k=k, threshold=threshold,
+             call=this.call)
+  class(out) = "manyMeans"
+  return(out)
+}
+
+#### prints a pretty data frame summarising the information in an object of the manyMeans class:
+#### columns for index, signal size estimate, left and right CI bounds and p-values,
+#### only for those sample elements selected by the selection procedure associated with the object
+print.manyMeans <- function(x, ...){
+  cat("\nCall:\n")
+  dput(x$call)
+
+  cat(sprintf("\nStandard deviation of noise sigma = %0.3f\n\n",
+              x$sigma))
+
+  tab = cbind(x$selected.set,x$mu.hat[x$selected.set],x$pv,x$ci)
+  tab = round(tab,3)
+  colnames(tab) = c("SelInd","MuHat","P-value","LowConfPt","UpConfPt")
+  rownames(tab) = rep("",nrow(tab))
+  print(tab)
+}
+
+###############################
+##### AUXILIARY FUNCTIONS #####
+###############################
+
+#### function returning the cumulative distribution function value
+#### of a truncated Gaussian RV, truncated to interval (-Inf, a) \union (b, Inf)
+#### with underlying Gaussian having mean parameter mu and standard deviation sigma
+#### at value y
+tn.cdf = function(y, mu, a, b, sigma=1){
+  ## denominator
+  d_right = pnorm (b, mu, sigma, lower.tail=FALSE, log.p=TRUE)
+  d_left = pnorm (a, mu, sigma, lower.tail=TRUE, log.p=TRUE)
+  d_max = max(d_right, d_left)
+  d_log = d_max + log(exp(d_left - d_max) + exp(d_right - d_max))
+
+
+  # numerator
+  if (y > a & y < b){
+    n_log = d_left
+    return (exp(n_log-d_log))
+  }else{
+    if (y > b){
+      # b and y
+      n_y_tilde = pnorm (y, mu, sigma, lower.tail=FALSE, log.p=TRUE)
+      n_b_tilde = pnorm (b, mu, sigma, lower.tail=FALSE, log.p=TRUE)
+      n_yb = n_b_tilde + log(1 - exp(n_y_tilde-n_b_tilde))
+
+      # a
+      n_a = d_left
+
+      # combine
+      return(exp(n_yb-d_log) + exp(n_a-d_log))
+    }else{
+      n_log = pnorm (y, mu, sigma, lower.tail=TRUE, log.p=TRUE)
+      return (exp(n_log-d_log))
+    }
+  }
+}
+
+##### function for computing the mean of an N(mu, sigma^2) RV
+##### truncated to be on the interval (-Inf, a) \union (b, Inf)
+tn.mean = function(mu, a, b, sigma=1){
+  # denominator
+  d_left = pnorm (a, mu, sigma, lower.tail=TRUE, log.p=TRUE)
+  d_right = pnorm (b, mu, sigma, lower.tail=FALSE, log.p=TRUE)
+  d_max = max(d_left, d_right)
+  d_log = d_max + log(exp(d_left - d_max) + exp(d_right - d_max))
+
+  # numerator
+  n_left = dnorm (b, mu, sigma, log=TRUE)
+  n_right = dnorm (a, mu, sigma, log=TRUE)
+
+  if (n_left > n_right){
+    mu + exp(n_left + log(1 - exp(n_right-n_left)) - d_log)
+  }else{
+    mu - exp(n_right + log(1 - exp(n_left-n_right)) - d_log)
+  }
+}
diff --git a/selectiveInference-currentCRAN/R/funs.max.R b/selectiveInference-currentCRAN/R/funs.max.R
new file mode 100644
index 00000000..97f34afb
--- /dev/null
+++ b/selectiveInference-currentCRAN/R/funs.max.R
@@ -0,0 +1,84 @@
+#Mills lower bound on the integral of a standard normal over an interval
+#Need to use something else near zero, where mills is bad.
+mills.lb = function(a,b){
+  t=5#threshold for switching to approximation
+  if(b<a){
+    return(0)
+  }
+  if(a>=t){
+    return(a*exp(-a^2/2)/(1+a^2)-exp(-b^2/2)/b)
+  }
+  if(b <= -t){
+    return(mills.lb(-b,-a))
+  }
+  #Note, like in the rest of this program, I'm not dividing by sqrt(2*pi), so we need to rescale here
+  sqrt(2*pi)*pnorm(min(b,t))-sqrt(2*pi)*pnorm(max(a,-t))+mills.lb(a,-t)+mills.lb(t,b)
+}
+
+
+#Truncate the interval.
We chop off the ends near infinity, being careful +#so that the chopped tail is guaranteed to be close enough to its mills approximation +#a,b are the left and right endpoints, z is the mid point +#delta is the multiplicative error limit of the truncation on the final fraction (roughly) +truncate.interval = function(a,b,z,delta=1e-16){ + #Initialize some stuff + L.extra = 0#Extra probability to be added for the truncation on the left + R.extra = 0#Extra probability to be added for the truncation on the right + a.new = a#truncated interval bounds + b.new = b#truncated interval bounds + + #We need bounds on the integrals + RL.lb = mills.lb(a,b) + R.lb = mills.lb(z,b) + + #Now we bound the error we can tolerate in the tail approximations + eps.R = min(delta*R.lb,delta*RL.lb/2) + eps.L = delta*RL.lb/2 + + #For now, only truncate infinite end points + #Might want to change this one day, if we have trouble with super wide but finite intervals + if (b==Inf){ + f = function(x){x^2+log(1+x^2)+log(eps.R)}#encodes error of mills approximation + b.new = uniroot(f,c(1.1,1000))$root + b.new = max(b.new,z+1)#Don't truncate past z + R.extra = exp(-b.new^2/2)/b.new + } + if (a==-Inf){ + f = function(x){x^2+log(1+x^2)+log(eps.L)}#encodes error of mills approximation + a.new = -uniroot(f,c(1.1,1000))$root + a.new = min(a.new,z-1)#Don't truncate past z + L.extra = exp(-a.new^2/2)/a.new + } + + list(a=a.new,b=b.new,L.extra=L.extra,R.extra=R.extra,z=z) +} + + +#Approximates integral_a^b e^{-x^2/2+offset^2/2} dx +# offset is used to make ratios slightly more stable +# defaults to offset=0 +# Note that I've left out 1/sqrt(2*pi), you can add it in if you like +approx.int = function(a,b,n=1000,offset=0){ + delta = (b-a)/n #Step size, may want to vary in the future + x = seq(from=a,to=b,by=delta) + y = -x^2/2 + offset^2/2 # On the log scale + m = diff(y)/diff(x) # Line segment slopes + de = diff(exp(y)) # Difference on original scale + sum(de/m) #Sum of integrals of line segments (closed form) +} + +#Uses approx.int to evaluate int_x^b phi(z)dz / int_a^b phi(z)dz +#Right now offsets everything for a little more stability +#Uses truncation to handle infinite endpoints +max.approx.frac = function(a,b,x,mu=0,n=1000){ + returns = numeric(length(mu)) + for(i in 1:length(returns)){ + truncation = truncate.interval(a-mu[i],b-mu[i],x-mu[i]) + #Our offset will use the smaller of a and b in absolute value + offset = min(abs(truncation$a),abs(truncation$b)) + #The truncation also shifts by the mean, so we don't need to do it again for the end points + #but we do need to use the center z returned by truncation, rather than x, to match + left = approx.int(truncation$a,truncation$z,n,offset)+truncation$L.extra + right = approx.int(truncation$z,truncation$b,n,offset)+truncation$R.extra + returns[i] = right/(left+right) + } + returns +} diff --git a/selectiveInference-currentCRAN/R/funs.quadratic.R b/selectiveInference-currentCRAN/R/funs.quadratic.R new file mode 100644 index 00000000..799352b5 --- /dev/null +++ b/selectiveInference-currentCRAN/R/funs.quadratic.R @@ -0,0 +1,257 @@ +truncationRegion <- function(obj, ydecomp, type, tol = 1e-15) { + + n <- nrow(obj$x) + Z <- ydecomp$Z + if (type == "TC") { + eta <- ydecomp$eta + } else { + Vd <- ydecomp$Vd + V2 <- ydecomp$V2 + C <- ydecomp$C + R <- ydecomp$R + } + L <- lapply(1:length(obj$action), function(s) { + + Ug <- obj$maxprojs[[s]] + peng <- obj$maxpens[[s]] + if (s > 1) { + Zs <- obj$cumprojs[[s-1]] %*% Z + if (type == "TC") { + etas <- obj$cumprojs[[s-1]] %*% eta + } else { 
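+        # (TF case: the decomposition of y carries two fixed directions,
+        # Vd and V2, instead of the single eta used for the TC statistic,
+        # so both are passed through the cumulative projections)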
+ Vds <- obj$cumprojs[[s-1]] %*% Vd + V2s <- obj$cumprojs[[s-1]] %*% V2 + } + } else { + Zs <- Z + if (type == "TC") { + etas <- eta + } else { + Vds <- Vd + V2s <- V2 + } + } + + num.projs <- length(obj$projections[[s]]) + if (num.projs == 0) { + return(list(Intervals(c(-Inf,0)))) + } else { + lapply(1:num.projs, function(l) { + + Uh <- obj$projections[[s]][[l]] + penh <- obj$aicpens[[s]][[l]] + # The quadratic form corresponding to + # (t*U + Z)^T %*% Q %*% (t*U + Z) \geq 0 + # we find the roots in t, if there are any + # and return the interval of potential t + if (type == "TC") { + coeffs <- quadratic_coefficients(obj$sigma, Ug, Uh, peng, penh, etas, etas, Zs, Zs) + quadratic_roots(coeffs$A, coeffs$B, coeffs$C, tol) + } else { + coeffs <- TF_coefficients(R, Ug, Uh, peng, penh, Zs, Zs, Vds, Vds, V2s, V2s) + roots <- TF_roots(R, C, coeffs) + return(roots) + } + }) + } + # LL is a list of intervals + }) + # L is now a list of lists of intervals + return(unlist(L, recursive = FALSE, use.names = FALSE)) +} + +quadratic_coefficients <- function(sigma, Ug, Uh, peng, penh, etag, etah, Zg, Zh) { + # g indexes minimizer, h the comparison + Uheta <- t(Uh) %*% etah + Ugeta <- t(Ug) %*% etag + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + etaZh <- t(etah) %*% Zh + etaZg <- t(etag) %*% Zg + if (is.null(sigma)) { + A <- penh * (sum(etah^2) - sum(Uheta^2)) - peng * (sum(etag^2) - sum(Ugeta^2)) + B <- 2 * penh * (etaZh - t(Uheta) %*% UhZ) - 2 * peng * (etaZg - t(Ugeta) %*% UgZ) + C <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) + } else { + A <- (sum(etah^2) - sum(Uheta^2)) - (sum(etag^2) - sum(Ugeta^2)) + B <- 2 * (etaZh - t(Uheta) %*% UhZ) - 2 * (etaZg - t(Ugeta) %*% UgZ) + C <- (sum(Zh^2) - sum(UhZ^2) + penh) - (sum(Zg^2) - sum(UgZ^2) + peng) + } + return(list(A = A, B = B, C = C)) +} + +quadratic_roots <- function(A, B, C, tol) { + disc <- B^2 - 4*A*C + b2a <- -B/(2*A) + + if (disc > tol) { + # Real roots + pm <- sqrt(disc)/(2*A) + endpoints <- sort(c(b2a - pm, b2a + pm)) + + } else { + # No real roots + if (A > -tol) { + # Quadratic form always positive + return(Intervals(c(-Inf,0))) + } else { + # Quadratic form always negative + stop("Empty TC support is infeasible") + } + } + + if (A > tol) { + # Parabola opens upward + if (min(endpoints) > 0) { + # Both roots positive, union of intervals + return(Intervals(rbind(c(-Inf,0), endpoints))) + } else { + # At least one negative root + return(Intervals(c(-Inf, max(0, endpoints[2])))) + } + } else { + if (A < -tol) { + # Parabola opens downward + if (endpoints[2] < 0) { + # Positive quadratic form only when t negative + stop("Negative TC support is infeasible") + } else { + # Part which is positive + if (endpoints[1] > 0) { + return(Intervals(rbind(c(-Inf, endpoints[1]), c(endpoints[2], Inf)))) + } else { + return(Intervals(c(endpoints[2], Inf))) + } + } + } else { + # a is too close to 0, quadratic is actually linear + if (abs(B) > tol) { + if (B > 0) { + return(Intervals(c(-Inf, max(0, -C/B)))) + } else { + if (-C/B < 0) stop("Infeasible linear equation") + return(Intervals(rbind(c(-Inf, 0), c(-C/B, Inf)))) + } + } else { + warning("Ill-conditioned quadratic") + return(Intervals(c(-Inf,0))) + } + } + } +} + +# Helper functions for TF roots +roots_to_checkpoints <- function(roots) { + checkpoints <- unique(sort(c(0, roots))) + return(c(0, (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) +} +roots_to_partition <- function(roots) { + checkpoints <- unique(sort(c(0, roots))) + return(list(endpoints = 
c(checkpoints, Inf), midpoints = (checkpoints + c(checkpoints[-1], 200 + checkpoints[length(checkpoints)]))/2)) +} + +# Efficiently compute coefficients of one-dimensional TF slice function +TF_coefficients <- function(R, Ug, Uh, peng, penh, Zg, Zh, Vdg, Vdh, V2g, V2h) { + + UhZ <- t(Uh) %*% Zh + UgZ <- t(Ug) %*% Zg + UhVd <- t(Uh) %*% Vdh + UgVd <- t(Ug) %*% Vdg + UhV2 <- t(Uh) %*% V2h + UgV2 <- t(Ug) %*% V2g + VdZh <- sum(Vdh*Zh) + VdZg <- sum(Vdg*Zg) + V2Zh <- sum(V2h*Zh) + V2Zg <- sum(V2g*Zg) + + x0 <- penh * (sum(Zh^2) - sum(UhZ^2)) - peng * (sum(Zg^2) - sum(UgZ^2)) + x1 <- 2*R*(penh * (VdZh - sum(UhZ*UhVd)) - peng * (VdZg - sum(UgZ*UgVd))) + x2 <- 2*R*(penh * (V2Zh - sum(UhZ*UhV2)) - peng * (V2Zg - sum(UgZ*UgV2))) + x12 <- 2*R^2*(penh * (sum(Vdh*V2h) - sum(UhVd*UhV2)) - peng * (sum(Vdg*V2g) - sum(UgVd*UgV2))) + x11 <- R^2*(penh * (sum(Vdh^2) - sum(UhVd^2)) - peng * (sum(Vdg^2) - sum(UgVd^2))) + x22 <- R^2*(penh * (sum(V2h^2) - sum(UhV2^2)) - peng * (sum(V2g^2) - sum(UgV2^2))) + + return(list(x11=x11, x22=x22, x12=x12, x1=x1, x2=x2, x0=x0)) +} + +# Numerically solve for roots of TF slice using +# hybrid polyroot/uniroot approach + +TF_roots <- function(R, C, coeffs, tol = 1e-8, tol2 = 1e-6) { + + x11 <- coeffs$x11 + x22 <- coeffs$x22 + x12 <- coeffs$x12 + x1 <- coeffs$x1 + x2 <- coeffs$x2 + x0 <- coeffs$x0 + + g1 <- function(t) sqrt(C*t/(1+C*t)) + g2 <- function(t) 1/sqrt(1+C*t) + I <- function(t) x11*g1(t)^2 + x12*g1(t)*g2(t) + x22*g2(t)^2 + x1*g1(t) + x2*g2(t) + x0 + + z4 <- complex(real = -x11 + x22, imaginary = -x12)/4 + z3 <- complex(real = x2, imaginary = -x1)/2 + z2 <- complex(real = x11/2+x22/2+x0) + z1 <- Conj(z3) + z0 <- Conj(z4) + + zcoefs <- c(z0, z1, z2, z3, z4) + croots <- polyroot(zcoefs) + thetas <- Arg(croots) + # Can't specify polyroot precision :( + modinds <- Mod(croots) <= 1 + tol2 & Mod(croots) >= 1 - tol2 + angleinds <- thetas >=0 & thetas <= pi/2 + roots <- unique(thetas[which(modinds & angleinds)]) + troots <- tan(roots)^2/C + + checkpoints <- c() + if (length(troots) > 0) checkpoints <- roots_to_checkpoints(troots) + checkpoints <- sort( + c(checkpoints, 0, tol, tol2, + seq(from = sqrt(tol2), to = 1, length.out = 50), + seq(from = 1.2, to=50, length.out = 20), + 100, 1000, 10000)) + ## if (length(troots) == 0) { + ## # Polyroot didn't catch any roots + ## # ad-hoc check: + ## checkpoints <- c(0, tol, tol2, + ## seq(from = sqrt(tol2), to = 1, length.out = 50), + ## seq(from = 1.2, to=50, length.out = 20), + ## 100, 1000, 10000) + ## } else { + ## checkpoints <- roots_to_checkpoints(troots) + ## } + + signs <- sign(I(checkpoints)) + diffs <- c(0, diff(signs)) + changeinds <- which(diffs != 0) + + if (length(changeinds) > 0) { + + roots <- unlist(lapply(changeinds, function(ind) { + uniroot(I, lower = checkpoints[ind-1], upper = checkpoints[ind], tol = tol)$root + })) + + partition <- roots_to_partition(roots) + negative <- which(I(partition$midpoints) < 0) + + intervals <- matrix(NA, ncol=2) + for (i in 1:length(negative)) { + ind <- negative[i] + if ((i > 1) && (ind == negative[i-1] + 1)) { + # There was not a sign change at end of previous interval + intervals[nrow(intervals), 2] <- partition$endpoints[ind+1] + } else { + intervals <- rbind(intervals, c(partition$endpoints[ind], partition$endpoints[ind+1])) + } + } + + return(Intervals(intervals[-1,])) + } + + # Apparently no roots, always positive + if (I(0) < 0) stop("Infeasible constraint!") + return(Intervals(c(-Inf,0))) +} + diff --git a/selectiveInference-currentCRAN/man/estimateSigma.Rd 
b/selectiveInference-currentCRAN/man/estimateSigma.Rd new file mode 100644 index 00000000..c9561218 --- /dev/null +++ b/selectiveInference-currentCRAN/man/estimateSigma.Rd @@ -0,0 +1,64 @@ +\name{estimateSigma} +\alias{estimateSigma} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +Estimate the noise standard deviation in regression +} +\description{ +Estimates the standard deviation of the noise, for use in the selectiveInference +package +} +\usage{ +estimateSigma(x, y, intercept=TRUE, standardize=TRUE) +} +\arguments{ + \item{x}{ +Matrix of predictors (n by p) +} + \item{y}{ +Vector of outcomes (length n) +} +\item{intercept}{Should glmnet be run with an intercept? Default is TRUE} +\item{standardize}{Should glmnet be run with standardized predictors? Default is TRUE} +} +\details{ +This function estimates the standard deviation of the noise, in a linear regresion setting. +A lasso regression is fit, using cross-validation to estimate the tuning parameter lambda. +With sample size n, yhat equal to the predicted values and df being the number of nonzero +coefficients from the lasso fit, the estimate of sigma is \code{sqrt(sum((y-yhat)^2) / (n-df-1))}. +Important: if you are using glmnet to compute the lasso estimate, be sure to use the settings +for the "intercept" and "standardize" arguments in glmnet and estimateSigma. Same applies to fs +or lar, where the argument for standardization is called "normalize". +} +\value{ +\item{sigmahat}{The estimate of sigma} +\item{df}{The degrees of freedom of lasso fit used} +} +\references{ +Stephen Reid, Jerome Friedman, and Rob Tibshirani (2014). +A study of error variance estimation in lasso regression. arXiv:1311.5274. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise +fsfit = fs(x,y) + +# estimate sigma +sigmahat = estimateSigma(x,y)$sigmahat + +# run sequential inference with estimated sigma +out = fsInf(fsfit,sigma=sigmahat) +out +} + + diff --git a/selectiveInference-currentCRAN/man/factorDesign.Rd b/selectiveInference-currentCRAN/man/factorDesign.Rd new file mode 100644 index 00000000..8e061db6 --- /dev/null +++ b/selectiveInference-currentCRAN/man/factorDesign.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{factorDesign} +\alias{factorDesign} +\title{Expand a data frame with factors to form a design matrix with the full binary encoding of each factor.} +\usage{ +factorDesign(df) +} +\arguments{ +\item{df}{Data frame containing some columns which are \code{factors}.} +} +\value{ +List containing +\describe{ + \item{x}{Design matrix, the first columns contain any numeric variables from the original date frame.} + \item{index}{Group membership indicator for expanded matrix.} +} +} +\description{ +When using \code{\link{groupfs}} with factor variables call this function first to create a design matrix. 
+} +\examples{ +\dontrun{ +fd = factorDesign(warpbreaks) +y = rnorm(nrow(fd$x)) +fit = groupfs(fd$x, y, fd$index, maxsteps=2, intercept=FALSE) +pvals = groupfsInf(fit) +} +} + diff --git a/selectiveInference-currentCRAN/man/fixedLassoInf.Rd b/selectiveInference-currentCRAN/man/fixedLassoInf.Rd new file mode 100644 index 00000000..6795ec17 --- /dev/null +++ b/selectiveInference-currentCRAN/man/fixedLassoInf.Rd @@ -0,0 +1,252 @@ +\name{fixedLassoInf} +\alias{fixedLassoInf} + +\title{ +Inference for the lasso, with a fixed lambda +} +\description{ +Compute p-values and confidence intervals for the lasso estimate, at a +fixed value of the tuning parameter lambda +} +\usage{ +fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial", + "cox"),intercept=TRUE, status=NULL, sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE) +} +\arguments{ + \item{x}{ +Matrix of predictors (n by p); +} + \item{y}{ +Vector of outcomes (length n) +} + \item{beta}{ +Estimated lasso coefficients (e.g., from glmnet). This is of length p +(so the intercept is not included as the first component). + + Be careful! This function uses the "standard" lasso objective + \deqn{ + 1/2 \|y - x \beta\|_2^2 + \lambda \|\beta\|_1. + } + In contrast, glmnet multiplies the first term by a factor of 1/n. + So after running glmnet, to extract the beta corresponding to a value lambda, + you need to use \code{beta = coef(obj, s=lambda/n)[-1]}, + where obj is the object returned by glmnet (and [-1] removes the intercept, + which glmnet always puts in the first component) +} + \item{lambda}{ +Value of lambda used to compute beta. See the above warning +} + +\item{family}{Response type: "gaussian" (default), "binomial", or + "cox" (for censored survival data) } + +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares fit when n >= 2p, and +using the standard deviation of y when n < 2p. In the latter case, the user +should use \code{\link{estimateSigma}} function for a more accurate estimate. +Not used for family= "binomial", or "cox" +} +\item{alpha}{ +Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) +} +\item{intercept}{ +Was the lasso problem solved (e.g., by glmnet) with an intercept in the model? +Default is TRUE. Must be TRUE for "binomial" family. Not used for 'cox" family, where no intercept is assumed. +} +\item{status}{Censoring status for Cox model; 1=failurem 0=censored} +\item{type}{Contrast type for p-values and confidence intervals: default is +"partial"---meaning that the contrasts tested are the partial population +regression coefficients, within the active set of predictors; the alternative is +"full"---meaning that the full population regression coefficients are tested. +The latter does not make sense when p > n.} +\item{tol.beta}{ +Tolerance for determining if a coefficient is zero +} +\item{tol.kkt}{ +Tolerance for determining if an entry of the subgradient is zero +} +\item{gridrange}{ +Grid range for constructing confidence intervals, on the standardized scale +} +\item{bits}{ +Number of bits to be used for p-value and confidence interval calculations. Default is +NULL, in which case standard floating point calculations are performed. 
When not NULL,
+multiple precision floating point calculations are performed with the specified number
+of bits, using the R package \code{Rmpfr} (if this package is not installed, then a
+warning is thrown, and standard floating point calculations are pursued).
+Note: standard double precision uses 53 bits
+so, e.g., a choice of 200 bits uses about 4 times double precision. The confidence
+interval computation is sometimes numerically challenging, and the extra precision can be
+helpful (though computationally more costly). In particular, extra precision might be tried
+if the values in the output columns of \code{tailarea} differ noticeably from alpha/2.
+}
+\item{verbose}{
+Print out progress along the way? Default is FALSE}
+}
+
+\details{
+This function computes selective p-values and confidence intervals for the lasso,
+given a fixed value of the tuning parameter lambda.
+Three different response types are supported: gaussian, binomial and Cox.
+The confidence interval construction involves numerical search and can be fragile:
+if the observed statistic is too close to either end of the truncation interval
+(vlo and vup, see references), then one or possibly both endpoints of the interval of
+desired coverage cannot be computed, and default to +/- Inf. The output \code{tailarea}
+gives the achieved Gaussian tail areas for the reported intervals---these should be close
+to alpha/2, and can be used for error-checking purposes.
+
+Important: before running glmnet (or some other lasso solver), x should be centered, that is x <- scale(x,TRUE,FALSE).
+In addition, if standardization of the predictors is desired, x should be scaled as well: x <- scale(x,TRUE,TRUE).
+Then when running glmnet, set standardize=FALSE. See the example below.
+
+The penalty.factor facility in glmnet (allowing a different penalty lambda for each predictor)
+is not yet implemented in fixedLassoInf. However, you can finesse this; see the example below. One caveat: with this approach, a penalty factor of zero (forcing a predictor in)
+is not allowed.
+
+Note that the coefficients and standard errors reported are unregularized.
+E.g., for the Gaussian, they are the usual least squares estimates and standard errors
+for the model fit to the active set from the lasso.
+}
+\value{
+\item{type}{Type of coefficients tested (partial or full)}
+\item{lambda}{Value of tuning parameter lambda used}
+\item{pv}{One-sided p-values for active variables; these use the fact that we have conditioned on the signs.}
+\item{ci}{Confidence intervals}
+\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval}
+\item{vlo}{Lower truncation limits for statistics}
+\item{vup}{Upper truncation limits for statistics}
+\item{vmat}{Linear contrasts that define the observed statistics}
+\item{y}{Vector of outcomes}
+\item{vars}{Variables in active set}
+\item{sign}{Signs of active coefficients}
+\item{alpha}{Desired coverage (alpha/2 in each tail)}
+\item{sigma}{Value of error standard deviation (sigma) used}
+\item{call}{The call to fixedLassoInf}
+}
+
+\references{
+Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013).
+Exact post-selection inference, with application to the lasso. arXiv:1311.6238.
+
+Jonathan Taylor and Robert Tibshirani (2016) Post-selection inference for L1-penalized likelihood models.
+arXiv:1602.07358 + +} +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# first run glmnet +gfit = glmnet(x,y,standardize=FALSE) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) +lambda = .8 +beta = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y)[-1] + +# compute fixed lambda p-values and selection intervals +out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out + + +## as above, but use lar function instead to get initial +## lasso fit (should get same results) + lfit = lar(x,y,normalize=FALSE) + beta = coef(lfit,s=lambda,mode="lambda",x=x,y=y) + out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) + out2 + +## mimic different penalty factors by first scaling x + set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x=scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) +pf=c(rep(1,7),rep(.1,3)) #define penalty factors +pf=p*pf/sum(pf) # penalty factors should be rescaled so they sum to p +xs=scale(x,FALSE,pf) #scale cols of x by penalty factors +# first run glmnet +gfit = glmnet(xs,y,standardize=FALSE) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) +lambda = .8 +beta_hat = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y)[-1] + +# compute fixed lambda p-values and selection intervals +out = fixedLassoInf(xs,y,beta_hat,lambda,sigma=sigma) + +#rescale conf points to undo the penalty factor +out$ci=t(scale(t(out$ci),FALSE,pf[out$vars])) +out + +#logistic model +set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + y = x\%*\%beta + sigma*rnorm(n) + y=1*(y>mean(y)) + # first run glmnet + gfit = glmnet(x,y,standardize=FALSE,family="binomial") + + # extract coef for a given lambda; note the 1/n factor! + # (and here we DO include the intercept term) + lambda = .8 + beta_hat = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y) + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial") + out + +#Cox model +set.seed(43) + n = 50 + p = 10 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x=scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + tim = as.vector(x\%*\%beta + sigma*rnorm(n)) + tim= tim-min(tim)+1 +status=sample(c(0,1),size=n,replace=TRUE) + # first run glmnet + + + gfit = glmnet(x,Surv(tim,status),standardize=FALSE,family="cox") + + # extract coef for a given lambda; note the 1/n factor! + + lambda = 1.5 + beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE,x=x,y=Surv(tim,status))) + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox") + out +} + \ No newline at end of file diff --git a/selectiveInference-currentCRAN/man/forwardStop.Rd b/selectiveInference-currentCRAN/man/forwardStop.Rd new file mode 100644 index 00000000..87eb7abd --- /dev/null +++ b/selectiveInference-currentCRAN/man/forwardStop.Rd @@ -0,0 +1,55 @@ +\name{forwardStop} +\alias{forwardStop} +%- Also NEED an '\alias' for EACH other topic documented here. 
+\title{
+ForwardStop rule for sequential p-values
+}
+\description{
+Computes the ForwardStop sequential stopping rule of G'Sell et al. (2014)
+}
+\usage{
+forwardStop(pv, alpha=0.1)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{pv}{
+Vector of \emph{sequential} p-values, for example from fsInf or larInf
+}
+  \item{alpha}{
+Desired FDR control level (between 0 and 1)
+}
+}
+\details{
+Computes the ForwardStop sequential stopping rule of G'Sell et al. (2014).
+Guarantees FDR control at the level alpha, for independent p-values.
+}
+\value{
+Step number for sequential stop.
+}
+\references{
+Max Grazier G'Sell, Stefan Wager, Alexandra Chouldechova, and Rob Tibshirani (2014).
+Sequential selection procedures and false discovery rate control. arXiv:1309.5352.
+To appear in Journal of the Royal Statistical Society: Series B.
+}
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\examples{
+set.seed(33)
+n = 50
+p = 10
+sigma = 1
+x = matrix(rnorm(n*p),n,p)
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# run forward stepwise
+fsfit = fs(x,y)
+
+# compute sequential p-values and confidence intervals
+# (sigma estimated from full model)
+out = fsInf(fsfit)
+out
+
+# estimate optimal stopping point
+forwardStop(out$pv, alpha=.10)
+}
diff --git a/selectiveInference-currentCRAN/man/fs.Rd b/selectiveInference-currentCRAN/man/fs.Rd
new file mode 100644
index 00000000..2a61c836
--- /dev/null
+++ b/selectiveInference-currentCRAN/man/fs.Rd
@@ -0,0 +1,95 @@
+\name{fs}
+\alias{fs}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+Forward stepwise regression
+}
+\description{
+This function implements forward stepwise regression, for use in the
+selectiveInference package
+}
+\usage{
+fs(x, y, maxsteps=2000, intercept=TRUE, normalize=TRUE, verbose=FALSE)
+}
+\arguments{
+  \item{x}{
+Matrix of predictors (n by p)
+}
+  \item{y}{
+Vector of outcomes (length n)
+}
+  \item{maxsteps}{
+Maximum number of steps to take
+}
+\item{intercept}{Should an intercept be included in the model? Default is TRUE}
+\item{normalize}{Should the predictors be normalized? Default is TRUE. (Note:
+this argument has no real effect on model selection since forward stepwise is
+already scale invariant; however, it is included for completeness, and to match
+the interface for the \code{lar} function)
+}
+\item{verbose}{Print out progress along the way? Default is FALSE}
+}
+
+\details{
+This function implements forward stepwise regression, adding the predictor at each
+step that maximizes the absolute correlation between the predictors---once
+orthogonalized with respect to the current model---and the residual. This entry
+criterion is standard, and is equivalent to choosing the variable that achieves
+the biggest drop in RSS at each step; it is used, e.g., by the \code{step} function
+in R. Note that, for example, the \code{lars} package implements a stepwise option
+(with type="step"), but uses a (mildly) different entry criterion, based on maximal
+absolute correlation between the original (non-orthogonalized) predictors and the
+residual.
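+
+For illustration only, the entry criterion can be checked by hand. The sketch
+below is not part of the package API; the names \code{act} (a hypothetical
+current active set of column indices), \code{r}, \code{xo}, and \code{j} are
+made up for this example. It orthogonalizes the remaining predictors against
+the active ones and picks the column most correlated with the residual:
+\preformatted{
+## illustrative sketch, assuming hypothetical names
+set.seed(1)
+x = matrix(rnorm(50*5),50,5)
+y = rnorm(50)
+act = 1                                     # hypothetical current active set
+r = resid(lm(y ~ x[, act]))                 # residual from current model
+xo = apply(x[, -act, drop=FALSE], 2,
+           function(xj) resid(lm(xj ~ x[, act]))) # orthogonalized predictors
+j = which.max(abs(cor(xo, r)))              # position of the next entering
+                                            # variable among remaining columns
+}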
+}
+\value{
+\item{action}{Vector of predictors in order of entry}
+\item{sign}{Signs of coefficients of predictors, upon entry}
+\item{df}{Degrees of freedom of each active model}
+\item{beta}{Matrix of regression coefficients for each model along the path,
+one column per model}
+\item{completepath}{Was the complete stepwise path computed?}
+\item{bls}{If completepath is TRUE, the full least squares coefficients}
+\item{Gamma}{Matrix that captures the polyhedral selection at each step}
+\item{nk}{Number of polyhedral constraints at each step in path}
+\item{vreg}{Matrix of linear contrasts that gives coefficients of variables
+to enter along the path}
+\item{x}{Matrix of predictors used}
+\item{y}{Vector of outcomes used}
+\item{bx}{Vector of column means of original x}
+\item{by}{Mean of original y}
+\item{sx}{Norm of each column of original x}
+\item{intercept}{Was an intercept included?}
+\item{normalize}{Were the predictors normalized?}
+\item{call}{The call to fs}
+}
+
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\seealso{
+ \code{\link{fsInf}}, \code{\link{predict.fs}}, \code{\link{coef.fs}}, \code{\link{plot.fs}}
+}
+
+\examples{
+set.seed(33)
+n = 50
+p = 10
+sigma = 1
+x = matrix(rnorm(n*p),n,p)
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# run forward stepwise, plot results
+fsfit = fs(x,y)
+plot(fsfit)
+
+# compute sequential p-values and confidence intervals
+# (sigma estimated from full model)
+out = fsInf(fsfit)
+out
+}
diff --git a/selectiveInference-currentCRAN/man/fsInf.Rd b/selectiveInference-currentCRAN/man/fsInf.Rd
new file mode 100644
index 00000000..613bc5a8
--- /dev/null
+++ b/selectiveInference-currentCRAN/man/fsInf.Rd
@@ -0,0 +1,135 @@
+\name{fsInf}
+\alias{fsInf}
+\title{
+Selective inference for forward stepwise regression
+}
+\description{
+Computes p-values and confidence intervals for forward
+stepwise regression
+}
+\usage{
+fsInf(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"),
+      gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE)
+}
+\arguments{
+  \item{obj}{
+Object returned by \code{\link{fs}} function
+}
+\item{sigma}{
+Estimate of error standard deviation. If NULL (default), this is estimated
+using the mean squared residual of the full least squares fit when n >= 2p, and
+using the standard deviation of y when n < 2p. In the latter case, the user
+should use the \code{\link{estimateSigma}} function for a more accurate estimate
+}
+\item{alpha}{
+Significance level for confidence intervals (target is miscoverage alpha/2 in each tail)
+}
+\item{k}{
+See "type" argument below. Default is NULL, in which case k is taken to be
+the number of steps computed in the forward stepwise path
+}
+\item{type}{
+Type of analysis desired: with "active" (default), p-values and confidence intervals are
+computed for each predictor as it is entered into the active set, all the way through
+k steps; with "all", p-values and confidence intervals are computed for all variables in
+the active model after k steps; with "aic", the number of steps k is first estimated using
+a modified AIC criterion, and then the same type of analysis as in "all" is carried out for
+this particular value of k.
+
+Note that the AIC scheme is defined to choose a number of steps k after which the AIC criterion
+increases \code{ntimes} in a row, where \code{ntimes} can be specified by the user (see below).
+Under this definition, the AIC selection event is characterizable as a polyhedral set, and hence
+the extra conditioning can be taken into account exactly. Also note that an analogous BIC scheme
+can be specified through the \code{mult} argument (see below)
+}
+\item{gridrange}{
+Grid range for constructing confidence intervals, on the standardized scale
+}
+\item{bits}{
+Number of bits to be used for p-value and confidence interval calculations. Default is
+NULL, in which case standard floating point calculations are performed. When not NULL,
+multiple precision floating point calculations are performed with the specified number
+of bits, using the R package \code{Rmpfr} (if this package is not installed, then a
+warning is thrown, and standard floating point calculations are pursued).
+Note: standard double precision uses 53 bits,
+so, e.g., a choice of 200 bits uses about 4 times double precision. The confidence
+interval computation is sometimes numerically challenging, and the extra precision can be
+helpful (though computationally more costly). In particular, extra precision might be tried
+if the values in the output columns of \code{tailarea} differ noticeably from alpha/2.
+}
+\item{mult}{Multiplier for the AIC-style penalty. Hence a value of 2 (default)
+gives AIC, whereas a value of log(n) would give BIC}
+\item{ntimes}{Number of steps for which the AIC-style criterion has to increase before
+the minimizing point is declared}
+\item{verbose}{Print out progress along the way? Default is FALSE}
+}
+
+\details{
+This function computes selective p-values and confidence intervals (selection intervals)
+for forward stepwise regression. The default is to report the results for
+each predictor after its entry into the model. See the "type" argument for other options.
+The confidence interval construction involves numerical search and can be fragile:
+if the observed statistic is too close to either end of the truncation interval
+(vlo and vup, see references), then one or possibly both endpoints of the interval of
+desired coverage cannot be computed, and default to +/- Inf. The output \code{tailarea}
+gives the achieved Gaussian tail areas for the reported intervals---these should be close
+to alpha/2, and can be used for error-checking purposes.
+}
+
+\value{
+\item{type}{Type of analysis (active, all, or aic)}
+\item{k}{Value of k specified in call}
+\item{khat}{When type is "active", this is an estimated stopping point
+declared by \code{\link{forwardStop}}; when type is "aic", this is the
+value chosen by the modified AIC scheme}
+\item{pv}{One-sided P-values for active variables, computed using the sign with
+which each variable entered the model}
+\item{ci}{Confidence intervals}
+\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval}
+\item{vlo}{Lower truncation limits for statistics}
+\item{vup}{Upper truncation limits for statistics}
+\item{vmat}{Linear contrasts that define the observed statistics}
+\item{y}{Vector of outcomes}
+\item{vars}{Variables in active set}
+\item{sign}{Signs of active coefficients}
+\item{alpha}{Desired coverage (alpha/2 in each tail)}
+\item{sigma}{Value of error standard deviation (sigma) used}
+\item{call}{The call to fsInf}
+}
+
+\references{
+Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014).
+Exact post-selection inference for sequential regression procedures. arXiv:1401.3889.
+
+Joshua Loftus and Jonathan Taylor (2014). A significance test for forward stepwise
+model selection. arXiv:1405.3920.
+}
+
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\seealso{\code{\link{fs}}}
+
+\examples{
+set.seed(33)
+n = 50
+p = 10
+sigma = 1
+x = matrix(rnorm(n*p),n,p)
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# run forward stepwise
+fsfit = fs(x,y)
+
+# compute sequential p-values and confidence intervals
+# (sigma estimated from full model)
+out.seq = fsInf(fsfit)
+out.seq
+
+# compute p-values and confidence intervals after AIC stopping
+out.aic = fsInf(fsfit,type="aic")
+out.aic
+
+# compute p-values and confidence intervals after 5 fixed steps
+out.fix = fsInf(fsfit,type="all",k=5)
+out.fix
+}
diff --git a/selectiveInference-currentCRAN/man/groupfs.Rd b/selectiveInference-currentCRAN/man/groupfs.Rd
new file mode 100644
index 00000000..a57c6dc8
--- /dev/null
+++ b/selectiveInference-currentCRAN/man/groupfs.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/funs.groupfs.R
+\name{groupfs}
+\alias{groupfs}
+\title{Select a model with forward stepwise.}
+\usage{
+groupfs(x, y, index, maxsteps, sigma = NULL, k = 2, intercept = TRUE,
+  center = TRUE, normalize = TRUE, aicstop = 0, verbose = FALSE)
+}
+\arguments{
+\item{x}{Matrix of predictors (n by p).}
+
+\item{y}{Vector of outcomes (length n).}
+
+\item{index}{Group membership indicator of length p. It is required that \code{sort(unique(index)) == 1:G}, where \code{G} is the number of distinct groups.}
+
+\item{maxsteps}{Maximum number of steps for forward stepwise.}
+
+\item{sigma}{Estimate of error standard deviation for use in the AIC criterion. This determines the relative scale between the RSS and the degrees of freedom penalty. Default is NULL, corresponding to unknown sigma. When NULL, \code{\link{groupfsInf}} performs truncated F inference instead of truncated \eqn{\chi}. See \code{\link[stats]{extractAIC}} for details on the AIC criterion.}
+
+\item{k}{Multiplier of the model size penalty; the default is \code{k = 2} for AIC. Use \code{k = log(n)} for BIC, or \code{k = 2*log(p)} for RIC (best for high dimensions, when \eqn{p > n}). If \eqn{G < p}, then RIC may be too restrictive and it would be better to use \code{log(G) < k < 2*log(p)}.}
+
+\item{intercept}{Should an intercept be included in the model? Default is TRUE. Does not count as a step.}
+
+\item{center}{Should the columns of the design matrix be centered? Default is TRUE.}
+
+\item{normalize}{Should the design matrix be normalized? Default is TRUE.}
+
+\item{aicstop}{Early stopping if AIC increases. Default is 0, corresponding to no early stopping. Positive integer values specify the number of times the AIC is allowed to increase in a row, e.g. with \code{aicstop = 2} the algorithm will stop if the AIC criterion increases for 2 steps in a row. The default of \code{\link[stats]{step}} corresponds to \code{aicstop = 1}.}
+
+\item{verbose}{Print out progress along the way? Default is FALSE.}
+}
+\value{
+An object of class "groupfs" containing information about the sequence of models in the forward stepwise algorithm. Call the function \code{\link{groupfsInf}} on this object to compute selective p-values.
+}
+\description{
+This function implements forward selection of linear models almost identically to \code{\link[stats]{step}} with \code{direction = "forward"}.
The reason this is a separate function from \code{\link{fs}} is that groups of variables (e.g. dummies encoding levels of a categorical variable) must be handled differently in the selective inference framework. +} +\examples{ +x = matrix(rnorm(20*40), nrow=20) +index = sort(rep(1:20, 2)) +y = rnorm(20) + 2 * x[,1] - x[,4] +fit = groupfs(x, y, index, maxsteps = 5) +pvals = groupfsInf(fit) +} +\seealso{ +\code{\link{groupfsInf}}, \code{\link{factorDesign}}. +} + diff --git a/selectiveInference-currentCRAN/man/groupfsInf.Rd b/selectiveInference-currentCRAN/man/groupfsInf.Rd new file mode 100644 index 00000000..74b9a5e0 --- /dev/null +++ b/selectiveInference-currentCRAN/man/groupfsInf.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{groupfsInf} +\alias{groupfsInf} +\title{Compute selective p-values for a model fitted by \code{groupfs}.} +\usage{ +groupfsInf(obj, sigma = NULL, verbose = TRUE) +} +\arguments{ +\item{obj}{Object returned by \code{\link{groupfs}} function} + +\item{sigma}{Estimate of error standard deviation. Default is NULL and in this case groupfsInf uses the value of sigma specified to \code{\link{groupfs}}.} + +\item{verbose}{Print out progress along the way? Default is TRUE.} +} +\value{ +An object of class "groupfsInf" containing selective p-values for the fitted model \code{obj}. For comparison with \code{\link{fsInf}}, note that the option \code{type = "active"} is not available. + +\describe{ + \item{vars}{Labels of the active groups in the order they were included.} + \item{pv}{Selective p-values computed from appropriate truncated distributions.} + \item{sigma}{Estimate of error variance used in computing p-values.} + \item{TC or TF}{Observed value of truncated \eqn{\chi} or \eqn{F}.} + \item{df}{Rank of group of variables when it was added to the model.} + \item{support}{List of intervals defining the truncation region of the corresponding statistic.} +} +} +\description{ +Computes p-values for each group of variables in a model fitted by \code{\link{groupfs}}. These p-values adjust for selection by truncating the usual \eqn{\chi^2} statistics to the regions implied by the model selection event. If the \code{sigma} to \code{\link{groupfs}} was NULL then groupfsInf uses truncated \eqn{F} statistics instead of truncated \eqn{\chi}. The \code{sigma} argument to groupfsInf allows users to override and use \eqn{\chi}, but this is not recommended unless \eqn{\sigma} can be estimated well (i.e. \eqn{n > p}). +} + diff --git a/selectiveInference-currentCRAN/man/lar.Rd b/selectiveInference-currentCRAN/man/lar.Rd new file mode 100644 index 00000000..590a6633 --- /dev/null +++ b/selectiveInference-currentCRAN/man/lar.Rd @@ -0,0 +1,95 @@ +\name{lar} +\alias{lar} +\title{ +Least angle regression +} +\description{ +This function implements least angle regression, for use in the +selectiveInference package +} +\usage{ +lar(x, y, maxsteps=2000, minlam=0, intercept=TRUE, normalize=TRUE, + verbose=FALSE) +} + +\arguments{ + \item{x}{ +Matrix of predictors (n by p) +} + \item{y}{ +Vector of outcomes (length n) +} + \item{maxsteps}{ +Maximum number of steps to take +} +\item{minlam}{ +Minimum value of lambda to consider +} +\item{intercept}{Should an intercept be included on the model? Default is TRUE} +\item{normalize}{Should the predictors be normalized? Default is TRUE} +\item{verbose}{Print out progress along the way? 
Default is FALSE} +} + +\details{ +The least angle regression algorithm is described in detail by Efron et al. (2002). +This function should match (in terms of its output) that from the \code{lars} package, +but returns additional information (namely, the polyhedral constraints) needed for the +selective inference calculations. +} + +\value{ +\item{lambda}{Values of lambda (knots) visited along the path} +\item{action}{Vector of predictors in order of entry} +\item{sign}{Signs of coefficients of predictors, upon entry} +\item{df}{Degrees of freedom of each active model} +\item{beta}{Matrix of regression coefficients for each model along the path, +one model per column} +\item{completepath}{Was the complete stepwise path computed?} +\item{bls}{If completepath is TRUE, the full least squares coefficients} +\item{Gamma}{Matrix that captures the polyhedral selection at each step} +\item{nk}{Number of polyhedral constraints at each step in path} +\item{vreg}{Matrix of linear contrasts that gives coefficients of variables +to enter along the path} +\item{mp}{Value of M+ (for internal use with the spacing test)} +\item{x}{Matrix of predictors used} +\item{y}{Vector of outcomes used} +\item{bx}{Vector of column means of original x} +\item{by}{Mean of original y} +\item{sx}{Norm of each column of original x} +\item{intercept}{Was an intercept included?} +\item{normalize}{Were the predictors normalized?} +\item{call}{The call to lar} +} + +\references{ +Brad Efron, Trevor Hastie, Iain Johnstone, and Rob Tibshirani (2002). +Least angle regression. Annals of Statistics (with discussion). + +See also the descriptions in Trevor Hastie, Rob Tibshirani, and +Jerome Friedman (2002, 2009). Elements of Statistical Learning. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Max G'Sell, Joshua Loftus, Stephen Reid} + +\seealso{ + \code{\link{larInf}}, \code{\link{predict.lar}}, \code{\link{coef.lar}}, \code{\link{plot.lar}} +} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run LAR, plot results +larfit = lar(x,y) +plot(larfit) + +# compute sequential p-values and confidence intervals +# (sigma estimated from full model) +out = larInf(larfit) +out +} diff --git a/selectiveInference-currentCRAN/man/larInf.Rd b/selectiveInference-currentCRAN/man/larInf.Rd new file mode 100644 index 00000000..8e3b2d03 --- /dev/null +++ b/selectiveInference-currentCRAN/man/larInf.Rd @@ -0,0 +1,140 @@ +\name{larInf} +\alias{larInf} +\title{ +Selective inference for least angle regression +} +\description{ +Computes p-values and confidence intervals for least +angle regression +} +\usage{ +larInf(obj, sigma=NULL, alpha=0.1, k=NULL, type=c("active","all","aic"), + gridrange=c(-100,100), bits=NULL, mult=2, ntimes=2, verbose=FALSE) +} +\arguments{ + \item{obj}{ +Object returned by \code{lar} function (not the \code{lars} function!) +} +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares fit when n >= 2p, and +using the standard deviation of y when n < 2p. In the latter case, the user +should use \code{\link{estimateSigma}} function for a more accurate estimate +} +\item{alpha}{ +Significance level for confidence intervals (target is miscoverage alpha/2 in each tail) +} +\item{k}{ +See "type" argument below. 
Default is NULL, in which case k is taken to be
+the number of steps computed in the least angle regression path
+}
+\item{type}{
+Type of analysis desired: with "active" (default), p-values and confidence intervals are
+computed for each predictor as it is entered into the active set, all the way through
+k steps; with "all", p-values and confidence intervals are computed for all variables in
+the active model after k steps; with "aic", the number of steps k is first estimated using
+a modified AIC criterion, and then the same type of analysis as in "all" is carried out for
+this particular value of k.
+
+Note that the AIC scheme is defined to choose a number of steps k after which the AIC criterion
+increases \code{ntimes} in a row, where \code{ntimes} can be specified by the user (see below).
+Under this definition, the AIC selection event is characterizable as a polyhedral set, and hence
+the extra conditioning can be taken into account exactly. Also note that an analogous BIC scheme
+can be specified through the \code{mult} argument (see below)
+}
+\item{gridrange}{
+Grid range for constructing confidence intervals, on the standardized scale
+}
+\item{bits}{
+Number of bits to be used for p-value and confidence interval calculations. Default is
+NULL, in which case standard floating point calculations are performed. When not NULL,
+multiple precision floating point calculations are performed with the specified number
+of bits, using the R package \code{Rmpfr} (if this package is not installed, then a
+warning is thrown, and standard floating point calculations are pursued).
+Note: standard double precision uses 53 bits,
+so, e.g., a choice of 200 bits uses about 4 times double precision. The confidence
+interval computation is sometimes numerically challenging, and the extra precision can be
+helpful (though computationally more costly). In particular, extra precision might be tried
+if the values in the output columns of \code{tailarea} differ noticeably from alpha/2.
+}
+\item{mult}{Multiplier for the AIC-style penalty. Hence a value of 2 (default)
+gives AIC, whereas a value of log(n) would give BIC}
+\item{ntimes}{Number of steps for which the AIC-style criterion has to increase before
+the minimizing point is declared}
+\item{verbose}{Print out progress along the way? Default is FALSE}
+}
+
+\details{
+This function computes selective p-values and confidence intervals (selection intervals)
+for least angle regression. The default is to report the results for
+each predictor after its entry into the model. See the "type" argument for other options.
+The confidence interval construction involves numerical search and can be fragile:
+if the observed statistic is too close to either end of the truncation interval
+(vlo and vup, see references), then one or possibly both endpoints of the interval of
+desired coverage cannot be computed, and default to +/- Inf. The output \code{tailarea}
+gives the achieved Gaussian tail areas for the reported intervals---these should be close
+to alpha/2, and can be used for error-checking purposes.
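+
+As a simple illustration (a sketch only, not a package feature, and shown
+not-run in the style of the other examples; the 0.01 tolerance is an arbitrary
+choice), one might compare the realized tail areas to the nominal value and
+rerun with extra precision if they disagree:
+\preformatted{
+## illustrative sketch, not run
+# out = larInf(larfit)                       # standard double precision
+# if (any(abs(out$tailarea - out$alpha/2) > 0.01, na.rm=TRUE))
+#   out = larInf(larfit, bits=200)           # extra precision, requires Rmpfr
+}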
+}
+
+\value{
+\item{type}{Type of analysis (active, all, or aic)}
+\item{k}{Value of k specified in call}
+\item{khat}{When type is "active", this is an estimated stopping point
+declared by \code{\link{forwardStop}}; when type is "aic", this is the
+value chosen by the modified AIC scheme}
+\item{pv}{P-values for active variables}
+\item{ci}{Confidence intervals}
+\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval}
+\item{vlo}{Lower truncation limits for statistics}
+\item{vup}{Upper truncation limits for statistics}
+\item{vmat}{Linear contrasts that define the observed statistics}
+\item{y}{Vector of outcomes}
+\item{pv.spacing}{P-values from the spacing test (here M+ is used)}
+\item{pv.modspac}{P-values from the modified form of the spacing test
+(here M+ is replaced by the next knot)}
+\item{pv.covtest}{P-values from the covariance test}
+\item{vars}{Variables in active set}
+\item{sign}{Signs of active coefficients}
+\item{alpha}{Desired coverage (alpha/2 in each tail)}
+\item{sigma}{Value of error standard deviation (sigma) used}
+\item{call}{The call to larInf}
+}
+
+\references{
+Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014).
+Exact post-selection inference for sequential regression procedures. arXiv:1401.3889.
+}
+
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\seealso{
+ \code{\link{lar}}
+}
+
+\examples{
+set.seed(43)
+n = 50
+p = 10
+sigma = 1
+x = matrix(rnorm(n*p),n,p)
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# run LAR
+larfit = lar(x,y)
+
+# compute sequential p-values and confidence intervals
+# (sigma estimated from full model)
+out.seq = larInf(larfit)
+out.seq
+
+# compute p-values and confidence intervals after AIC stopping
+out.aic = larInf(larfit,type="aic")
+out.aic
+
+# compute p-values and confidence intervals after 5 fixed steps
+out.fix = larInf(larfit,type="all",k=5)
+out.fix
+}
+
diff --git a/selectiveInference-currentCRAN/man/manyMeans.Rd b/selectiveInference-currentCRAN/man/manyMeans.Rd
new file mode 100644
index 00000000..57fc4296
--- /dev/null
+++ b/selectiveInference-currentCRAN/man/manyMeans.Rd
@@ -0,0 +1,60 @@
+\name{manyMeans}
+\alias{manyMeans}
+\title{
+Selective inference for many normal means
+}
+\description{
+Computes p-values and confidence intervals for the largest k
+among many normal means
+}
+\usage{
+manyMeans(y, alpha=0.1, bh.q=NULL, k=NULL, sigma=1, verbose=FALSE)
+}
+
+\arguments{
+  \item{y}{Vector of outcomes (length n)}
+\item{alpha}{
+Significance level for confidence intervals (target is miscoverage alpha/2 in each tail)
+}
+\item{bh.q}{q parameter for BH(q) procedure}
+\item{k}{Number of means to consider}
+\item{sigma}{Estimate of error standard deviation}
+\item{verbose}{Print out progress along the way? Default is FALSE}
+}
+\details{
+This function computes p-values and confidence intervals for the largest k
+among many normal means. One can specify a fixed number of means k to consider,
+or choose the number to consider via the BH rule.
+}
+
+\value{
+\item{mu.hat}{ Vector of length n containing the estimated signal sizes.
+If a sample element is not selected, then its signal size estimate is 0}
+\item{selected.set}{Indices of the vector y of the sample elements that
+were selected by the procedure (either BH(q) or top-K).
Labelled "Selind" in output table.} +\item{pv}{P-values for selected signals} +\item{ci}{Confidence intervals} +\item{method}{Method used to choose number of means} +\item{sigma}{Value of error standard deviation (sigma) used} +\item{bh.q}{BH q-value used} +\item{k}{Desired number of means} +\item{threshold}{Computed cutoff} +\item{call}{The call to manyMeans} +} + +\references{ +Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014). +Post-selection point and interval estimation of signal sizes in Gaussian samples. +arXiv:1405.3340. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(12345) +n = 100 +mu = c(rep(3,floor(n/5)), rep(0,n-floor(n/5))) +y = mu + rnorm(n) +out = manyMeans(y, bh.q=0.1) +out +} diff --git a/selectiveInference-currentCRAN/man/plot.fs.Rd b/selectiveInference-currentCRAN/man/plot.fs.Rd new file mode 100644 index 00000000..4f770132 --- /dev/null +++ b/selectiveInference-currentCRAN/man/plot.fs.Rd @@ -0,0 +1,42 @@ +\name{plot.fs} +\alias{plot.fs} + +\title{ +Plot function for forward stepwise regression +} +\description{ +Plot coefficient profiles along the forward stepwise path +} + +\usage{ +\method{plot}{fs} (x, breaks=TRUE, omit.zeros=TRUE, var.labels=TRUE, ...) +} + +\arguments{ +\item{x}{ +Object returned by a call to \code{fs} function +} +\item{breaks}{Should vertical lines be drawn at each break point in the piecewise +linear coefficient paths? Default is TRUE} +\item{omit.zeros}{Should segments of the coefficients paths that are equal to +zero be omitted (to avoid clutter in the figure)? Default is TRUE} +\item{var.labels}{Should paths be labelled with corresponding variable numbers? +Default is TRUE} +\item{...}{Additional arguments for plotting} +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(33) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise, plot results +fsfit = fs(x,y) +plot(fsfit) +} diff --git a/selectiveInference-currentCRAN/man/plot.lar.Rd b/selectiveInference-currentCRAN/man/plot.lar.Rd new file mode 100644 index 00000000..baa1195d --- /dev/null +++ b/selectiveInference-currentCRAN/man/plot.lar.Rd @@ -0,0 +1,46 @@ +\name{plot.lar} +\alias{plot.lar} + +\title{ +Plot function for least angle regression +} +\description{ +Plot coefficient profiles along the LAR path +} + +\usage{ +\method{plot}{lar}(x, xvar=c("norm","step","lambda"), breaks=TRUE, + omit.zeros=TRUE, var.labels=TRUE, ...) +} + +\arguments{ +\item{x}{ +Object returned by a call to \code{lar} function +(not the \code{lars} function!) +} +\item{xvar}{Either "norm" or "step" or "lambda", determining what is plotted +on the x-axis} +\item{breaks}{Should vertical lines be drawn at each break point in the piecewise +linear coefficient paths? Default is TRUE} +\item{omit.zeros}{Should segments of the coefficients paths that are equal to +zero be omitted (to avoid clutter in the figure)? Default is TRUE} +\item{var.labels}{Should paths be labelled with corresponding variable numbers? 
+Default is TRUE} +\item{...}{Additional arguments for plotting} +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# run LAR, plot results +larfit = lar(x,y) +plot(larfit) +} diff --git a/selectiveInference-currentCRAN/man/predict.fs.Rd b/selectiveInference-currentCRAN/man/predict.fs.Rd new file mode 100644 index 00000000..5e504824 --- /dev/null +++ b/selectiveInference-currentCRAN/man/predict.fs.Rd @@ -0,0 +1,49 @@ +\name{predict.fs} +\alias{predict.fs} +\alias{coef.fs} + +\title{ +Prediction and coefficient functions for forward stepwise +regression +} +\description{ +Make predictions or extract coefficients from a forward stepwise object +} +\usage{ +\method{predict}{fs}(object, newx, s, ...) +\method{coef}{fs}(object, s, ...) +} + +\arguments{ +\item{object}{ +Object returned by a call to \code{fs} function +} +\item{newx}{ +Matrix of x values at which the predictions are desired. If NULL, +the x values from forward stepwise fitting are used +} +\item{s}{ +Step number(s) at which predictions or coefficients are desired +} +\item{\dots}{Additional arguments} +} + +\value{ +Either a vector/matrix of predictions, or a vector/matrix of coefficients. +} + +\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid} + +\examples{ +set.seed(33) +n = 200 +p = 20 +sigma = 1 +x = matrix(rnorm(n*p),n,p) +beta = c(rep(3,10),rep(0,p-10)) +y = x\%*\%beta + sigma*rnorm(n) + +# run forward stepwise and predict functions +obj = fs(x,y) +fit = predict(obj,x,s=3) +} diff --git a/selectiveInference-currentCRAN/man/predict.groupfs.Rd b/selectiveInference-currentCRAN/man/predict.groupfs.Rd new file mode 100644 index 00000000..4a382c7e --- /dev/null +++ b/selectiveInference-currentCRAN/man/predict.groupfs.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/funs.groupfs.R +\name{predict.groupfs} +\alias{predict.groupfs} +\title{Prediction and coefficient functions for \code{\link{groupfs}}. + +Make predictions or extract coefficients from a groupfs forward stepwise object.} +\usage{ +\method{predict}{groupfs}(object, newx) +} +\arguments{ +\item{object}{Object returned by a call to \code{\link{groupfs}}.} + +\item{newx}{Matrix of x values at which the predictions are desired. If NULL, the x values from groupfs fitting are used.} +} +\value{ +A vector of predictions or a vector of coefficients. +} +\description{ +Prediction and coefficient functions for \code{\link{groupfs}}. + +Make predictions or extract coefficients from a groupfs forward stepwise object. +} + diff --git a/selectiveInference-currentCRAN/man/predict.lar.Rd b/selectiveInference-currentCRAN/man/predict.lar.Rd new file mode 100644 index 00000000..c91bed34 --- /dev/null +++ b/selectiveInference-currentCRAN/man/predict.lar.Rd @@ -0,0 +1,52 @@ +\name{predict.lar} +\alias{predict.lar} +\alias{coef.lar} + +\title{ +Prediction and coefficient functions for least angle regression +} +\description{ +Make predictions or extract coefficients from a least angle regression object +} +\usage{ +\method{predict}{lar}(object, newx, s, mode=c("step","lambda"), ...) +\method{coef}{lar}(object, s, mode=c("step","lambda"), ...) +} + +\arguments{ +\item{object}{ +Object returned by a call to \code{lar} function +(not the \code{lars} function!) 
+}
+\item{newx}{
+Matrix of x values at which the predictions are desired. If NULL,
+the x values from least angle regression fitting are used
+}
+\item{s}{
+Step number(s) or lambda value(s) at which predictions or coefficients
+are desired
+}
+\item{mode}{Either "step" or "lambda", determining the role of s (above)}
+
+\item{\dots}{Additional arguments}
+}
+
+\value{
+Either a vector/matrix of predictions, or a vector/matrix of coefficients.
+}
+
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+
+\examples{
+set.seed(33)
+n = 200
+p = 20
+sigma = 1
+x = matrix(rnorm(n*p),n,p)
+beta = c(rep(3,10),rep(0,p-10))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# run lar and predict functions
+obj = lar(x,y)
+fit = predict(obj,x,s=3)
+}
diff --git a/selectiveInference-currentCRAN/man/scaleGroups.Rd b/selectiveInference-currentCRAN/man/scaleGroups.Rd
new file mode 100644
index 00000000..e5a93fab
--- /dev/null
+++ b/selectiveInference-currentCRAN/man/scaleGroups.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/funs.groupfs.R
+\name{scaleGroups}
+\alias{scaleGroups}
+\title{Center and scale design matrix by groups}
+\usage{
+scaleGroups(x, index, center = TRUE, normalize = TRUE)
+}
+\arguments{
+\item{x}{Design matrix.}
+
+\item{index}{Group membership indicator of length p.}
+
+\item{center}{Center groups, default is TRUE.}
+
+\item{normalize}{Scale groups by Frobenius norm, default is TRUE.}
+}
+\value{
+\describe{
+  \item{x}{Optionally centered/scaled design matrix.}
+  \item{xm}{Means of groups in original design matrix.}
+  \item{xs}{Frobenius norms of groups in original design matrix.}
+}
+}
+\description{
+For internal use by \code{\link{groupfs}}.
+}
+
diff --git a/selectiveInference-currentCRAN/man/selectiveInference-internal.Rd b/selectiveInference-currentCRAN/man/selectiveInference-internal.Rd
new file mode 100644
index 00000000..48081736
--- /dev/null
+++ b/selectiveInference-currentCRAN/man/selectiveInference-internal.Rd
@@ -0,0 +1,24 @@
+\name{selectiveInference-internal}
+\title{Internal selectiveInference functions}
+\alias{print.fixedLassoInf}
+\alias{print.fs}
+\alias{print.fsInf}
+\alias{print.larInf}
+\alias{print.lar}
+\alias{print.manyMeans}
+
+\description{Internal selectiveInference functions}
+\usage{
+\method{print}{fs}(x, ...)
+\method{print}{fsInf}(x, tailarea = TRUE, ...)
+\method{print}{lar}(x, ...)
+\method{print}{larInf}(x, tailarea = TRUE, ...)
+\method{print}{fixedLassoInf}(x, tailarea = TRUE, ...)
+\method{print}{manyMeans}(x, ...)
+}
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid}
+\keyword{internal}
+
diff --git a/selectiveInference-currentCRAN/man/selectiveInference.Rd b/selectiveInference-currentCRAN/man/selectiveInference.Rd
new file mode 100644
index 00000000..6e038d42
--- /dev/null
+++ b/selectiveInference-currentCRAN/man/selectiveInference.Rd
@@ -0,0 +1,205 @@
+\name{selectiveInference}
+\alias{selectiveInference}
+\docType{package}
+\title{
+Tools for selective inference
+}
+\description{
+Functions to perform post-selection inference for forward
+stepwise regression, least angle regression, the lasso and the
+many normal means problem. The lasso function also supports logistic regression and the Cox model.
+}
+\details{
+\tabular{ll}{
+Package: \tab selectiveInference\cr
+Type: \tab Package\cr
+License: \tab GPL-2\cr
+}
+
+This package provides tools for inference after selection, in forward stepwise
+regression, least angle regression, the lasso, and the many normal means problem.
+The functions compute p-values and selection intervals that properly account for
+the inherent selection carried out by the procedure. These have exact finite-sample
+type I error and coverage under Gaussian errors. For the logistic and Cox families
+(fixedLassoInf), the coverage is asymptotically valid.
+
+This R package was developed as part of the selective inference software project
+in Python and R:
+
+\url{https://github.com/selective-inference}
+
+Some of the R code in this work is a modification of Python code from this
+repository. Here is the current selective inference software team:
+
+Yuval Benjamini,
+Leonard Blier,
+Will Fithian,
+Jason Lee,
+Joshua Loftus,
+Stephen Reid,
+Dennis Sun,
+Yuekai Sun,
+Jonathan Taylor,
+Xiaoying Tian,
+Ryan Tibshirani,
+Rob Tibshirani
+
+The main functions included in the package are:
+\code{\link{fs}},
+\code{\link{fsInf}},
+\code{\link{lar}},
+\code{\link{larInf}},
+\code{\link{fixedLassoInf}},
+\code{\link{manyMeans}}
+}
+
+\author{Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid
+
+Maintainer: Rob Tibshirani
+}
+
+\references{
+Ryan Tibshirani, Jonathan Taylor, Richard Lockhart, and Rob Tibshirani (2014).
+Exact post-selection inference for sequential regression procedures. arXiv:1401.3889.
+
+Jason Lee, Dennis Sun, Yuekai Sun, and Jonathan Taylor (2013).
+Exact post-selection inference, with application to the lasso. arXiv:1311.6238.
+
+Stephen Reid, Jonathan Taylor, and Rob Tibshirani (2014).
+Post-selection point and interval estimation of signal sizes in Gaussian samples.
+arXiv:1405.3340.
+
+Jonathan Taylor and Robert Tibshirani (2016). Post-selection inference for L1-penalized likelihood models.
+arXiv:1602.07358
+}
+
+\examples{
+set.seed(33)
+n = 50
+p = 10
+sigma = 1
+x = matrix(rnorm(n*p),n,p)
+beta = c(3,2,rep(0,p-2))
+y = x\%*\%beta + sigma*rnorm(n)
+
+# run forward stepwise
+fsfit = fs(x,y)
+
+# compute sequential p-values and confidence intervals
+# (sigma estimated from full model)
+out.seq = fsInf(fsfit)
+out.seq
+
+# compute p-values and confidence intervals after AIC stopping
+out.aic = fsInf(fsfit,type="aic")
+out.aic
+
+# compute p-values and confidence intervals after 5 fixed steps
+out.fix = fsInf(fsfit,type="all",k=5)
+out.fix
+
+## NOT RUN---lasso at fixed lambda- Gaussian family
+## first run glmnet
+# gfit = glmnet(x,y)
+
+## extract coef for a given lambda; note the 1/n factor!
+## (and we don't save the intercept term)
+# lambda = .1
+# beta = coef(gfit, s=lambda/n, exact=TRUE)[-1]
+
+## compute fixed lambda p-values and selection intervals
+# out = fixedLassoInf(x,y,beta,lambda,sigma=sigma)
+# out
+
+
+#lasso at fixed lambda- logistic family
+#set.seed(43)
+ # n = 50
+ # p = 10
+ # sigma = 1
+
+ # x = matrix(rnorm(n*p),n,p)
+ # x = scale(x,TRUE,TRUE)
+ #
+# beta = c(3,2,rep(0,p-2))
+ # y = x\%*\%beta + sigma*rnorm(n)
+ # y = 1*(y>mean(y))
+ # first run glmnet
+ # gfit = glmnet(x,y,standardize=FALSE,family="binomial")
+
+ # extract coef for a given lambda; note the 1/n factor!
+ # (and here we DO include the intercept term) + # lambda = .8 + # beta = coef(gfit, s=lambda/n, exact=TRUE) + + # # compute fixed lambda p-values and selection intervals + # out = fixedLassoInf(x,y,beta,lambda,family="binomial") + # out + +##lasso at fixed lambda- Cox family +#set.seed(43) +# n = 50 + # p = 10 + # sigma = 1 + + # x = matrix(rnorm(n*p),n,p) + # x=scale(x,TRUE,TRUE) + + # beta = c(3,2,rep(0,p-2)) + # tim = as.vector(x\%*\%beta + sigma*rnorm(n)) + # tim= tim-min(tim)+1 +#status=sample(c(0,1),size=n,replace=T) + # first run glmnet + # gfit = glmnet(x,Surv(tim,status),standardize=FALSE,family="cox") + # extract coef for a given lambda; note the 1/n factor! + + # lambda = 1.5 + # beta = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) + + # compute fixed lambda p-values and selection intervals + # out = fixedLassoInf(x,tim,beta,lambda,status=status,family="cox") + # out +## NOT RUN---many normal means +# set.seed(12345) +# n = 100 +# mu = c(rep(3,floor(n/5)), rep(0,n-floor(n/5))) +# y = mu + rnorm(n) +# out = manyMeans(y, bh.q=0.1) +# out + +## NOT RUN---forward stepwise with groups +# set.seed(1) +# n = 20 +# p = 40 +# x = matrix(rnorm(n*p), nrow=n) +# index = sort(rep(1:(p/2), 2)) +# y = rnorm(n) + 2 * x[,1] - x[,4] +# fit = groupfs(x, y, index, maxsteps = 5) +# out = groupfsInf(fit) +# out + +## NOT RUN---estimation of sigma for use in fsInf +## (or larInf or fixedLassoInf) +# set.seed(33) +# n = 50 +# p = 10 +# sigma = 1 +# x = matrix(rnorm(n*p),n,p) +# beta = c(3,2,rep(0,p-2)) +# y = x\%*\%beta + sigma*rnorm(n) + +## run forward stepwise +# fsfit = fs(x,y) + +## estimate sigma +# sigmahat = estimateSigma(x,y)$sigmahat + +## run sequential inference with estimated sigma +# out = fsInf(fit,sigma=sigmahat) +# out +} +\keyword{ package } + + diff --git a/selectiveInference-currentCRAN/src/matrixcomps.c b/selectiveInference-currentCRAN/src/matrixcomps.c new file mode 100644 index 00000000..4a516a1e --- /dev/null +++ b/selectiveInference-currentCRAN/src/matrixcomps.c @@ -0,0 +1,266 @@ +#include +#include + +// Matrices are stored as vectors, in column-major order + +// Givens rotation of a and b, stored in c and s +void givens(double a, double b, double *c, double *s) { + if (b==0) { + *c = 1; + *s = 0; + } + else { + if (fabs(b)>fabs(a)) { + double t = -a/b; + *s = 1/sqrt(1+t*t); + *c = (*s)*t; + } + else { + double t = -b/a; + *c = 1/sqrt(1+t*t); + *s = (*c)*t; + } + } +} + +// Givens rotation applied to rows i1 and i2 of the m x n +// matrix A, on the subset of columns j1 through j2 +void rowrot(double *A, int i1, int i2, int m, int n, int j1, int j2, double c, double s) { + int j; + double t1,t2; + for (j=j1; j<=j2; j++) { + t1 = A[i1+j*m]; + t2 = A[i2+j*m]; + A[i1+j*m] = c*t1-s*t2; + A[i2+j*m] = s*t1+c*t2; + } +} + +// Givens rotation applied to columns j1 and j2 of the m x n +// matrix A, on the subset of rows i1 through i2 +void colrot(double *A, int j1, int j2, int m, int n, int i1, int i2, double c, double s) { + int i; + double t1,t2; + for (i=i1; i<=i2; i++) { + t1 = A[i+j1*m]; + t2 = A[i+j2*m]; + A[i+j1*m] = c*t1-s*t2; + A[i+j2*m] = s*t1+c*t2; + } +} + +// Downdate the QR factorization after deleting column j0, +// where Q1 is m x n and R is n x n. 
The other part of +// the Q matrix, Q2 m x (m-n), isn't needed so it isn't +// passed for efficiency +void downdate1(double *Q1, double *R, int *j0p, int *mp, int *np) { + int j0,m,n,j; + j0 = *j0p; + m = *mp; + n = *np; + + double c,s; + for (j=j0+1; j=1; j--) { + // Compute the appropriate c and s + givens(w[j-1],w[j],&c,&s); + + // Pre-multiply w + rowrot(w,j-1,j,k,1,0,0,c,s); + + // Post-multiply Q2 + colrot(Q2,j-1,j,m,k,0,m-1,c,s); + } +} + +// Downdate the QR factorization after deleting the first row, +// where Q is m x m and R is m x n +void downdate2(double *Q, double *R, int *mp, int *np) { + int m,n,i; + m = *mp; + n = *np; + + double c,s; + for (i=m-1; i>=1; i--) { + // Compute the appropriate c and s + givens(Q[(i-1)*m],Q[i*m],&c,&s); + + // Post-mutiply Q + colrot(Q,i-1,i,m,m,0,m-1,c,s); + + // Pre-multiply R + if (i<=n) rowrot(R,i-1,i,m,n,i-1,n-1,c,s); + } +} + +// Update the QR factorization after adding the last row, +// where Q is m x m and R is m x n. For efficiency, Q is not +// passed, and only the first row of R is passed. Not counting +// its first row, the first q columns of R are zero +void update2(double *y, double *D, double *r, int *mp, int *np, int *qp) { + int m,n,q,j; + m = *mp; + n = *np; + q = *qp; + + double c,s; + for (j=0; j=0; i--) { + for (j=i; j=0; i--) { + for (j=i; j=0; i--) { + // Compute the appropriate c and s + givens(R[i+(i+q+1)*m2],R[i+(i+q)*m2],&c,&s); + + // Post-multiply R + colrot(R,i+q+1,i+q,m2,n,0,i,c,s); + + // Post-multiply D + colrot(A,i+q+1,i+q,m1,n,0,m1-1,c,s); + + // Pre-multiply y + rowrot(y,i+q+1,i+q,n,1,0,0,c,s); + } +} + +// Make the R factor upper triangular, by Givens rotating +// its columns and rows, appropriately. Here A is m1 x n, +// Q is m2 x m2, and R is m2 x n with rank(R) = n-q-1. The +// first q columns of R are zero. The kth row of R is the +// last row with a zero element on the diagonal +void maketri4(double *y, double *A, double *Q, double *R, int *m1p, int *m2p, int *np, int *qp, int *kp) { + int m1,m2,n,q,k,i,j; + m1 = *m1p; + m2 = *m2p; + n = *np; + q = *qp; + k = *kp; + + double c,s; + + // First rotate the columns + for (i=k-1; i>=0; i--) { + // Compute the appropriate c and s + givens(R[i+(i+q+1)*m2],R[i+(i+q)*m2],&c,&s); + + // Post-multiply R + colrot(R,i+q+1,i+q,m2,n,0,i,c,s); + + // Post-multiply D + colrot(A,i+q+1,i+q,m1,n,0,m1-1,c,s); + + // Pre-multiply y + rowrot(y,i+q+1,i+q,n,1,0,0,c,s); + } + + // Next rotate the rows + for (j=k+q+1; j +#include + +/* . 
entry points */ +extern void update1(double *Q2, double *w, int *mp, int *kp); +static R_NativePrimitiveArgType update1_t[] = { + REALSXP, REALSXP, INTSXP, INTSXP +}; + +extern void downdate1(double *Q1, double *R, int *j0p, int *mp, int *np); +static R_NativePrimitiveArgType downdate1_t[] = { + REALSXP, REALSXP, INTSXP, INTSXP, INTSXP +}; + +static const R_CMethodDef CEntries[] = { + {"update1", (DL_FUNC) &update1, 4}, + {"downdate1", (DL_FUNC) &downdate1, 5}, + {NULL, NULL, 0} +}; + +void R_init_cubature(DllInfo *dll) { + R_registerRoutines(dll, CEntries, NULL, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/selectiveInference-currentCRAN/src/truncnorm.c b/selectiveInference-currentCRAN/src/truncnorm.c new file mode 100644 index 00000000..cca61d93 --- /dev/null +++ b/selectiveInference-currentCRAN/src/truncnorm.c @@ -0,0 +1,188 @@ +#include +#include + +// Take a Gibbs hit and run step along a given direction + +// Assumes the covariance is identity + +void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *direction, /* direction we will take Gibbs step */ + double *U, /* A %*% state - b */ + double *alpha, /* A %*% direction */ + int nconstraint, /* number of rows of A */ + int nstate) /* dimension of state */ +{ + + int istate; + double value = 0; + + /* Compute V=\eta^Ty */ + + for (istate = 0; istate < nstate; istate++) { + value += direction[istate] * state[istate]; + } + + /* Compute upper and lower bounds */ + + double lower_bound = -1e12; + double upper_bound = 1e12; + double bound_val = 0; + double tol=1.e-7; + int iconstraint; + + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + + bound_val = -U[iconstraint] / alpha[iconstraint] + value; + + if ((alpha[iconstraint] > tol) && + (bound_val < upper_bound)) { + upper_bound = bound_val; + } + else if ((alpha[iconstraint] < -tol) && + (bound_val > lower_bound)) { + lower_bound = bound_val; + } + + } + + /* Ensure constraints are satisfied */ + + if (lower_bound > value) { + lower_bound = value - tol; + } + else if (upper_bound < value) { + upper_bound = value + tol; + } + + /* Check to see if constraints are satisfied */ + + /* if (lower_bound > upper_bound) { + + }*/ + + /* Now, take a step */ + + double tnorm; /* the 1D gaussian variable */ + double cdfU, cdfL, unif; /* temp variables */ + + if (upper_bound < -10) { + + /* use Exp approximation */ + /* the approximation is that */ + /* Z | lower_bound < Z < upper_bound */ + /* is fabs(upper_bound) * (upper_bound - Z) = E approx Exp(1) */ + /* so Z = upper_bound - E / fabs(upper_bound) */ + /* and the truncation of the exponential is */ + /* E < fabs(upper_bound - lower_bound) * fabs(upper_bound) = D */ + + /* this has distribution function (1 - exp(-x)) / (1 - exp(-D)) */ + /* so to draw from this distribution */ + /* we set E = - log(1 - U * (1 - exp(-D))) where U is Unif(0,1) */ + /* and Z (= tnorm below) is as stated */ + + unif = runif(0., 1.) * (1 - exp(-fabs((lower_bound - upper_bound) * upper_bound))); + tnorm = (upper_bound + log(1 - unif) / fabs(upper_bound)); + } + else if (lower_bound > 10) { + + /* here Z = lower_bound + E / fabs(lower_bound) (though lower_bound is positive) */ + /* and D = fabs((upper_bound - lower_bound) * lower_bound) */ + + unif = runif(0., 1.) 
* (1 - exp(-fabs((upper_bound - lower_bound) * lower_bound))); + tnorm = (lower_bound - log(1 - unif) / lower_bound); + } + else if (lower_bound < 0) { + cdfL = pnorm(lower_bound, 0., 1., 1, 0); + cdfU = pnorm(upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) * (cdfU - cdfL) + cdfL; + if (unif < 0.5) { + tnorm = qnorm(unif, 0., 1., 1, 0); + } + else { + tnorm = -qnorm(1-unif, 0., 1., 1, 0); + } + } + else { + cdfL = pnorm(-lower_bound, 0., 1., 1, 0); + cdfU = pnorm(-upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) * (cdfL - cdfU) + cdfU; + if (unif < 0.5) { + tnorm = -qnorm(unif, 0., 1., 1, 0); + } + else { + tnorm = qnorm(1-unif, 0., 1., 1, 0); + } + } + + /* Now update the state and U */ + + double delta = tnorm - value; + + for (istate = 0; istate < nstate; istate++) { + state[istate] += delta * direction[istate]; + } + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + U[iconstraint] += delta * alpha[iconstraint] ; + } + + /* End of gibbs_step */ + +} + +void sample_truncnorm_white(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *U, /* A %*% state - b */ + double *directions, /* possible steps for sampler to take */ + /* assumed to be stored as list of columns of dimension nstate */ + /* has shape (nstate, ndirection) */ + double *alphas, /* The matrix A %*% directions */ + /* has shape (nconstraint, ndirection) */ + double *output, /* array in which to store samples */ + /* assumed will stored as list of vectors of dimension nstate */ + /* has shape (nstate, ndraw) */ + int *pnconstraint, /* number of rows of A */ + int *pndirection, /* the possible number of directions to choose from */ + /* `directions` should have size nstate*ndirection */ + int *pnstate, /* dimension of state */ + int *pburnin, /* number of burnin steps */ + int *pndraw) /* total number of samples to return */ +{ + + int iter_count; + int which_direction; + + int nconstraint = *pnconstraint; + int ndirection = *pndirection; + int nstate = *pnstate; + int burnin = *pburnin; + int ndraw = *pndraw; + + double *direction, *alpha; + + for (iter_count = 0; iter_count < burnin + ndraw; iter_count++) { + + which_direction = (int) floor(runif(0., 1.) 
* ndirection); + direction = ((double *) directions) + nstate * which_direction; + alpha = ((double *) alphas) + nconstraint * which_direction; + + /* take a step, which implicitly updates `state` and `U` */ + + gibbs_step(state, + direction, + U, + alpha, + nconstraint, + nstate); + + /* Store result if after burnin */ + + int istate; + if (iter_count >= burnin) { + for (istate = 0; istate < nstate; istate++) { + *output = state[istate]; + output++; + } + } + } + +} + From 0a3bc8d0d73cb47c985b40b0b28ee018a3357c59 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 22 Sep 2017 15:51:14 -0700 Subject: [PATCH 247/396] new versions of RcppExports --- selectiveInference/R/RcppExports.R | 8 ++--- selectiveInference/src/RcppExports.cpp | 44 ++++++++++++++++---------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index e04c446a..622d1d20 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,15 +1,15 @@ -# This file was generated by Rcpp::compileAttributes +# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) + .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index feefe32e..461b0e58 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// This file was generated by Rcpp::compileAttributes +// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope 
rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,36 +22,48 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - __result = Rcpp::wrap(update1_(Q2, w, m, k)); - return __result; + rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); + return rcpp_result_gen; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type n(nSEXP); - __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return __result; + rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return rcpp_result_gen; END_RCPP } + +static const R_CallMethodDef CallEntries[] = { + {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, + {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, + {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_selectiveInference(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} From 59e9c7e8050e6152c02bb638faadd43148b6231d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 25 Sep 2017 17:33:44 -0700 Subject: [PATCH 248/396] fixing examples for new glmnet, small cleanup of function signature --- selectiveInference/R/funs.fixed.R | 7 +++-- selectiveInference/R/funs.fixedCox.R | 10 ++++--- selectiveInference/man/fixedLassoInf.Rd | 35 ++++++++++++++----------- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git 
a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index c65731d6..6e1ffa9d 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -2,10 +2,13 @@ # for the solution of # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 -fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL, +fixedLassoInf <- function(x, y, beta, + lambda, family=c("gaussian","binomial","cox"), + intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) { + gridrange=c(-100,100), bits=NULL, verbose=FALSE, + linesearch.try=10) { family = match.arg(family) this.call = match.call() diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index cfe764e4..92567bd3 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -1,6 +1,10 @@ -fixedCoxLassoInf=function(x,y,status,beta,lambda,alpha=.1, type=c("partial"),tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ - +fixedCoxLassoInf=function(x, y, status, + beta, lambda, + alpha=.1, type=c("partial"), + tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), + bits=NULL, verbose=FALSE, + this.call=NULL){ checkargs.xy(x,y) if(is.null(status)) stop("Must supply `status' argument") diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index bd740f86..5fd8c243 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -160,7 +160,7 @@ gfit = glmnet(x,y,standardize=FALSE) # extract coef for a given lambda; note the 1/n factor! # (and we don't save the intercept term) lambda = .8 -beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1] # compute fixed lambda p-values and selection intervals out = fixedLassoInf(x,y,beta,lambda,sigma=sigma) @@ -170,8 +170,8 @@ out ## as above, but use lar function instead to get initial ## lasso fit (should get same results) lfit = lar(x,y,normalize=FALSE) -beta = coef(lfit,s=lambda,mode="lambda") -out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +beta = coef(lfit, s=lambda, mode="lambda") +out2 = fixedLassoInf(x, y, beta, lambda, sigma=sigma) out2 ## mimic different penalty factors by first scaling x @@ -189,12 +189,12 @@ pf=c(rep(1,7),rep(.1,3)) #define penalty factors pf=p*pf/sum(pf) # penalty factors should be rescaled so they sum to p xs=scale(x,FALSE,pf) #scale cols of x by penalty factors # first run glmnet -gfit = glmnet(xs,y,standardize=FALSE) +gfit = glmnet(xs, y, standardize=FALSE) # extract coef for a given lambda; note the 1/n factor! # (and we don't save the intercept term) lambda = .8 -beta_hat = coef(gfit, s=lambda/n, exact=TRUE)[-1] +beta_hat = coef(gfit, x=xs, y=y, s=lambda/n, exact=TRUE)[-1] # compute fixed lambda p-values and selection intervals out = fixedLassoInf(xs,y,beta_hat,lambda,sigma=sigma) @@ -221,37 +221,40 @@ set.seed(43) # extract coef for a given lambda; note the 1/n factor! 
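# (why the 1/n: glmnet scales its loss by 1/n, while fixedLassoInf
# assumes the unscaled penalized loss with multiplier lambda, so the
# value passed to coef() must be s = lambda/n for the two problems
# to match)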
# (and here we DO include the intercept term) lambda = .8 - beta_hat = coef(gfit, s=lambda/n, exact=TRUE) + beta_hat = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE) # compute fixed lambda p-values and selection intervals out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial") out -#Cox model -set.seed(43) + + # Cox model + + set.seed(43) n = 50 p = 10 sigma = 1 - x = matrix(rnorm(n*p),n,p) - x=scale(x,TRUE,TRUE) + x = matrix(rnorm(n*p), n, p) + x=scale(x, TRUE, TRUE) beta = c(3,2,rep(0,p-2)) tim = as.vector(x\%*\%beta + sigma*rnorm(n)) tim= tim-min(tim)+1 -status=sample(c(0,1),size=n,replace=TRUE) + status=sample(c(0,1),size=n,replace=TRUE) # first run glmnet - gfit = glmnet(x,Surv(tim,status),standardize=FALSE,family="cox") + y = Surv(tim,status) + gfit = glmnet(x, y, standardize=FALSE, family="cox") # extract coef for a given lambda; note the 1/n factor! lambda = 1.5 - beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE)) - + beta_hat = as.numeric(coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)) + # compute fixed lambda p-values and selection intervals - out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox") + out = fixedLassoInf(x, tim, beta_hat, lambda, status=status, family="cox") out # Debiased lasso or "full" @@ -272,7 +275,7 @@ gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE) # extract coef for a given lambda; note the 1/n factor! # (and we don't save the intercept term) lambda = 2.8 -beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] +beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1] # compute fixed lambda p-values and selection intervals out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) From 41dc63aade08b6b70c074351fe1f10fea830f340 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 25 Sep 2017 17:34:31 -0700 Subject: [PATCH 249/396] removing a tab --- selectiveInference/R/funs.fixedCox.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index 92567bd3..d6ebd6b7 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -1,5 +1,5 @@ fixedCoxLassoInf=function(x, y, status, - beta, lambda, + beta, lambda, alpha=.1, type=c("partial"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), From 86a92fdaa482355f148930cce04824aa3c7beb83 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 25 Sep 2017 17:56:21 -0700 Subject: [PATCH 250/396] WIP: writing a solver for wide X matrices --- selectiveInference/R/RcppExports.R | 4 + selectiveInference/src/Rcpp-debias.cpp | 72 ++++ selectiveInference/src/RcppExports.cpp | 22 ++ selectiveInference/src/debias.h | 21 ++ .../src/quadratic_program_wide.c | 341 ++++++++++++++++++ 5 files changed, 460 insertions(+) create mode 100644 selectiveInference/src/quadratic_program_wide.c diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index 622d1d20..1c030530 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -5,6 +5,10 @@ solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_a .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } +solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { + .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', 
X, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) +} + update1_ <- function(Q2, w, m, k) { .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 6420e515..d6e8741e 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -68,3 +68,75 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::Named("max_active_check") = max_active_check)); } + + +// [[Rcpp::export]] +Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, + double bound, + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector linear_func, + Rcpp::NumericVector gradient, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive, + double kkt_tol, + double objective_tol, + int max_active + ) { + + int nrow = X.nrow(); // number of cases + int ncol = X.ncol(); // number of features + + // Active set + + int irow, icol; + + // Extract the diagonal + Rcpp::NumericVector X_diag(ncol); + double *X_diag_p = X_diag.begin(); + + for (icol=0; icol= max_active); + + return(Rcpp::List::create(Rcpp::Named("soln") = theta, + Rcpp::Named("gradient") = gradient, + Rcpp::Named("linear_func") = linear_func, + Rcpp::Named("iter") = iter, + Rcpp::Named("kkt_check") = kkt_check, + Rcpp::Named("ever_active") = ever_active, + Rcpp::Named("nactive") = nactive, + Rcpp::Named("max_active_check") = max_active_check)); + +} diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index 461b0e58..6939cf1e 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -26,6 +26,27 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// solve_QP_wide +Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); +RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); + Rcpp::traits::input_parameter< double >::type bound(boundSEXP); + Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); + Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); + Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); + Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); + rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; +END_RCPP +} // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, 
Rcpp::NumericVector w, int m, int k); RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { @@ -58,6 +79,7 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, + {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 11}, {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, {NULL, NULL, 0} diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 7cab5e3c..6de29407 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -23,6 +23,27 @@ int check_KKT_qp(double *theta, /* current theta */ double bound, /* Lagrange multipler for \ell_1 */ double tol); /* precision for checking KKT conditions */ +int solve_wide(double *X_ptr, /* A design matrix */ + double *linear_func_ptr, /* Linear term in objective */ + double *X_diag_ptr, /* Diagonal entry of covariance matrix */ + double *gradient_ptr, /* Current gradient of quadratic loss */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in X */ + int ncol, /* How many rows in X */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol, /* precision for checking relative decrease in objective value */ + int max_active); /* Upper limit for size of active set -- otherwise break */ + +int check_KKT_wide(double *theta, /* current theta */ + double *gradient_ptr, /* Current gradient of quadratic loss */ + int nrow, /* how many rows in Sigma */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol); /* precision for checking KKT conditions */ + #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c new file mode 100644 index 00000000..36a27235 --- /dev/null +++ b/selectiveInference/src/quadratic_program_wide.c @@ -0,0 +1,341 @@ +#include // for fabs + +// Find an approximate row of \hat{Sigma}^{-1} + +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf + +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 +// where l is `linear_func` below + +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. 
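+// Closed form of the coordinate step implemented below: writing
+// q for the coord diagonal entry of X^TX and l for the gradient
+// coordinate with its own diagonal contribution q * theta[coord]
+// removed, each update minimizes
+//   t -> q/2 * t^2 + l * t + mu * |t|,
+// whose solution is the soft-thresholding rule
+//   theta[coord] = -sign(l) * max(|l| - mu, 0) / q.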
+// Update one coordinate + +// Throughout X is a design matrix + +double objective_wide(double *X_ptr, /* A design matrix */ + double *linear_func_ptr, /* Linear term in objective */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in X */ + int ncol, /* how many columns in X */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ +{ + int irow, icol; + double value = 0; + double *X_ptr_tmp = X_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; + double *theta_row_ptr, *theta_col_ptr; + int *active_row_ptr, *active_col_ptr; + int active_row, active_col; + int nactive = *nactive_ptr; + + theta_row_ptr = theta; + theta_col_ptr = theta; + + for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + } + else { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +double update_one_coord_wide(double *X_ptr, /* A design matrix*/ + double *linear_func_ptr, /* Linear term in objective */ + double *X_diag_ptr, /* Diagonal entries of Sigma */ + double *gradient_ptr, /* X^TX/n times theta */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in X */ + int ncol, /* How many rows in X */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this coord in ever_active */ +{ + + double delta; + double linear_term = 0; + double value = 0; + double old_value; + double *X_ptr_tmp; + double *gradient_ptr_tmp; + double *theta_ptr; + int icol = 0; + + double *quadratic_ptr = ((double *) X_diag_ptr + coord); + double quadratic_term = *quadratic_ptr; + + gradient_ptr_tmp = ((double *) gradient_ptr + coord); + linear_term = *gradient_ptr_tmp; + + theta_ptr = ((double *) theta + coord); + old_value = *theta_ptr; + + // The coord entry of gradient_ptr term has a diagonal term in it: + // X[coord, coord] * theta[coord] + // This removes it. 
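+ // Why remove it: the cached gradient is evaluated at the current
+ // theta, but the one-dimensional problem treats theta[coord] as the
+ // unknown t, so its own contribution quadratic_term * old_value must
+ // be subtracted to leave the linear coefficient l of
+ //   t -> q/2 * t^2 + l * t + bound * |t|.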
+ + linear_term -= quadratic_term * old_value; + + // Now soft-threshold the coord entry of theta + + // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| + // with q=quadratic_term and l=linear_term + + // With a negative linear term, solution should be + // positive + + if (linear_term < -bound) { + value = (-linear_term - bound) / quadratic_term; + } + else if (linear_term > bound) { + value = -(linear_term - bound) / quadratic_term; + } + + // Add to active set if necessary + + if ((is_active == 0) && (value != 0)) { + update_ever_active_wide(coord, ever_active_ptr, nactive_ptr); + } + + // Update the linear term + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { + + delta = value - old_value; + X_ptr_tmp = ((double *) X_ptr + coord * nrow); + gradient_ptr_tmp = ((double *) gradient_ptr); + + for (icol=0; icol= max_active) { + break; + } + + // Check relative decrease of objective + + if (check_objective) { + new_value = objective_wide(X_ptr, + linear_func_ptr, + ever_active_ptr, + nactive_ptr, + nrow, + ncol, + bound, + theta); + + if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { + break; + } + old_value = new_value; + } + } + return(iter); +} + From 2f05bf60764ebd47ab04286e5f479badf31c6944 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 25 Sep 2017 17:57:01 -0700 Subject: [PATCH 251/396] for devel version -- needs intervals reinstalled --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 21270e09..541fa3a4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,6 @@ addons: warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? - - R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")' + - R -e 'install.packages(c("Rcpp", "intervals"), repos="http://cloud.r-project.org")' - make Rcpp - cd selectiveInference From 6238a92c3c484edc160123fa2be73562e5d272e5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 26 Sep 2017 11:44:27 -0700 Subject: [PATCH 252/396] WIP: working on wide X QP --- .../src/quadratic_program_wide.c | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 36a27235..21ad990d 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -14,6 +14,7 @@ // Throughout X is a design matrix double objective_wide(double *X_ptr, /* A design matrix */ + double *X_theta_ptr, /* Fitted values */ double *linear_func_ptr, /* Linear term in objective */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ @@ -24,7 +25,7 @@ double objective_wide(double *X_ptr, /* A design matrix */ { int irow, icol; double value = 0; - double *X_ptr_tmp = X_ptr; + double *X_theta_ptr_tmp = X_theta_ptr; double *linear_func_ptr_tmp = linear_func_ptr; double *theta_row_ptr, *theta_col_ptr; int *active_row_ptr, *active_col_ptr; @@ -34,29 +35,32 @@ double objective_wide(double *X_ptr, /* A design matrix */ theta_row_ptr = theta; theta_col_ptr = theta; - for (irow=0; irow Date: Tue, 26 Sep 2017 14:47:15 -0700 Subject: [PATCH 253/396] WIP: builds now, but segfaulting --- selectiveInference/R/RcppExports.R | 12 +- selectiveInference/src/Rcpp-debias.cpp | 51 ++- selectiveInference/src/RcppExports.cpp | 58 ++-- selectiveInference/src/debias.h | 39 ++- .../src/quadratic_program_wide.c | 
296 +++++++++++++----- 5 files changed, 302 insertions(+), 154 deletions(-) diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index 1c030530..8af46403 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,19 +1,19 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# This file was generated by Rcpp::compileAttributes # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } -solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) +solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { + .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) + .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index d6e8741e..fcf0eb6a 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -77,6 +77,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, + Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, @@ -84,35 +85,41 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, int max_active ) { - int nrow = X.nrow(); // number of cases - int ncol = X.ncol(); // number of features + int ncase = X.nrow(); // number of cases + int nfeature = X.ncol(); // number of features // Active set - int irow, icol; + int icase, ifeature; + + // A vector to keep track of gradient updates + + Rcpp::IntegerVector need_update(nfeature); // Extract the diagonal - Rcpp::NumericVector X_diag(ncol); + Rcpp::NumericVector X_diag(nfeature); double *X_diag_p = X_diag.begin(); - for (icol=0; icol= max_active); + // Make sure gradient is updated -- essentially a matrix multiply + + update_gradient_wide((double *) gradient.begin(), + (double *) X_theta.begin(), + (double *) X.begin(), + (double *) linear_func.begin(), + (int *) need_update.begin(), + nfeature, + ncase); + return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, + Rcpp::Named("X_theta") = X_theta, Rcpp::Named("linear_func") = 
linear_func, Rcpp::Named("iter") = iter, Rcpp::Named("kkt_check") = kkt_check, diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index 6939cf1e..efa87d53 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// This file was generated by Rcpp::compileAttributes // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,70 +22,58 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; + __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return __result; END_RCPP } // solve_QP_wide -Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); +RcppExport SEXP selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + 
Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type X_theta(X_thetaSEXP); Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; + __result = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return __result; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); - return rcpp_result_gen; + __result = Rcpp::wrap(update1_(Q2, w, m, k)); + return __result; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type n(nSEXP); - rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return rcpp_result_gen; + __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return __result; END_RCPP } - -static const R_CallMethodDef CallEntries[] = { - {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, - {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 11}, - {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, - {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_selectiveInference(DllInfo *dll) { - 
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 6de29407..70ba536f 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -24,26 +24,41 @@ int check_KKT_qp(double *theta, /* current theta */ double tol); /* precision for checking KKT conditions */ int solve_wide(double *X_ptr, /* A design matrix */ + double *X_theta_ptr, /* Fitted values */ double *linear_func_ptr, /* Linear term in objective */ double *X_diag_ptr, /* Diagonal entry of covariance matrix */ - double *gradient_ptr, /* Current gradient of quadratic loss */ - int *ever_active_ptr, /* Ever active set: 0-based */ + double *gradient_ptr, /* X times theta */ + int *need_update_ptr, /* Keeps track of updated gradient coords */ + int *ever_active_ptr, /* Ever active set: 1-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in X */ - int ncol, /* How many rows in X */ + int ncase, /* How many rows in X */ + int nfeature, /* How many columns in X */ double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ + double *theta_ptr, /* current value */ + int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active); /* Upper limit for size of active set -- otherwise break */ + int max_active); /* Upper limit for size of active set -- otherwise break */ -int check_KKT_wide(double *theta, /* current theta */ - double *gradient_ptr, /* Current gradient of quadratic loss */ - int nrow, /* how many rows in Sigma */ - double bound, /* Lagrange multipler for \ell_1 */ - double tol); /* precision for checking KKT conditions */ +int check_KKT_wide(double *theta_ptr, /* current theta */ + double *gradient_ptr, /* X^TX/n times theta */ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* A design matrix */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? */ + int nfeature, /* how many columns in X */ + int ncase, /* how many rows in X */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol); /* precision for checking KKT conditions */ +void update_gradient_wide(double *gradient_ptr, /* X^TX/n times theta */ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* A design matrix */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? 
*/ + int nfeature, /* how many columns in X */ + int ncase); /* how many rows in X */ + #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 21ad990d..be7411df 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -1,4 +1,5 @@ #include // for fabs +#include // Find an approximate row of \hat{Sigma}^{-1} @@ -13,64 +14,113 @@ // Throughout X is a design matrix -double objective_wide(double *X_ptr, /* A design matrix */ - double *X_theta_ptr, /* Fitted values */ +double objective_wide(double *X_theta_ptr, /* Fitted values */ double *linear_func_ptr, /* Linear term in objective */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* how many rows in X */ - int ncol, /* how many columns in X */ + int ncase, /* how many rows in X */ + int nfeature, /* how many columns in X */ double bound, /* Lagrange multipler for \ell_1 */ - double *theta) /* current value */ + double *theta_ptr) /* current value */ { - int irow, icol; + int icase, iactive; double value = 0; double *X_theta_ptr_tmp = X_theta_ptr; double *linear_func_ptr_tmp = linear_func_ptr; - double *theta_row_ptr, *theta_col_ptr; - int *active_row_ptr, *active_col_ptr; - int active_row, active_col; + double *theta_ptr_tmp; + int *active_feature_ptr; + int active_feature; int nactive = *nactive_ptr; - theta_row_ptr = theta; - theta_col_ptr = theta; + // The term \|X\theta\|^2_2/n, with n=ncase - double entry = 0; // An entry of X\theta + for (icase=0; icase 0) && (fabs(gradient + bound) > tol * bound)) { + if (*theta_ptr_tmp != 0) { // these coordinates of gradients should be equal to -bound + // fprintf(stderr, "WTF\n"); + if ((*theta_ptr_tmp > 0) && (fabs(gradient + bound) > tol * bound)) { + // fprintf(stderr, "WTF2\n"); return(0); } - else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { + else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + // fprintf(stderr, "WTF3\n"); return(0); } } else { if (fabs(gradient) > (1. 
+ tol) * bound) { + // fprintf(stderr, "inactive\n"); return(0); } } @@ -141,10 +218,12 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double *gradient_ptr, /* X^TX/n times theta */ int *ever_active_ptr, /* Ever active set: 1-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in X */ - int ncol, /* How many rows in X */ + double *X_theta_ptr, /* X\theta -- fitted values */ + int *need_update_ptr, /* Whether a gradient coordinate needs update or not */ + int ncase, /* How many rows in X */ + int nfeature, /* How many rows in X */ double bound, /* feasibility parameter */ - double *theta, /* current value */ + double *theta_ptr, /* current value */ int coord, /* which coordinate to update: 0-based */ int is_active) /* Is this coord in ever_active */ { @@ -154,18 +233,18 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double value = 0; double old_value; double *X_ptr_tmp; - double *gradient_ptr_tmp; - double *theta_ptr; - int icol = 0; + double *X_theta_ptr_tmp; + int *need_update_ptr_tmp; + double *theta_ptr_tmp; + int ifeature, icase; double *quadratic_ptr = ((double *) X_diag_ptr + coord); double quadratic_term = *quadratic_ptr; - gradient_ptr_tmp = ((double *) gradient_ptr + coord); - linear_term = *gradient_ptr_tmp; + linear_term = compute_gradient_coord(gradient_ptr, X_theta_ptr, X_ptr, linear_func_ptr, need_update_ptr, coord, ncase); - theta_ptr = ((double *) theta + coord); - old_value = *theta_ptr; + theta_ptr_tmp = ((double *) theta_ptr + coord); + old_value = *theta_ptr_tmp; // The coord entry of gradient_ptr term has a diagonal term in it: // X[coord, coord] * theta[coord] @@ -198,18 +277,26 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { + // Set the update_gradient_ptr to 1 + + need_update_ptr_tmp = need_update_ptr; + for (ifeature=0; ifeature= max_active) { break; } // Check relative decrease of objective + // fprintf(stderr, "here7\n"); + if (check_objective) { - new_value = objective_wide(X_ptr, + new_value = objective_wide(X_theta_ptr, linear_func_ptr, ever_active_ptr, nactive_ptr, - nrow, - ncol, + ncase, + nfeature, bound, - theta); + theta_ptr); + + // fprintf(stderr, "here8\n"); if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { break; } old_value = new_value; } + + // fprintf(stderr, "here10\n"); + } return(iter); } From ed19c2942f72d331a5c7231a28c40b4bd94cb202 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 26 Sep 2017 14:49:04 -0700 Subject: [PATCH 254/396] test script for wide problem --- test.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 test.R diff --git a/test.R b/test.R new file mode 100644 index 00000000..0b486860 --- /dev/null +++ b/test.R @@ -0,0 +1,23 @@ +set.seed(43) + + n = 100 + p = 200 + lam = 10 + X = matrix(rnorm(n*p), n, p) + Y = rnorm(n) + library(selectiveInference) + p = ncol(X) + soln_R = rep(0, p) + grad = -t(X) %*% Y + ever_active = c(1, rep(0, p-1)) + nactive = as.integer(1) + kkt_tol = 1.e-12 + objective_tol = 1.e-12 + maxiter = 500 + Xtheta = rep(0, n) + soln_R = selectiveInference:::solve_QP(t(X) %*% X, lam, maxiter, soln_R, -t(X) %*% Y, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln + print(soln_R) + # test wide solver + + soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R, -t(X) %*% Y, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) + 
print(soln_R_wide) \ No newline at end of file From ce0dc0741063173f700e9e1e1237fd77c88d65ee Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 26 Sep 2017 14:55:18 -0700 Subject: [PATCH 255/396] test running now -- no segfault? --- test.R | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/test.R b/test.R index 0b486860..2d9349df 100644 --- a/test.R +++ b/test.R @@ -2,22 +2,28 @@ set.seed(43) n = 100 p = 200 - lam = 10 + lam = 0.1 X = matrix(rnorm(n*p), n, p) Y = rnorm(n) library(selectiveInference) p = ncol(X) soln_R = rep(0, p) - grad = -t(X) %*% Y + grad = -t(X) %*% Y / n ever_active = c(1, rep(0, p-1)) nactive = as.integer(1) kkt_tol = 1.e-12 objective_tol = 1.e-12 maxiter = 500 - Xtheta = rep(0, n) - soln_R = selectiveInference:::solve_QP(t(X) %*% X, lam, maxiter, soln_R, -t(X) %*% Y, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln + soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln print(soln_R) + Xtheta = rep(0, n) + nactive = as.integer(1) + ever_active = c(1, rep(0, p-1)) + soln_R = rep(0, p) + grad = - t(X) %*% Y / n # test wide solver - - soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R, -t(X) %*% Y, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) - print(soln_R_wide) \ No newline at end of file + soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) + print(soln_R_wide) + print(soln_R) + print(soln_R_wide$soln) + print(soln_R_wide$soln - soln_R) \ No newline at end of file From c26a06b243084eb9cdffa208ffaf9418e2c49850 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 26 Sep 2017 21:29:58 -0700 Subject: [PATCH 256/396] WIP: running but not agreeing with other solver --- selectiveInference/R/RcppExports.R | 10 ++-- selectiveInference/src/RcppExports.cpp | 55 ++++++++++++------- .../src/quadratic_program_wide.c | 4 ++ test.R | 38 +++++++++---- 4 files changed, 71 insertions(+), 36 deletions(-) diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index 8af46403..f5ebee43 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,19 +1,19 @@ -# This file was generated by Rcpp::compileAttributes +# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, 
objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) + .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index efa87d53..02a77413 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// This file was generated by Rcpp::compileAttributes +// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,16 +22,16 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // solve_QP_wide Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP 
max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -44,36 +44,49 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - __result = Rcpp::wrap(update1_(Q2, w, m, k)); - return __result; + rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); + return rcpp_result_gen; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type n(nSEXP); - __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return __result; + rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return rcpp_result_gen; END_RCPP } + +static const R_CallMethodDef CallEntries[] = { + {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, + {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, + {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, + {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_selectiveInference(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index be7411df..ca9301e7 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c 
@@ -388,6 +388,7 @@ int solve_wide(double *X_ptr, /* A design matrix */ ncase, bound, kkt_tol) == 1) { + fprintf(stderr, "break1\n"); break; } @@ -426,6 +427,7 @@ int solve_wide(double *X_ptr, /* A design matrix */ ncase, bound, kkt_tol) == 1) { + fprintf(stderr, "break2\n"); break; } @@ -434,6 +436,7 @@ int solve_wide(double *X_ptr, /* A design matrix */ // fprintf(stderr, "here18 %d\n", *nactive_ptr); if (*nactive_ptr >= max_active) { + fprintf(stderr, "break3\n"); break; } @@ -454,6 +457,7 @@ int solve_wide(double *X_ptr, /* A design matrix */ // fprintf(stderr, "here8\n"); if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { + fprintf(stderr, "break5 %f %f %f %d\n", old_value, new_value, objective_tol, iter); break; } old_value = new_value; diff --git a/test.R b/test.R index 2d9349df..31264be0 100644 --- a/test.R +++ b/test.R @@ -2,28 +2,46 @@ set.seed(43) n = 100 p = 200 - lam = 0.1 + lam = 0.2 X = matrix(rnorm(n*p), n, p) Y = rnorm(n) library(selectiveInference) p = ncol(X) soln_R = rep(0, p) grad = -t(X) %*% Y / n - ever_active = c(1, rep(0, p-1)) + ever_active = as.integer(c(1, rep(0, p-1))) nactive = as.integer(1) kkt_tol = 1.e-12 - objective_tol = 1.e-12 + objective_tol = 1.e-16 maxiter = 500 - soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln + soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p) + print('active') + print(nactive) + print(ever_active) + print(soln_R$ever_active) + soln_R = soln_R$soln + soln_R_old = soln_R print(soln_R) Xtheta = rep(0, n) nactive = as.integer(1) - ever_active = c(1, rep(0, p-1)) + ever_active = as.integer(c(1, rep(0, p-1))) soln_R = rep(0, p) grad = - t(X) %*% Y / n # test wide solver - soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) - print(soln_R_wide) - print(soln_R) - print(soln_R_wide$soln) - print(soln_R_wide$soln - soln_R) \ No newline at end of file + soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R*1., -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) + print(nactive) + print(soln_R_wide$ever_active) + + print('diff') + print(soln_R_wide$soln - soln_R_old) + print(soln_R_wide$gradient[soln_R_wide$ever_active]) + print(max(abs(soln_R_wide$gradient[-soln_R_wide$ever_active]))) + print(soln_R_wide$kkt_check) + print(soln_R_wide$iter) +# print(Xtheta - X %*% soln_R_wide$soln) +# print(Xtheta) + + soln_R_wide = selectiveInference:::solve_QP_wide(X, 0.7 * lam, maxiter, soln_R_wide$soln, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) +# print(Xtheta - X %*% soln_R_wide$soln) +# print(soln_R_wide$soln) + soln_R_wide = selectiveInference:::solve_QP_wide(X, 0.5 * lam, maxiter, soln_R_wide$soln, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) \ No newline at end of file From bf65665533fcfad41225ea64f0bb8da72c753063 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 27 Sep 2017 22:12:46 -0700 Subject: [PATCH 257/396] WIP: C code for wide version working -- need to work on R code --- selectiveInference/src/Makevars | 2 +- selectiveInference/src/Rcpp-debias.cpp | 18 ++-- selectiveInference/src/debias.h | 46 +++++----- selectiveInference/src/quadratic_program.c | 56 ++++++------ 
.../src/quadratic_program_wide.c | 88 +++++++------------ 5 files changed, 92 insertions(+), 118 deletions(-) diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars index b77ed954..2adf9a63 100644 --- a/selectiveInference/src/Makevars +++ b/selectiveInference/src/Makevars @@ -2,7 +2,7 @@ PKG_CFLAGS= -I. PKG_CPPFLAGS= -I. PKG_LIBS=-L. -$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o quadratic_program.o +$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o quadratic_program.o quadratic_program_wide.o clean: rm -f *o diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index fcf0eb6a..ce8bb156 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -1,5 +1,5 @@ #include // need to include the main Rcpp header file -#include // where find_one_row_void is defined +#include // where solve_QP, solve_QP_wide are defined // Below, the gradient should be equal to Sigma * theta + linear_func!! // No check is done on this. @@ -97,15 +97,15 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, Rcpp::IntegerVector need_update(nfeature); // Extract the diagonal - Rcpp::NumericVector X_diag(nfeature); - double *X_diag_p = X_diag.begin(); + Rcpp::NumericVector nndef_diag(nfeature); + double *nndef_diag_p = nndef_diag.begin(); - for (icase=0; icase // for fabs -// Find an approximate row of \hat{Sigma}^{-1} +// Find an approximate row of \hat{nndef}^{-1} // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf @@ -11,17 +11,17 @@ // Therefore we don't have to negate the answer to get theta. // Update one coordinate -double objective_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ +double objective_qp(double *nndef_ptr, /* A non-negative definite matrix */ double *linear_func_ptr, /* Linear term in objective */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* how many rows in Sigma */ + int nrow, /* how many rows in nndef */ double bound, /* Lagrange multipler for \ell_1 */ double *theta) /* current value */ { int irow, icol; double value = 0; - double *Sigma_ptr_tmp = Sigma_ptr; + double *nndef_ptr_tmp = nndef_ptr; double *linear_func_ptr_tmp = linear_func_ptr; double *theta_row_ptr, *theta_col_ptr; int *active_row_ptr, *active_col_ptr; @@ -43,9 +43,9 @@ double objective_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ active_col = *active_col_ptr - 1; // Ever-active is 1-based theta_col_ptr = ((double *) theta + active_col); - Sigma_ptr_tmp = ((double *) Sigma_ptr + nrow * active_col + active_row); // Matrices are column-major order + nndef_ptr_tmp = ((double *) nndef_ptr + nrow * active_col + active_row); // Matrices are column-major order - value += 0.5 * (*Sigma_ptr_tmp) * (*theta_row_ptr) * (*theta_col_ptr); + value += 0.5 * (*nndef_ptr_tmp) * (*theta_row_ptr) * (*theta_col_ptr); } value += bound * fabs((*theta_row_ptr)); // the \ell_1 term @@ -91,8 +91,8 @@ int update_ever_active_qp(int coord, } int check_KKT_qp(double *theta, /* current theta */ - double *gradient_ptr, /* Sigma times theta */ - int nrow, /* how many rows in Sigma */ + double *gradient_ptr, /* nndef times theta + linear_func */ + int nrow, /* how many rows in nndef */ double bound, /* Lagrange multipler for \ell_1 */ double tol) /* precision for checking KKT conditions */ { @@ -128,13 +128,13 @@ int check_KKT_qp(double *theta, /* current theta */ return(1); } -double update_one_coord_qp(double *Sigma_ptr, 
/* A covariance matrix: X^TX/n */ +double update_one_coord_qp(double *nndef_ptr, /* A non-negative definite matrix */ double *linear_func_ptr, /* Linear term in objective */ - double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ - double *gradient_ptr, /* Sigma times theta */ + double *nndef_diag_ptr, /* Diagonal of nndef */ + double *gradient_ptr, /* nndef times theta + linear_func */ int *ever_active_ptr, /* Ever active set: 1-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ + int nrow, /* How many rows in nndef */ double bound, /* feasibility parameter */ double *theta, /* current value */ int coord, /* which coordinate to update: 0-based */ @@ -145,12 +145,12 @@ double update_one_coord_qp(double *Sigma_ptr, /* A covariance matrix: double linear_term = 0; double value = 0; double old_value; - double *Sigma_ptr_tmp; + double *nndef_ptr_tmp; double *gradient_ptr_tmp; double *theta_ptr; int icol = 0; - double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); + double *quadratic_ptr = ((double *) nndef_diag_ptr + coord); double quadratic_term = *quadratic_ptr; gradient_ptr_tmp = ((double *) gradient_ptr + coord); @@ -160,7 +160,7 @@ double update_one_coord_qp(double *Sigma_ptr, /* A covariance matrix: old_value = *theta_ptr; // The coord entry of gradient_ptr term has a diagonal term in it: - // Sigma[coord, coord] * theta[coord] + // nndef[coord, coord] * theta[coord] // This removes it. linear_term -= quadratic_term * old_value; @@ -191,13 +191,13 @@ double update_one_coord_qp(double *Sigma_ptr, /* A covariance matrix: if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { delta = value - old_value; - Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow); + nndef_ptr_tmp = ((double *) nndef_ptr + coord * nrow); gradient_ptr_tmp = ((double *) gradient_ptr); for (icol=0; icol // for fabs -#include // Find an approximate row of \hat{Sigma}^{-1} @@ -50,8 +49,6 @@ double objective_wide(double *X_theta_ptr, /* Fitted values */ active_feature_ptr = ((int *) ever_active_ptr + iactive); active_feature = *active_feature_ptr - 1; // Ever-active is 1-based - // fprintf(stderr, "active %d\n", active_feature); - theta_ptr_tmp = ((double *) theta_ptr + active_feature); linear_func_ptr_tmp = ((double *) linear_func_ptr + active_feature); value += (*linear_func_ptr_tmp) * (*theta_ptr_tmp); @@ -69,7 +66,7 @@ double objective_wide(double *X_theta_ptr, /* Fitted values */ double compute_gradient_coord(double *gradient_ptr, /* Gradient -- one coordinate will be updated if needed */ double *X_theta_ptr, /* Current fitted values */ - double *X_ptr, /* A design matrix */ + double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ double *linear_func_ptr, /* Linear term in objective */ int *need_update_ptr, /* Which coordinates need to be updated? */ int coord, /* Coordinate we are trying to update */ @@ -85,11 +82,10 @@ double compute_gradient_coord(double *gradient_ptr, /* Gradient -- one co double value = 0.; need_update_ptr_tmp = ((int *) need_update_ptr + coord); + // Check if this coordinate needs updating if (*need_update_ptr_tmp == 1) { - // fprintf(stderr, "compute grad %d %d\n", ncase, coord); - for (icase=0; icase 0) && (fabs(gradient + bound) > tol * bound)) { - // fprintf(stderr, "WTF2\n"); return(0); } else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { - // fprintf(stderr, "WTF3\n"); return(0); } + } else { if (fabs(gradient) > (1. 
+ tol) * bound) { - // fprintf(stderr, "inactive\n"); return(0); } } @@ -214,8 +206,8 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double *linear_func_ptr, /* Linear term in objective */ - double *X_diag_ptr, /* Diagonal entries of Sigma */ - double *gradient_ptr, /* X^TX/n times theta */ + double *nndef_diag_ptr, /* Diagonal entries of Sigma */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ int *ever_active_ptr, /* Ever active set: 1-based */ int *nactive_ptr, /* Size of ever active set */ double *X_theta_ptr, /* X\theta -- fitted values */ @@ -238,8 +230,8 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double *theta_ptr_tmp; int ifeature, icase; - double *quadratic_ptr = ((double *) X_diag_ptr + coord); - double quadratic_term = *quadratic_ptr; + double *diagonal_ptr = ((double *) nndef_diag_ptr + coord); + double diagonal_entry = *diagonal_ptr; linear_term = compute_gradient_coord(gradient_ptr, X_theta_ptr, X_ptr, linear_func_ptr, need_update_ptr, coord, ncase); @@ -247,24 +239,24 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ old_value = *theta_ptr_tmp; // The coord entry of gradient_ptr term has a diagonal term in it: - // X[coord, coord] * theta[coord] + // (X^TX)[coord, coord] * theta[coord] / ncase // This removes it. - linear_term -= quadratic_term * old_value; + linear_term -= diagonal_entry * old_value; // Now soft-threshold the coord entry of theta // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| - // with q=quadratic_term and l=linear_term + // with q=diagonal_entry and l=linear_term // With a negative linear term, solution should be // positive if (linear_term < -bound) { - value = (-linear_term - bound) / quadratic_term; + value = (-linear_term - bound) / diagonal_entry; } else if (linear_term > bound) { - value = -(linear_term - bound) / quadratic_term; + value = -(linear_term - bound) / diagonal_entry; } // Add to active set if necessary @@ -273,7 +265,7 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ update_ever_active_wide(coord, ever_active_ptr, nactive_ptr); } - // Update the linear term + // Update X\theta if changed if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { @@ -298,17 +290,22 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ theta_ptr_tmp = ((double *) theta_ptr + coord); *theta_ptr_tmp = value; + theta_ptr_tmp = theta_ptr; + for (ifeature=0; ifeature= max_active) { - fprintf(stderr, "break3\n"); break; } // Check relative decrease of objective - // fprintf(stderr, "here7\n"); - if (check_objective) { new_value = objective_wide(X_theta_ptr, linear_func_ptr, @@ -454,17 +433,12 @@ int solve_wide(double *X_ptr, /* A design matrix */ bound, theta_ptr); - // fprintf(stderr, "here8\n"); - if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { - fprintf(stderr, "break5 %f %f %f %d\n", old_value, new_value, objective_tol, iter); break; } old_value = new_value; } - // fprintf(stderr, "here10\n"); - } return(iter); } From f85a568facc0f6d72d424d486a56642b42ffd97d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 27 Sep 2017 22:35:15 -0700 Subject: [PATCH 258/396] test of wide solver --- test.R | 75 ++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/test.R b/test.R index 31264be0..0660840d 100644 --- a/test.R +++ b/test.R @@ -14,34 +14,57 @@ set.seed(43) 
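# Both solver calls in this hunk minimize the same objective,
#   0.5 * t(theta) %*% (t(X) %*% X / n) %*% theta - t(Y) %*% X %*% theta / n + lam * sum(abs(theta)),
# solve_QP working from the p x p Gram matrix and solve_QP_wide from X itself,
# so the 'diff' printed below should be numerically zero. A minimal sketch for
# checking agreement on that common objective (the helper name
# lasso_qp_objective is illustrative, not part of the package):
lasso_qp_objective <- function(theta, X, Y, lam) {
    Xtheta <- X %*% theta
    0.5 * sum(Xtheta^2) / nrow(X) - sum(Xtheta * Y) / nrow(X) + lam * sum(abs(theta))
}
# e.g. compare lasso_qp_objective(soln_R, X, Y, lam) with
# lasso_qp_objective(soln_R_wide, X, Y, lam) after running the test below.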
kkt_tol = 1.e-12 objective_tol = 1.e-16 maxiter = 500 - soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p) - print('active') - print(nactive) - print(ever_active) - print(soln_R$ever_active) - soln_R = soln_R$soln - soln_R_old = soln_R - print(soln_R) - Xtheta = rep(0, n) + soln_R = rep(0, p) + grad = -t(X) %*% Y / n + ever_active = as.integer(c(1, rep(0, p-1))) + nactive = as.integer(1) + kkt_tol = 1.e-12 + objective_tol = 1.e-16 + maxiter = 500 + soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln + + # test wide solver + Xtheta = rep(0, n) nactive = as.integer(1) ever_active = as.integer(c(1, rep(0, p-1))) - soln_R = rep(0, p) + soln_R_wide = rep(0, p) grad = - t(X) %*% Y / n - # test wide solver - soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R*1., -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) - print(nactive) - print(soln_R_wide$ever_active) + soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R_wide, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p)$soln + +# soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p) +# print('active') +# print(nactive) +# print(ever_active) +# print(soln_R$ever_active) +# soln_R = soln_R$soln +# soln_R_old = soln_R +# print(soln_R) +# Xtheta = rep(0, n) +# nactive = as.integer(1) +# ever_active = as.integer(c(1, rep(0, p-1))) +# soln_R = rep(0, p) +# grad = - t(X) %*% Y / n +# # test wide solver +# soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R*1., -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) +# print(nactive) +# print(soln_R_wide$ever_active) - print('diff') - print(soln_R_wide$soln - soln_R_old) - print(soln_R_wide$gradient[soln_R_wide$ever_active]) - print(max(abs(soln_R_wide$gradient[-soln_R_wide$ever_active]))) - print(soln_R_wide$kkt_check) - print(soln_R_wide$iter) -# print(Xtheta - X %*% soln_R_wide$soln) -# print(Xtheta) + print('diff') + print(soln_R_wide - soln_R) +# print(soln_R_wide$gradient[soln_R_wide$ever_active]) +# print(max(abs(soln_R_wide$gradient[-soln_R_wide$ever_active]))) +# print(soln_R_wide$kkt_check) +# print(soln_R_wide$iter) +# print(max(abs(Xtheta - X %*% soln_R_wide$soln))) +# # print(Xtheta) - soln_R_wide = selectiveInference:::solve_QP_wide(X, 0.7 * lam, maxiter, soln_R_wide$soln, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) -# print(Xtheta - X %*% soln_R_wide$soln) -# print(soln_R_wide$soln) - soln_R_wide = selectiveInference:::solve_QP_wide(X, 0.5 * lam, maxiter, soln_R_wide$soln, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) \ No newline at end of file +# print('R objective') +# print(0.5 * sum(Xtheta^2)/n - sum(Xtheta*Y)/n + lam * 0.7 * sum(abs(soln_R_wide$soln))) +# print(max(abs(soln_R_wide$gradient - t(X) %*% X %*% soln_R_wide$soln / n + t(X) %*% Y / n))) +# print(lam) +# print(max(abs(soln_R_wide$gradient[soln_R_wide$soln != 0]))) +# print(which(soln_R_wide$soln != 0)) +# soln_R_wide = selectiveInference:::solve_QP_wide(X, 0.7 * lam, maxiter, soln_R_wide$soln, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) +# # print(Xtheta - X %*% soln_R_wide$soln) +# # 
print(soln_R_wide$soln) +# soln_R_wide = selectiveInference:::solve_QP_wide(X, 0.5 * lam, maxiter, soln_R_wide$soln, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) \ No newline at end of file From 636483b6afc5a29fd740cd8bcb1473d0cd2ea5d7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 27 Sep 2017 22:35:42 -0700 Subject: [PATCH 259/396] removing unneeded test, adding Rcpp exports --- selectiveInference/R/RcppExports.R | 10 ++-- selectiveInference/src/RcppExports.cpp | 55 ++++++++------------ test.R | 70 -------------------------- 3 files changed, 26 insertions(+), 109 deletions(-) delete mode 100644 test.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index f5ebee43..8af46403 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,19 +1,19 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# This file was generated by Rcpp::compileAttributes # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) + .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index 02a77413..efa87d53 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// This file was generated by Rcpp::compileAttributes // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP 
selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,16 +22,16 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; + __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return __result; END_RCPP } // solve_QP_wide Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -44,49 +44,36 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; + __result = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return __result; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< 
Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); - return rcpp_result_gen; + __result = Rcpp::wrap(update1_(Q2, w, m, k)); + return __result; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type n(nSEXP); - rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return rcpp_result_gen; + __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return __result; END_RCPP } - -static const R_CallMethodDef CallEntries[] = { - {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, - {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, - {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, - {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_selectiveInference(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} diff --git a/test.R b/test.R deleted file mode 100644 index 0660840d..00000000 --- a/test.R +++ /dev/null @@ -1,70 +0,0 @@ -set.seed(43) - - n = 100 - p = 200 - lam = 0.2 - X = matrix(rnorm(n*p), n, p) - Y = rnorm(n) - library(selectiveInference) - p = ncol(X) - soln_R = rep(0, p) - grad = -t(X) %*% Y / n - ever_active = as.integer(c(1, rep(0, p-1))) - nactive = as.integer(1) - kkt_tol = 1.e-12 - objective_tol = 1.e-16 - maxiter = 500 - soln_R = rep(0, p) - grad = -t(X) %*% Y / n - ever_active = as.integer(c(1, rep(0, p-1))) - nactive = as.integer(1) - kkt_tol = 1.e-12 - objective_tol = 1.e-16 - maxiter = 500 - soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln - - # test wide solver - Xtheta = rep(0, n) - nactive = as.integer(1) - ever_active = as.integer(c(1, rep(0, p-1))) - soln_R_wide = rep(0, p) - grad = - t(X) %*% Y / n - soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R_wide, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p)$soln - -# soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p) -# print('active') -# print(nactive) -# print(ever_active) -# print(soln_R$ever_active) -# soln_R = soln_R$soln -# soln_R_old = soln_R -# print(soln_R) -# Xtheta = rep(0, n) -# nactive = as.integer(1) -# ever_active = as.integer(c(1, rep(0, p-1))) -# soln_R = rep(0, p) -# grad = - t(X) %*% Y / n -# # test wide solver -# soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R*1., -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) -# 
print(nactive) -# print(soln_R_wide$ever_active) - - print('diff') - print(soln_R_wide - soln_R) -# print(soln_R_wide$gradient[soln_R_wide$ever_active]) -# print(max(abs(soln_R_wide$gradient[-soln_R_wide$ever_active]))) -# print(soln_R_wide$kkt_check) -# print(soln_R_wide$iter) -# print(max(abs(Xtheta - X %*% soln_R_wide$soln))) -# # print(Xtheta) - -# print('R objective') -# print(0.5 * sum(Xtheta^2)/n - sum(Xtheta*Y)/n + lam * 0.7 * sum(abs(soln_R_wide$soln))) -# print(max(abs(soln_R_wide$gradient - t(X) %*% X %*% soln_R_wide$soln / n + t(X) %*% Y / n))) -# print(lam) -# print(max(abs(soln_R_wide$gradient[soln_R_wide$soln != 0]))) -# print(which(soln_R_wide$soln != 0)) -# soln_R_wide = selectiveInference:::solve_QP_wide(X, 0.7 * lam, maxiter, soln_R_wide$soln, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) -# # print(Xtheta - X %*% soln_R_wide$soln) -# # print(soln_R_wide$soln) -# soln_R_wide = selectiveInference:::solve_QP_wide(X, 0.5 * lam, maxiter, soln_R_wide$soln, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p) \ No newline at end of file From 4d376c497741c3d404a20e9428cc103efcd049ec Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 27 Sep 2017 23:10:33 -0700 Subject: [PATCH 260/396] added R code decision for wide or not; R check passing --- selectiveInference/R/funs.fixed.R | 91 +++++++++++++++-------- selectiveInference/man/debiasingMatrix.Rd | 15 +++- selectiveInference/src/debias.h | 4 +- 3 files changed, 72 insertions(+), 38 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 6e1ffa9d..f119a30d 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -154,20 +154,24 @@ fixedLassoInf <- function(x, y, beta, # Reorder so that active set S is first Xordered = Xint[,c(S,notS,recursive=T)] + hsigmaS = 1/n*(t(XS)%*%XS) # hsigma[S,S] + hsigmaSinv = solve(hsigmaS) # pinv(hsigmaS) - hsigma <- 1/n*(t(Xordered)%*%Xordered) - hsigmaS <- 1/n*(t(XS)%*%XS) # hsigma[S,S] - hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) + FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) + GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S))) - # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R + is_wide = n < (2 * p) # somewhat arbitrary decision -- it is really for when we don't want to form with pxp matrices - htheta = debiasingMatrix(hsigma, n, 1:length(S), verbose=FALSE, max_try=linesearch.try, warn_kkt=TRUE) + # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R + if (!is_wide) { + hsigma = 1/n*(t(Xordered)%*%Xordered) + htheta = debiasingMatrix(hsigma, is_wide, n, 1:length(S), verbose=FALSE, max_try=linesearch.try, warn_kkt=TRUE) + ithetasigma = (GS-(htheta%*%hsigma)) + } else { + htheta = debiasingMatrix(Xordered, is_wide, n, 1:length(S), verbose=FALSE, max_try=linesearch.try, warn_kkt=TRUE) + ithetasigma = (GS-((htheta%*%t(Xordered)) %*% Xordered)/n) + } - FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) - GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S))) - ithetasigma = (GS-(htheta%*%hsigma)) - # ithetasigma = (diag(pp) - (htheta%*%hsigma)) - M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) # vector which is offset for testing debiased beta's null_value <- (((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) @@ -264,10 +268,11 @@ fixedLassoPoly = ## Approximates inverse covariance matrix theta ## using coordinate descent 
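# Each requested row is computed by coordinate descent on
#   minimize_theta 0.5 * t(theta) %*% Sigma %*% theta + sum(linear_func * theta) + mu * sum(abs(theta)),
# a dual of problem (4) in https://arxiv.org/pdf/1306.3171.pdf (see the C
# sources above). A minimal R restatement of the stopping rule enforced by
# check_KKT_qp / check_KKT_wide, assuming gradient = Sigma %*% theta +
# linear_func and bound = mu (the helper name kkt_satisfied is illustrative):
kkt_satisfied <- function(theta, gradient, bound, tol = 1.e-8) {
    active <- theta != 0
    # active coordinates: gradient must equal -bound * sign(theta) to within tol
    # inactive coordinates: gradient must lie within (1 + tol) * bound in absolute value
    all(abs(gradient[active] + bound * sign(theta[active])) <= tol * bound) &&
        all(abs(gradient[!active]) <= (1 + tol) * bound)
}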
-debiasingMatrix = function(Sigma, +debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n depending on is_wide + is_wide, nsample, rows, - verbose=FALSE, + verbose=FALSE, mu=NULL, # starting value of mu linesearch=TRUE, # do a linesearch? scaling_factor=1.5, # multiplicative factor for linesearch @@ -284,7 +289,7 @@ debiasingMatrix = function(Sigma, max_active = max(50, 0.3 * nsample) } - p = nrow(Sigma); + p = ncol(Xinfo); M = matrix(0, length(rows), p); if (is.null(mu)) { @@ -302,12 +307,13 @@ debiasingMatrix = function(Sigma, print(paste(xperc,"% done",sep="")); } } - output = debiasingRow(Sigma, + output = debiasingRow(Xinfo, # could be X or t(X) %*% X / n depending on is_wide + is_wide, row, mu, linesearch=linesearch, scaling_factor=scaling_factor, - max_active=max_active, + max_active=max_active, max_try=max_try, warn_kkt=FALSE, max_iter=max_iter, @@ -329,14 +335,15 @@ debiasingMatrix = function(Sigma, return(M) } -# Find one row of the debiasing matrix +# Find one row of the debiasing matrix -- assuming X^TX/n is not too large -- i.e. X is tall -debiasingRow = function (Sigma, +debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n depending on is_wide + is_wide, row, mu, - linesearch=TRUE, # do a linesearch? + linesearch=TRUE, # do a linesearch? scaling_factor=1.2, # multiplicative factor for linesearch - max_active=NULL, # how big can active set get? + max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? max_iter=100, # how many iterations for each optimization problem @@ -344,10 +351,10 @@ debiasingRow = function (Sigma, objective_tol=1.e-8 # tolerance for relative decrease in objective ) { - p = nrow(Sigma) + p = ncol(Xinfo) if (is.null(max_active)) { - max_active = nrow(Sigma) + max_active = nrow(Xinfo) } # Initialize variables @@ -371,18 +378,37 @@ debiasingRow = function (Sigma, while (counter_idx < max_try) { - result = solve_QP(Sigma, - mu, - max_iter, - soln, - linear_func, - gradient, - ever_active, - nactive, - kkt_tol, - objective_tol, - max_active) + if (!is_wide) { + Sigma = Xinfo + result = solve_QP(Sigma, + mu, + max_iter, + soln, + linear_func, + gradient, + ever_active, + nactive, + kkt_tol, + objective_tol, + max_active) + } else { + X = Xinfo + n = nrow(X) + Xsoln = rep(0, n) + result = solve_QP_wide(X, + mu, + max_iter, + soln, + linear_func, + gradient, + Xsoln, + ever_active, + nactive, + kkt_tol, + objective_tol, + max_active) + } iter = result$iter # Logic for whether we should continue the line search @@ -439,6 +465,7 @@ debiasingRow = function (Sigma, } + ############################## print.fixedLassoInf <- function(x, tailarea=TRUE, ...) { diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd index 533a12c6..8c0b3c15 100644 --- a/selectiveInference/man/debiasingMatrix.Rd +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -11,7 +11,8 @@ Newton step from some consistent estimator (such as the LASSO) to find a debiased solution. } \usage{ -debiasingMatrix(Sigma, +debiasingMatrix(Xinfo, + is_wide, nsample, rows, verbose=FALSE, @@ -26,8 +27,14 @@ debiasingMatrix(Sigma, objective_tol=1.e-8) } \arguments{ -\item{Sigma}{ -A symmetric non-negative definite matrix, often a cross-covariance matrix. +\item{Xinfo}{ +Either a non-negative definite matrix S=t(X) %*% X / n or X itself. If +is_wide is TRUE, then Xinfo should be X, otherwise it should be S. 
+} +\item{is_wide}{ +Are we solving for rows of the debiasing matrix assuming it is +a wide matrix so that Xinfo=X and the non-negative definite +matrix of interest is t(X) %*% X / nrow(X). } \item{nsample}{ Number of samples used in forming the cross-covariance matrix. @@ -102,7 +109,7 @@ n = 50 p = 100 X = matrix(rnorm(n * p), n, p) S = t(X) \%*\% X / n -M = debiasingMatrix(S, n, c(1,3,5)) +M = debiasingMatrix(S, FALSE, n, c(1,3,5)) } \ No newline at end of file diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 916bc141..ebcbc933 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -21,7 +21,7 @@ int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* nndef times theta + linear_func */ int nrow, /* how many rows in nndef */ double bound, /* Lagrange multipler for \ell_1 */ - double tol) /* precision for checking KKT conditions */ + double tol); /* precision for checking KKT conditions */ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ double *X_theta_ptr, /* Fitted values */ @@ -49,7 +49,7 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ double bound, /* Lagrange multipler for \ell_1 */ - double tol) /* precision for checking KKT conditions */ + double tol); /* precision for checking KKT conditions */ void update_gradient_wide(double *gradient_ptr, /* X^TX/ncase times theta + linear_func */ double *X_theta_ptr, /* Current fitted values */ From fb7f85f60ebf6b2588230fa6cb6e1490c9a7dde2 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 27 Sep 2017 23:23:10 -0700 Subject: [PATCH 261/396] solving active problem more accurately with more iterations -- KKT check to break --- .../src/quadratic_program_wide.c | 120 ++++++++++++++---- 1 file changed, 97 insertions(+), 23 deletions(-) diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 3504492a..c4f12310 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -163,11 +163,11 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ double *X_theta_ptr, /* Current fitted values */ double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ - double *linear_func_ptr, /* Linear term in objective */ + double *linear_func_ptr, /* Linear term in objective */ int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double bound, /* Lagrange multipler for \ell_1 */ double tol) /* precision for checking KKT conditions */ { // First check inactive @@ -204,6 +204,59 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ return(1); } +int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + double *theta_ptr, /* current theta */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? 
*/ + int nfeature, /* how many columns in X */ + int ncase, /* how many rows in X */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol) /* precision for checking KKT conditions */ +{ + // First check inactive + + int iactive; + double *theta_ptr_tmp; + double gradient; + int ever_active_ptr_tmp; + int nactive = *nactive_ptr; + int active_feature; + int *active_feature_ptr; + + for (iactive=0; iactive 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + + } + else { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double *linear_func_ptr, /* Linear term in objective */ double *nndef_diag_ptr, /* Diagonal entries of Sigma */ @@ -323,10 +376,10 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int ifeature = 0; int iactive = 0; int *active_ptr; - int check_objective = 1; - double old_value, new_value; + int niter_active = 5; + int iter_active; if (check_objective) { @@ -343,26 +396,47 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX for (iter=0; iter Date: Thu, 28 Sep 2017 00:14:02 -0700 Subject: [PATCH 262/396] R code now produces almost identical answer when assuming wide or not --- selectiveInference/R/funs.fixed.R | 14 +- selectiveInference/man/debiasingMatrix.Rd | 5 +- selectiveInference/src/quadratic_program.c | 136 +++++++++++++----- .../src/quadratic_program_wide.c | 13 +- 4 files changed, 116 insertions(+), 52 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index f119a30d..1f9ac2cb 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -300,7 +300,6 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d xp = round(p/10); idx = 1; for (row in rows) { - if ((idx %% xp)==0){ xperc = xperc+10; if (verbose) { @@ -354,13 +353,13 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep p = ncol(Xinfo) if (is.null(max_active)) { - max_active = nrow(Xinfo) + max_active = min(nrow(Xinfo), ncol(Xinfo)) } # Initialize variables soln = rep(0, p) - + Xsoln = rep(0, n) ever_active = rep(0, p) ever_active[1] = row # 1-based ever_active = as.integer(ever_active) @@ -379,8 +378,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep while (counter_idx < max_try) { if (!is_wide) { - Sigma = Xinfo - result = solve_QP(Sigma, + result = solve_QP(Xinfo, # this is non-neg-def matrix mu, max_iter, soln, @@ -392,10 +390,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, max_active) } else { - X = Xinfo - n = nrow(X) - Xsoln = rep(0, n) - result = solve_QP_wide(X, + result = solve_QP_wide(Xinfo, # this is a design matrix mu, max_iter, soln, @@ -409,6 +404,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep max_active) } + iter = result$iter # Logic for whether we should continue the line search diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd index 8c0b3c15..d0e908d2 100644 --- a/selectiveInference/man/debiasingMatrix.Rd +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -108,8 +108,9 @@ set.seed(10) n = 50 p = 100 X = matrix(rnorm(n * p), n, p) -S = t(X) \%*\% X / n +S = t(X) %*% X / n M = debiasingMatrix(S, FALSE, n, c(1,3,5)) - +M2 = debiasingMatrix(X, TRUE, n, c(1,3,5)) +max(M - M2) } \ No newline at end of 
file diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index cee3fa4e..d9bd0170 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -17,7 +17,7 @@ double objective_qp(double *nndef_ptr, /* A non-negative definite matrix * int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in nndef */ double bound, /* Lagrange multipler for \ell_1 */ - double *theta) /* current value */ + double *theta_ptr) /* current value */ { int irow, icol; double value = 0; @@ -28,20 +28,20 @@ double objective_qp(double *nndef_ptr, /* A non-negative definite matrix * int active_row, active_col; int nactive = *nactive_ptr; - theta_row_ptr = theta; - theta_col_ptr = theta; + theta_row_ptr = theta_ptr; + theta_col_ptr = theta_ptr; for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { + if (*theta_ptr_tmp != 0) { // these coordinates of gradients should be equal to -bound + if ((*theta_ptr_tmp > 0) && (fabs(gradient + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { + else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { return(0); } } @@ -128,6 +128,57 @@ int check_KKT_qp(double *theta, /* current theta */ return(1); } +int check_KKT_qp_active(int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + double *theta_ptr, /* current theta */ + double *gradient_ptr, /* nndef times theta + linear_func */ + int nrow, /* how many rows in nndef */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol) /* precision for checking KKT conditions */ +{ + // First check inactive + + int iactive; + double *theta_ptr_tmp; + double gradient; + double *gradient_ptr_tmp; + int nactive = *nactive_ptr; + int active_feature; + int *active_feature_ptr; + + for (iactive=0; iactive 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + + } + else { + if (fabs(gradient) > (1. 
+ tol) * bound) { + return(0); + } + } + } + + return(1); +} + + double update_one_coord_qp(double *nndef_ptr, /* A non-negative definite matrix */ double *linear_func_ptr, /* Linear term in objective */ double *nndef_diag_ptr, /* Diagonal of nndef */ @@ -136,7 +187,7 @@ double update_one_coord_qp(double *nndef_ptr, /* A non-negative defini int *nactive_ptr, /* Size of ever active set */ int nrow, /* How many rows in nndef */ double bound, /* feasibility parameter */ - double *theta, /* current value */ + double *theta_ptr, /* current value */ int coord, /* which coordinate to update: 0-based */ int is_active) /* Is this coord in ever_active */ { @@ -147,7 +198,7 @@ double update_one_coord_qp(double *nndef_ptr, /* A non-negative defini double old_value; double *nndef_ptr_tmp; double *gradient_ptr_tmp; - double *theta_ptr; + double *theta_ptr_tmp; int icol = 0; double *quadratic_ptr = ((double *) nndef_diag_ptr + coord); @@ -156,8 +207,8 @@ double update_one_coord_qp(double *nndef_ptr, /* A non-negative defini gradient_ptr_tmp = ((double *) gradient_ptr + coord); linear_term = *gradient_ptr_tmp; - theta_ptr = ((double *) theta + coord); - old_value = *theta_ptr; + theta_ptr_tmp = ((double *) theta_ptr + coord); + old_value = *theta_ptr_tmp; // The coord entry of gradient_ptr term has a diagonal term in it: // nndef[coord, coord] * theta[coord] @@ -200,8 +251,8 @@ double update_one_coord_qp(double *nndef_ptr, /* A non-negative defini nndef_ptr_tmp += 1; } - theta_ptr = ((double *) theta + coord); - *theta_ptr = value; + theta_ptr_tmp = ((double *) theta_ptr + coord); + *theta_ptr_tmp = value; } @@ -230,6 +281,8 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ int *active_ptr; int check_objective = 1; + int iter_active; + int niter_active=5; double old_value, new_value; @@ -248,23 +301,38 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ for (iter=0; iter // for fabs -// Find an approximate row of \hat{Sigma}^{-1} +// Find an approximate row of \hat{nndef}^{-1} // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf @@ -222,7 +222,6 @@ int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0- int iactive; double *theta_ptr_tmp; double gradient; - int ever_active_ptr_tmp; int nactive = *nactive_ptr; int active_feature; int *active_feature_ptr; @@ -259,16 +258,16 @@ int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0- double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double *linear_func_ptr, /* Linear term in objective */ - double *nndef_diag_ptr, /* Diagonal entries of Sigma */ - double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + double *nndef_diag_ptr, /* Diagonal of nndef */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ int *ever_active_ptr, /* Ever active set: 1-based */ int *nactive_ptr, /* Size of ever active set */ double *X_theta_ptr, /* X\theta -- fitted values */ int *need_update_ptr, /* Whether a gradient coordinate needs update or not */ - int ncase, /* How many rows in X */ - int nfeature, /* How many rows in X */ + int ncase, /* How many rows in X */ + int nfeature, /* How many rows in X */ double bound, /* feasibility parameter */ - double *theta_ptr, /* current value */ + double *theta_ptr, /* current value */ int coord, /* which coordinate to update: 0-based */ int is_active) /* Is this coord in ever_active */ { From c0dab8cf27d226cbeb0c8a7c671c4155cfb546bb Mon Sep 17 00:00:00 2001 From: tibs Date: Tue, 3 Oct 2017 18:15:48 -0700 
Subject: [PATCH 263/396] rob fixed type=full option --- selectiveInference/R/funs.fixed.R | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 1f9ac2cb..a2776e54 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -82,10 +82,11 @@ fixedLassoInf <- function(x, y, beta, tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) # print(tol.coef) - vars = which(abs(beta) > tol.coef) + # vars = which(abs(beta) > tol.coef) + vars = abs(beta) > tol.coef # print(beta) # print(vars) - if(length(vars)==0){ + if(sum(vars)==0){ cat("Empty model",fill=T) return() } @@ -96,10 +97,17 @@ fixedLassoInf <- function(x, y, beta, "'thresh' parameter, for a more accurate convergence.")) # Get lasso polyhedral region, of form Gy >= u +<<<<<<< HEAD + if (type == 'full') out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) + else out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u +======= if (type == 'full' & p > n) out = fixedLassoPoly(x,y,lambda,beta,vars,inactive=TRUE) else out = fixedLassoPoly(x,y,lambda,beta,vars) A = out$A b = out$b +>>>>>>> 726b917649c7aaabd030b2cab062836ca774ef57 # Check polyhedral region tol.poly = 0.01 @@ -127,7 +135,8 @@ fixedLassoInf <- function(x, y, beta, # add additional targets for inference if provided if (!is.null(add.targets)) vars = sort(unique(c(vars,add.targets,recursive=T))) - k = length(vars) + k = length(vars) + k=sum(vars) pv = vlo = vup = numeric(k) vmat = matrix(0,k,n) ci = tailarea = matrix(0,k,2) From ae37fdcd5dce5d2e2870b25542e141810509d08c Mon Sep 17 00:00:00 2001 From: tibs Date: Tue, 3 Oct 2017 22:35:22 -0700 Subject: [PATCH 264/396] rob type=full --- selectiveInference/R/RcppExports.R | 10 ++--- selectiveInference/R/funs.fixed.R | 12 +----- selectiveInference/R/funs.inf.R | 2 +- selectiveInference/src/RcppExports.cpp | 55 ++++++++++++++++---------- 4 files changed, 42 insertions(+), 37 deletions(-) diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index 8af46403..f5ebee43 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,19 +1,19 @@ -# This file was generated by Rcpp::compileAttributes +# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, 
w, m, k) + .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index a2776e54..102ca460 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -97,17 +97,10 @@ fixedLassoInf <- function(x, y, beta, "'thresh' parameter, for a more accurate convergence.")) # Get lasso polyhedral region, of form Gy >= u -<<<<<<< HEAD - if (type == 'full') out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) - else out = fixedLasso.poly(x,y,beta,lambda,vars) - G = out$G - u = out$u -======= - if (type == 'full' & p > n) out = fixedLassoPoly(x,y,lambda,beta,vars,inactive=TRUE) + if (type == 'full') out = fixedLassoPoly(x,y,lambda,beta,vars,inactive=TRUE) else out = fixedLassoPoly(x,y,lambda,beta,vars) A = out$A b = out$b ->>>>>>> 726b917649c7aaabd030b2cab062836ca774ef57 # Check polyhedral region tol.poly = 0.01 @@ -135,7 +128,7 @@ fixedLassoInf <- function(x, y, beta, # add additional targets for inference if provided if (!is.null(add.targets)) vars = sort(unique(c(vars,add.targets,recursive=T))) - k = length(vars) + # k = length(vars) k=sum(vars) pv = vlo = vup = numeric(k) vmat = matrix(0,k,n) @@ -510,4 +503,3 @@ print.fixedLassoInf <- function(x, tailarea=TRUE, ...) { # lambda = 2*mean(apply(t(x)%*%eps,2,max)) # return(lambda) #} - diff --git a/selectiveInference/R/funs.inf.R b/selectiveInference/R/funs.inf.R index 9d7e740f..0688c1fb 100644 --- a/selectiveInference/R/funs.inf.R +++ b/selectiveInference/R/funs.inf.R @@ -209,7 +209,7 @@ TG.limits = function(Z, A, b, eta, Sigma=NULL) { target_estimate = sum(as.numeric(eta) * as.numeric(Z)) if (max(A %*% as.numeric(Z) - b) > 0) { - warning('Contsraint not satisfied. A %*% Z should be elementwise less than or equal to b') + warning('Constraint not satisfied. 
A %*% Z should be elementwise less than or equal to b') } if (is.null(Sigma)) { diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index efa87d53..02a77413 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// This file was generated by Rcpp::compileAttributes +// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,16 +22,16 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // solve_QP_wide Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -44,36 +44,49 @@ BEGIN_RCPP 
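// Note: RcppExports.cpp and RcppExports.R are generated by
// Rcpp::compileAttributes() (the Makefile's Rcpp target); the
// __result/__rngScope versus rcpp_result_gen/rcpp_rngScope_gen churn in this
// series reflects regeneration under different Rcpp versions, which is why
// the generated header warns "do not edit by hand".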
Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - __result = Rcpp::wrap(update1_(Q2, w, m, k)); - return __result; + rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); + return rcpp_result_gen; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type n(nSEXP); - __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return __result; + rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return rcpp_result_gen; END_RCPP } + +static const R_CallMethodDef CallEntries[] = { + {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, + {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, + {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, + {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_selectiveInference(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} From 8f356ff9228e0b148814761525fc65abd565c547 Mon Sep 17 00:00:00 2001 From: tibs Date: Wed, 4 Oct 2017 08:46:23 -0700 Subject: [PATCH 265/396] rob fixed another type=full bug --- selectiveInference/R/funs.fixed.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 102ca460..250a9d45 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -82,8 +82,8 @@ fixedLassoInf <- function(x, y, beta, tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) # print(tol.coef) - # vars = which(abs(beta) > 
tol.coef) - vars = abs(beta) > tol.coef + vars = which(abs(beta) > tol.coef) + # vars = abs(beta) > tol.coef # print(beta) # print(vars) if(sum(vars)==0){ @@ -97,8 +97,10 @@ fixedLassoInf <- function(x, y, beta, "'thresh' parameter, for a more accurate convergence.")) # Get lasso polyhedral region, of form Gy >= u - if (type == 'full') out = fixedLassoPoly(x,y,lambda,beta,vars,inactive=TRUE) - else out = fixedLassoPoly(x,y,lambda,beta,vars) +logical.vars=rep(FALSE,p) +logical.vars[vars]=TRUE + if (type == 'full') out = fixedLassoPoly(x,y,lambda,beta,logical.vars,inactive=TRUE) + else out = fixedLassoPoly(x,y,lambda,beta,logical.vars) A = out$A b = out$b @@ -128,8 +130,7 @@ fixedLassoInf <- function(x, y, beta, # add additional targets for inference if provided if (!is.null(add.targets)) vars = sort(unique(c(vars,add.targets,recursive=T))) - # k = length(vars) - k=sum(vars) + k = length(vars) pv = vlo = vup = numeric(k) vmat = matrix(0,k,n) ci = tailarea = matrix(0,k,2) From 782290e86f61a8e1245f8581f5fd65059c7d855f Mon Sep 17 00:00:00 2001 From: tibs Date: Wed, 4 Oct 2017 09:35:38 -0700 Subject: [PATCH 266/396] small change to current CRAN --- selectiveInference-currentCRAN/DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference-currentCRAN/DESCRIPTION b/selectiveInference-currentCRAN/DESCRIPTION index 0eb38f7a..f7af810d 100644 --- a/selectiveInference-currentCRAN/DESCRIPTION +++ b/selectiveInference-currentCRAN/DESCRIPTION @@ -1,12 +1,12 @@ Package: selectiveInference Type: Package Title: Tools for Post-Selection Inference -Version: 1.2.3 +Version: 1.2.4 Date: 2017-09-18 Author: Ryan Tibshirani, Rob Tibshirani, Jonathan Taylor, Joshua Loftus, Stephen Reid Maintainer: Rob Tibshirani -Depends: glmnet, intervals, survival +Depends: glmnet, intervals, survival, R (>= 3.4.0) Suggests: Rmpfr Description: New tools for post-selection inference, for use with forward stepwise regression, least angle regression, the From 96e87abe19ad8e1b972000099fd2c837ef06e434 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 9 Sep 2017 14:14:56 -0700 Subject: [PATCH 267/396] travis failing with install for shell reason -- maybe caps works better? 
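R subcommands are case-sensitive: the documented form is R CMD INSTALL, and
the lowercase "R CMD install" is generally not recognized, which would explain
the Travis install failure this patch addresses.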
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91a89a4d..e10e1114 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ Rcpp: Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" install: Rcpp - R CMD install selectiveInference + R CMD INSTALL selectiveInference build: R CMD build selectiveInference From be257f10ad4740eb16d3283432cbb2611a87cadb Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 10 Oct 2017 13:21:54 -0700 Subject: [PATCH 268/396] small cleanup of fixedLassoPoly --- selectiveInference/R/funs.fixed.R | 49 ++++++++++++++++++------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 250a9d45..6ddf5133 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -80,12 +80,9 @@ fixedLassoInf <- function(x, y, beta, warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) - tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) - # print(tol.coef) - vars = which(abs(beta) > tol.coef) - # vars = abs(beta) > tol.coef - # print(beta) - # print(vars) + tol.coef = tol.beta * sqrt(n / colSums(x^2)) + vars = which(abs(beta) > tol.coef) + if(sum(vars)==0){ cat("Empty model",fill=T) return() @@ -97,10 +94,17 @@ fixedLassoInf <- function(x, y, beta, "'thresh' parameter, for a more accurate convergence.")) # Get lasso polyhedral region, of form Gy >= u -logical.vars=rep(FALSE,p) -logical.vars[vars]=TRUE - if (type == 'full') out = fixedLassoPoly(x,y,lambda,beta,logical.vars,inactive=TRUE) - else out = fixedLassoPoly(x,y,lambda,beta,logical.vars) + + logical.vars=rep(FALSE,p) + logical.vars[vars]=TRUE + + if (type == 'full') { + out = fixedLassoPoly(x, y, lambda, beta, logical.vars, inactive=TRUE) + } + else { + out = fixedLassoPoly(x, y, lambda, beta, logical.vars) + } + A = out$A b = out$b @@ -233,8 +237,8 @@ logical.vars[vars]=TRUE fixedLassoPoly = function(X, y, lambda, beta, active, inactive = FALSE) { - Xa = X[,active,drop=F] - Xac = X[,!active,drop=F] + Xa = X[, active, drop=FALSE] + Xac = X[, !active, drop=FALSE] Xai = pinv(crossprod(Xa)) Xap = Xai %*% t(Xa) @@ -242,25 +246,30 @@ fixedLassoPoly = if (length(za)>1) dz = diag(za) if (length(za)==1) dz = matrix(za,1,1) + if(length(lambda)>1) { + lambdaA= lambda[active] + lambdaI = lambda[!active] + } else { + lambdaA = rep(lambda, sum(active)) + lambdaI = rep(lambda, sum(!active)) + } if (inactive) { # should we include the inactive constraints? 
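# In the A y <= b representation built below, the inactive rows encode the
# lasso KKT subgradient bound |t(Xac) %*% (y - Xa %*% betahat_a)| <= lambdaI
# componentwise (a pair of one-sided rows per inactive variable), and the
# active rows -dz %*% Xap encode sign consistency of the selected
# coefficients, dz %*% betahat_a(y) >= 0. Here betahat_a denotes the active
# block of the lasso solution, betahat_a = Xap %*% y - Xai %*% (lambdaA * za).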
- R = diag(1,nrow(Xa)) - Xa %*% Xap # R is residual forming matrix of selected model + R = diag(rep(1, nrow(Xa))) - Xa %*% Xap # R is residual forming matrix of selected model A = rbind( - 1/lambda * t(Xac) %*% R, - -1/lambda * t(Xac) %*% R, + 1/lambdaI * t(Xac) %*% R, + -1/lambdaI * t(Xac) %*% R, -dz %*% Xap ) lambda2=lambda - if(length(lambda)>1) lambda2=lambda[active] + b = c( 1 - t(Xac) %*% t(Xap) %*% za, 1 + t(Xac) %*% t(Xap) %*% za, - -lambda2 * dz %*% Xai %*% za) + -lambdaA * dz %*% Xai %*% za) } else { A = -dz %*% Xap - lambda2=lambda - if(length(lambda)>1) lambda2=lambda[active] - b = -lambda2 * dz %*% Xai %*% za + b = -lambdaA * dz %*% Xai %*% za } return(list(A=A, b=b)) From 25dce248fcd030f3ae05f0aba310a0adedd71c51 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 10 Oct 2017 14:45:03 -0700 Subject: [PATCH 269/396] rewriting fixedLassoPoly to allow for vector lambda (including unpenalized 0 coords) --- selectiveInference/R/RcppExports.R | 10 ++--- selectiveInference/R/funs.fixed.R | 42 +++++++++++--------- selectiveInference/src/RcppExports.cpp | 55 ++++++++++---------------- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index f5ebee43..8af46403 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,19 +1,19 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# This file was generated by Rcpp::compileAttributes # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) + .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 6ddf5133..0fa0d719 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -237,15 +237,13 @@ fixedLassoInf <- function(x, y, beta, fixedLassoPoly = function(X, y, lambda, beta, active, inactive = FALSE) { - Xa = X[, active, drop=FALSE] - Xac = X[, !active, drop=FALSE] - Xai = pinv(crossprod(Xa)) - Xap = Xai %*% t(Xa) - - za = sign(beta[active]) - if (length(za)>1) dz = diag(za) - if (length(za)==1) dz 
= matrix(za,1,1) - + + XA = X[, active, drop=FALSE] + XI = X[, !active, drop=FALSE] + XAi = pinv(crossprod(XA)) + XAp = XAi %*% t(XA) + Ir = t(XI) %*% t(XAp) # matrix in the "irrepresentable" condition + if(length(lambda)>1) { lambdaA= lambda[active] lambdaI = lambda[!active] @@ -253,23 +251,29 @@ fixedLassoPoly = lambdaA = rep(lambda, sum(active)) lambdaI = rep(lambda, sum(!active)) } + + penalized = lambdaA != 0 + signA = sign(beta[active]) + active_subgrad = signA * lambdaA + if (length(signA)>1) sign_diag = diag(signA) + if (length(signA)==1) sign_diag = matrix(signA, 1, 1) + if (inactive) { # should we include the inactive constraints? - R = diag(rep(1, nrow(Xa))) - Xa %*% Xap # R is residual forming matrix of selected model + RA = diag(rep(1, nrow(XA))) - XA %*% XAp # RA is residual forming matrix of selected model A = rbind( - 1/lambdaI * t(Xac) %*% R, - -1/lambdaI * t(Xac) %*% R, - -dz %*% Xap + t(XI) %*% RA, + -t(XI) %*% RA, + -(sign_diag %*% XAp)[penalized,] # no constraints for unpenalized ) - lambda2=lambda b = c( - 1 - t(Xac) %*% t(Xap) %*% za, - 1 + t(Xac) %*% t(Xap) %*% za, - -lambdaA * dz %*% Xai %*% za) + lambdaI - Ir %*% active_subgrad, + lambdaI + Ir %*% active_subgrad, + -(sign_diag %*% XAi %*% active_subgrad)[penalized]) } else { - A = -dz %*% Xap - b = -lambdaA * dz %*% Xai %*% za + A = -(sign_diag %*% XAp)[penalized,] # no constraints for unpenalized + b = -(sign_diag %*% XAi %*% active_subgrad)[penalized] } return(list(A=A, b=b)) diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index 02a77413..efa87d53 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// This file was generated by Rcpp::compileAttributes // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,16 +22,16 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; + __result = 
Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return __result; END_RCPP } // solve_QP_wide Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -44,49 +44,36 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; + __result = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return __result; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); - return rcpp_result_gen; + __result = Rcpp::wrap(update1_(Q2, w, m, k)); + return __result; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); 
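    // A note on the generated wrappers above and below:
    // Rcpp::traits::input_parameter<T> coerces each incoming SEXP to the
    // declared C++ type (Rcpp::as semantics) before the exported function is
    // invoked, and RNGScope saves/restores R's random-number state around the call.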
Rcpp::traits::input_parameter< int >::type n(nSEXP); - rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return rcpp_result_gen; + __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return __result; END_RCPP } - -static const R_CallMethodDef CallEntries[] = { - {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, - {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, - {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, - {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_selectiveInference(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} From 17eaff40d0d880745815a44428978a1b84af78e7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 10 Oct 2017 14:48:59 -0700 Subject: [PATCH 270/396] undefined reference to n --- selectiveInference/R/funs.fixed.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 0fa0d719..83c0f93b 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -375,7 +375,6 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep # Initialize variables soln = rep(0, p) - Xsoln = rep(0, n) ever_active = rep(0, p) ever_active[1] = row # 1-based ever_active = as.integer(ever_active) @@ -406,6 +405,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, max_active) } else { + Xsoln = rep(0, nrow(Xinfo)) result = solve_QP_wide(Xinfo, # this is a design matrix mu, max_iter, From eb2a5e4b47d270a71ddd8855a97e1c32955de0ed Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 10 Oct 2017 14:52:28 -0700 Subject: [PATCH 271/396] needed LaTeX escape chars --- selectiveInference/man/debiasingMatrix.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd index d0e908d2..4da925e6 100644 --- a/selectiveInference/man/debiasingMatrix.Rd +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -108,7 +108,7 @@ set.seed(10) n = 50 p = 100 X = matrix(rnorm(n * p), n, p) -S = t(X) %*% X / n +S = t(X) \%*\% X / n M = debiasingMatrix(S, FALSE, n, c(1,3,5)) M2 = debiasingMatrix(X, TRUE, n, c(1,3,5)) max(M - M2) From 43fbddf9d2a18dea84326867dcf0e0ac90bfdc12 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 10 Oct 2017 15:54:14 -0700 Subject: [PATCH 272/396] added a full n>p example --- selectiveInference/man/fixedLassoInf.Rd | 59 ++++++++++++++++++------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 5fd8c243..445e72a7 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -257,29 +257,54 @@ set.seed(43) out = fixedLassoInf(x, tim, beta_hat, lambda, status=status, family="cox") out -# Debiased lasso or "full" + # Debiased lasso or "full" -n = 50 -p = 100 -sigma = 1 + n = 50 + p = 100 + sigma = 1 -x = matrix(rnorm(n*p),n,p) -x = scale(x,TRUE,TRUE) + x = matrix(rnorm(n*p),n,p) + x = scale(x,TRUE,TRUE) -beta = c(3,2,rep(0,p-2)) -y = x\%*\%beta + sigma*rnorm(n) + beta = c(3,2,rep(0,p-2)) + y = x\%*\%beta + sigma*rnorm(n) -# first run glmnet -gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE) + # first run glmnet + gfit = glmnet(x, y, 
standardize=FALSE, intercept=FALSE) -# extract coef for a given lambda; note the 1/n factor! -# (and we don't save the intercept term) -lambda = 2.8 -beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1] + # extract coef for a given lambda; note the 1/n factor! + # (and we don't save the intercept term) + lambda = 2.8 + beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1] -# compute fixed lambda p-values and selection intervals -out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) -out + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) + out + + # When n > p and "full" we use the full inverse + # instead of Javanmard and Montanari's approximate inverse + + n = 200 + p = 50 + sigma = 1 + + x = matrix(rnorm(n*p),n,p) + x = scale(x,TRUE,TRUE) + + beta = c(3,2,rep(0,p-2)) + y = x\%*\%beta + sigma*rnorm(n) + + # first run glmnet + gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE) + + # extract coef for a given lambda; note the 1/n factor! + # (and we don't save the intercept term) + lambda = 2.8 + beta = coef(gfit, x=x, y=y, s=lambda/n, exact=TRUE)[-1] + + # compute fixed lambda p-values and selection intervals + out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) + out } \ No newline at end of file From a6fcd6e283c32e3b20c7f1cd762e0ccecf728915 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 10 Oct 2017 23:00:08 -0700 Subject: [PATCH 273/396] fix of signs for full --- selectiveInference/R/funs.fixed.R | 41 ++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 83c0f93b..be314af1 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -82,17 +82,20 @@ fixedLassoInf <- function(x, y, beta, tol.coef = tol.beta * sqrt(n / colSums(x^2)) vars = which(abs(beta) > tol.coef) + sign_vars = sign(beta[vars]) if(sum(vars)==0){ cat("Empty model",fill=T) return() } - if (any(sign(g[vars]) != sign(beta[vars]))) + + if (any(sign(g[vars]) != sign_vars)) { warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances). You might try rerunning", "glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - + } + # Get lasso polyhedral region, of form Gy >= u logical.vars=rep(FALSE,p) @@ -132,13 +135,19 @@ fixedLassoInf <- function(x, y, beta, } # add additional targets for inference if provided - if (!is.null(add.targets)) vars = sort(unique(c(vars,add.targets,recursive=T))) - - k = length(vars) + if (!is.null(add.targets)) { + # vars is boolean... 
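      # Note: `vars` is created with which() above, i.e. an index vector, so the
      # boolean-style update below does not quite match; the apparent intent is
      # for added (non-active) targets to enter `vars` with sign NA, so that the
      # is.na(sign_vars[j]) branch further down reports two-sided p-values for
      # them -- presumably why the stop() below flags this as unfinished.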
+ old_vars = vars & TRUE + vars[add.targets] = TRUE + sign_vars = sign(beta[vars]) + sign_vars[!old_vars] = NA + stop("`add.targets` not fully implemented yet") + } + + k = length(vars) pv = vlo = vup = numeric(k) vmat = matrix(0,k,n) ci = tailarea = matrix(0,k,2) - sign = numeric(k) if (type=="full" & p > n) { if (intercept == T) { @@ -202,20 +211,28 @@ fixedLassoInf <- function(x, y, beta, vj = M[j,] mj = sqrt(sum(vj^2)) vj = vj / mj # Standardize (divide by norm of vj) - sign[j] = sign(sum(vj*y)) - vj = sign[j] * vj + + if (!is.na(sign_vars[j])) { + vj = sign_vars[j] * vj + } limits.info = TG.limits(y, A, b, vj, Sigma=diag(rep(sigma^2, n))) a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) pv[j] = a$pv + if (is.na(sign_vars[j])) { # for variables not in the active set, report 2-sided pvalue + pv[j] = 2 * min(pv[j], 1 - pv[j]) + } vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) - vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) - + if (!is.na(sign_vars[j])) { + vmat[j,] = vj * mj * sign_vars[j] # Unstandardize (mult by norm of vj) and fix sign + } else { + vmat[j,] = vj * mj # Unstandardize (mult by norm of vj) + } a = TG.interval.base(limits.info, alpha=alpha, gridrange=gridrange, - flip=(sign[j]==-1), + flip=(sign_vars[j]==-1), bits=bits) ci[j,] = (a$int-null_value[j]) * mj # Unstandardize (mult by norm of vj) tailarea[j,] = a$tailarea @@ -223,7 +240,7 @@ fixedLassoInf <- function(x, y, beta, out = list(type=type,lambda=lambda,pv=pv,ci=ci, tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, - vars=vars,sign=sign,sigma=sigma,alpha=alpha, + vars=vars,sign=sign_vars,sigma=sigma,alpha=alpha, sd=sigma*sqrt(rowSums(vmat^2)), coef0=vmat%*%y, call=this.call) From 8ad2f63d42d04bdd5212ac9c9b10ad75d98498e3 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 11 Oct 2017 07:38:07 -0700 Subject: [PATCH 274/396] variable rename --- selectiveInference/R/funs.fixedCox.R | 12 ++++++------ selectiveInference/R/funs.fixedLogit.R | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index d6ebd6b7..75305112 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -29,7 +29,7 @@ if( sum(status==0)+sum(status==1)!=length(y)) stop("status vector must have valu vars=which(m) if(sum(m)>0){ bhat=beta[beta!=0] #penalized coefs just for active variables - s2=sign(bhat) + sign_bhat=sign(bhat) #check KKT @@ -40,7 +40,7 @@ if(sum(m)>0){ res=residuals(aaa,type="score") if(!is.matrix(res)) res=matrix(res,ncol=1) scor=colSums(res) - g=(scor+lambda*s2)/(2*lambda) + g=(scor+lambda*sign_bhat)/(2*lambda) # cat(c(g,lambda,tol.kkt),fill=T) if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", @@ -49,9 +49,9 @@ scor=colSums(res) # Hessian of partial likelihood at the LASSO solution MM=vcov(aaa) -bbar=(bhat+lambda*MM%*%s2) -A1=-(mydiag(s2)) -b1= -(mydiag(s2)%*%MM)%*%s2*lambda +bbar=(bhat+lambda*MM%*%sign_bhat) +A1=-(mydiag(sign_bhat)) +b1= -(mydiag(sign_bhat)%*%MM)%*%sign_bhat*lambda temp=max(A1%*%bbar-b1) @@ -63,7 +63,7 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda # the one sided p-values are a bit off for(jj in 1:length(bbar)){ - vj=rep(0,length(bbar));vj[jj]=s2[jj] + vj=rep(0,length(bbar));vj[jj]=sign_bhat[jj] junk=TG.pvalue(bbar, A1, b1, vj,MM) diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 
19936b09..16cafdc5 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -32,7 +32,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet m=beta[-1]!=0 #active set bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars - s2=sign(bhat) + sign_bhat=sign(bhat) lam2m=diag(c(0,rep(lambda,sum(m)))) @@ -66,14 +66,14 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet # MM=solve(t(xxm)%*%w%*%xxm) MM=solve(scale(t(xxm),F,1/ww)%*%xxm) gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized - # at exact LASSO solution it should be s2[-1] + # at exact LASSO solution it should be sign_bhat[-1] dbeta = MM %*% gm - # bbar=(bhat+lam2m%*%MM%*%s2) # JT: this is wrong, shouldn't use sign of intercept anywhere... + # bbar=(bhat+lam2m%*%MM%*%sign_bhat) # JT: this is wrong, shouldn't use sign of intercept anywhere... bbar = bhat - dbeta - A1=-(mydiag(s2))[-1,] - b1= (s2 * dbeta)[-1] + A1=-(mydiag(sign_bhat))[-1,] + b1= (sign_bhat * dbeta)[-1] tol.poly = 0.01 if (max((A1 %*% bbar) - b1) > tol.poly) @@ -87,7 +87,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet for(jj in 1:sum(m)){ - vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] + vj=c(rep(0,sum(m)+1));vj[jj+1]=sign_bhat[jj+1] # compute p-values junk=TG.pvalue(bbar, A1, b1, vj, MM) pv[jj] = junk$pv From 749c136eddbbb9928c0e8940b09def5d8aece550 Mon Sep 17 00:00:00 2001 From: tibs Date: Wed, 11 Oct 2017 10:08:57 -0700 Subject: [PATCH 275/396] rob added unifTest.R --- selectiveInference/R/RcppExports.R | 10 +-- selectiveInference/src/RcppExports.cpp | 55 ++++++++----- tests/unifTest.R | 110 +++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 26 deletions(-) create mode 100644 tests/unifTest.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index 8af46403..f5ebee43 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,19 +1,19 @@ -# This file was generated by Rcpp::compileAttributes +# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) + .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - 
.Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index efa87d53..02a77413 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// This file was generated by Rcpp::compileAttributes +// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,16 +22,16 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // solve_QP_wide Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); 
Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -44,36 +44,49 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - __result = Rcpp::wrap(update1_(Q2, w, m, k)); - return __result; + rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); + return rcpp_result_gen; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type n(nSEXP); - __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return __result; + rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return rcpp_result_gen; END_RCPP } + +static const R_CallMethodDef CallEntries[] = { + {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, + {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, + {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, + {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_selectiveInference(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/tests/unifTest.R b/tests/unifTest.R new file mode 100644 index 00000000..535fa683 --- /dev/null +++ b/tests/unifTest.R @@ -0,0 +1,110 @@ + +library(selectiveInference) + +library(glmnet) + +set.seed(424) + +#n=100 +#p=30 + +n=20 +p=40 +sigma=.4 +beta=c(3,2,-1,4,-2,2,rep(0,p-6)) +#beta=rep(0,p) + +tr=beta!=0 + +#type="full" +type="part" + +nsim = 1000 +lambda=.3 +nzb=0 +pvals <- matrix(NA, nrow=nsim, ncol=p) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta + +for (i in 1:nsim) { + cat(i) 
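# Each replicate below: draw a fresh response y from the fixed design, fit the
# lasso at the fixed lambda via glmnet, then store the selective p-values from
# fixedLassoInf in the row of `pvals`, at the columns of the selected variables.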
+y=mu+sigma*rnorm(n) +y=y-mean(y) +# first run glmnet +gfit=glmnet(x,y,intercept=F,standardize=F,thresh=1e-8) + +#extract coef for a given lambda; Note the 1/n factor! + bhat = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y)[-1] + nzb=nzb+sum(bhat!=0) +# compute fixed lambda p-values and selection intervals +aa = fixedLassoInf(x,y,bhat,lambda,intercept=F,sigma=sigma,type=type) +pvals[i, aa$vars] <- aa$pv +} + +# summarize results + +if(type=="partial"){ +nulls=rowSums(is.na(pvals[,tr]))==0 # for type=partial, nonnull setting +np = pvals[nulls,-(1:sum(beta!=0))] +} + +if(type=="full"){ +nulls=1:nrow(pvals) # for type=full non null setting +np = pvals[nulls,-(1:sum(beta!=0))] +} + + + +#np=pvals #for null setting + +o=!is.na(np) + +#check uniformity + +plot((1:sum(o))/sum(o),sort(np[o]),xlab="Expected pvalue",ylab="Observed pvalue") +abline(0,1) + + + # estimate and plot FDR + +pvadj=pvadj.by=pvadj.holm=matrix(NA,nsim,p) +for(ii in 1:nsim){ + o=!is.na(pvals[ii,]) + pvadj[ii,o]=p.adjust(pvals[ii,o],method="BH") + pvadj.by[ii,o]=p.adjust(pvals[ii,o],method="BY") + pvadj.holm[ii,o]=p.adjust(pvals[ii,o],method="holm") + } +qqlist=fdr=se=fdr.by=se.by=fdr.holm=se.holm=c(.05, .1,.15,.2,.25,.3) +jj=0 +for(qq in qqlist){ + jj=jj+1 + +r=v=r.by=v.by=r.holm=v.holm=rep(NA,nsim) +for(ii in 1:nsim){ + v[ii]=sum( (pvadj[ii,] Date: Wed, 11 Oct 2017 10:32:44 -0700 Subject: [PATCH 276/396] rob modified unifTest.R --- tests/unifTest.R | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/tests/unifTest.R b/tests/unifTest.R index 535fa683..bc15bf73 100644 --- a/tests/unifTest.R +++ b/tests/unifTest.R @@ -1,15 +1,16 @@ -library(selectiveInference) +library(selectiveInference, lib.loc="/Users/tibs/dropbox/git/R-software/mylib") library(glmnet) set.seed(424) -#n=100 -#p=30 +n=100 +p=30 + +n=100 +p=200 -n=20 -p=40 sigma=.4 beta=c(3,2,-1,4,-2,2,rep(0,p-6)) #beta=rep(0,p) @@ -17,7 +18,7 @@ beta=c(3,2,-1,4,-2,2,rep(0,p-6)) tr=beta!=0 #type="full" -type="part" +type="partial" nsim = 1000 lambda=.3 @@ -28,7 +29,7 @@ x = scale(x,T,T)/sqrt(n-1) mu = x%*%beta for (i in 1:nsim) { - cat(i) + cat(i,fill=T) y=mu+sigma*rnorm(n) y=y-mean(y) # first run glmnet @@ -68,26 +69,25 @@ abline(0,1) # estimate and plot FDR -pvadj=pvadj.by=pvadj.holm=matrix(NA,nsim,p) +pvadj=pvadj.by=matrix(NA,nsim,p) for(ii in 1:nsim){ - o=!is.na(pvals[ii,]) - pvadj[ii,o]=p.adjust(pvals[ii,o],method="BH") - pvadj.by[ii,o]=p.adjust(pvals[ii,o],method="BY") - pvadj.holm[ii,o]=p.adjust(pvals[ii,o],method="holm") + oo=!is.na(pvals[ii,]) + pvadj[ii,oo]=p.adjust(pvals[ii,oo],method="BH") + pvadj.by[ii,oo]=p.adjust(pvals[ii,oo],method="BY") + } -qqlist=fdr=se=fdr.by=se.by=fdr.holm=se.holm=c(.05, .1,.15,.2,.25,.3) +qqlist=c(.05, .1,.15,.2,.25,.3) +fdr=se=fdr.by=se.by=rep(NA,length(qqlist)) jj=0 for(qq in qqlist){ jj=jj+1 -r=v=r.by=v.by=r.holm=v.holm=rep(NA,nsim) +r=v=r.by=v.by=rep(NA,nsim) for(ii in 1:nsim){ v[ii]=sum( (pvadj[ii,] Date: Wed, 11 Oct 2017 10:46:34 -0700 Subject: [PATCH 277/396] rob modified unifTest.R --- tests/unifTest.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unifTest.R b/tests/unifTest.R index bc15bf73..3f251caa 100644 --- a/tests/unifTest.R +++ b/tests/unifTest.R @@ -106,3 +106,9 @@ lines(qqlist,fdr.by,type="b",col=3) abline(0,1,lty=2) title(paste("n=",as.character(n)," p=",as.character(p)," ",as.character(type))) legend("bottomright",c("BH","BY"),col=c(1,3),lty=1) + + +pv=pvals[ii,] +pvv=sort(pv) +oo=which(pvv<=.2*(1:length(pvv))/length(pvv)) +oo=oo[length(oo)] From 
5f971f6c36c11b4fbe04bb9263dcd4563ac22339 Mon Sep 17 00:00:00 2001 From: tibs Date: Wed, 11 Oct 2017 10:08:57 -0700 Subject: [PATCH 278/396] rob added unifTest.R --- selectiveInference/R/RcppExports.R | 10 +-- selectiveInference/src/RcppExports.cpp | 55 ++++++++----- tests/unifTest.R | 110 +++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 26 deletions(-) create mode 100644 tests/unifTest.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R index 8af46403..f5ebee43 100644 --- a/selectiveInference/R/RcppExports.R +++ b/selectiveInference/R/RcppExports.R @@ -1,19 +1,19 @@ -# This file was generated by Rcpp::compileAttributes +# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) } solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) + .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) } update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) + .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) } downdate1_ <- function(Q1, R, j0, m, n) { - .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) + .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) } diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp index efa87d53..02a77413 100644 --- a/selectiveInference/src/RcppExports.cpp +++ b/selectiveInference/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// This file was generated by Rcpp::compileAttributes +// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -7,10 +7,10 @@ using namespace Rcpp; // solve_QP Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - 
Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -22,16 +22,16 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // solve_QP_wide Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { +RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); Rcpp::traits::input_parameter< double >::type bound(boundSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); @@ -44,36 +44,49 @@ BEGIN_RCPP Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - __result = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return __result; + rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); + return rcpp_result_gen; END_RCPP } // update1_ Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); - __result = Rcpp::wrap(update1_(Q2, w, m, k)); - return __result; + rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, 
m, k)); + return rcpp_result_gen; END_RCPP } // downdate1_ Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); Rcpp::traits::input_parameter< int >::type j0(j0SEXP); Rcpp::traits::input_parameter< int >::type m(mSEXP); Rcpp::traits::input_parameter< int >::type n(nSEXP); - __result = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return __result; + rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return rcpp_result_gen; END_RCPP } + +static const R_CallMethodDef CallEntries[] = { + {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, + {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, + {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, + {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_selectiveInference(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/tests/unifTest.R b/tests/unifTest.R new file mode 100644 index 00000000..535fa683 --- /dev/null +++ b/tests/unifTest.R @@ -0,0 +1,110 @@ + +library(selectiveInference) + +library(glmnet) + +set.seed(424) + +#n=100 +#p=30 + +n=20 +p=40 +sigma=.4 +beta=c(3,2,-1,4,-2,2,rep(0,p-6)) +#beta=rep(0,p) + +tr=beta!=0 + +#type="full" +type="part" + +nsim = 1000 +lambda=.3 +nzb=0 +pvals <- matrix(NA, nrow=nsim, ncol=p) +x = matrix(rnorm(n*p),n,p) +x = scale(x,T,T)/sqrt(n-1) +mu = x%*%beta + +for (i in 1:nsim) { + cat(i) +y=mu+sigma*rnorm(n) +y=y-mean(y) +# first run glmnet +gfit=glmnet(x,y,intercept=F,standardize=F,thresh=1e-8) + +#extract coef for a given lambda; Note the 1/n factor! 
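# (Why the 1/n: glmnet minimizes ||y - X b||^2/(2n) + s*||b||_1, while
# fixedLassoInf is stated for ||y - X b||^2/2 + lambda*||b||_1, so passing
# s = lambda/n makes the two problems agree.)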
+ bhat = coef(gfit, s=lambda/n, exact=TRUE,x=x,y=y)[-1] + nzb=nzb+sum(bhat!=0) +# compute fixed lambda p-values and selection intervals +aa = fixedLassoInf(x,y,bhat,lambda,intercept=F,sigma=sigma,type=type) +pvals[i, aa$vars] <- aa$pv +} + +# summarize results + +if(type=="partial"){ +nulls=rowSums(is.na(pvals[,tr]))==0 # for type=partial, nonnull setting +np = pvals[nulls,-(1:sum(beta!=0))] +} + +if(type=="full"){ +nulls=1:nrow(pvals) # for type=full non null setting +np = pvals[nulls,-(1:sum(beta!=0))] +} + + + +#np=pvals #for null setting + +o=!is.na(np) + +#check uniformity + +plot((1:sum(o))/sum(o),sort(np[o]),xlab="Expected pvalue",ylab="Observed pvalue") +abline(0,1) + + + # estimate and plot FDR + +pvadj=pvadj.by=pvadj.holm=matrix(NA,nsim,p) +for(ii in 1:nsim){ + o=!is.na(pvals[ii,]) + pvadj[ii,o]=p.adjust(pvals[ii,o],method="BH") + pvadj.by[ii,o]=p.adjust(pvals[ii,o],method="BY") + pvadj.holm[ii,o]=p.adjust(pvals[ii,o],method="holm") + } +qqlist=fdr=se=fdr.by=se.by=fdr.holm=se.holm=c(.05, .1,.15,.2,.25,.3) +jj=0 +for(qq in qqlist){ + jj=jj+1 + +r=v=r.by=v.by=r.holm=v.holm=rep(NA,nsim) +for(ii in 1:nsim){ + v[ii]=sum( (pvadj[ii,] Date: Wed, 11 Oct 2017 10:32:44 -0700 Subject: [PATCH 279/396] rob modified unifTest.R --- tests/unifTest.R | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/tests/unifTest.R b/tests/unifTest.R index 535fa683..bc15bf73 100644 --- a/tests/unifTest.R +++ b/tests/unifTest.R @@ -1,15 +1,16 @@ -library(selectiveInference) +library(selectiveInference, lib.loc="/Users/tibs/dropbox/git/R-software/mylib") library(glmnet) set.seed(424) -#n=100 -#p=30 +n=100 +p=30 + +n=100 +p=200 -n=20 -p=40 sigma=.4 beta=c(3,2,-1,4,-2,2,rep(0,p-6)) #beta=rep(0,p) @@ -17,7 +18,7 @@ beta=c(3,2,-1,4,-2,2,rep(0,p-6)) tr=beta!=0 #type="full" -type="part" +type="partial" nsim = 1000 lambda=.3 @@ -28,7 +29,7 @@ x = scale(x,T,T)/sqrt(n-1) mu = x%*%beta for (i in 1:nsim) { - cat(i) + cat(i,fill=T) y=mu+sigma*rnorm(n) y=y-mean(y) # first run glmnet @@ -68,26 +69,25 @@ abline(0,1) # estimate and plot FDR -pvadj=pvadj.by=pvadj.holm=matrix(NA,nsim,p) +pvadj=pvadj.by=matrix(NA,nsim,p) for(ii in 1:nsim){ - o=!is.na(pvals[ii,]) - pvadj[ii,o]=p.adjust(pvals[ii,o],method="BH") - pvadj.by[ii,o]=p.adjust(pvals[ii,o],method="BY") - pvadj.holm[ii,o]=p.adjust(pvals[ii,o],method="holm") + oo=!is.na(pvals[ii,]) + pvadj[ii,oo]=p.adjust(pvals[ii,oo],method="BH") + pvadj.by[ii,oo]=p.adjust(pvals[ii,oo],method="BY") + } -qqlist=fdr=se=fdr.by=se.by=fdr.holm=se.holm=c(.05, .1,.15,.2,.25,.3) +qqlist=c(.05, .1,.15,.2,.25,.3) +fdr=se=fdr.by=se.by=rep(NA,length(qqlist)) jj=0 for(qq in qqlist){ jj=jj+1 -r=v=r.by=v.by=r.holm=v.holm=rep(NA,nsim) +r=v=r.by=v.by=rep(NA,nsim) for(ii in 1:nsim){ v[ii]=sum( (pvadj[ii,] Date: Wed, 11 Oct 2017 10:46:34 -0700 Subject: [PATCH 280/396] rob modified unifTest.R --- tests/unifTest.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unifTest.R b/tests/unifTest.R index bc15bf73..3f251caa 100644 --- a/tests/unifTest.R +++ b/tests/unifTest.R @@ -106,3 +106,9 @@ lines(qqlist,fdr.by,type="b",col=3) abline(0,1,lty=2) title(paste("n=",as.character(n)," p=",as.character(p)," ",as.character(type))) legend("bottomright",c("BH","BY"),col=c(1,3),lty=1) + + +pv=pvals[ii,] +pvv=sort(pv) +oo=which(pvv<=.2*(1:length(pvv))/length(pvv)) +oo=oo[length(oo)] From 3da6c507fa366aaa8c8b4b86124092d40bea5255 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 25 Oct 2017 14:06:10 -0700 Subject: [PATCH 281/396] NF: density functions 
for randomized LASSO --- selectiveInference/src/randomized_lasso.c | 201 ++++++++++++++++++++++ selectiveInference/src/randomized_lasso.h | 42 +++++ 2 files changed, 243 insertions(+) create mode 100644 selectiveInference/src/randomized_lasso.c create mode 100644 selectiveInference/src/randomized_lasso.h diff --git a/selectiveInference/src/randomized_lasso.c b/selectiveInference/src/randomized_lasso.c new file mode 100644 index 00000000..ac55cf48 --- /dev/null +++ b/selectiveInference/src/randomized_lasso.c @@ -0,0 +1,201 @@ +#include // for fabs + +// Augmented density for randomized LASSO after +// Gaussian randomization + +// Described in https://arxiv.org/abs/1609.05609 + +// Gaussian is product of IID N(0, noise_scale^2) density +// Evaluated at A_D D + A_O O + h + +// Laplace is product of IID Laplace with scale noise_scale +// Also evaluated at A_D D + A_O O + h + +double log_density_gaussian(double noise_scale, // Scale of randomization + int ndim, // Number of features -- "p" + int ninternal, // Dimension of internal data representation often 1 + int noptimization, // Dimension of optimization variables -- "p" + double *internal_linear, // A_D -- linear part for data + double *internal_state, // D -- data state + double *optimization_linear, // A_O -- linear part for optimization variables + double *optimization_state, // O -- optimization state + double *offset) // h -- offset in affine transform -- "p" dimensional +{ + int irow, icol; + double denom = 2 * noise_scale * noise_scale; + double value = 0; + double reconstruction = 0; + double *offset_ptr; + double *internal_linear_ptr; + double *internal_state_ptr; + double *optimization_linear_ptr; + double *optimization_state_ptr; + + for (irow=0; irow Date: Wed, 25 Oct 2017 16:35:52 -0700 Subject: [PATCH 282/396] T to TRUE --- selectiveInference/R/funs.fixed.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index be314af1..66a30887 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -6,7 +6,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"), intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, - type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + type=c("partial", "full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) { @@ -150,7 +150,7 @@ fixedLassoInf <- function(x, y, beta, ci = tailarea = matrix(0,k,2) if (type=="full" & p > n) { - if (intercept == T) { + if (intercept == TRUE) { pp=p+1 Xint <- cbind(rep(1,n),x) # indices of selected predictors From b23b189f380ae04ee7a5de0728e55110f46eee42 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 13:08:05 -0700 Subject: [PATCH 283/396] updated wide C code -- can now do ridge as well as variable weights -- ready to solve randomized LASSO --- selectiveInference/R/funs.fixed.R | 40 +++- selectiveInference/src/Rcpp-debias.cpp | 35 +++- selectiveInference/src/debias.h | 22 ++- selectiveInference/src/quadratic_program.c | 108 +++++++---- .../src/quadratic_program_wide.c | 176 ++++++++++++------ selectiveInference/src/randomized_lasso.c | 13 -- 6 files changed, 270 insertions(+), 124 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 66a30887..aaceaaad 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -189,8 +189,10 @@ fixedLassoInf <- function(x, 
y, beta, } M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) + # vector which is offset for testing debiased beta's null_value <- (((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) + if (intercept == T) { M = M[-1,] # remove intercept row null_value = null_value[-1] # remove intercept element @@ -238,12 +240,23 @@ fixedLassoInf <- function(x, y, beta, tailarea[j,] = a$tailarea } - out = list(type=type,lambda=lambda,pv=pv,ci=ci, - tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, - vars=vars,sign=sign_vars,sigma=sigma,alpha=alpha, - sd=sigma*sqrt(rowSums(vmat^2)), - coef0=vmat%*%y, - call=this.call) + out = list(type=type, + lambda=lambda, + pv=pv, + ci=ci, + tailarea=tailarea, + vlo=vlo, + vup=vup, + vmat=vmat, + y=y, + vars=vars, + sign=sign_vars, + sigma=sigma, + alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call) + class(out) = "fixedLassoInf" return(out) } @@ -374,7 +387,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep row, mu, linesearch=TRUE, # do a linesearch? - scaling_factor=1.2, # multiplicative factor for linesearch + scaling_factor=1.5, # multiplicative factor for linesearch max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? @@ -420,11 +433,15 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, - max_active) + max_active, + FALSE, # objective_stop + FALSE, # kkt_stop + TRUE) # param_stop } else { Xsoln = rep(0, nrow(Xinfo)) result = solve_QP_wide(Xinfo, # this is a design matrix - mu, + rep(mu, p), # vector of Lagrange multipliers + 0, # ridge_term max_iter, soln, linear_func, @@ -434,7 +451,10 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, - max_active) + max_active, + FALSE, # objective_stop + FALSE, # kkt_stop + TRUE) # param_stop } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index ce8bb156..24bbae88 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -15,7 +15,10 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, - int max_active + int max_active, + int objective_stop, + int kkt_stop, + int param_stop ) { int nrow = Sigma.nrow(); // number of features @@ -28,6 +31,8 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::NumericVector Sigma_diag(nrow); double *sigma_diag_p = Sigma_diag.begin(); + Rcpp::NumericVector theta_old(nrow); + for (irow=0; irow= max_active); diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index ebcbc933..052af7a1 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -9,13 +9,17 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ double *gradient_ptr, /* nndef times theta */ int *ever_active_ptr, /* Ever active set: 1-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in nndef */ + int nfeature, /* How many features in nndef */ double bound, /* feasibility parameter */ double *theta, /* current value */ + double *theta_old, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active); /* Upper limit for size of active set 
-- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? */ + int param_stop); /* Break based on parameter convergence? */ int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* nndef times theta + linear_func */ @@ -33,12 +37,17 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int *nactive_ptr, /* Size of ever active set */ int ncase, /* How many rows in X */ int nfeature, /* How many columns in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ + double *theta_old_ptr, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active); /* Upper limit for size of active set -- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? */ + int param_stop); /* Break based on parameter convergence? */ int check_KKT_wide(double *theta_ptr, /* current theta */ double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ @@ -48,8 +57,9 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ - double tol); /* precision for checking KKT conditions */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double tol); /* precision for checking KKT conditions */ void update_gradient_wide(double *gradient_ptr, /* X^TX/ncase times theta + linear_func */ double *X_theta_ptr, /* Current fitted values */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index d9bd0170..1bc7fa34 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -92,19 +92,19 @@ int update_ever_active_qp(int coord, int check_KKT_qp(double *theta_ptr, /* current theta */ double *gradient_ptr, /* nndef times theta + linear_func */ - int nrow, /* how many rows in nndef */ + int nfeature, /* how many features in nndef */ double bound, /* Lagrange multipler for \ell_1 */ double tol) /* precision for checking KKT conditions */ { // First check inactive - int irow; + int ifeature; double *theta_ptr_tmp, *gradient_ptr_tmp; double gradient; - for (irow=0; irow 1.e-6 * (fabs(value) + fabs(old_value))) { delta = value - old_value; - nndef_ptr_tmp = ((double *) nndef_ptr + coord * nrow); + nndef_ptr_tmp = ((double *) nndef_ptr + coord * nfeature); gradient_ptr_tmp = ((double *) gradient_ptr); - for (icol=0; icol= max_active) { @@ -380,12 +417,12 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ // Check relative decrease of objective - if (check_objective) { + if (objective_stop) { new_value = objective_qp(nndef_ptr, linear_func_ptr, ever_active_ptr, nactive_ptr, - nrow, + nfeature, bound, theta); @@ -394,6 +431,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ } old_value = new_value; } + } return(iter); } diff 
--git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 3beb578a..c6cb9f3f 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -4,7 +4,7 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 // where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf @@ -19,11 +19,13 @@ double objective_wide(double *X_theta_ptr, /* Fitted values */ int *nactive_ptr, /* Size of ever active set */ int ncase, /* how many rows in X */ int nfeature, /* how many columns in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr) /* current value */ { int icase, iactive; double value = 0; + double *bound_ptr_tmp; double *X_theta_ptr_tmp = X_theta_ptr; double *linear_func_ptr_tmp = linear_func_ptr; double *theta_ptr_tmp; @@ -55,8 +57,12 @@ double objective_wide(double *X_theta_ptr, /* Fitted values */ // The \ell_1 term - value += bound * fabs((*theta_ptr_tmp)); + bound_ptr_tmp = ((double *) bound_ptr + active_feature); + value += (*bound_ptr_tmp) * fabs((*theta_ptr_tmp)); + // The ridge term + + value += 0.5 * ridge_term * (*theta_ptr_tmp) * (*theta_ptr_tmp); } return(value); @@ -167,34 +173,39 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double tol) /* precision for checking KKT conditions */ { // First check inactive int ifeature; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; double gradient; for (ifeature=0; ifeature 0) && (fabs(gradient + bound) > tol * bound)) { + if ((*theta_ptr_tmp != 0) && (bound != 0)) { // these coordinates of gradients should be equal to -bound + + if ((*theta_ptr_tmp > 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { return(0); } - + } - else { + else if (bound != 0) { if (fabs(gradient) > (1. + tol) * bound) { return(0); } @@ -214,13 +225,16 @@ int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0- int *need_update_ptr, /* Which coordinates need to be updated? 
*/ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multipliers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double tol) /* precision for checking KKT conditions */ { // First check inactive int iactive; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; double gradient; int nactive = *nactive_ptr; int active_feature; @@ -230,23 +244,26 @@ int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0- active_feature_ptr = ((int *) ever_active_ptr + iactive); active_feature = *active_feature_ptr - 1; // Ever-active is 1-based + theta_ptr_tmp = ((double *) theta_ptr + active_feature); + bound_ptr_tmp = ((double *) bound_ptr + active_feature); + bound = *bound_ptr_tmp; // Compute this coordinate of the gradient gradient = compute_gradient_coord(gradient_ptr, X_theta_ptr, X_ptr, linear_func_ptr, need_update_ptr, active_feature, ncase); - if (*theta_ptr_tmp != 0) { // these coordinates of gradients should be equal to -bound + if ((*theta_ptr_tmp != 0) && (bound != 0)) { // these coordinates of gradients should be equal to -bound - if ((*theta_ptr_tmp > 0) && (fabs(gradient + bound) > tol * bound)) { + if ((*theta_ptr_tmp > 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { return(0); } } - else { + else if (bound != 0) { if (fabs(gradient) > (1. + tol) * bound) { return(0); } @@ -266,7 +283,8 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ int *need_update_ptr, /* Whether a gradient coordinate needs update or not */ int ncase, /* How many rows in X */ int nfeature, /* How many rows in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ int coord, /* which coordinate to update: 0-based */ int is_active) /* Is this coord in ever_active */ @@ -280,6 +298,8 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double *X_theta_ptr_tmp; int *need_update_ptr_tmp; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; int ifeature, icase; double *diagonal_ptr = ((double *) nndef_diag_ptr + coord); @@ -290,6 +310,9 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ theta_ptr_tmp = ((double *) theta_ptr + coord); old_value = *theta_ptr_tmp; + bound_ptr_tmp = ((double *) bound_ptr + coord); + bound = *bound_ptr_tmp; + // The coord entry of gradient_ptr term has a diagonal term in it: // (X^TX)[coord, coord] * theta[coord] / ncase // This removes it. 
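/* Illustrative sketch, not part of the original commit: the hunk below changes
   the coordinate update to account for the ridge term. The update minimizes
   t -> (q + eps)/2 * t^2 + l * t + mu * |t|, with q = diagonal_entry,
   l = linear_term, mu = bound and eps = ridge_term; its closed form is the
   soft-thresholding map that the patched branches compute:

       double soft_threshold_coord(double l, double mu, double q, double eps) {
           if (l < -mu) return (-l - mu) / (q + eps); // negative linear term => positive solution
           if (l > mu)  return -(l - mu) / (q + eps); // positive linear term => negative solution
           return 0;                                  // |l| <= mu => coordinate stays at zero
       }
*/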
@@ -298,17 +321,17 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ // Now soft-threshold the coord entry of theta - // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| - // with q=diagonal_entry and l=linear_term + // Objective is t \mapsto (q+eps)/2 * t^2 + l * t + bound |t| + + // with q=diagonal_entry and l=linear_term and eps=ridge_Term // With a negative linear term, solution should be // positive if (linear_term < -bound) { - value = (-linear_term - bound) / diagonal_entry; + value = (-linear_term - bound) / (diagonal_entry + ridge_term); } else if (linear_term > bound) { - value = -(linear_term - bound) / diagonal_entry; + value = -(linear_term - bound) / (diagonal_entry + ridge_term); } // Add to active set if necessary @@ -363,24 +386,36 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int *nactive_ptr, /* Size of ever active set */ int ncase, /* How many rows in X */ int nfeature, /* How many columns in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ + double *theta_old_ptr, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active) /* Upper limit for size of active set -- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? */ + int param_stop) /* Break based on parameter convergence? */ { int iter = 0; + int iter_old = 1; int ifeature = 0; int iactive = 0; int *active_ptr; - int check_objective = 1; + double old_value, new_value; int niter_active = 5; int iter_active; - if (check_objective) { + double norm_diff = 1.; + double norm_last = 1.; + double delta; + double threshold = 1.e-2; + double *theta_ptr_tmp, *theta_old_ptr_tmp; + + if (objective_stop) { old_value = objective_wide(X_theta_ptr, linear_func_ptr, @@ -388,8 +423,10 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX nactive_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr); + new_value = old_value; } @@ -412,7 +449,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr, *active_ptr - 1, // Ever-active is 1-based 1); @@ -431,7 +469,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, nfeature, ncase, - bound, + bound_ptr, + ridge_term, kkt_tol) == 1) { break; } @@ -440,17 +479,20 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check KKT - if (check_KKT_wide(theta_ptr, - gradient_ptr, - X_theta_ptr, - X_ptr, - linear_func_ptr, - need_update_ptr, - nfeature, - ncase, - bound, - kkt_tol) == 1) { - break; + if (kkt_stop) { + if (check_KKT_wide(theta_ptr, + gradient_ptr, + X_theta_ptr, + X_ptr, + linear_func_ptr, + need_update_ptr, + nfeature, + ncase, + bound_ptr, + ridge_term, + kkt_tol) == 1) { + break; + } } // Update all variables @@ -467,7 +509,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr, ifeature, 0); @@ -475,19 +518,45 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check KKT - if 
(check_KKT_wide(theta_ptr, - gradient_ptr, - X_theta_ptr, - X_ptr, - linear_func_ptr, - need_update_ptr, - nfeature, - ncase, - bound, - kkt_tol) == 1) { - break; + if (kkt_stop) { + if (check_KKT_wide(theta_ptr, + gradient_ptr, + X_theta_ptr, + X_ptr, + linear_func_ptr, + need_update_ptr, + nfeature, + ncase, + bound_ptr, + ridge_term, + kkt_tol) == 1) { + break; + } + } + + // Check based on norm -- from Adel's debiasing code + + if (param_stop) { + if (iter == 2 * iter_old) { + iter_old = iter; + norm_diff = 0; + norm_last = 0; + for (ifeature=0; ifeature= max_active) { @@ -496,14 +565,15 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check relative decrease of objective - if (check_objective) { + if (objective_stop) { new_value = objective_wide(X_theta_ptr, linear_func_ptr, ever_active_ptr, nactive_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr); if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { diff --git a/selectiveInference/src/randomized_lasso.c b/selectiveInference/src/randomized_lasso.c index ac55cf48..123c81de 100644 --- a/selectiveInference/src/randomized_lasso.c +++ b/selectiveInference/src/randomized_lasso.c @@ -126,8 +126,6 @@ double log_density_gaussian_conditional(double noise_scale, // Scale double denom = 2 * noise_scale * noise_scale; double reconstruction = 0; double *offset_ptr; - double *internal_linear_ptr; - double *internal_state_ptr; double *optimization_linear_ptr; double *optimization_state_ptr; @@ -164,8 +162,6 @@ double log_density_laplace_conditional(double noise_scale, // Scale double value = 0; double reconstruction = 0; double *offset_ptr; - double *internal_linear_ptr; - double *internal_state_ptr; double *optimization_linear_ptr; double *optimization_state_ptr; @@ -176,15 +172,6 @@ double log_density_laplace_conditional(double noise_scale, // Scale offset_ptr = ((double *) offset + irow); reconstruction = *offset_ptr; - // Internal (i.e. 
data) contribution - for (icol=0; icol Date: Thu, 26 Oct 2017 13:08:37 -0700 Subject: [PATCH 284/396] removing machine generated files --- selectiveInference/R/RcppExports.R | 19 ------ selectiveInference/src/RcppExports.cpp | 92 -------------------------- 2 files changed, 111 deletions(-) delete mode 100644 selectiveInference/R/RcppExports.R delete mode 100644 selectiveInference/src/RcppExports.cpp diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R deleted file mode 100644 index f5ebee43..00000000 --- a/selectiveInference/R/RcppExports.R +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) -} - -solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) -} - -update1_ <- function(Q2, w, m, k) { - .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) -} - -downdate1_ <- function(Q1, R, j0, m, n) { - .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) -} - diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp deleted file mode 100644 index 02a77413..00000000 --- a/selectiveInference/src/RcppExports.cpp +++ /dev/null @@ -1,92 +0,0 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#include - -using namespace Rcpp; - -// solve_QP -Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); - Rcpp::traits::input_parameter< double >::type bound(boundSEXP); - Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); - Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); - Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); - Rcpp::traits::input_parameter< int >::type 
max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; -END_RCPP -} -// solve_QP_wide -Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); - Rcpp::traits::input_parameter< double >::type bound(boundSEXP); - Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type X_theta(X_thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); - Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); - Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); - Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; -END_RCPP -} -// update1_ -Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); - Rcpp::traits::input_parameter< int >::type m(mSEXP); - Rcpp::traits::input_parameter< int >::type k(kSEXP); - rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); - return rcpp_result_gen; -END_RCPP -} -// downdate1_ -Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); - Rcpp::traits::input_parameter< int >::type j0(j0SEXP); - Rcpp::traits::input_parameter< int >::type m(mSEXP); - Rcpp::traits::input_parameter< int >::type n(nSEXP); - rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return rcpp_result_gen; -END_RCPP -} - -static const R_CallMethodDef CallEntries[] = { - {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, - 
{"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, - {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, - {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_selectiveInference(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} From 2c0e3d60e883620ee6b2ba970c719dc00d08ee6c Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 14:30:59 -0700 Subject: [PATCH 285/396] WIP: function to fit randomized lasso --- selectiveInference/R/funs.randomized.R | 64 ++++++++++++++++++++++++++ tests/test_randomized.R | 14 ++++++ 2 files changed, 78 insertions(+) create mode 100644 selectiveInference/R/funs.randomized.R create mode 100644 tests/test_randomized.R diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R new file mode 100644 index 00000000..01e6aa40 --- /dev/null +++ b/selectiveInference/R/funs.randomized.R @@ -0,0 +1,64 @@ +# Functions to fit and "infer" about parameters in the +# randomized LASSO +# +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2 + +fit_randomized_lasso = function(X, + y, + lam, + noise_scale, + ridge_term, + noise_type=c('gaussian', 'laplace'), + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + objective_tol=1.e-8, # tolerance for relative decrease in objective + objective_stop=FALSE, + kkt_stop=TRUE, + param_stop=TRUE) +{ + + n = nrow(X); p = ncol(X) + + noise_type = match.arg(noise_type) + + if (noise_type == 'gaussian') { + D = Norm(mean=0, sd=noise_scale) + } + else if (noise_type == 'laplace') { + D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. + } + perturb_ = distr::r(D)(p) + + lam = as.numeric(lam) + if (length(lam) == 1) { + lam = rep(lam, p) + } + if (length(lam) != p) { + stop("Lagrange parameter should be single float or of length ncol(X)") + } + + soln = rep(0, p) + Xsoln = rep(0, n) + linear_func = (- t(X) %*% y - perturb_) + gradient = 1. 
* linear_func + ever_active = rep(0, p) + nactive = as.integer(0) + + result = solve_QP_wide(X, # design matrix + lam, # vector of Lagrange multipliers + ridge_term / n, # ridge_term + max_iter, + soln, + linear_func, + gradient, + Xsoln, + ever_active, + nactive, + kkt_tol, + objective_tol, + p, + objective_stop, # objective_stop + kkt_stop, # kkt_stop + param_stop) # param_stop + return(result) +} diff --git a/tests/test_randomized.R b/tests/test_randomized.R new file mode 100644 index 00000000..e4e35a12 --- /dev/null +++ b/tests/test_randomized.R @@ -0,0 +1,14 @@ +library(selectiveInference) + +test = function() { + + n = 100; p = 50 + X = matrix(rnorm(n * p), n, p) + y = rnorm(n) + lam = 20 / sqrt(n) + noise_scale = 0.01 * sqrt(n) + ridge_term = .1 / sqrt(n) + fit_randomized_lasso(X, y, lam, noise_scale, ridge_term) +} + +print(test()) From 2439d7318bcf9bec47be22766e1e35192abcf31f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 21:36:18 -0700 Subject: [PATCH 286/396] check that solution is same as old code -- currently failing --- tests/test_debiasing.R | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/test_debiasing.R diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R new file mode 100644 index 00000000..27bd6214 --- /dev/null +++ b/tests/test_debiasing.R @@ -0,0 +1,27 @@ +library(selectiveInference) +source('oldcode.R') + +n = 500; p = 50 + +X = matrix(rnorm(n * p), n, p) +S = t(X) %*% X / n + +mu = 7.791408e-02 + +A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000) +A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000) + +B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000) + +C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[1:5,] +C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[1:5,] + +par(mfrow=c(2,3)) +plot(A1[1,], C1[1,]) +plot(A1[1,], B1[1,]) +plot(B1[1,], C1[1,]) + +plot(A1[1,], A2[1,]) +plot(B1[1,], B2[1,]) +plot(C1[1,], C2[1,]) From 83b04268b9f39749b37444914e227c63b87fb220 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:17:39 -0700 Subject: [PATCH 287/396] bug found in qp solver -- look at tests/test_QP.R --- selectiveInference/NAMESPACE | 2 +- selectiveInference/R/funs.fixed.R | 5 + selectiveInference/R/funs.randomized.R | 2 + selectiveInference/src/Rcpp-debias.cpp | 4 + selectiveInference/src/debias.h | 2 + selectiveInference/src/quadratic_program.c | 4 +- .../src/quadratic_program_wide.c | 6 +- tests/test_QP.R | 15 ++ tests/test_debiasing.R | 172 +++++++++++++++++- 9 files changed, 204 insertions(+), 8 deletions(-) create mode 100644 tests/test_QP.R diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index d72d56a9..c7d08a1e 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -44,4 +44,4 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") importFrom("Rcpp", "sourceCpp") - +importFrom("distr", "Norm", "DExp") diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index aaceaaad..54903162 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -327,6 +327,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? 
max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective ) { @@ -363,6 +364,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d warn_kkt=FALSE, max_iter=max_iter, kkt_tol=kkt_tol, + parameter_tol=parameter_tol, objective_tol=objective_tol) if (warn_kkt && (!output$kkt_check)) { @@ -393,6 +395,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective ) { @@ -433,6 +436,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, + parameter_tol, max_active, FALSE, # objective_stop FALSE, # kkt_stop @@ -451,6 +455,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, + parameter_tol, max_active, FALSE, # objective_stop FALSE, # kkt_stop diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 01e6aa40..25a0b95a 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -11,6 +11,7 @@ fit_randomized_lasso = function(X, noise_type=c('gaussian', 'laplace'), max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-8, # tolerance for relative convergence of parameter objective_tol=1.e-8, # tolerance for relative decrease in objective objective_stop=FALSE, kkt_stop=TRUE, @@ -56,6 +57,7 @@ fit_randomized_lasso = function(X, nactive, kkt_tol, objective_tol, + parameter_tol, p, objective_stop, # objective_stop kkt_stop, # kkt_stop diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 24bbae88..9cda705a 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -15,6 +15,7 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, + double parameter_tol, int max_active, int objective_stop, int kkt_stop, @@ -52,6 +53,7 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, maxiter, kkt_tol, objective_tol, + parameter_tol, max_active, objective_stop, kkt_stop, @@ -92,6 +94,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, + double parameter_tol, int max_active, int objective_stop, int kkt_stop, @@ -142,6 +145,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, maxiter, kkt_tol, objective_tol, + parameter_tol, max_active, objective_stop, kkt_stop, diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 052af7a1..d3db26d7 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -16,6 +16,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, 
/* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ @@ -44,6 +45,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index 1bc7fa34..822ddf53 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -273,6 +273,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ @@ -292,7 +293,6 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ double norm_diff = 1.; double norm_last = 1.; double delta; - double threshold = 1.e-2; double *theta_ptr, *theta_old_ptr; if (objective_stop) { @@ -403,7 +403,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ norm_diff = sqrt(norm_diff); norm_last = sqrt(norm_last); - if (norm_diff < threshold * norm_last) { + if (norm_diff < parameter_tol * norm_last) { break; } } diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index c6cb9f3f..3e4bdb09 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -4,7 +4,7 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 // where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf @@ -393,6 +393,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? 
*/ @@ -412,7 +413,6 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX double norm_diff = 1.; double norm_last = 1.; double delta; - double threshold = 1.e-2; double *theta_ptr_tmp, *theta_old_ptr_tmp; if (objective_stop) { @@ -552,7 +552,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX norm_diff = sqrt(norm_diff); norm_last = sqrt(norm_last); - if (norm_diff < threshold * norm_last) { + if (norm_diff < parameter_tol * norm_last) { break; } } diff --git a/tests/test_QP.R b/tests/test_QP.R new file mode 100644 index 00000000..cf5ca646 --- /dev/null +++ b/tests/test_QP.R @@ -0,0 +1,15 @@ +library(selectiveInference) +### Test + +n = 100; p = 50 + +X = matrix(rnorm(n * p), n, p) +Y = rnorm(n) +lam = 2 + +soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 1.e-12, 0)$soln +G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) +soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] + +print(soln1) +print(soln2) \ No newline at end of file diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R index 27bd6214..b1fdf24c 100644 --- a/tests/test_debiasing.R +++ b/tests/test_debiasing.R @@ -1,7 +1,143 @@ library(selectiveInference) -source('oldcode.R') -n = 500; p = 50 + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(mu)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + mu.stop <- 0; + try.no <- 1; + incr <- 0; + while ((mu.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + mu.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + mu <- mu*resol; + } else { + incr <- 0; + mu <- mu/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + mu <- mu*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + mu.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + mu <- mu/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + mu <- mu*resol; + beta <- last.beta; + mu.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + mu0 <- rho/(1+rho); + beta <- rep(0,p); + + #if (mu >= mu0){ + # beta[i] <- (1-mu0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-mu0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, 
maxiter, diff.norm2, threshold * last.norm2, threshold, mu)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Test + +n = 100; p = 50 X = matrix(rnorm(n * p), n, p) S = t(X) %*% X / n @@ -25,3 +161,35 @@ plot(B1[1,], C1[1,]) plot(A1[1,], A2[1,]) plot(B1[1,], B2[1,]) plot(C1[1,], C2[1,]) + +print(c('A', sum(A1[1,] == 0))) +print(c('B', sum(B1[1,] == 0))) +print(c('C', sum(C1[1,] == 0))) + +## Are our points feasible + +feasibility = function(S, soln, j, mu) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(max(abs(G)), mu)) +} + +print(c('feasibility A', feasibility(S, A1[1,], 1, mu))) +print(c('feasibility B', feasibility(S, B1[1,], 1, mu))) +print(c('feasibility C', feasibility(S, C1[1,], 1, mu))) + +active_KKT = function(S, soln, j, mu) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(G[soln != 0] * sign(soln)[soln != 0], mu)) +} + +print(c('active_KKT A', active_KKT(S, A1[1,], 1, mu))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, mu))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, mu))) + + From f13bce45b2e05c6454adeb1dea82be5f02e7844d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:19:20 -0700 Subject: [PATCH 288/396] allowing randomization to be 0 in solver --- selectiveInference/R/funs.randomized.R | 18 +++++++++++------- tests/test_QP.R | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 25a0b95a..b79e2fb8 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -22,14 +22,18 @@ fit_randomized_lasso = function(X, noise_type = match.arg(noise_type) - if (noise_type == 'gaussian') { - D = Norm(mean=0, sd=noise_scale) + if (noise_scale > 0) { + if (noise_type == 'gaussian') { + D = Norm(mean=0, sd=noise_scale) + } + else if (noise_type == 'laplace') { + D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. + } + perturb_ = distr::r(D)(p) + } else { + perturb_ = rep(0, p) } - else if (noise_type == 'laplace') { - D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. 
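# Illustrative sketch, not part of the original commit: the added block above
# guards the randomization draw so that noise_scale = 0 yields a deterministic
# (unrandomized) LASSO. A minimal standalone version under the same assumptions
# (the distr package, as used by the patch; the name draw_perturbation is
# hypothetical):
#
#   draw_perturbation = function(p, noise_scale, noise_type = c("gaussian", "laplace")) {
#       noise_type = match.arg(noise_type)
#       if (noise_scale <= 0) return(rep(0, p))   # no randomization
#       D = switch(noise_type,
#                  gaussian = distr::Norm(mean = 0, sd = noise_scale),
#                  laplace = distr::DExp(rate = 1 / noise_scale))  # Laplace with scale noise_scale
#       distr::r(D)(p)   # p iid draws of the perturbation omega
#   }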
- } - perturb_ = distr::r(D)(p) - + lam = as.numeric(lam) if (length(lam) == 1) { lam = rep(lam, p) diff --git a/tests/test_QP.R b/tests/test_QP.R index cf5ca646..4aebec37 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -7,7 +7,7 @@ X = matrix(rnorm(n * p), n, p) Y = rnorm(n) lam = 2 -soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 1.e-12, 0)$soln +soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 0, 0)$soln G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] From a6064f27241eab8d2abc0ac253b6d4b41c31f9ae Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:25:32 -0700 Subject: [PATCH 289/396] cosmetic edit --- selectiveInference/src/Rcpp-debias.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 9cda705a..5c181848 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -112,12 +112,13 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, Rcpp::IntegerVector need_update(nfeature); - // Extract the diagonal + Rcpp::NumericVector theta_old(nfeature); + + // Extract the diagonal -- divide by ncase + Rcpp::NumericVector nndef_diag(nfeature); double *nndef_diag_p = nndef_diag.begin(); - Rcpp::NumericVector theta_old(nfeature); - for (ifeature=0; ifeature Date: Thu, 26 Oct 2017 22:46:48 -0700 Subject: [PATCH 290/396] LASSO solver is right up to scale --- tests/test_QP.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_QP.R b/tests/test_QP.R index 4aebec37..61c4d539 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -9,7 +9,8 @@ lam = 2 soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 0, 0)$soln G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) -soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] +soln2 = coef(G, s=lam/n, exact=TRUE, x=X, y=Y)[-1] print(soln1) -print(soln2) \ No newline at end of file +print(soln2) +plot(soln1, soln2) From cc88b978c6719157b4a5f36762496f0fbbae7fb1 Mon Sep 17 00:00:00 2001 From: Nicolas Ballarini Date: Fri, 27 Oct 2017 10:04:32 +0200 Subject: [PATCH 291/396] Confidence intervals when estimate is negative Confidence intervals are not flipped when beta hat is negative --- selectiveInference/R/funs.fixedCox.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index d6ebd6b7..2fa8c083 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -73,7 +73,7 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda vup[jj]=junk$vup sd[jj]=junk$sd - junk2=TG.interval(bbar, A1, b1, vj, MM, alpha) + junk2=TG.interval(bbar, A1, b1, vj, MM, alpha, flip=(s2[jj]==-1)) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea From 7b231508cc47fec41b074ee0352d42bf8c8cc0d7 Mon Sep 17 00:00:00 2001 From: Nicolas Ballarini Date: Fri, 27 Oct 2017 10:04:40 +0200 Subject: [PATCH 292/396] Confidence intervals when estimate is negative Confidence intervals are not flipped when beta hat is negative --- selectiveInference/R/funs.fixedLogit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 19936b09..4ab33980 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -96,7 +96,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet 
vup[jj]=junk$vup sd[jj]=junk$sd - junk2=TG.interval(bbar, A1, b1, vj, MM,alpha=alpha) + junk2=TG.interval(bbar, A1, b1, vj, MM,alpha=alpha, flip=(s2[jj+1]==-1)) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea From 928fee90f833cd0a2cee39040c075d9292d9fd21 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 11 Oct 2017 07:38:07 -0700 Subject: [PATCH 293/396] variable rename --- selectiveInference/R/funs.fixedCox.R | 12 ++++++------ selectiveInference/R/funs.fixedLogit.R | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index d6ebd6b7..75305112 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -29,7 +29,7 @@ if( sum(status==0)+sum(status==1)!=length(y)) stop("status vector must have valu vars=which(m) if(sum(m)>0){ bhat=beta[beta!=0] #penalized coefs just for active variables - s2=sign(bhat) + sign_bhat=sign(bhat) #check KKT @@ -40,7 +40,7 @@ if(sum(m)>0){ res=residuals(aaa,type="score") if(!is.matrix(res)) res=matrix(res,ncol=1) scor=colSums(res) - g=(scor+lambda*s2)/(2*lambda) + g=(scor+lambda*sign_bhat)/(2*lambda) # cat(c(g,lambda,tol.kkt),fill=T) if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", @@ -49,9 +49,9 @@ scor=colSums(res) # Hessian of partial likelihood at the LASSO solution MM=vcov(aaa) -bbar=(bhat+lambda*MM%*%s2) -A1=-(mydiag(s2)) -b1= -(mydiag(s2)%*%MM)%*%s2*lambda +bbar=(bhat+lambda*MM%*%sign_bhat) +A1=-(mydiag(sign_bhat)) +b1= -(mydiag(sign_bhat)%*%MM)%*%sign_bhat*lambda temp=max(A1%*%bbar-b1) @@ -63,7 +63,7 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda # the one sided p-values are a bit off for(jj in 1:length(bbar)){ - vj=rep(0,length(bbar));vj[jj]=s2[jj] + vj=rep(0,length(bbar));vj[jj]=sign_bhat[jj] junk=TG.pvalue(bbar, A1, b1, vj,MM) diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 19936b09..16cafdc5 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -32,7 +32,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet m=beta[-1]!=0 #active set bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars - s2=sign(bhat) + sign_bhat=sign(bhat) lam2m=diag(c(0,rep(lambda,sum(m)))) @@ -66,14 +66,14 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet # MM=solve(t(xxm)%*%w%*%xxm) MM=solve(scale(t(xxm),F,1/ww)%*%xxm) gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized - # at exact LASSO solution it should be s2[-1] + # at exact LASSO solution it should be sign_bhat[-1] dbeta = MM %*% gm - # bbar=(bhat+lam2m%*%MM%*%s2) # JT: this is wrong, shouldn't use sign of intercept anywhere... + # bbar=(bhat+lam2m%*%MM%*%sign_bhat) # JT: this is wrong, shouldn't use sign of intercept anywhere... 
bbar = bhat - dbeta - A1=-(mydiag(s2))[-1,] - b1= (s2 * dbeta)[-1] + A1=-(mydiag(sign_bhat))[-1,] + b1= (sign_bhat * dbeta)[-1] tol.poly = 0.01 if (max((A1 %*% bbar) - b1) > tol.poly) @@ -87,7 +87,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet for(jj in 1:sum(m)){ - vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] + vj=c(rep(0,sum(m)+1));vj[jj+1]=sign_bhat[jj+1] # compute p-values junk=TG.pvalue(bbar, A1, b1, vj, MM) pv[jj] = junk$pv From 2e9b11d9ce155df3a8d201ce7af434a25ba142d0 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 25 Oct 2017 14:06:10 -0700 Subject: [PATCH 294/396] NF: density functions for randomized LASSO --- selectiveInference/src/randomized_lasso.c | 201 ++++++++++++++++++++++ selectiveInference/src/randomized_lasso.h | 42 +++++ 2 files changed, 243 insertions(+) create mode 100644 selectiveInference/src/randomized_lasso.c create mode 100644 selectiveInference/src/randomized_lasso.h diff --git a/selectiveInference/src/randomized_lasso.c b/selectiveInference/src/randomized_lasso.c new file mode 100644 index 00000000..ac55cf48 --- /dev/null +++ b/selectiveInference/src/randomized_lasso.c @@ -0,0 +1,201 @@ +#include // for fabs + +// Augmented density for randomized LASSO after +// Gaussian randomization + +// Described in https://arxiv.org/abs/1609.05609 + +// Gaussian is product of IID N(0, noise_scale^2) density +// Evaluated at A_D D + A_O O + h + +// Laplace is product of IID Laplace with scale noise_scale +// Also evaluated at A_D D + A_O O + h + +double log_density_gaussian(double noise_scale, // Scale of randomization + int ndim, // Number of features -- "p" + int ninternal, // Dimension of internal data representation often 1 + int noptimization, // Dimension of optimization variables -- "p" + double *internal_linear, // A_D -- linear part for data + double *internal_state, // D -- data state + double *optimization_linear, // A_O -- linear part for optimization variables + double *optimization_state, // O -- optimization state + double *offset) // h -- offset in affine transform -- "p" dimensional +{ + int irow, icol; + double denom = 2 * noise_scale * noise_scale; + double value = 0; + double reconstruction = 0; + double *offset_ptr; + double *internal_linear_ptr; + double *internal_state_ptr; + double *optimization_linear_ptr; + double *optimization_state_ptr; + + for (irow=0; irow Date: Wed, 25 Oct 2017 16:35:52 -0700 Subject: [PATCH 295/396] T to TRUE --- selectiveInference/R/funs.fixed.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index be314af1..66a30887 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -6,7 +6,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"), intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, - type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + type=c("partial", "full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) { @@ -150,7 +150,7 @@ fixedLassoInf <- function(x, y, beta, ci = tailarea = matrix(0,k,2) if (type=="full" & p > n) { - if (intercept == T) { + if (intercept == TRUE) { pp=p+1 Xint <- cbind(rep(1,n),x) # indices of selected predictors From 3748723714d2fc2b837d3e21f8f5e0976d6be9dc Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 13:08:05 -0700 Subject: [PATCH 296/396] updated wide C code -- can now do ridge as well as variable 
weights -- ready to solve randomized LASSO --- selectiveInference/R/funs.fixed.R | 40 +++- selectiveInference/src/Rcpp-debias.cpp | 35 +++- selectiveInference/src/debias.h | 22 ++- selectiveInference/src/quadratic_program.c | 108 +++++++---- .../src/quadratic_program_wide.c | 176 ++++++++++++------ selectiveInference/src/randomized_lasso.c | 13 -- 6 files changed, 270 insertions(+), 124 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 66a30887..aaceaaad 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -189,8 +189,10 @@ fixedLassoInf <- function(x, y, beta, } M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) + # vector which is offset for testing debiased beta's null_value <- (((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) + if (intercept == T) { M = M[-1,] # remove intercept row null_value = null_value[-1] # remove intercept element @@ -238,12 +240,23 @@ fixedLassoInf <- function(x, y, beta, tailarea[j,] = a$tailarea } - out = list(type=type,lambda=lambda,pv=pv,ci=ci, - tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, - vars=vars,sign=sign_vars,sigma=sigma,alpha=alpha, - sd=sigma*sqrt(rowSums(vmat^2)), - coef0=vmat%*%y, - call=this.call) + out = list(type=type, + lambda=lambda, + pv=pv, + ci=ci, + tailarea=tailarea, + vlo=vlo, + vup=vup, + vmat=vmat, + y=y, + vars=vars, + sign=sign_vars, + sigma=sigma, + alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call) + class(out) = "fixedLassoInf" return(out) } @@ -374,7 +387,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep row, mu, linesearch=TRUE, # do a linesearch? - scaling_factor=1.2, # multiplicative factor for linesearch + scaling_factor=1.5, # multiplicative factor for linesearch max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? 
@@ -420,11 +433,15 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, - max_active) + max_active, + FALSE, # objective_stop + FALSE, # kkt_stop + TRUE) # param_stop } else { Xsoln = rep(0, nrow(Xinfo)) result = solve_QP_wide(Xinfo, # this is a design matrix - mu, + rep(mu, p), # vector of Lagrange multipliers + 0, # ridge_term max_iter, soln, linear_func, @@ -434,7 +451,10 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, - max_active) + max_active, + FALSE, # objective_stop + FALSE, # kkt_stop + TRUE) # param_stop } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index ce8bb156..24bbae88 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -15,7 +15,10 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, - int max_active + int max_active, + int objective_stop, + int kkt_stop, + int param_stop ) { int nrow = Sigma.nrow(); // number of features @@ -28,6 +31,8 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::NumericVector Sigma_diag(nrow); double *sigma_diag_p = Sigma_diag.begin(); + Rcpp::NumericVector theta_old(nrow); + for (irow=0; irow= max_active); diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index ebcbc933..052af7a1 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -9,13 +9,17 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ double *gradient_ptr, /* nndef times theta */ int *ever_active_ptr, /* Ever active set: 1-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in nndef */ + int nfeature, /* How many features in nndef */ double bound, /* feasibility parameter */ double *theta, /* current value */ + double *theta_old, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active); /* Upper limit for size of active set -- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? */ + int param_stop); /* Break based on parameter convergence? */ int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* nndef times theta + linear_func */ @@ -33,12 +37,17 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int *nactive_ptr, /* Size of ever active set */ int ncase, /* How many rows in X */ int nfeature, /* How many columns in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ + double *theta_old_ptr, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active); /* Upper limit for size of active set -- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? 
*/ + int param_stop); /* Break based on parameter convergence? */ int check_KKT_wide(double *theta_ptr, /* current theta */ double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ @@ -48,8 +57,9 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ - double tol); /* precision for checking KKT conditions */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double tol); /* precision for checking KKT conditions */ void update_gradient_wide(double *gradient_ptr, /* X^TX/ncase times theta + linear_func */ double *X_theta_ptr, /* Current fitted values */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index d9bd0170..1bc7fa34 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -92,19 +92,19 @@ int update_ever_active_qp(int coord, int check_KKT_qp(double *theta_ptr, /* current theta */ double *gradient_ptr, /* nndef times theta + linear_func */ - int nrow, /* how many rows in nndef */ + int nfeature, /* how many features in nndef */ double bound, /* Lagrange multipler for \ell_1 */ double tol) /* precision for checking KKT conditions */ { // First check inactive - int irow; + int ifeature; double *theta_ptr_tmp, *gradient_ptr_tmp; double gradient; - for (irow=0; irow 1.e-6 * (fabs(value) + fabs(old_value))) { delta = value - old_value; - nndef_ptr_tmp = ((double *) nndef_ptr + coord * nrow); + nndef_ptr_tmp = ((double *) nndef_ptr + coord * nfeature); gradient_ptr_tmp = ((double *) gradient_ptr); - for (icol=0; icol= max_active) { @@ -380,12 +417,12 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ // Check relative decrease of objective - if (check_objective) { + if (objective_stop) { new_value = objective_qp(nndef_ptr, linear_func_ptr, ever_active_ptr, nactive_ptr, - nrow, + nfeature, bound, theta); @@ -394,6 +431,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ } old_value = new_value; } + } return(iter); } diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 3beb578a..c6cb9f3f 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -4,7 +4,7 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 // where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf @@ -19,11 +19,13 @@ double objective_wide(double *X_theta_ptr, /* Fitted values */ int *nactive_ptr, /* Size of ever active set */ int ncase, /* how many rows in X */ int nfeature, /* how many columns in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr) /* current value */ { int icase, iactive; double value = 0; + double *bound_ptr_tmp; double *X_theta_ptr_tmp = X_theta_ptr; double *linear_func_ptr_tmp = linear_func_ptr; double *theta_ptr_tmp; @@ -55,8 
+57,12 @@ double objective_wide(double *X_theta_ptr, /* Fitted values */ // The \ell_1 term - value += bound * fabs((*theta_ptr_tmp)); + bound_ptr_tmp = ((double *) bound_ptr + active_feature); + value += (*bound_ptr_tmp) * fabs((*theta_ptr_tmp)); + // The ridge term + + value += 0.5 * ridge_term * (*theta_ptr_tmp) * (*theta_ptr_tmp); } return(value); @@ -167,34 +173,39 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double tol) /* precision for checking KKT conditions */ { // First check inactive int ifeature; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; double gradient; for (ifeature=0; ifeature 0) && (fabs(gradient + bound) > tol * bound)) { + if ((*theta_ptr_tmp != 0) && (bound != 0)) { // these coordinates of gradients should be equal to -bound + + if ((*theta_ptr_tmp > 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { return(0); } - + } - else { + else if (bound != 0) { if (fabs(gradient) > (1. + tol) * bound) { return(0); } @@ -214,13 +225,16 @@ int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0- int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multipliers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double tol) /* precision for checking KKT conditions */ { // First check inactive int iactive; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; double gradient; int nactive = *nactive_ptr; int active_feature; @@ -230,23 +244,26 @@ int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0- active_feature_ptr = ((int *) ever_active_ptr + iactive); active_feature = *active_feature_ptr - 1; // Ever-active is 1-based + theta_ptr_tmp = ((double *) theta_ptr + active_feature); + bound_ptr_tmp = ((double *) bound_ptr + active_feature); + bound = *bound_ptr_tmp; // Compute this coordinate of the gradient gradient = compute_gradient_coord(gradient_ptr, X_theta_ptr, X_ptr, linear_func_ptr, need_update_ptr, active_feature, ncase); - if (*theta_ptr_tmp != 0) { // these coordinates of gradients should be equal to -bound + if ((*theta_ptr_tmp != 0) && (bound != 0)) { // these coordinates of gradients should be equal to -bound - if ((*theta_ptr_tmp > 0) && (fabs(gradient + bound) > tol * bound)) { + if ((*theta_ptr_tmp > 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { return(0); } } - else { + else if (bound != 0) { if (fabs(gradient) > (1. 
+ tol) * bound) { return(0); } @@ -266,7 +283,8 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ int *need_update_ptr, /* Whether a gradient coordinate needs update or not */ int ncase, /* How many rows in X */ int nfeature, /* How many rows in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ int coord, /* which coordinate to update: 0-based */ int is_active) /* Is this coord in ever_active */ @@ -280,6 +298,8 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double *X_theta_ptr_tmp; int *need_update_ptr_tmp; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; int ifeature, icase; double *diagonal_ptr = ((double *) nndef_diag_ptr + coord); @@ -290,6 +310,9 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ theta_ptr_tmp = ((double *) theta_ptr + coord); old_value = *theta_ptr_tmp; + bound_ptr_tmp = ((double *) bound_ptr + coord); + bound = *bound_ptr_tmp; + // The coord entry of gradient_ptr term has a diagonal term in it: // (X^TX)[coord, coord] * theta[coord] / ncase // This removes it. @@ -298,17 +321,17 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ // Now soft-threshold the coord entry of theta - // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| - // with q=diagonal_entry and l=linear_term + // Objective is t \mapsto (q+eps)/2 * t^2 + l * t + bound |t| + + // with q=diagonal_entry and l=linear_term and eps=ridge_Term // With a negative linear term, solution should be // positive if (linear_term < -bound) { - value = (-linear_term - bound) / diagonal_entry; + value = (-linear_term - bound) / (diagonal_entry + ridge_term); } else if (linear_term > bound) { - value = -(linear_term - bound) / diagonal_entry; + value = -(linear_term - bound) / (diagonal_entry + ridge_term); } // Add to active set if necessary @@ -363,24 +386,36 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int *nactive_ptr, /* Size of ever active set */ int ncase, /* How many rows in X */ int nfeature, /* How many columns in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ + double *theta_old_ptr, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active) /* Upper limit for size of active set -- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? */ + int param_stop) /* Break based on parameter convergence? 
*/ { int iter = 0; + int iter_old = 1; int ifeature = 0; int iactive = 0; int *active_ptr; - int check_objective = 1; + double old_value, new_value; int niter_active = 5; int iter_active; - if (check_objective) { + double norm_diff = 1.; + double norm_last = 1.; + double delta; + double threshold = 1.e-2; + double *theta_ptr_tmp, *theta_old_ptr_tmp; + + if (objective_stop) { old_value = objective_wide(X_theta_ptr, linear_func_ptr, @@ -388,8 +423,10 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX nactive_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr); + new_value = old_value; } @@ -412,7 +449,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr, *active_ptr - 1, // Ever-active is 1-based 1); @@ -431,7 +469,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, nfeature, ncase, - bound, + bound_ptr, + ridge_term, kkt_tol) == 1) { break; } @@ -440,17 +479,20 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check KKT - if (check_KKT_wide(theta_ptr, - gradient_ptr, - X_theta_ptr, - X_ptr, - linear_func_ptr, - need_update_ptr, - nfeature, - ncase, - bound, - kkt_tol) == 1) { - break; + if (kkt_stop) { + if (check_KKT_wide(theta_ptr, + gradient_ptr, + X_theta_ptr, + X_ptr, + linear_func_ptr, + need_update_ptr, + nfeature, + ncase, + bound_ptr, + ridge_term, + kkt_tol) == 1) { + break; + } } // Update all variables @@ -467,7 +509,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr, ifeature, 0); @@ -475,19 +518,45 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check KKT - if (check_KKT_wide(theta_ptr, - gradient_ptr, - X_theta_ptr, - X_ptr, - linear_func_ptr, - need_update_ptr, - nfeature, - ncase, - bound, - kkt_tol) == 1) { - break; + if (kkt_stop) { + if (check_KKT_wide(theta_ptr, + gradient_ptr, + X_theta_ptr, + X_ptr, + linear_func_ptr, + need_update_ptr, + nfeature, + ncase, + bound_ptr, + ridge_term, + kkt_tol) == 1) { + break; + } + } + + // Check based on norm -- from Adel's debiasing code + + if (param_stop) { + if (iter == 2 * iter_old) { + iter_old = iter; + norm_diff = 0; + norm_last = 0; + for (ifeature=0; ifeature= max_active) { @@ -496,14 +565,15 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check relative decrease of objective - if (check_objective) { + if (objective_stop) { new_value = objective_wide(X_theta_ptr, linear_func_ptr, ever_active_ptr, nactive_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr); if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { diff --git a/selectiveInference/src/randomized_lasso.c b/selectiveInference/src/randomized_lasso.c index ac55cf48..123c81de 100644 --- a/selectiveInference/src/randomized_lasso.c +++ b/selectiveInference/src/randomized_lasso.c @@ -126,8 +126,6 @@ double log_density_gaussian_conditional(double noise_scale, // Scale double denom = 2 * noise_scale * noise_scale; double reconstruction = 0; double *offset_ptr; - double *internal_linear_ptr; - double *internal_state_ptr; double *optimization_linear_ptr; double *optimization_state_ptr; @@ -164,8 +162,6 @@ double log_density_laplace_conditional(double noise_scale, // Scale double value = 0; double reconstruction = 0; double *offset_ptr; - double *internal_linear_ptr; - double 
*internal_state_ptr; double *optimization_linear_ptr; double *optimization_state_ptr; @@ -176,15 +172,6 @@ double log_density_laplace_conditional(double noise_scale, // Scale offset_ptr = ((double *) offset + irow); reconstruction = *offset_ptr; - // Internal (i.e. data) contribution - for (icol=0; icol Date: Thu, 26 Oct 2017 13:08:37 -0700 Subject: [PATCH 297/396] removing machine generated files --- selectiveInference/R/RcppExports.R | 19 ------ selectiveInference/src/RcppExports.cpp | 92 -------------------------- 2 files changed, 111 deletions(-) delete mode 100644 selectiveInference/R/RcppExports.R delete mode 100644 selectiveInference/src/RcppExports.cpp diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R deleted file mode 100644 index f5ebee43..00000000 --- a/selectiveInference/R/RcppExports.R +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) -} - -solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) -} - -update1_ <- function(Q2, w, m, k) { - .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) -} - -downdate1_ <- function(Q1, R, j0, m, n) { - .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) -} - diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp deleted file mode 100644 index 02a77413..00000000 --- a/selectiveInference/src/RcppExports.cpp +++ /dev/null @@ -1,92 +0,0 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#include - -using namespace Rcpp; - -// solve_QP -Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); - Rcpp::traits::input_parameter< double >::type bound(boundSEXP); - Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); - Rcpp::traits::input_parameter< 
Rcpp::IntegerVector >::type nactive(nactiveSEXP); - Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); - Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); - Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; -END_RCPP -} -// solve_QP_wide -Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); - Rcpp::traits::input_parameter< double >::type bound(boundSEXP); - Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type X_theta(X_thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); - Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); - Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); - Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; -END_RCPP -} -// update1_ -Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); - Rcpp::traits::input_parameter< int >::type m(mSEXP); - Rcpp::traits::input_parameter< int >::type k(kSEXP); - rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); - return rcpp_result_gen; -END_RCPP -} -// downdate1_ -Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); - Rcpp::traits::input_parameter< int >::type j0(j0SEXP); - Rcpp::traits::input_parameter< int >::type m(mSEXP); - Rcpp::traits::input_parameter< int >::type n(nSEXP); - rcpp_result_gen = 
Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return rcpp_result_gen; -END_RCPP -} - -static const R_CallMethodDef CallEntries[] = { - {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, - {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, - {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, - {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_selectiveInference(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} From ede2816a33aed4a7bc1c8aad81585c549c1422d6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 14:30:59 -0700 Subject: [PATCH 298/396] WIP: function to fit randomized lasso --- selectiveInference/R/funs.randomized.R | 64 ++++++++++++++++++++++++++ tests/test_randomized.R | 14 ++++++ 2 files changed, 78 insertions(+) create mode 100644 selectiveInference/R/funs.randomized.R create mode 100644 tests/test_randomized.R diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R new file mode 100644 index 00000000..01e6aa40 --- /dev/null +++ b/selectiveInference/R/funs.randomized.R @@ -0,0 +1,64 @@ +# Functions to fit and "infer" about parameters in the +# randomized LASSO +# +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2 + +fit_randomized_lasso = function(X, + y, + lam, + noise_scale, + ridge_term, + noise_type=c('gaussian', 'laplace'), + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + objective_tol=1.e-8, # tolerance for relative decrease in objective + objective_stop=FALSE, + kkt_stop=TRUE, + param_stop=TRUE) +{ + + n = nrow(X); p = ncol(X) + + noise_type = match.arg(noise_type) + + if (noise_type == 'gaussian') { + D = Norm(mean=0, sd=noise_scale) + } + else if (noise_type == 'laplace') { + D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. + } + perturb_ = distr::r(D)(p) + + lam = as.numeric(lam) + if (length(lam) == 1) { + lam = rep(lam, p) + } + if (length(lam) != p) { + stop("Lagrange parameter should be single float or of length ncol(X)") + } + + soln = rep(0, p) + Xsoln = rep(0, n) + linear_func = (- t(X) %*% y - perturb_) + gradient = 1. 
* linear_func + ever_active = rep(0, p) + nactive = as.integer(0) + + result = solve_QP_wide(X, # design matrix + lam, # vector of Lagrange multipliers + ridge_term / n, # ridge_term + max_iter, + soln, + linear_func, + gradient, + Xsoln, + ever_active, + nactive, + kkt_tol, + objective_tol, + p, + objective_stop, # objective_stop + kkt_stop, # kkt_stop + param_stop) # param_stop + return(result) +} diff --git a/tests/test_randomized.R b/tests/test_randomized.R new file mode 100644 index 00000000..e4e35a12 --- /dev/null +++ b/tests/test_randomized.R @@ -0,0 +1,14 @@ +library(selectiveInference) + +test = function() { + + n = 100; p = 50 + X = matrix(rnorm(n * p), n, p) + y = rnorm(n) + lam = 20 / sqrt(n) + noise_scale = 0.01 * sqrt(n) + ridge_term = .1 / sqrt(n) + fit_randomized_lasso(X, y, lam, noise_scale, ridge_term) +} + +print(test()) From 32feb0eae830e98d5dcd28b74c327d9bb195b6d1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 21:36:18 -0700 Subject: [PATCH 299/396] check that solution is same as old code -- currently failing --- tests/test_debiasing.R | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/test_debiasing.R diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R new file mode 100644 index 00000000..27bd6214 --- /dev/null +++ b/tests/test_debiasing.R @@ -0,0 +1,27 @@ +library(selectiveInference) +source('oldcode.R') + +n = 500; p = 50 + +X = matrix(rnorm(n * p), n, p) +S = t(X) %*% X / n + +mu = 7.791408e-02 + +A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000) +A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000) + +B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000) + +C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[1:5,] +C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[1:5,] + +par(mfrow=c(2,3)) +plot(A1[1,], C1[1,]) +plot(A1[1,], B1[1,]) +plot(B1[1,], C1[1,]) + +plot(A1[1,], A2[1,]) +plot(B1[1,], B2[1,]) +plot(C1[1,], C2[1,]) From 0a99f5fb50f48b4ab5cac1006b33c9acd8958ece Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:17:39 -0700 Subject: [PATCH 300/396] bug found in qp solver -- look at tests/test_QP.R --- selectiveInference/NAMESPACE | 2 +- selectiveInference/R/funs.fixed.R | 5 + selectiveInference/R/funs.randomized.R | 2 + selectiveInference/src/Rcpp-debias.cpp | 4 + selectiveInference/src/debias.h | 2 + selectiveInference/src/quadratic_program.c | 4 +- .../src/quadratic_program_wide.c | 6 +- tests/test_QP.R | 15 ++ tests/test_debiasing.R | 172 +++++++++++++++++- 9 files changed, 204 insertions(+), 8 deletions(-) create mode 100644 tests/test_QP.R diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index d72d56a9..c7d08a1e 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -44,4 +44,4 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") importFrom("Rcpp", "sourceCpp") - +importFrom("distr", "Norm", "DExp") diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index aaceaaad..54903162 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -327,6 +327,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? 
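# Note: parameter_tol plays the role of `threshold` in the original
# InverseLinfty code (see the tests later in this series): at the
# geometric checkpoints iter == 2 * iter_old the solver stops once
# ||theta - theta_old|| < parameter_tol * ||theta||.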
max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective ) { @@ -363,6 +364,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d warn_kkt=FALSE, max_iter=max_iter, kkt_tol=kkt_tol, + parameter_tol=parameter_tol, objective_tol=objective_tol) if (warn_kkt && (!output$kkt_check)) { @@ -393,6 +395,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective ) { @@ -433,6 +436,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, + parameter_tol, max_active, FALSE, # objective_stop FALSE, # kkt_stop @@ -451,6 +455,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, + parameter_tol, max_active, FALSE, # objective_stop FALSE, # kkt_stop diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 01e6aa40..25a0b95a 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -11,6 +11,7 @@ fit_randomized_lasso = function(X, noise_type=c('gaussian', 'laplace'), max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-8, # tolerance for relative convergence of parameter objective_tol=1.e-8, # tolerance for relative decrease in objective objective_stop=FALSE, kkt_stop=TRUE, @@ -56,6 +57,7 @@ fit_randomized_lasso = function(X, nactive, kkt_tol, objective_tol, + parameter_tol, p, objective_stop, # objective_stop kkt_stop, # kkt_stop diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 24bbae88..9cda705a 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -15,6 +15,7 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, + double parameter_tol, int max_active, int objective_stop, int kkt_stop, @@ -52,6 +53,7 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, maxiter, kkt_tol, objective_tol, + parameter_tol, max_active, objective_stop, kkt_stop, @@ -92,6 +94,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, + double parameter_tol, int max_active, int objective_stop, int kkt_stop, @@ -142,6 +145,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, maxiter, kkt_tol, objective_tol, + parameter_tol, max_active, objective_stop, kkt_stop, diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 052af7a1..d3db26d7 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -16,6 +16,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, 
/* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ @@ -44,6 +45,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index 1bc7fa34..822ddf53 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -273,6 +273,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ @@ -292,7 +293,6 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ double norm_diff = 1.; double norm_last = 1.; double delta; - double threshold = 1.e-2; double *theta_ptr, *theta_old_ptr; if (objective_stop) { @@ -403,7 +403,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ norm_diff = sqrt(norm_diff); norm_last = sqrt(norm_last); - if (norm_diff < threshold * norm_last) { + if (norm_diff < parameter_tol * norm_last) { break; } } diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index c6cb9f3f..3e4bdb09 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -4,7 +4,7 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 // where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf @@ -393,6 +393,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? 
*/ @@ -412,7 +413,6 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX double norm_diff = 1.; double norm_last = 1.; double delta; - double threshold = 1.e-2; double *theta_ptr_tmp, *theta_old_ptr_tmp; if (objective_stop) { @@ -552,7 +552,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX norm_diff = sqrt(norm_diff); norm_last = sqrt(norm_last); - if (norm_diff < threshold * norm_last) { + if (norm_diff < parameter_tol * norm_last) { break; } } diff --git a/tests/test_QP.R b/tests/test_QP.R new file mode 100644 index 00000000..cf5ca646 --- /dev/null +++ b/tests/test_QP.R @@ -0,0 +1,15 @@ +library(selectiveInference) +### Test + +n = 100; p = 50 + +X = matrix(rnorm(n * p), n, p) +Y = rnorm(n) +lam = 2 + +soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 1.e-12, 0)$soln +G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) +soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] + +print(soln1) +print(soln2) \ No newline at end of file diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R index 27bd6214..b1fdf24c 100644 --- a/tests/test_debiasing.R +++ b/tests/test_debiasing.R @@ -1,7 +1,143 @@ library(selectiveInference) -source('oldcode.R') -n = 500; p = 50 + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(mu)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + mu.stop <- 0; + try.no <- 1; + incr <- 0; + while ((mu.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + mu.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + mu <- mu*resol; + } else { + incr <- 0; + mu <- mu/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + mu <- mu*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + mu.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + mu <- mu/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + mu <- mu*resol; + beta <- last.beta; + mu.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + mu0 <- rho/(1+rho); + beta <- rep(0,p); + + #if (mu >= mu0){ + # beta[i] <- (1-mu0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-mu0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, 
maxiter, diff.norm2, threshold * last.norm2, threshold, mu)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Test + +n = 100; p = 50 X = matrix(rnorm(n * p), n, p) S = t(X) %*% X / n @@ -25,3 +161,35 @@ plot(B1[1,], C1[1,]) plot(A1[1,], A2[1,]) plot(B1[1,], B2[1,]) plot(C1[1,], C2[1,]) + +print(c('A', sum(A1[1,] == 0))) +print(c('B', sum(B1[1,] == 0))) +print(c('C', sum(C1[1,] == 0))) + +## Are our points feasible + +feasibility = function(S, soln, j, mu) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(max(abs(G)), mu)) +} + +print(c('feasibility A', feasibility(S, A1[1,], 1, mu))) +print(c('feasibility B', feasibility(S, B1[1,], 1, mu))) +print(c('feasibility C', feasibility(S, C1[1,], 1, mu))) + +active_KKT = function(S, soln, j, mu) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(G[soln != 0] * sign(soln)[soln != 0], mu)) +} + +print(c('active_KKT A', active_KKT(S, A1[1,], 1, mu))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, mu))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, mu))) + + From 242218e6f3da9153fca4e3a1cc0fcd47b0ebad8a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:19:20 -0700 Subject: [PATCH 301/396] allowing randomization to be 0 in solver --- selectiveInference/R/funs.randomized.R | 18 +++++++++++------- tests/test_QP.R | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 25a0b95a..b79e2fb8 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -22,14 +22,18 @@ fit_randomized_lasso = function(X, noise_type = match.arg(noise_type) - if (noise_type == 'gaussian') { - D = Norm(mean=0, sd=noise_scale) + if (noise_scale > 0) { + if (noise_type == 'gaussian') { + D = Norm(mean=0, sd=noise_scale) + } + else if (noise_type == 'laplace') { + D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. + } + perturb_ = distr::r(D)(p) + } else { + perturb_ = rep(0, p) } - else if (noise_type == 'laplace') { - D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. 
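# Note: a Laplace(0, noise_scale) draw is an Exp(1) magnitude with a
# random sign, scaled by noise_scale, so the replacement line
# rexp(p) * (2 * rbinom(p, 1, 0.5) - 1) * noise_scale
# samples the same distribution as the distr::DExp variate without
# the distr dependency.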
- } - perturb_ = distr::r(D)(p) - + lam = as.numeric(lam) if (length(lam) == 1) { lam = rep(lam, p) diff --git a/tests/test_QP.R b/tests/test_QP.R index cf5ca646..4aebec37 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -7,7 +7,7 @@ X = matrix(rnorm(n * p), n, p) Y = rnorm(n) lam = 2 -soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 1.e-12, 0)$soln +soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 0, 0)$soln G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] From 360dcbd722a674f9168ae7f2c1aa230b024ad95c Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:25:32 -0700 Subject: [PATCH 302/396] cosmetic edit --- selectiveInference/src/Rcpp-debias.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 9cda705a..5c181848 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -112,12 +112,13 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, Rcpp::IntegerVector need_update(nfeature); - // Extract the diagonal + Rcpp::NumericVector theta_old(nfeature); + + // Extract the diagonal -- divide by ncase + Rcpp::NumericVector nndef_diag(nfeature); double *nndef_diag_p = nndef_diag.begin(); - Rcpp::NumericVector theta_old(nfeature); - for (ifeature=0; ifeature Date: Thu, 26 Oct 2017 22:46:48 -0700 Subject: [PATCH 303/396] LASSO solver is right up to scale --- tests/test_QP.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_QP.R b/tests/test_QP.R index 4aebec37..61c4d539 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -9,7 +9,8 @@ lam = 2 soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 0, 0)$soln G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) -soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] +soln2 = coef(G, s=lam/n, exact=TRUE, x=X, y=Y)[-1] print(soln1) -print(soln2) \ No newline at end of file +print(soln2) +plot(soln1, soln2) From 506a205b316e71e2a7f2c562afc14d20a9b0ffef Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 23:55:49 -0700 Subject: [PATCH 304/396] BF: fixing Xsoln incorrectly set in wide solver --- selectiveInference/R/funs.fixed.R | 24 +++++++++++++------ selectiveInference/R/funs.randomized.R | 4 ++-- .../src/quadratic_program_wide.c | 2 +- tests/test_QP.R | 2 +- tests/test_debiasing.R | 13 ++++++---- 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 54903162..22a31e5a 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -326,6 +326,9 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? max_iter=100, # how many iterations for each optimization problem + kkt_stop=TRUE, # stop based on KKT conditions? + parameter_stop=TRUE, # stop based on relative convergence of parameter? + objective_stop=TRUE, # stop based on relative decrease in objective? 
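# Note: the *_stop flags choose which convergence checks run; the
# matching *_tol values below set how strict each check is. With all
# three flags FALSE the solver exits only at max_iter or once the
# active set reaches max_active.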
kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective @@ -363,6 +366,9 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d max_try=max_try, warn_kkt=FALSE, max_iter=max_iter, + kkt_stop=kkt_stop, + parameter_stop=parameter_stop, + objective_stop=objective_stop, kkt_tol=kkt_tol, parameter_tol=parameter_tol, objective_tol=objective_tol) @@ -394,6 +400,9 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? max_iter=100, # how many iterations for each optimization problem + kkt_stop=TRUE, # stop based on KKT conditions? + parameter_stop=TRUE, # stop based on relative convergence of parameter? + objective_stop=TRUE, # stop based on relative decrease in objective? kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective @@ -423,6 +432,8 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep last_output = NULL + Xsoln = rep(0, n) + while (counter_idx < max_try) { if (!is_wide) { @@ -438,11 +449,10 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, parameter_tol, max_active, - FALSE, # objective_stop - FALSE, # kkt_stop - TRUE) # param_stop + objective_stop, + kkt_stop, + parameter_stop) } else { - Xsoln = rep(0, nrow(Xinfo)) result = solve_QP_wide(Xinfo, # this is a design matrix rep(mu, p), # vector of Lagrange multipliers 0, # ridge_term @@ -457,9 +467,9 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, parameter_tol, max_active, - FALSE, # objective_stop - FALSE, # kkt_stop - TRUE) # param_stop + objective_stop, + kkt_stop, + parameter_stop) } diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index b79e2fb8..7bf60080 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -44,13 +44,13 @@ fit_randomized_lasso = function(X, soln = rep(0, p) Xsoln = rep(0, n) - linear_func = (- t(X) %*% y - perturb_) + linear_func = (- t(X) %*% y - perturb_) / n gradient = 1. 
* linear_func ever_active = rep(0, p) nactive = as.integer(0) result = solve_QP_wide(X, # design matrix - lam, # vector of Lagrange multipliers + lam / n, # vector of Lagrange multipliers ridge_term / n, # ridge_term max_iter, soln, diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 3e4bdb09..3546fcda 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -4,7 +4,7 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n + l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 // where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf diff --git a/tests/test_QP.R b/tests/test_QP.R index 61c4d539..17642259 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -1,7 +1,7 @@ library(selectiveInference) ### Test -n = 100; p = 50 +n = 80; p = 50 X = matrix(rnorm(n * p), n, p) Y = rnorm(n) diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R index b1fdf24c..e2743fea 100644 --- a/tests/test_debiasing.R +++ b/tests/test_debiasing.R @@ -144,16 +144,19 @@ S = t(X) %*% X / n mu = 7.791408e-02 -A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000) -A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000) +tol = 1.e-12 -B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000) -B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000) +A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) + +B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[1:5,] C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[1:5,] par(mfrow=c(2,3)) + plot(A1[1,], C1[1,]) plot(A1[1,], B1[1,]) plot(B1[1,], C1[1,]) @@ -185,6 +188,8 @@ active_KKT = function(S, soln, j, mu) { E = rep(0, p) E[j] = 1 G = S %*% soln - E + print(which(soln != 0)) + print(G[j]) return(c(G[soln != 0] * sign(soln)[soln != 0], mu)) } From 0be39c476d946569d472a4c7e41b232a9eca67e0 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 00:03:09 -0700 Subject: [PATCH 305/396] gradient not current? 
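The change below returns the solver's stored gradient from debiasingRow so
callers can test whether it is current. A minimal sketch of such a check,
assuming the stored value is the gradient of the smooth part of the dual
objective, S %*% soln - e_row (sign convention assumed), and calling the
unexported helper via the ::: operator:

library(selectiveInference)

set.seed(1)
n = 100; p = 50
X = matrix(rnorm(n * p), n, p)
S = t(X) %*% X / n

j = 1
# positional args: Xinfo, is_wide, row, mu (the bound parameter)
out = selectiveInference:::debiasingRow(S, FALSE, j, 0.08)

e_j = rep(0, p); e_j[j] = 1
G = S %*% out$soln - e_j            # gradient of the smooth part at the solution
print(max(abs(G)))                  # feasibility: should be <= 0.08
print(max(abs(G - out$gradient)))   # ~0 only if the stored gradient is current
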
--- selectiveInference/R/funs.fixed.R | 3 ++- tests/test_debiasing.R | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 22a31e5a..8d3f7279 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -525,7 +525,8 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep } return(list(soln=result$soln, - kkt_check=result$kkt_check)) + kkt_check=result$kkt_check, + gradient=result$gradient)) } diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R index e2743fea..50b43d27 100644 --- a/tests/test_debiasing.R +++ b/tests/test_debiasing.R @@ -146,14 +146,15 @@ mu = 7.791408e-02 tol = 1.e-12 -A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) -A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +rows = c(1:2) +A1 = debiasingMatrix(S, FALSE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +A2 = debiasingMatrix(S / n, FALSE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) -B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) -B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +B1 = debiasingMatrix(X, TRUE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) -C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[1:5,] -C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[1:5,] +C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[rows,] +C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[rows,] par(mfrow=c(2,3)) From 3ee2a434ff0e8e8c138cc688931f84eb8144ab26 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 00:07:30 -0700 Subject: [PATCH 306/396] print R2 from lm --- tests/test_QP.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_QP.R b/tests/test_QP.R index 17642259..79638e0e 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -14,3 +14,4 @@ soln2 = coef(G, s=lam/n, exact=TRUE, x=X, y=Y)[-1] print(soln1) print(soln2) plot(soln1, soln2) +print(summary(lm(soln1 ~ soln2))) \ No newline at end of file From 92812fc08b09015b8e713dd3dd3e5f44100dd9c6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 09:47:31 -0700 Subject: [PATCH 307/396] without linesearch we know agree with Adel's code at fixed mu --- selectiveInference/DESCRIPTION | 2 +- selectiveInference/NAMESPACE | 2 +- selectiveInference/R/funs.fixed.R | 29 ++++++---- selectiveInference/R/funs.randomized.R | 31 +++++----- tests/test_debiasing.R | 80 +++++++++++++------------- 5 files changed, 75 insertions(+), 69 deletions(-) diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index d9026221..fad072df 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -9,7 +9,7 @@ Maintainer: Rob Tibshirani Depends: glmnet, intervals, - survival + survival, Suggests: Rmpfr Description: New tools for post-selection inference, for use with forward diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index c7d08a1e..d72d56a9 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -44,4 +44,4 @@ importFrom("stats", 
dnorm, lsfit, pexp, pnorm, predict, importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") importFrom("Rcpp", "sourceCpp") -importFrom("distr", "Norm", "DExp") + diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 8d3f7279..19284871 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -319,7 +319,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d nsample, rows, verbose=FALSE, - mu=NULL, # starting value of mu + bound=NULL, # starting value of bound linesearch=TRUE, # do a linesearch? scaling_factor=1.5, # multiplicative factor for linesearch max_active=NULL, # how big can active set get? @@ -342,8 +342,8 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d p = ncol(Xinfo); M = matrix(0, length(rows), p); - if (is.null(mu)) { - mu = (1/sqrt(nsample)) * qnorm(1-(0.1/(p^2))) + if (is.null(bound)) { + bound = (1/sqrt(nsample)) * qnorm(1-(0.1/(p^2))) } xperc = 0; @@ -359,7 +359,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d output = debiasingRow(Xinfo, # could be X or t(X) %*% X / n depending on is_wide is_wide, row, - mu, + bound, linesearch=linesearch, scaling_factor=scaling_factor, max_active=max_active, @@ -393,7 +393,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n depending on is_wide is_wide, row, - mu, + bound, linesearch=TRUE, # do a linesearch? scaling_factor=1.5, # multiplicative factor for linesearch max_active=NULL, # how big can active set get? @@ -414,9 +414,11 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep max_active = min(nrow(Xinfo), ncol(Xinfo)) } + # Initialize variables soln = rep(0, p) + soln = as.numeric(soln) ever_active = rep(0, p) ever_active[1] = row # 1-based ever_active = as.integer(ever_active) @@ -432,13 +434,16 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep last_output = NULL - Xsoln = rep(0, n) + if (is_wide) { + n = nrow(Xinfo) + Xsoln = as.numeric(rep(0, n)) + } while (counter_idx < max_try) { if (!is_wide) { result = solve_QP(Xinfo, # this is non-neg-def matrix - mu, + bound, max_iter, soln, linear_func, @@ -453,9 +458,9 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep kkt_stop, parameter_stop) } else { - result = solve_QP_wide(Xinfo, # this is a design matrix - rep(mu, p), # vector of Lagrange multipliers - 0, # ridge_term + result = solve_QP_wide(Xinfo, # this is a design matrix + as.numeric(rep(bound, p)), # vector of Lagrange multipliers + 0, # ridge_term max_iter, soln, linear_func, @@ -493,13 +498,13 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep if ((iter < (max_iter+1)) && (counter_idx > 1)) { break; # we've found a feasible point and solved the problem } - mu = mu * scaling_factor; + bound = bound * scaling_factor; } else { # trying to drop the bound parameter further if ((iter == (max_iter + 1)) && (counter_idx > 1)) { result = last_output; # problem seems infeasible because we didn't solve it break; # so we revert to previously found solution } - mu = mu / scaling_factor; + bound = bound / scaling_factor; } # If the active set has grown to a certain size diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 7bf60080..3a171f79 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -3,19 +3,19 @@ # # min 1/2 || y - 
\beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2 -fit_randomized_lasso = function(X, - y, - lam, - noise_scale, - ridge_term, - noise_type=c('gaussian', 'laplace'), - max_iter=100, # how many iterations for each optimization problem - kkt_tol=1.e-4, # tolerance for the KKT conditions - parameter_tol=1.e-8, # tolerance for relative convergence of parameter - objective_tol=1.e-8, # tolerance for relative decrease in objective - objective_stop=FALSE, - kkt_stop=TRUE, - param_stop=TRUE) +randomizedLASSO = function(X, + y, + lam, + noise_scale, + ridge_term, + noise_type=c('gaussian', 'laplace'), + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-8, # tolerance for relative convergence of parameter + objective_tol=1.e-8, # tolerance for relative decrease in objective + objective_stop=FALSE, + kkt_stop=TRUE, + param_stop=TRUE) { n = nrow(X); p = ncol(X) @@ -24,12 +24,11 @@ fit_randomized_lasso = function(X, if (noise_scale > 0) { if (noise_type == 'gaussian') { - D = Norm(mean=0, sd=noise_scale) + perturb_ = rnorm(p) * noise_scale } else if (noise_type == 'laplace') { - D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. + perturb_ = rexp(p) * (2 * rbinom(p, 1, 0.5) - 1) * noise_scale } - perturb_ = distr::r(D)(p) } else { perturb_ = rep(0, p) } diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R index 50b43d27..8e81b169 100644 --- a/tests/test_debiasing.R +++ b/tests/test_debiasing.R @@ -2,9 +2,9 @@ library(selectiveInference) ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { +InverseLinfty <- function(sigma, n, resol=1.5, bound=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { isgiven <- 1; - if (is.null(mu)){ + if (is.null(bound)){ isgiven <- 0; } @@ -19,43 +19,43 @@ InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e print(paste(xperc,"% done",sep="")); } } if (isgiven==0){ - mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + bound <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); } - mu.stop <- 0; + bound.stop <- 0; try.no <- 1; incr <- 0; - while ((mu.stop != 1)&&(try.no<10)){ + while ((bound.stop != 1)&&(try.no<10)){ last.beta <- beta - output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + output <- InverseLinftyOneRow(sigma, i, bound, maxiter=maxiter, threshold=threshold) beta <- output$optsol iter <- output$iter if (isgiven==1){ - mu.stop <- 1 + bound.stop <- 1 } else{ if (try.no==1){ if (iter == (maxiter+1)){ incr <- 1; - mu <- mu*resol; + bound <- bound*resol; } else { incr <- 0; - mu <- mu/resol; + bound <- bound/resol; } } if (try.no > 1){ if ((incr == 1)&&(iter == (maxiter+1))){ - mu <- mu*resol; + bound <- bound*resol; } if ((incr == 1)&&(iter < (maxiter+1))){ - mu.stop <- 1; + bound.stop <- 1; } if ((incr == 0)&&(iter < (maxiter+1))){ - mu <- mu/resol; + bound <- bound/resol; } if ((incr == 0)&&(iter == (maxiter+1))){ - mu <- mu*resol; + bound <- bound*resol; beta <- last.beta; - mu.stop <- 1; + bound.stop <- 1; } } } @@ -66,14 +66,14 @@ InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e return(M) } -InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { +InverseLinftyOneRow <- function ( sigma, i, bound, maxiter=50, threshold=1e-10) { p <- nrow(sigma); rho <- max(abs(sigma[i,-i])) / sigma[i,i]; - mu0 <- 
rho/(1+rho); + bound0 <- rho/(1+rho); beta <- rep(0,p); - #if (mu >= mu0){ - # beta[i] <- (1-mu0)/sigma[i,i]; + #if (bound >= bound0){ + # beta[i] <- (1-bound0)/sigma[i,i]; # returnlist <- list("optsol" = beta, "iter" = 0); # return(returnlist); #} @@ -82,7 +82,7 @@ InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { last.norm2 <- 1; iter <- 1; iter.old <- 1; - beta[i] <- (1-mu0)/sigma[i,i]; + beta[i] <- (1-bound0)/sigma[i,i]; beta.old <- beta; sigma.tilde <- sigma; diag(sigma.tilde) <- 0; @@ -95,7 +95,7 @@ InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { v <- vs[j]; if (j==i) v <- v+1; - beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + beta[j] <- SoftThreshold(v,bound)/sigma[j,j]; if (oldval != beta[j]){ vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; } @@ -112,7 +112,7 @@ InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { # vs <- -sigma.tilde%*%beta; } - # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, mu)) + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, bound)) } @@ -142,19 +142,21 @@ n = 100; p = 50 X = matrix(rnorm(n * p), n, p) S = t(X) %*% X / n -mu = 7.791408e-02 +debiasing_bound = 7.791408e-02 tol = 1.e-12 -rows = c(1:2) -A1 = debiasingMatrix(S, FALSE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) -A2 = debiasingMatrix(S / n, FALSE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +rows = as.integer(c(1:2)) +print('here') +print(rows) +A1 = debiasingMatrix(S, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) -B1 = debiasingMatrix(X, TRUE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) -B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +A2 = debiasingMatrix(S / n, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B1 = debiasingMatrix(X, TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) -C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[rows,] -C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[rows,] +C1 = InverseLinfty(S, n, bound=debiasing_bound, maxiter=1000)[rows,] +C2 = InverseLinfty(S / n, n, bound=debiasing_bound, maxiter=1000)[rows,] par(mfrow=c(2,3)) @@ -172,30 +174,30 @@ print(c('C', sum(C1[1,] == 0))) ## Are our points feasible -feasibility = function(S, soln, j, mu) { +feasibility = function(S, soln, j, debiasing_bound) { p = nrow(S) E = rep(0, p) E[j] = 1 G = S %*% soln - E - return(c(max(abs(G)), mu)) + return(c(max(abs(G)), debiasing_bound)) } -print(c('feasibility A', feasibility(S, A1[1,], 1, mu))) -print(c('feasibility B', feasibility(S, B1[1,], 1, mu))) -print(c('feasibility C', feasibility(S, C1[1,], 1, mu))) +print(c('feasibility A', feasibility(S, A1[1,], 1, debiasing_bound))) +print(c('feasibility B', feasibility(S, B1[1,], 1, debiasing_bound))) +print(c('feasibility C', feasibility(S, C1[1,], 1, debiasing_bound))) -active_KKT = function(S, soln, j, mu) { +active_KKT = function(S, soln, j, debiasing_bound) { p = nrow(S) E = rep(0, p) E[j] = 1 G = S %*% soln - E print(which(soln != 0)) print(G[j]) - 
return(c(G[soln != 0] * sign(soln)[soln != 0], mu)) + return(c(G[soln != 0] * sign(soln)[soln != 0], debiasing_bound)) } -print(c('active_KKT A', active_KKT(S, A1[1,], 1, mu))) -print(c('active_KKT B', active_KKT(S, B1[1,], 1, mu))) -print(c('active_KKT C', active_KKT(S, C1[1,], 1, mu))) +print(c('active_KKT A', active_KKT(S, A1[1,], 1, debiasing_bound))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, debiasing_bound))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, debiasing_bound))) From abf0824bde4fc76ddde1b5aa66669ff7ab19a469 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 10:03:45 -0700 Subject: [PATCH 308/396] fixing documentation --- selectiveInference/NAMESPACE | 1 + selectiveInference/man/debiasingMatrix.Rd | 27 +++++++++-- selectiveInference/src/quadratic_program.c | 43 +++++++++-------- .../src/quadratic_program_wide.c | 46 ++++++++++--------- 4 files changed, 71 insertions(+), 46 deletions(-) diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index d72d56a9..e7b1e800 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -43,5 +43,6 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") +importFrom("stats", "rbinom", "rexp") importFrom("Rcpp", "sourceCpp") diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd index 4da925e6..b95c75b1 100644 --- a/selectiveInference/man/debiasingMatrix.Rd +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -16,14 +16,18 @@ debiasingMatrix(Xinfo, nsample, rows, verbose=FALSE, - mu=NULL, + bound=NULL, linesearch=TRUE, scaling_factor=1.5, max_active=NULL, max_try=10, warn_kkt=FALSE, max_iter=100, + kkt_stop=TRUE, + parameter_stop=TRUE, + objective_stop=TRUE, kkt_tol=1.e-4, + parameter_tol=1.e-4, objective_tol=1.e-8) } \arguments{ @@ -38,7 +42,7 @@ matrix of interest is t(X) %*% X / nrow(X). } \item{nsample}{ Number of samples used in forming the cross-covariance matrix. -Used for default value of the bound parameter mu. +Used for default value of the bound parameter. } \item{rows}{ Which rows of the approximate inverse to compute. @@ -46,7 +50,7 @@ Which rows of the approximate inverse to compute. \item{verbose}{ Print out progress as rows are being computed. } -\item{mu}{ +\item{bound}{ Initial bound parameter for each row. Will be changed if linesearch is TRUE. } @@ -72,10 +76,25 @@ descent algorithm. How many full iterations to run of the coordinate descent for each value of the bound parameter. } +\item{kkt_stop}{ +If TRUE, check to stop coordinate descent when KKT conditions are approximately satisfied. +} +\item{parameter_stop}{ +If TRUE, check to stop coordinate descent based on relative convergence of parameter vector, +checked at geometrically spaced iterations 2^k. +} +\item{objective_stop}{ +If TRUE, check to stop coordinate descent based on relative decrease of objective value, +checked at geometrically spaced iterations 2^k. +} \item{kkt_tol}{ Tolerance value for assessing whether KKT conditions for solving the dual problem and feasibility of the original problem. } +\item{parameter_tol}{ +Tolerance value for assessing convergence of the problem using relative +convergence of the parameter. +} \item{objective_tol}{ Tolerance value for assessing convergence of the problem using relative decrease of the objective. 
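For a concrete sense of the documented interface, a small usage sketch on
simulated data (the function in funs.fixed.R also takes is_wide as its
second argument):

library(selectiveInference)

set.seed(2)
n = 100; p = 200
X = matrix(rnorm(n * p), n, p)

# Wide case: pass X itself; the solver works with t(X) %*% X / n implicitly.
M = debiasingMatrix(X, TRUE, n, 1:5,
                    bound=NULL,   # default is (1/sqrt(n)) * qnorm(1 - 0.1/p^2)
                    kkt_stop=TRUE, parameter_stop=TRUE, objective_stop=TRUE,
                    kkt_tol=1e-4, parameter_tol=1e-4, objective_tol=1e-8)
dim(M)  # length(rows) x p: one row of the approximate inverse per request
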
@@ -86,7 +105,7 @@ This function computes an approximate inverse as described in Javanmard and Montanari (2013), specifically display (4). The problem is solved by considering a dual problem which has an objective similar to a LASSO problem and is solvable -by coordinate descent. For some values of mu the original +by coordinate descent. For some values of bound the original problem may not be feasible, in which case the dual problem has no solution. An attempt to detect this is made by stopping when the active set grows quite large, determined by max_active. diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index 822ddf53..1f7fcb3a 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -385,10 +385,12 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ } } - // Check based on norm -- from Adel's debiasing code - if (param_stop) { - if (iter == 2 * iter_old) { + if (iter == 2 * iter_old) { // Geometric iterations from Adel's code + + // Check based on norm + + if (param_stop) { iter_old = iter; norm_diff = 0; norm_last = 0; @@ -407,6 +409,24 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ break; } } + + // Check relative decrease of objective + + if (objective_stop) { + new_value = objective_qp(nndef_ptr, + linear_func_ptr, + ever_active_ptr, + nactive_ptr, + nfeature, + bound, + theta); + + if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { + break; + } + old_value = new_value; + } + } // Check size of active set @@ -415,23 +435,6 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ break; } - // Check relative decrease of objective - - if (objective_stop) { - new_value = objective_qp(nndef_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - nfeature, - bound, - theta); - - if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { - break; - } - old_value = new_value; - } - } return(iter); } diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 3546fcda..41e29cec 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -534,10 +534,11 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX } } - // Check based on norm -- from Adel's debiasing code + if (iter == 2 * iter_old) { // Geometric iterations from Adel's code - if (param_stop) { - if (iter == 2 * iter_old) { + // Check based on norm + + if (param_stop) { iter_old = iter; norm_diff = 0; norm_last = 0; @@ -556,32 +557,33 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX break; } } + + // Check relative decrease of objective + + if (objective_stop) { + new_value = objective_wide(X_theta_ptr, + linear_func_ptr, + ever_active_ptr, + nactive_ptr, + ncase, + nfeature, + bound_ptr, + ridge_term, + theta_ptr); + + if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { + break; + } + old_value = new_value; + } } + // Check size of active set if (*nactive_ptr >= max_active) { break; } - // Check relative decrease of objective - - if (objective_stop) { - new_value = objective_wide(X_theta_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - ncase, - nfeature, - bound_ptr, - ridge_term, - theta_ptr); - - if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { - break; - } - old_value = 
new_value; - } - } return(iter); } From 1658ecbad9ab779909e3985dda9750903b39d86e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 10:29:46 -0700 Subject: [PATCH 309/396] checking comparison example again -- looks good --- selectiveInference/R/funs.fixed.R | 13 +- selectiveInference/man/debiasingMatrix.Rd | 4 +- tests/debiased_lasso/comparison_scaled.R | 78 ++ tests/debiased_lasso/comparison_unscaled.R | 78 ++ tests/debiased_lasso/javanmard_montanari.R | 770 ++++++++++++++++++++ tests/{ => debiased_lasso}/test_debiasing.R | 0 tests/debiased_lasso/test_debiasing_wide.R | 202 +++++ 7 files changed, 1138 insertions(+), 7 deletions(-) create mode 100644 tests/debiased_lasso/comparison_scaled.R create mode 100644 tests/debiased_lasso/comparison_unscaled.R create mode 100644 tests/debiased_lasso/javanmard_montanari.R rename tests/{ => debiased_lasso}/test_debiasing.R (100%) create mode 100644 tests/debiased_lasso/test_debiasing_wide.R diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 19284871..7c8318d9 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -8,7 +8,7 @@ fixedLassoInf <- function(x, y, beta, sigma=NULL, alpha=0.1, type=c("partial", "full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE, - linesearch.try=10) { + linesearch.try=10, offset_correction=TRUE) { family = match.arg(family) this.call = match.call() @@ -197,6 +197,9 @@ fixedLassoInf <- function(x, y, beta, M = M[-1,] # remove intercept row null_value = null_value[-1] # remove intercept element } + if (!offset_correction) { + null_value = 0 * null_value + } } else if (type=="partial" || p > n) { xa = x[,vars,drop=F] M = pinv(crossprod(xa)) %*% t(xa) @@ -325,13 +328,13 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? - max_iter=100, # how many iterations for each optimization problem + max_iter=50, # how many iterations for each optimization problem kkt_stop=TRUE, # stop based on KKT conditions? parameter_stop=TRUE, # stop based on relative convergence of parameter? objective_stop=TRUE, # stop based on relative decrease in objective? kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-4, # tolerance for relative convergence of parameter - objective_tol=1.e-8 # tolerance for relative decrease in objective + objective_tol=1.e-4 # tolerance for relative decrease in objective ) { @@ -399,13 +402,13 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? - max_iter=100, # how many iterations for each optimization problem + max_iter=50, # how many iterations for each optimization problem kkt_stop=TRUE, # stop based on KKT conditions? parameter_stop=TRUE, # stop based on relative convergence of parameter? objective_stop=TRUE, # stop based on relative decrease in objective? 
kkt_tol=1.e-4,        # tolerance for the KKT conditions
 parameter_tol=1.e-4,  # tolerance for relative convergence of parameter
-objective_tol=1.e-8   # tolerance for relative decrease in objective
+objective_tol=1.e-4   # tolerance for relative decrease in objective
 ) {

  p = ncol(Xinfo)
diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd
index b95c75b1..6a348506 100644
--- a/selectiveInference/man/debiasingMatrix.Rd
+++ b/selectiveInference/man/debiasingMatrix.Rd
@@ -22,13 +22,13 @@ debiasingMatrix(Xinfo,
                 max_active=NULL,
                 max_try=10,
                 warn_kkt=FALSE,
-                max_iter=100,
+                max_iter=50,
                 kkt_stop=TRUE,
                 parameter_stop=TRUE,
                 objective_stop=TRUE,
                 kkt_tol=1.e-4,
                 parameter_tol=1.e-4,
-                objective_tol=1.e-8)
+                objective_tol=1.e-4)
 }
 \arguments{
 \item{Xinfo}{
diff --git a/tests/debiased_lasso/comparison_scaled.R b/tests/debiased_lasso/comparison_scaled.R
new file mode 100644
index 00000000..e0c0a6a2
--- /dev/null
+++ b/tests/debiased_lasso/comparison_scaled.R
@@ -0,0 +1,78 @@
+source('javanmard_montanari.R')
+
+##############################################
+
+# Runs nsim simulations under the global null, computing p-values
+# using both the old code (slow one using Adel's code) and the new
+# code (faster, using Jon's code), and plots the p-value ECDFs for both.
+# Running 50 sims takes about 10-15 mins because the old code is slow, so
+# feel free to lower nsim if you want
+
+
+library(selectiveInference)
+library(glmnet)
+
+# set.seed(424)
+
+n=100
+p=200
+
+sigma=.5
+
+theor_lambda = sigma * sqrt(2 * log(p))
+lambda=c(0.25, 0.5, 1, 0.8 * theor_lambda, theor_lambda)
+
+for (j in c(3,4,5,1,2)) {
+
+thresh = 1e-10
+
+beta=rep(0,p)
+type="full"
+
+nsim = 20
+
+scaling = sqrt(n)
+pvs_old = c()
+pvs_new <- c()
+pvs_old_0 = c() # don't add the offset correction
+pvs_new_0 = c() # don't add the offset correction
+for (i in 1:nsim) {
+  cat(i,fill=T)
+  x = matrix(rnorm(n*p),n,p)
+  x = scale(x,T,T) / scaling
+  mu = x%*%beta
+  y=mu+sigma*rnorm(n)
+
+  # first run glmnet
+  gfit=glmnet(x,y,intercept=F,standardize=F,thresh=thresh)
+
+  bhat = coef(gfit, s=lambda[j]/(sqrt(n) * scaling), exact=TRUE,x=x,y=y)[-1]
+
+  if(sum(bhat != 0) > 0) {
+
+    # compute fixed lambda p-values and selection intervals
+
+    aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type)
+    bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type)
+    cc = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE)
+    dd = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE)
+    pvs_new <- c(pvs_new, aa$pv, recursive=TRUE)
+    pvs_old <- c(pvs_old, bb$pv,recursive=TRUE)
+    pvs_new_0 <- c(pvs_new_0, cc$pv, recursive=TRUE)
+    pvs_old_0 <- c(pvs_old_0, dd$pv, recursive=TRUE)
+
+    cat()
+  }
+}
+
+#check uniformity
+
+png(paste('comparison_scaled', j, '.png', sep=''))
+plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values')
+plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE)
+plot(ecdf(pvs_old_0), pch=23, col='red', add=TRUE)
+plot(ecdf(pvs_new_0), pch=24, col='black', add=TRUE)
+abline(0,1)
+legend("bottomright", legend=c("Old","New", "Old 0", "New 0"), pch=c(23,24,23,24), pt.bg=c("green","purple","red","black"))
+dev.off()
+}
\ No newline at end of file
diff --git a/tests/debiased_lasso/comparison_unscaled.R b/tests/debiased_lasso/comparison_unscaled.R
new file mode 100644
index 00000000..3bb408e3
--- /dev/null
+++ b/tests/debiased_lasso/comparison_unscaled.R
@@ -0,0 +1,78 @@
+source('javanmard_montanari.R')
+
+##############################################
+
+# Runs nsim simulations under the global null, computing p-values
+# using both the old code (slow one using Adel's code) and the new
+# code (faster, using Jon's code), and plots the p-value ECDFs for both.
+# Running 50 sims takes about 10-15 mins because the old code is slow, so
+# feel free to lower nsim if you want
+
+
+library(selectiveInference)
+library(glmnet)
+
+# set.seed(424)
+
+n=100
+p=200
+
+sigma=.5
+
+theor_lambda = sigma * sqrt(2 * log(p))
+lambda=c(0.25, 0.5, 1, 0.8 * theor_lambda, theor_lambda)
+
+for (j in c(3,4,5,1,2)) {
+
+thresh = 1e-10
+
+beta=rep(0,p)
+type="full"
+
+nsim = 20
+
+scaling = sqrt(n)
+pvs_old = c()
+pvs_new <- c()
+pvs_old_0 = c() # don't add the offset correction
+pvs_new_0 = c() # don't add the offset correction
+for (i in 1:nsim) {
+  cat(i,fill=T)
+  x = matrix(rnorm(n*p),n,p)
+  x = scale(x,T,T) / scaling
+  mu = x%*%beta
+  y=mu+sigma*rnorm(n)
+
+  # first run glmnet
+  gfit=glmnet(x,y,intercept=F,standardize=F,thresh=thresh)
+
+  bhat = coef(gfit, s=lambda[j]/(sqrt(n) * scaling), exact=TRUE,x=x,y=y)[-1]
+
+  if(sum(bhat != 0) > 0) {
+
+    # compute fixed lambda p-values and selection intervals
+
+    aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type)
+    bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type)
+    cc = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE)
+    dd = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE)
+    pvs_new <- c(pvs_new, aa$pv, recursive=TRUE)
+    pvs_old <- c(pvs_old, bb$pv,recursive=TRUE)
+    pvs_new_0 <- c(pvs_new_0, cc$pv, recursive=TRUE)
+    pvs_old_0 <- c(pvs_old_0, dd$pv, recursive=TRUE)
+
+    cat()
+  }
+}
+
+#check uniformity
+
+png(paste('comparison_unscaled', j, '.png', sep=''))
+plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values')
+plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE)
+plot(ecdf(pvs_old_0), pch=23, col='red', add=TRUE)
+plot(ecdf(pvs_new_0), pch=24, col='black', add=TRUE)
+abline(0,1)
+legend("bottomright", legend=c("Old","New", "Old 0", "New 0"), pch=c(23,24,23,24), pt.bg=c("green","purple","red","black"))
+dev.off()
+}
\ No newline at end of file
diff --git a/tests/debiased_lasso/javanmard_montanari.R b/tests/debiased_lasso/javanmard_montanari.R
new file mode 100644
index 00000000..09f33558
--- /dev/null
+++ b/tests/debiased_lasso/javanmard_montanari.R
@@ -0,0 +1,770 @@
+# First part is only functions from the old code.
At the bottom is +# the bit of code that actually compares the old vs new code + +###################################################### + +### Old code (using Adel's R code) + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(mu)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + mu.stop <- 0; + try.no <- 1; + incr <- 0; + while ((mu.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + mu.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + mu <- mu*resol; + } else { + incr <- 0; + mu <- mu/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + mu <- mu*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + mu.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + mu <- mu/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + mu <- mu*resol; + beta <- last.beta; + mu.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + mu0 <- rho/(1+rho); + beta <- rep(0,p); + + #if (mu >= mu0){ + # beta[i] <- (1-mu0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-mu0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, mu)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Functions borrowed from selective Inference (only fixedLassoInf and fixedLasso.poly are modified) + +# Special linear time order function, works only when x +# is a scrambled vector of integers. + +Order <- function(x) { + n = length(x) + o = numeric(n) + o[x] = Seq(1,n) + return(o) +} + +# Returns a sequence of integers from a to b if a <= b, +# otherwise nothing. You have no idea how important this +# function is... + +Seq <- function(a, b, ...) { + if (a<=b) return(seq(a,b,...)) + else return(numeric(0)) +} + +# Returns the sign of x, with Sign(0)=1. 
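+# For illustration: Sign(c(-2, 0, 3)) returns c(-1, 1, 1).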
+ +Sign <- function(x) { + return(-1+2*(x>=0)) +} + +############################## + +# Centering and scaling convenience function + +standardize <- function(x, y, intercept, normalize) { + x = as.matrix(x) + y = as.numeric(y) + n = nrow(x) + p = ncol(x) + + if (intercept) { + bx = colMeans(x) + by = mean(y) + x = scale(x,bx,FALSE) + y = y-mean(y) + } else { + bx = rep(0,p) + by = 0 + } + if (normalize) { + sx = sqrt(colSums(x^2)) + x = scale(x,FALSE,sx) + } else { + sx = rep(1,p) + } + + return(list(x=x,y=y,bx=bx,by=by,sx=sx)) +} + +############################## + +# Interpolation function to get coefficients + +coef.interpolate <- function(betas, s, knots, dec=TRUE) { + # Sort the s values + o = order(s,dec=dec) + s = s[o] + + k = length(s) + mat = matrix(rep(knots,each=k),nrow=k) + if (dec) b = s >= mat + else b = s <= mat + blo = max.col(b,ties.method="first") + bhi = pmax(blo-1,1) + + i = bhi==blo + p = numeric(k) + p[i] = 0 + p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i] + + beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE])) + colnames(beta) = as.character(round(s,3)) + rownames(beta) = NULL + + # Return in original order + o = order(o) + return(beta[,o,drop=FALSE]) +} + +############################## + +checkargs.xy <- function(x, y) { + if (missing(x)) stop("x is missing") + if (is.null(x) || !is.matrix(x)) stop("x must be a matrix") + if (missing(y)) stop("y is missing") + if (is.null(y) || !is.numeric(y)) stop("y must be numeric") + if (ncol(x) == 0) stop("There must be at least one predictor [must have ncol(x) > 0]") + if (checkcols(x)) stop("x cannot have duplicate columns") + if (length(y) == 0) stop("There must be at least one data point [must have length(y) > 0]") + if (length(y)!=nrow(x)) stop("Dimensions don't match [length(y) != nrow(x)]") +} + +checkargs.misc <- function(sigma=NULL, alpha=NULL, k=NULL, + gridrange=NULL, gridpts=NULL, griddepth=NULL, + mult=NULL, ntimes=NULL, + beta=NULL, lambda=NULL, tol.beta=NULL, tol.kkt=NULL, + bh.q=NULL) { + + if (!is.null(sigma) && sigma <= 0) stop("sigma must be > 0") + if (!is.null(lambda) && lambda < 0) stop("lambda must be >= 0") + if (!is.null(alpha) && (alpha <= 0 || alpha >= 1)) stop("alpha must be between 0 and 1") + if (!is.null(k) && length(k) != 1) stop("k must be a single number") + if (!is.null(k) && (k < 1 || k != floor(k))) stop("k must be an integer >= 1") + if (!is.null(gridrange) && (length(gridrange) != 2 || gridrange[1] > gridrange[2])) + stop("gridrange must be an interval of the form c(a,b) with a <= b") + if (!is.null(gridpts) && (gridpts < 20 || gridpts != round(gridpts))) + stop("gridpts must be an integer >= 20") + if (!is.null(griddepth) && (griddepth > 10 || griddepth != round(griddepth))) + stop("griddepth must be an integer <= 10") + if (!is.null(mult) && mult < 0) stop("mult must be >= 0") + if (!is.null(ntimes) && (ntimes <= 0 || ntimes != round(ntimes))) + stop("ntimes must be an integer > 0") + if (!is.null(beta) && sum(beta!=0)==0) stop("Value of lambda too large, beta is zero") + # if (!is.null(lambda) && length(lambda) != 1) stop("lambda must be a single number") + if (!is.null(lambda) && length(lambda) != 1 && length(lambda) != length(beta)) stop("lambda must be a single number or equal to the length of beta") + if (!is.null(lambda) && lambda < 0) stop("lambda must be >=0") + if (!is.null(tol.beta) && tol.beta <= 0) stop("tol.beta must be > 0") + if (!is.null(tol.kkt) && tol.kkt <= 0) stop("tol.kkt must be > 0") +} + +# Make sure that no two columms of A are the 
same +# (this works with probability one). + +checkcols <- function(A) { + b = rnorm(nrow(A)) + a = sort(t(A)%*%b) + if (any(diff(a)==0)) return(TRUE) + return(FALSE) +} + +estimateSigma <- function(x, y, intercept=TRUE, standardize=TRUE) { + checkargs.xy(x,rep(0,nrow(x))) + if(nrow(x)<10) stop("Number of observations must be at least 10 to run estimateSigma") + cvfit=cv.glmnet(x,y,intercept=intercept,standardize=standardize) + lamhat=cvfit$lambda.min + fit=glmnet(x,y,standardize=standardize) + yhat=predict(fit,x,s=lamhat) + nz=sum(predict(fit,s=lamhat, type="coef")!=0) + sigma=sqrt(sum((y-yhat)^2)/(length(y)-nz-1)) + return(list(sigmahat=sigma, df=nz)) +} + +# Update the QR factorization, after a column has been +# added. Here Q1 is m x n, Q2 is m x k, and R is n x n. + +updateQR <- function(Q1,Q2,R,col) { + m = nrow(Q1) + n = ncol(Q1) + k = ncol(Q2) + + a = .C("update1", + Q2=as.double(Q2), + w=as.double(t(Q2)%*%col), + m=as.integer(m), + k=as.integer(k), + dup=FALSE, + package="selectiveInference") + + Q2 = matrix(a$Q2,nrow=m) + w = c(t(Q1)%*%col,a$w) + + # Re-structure: delete a column from Q2, add one to + # Q1, and expand R + Q1 = cbind(Q1,Q2[,1]) + Q2 = Q2[,-1,drop=FALSE] + R = rbind(R,rep(0,n)) + R = cbind(R,w[Seq(1,n+1)]) + + return(list(Q1=Q1,Q2=Q2,R=R)) +} + +# Moore-Penrose pseudo inverse for symmetric matrices + +pinv <- function(A, tol=.Machine$double.eps) { + e = eigen(A) + v = Re(e$vec) + d = Re(e$val) + d[d > tol] = 1/d[d > tol] + d[d < tol] = 0 + if (length(d)==1) return(v*d*v) + else return(v %*% diag(d) %*% t(v)) +} + +############################## + +# Assuming that grid is in sorted order from smallest to largest, +# and vals are monotonically increasing function values over the +# grid, returns the grid end points such that the corresponding +# vals are approximately equal to {val1, val2} + +grid.search <- function(grid, fun, val1, val2, gridpts=100, griddepth=2) { + n = length(grid) + vals = fun(grid) + + ii = which(vals >= val1) + jj = which(vals <= val2) + if (length(ii)==0) return(c(grid[n],Inf)) # All vals < val1 + if (length(jj)==0) return(c(-Inf,grid[1])) # All vals > val2 + # RJT: the above logic is correct ... but for simplicity, instead, + # we could just return c(-Inf,Inf) + + i1 = min(ii); i2 = max(jj) + if (i1==1) lo = -Inf + else lo = grid.bsearch(grid[i1-1],grid[i1],fun,val1,gridpts, + griddepth-1,below=TRUE) + if (i2==n) hi = Inf + else hi = grid.bsearch(grid[i2],grid[i2+1],fun,val2,gridpts, + griddepth-1,below=FALSE) + return(c(lo,hi)) +} + +# Repeated bin search to find the point x in the interval [left, right] +# that satisfies f(x) approx equal to val. If below=TRUE, then we seek +# x such that the above holds and f(x) <= val; else we seek f(x) >= val. 
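+#
+# An illustrative case (not from the package): with fun = function(x) x^2
+# and val = 0.25 on [left, right] = [0, 1], below=TRUE homes in on a point
+# just below x = 0.5, the last grid point where f(x) <= val still holds.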
grid.bsearch <- function(left, right, fun, val, gridpts=100, griddepth=1, below=TRUE) {
+  n = gridpts
+  depth = 1
+
+  while (depth <= griddepth) {
+    grid = seq(left,right,length=n)
+    vals = fun(grid)
+
+    if (below) {
+      ii = which(vals >= val)
+      if (length(ii)==0) return(grid[n])   # All vals < val (shouldn't happen)
+      if ((i0=min(ii))==1) return(grid[1]) # All vals > val (shouldn't happen)
+      left = grid[i0-1]
+      right = grid[i0]
+    }
+
+    else {
+      ii = which(vals <= val)
+      if (length(ii)==0) return(grid[1])   # All vals > val (shouldn't happen)
+      if ((i0=max(ii))==n) return(grid[n]) # All vals < val (shouldn't happen)
+      left = grid[i0]
+      right = grid[i0+1]
+    }
+
+    depth = depth+1
+  }
+
+  return(ifelse(below, left, right))
+}
+
+# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector
+
+tnorm.surv <- function(z, mean, sd, a, b, bits=NULL) {
+  z = max(min(z,b),a)
+
+  # Check silly boundary cases
+  p = numeric(length(mean))
+  p[mean==-Inf] = 0
+  p[mean==Inf] = 1
+
+  # Try the multi precision floating point calculation first
+  o = is.finite(mean)
+  mm = mean[o]
+  pp = mpfr.tnorm.surv(z,mm,sd,a,b,bits)
+
+  # If there are any NAs, then settle for an approximation
+  oo = is.na(pp)
+  if (any(oo)) pp[oo] = bryc.tnorm.surv(z,mm[oo],sd,a,b)
+
+  p[o] = pp
+  return(p)
+}
+
+# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector, using
+# multi precision floating point calculations thanks to the Rmpfr package
+
+mpfr.tnorm.surv <- function(z, mean=0, sd=1, a, b, bits=NULL) {
+  # If bits is not NULL, then we are supposed to be using Rmpfr
+  # (note that this would fail if Rmpfr is not installed; but
+  # by the time this function is being executed, this should
+  # have been properly checked at a higher level; and if Rmpfr
+  # is not installed, bits would have been previously set to NULL)
+  if (!is.null(bits)) {
+    z = Rmpfr::mpfr((z-mean)/sd, precBits=bits)
+    a = Rmpfr::mpfr((a-mean)/sd, precBits=bits)
+    b = Rmpfr::mpfr((b-mean)/sd, precBits=bits)
+    return(as.numeric((Rmpfr::pnorm(b)-Rmpfr::pnorm(z))/
+                      (Rmpfr::pnorm(b)-Rmpfr::pnorm(a))))
+  }
+
+  # Else, just use standard floating point calculations
+  z = (z-mean)/sd
+  a = (a-mean)/sd
+  b = (b-mean)/sd
+  return((pnorm(b)-pnorm(z))/(pnorm(b)-pnorm(a)))
+}
+
+# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector, based on
+# A UNIFORM APPROXIMATION TO THE RIGHT NORMAL TAIL INTEGRAL, W Bryc
+# Applied Mathematics and Computation
+# Volume 127, Issues 2--3, 15 April 2002, Pages 365--374
+# https://math.uc.edu/~brycw/preprint/z-tail/z-tail.pdf
+
+bryc.tnorm.surv <- function(z, mean=0, sd=1, a, b) {
+  z = (z-mean)/sd
+  a = (a-mean)/sd
+  b = (b-mean)/sd
+  n = length(mean)
+
+  term1 = exp(z*z)
+  o = a > -Inf
+  term1[o] = ff(a[o])*exp(-(a[o]^2-z[o]^2)/2)
+  term2 = rep(0,n)
+  oo = b < Inf
+  term2[oo] = ff(b[oo])*exp(-(b[oo]^2-z[oo]^2)/2)
+  p = (ff(z)-term2)/(term1-term2)
+
+  # Sometimes the approximation can give wacky p-values,
+  # outside of [0,1] ..
+  #p[p<0 | p>1] = NA
+  p = pmin(1,pmax(0,p))
+  return(p)
+}
+
+ff <- function(z) {
+  return((z^2+5.575192695*z+12.7743632)/
+         (z^3*sqrt(2*pi)+14.38718147*z*z+31.53531977*z+2*12.77436324))
+}
+
+############## MODIFIED FUNCTIONS ###############
+
+# Lasso inference function (for fixed lambda).
Note: here we are providing inference +# for the solution of +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 + +oldFixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, + sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE, offset_correction=TRUE) { + + family = match.arg(family) + this.call = match.call() + type = match.arg(type) + + if(family=="binomial") { + if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + else if(family=="cox") { + if(type!="partial") stop("Only type= partial allowed with Cox family") + out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + + else{ + + + + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (type == "full") { + if (p > n) { + # need intercept (if there is one) for debiased lasso + hbeta = beta + if (intercept == T) { + if (length(beta) != p + 1) { + stop("Since type='full', p > n, and intercept=TRUE, beta must have length equal to ncol(x)+1") + } + # remove intercept if included + beta = beta[-1] + } else if (length(beta) != p) { + stop("Since family='gaussian', type='full' and intercept=FALSE, beta must have length equal to ncol(x)") + } + } + } else if (length(beta) != p) { + stop("Since family='gaussian' and type='partial', beta must have length equal to ncol(x)") + } + + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) + # print(tol.coef) + vars = which(abs(beta) > tol.coef) + # print(beta) + # print(vars) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Get lasso polyhedral region, of form Gy >= u + if (type == 'full' & p > n) out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) + else out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u + + # Check polyhedral region + tol.poly = 0.01 + if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + sign = numeric(k) + + if (type=="full" & p > n) { + if (intercept == T) { + pp=p+1 + Xint <- cbind(rep(1,n),x) + # indices of selected predictors + S = c(1,vars + 1) + notS = which(abs(beta) <= tol.coef) + 1 + } else { + pp=p + Xint <- x + # indices of selected predictors + S = vars + notS = which(abs(beta) <= tol.coef) + } + + + XS = Xint[,S] + hbetaS = hbeta[S] + + # Reorder so that active set S is first + Xordered = Xint[,c(S,notS,recursive=T)] + + hsigma <- 1/n*(t(Xordered)%*%Xordered) + hsigmaS <- 1/n*(t(XS)%*%XS) # hsigma[S,S] + hsigmaSinv <- pinv(hsigmaS) # solve(hsigmaS) + + # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R + htheta <- InverseLinfty(hsigma, n, verbose=FALSE) + + # 0-padding matrix + FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) + ithetasigma = (diag(pp)-(htheta%*%hsigma)) + + M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) + # vector which is offset for testing debiased beta's + meanoffset <- -(((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) + if (intercept == T) { + M = M[-1,] # remove intercept row + meanoffset = meanoffset[-1] # remove intercept element + } + if (offset_correction == FALSE) { + meanoffset = 0 * meanoffset + } + } else if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + meanoffset = rep(0,k) + } else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + meanoffset = rep(0,k) + } + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + + a = poly.pval(y,G,u,vj,offset=meanoffset[j],sigma,bits) + pv[j] = a$pv + vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) + + a = poly.int(y,G,u,vj,offset=meanoffset[j],sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call,M=M) + class(out) = "fixedLassoInf" + return(out) + } +} + + +fixedLasso.poly= + function(x, y, beta, lambda, a, inactive = FALSE) { + xa = x[,a,drop=F] + xac = x[,!a,drop=F] + xai = pinv(crossprod(xa)) + xap = xai %*% t(xa) + za = sign(beta[a]) + if (length(za)>1) dz = diag(za) + if (length(za)==1) dz = matrix(za,1,1) + + if (inactive) { + P = diag(1,nrow(xa)) - xa %*% xap + + G = -rbind( + 1/lambda * t(xac) %*% P, + -1/lambda * t(xac) 
%*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + 1 - t(xac) %*% t(xap) %*% za, + 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + } else { + G = -rbind( + # 1/lambda * t(xac) %*% P, + # -1/lambda * t(xac) %*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + # 1 - t(xac) %*% t(xap) %*% za, + # 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + } + + return(list(G=G,u=u)) + } + + +# Main p-value function + +poly.pval <- function(y, G, u, v, sigma, offset=0, bits=NULL) { + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + pv = tnorm.surv(z,0-offset,sd,vlo,vup,bits) + return(list(pv=pv,vlo=vlo,vup=vup)) +} + +# Main confidence interval function + +poly.int <- function(y, G, u, v, sigma, alpha, offset=0, gridrange=c(-100,100), + gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { + + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) + fun = function(x) { tnorm.surv(z,x-offset,sd,vlo,vup,bits) } + + int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) + tailarea = c(fun(int[1]),1-fun(int[2])) + + if (flip) { + int = -int[2:1] + tailarea = tailarea[2:1] + } + + return(list(int=int,tailarea=tailarea)) +} diff --git a/tests/test_debiasing.R b/tests/debiased_lasso/test_debiasing.R similarity index 100% rename from tests/test_debiasing.R rename to tests/debiased_lasso/test_debiasing.R diff --git a/tests/debiased_lasso/test_debiasing_wide.R b/tests/debiased_lasso/test_debiasing_wide.R new file mode 100644 index 00000000..62801da9 --- /dev/null +++ b/tests/debiased_lasso/test_debiasing_wide.R @@ -0,0 +1,202 @@ +library(selectiveInference) + + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, bound=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(bound)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + bound <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + bound.stop <- 0; + try.no <- 1; + incr <- 0; + while ((bound.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, bound, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + bound.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + bound <- bound*resol; + } else { + incr <- 0; + bound <- bound/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + bound <- bound*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + bound.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + bound <- bound/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + bound <- bound*resol; + beta <- last.beta; + bound.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, bound, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + bound0 <- rho/(1+rho); + beta <- rep(0,p); + + 
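+  # (Added note, inferred from the updates below: this row solves, by
+  # coordinate descent with soft-thresholding, the LASSO-like dual objective
+  #    b' sigma b / 2 - b[i] + bound * ||b||_1,
+  # whose KKT conditions enforce ||sigma %*% b - e_i||_inf <= bound,
+  # the constraint checked by feasibility() at the bottom of this file.)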
#if (bound >= bound0){ + # beta[i] <- (1-bound0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-bound0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,bound)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, bound)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Test + +n = 100; p = 250 + +X = matrix(rnorm(n * p), n, p) +S = t(X) %*% X / n + +debiasing_bound = 0.2 + +tol = 1.e-12 + +rows = as.integer(c(1:2)) + +A1 = debiasingMatrix(S, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +A2 = debiasingMatrix(S / n, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B1 = debiasingMatrix(X, TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) + +C1 = InverseLinfty(S, n, bound=debiasing_bound, maxiter=1000)[rows,] +C2 = InverseLinfty(S / n, n, bound=debiasing_bound, maxiter=1000)[rows,] + +par(mfrow=c(2,3)) + +plot(A1[1,], C1[1,]) +plot(A1[1,], B1[1,]) +plot(B1[1,], C1[1,]) + +plot(A1[1,], A2[1,]) +plot(B1[1,], B2[1,]) +plot(C1[1,], C2[1,]) + +print(c('A', sum(A1[1,] == 0))) +print(c('B', sum(B1[1,] == 0))) +print(c('C', sum(C1[1,] == 0))) + +## Are our points feasible + +feasibility = function(S, soln, j, debiasing_bound) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(max(abs(G)), debiasing_bound)) +} + +print(c('feasibility A', feasibility(S, A1[1,], 1, debiasing_bound))) +print(c('feasibility B', feasibility(S, B1[1,], 1, debiasing_bound))) +print(c('feasibility C', feasibility(S, C1[1,], 1, debiasing_bound))) + +active_KKT = function(S, soln, j, debiasing_bound) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + print(which(soln != 0)) + print(G[j]) + return(c(G[soln != 0] * sign(soln)[soln != 0], debiasing_bound)) +} + +print(c('active_KKT A', active_KKT(S, A1[1,], 1, debiasing_bound))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, debiasing_bound))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, debiasing_bound))) + + +print(summary(lm(A1[1,] ~ C1[1,]))) \ No newline at end of file From e3049cffa9892b96e8122cbde31d9e83ebf147bd Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 10:36:58 -0700 Subject: [PATCH 310/396] removing the offset_correction which was just for the comparison --- 
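The flag removed here was added in the previous commit only so the comparison
scripts could overlay corrected and uncorrected p-values. What it toggled is
the debiasing offset entering the truncated-Gaussian pivot; a sketch, in the
notation of oldFixedLassoInf from javanmard_montanari.R:

    ithetasigma <- diag(pp) - htheta %*% hsigma
    meanoffset  <- -((ithetasigma %*% FS %*% hsigmaSinv) %*% sign(hbetaS)) * lambda / n
    # offset_correction=FALSE amounted to zeroing it: meanoffset <- 0 * meanoffset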
selectiveInference/R/funs.fixed.R | 5 +---- tests/debiased_lasso/comparison_scaled.R | 13 +++---------- tests/debiased_lasso/comparison_unscaled.R | 13 +++---------- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 7c8318d9..d5e3d648 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -8,7 +8,7 @@ fixedLassoInf <- function(x, y, beta, sigma=NULL, alpha=0.1, type=c("partial", "full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE, - linesearch.try=10, offset_correction=TRUE) { + linesearch.try=10) { family = match.arg(family) this.call = match.call() @@ -197,9 +197,6 @@ fixedLassoInf <- function(x, y, beta, M = M[-1,] # remove intercept row null_value = null_value[-1] # remove intercept element } - if (!offset_correction) { - null_value = 0 * null_value - } } else if (type=="partial" || p > n) { xa = x[,vars,drop=F] M = pinv(crossprod(xa)) %*% t(xa) diff --git a/tests/debiased_lasso/comparison_scaled.R b/tests/debiased_lasso/comparison_scaled.R index e0c0a6a2..e296bd69 100644 --- a/tests/debiased_lasso/comparison_scaled.R +++ b/tests/debiased_lasso/comparison_scaled.R @@ -19,10 +19,9 @@ p=200 sigma=.5 -theor_lambda = sigma * sqrt(2 * log(p)) -lambda=c(0.25, 0.5, 1, 0.8 * theor_lambda, theor_lambda) +lambda=c(0.25, 0.5, 1) -for (j in c(3,4,5,1,2)) { +for (j in c(3,2,1)) { thresh = 1e-10 @@ -54,12 +53,8 @@ for (i in 1:nsim) { aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) - cc = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) - dd = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) pvs_new <- c(pvs_new, aa$pv, recursive=TRUE) pvs_old <- c(pvs_old, bb$pv,recursive=TRUE) - pvs_new_0 <- c(pvs_new_0, cc$pv, recursive=TRUE) - pvs_old_0 <- c(pvs_old_0, dd$pv, recursive=TRUE) cat() } @@ -70,9 +65,7 @@ for (i in 1:nsim) { png(paste('comparison_scaled', j, '.png', sep='')) plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values') plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE) -plot(ecdf(pvs_old_0), pch=23, col='red', add=TRUE) -plot(ecdf(pvs_new_0), pch=24, col='black', add=TRUE) abline(0,1) -legend("bottomright", legend=c("Old","New", "Old 0", "New 0"), pch=c(23,24,23,24), pt.bg=c("green","purple","red","black")) +legend("bottomright", legend=c("Old", "New"), pch=c(23,24), pt.bg=c("green","purple")) dev.off() } \ No newline at end of file diff --git a/tests/debiased_lasso/comparison_unscaled.R b/tests/debiased_lasso/comparison_unscaled.R index 3bb408e3..eebda685 100644 --- a/tests/debiased_lasso/comparison_unscaled.R +++ b/tests/debiased_lasso/comparison_unscaled.R @@ -19,10 +19,9 @@ p=200 sigma=.5 -theor_lambda = sigma * sqrt(2 * log(p)) -lambda=c(0.25, 0.5, 1, 0.8 * theor_lambda, theor_lambda) +lambda=c(0.25, 0.5, 1) -for (j in c(3,4,5,1,2)) { +for (j in c(3,2,1)) { thresh = 1e-10 @@ -54,12 +53,8 @@ for (i in 1:nsim) { aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) - cc = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) - dd = 
oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) pvs_new <- c(pvs_new, aa$pv, recursive=TRUE) pvs_old <- c(pvs_old, bb$pv,recursive=TRUE) - pvs_new_0 <- c(pvs_new_0, cc$pv, recursive=TRUE) - pvs_old_0 <- c(pvs_old_0, dd$pv, recursive=TRUE) cat() } @@ -70,9 +65,7 @@ for (i in 1:nsim) { png(paste('comparison_unscaled', j, '.png', sep='')) plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values') plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE) -plot(ecdf(pvs_old_0), pch=23, col='red', add=TRUE) -plot(ecdf(pvs_new_0), pch=24, col='black', add=TRUE) abline(0,1) -legend("bottomright", legend=c("Old","New", "Old 0", "New 0"), pch=c(23,24,23,24), pt.bg=c("green","purple","red","black")) +legend("bottomright", legend=c("Old", "New"), pch=c(23,24), pt.bg=c("green","purple")) dev.off() } \ No newline at end of file From fc49dcd96d34040de96feca72174106df42d67d6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 28 Oct 2017 11:13:25 -0700 Subject: [PATCH 311/396] created the matrices for the affine transform, wrapper for calling Gaussian density --- selectiveInference/R/funs.fixed.R | 3 +- selectiveInference/R/funs.randomized.R | 34 ++++ selectiveInference/src/Rcpp-randomized.cpp | 40 +++++ selectiveInference/src/randomized_lasso.c | 2 + selectiveInference/src/randomized_lasso.c~ | 188 +++++++++++++++++++++ selectiveInference/src/randomized_lasso.h | 2 +- selectiveInference/src/randomized_lasso.h~ | 42 +++++ 7 files changed, 308 insertions(+), 3 deletions(-) create mode 100644 selectiveInference/src/Rcpp-randomized.cpp create mode 100644 selectiveInference/src/randomized_lasso.c~ create mode 100644 selectiveInference/src/randomized_lasso.h~ diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index d5e3d648..13d8b5e6 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -435,8 +435,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep last_output = NULL if (is_wide) { - n = nrow(Xinfo) - Xsoln = as.numeric(rep(0, n)) + Xsoln = as.numeric(rep(0, nrow(Xinfo))) } while (counter_idx < max_try) { diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 3a171f79..1da73b2d 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -37,6 +37,7 @@ randomizedLASSO = function(X, if (length(lam) == 1) { lam = rep(lam, p) } + if (length(lam) != p) { stop("Lagrange parameter should be single float or of length ncol(X)") } @@ -65,5 +66,38 @@ randomizedLASSO = function(X, objective_stop, # objective_stop kkt_stop, # kkt_stop param_stop) # param_stop + + + sign_soln = sign(result$soln) + + unpenalized = lam == 0 + active = !unpenalized * (sign_soln != 0) + inactive = !unpenzlied * (sign_soln == 0) + + unpenalized_set = which(unpenalized) + active_set = which(active) + inactive_set = which(inactive) + + coef_term = t(X) %*% X[,c(unpenalized_set, # the coefficients + active_set)] + coef_term = coef_term %*% diag(c(rep(1, sum(unpenalized)), sign_soln[active])) # coefficients are non-negative + coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term + + subgrad_term = cbind(matrix(0, sum(inactive), sum(active) + sum(unpenalized)), + diag(rep(1, sum(inactive)))) + linear_term = rbind(coef_term, + subgrad_term) + + offset_term = rep(0, p) + offset_term[active] = lam[active] * sign_soln[active] + + + + 
list(active_set = active_set, + inactive_set = inactive_set, + unpenalized_set = unpenalized_set, + sign_soln = sign_soln) + + return(result) } diff --git a/selectiveInference/src/Rcpp-randomized.cpp b/selectiveInference/src/Rcpp-randomized.cpp new file mode 100644 index 00000000..7887f327 --- /dev/null +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -0,0 +1,40 @@ +#include // need to include the main Rcpp header file +#include // where densities are defined + +// [[Rcpp::export]] +Rcpp::NumericVector log_density_gaussian_(double noise_scale, // Scale of randomization + Rcpp::NumericMatrix internal_linear, // A_D -- linear part for data + Rcpp::NumericMatrix internal_state, // D -- data state -- matrix of shape (nopt, npts) + Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables + Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) + Rcpp::NumericMatrix offset) { // h -- offset in affine transform -- "p" dimensional + + int npt = internal_state.ncol(); // Function is vectorized + if (optimization_state.ncol() != npt) { // Assuming each column is an internal or opt state because arrays are column major + Rcpp::stop("Number of optimization samples should equal the number of (internally represented) data."); + } + + int ndim = optimization_linear.nrow(); + if (internal_linear.nrow() != ndim) { + Rcpp::stop("Dimension of optimization range should be the same as the dimension of the data range."); + } + int ninternal = internal_linear.ncol(); + int noptimization = optimization_linear.ncol(); + + Rcpp::NumericVector result(npt); + + int ipt; + for (ipt=0; ipt // for fabs + +// Augmented density for randomized LASSO after +// Gaussian randomization + +// Described in https://arxiv.org/abs/1609.05609 + +// Gaussian is product of IID N(0, noise_scale^2) density +// Evaluated at A_D D + A_O O + h + +// Laplace is product of IID Laplace with scale noise_scale +// Also evaluated at A_D D + A_O O + h + +double log_density_gaussian(double noise_scale, // Scale of randomization + int ndim, // Number of features -- "p" + int ninternal, // Dimension of internal data representation often 1 + int noptimization, // Dimension of optimization variables -- "p" + double *internal_linear, // A_D -- linear part for data + double *internal_state, // D -- data state + double *optimization_linear, // A_O -- linear part for optimization variables + double *optimization_state, // O -- optimization state + double *offset) // h -- offset in affine transform -- "p" dimensional +{ + int irow, icol; + double denom = 2 * noise_scale * noise_scale; + double value = 0; + double reconstruction = 0; + double *offset_ptr; + double *internal_linear_ptr; + double *internal_state_ptr; + double *optimization_linear_ptr; + double *optimization_state_ptr; + + for (irow=0; irow Date: Sat, 28 Oct 2017 11:13:48 -0700 Subject: [PATCH 312/396] unwanted files --- selectiveInference/src/randomized_lasso.c~ | 188 --------------------- selectiveInference/src/randomized_lasso.h~ | 42 ----- 2 files changed, 230 deletions(-) delete mode 100644 selectiveInference/src/randomized_lasso.c~ delete mode 100644 selectiveInference/src/randomized_lasso.h~ diff --git a/selectiveInference/src/randomized_lasso.c~ b/selectiveInference/src/randomized_lasso.c~ deleted file mode 100644 index 123c81de..00000000 --- a/selectiveInference/src/randomized_lasso.c~ +++ /dev/null @@ -1,188 +0,0 @@ -#include // for fabs - -// Augmented density for randomized LASSO after -// Gaussian 
randomization - -// Described in https://arxiv.org/abs/1609.05609 - -// Gaussian is product of IID N(0, noise_scale^2) density -// Evaluated at A_D D + A_O O + h - -// Laplace is product of IID Laplace with scale noise_scale -// Also evaluated at A_D D + A_O O + h - -double log_density_gaussian(double noise_scale, // Scale of randomization - int ndim, // Number of features -- "p" - int ninternal, // Dimension of internal data representation often 1 - int noptimization, // Dimension of optimization variables -- "p" - double *internal_linear, // A_D -- linear part for data - double *internal_state, // D -- data state - double *optimization_linear, // A_O -- linear part for optimization variables - double *optimization_state, // O -- optimization state - double *offset) // h -- offset in affine transform -- "p" dimensional -{ - int irow, icol; - double denom = 2 * noise_scale * noise_scale; - double value = 0; - double reconstruction = 0; - double *offset_ptr; - double *internal_linear_ptr; - double *internal_state_ptr; - double *optimization_linear_ptr; - double *optimization_state_ptr; - - for (irow=0; irow Date: Sat, 28 Oct 2017 11:16:12 -0700 Subject: [PATCH 313/396] wrapper for conditional density --- selectiveInference/src/Rcpp-randomized.cpp | 25 ++++++++++++++++++++++ tests/{ => randomized}/test_randomized.R | 0 2 files changed, 25 insertions(+) rename tests/{ => randomized}/test_randomized.R (100%) diff --git a/selectiveInference/src/Rcpp-randomized.cpp b/selectiveInference/src/Rcpp-randomized.cpp index 7887f327..23f07978 100644 --- a/selectiveInference/src/Rcpp-randomized.cpp +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -38,3 +38,28 @@ Rcpp::NumericVector log_density_gaussian_(double noise_scale, return(result); } + +// [[Rcpp::export]] +Rcpp::NumericVector log_density_gaussian_conditional_(double noise_scale, // Scale of randomization + Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables + Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) + Rcpp::NumericMatrix offset) { // h -- offset in affine transform -- "p" dimensional + + int npt = optimization_state.ncol(); // Function is vectorized + int ndim = optimization_linear.nrow(); + int noptimization = optimization_linear.ncol(); + + Rcpp::NumericVector result(npt); + + int ipt; + for (ipt=0; ipt Date: Sat, 28 Oct 2017 11:49:40 -0700 Subject: [PATCH 314/396] forming internal affine transform --- selectiveInference/R/funs.randomized.R | 58 +++++++++++++++++++------- tests/randomized/test_randomized.R | 5 ++- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 1da73b2d..3b104239 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -71,33 +71,61 @@ randomizedLASSO = function(X, sign_soln = sign(result$soln) unpenalized = lam == 0 - active = !unpenalized * (sign_soln != 0) - inactive = !unpenzlied * (sign_soln == 0) + active = (!unpenalized) & (sign_soln != 0) + inactive = (!unpenalized) & (sign_soln == 0) unpenalized_set = which(unpenalized) active_set = which(active) inactive_set = which(inactive) - coef_term = t(X) %*% X[,c(unpenalized_set, # the coefficients - active_set)] + # affine transform for optimization variables + + E = c(unpenalized_set, active_set) + I = inactive_set + X_E = X[,E] + X_I = X[,I] + L_E = t(X) %*% X[,E] + + coef_term = L_E coef_term = coef_term %*% diag(c(rep(1, 
sum(unpenalized)), sign_soln[active])) # coefficients are non-negative coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term - subgrad_term = cbind(matrix(0, sum(inactive), sum(active) + sum(unpenalized)), - diag(rep(1, sum(inactive)))) - linear_term = rbind(coef_term, + subgrad_term = matrix(0, p, sum(inactive)) # for subgrad + for (i in 1:sum(inactive)) { + subgrad_term[inactive_set[i], i] = 1 + } + + linear_term = cbind(coef_term, subgrad_term) offset_term = rep(0, p) offset_term[active] = lam[active] * sign_soln[active] - + opt_transform = list(linear_term=linear_term, + offset_term=offset_term) + + # affine transform for internal (data) variables + # for now just use parametric in terms of + # (\bar{\beta}_E, X_{-E}^T(y-X_E\bar{\beta}_E) + # + # we have to reconstruct -X^TY from this pair + # + + active_term = -L_E # for \bar{\beta}_E + + inactive_term = -subgrad_term + linear_term = cbind(active_term, + inactive_term) + offset_term = rep(0, p) + internal_transform = list(linear_term = linear_term, + offset_term = offset_term) + + return(list(active_set = active_set, + inactive_set = inactive_set, + unpenalized_set = unpenalized_set, + sign_soln = sign_soln, + opt_transform = opt_transform, + internal_transform = internal_transform + )) - list(active_set = active_set, - inactive_set = inactive_set, - unpenalized_set = unpenalized_set, - sign_soln = sign_soln) - - - return(result) } diff --git a/tests/randomized/test_randomized.R b/tests/randomized/test_randomized.R index e4e35a12..305139bc 100644 --- a/tests/randomized/test_randomized.R +++ b/tests/randomized/test_randomized.R @@ -8,7 +8,8 @@ test = function() { lam = 20 / sqrt(n) noise_scale = 0.01 * sqrt(n) ridge_term = .1 / sqrt(n) - fit_randomized_lasso(X, y, lam, noise_scale, ridge_term) + selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) } -print(test()) +A=test() +#print(test()) From 9d1c79b17d1f69dbad9c8d017ef01c968da0185d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 28 Oct 2017 12:03:00 -0700 Subject: [PATCH 315/396] vectorized densities evaluate OK -- need to check results --- selectiveInference/R/funs.randomized.R | 2 +- selectiveInference/src/Rcpp-randomized.cpp | 4 ++-- tests/randomized/test_randomized.R | 27 +++++++++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 3b104239..92b871eb 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -124,7 +124,7 @@ randomizedLASSO = function(X, inactive_set = inactive_set, unpenalized_set = unpenalized_set, sign_soln = sign_soln, - opt_transform = opt_transform, + optimization_transform = opt_transform, internal_transform = internal_transform )) diff --git a/selectiveInference/src/Rcpp-randomized.cpp b/selectiveInference/src/Rcpp-randomized.cpp index 23f07978..d6e85175 100644 --- a/selectiveInference/src/Rcpp-randomized.cpp +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -7,7 +7,7 @@ Rcpp::NumericVector log_density_gaussian_(double noise_scale, Rcpp::NumericMatrix internal_state, // D -- data state -- matrix of shape (nopt, npts) Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) - Rcpp::NumericMatrix offset) { // h -- offset in affine transform -- "p" dimensional + Rcpp::NumericVector offset) { // h -- 
offset in affine transform -- "p" dimensional int npt = internal_state.ncol(); // Function is vectorized if (optimization_state.ncol() != npt) { // Assuming each column is an internal or opt state because arrays are column major @@ -43,7 +43,7 @@ Rcpp::NumericVector log_density_gaussian_(double noise_scale, Rcpp::NumericVector log_density_gaussian_conditional_(double noise_scale, // Scale of randomization Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) - Rcpp::NumericMatrix offset) { // h -- offset in affine transform -- "p" dimensional + Rcpp::NumericVector offset) { // h -- offset in affine transform -- "p" dimensional int npt = optimization_state.ncol(); // Function is vectorized int ndim = optimization_linear.nrow(); diff --git a/tests/randomized/test_randomized.R b/tests/randomized/test_randomized.R index 305139bc..87a30439 100644 --- a/tests/randomized/test_randomized.R +++ b/tests/randomized/test_randomized.R @@ -1,6 +1,6 @@ library(selectiveInference) -test = function() { +smoke_test = function() { n = 100; p = 50 X = matrix(rnorm(n * p), n, p) @@ -10,6 +10,27 @@ test = function() { ridge_term = .1 / sqrt(n) selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) } +A = smoke_test() -A=test() -#print(test()) +density_test = function() { + + random_lasso = smoke_test() + p = nrow(random_lasso$internal_transform$linear_term) + internal_state = matrix(rnorm(p * 20), p, 20) + optimization_state = matrix(rnorm(p * 20), p, 20) + offset = rnorm(p) + + selectiveInference:::log_density_gaussian_(10., + random_lasso$internal_transform$linear_term, + internal_state, + random_lasso$optimization_transform$linear_term, + optimization_state, + offset) + + selectiveInference:::log_density_gaussian_conditional_(10., + random_lasso$optimization_transform$linear_term, + optimization_state, + offset) +} + +density_test() From 5393dcd64b711aee524929a0c2eb55083b208d2c Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 28 Oct 2017 12:16:27 -0700 Subject: [PATCH 316/396] added laplace densities --- selectiveInference/src/Rcpp-randomized.cpp | 63 ++++++++++++++ tests/randomized/test_randomized.R | 95 +++++++++++++++++++--- 2 files changed, 145 insertions(+), 13 deletions(-) diff --git a/selectiveInference/src/Rcpp-randomized.cpp b/selectiveInference/src/Rcpp-randomized.cpp index d6e85175..b2b2cd23 100644 --- a/selectiveInference/src/Rcpp-randomized.cpp +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -63,3 +63,66 @@ Rcpp::NumericVector log_density_gaussian_conditional_(double noise_scale, return(result); } + +// [[Rcpp::export]] +Rcpp::NumericVector log_density_laplace_(double noise_scale, // Scale of randomization + Rcpp::NumericMatrix internal_linear, // A_D -- linear part for data + Rcpp::NumericMatrix internal_state, // D -- data state -- matrix of shape (nopt, npts) + Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables + Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) + Rcpp::NumericVector offset) { // h -- offset in affine transform -- "p" dimensional + + int npt = internal_state.ncol(); // Function is vectorized + if (optimization_state.ncol() != npt) { // Assuming each column is an internal or opt state because arrays are column major + Rcpp::stop("Number of optimization samples should equal the number of (internally represented) 
data."); + } + + int ndim = optimization_linear.nrow(); + if (internal_linear.nrow() != ndim) { + Rcpp::stop("Dimension of optimization range should be the same as the dimension of the data range."); + } + int ninternal = internal_linear.ncol(); + int noptimization = optimization_linear.ncol(); + + Rcpp::NumericVector result(npt); + + int ipt; + for (ipt=0; ipt Date: Mon, 30 Oct 2017 21:53:04 -0700 Subject: [PATCH 317/396] using C-software submodule for selectiveInference/src --- .gitmodules | 3 + C-software | 1 + Makefile | 4 +- selectiveInference/src/debias.h | 76 --- selectiveInference/src/matrixcomps.c | 261 -------- selectiveInference/src/matrixcomps.h | 12 - selectiveInference/src/quadratic_program.c | 441 ------------- .../src/quadratic_program_wide.c | 590 ------------------ selectiveInference/src/randomized_lasso.c | 190 ------ selectiveInference/src/randomized_lasso.h | 42 -- selectiveInference/src/truncnorm.c | 188 ------ 11 files changed, 7 insertions(+), 1801 deletions(-) create mode 100644 .gitmodules create mode 160000 C-software delete mode 100644 selectiveInference/src/debias.h delete mode 100644 selectiveInference/src/matrixcomps.c delete mode 100644 selectiveInference/src/matrixcomps.h delete mode 100644 selectiveInference/src/quadratic_program.c delete mode 100644 selectiveInference/src/quadratic_program_wide.c delete mode 100644 selectiveInference/src/randomized_lasso.c delete mode 100644 selectiveInference/src/randomized_lasso.h delete mode 100644 selectiveInference/src/truncnorm.c diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..f8073fb9 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "C-software"] + path = C-software + url = https://github.com/selective-inference/C-software.git diff --git a/C-software b/C-software new file mode 160000 index 00000000..a3d9a172 --- /dev/null +++ b/C-software @@ -0,0 +1 @@ +Subproject commit a3d9a1723ce94cb430b5dfd3e058fd708a6bae7f diff --git a/Makefile b/Makefile index e10e1114..5580814b 100644 --- a/Makefile +++ b/Makefile @@ -4,11 +4,13 @@ Rcpp: Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" install: Rcpp + cp C-software/src/* selectiveInference/src R CMD INSTALL selectiveInference build: + cp C-software/src/* selectiveInference/src R CMD build selectiveInference -check: Rcpp build +check: Rcpp build R CMD build selectiveInference R CMD check selectiveInference_1.2.2.tar.gz # fix this to be a script variable \ No newline at end of file diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h deleted file mode 100644 index d3db26d7..00000000 --- a/selectiveInference/src/debias.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifdef __cplusplus -extern "C" -{ -#endif /* __cplusplus */ - -int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ - double *linear_func_ptr, /* Linear term in objective */ - double *nndef_diag_ptr, /* Diagonal of nndef */ - double *gradient_ptr, /* nndef times theta */ - int *ever_active_ptr, /* Ever active set: 1-based */ - int *nactive_ptr, /* Size of ever active set */ - int nfeature, /* How many features in nndef */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - double *theta_old, /* previous value */ - int maxiter, /* max number of iterations */ - double kkt_tol, /* precision for checking KKT conditions */ - double objective_tol, /* precision for checking relative decrease in objective value */ - double parameter_tol, /* precision for checking relative convergence of parameter */ - 
int max_active, /* Upper limit for size of active set -- otherwise break */ - int objective_stop, /* Break based on convergence of objective value? */ - int kkt_stop, /* Break based on KKT? */ - int param_stop); /* Break based on parameter convergence? */ - -int check_KKT_qp(double *theta, /* current theta */ - double *gradient_ptr, /* nndef times theta + linear_func */ - int nrow, /* how many rows in nndef */ - double bound, /* Lagrange multipler for \ell_1 */ - double tol); /* precision for checking KKT conditions */ - -int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ - double *X_theta_ptr, /* Fitted values */ - double *linear_func_ptr, /* Linear term in objective */ - double *nndef_diag_ptr, /* Diagonal entries of non-neg def matrix */ - double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ - int *need_update_ptr, /* Keeps track of updated gradient coords */ - int *ever_active_ptr, /* Ever active set: 1-based */ - int *nactive_ptr, /* Size of ever active set */ - int ncase, /* How many rows in X */ - int nfeature, /* How many columns in X */ - double *bound_ptr, /* Lagrange multipliers */ - double ridge_term, /* Ridge / ENet term */ - double *theta_ptr, /* current value */ - double *theta_old_ptr, /* previous value */ - int maxiter, /* max number of iterations */ - double kkt_tol, /* precision for checking KKT conditions */ - double objective_tol, /* precision for checking relative decrease in objective value */ - double parameter_tol, /* precision for checking relative convergence of parameter */ - int max_active, /* Upper limit for size of active set -- otherwise break */ - int objective_stop, /* Break based on convergence of objective value? */ - int kkt_stop, /* Break based on KKT? */ - int param_stop); /* Break based on parameter convergence? */ - -int check_KKT_wide(double *theta_ptr, /* current theta */ - double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ - double *X_theta_ptr, /* Current fitted values */ - double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ - double *linear_func_ptr, /* Linear term in objective */ - int *need_update_ptr, /* Which coordinates need to be updated? */ - int nfeature, /* how many columns in X */ - int ncase, /* how many rows in X */ - double *bound_ptr, /* Lagrange multiplers for \ell_1 */ - double ridge_term, /* Ridge / ENet term */ - double tol); /* precision for checking KKT conditions */ - -void update_gradient_wide(double *gradient_ptr, /* X^TX/ncase times theta + linear_func */ - double *X_theta_ptr, /* Current fitted values */ - double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ - double *linear_func_ptr, /* Linear term in objective */ - int *need_update_ptr, /* Which coordinates need to be updated? 
*/ - int nfeature, /* how many columns in X */ - int ncase); /* how many rows in X */ - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ diff --git a/selectiveInference/src/matrixcomps.c b/selectiveInference/src/matrixcomps.c deleted file mode 100644 index bec35060..00000000 --- a/selectiveInference/src/matrixcomps.c +++ /dev/null @@ -1,261 +0,0 @@ -#include -#include - -// Matrices are stored as vectors, in column-major order - -// Givens rotation of a and b, stored in c and s -void givens(double a, double b, double *c, double *s) { - if (b==0) { - *c = 1; - *s = 0; - } - else { - if (fabs(b)>fabs(a)) { - double t = -a/b; - *s = 1/sqrt(1+t*t); - *c = (*s)*t; - } - else { - double t = -b/a; - *c = 1/sqrt(1+t*t); - *s = (*c)*t; - } - } -} - -// Givens rotation applied to rows i1 and i2 of the m x n -// matrix A, on the subset of columns j1 through j2 -void rowrot(double *A, int i1, int i2, int m, int n, int j1, int j2, double c, double s) { - int j; - double t1,t2; - for (j=j1; j<=j2; j++) { - t1 = A[i1+j*m]; - t2 = A[i2+j*m]; - A[i1+j*m] = c*t1-s*t2; - A[i2+j*m] = s*t1+c*t2; - } -} - -// Givens rotation applied to columns j1 and j2 of the m x n -// matrix A, on the subset of rows i1 through i2 -void colrot(double *A, int j1, int j2, int m, int n, int i1, int i2, double c, double s) { - int i; - double t1,t2; - for (i=i1; i<=i2; i++) { - t1 = A[i+j1*m]; - t2 = A[i+j2*m]; - A[i+j1*m] = c*t1-s*t2; - A[i+j2*m] = s*t1+c*t2; - } -} - -// Downdate the QR factorization after deleting column j0, -// where Q1 is m x n and R is n x n. The other part of -// the Q matrix, Q2 m x (m-n), isn't needed so it isn't -// passed for efficiency -void downdate1(double *Q1, double *R, int j0, int m, int n) { - int j; - - double c,s; - for (j=j0+1; j=1; j--) { - // Compute the appropriate c and s - givens(w[j-1],w[j],&c,&s); - - // Pre-multiply w - rowrot(w,j-1,j,k,1,0,0,c,s); - - // Post-multiply Q2 - colrot(Q2,j-1,j,m,k,0,m-1,c,s); - } -} - -// Downdate the QR factorization after deleting the first row, -// where Q is m x m and R is m x n -void downdate2(double *Q, double *R, int *mp, int *np) { - int m,n,i; - m = *mp; - n = *np; - - double c,s; - for (i=m-1; i>=1; i--) { - // Compute the appropriate c and s - givens(Q[(i-1)*m],Q[i*m],&c,&s); - - // Post-mutiply Q - colrot(Q,i-1,i,m,m,0,m-1,c,s); - - // Pre-multiply R - if (i<=n) rowrot(R,i-1,i,m,n,i-1,n-1,c,s); - } -} - -// Update the QR factorization after adding the last row, -// where Q is m x m and R is m x n. For efficiency, Q is not -// passed, and only the first row of R is passed. Not counting -// its first row, the first q columns of R are zero -void update2(double *y, double *D, double *r, int *mp, int *np, int *qp) { - int m,n,q,j; - m = *mp; - n = *np; - q = *qp; - - double c,s; - for (j=0; j=0; i--) { - for (j=i; j=0; i--) { - for (j=i; j=0; i--) { - // Compute the appropriate c and s - givens(R[i+(i+q+1)*m2],R[i+(i+q)*m2],&c,&s); - - // Post-multiply R - colrot(R,i+q+1,i+q,m2,n,0,i,c,s); - - // Post-multiply D - colrot(A,i+q+1,i+q,m1,n,0,m1-1,c,s); - - // Pre-multiply y - rowrot(y,i+q+1,i+q,n,1,0,0,c,s); - } -} - -// Make the R factor upper triangular, by Givens rotating -// its columns and rows, appropriately. Here A is m1 x n, -// Q is m2 x m2, and R is m2 x n with rank(R) = n-q-1. The -// first q columns of R are zero. 
The kth row of R is the -// last row with a zero element on the diagonal -void maketri4(double *y, double *A, double *Q, double *R, int *m1p, int *m2p, int *np, int *qp, int *kp) { - int m1,m2,n,q,k,i,j; - m1 = *m1p; - m2 = *m2p; - n = *np; - q = *qp; - k = *kp; - - double c,s; - - // First rotate the columns - for (i=k-1; i>=0; i--) { - // Compute the appropriate c and s - givens(R[i+(i+q+1)*m2],R[i+(i+q)*m2],&c,&s); - - // Post-multiply R - colrot(R,i+q+1,i+q,m2,n,0,i,c,s); - - // Post-multiply D - colrot(A,i+q+1,i+q,m1,n,0,m1-1,c,s); - - // Pre-multiply y - rowrot(y,i+q+1,i+q,n,1,0,0,c,s); - } - - // Next rotate the rows - for (j=k+q+1; j // for fabs - -// Find an approximate row of \hat{nndef}^{-1} - -// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf - -// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 -// where l is `linear_func` below - -// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf -// Therefore we don't have to negate the answer to get theta. -// Update one coordinate - -double objective_qp(double *nndef_ptr, /* A non-negative definite matrix */ - double *linear_func_ptr, /* Linear term in objective */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* how many rows in nndef */ - double bound, /* Lagrange multipler for \ell_1 */ - double *theta_ptr) /* current value */ -{ - int irow, icol; - double value = 0; - double *nndef_ptr_tmp = nndef_ptr; - double *linear_func_ptr_tmp = linear_func_ptr; - double *theta_row_ptr, *theta_col_ptr; - int *active_row_ptr, *active_col_ptr; - int active_row, active_col; - int nactive = *nactive_ptr; - - theta_row_ptr = theta_ptr; - theta_col_ptr = theta_ptr; - - for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { - return(0); - } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { - return(0); - } - } - else { - if (fabs(gradient) > (1. + tol) * bound) { - return(0); - } - } - } - - return(1); -} - -int check_KKT_qp_active(int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - double *theta_ptr, /* current theta */ - double *gradient_ptr, /* nndef times theta + linear_func */ - int nfeature, /* how many features in nndef */ - double bound, /* Lagrange multipler for \ell_1 */ - double tol) /* precision for checking KKT conditions */ -{ - // First check inactive - - int iactive; - double *theta_ptr_tmp; - double gradient; - double *gradient_ptr_tmp; - int nactive = *nactive_ptr; - int active_feature; - int *active_feature_ptr; - - for (iactive=0; iactive 0) && (fabs(gradient + bound) > tol * bound)) { - return(0); - } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { - return(0); - } - - } - else { - if (fabs(gradient) > (1. 
+ tol) * bound) { - return(0); - } - } - } - - return(1); -} - - -double update_one_coord_qp(double *nndef_ptr, /* A non-negative definite matrix */ - double *linear_func_ptr, /* Linear term in objective */ - double *nndef_diag_ptr, /* Diagonal of nndef */ - double *gradient_ptr, /* nndef times theta + linear_func */ - int *ever_active_ptr, /* Ever active set: 1-based */ - int *nactive_ptr, /* Size of ever active set */ - int nfeature, /* How many features in nndef */ - double bound, /* feasibility parameter */ - double *theta_ptr, /* current value */ - int coord, /* which coordinate to update: 0-based */ - int is_active) /* Is this coord in ever_active */ -{ - - double delta; - double linear_term = 0; - double value = 0; - double old_value; - double *nndef_ptr_tmp; - double *gradient_ptr_tmp; - double *theta_ptr_tmp; - int icol = 0; - - double *quadratic_ptr = ((double *) nndef_diag_ptr + coord); - double quadratic_term = *quadratic_ptr; - - gradient_ptr_tmp = ((double *) gradient_ptr + coord); - linear_term = *gradient_ptr_tmp; - - theta_ptr_tmp = ((double *) theta_ptr + coord); - old_value = *theta_ptr_tmp; - - // The coord entry of gradient_ptr term has a diagonal term in it: - // nndef[coord, coord] * theta[coord] - // This removes it. - - linear_term -= quadratic_term * old_value; - - // Now soft-threshold the coord entry of theta - - // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| - // with q=quadratic_term and l=linear_term - - // With a negative linear term, solution should be - // positive - - if (linear_term < -bound) { - value = (-linear_term - bound) / quadratic_term; - } - else if (linear_term > bound) { - value = -(linear_term - bound) / quadratic_term; - } - - // Add to active set if necessary - - if ((is_active == 0) && (value != 0)) { - update_ever_active_qp(coord, ever_active_ptr, nactive_ptr); - } - - // Update the linear term - - if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { - - delta = value - old_value; - nndef_ptr_tmp = ((double *) nndef_ptr + coord * nfeature); - gradient_ptr_tmp = ((double *) gradient_ptr); - - for (icol=0; icol 0)) { - break; - } - old_value = new_value; - } - - } - - // Check size of active set - - if (*nactive_ptr >= max_active) { - break; - } - - } - return(iter); -} - diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c deleted file mode 100644 index 41e29cec..00000000 --- a/selectiveInference/src/quadratic_program_wide.c +++ /dev/null @@ -1,590 +0,0 @@ -#include // for fabs - -// Find an approximate row of \hat{nndef}^{-1} - -// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf - -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n + l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 -// where l is `linear_func` below - -// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf -// Therefore we don't have to negate the answer to get theta. 
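For orientation, here is a minimal R sketch of the coordinate update these QP solvers implement -- a soft-thresholding step per coordinate. Sigma, linear_func and bound play the roles of the non-negative definite matrix, l and mu above (assuming a positive diagonal); this is an illustration of the algorithm, not the package API, and the wide solver works with the design matrix X and fitted values instead of Sigma:

    soft_threshold = function(z, t) sign(z) * pmax(abs(z) - t, 0)
    # one full pass of coordinate descent for
    #   min_theta 1/2 t(theta) %*% Sigma %*% theta + sum(linear_func * theta) + bound * sum(abs(theta))
    cd_pass = function(Sigma, linear_func, bound, theta) {
        for (j in seq_along(theta)) {
            # gradient coordinate at theta, with the j-th diagonal contribution removed
            lin_j = sum(Sigma[j, ] * theta) + linear_func[j] - Sigma[j, j] * theta[j]
            theta[j] = soft_threshold(-lin_j, bound) / Sigma[j, j]
        }
        theta
    }

Iterating cd_pass from theta = rep(0, p) until the KKT conditions hold is the fixed point that the check_KKT functions in these files test for.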
-// Update one coordinate - -// Throughout X is a design matrix - -double objective_wide(double *X_theta_ptr, /* Fitted values */ - double *linear_func_ptr, /* Linear term in objective */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int ncase, /* how many rows in X */ - int nfeature, /* how many columns in X */ - double *bound_ptr, /* Lagrange multiplers for \ell_1 */ - double ridge_term, /* Ridge / ENet term */ - double *theta_ptr) /* current value */ -{ - int icase, iactive; - double value = 0; - double *bound_ptr_tmp; - double *X_theta_ptr_tmp = X_theta_ptr; - double *linear_func_ptr_tmp = linear_func_ptr; - double *theta_ptr_tmp; - int *active_feature_ptr; - int active_feature; - int nactive = *nactive_ptr; - - // The term \|X\theta\|^2_2/n, with n=ncase - - for (icase=0; icase 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { - return(0); - } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { - return(0); - } - - } - else if (bound != 0) { - if (fabs(gradient) > (1. + tol) * bound) { - return(0); - } - } - } - - return(1); -} - -int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - double *theta_ptr, /* current theta */ - double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ - double *X_theta_ptr, /* Current fitted values */ - double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ - double *linear_func_ptr, /* Linear term in objective */ - int *need_update_ptr, /* Which coordinates need to be updated? */ - int nfeature, /* how many columns in X */ - int ncase, /* how many rows in X */ - double *bound_ptr, /* Lagrange multipliers for \ell_1 */ - double ridge_term, /* Ridge / ENet term */ - double tol) /* precision for checking KKT conditions */ -{ - // First check inactive - - int iactive; - double *theta_ptr_tmp; - double *bound_ptr_tmp; - double bound; - double gradient; - int nactive = *nactive_ptr; - int active_feature; - int *active_feature_ptr; - - for (iactive=0; iactive 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { - return(0); - } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { - return(0); - } - - } - else if (bound != 0) { - if (fabs(gradient) > (1. 
+ tol) * bound) { - return(0); - } - } - } - - return(1); -} - -double update_one_coord_wide(double *X_ptr, /* A design matrix*/ - double *linear_func_ptr, /* Linear term in objective */ - double *nndef_diag_ptr, /* Diagonal of nndef */ - double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ - int *ever_active_ptr, /* Ever active set: 1-based */ - int *nactive_ptr, /* Size of ever active set */ - double *X_theta_ptr, /* X\theta -- fitted values */ - int *need_update_ptr, /* Whether a gradient coordinate needs update or not */ - int ncase, /* How many rows in X */ - int nfeature, /* How many rows in X */ - double *bound_ptr, /* Lagrange multipliers */ - double ridge_term, /* Ridge / ENet term */ - double *theta_ptr, /* current value */ - int coord, /* which coordinate to update: 0-based */ - int is_active) /* Is this coord in ever_active */ -{ - - double delta; - double linear_term = 0; - double value = 0; - double old_value; - double *X_ptr_tmp; - double *X_theta_ptr_tmp; - int *need_update_ptr_tmp; - double *theta_ptr_tmp; - double *bound_ptr_tmp; - double bound; - int ifeature, icase; - - double *diagonal_ptr = ((double *) nndef_diag_ptr + coord); - double diagonal_entry = *diagonal_ptr; - - linear_term = compute_gradient_coord(gradient_ptr, X_theta_ptr, X_ptr, linear_func_ptr, need_update_ptr, coord, ncase); - - theta_ptr_tmp = ((double *) theta_ptr + coord); - old_value = *theta_ptr_tmp; - - bound_ptr_tmp = ((double *) bound_ptr + coord); - bound = *bound_ptr_tmp; - - // The coord entry of gradient_ptr term has a diagonal term in it: - // (X^TX)[coord, coord] * theta[coord] / ncase - // This removes it. - - linear_term -= diagonal_entry * old_value; - - // Now soft-threshold the coord entry of theta - - // Objective is t \mapsto (q+eps)/2 * t^2 + l * t + bound |t| + - // with q=diagonal_entry and l=linear_term and eps=ridge_Term - - // With a negative linear term, solution should be - // positive - - if (linear_term < -bound) { - value = (-linear_term - bound) / (diagonal_entry + ridge_term); - } - else if (linear_term > bound) { - value = -(linear_term - bound) / (diagonal_entry + ridge_term); - } - - // Add to active set if necessary - - if ((is_active == 0) && (value != 0)) { - update_ever_active_wide(coord, ever_active_ptr, nactive_ptr); - } - - // Update X\theta if changed - - if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { - - // Set the update_gradient_ptr to 1 - - need_update_ptr_tmp = need_update_ptr; - for (ifeature=0; ifeature 0)) { - break; - } - old_value = new_value; - } - } - - // Check size of active set - - if (*nactive_ptr >= max_active) { - break; - } - - } - return(iter); -} - diff --git a/selectiveInference/src/randomized_lasso.c b/selectiveInference/src/randomized_lasso.c deleted file mode 100644 index 1f396b16..00000000 --- a/selectiveInference/src/randomized_lasso.c +++ /dev/null @@ -1,190 +0,0 @@ -#include // for fabs - -// Augmented density for randomized LASSO after -// Gaussian randomization - -// Described in https://arxiv.org/abs/1609.05609 - -// Gaussian is product of IID N(0, noise_scale^2) density -// Evaluated at A_D D + A_O O + h - -// Laplace is product of IID Laplace with scale noise_scale -// Also evaluated at A_D D + A_O O + h - -// Matrices are assumed in column major order! 
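The C code below evaluates these log-densities point by point; a minimal R sketch of the same quantity, assuming conformable matrices A_D, A_O, vectors D, O and offset h (the C code may drop additive normalizing constants):

    # Gaussian randomization: sum of IID N(0, noise_scale^2) log-densities
    # evaluated at the reconstruction A_D %*% D + A_O %*% O + h
    log_density_gaussian_R = function(noise_scale, A_D, D, A_O, O, h) {
        recon = A_D %*% D + A_O %*% O + h
        sum(dnorm(recon, mean = 0, sd = noise_scale, log = TRUE))
    }

    # Laplace randomization: IID Laplace coordinates with scale noise_scale
    log_density_laplace_R = function(noise_scale, A_D, D, A_O, O, h) {
        recon = A_D %*% D + A_O %*% O + h
        sum(-abs(recon) / noise_scale - log(2 * noise_scale))
    }

The "conditional" variants further down are the same computations with the data term A_D %*% D absorbed into the offset.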
- -double log_density_gaussian(double noise_scale, // Scale of randomization - int ndim, // Number of features -- "p" - int ninternal, // Dimension of internal data representation often 1 - int noptimization, // Dimension of optimization variables -- "p" - double *internal_linear, // A_D -- linear part for data - double *internal_state, // D -- data state - double *optimization_linear, // A_O -- linear part for optimization variables - double *optimization_state, // O -- optimization state - double *offset) // h -- offset in affine transform -- "p" dimensional -{ - int irow, icol; - double denom = 2 * noise_scale * noise_scale; - double value = 0; - double reconstruction = 0; - double *offset_ptr; - double *internal_linear_ptr; - double *internal_state_ptr; - double *optimization_linear_ptr; - double *optimization_state_ptr; - - for (irow=0; irow -#include - -// Take a Gibbs hit and run step along a given direction - -// Assumes the covariance is identity - -void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ - double *direction, /* direction we will take Gibbs step */ - double *U, /* A %*% state - b */ - double *alpha, /* A %*% direction */ - int nconstraint, /* number of rows of A */ - int nstate) /* dimension of state */ -{ - - int istate; - double value = 0; - - /* Compute V=\eta^Ty */ - - for (istate = 0; istate < nstate; istate++) { - value += direction[istate] * state[istate]; - } - - /* Compute upper and lower bounds */ - - double lower_bound = -1e12; - double upper_bound = 1e12; - double bound_val = 0; - double tol=1.e-7; - int iconstraint; - - for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { - - bound_val = -U[iconstraint] / alpha[iconstraint] + value; - - if ((alpha[iconstraint] > tol) && - (bound_val < upper_bound)) { - upper_bound = bound_val; - } - else if ((alpha[iconstraint] < -tol) && - (bound_val > lower_bound)) { - lower_bound = bound_val; - } - - } - - /* Ensure constraints are satisfied */ - - if (lower_bound > value) { - lower_bound = value - tol; - } - else if (upper_bound < value) { - upper_bound = value + tol; - } - - /* Check to see if constraints are satisfied */ - - /* if (lower_bound > upper_bound) { - - }*/ - - /* Now, take a step */ - - double tnorm; /* the 1D gaussian variable */ - double cdfU, cdfL, unif; /* temp variables */ - - if (upper_bound < -10) { - - /* use Exp approximation */ - /* the approximation is that */ - /* Z | lower_bound < Z < upper_bound */ - /* is fabs(upper_bound) * (upper_bound - Z) = E approx Exp(1) */ - /* so Z = upper_bound - E / fabs(upper_bound) */ - /* and the truncation of the exponential is */ - /* E < fabs(upper_bound - lower_bound) * fabs(upper_bound) = D */ - - /* this has distribution function (1 - exp(-x)) / (1 - exp(-D)) */ - /* so to draw from this distribution */ - /* we set E = - log(1 - U * (1 - exp(-D))) where U is Unif(0,1) */ - /* and Z (= tnorm below) is as stated */ - - unif = runif(0., 1.) * (1 - exp(-fabs((lower_bound - upper_bound) * upper_bound))); - tnorm = (upper_bound + log(1 - unif) / fabs(upper_bound)); - } - else if (lower_bound > 10) { - - /* here Z = lower_bound + E / fabs(lower_bound) (though lower_bound is positive) */ - /* and D = fabs((upper_bound - lower_bound) * lower_bound) */ - - unif = runif(0., 1.) 
* (1 - exp(-fabs((upper_bound - lower_bound) * lower_bound))); - tnorm = (lower_bound - log(1 - unif) / lower_bound); - } - else if (lower_bound < 0) { - cdfL = pnorm(lower_bound, 0., 1., 1, 0); - cdfU = pnorm(upper_bound, 0., 1., 1, 0); - unif = runif(0., 1.) * (cdfU - cdfL) + cdfL; - if (unif < 0.5) { - tnorm = qnorm(unif, 0., 1., 1, 0); - } - else { - tnorm = -qnorm(1-unif, 0., 1., 1, 0); - } - } - else { - cdfL = pnorm(-lower_bound, 0., 1., 1, 0); - cdfU = pnorm(-upper_bound, 0., 1., 1, 0); - unif = runif(0., 1.) * (cdfL - cdfU) + cdfU; - if (unif < 0.5) { - tnorm = -qnorm(unif, 0., 1., 1, 0); - } - else { - tnorm = qnorm(1-unif, 0., 1., 1, 0); - } - } - - /* Now update the state and U */ - - double delta = tnorm - value; - - for (istate = 0; istate < nstate; istate++) { - state[istate] += delta * direction[istate]; - } - for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { - U[iconstraint] += delta * alpha[iconstraint] ; - } - - /* End of gibbs_step */ - -} - -void sample_truncnorm_white(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ - double *U, /* A %*% state - b */ - double *directions, /* possible steps for sampler to take */ - /* assumed to be stored as list of columns of dimension nstate */ - /* has shape (nstate, ndirection) */ - double *alphas, /* The matrix A %*% directions */ - /* has shape (nconstraint, ndirection) */ - double *output, /* array in which to store samples */ - /* assumed will stored as list of vectors of dimension nstate */ - /* has shape (nstate, ndraw) */ - int *pnconstraint, /* number of rows of A */ - int *pndirection, /* the possible number of directions to choose from */ - /* `directions` should have size nstate*ndirection */ - int *pnstate, /* dimension of state */ - int *pburnin, /* number of burnin steps */ - int *pndraw) /* total number of samples to return */ -{ - - int iter_count; - int which_direction; - - int nconstraint = *pnconstraint; - int ndirection = *pndirection; - int nstate = *pnstate; - int burnin = *pburnin; - int ndraw = *pndraw; - - double *direction, *alpha; - - for (iter_count = 0; iter_count < burnin + ndraw; iter_count++) { - - which_direction = (int) floor(runif(0., 1.) * ndirection); - direction = ((double *) directions) + nstate * which_direction; - alpha = ((double *) alphas) + nconstraint * which_direction; - - /* take a step, which implicitly updates `state` and `U` */ - - gibbs_step(state, - direction, - U, - alpha, - nconstraint, - nstate); - - /* Store result if after burnin */ - - int istate; - if (iter_count >= burnin) { - for (istate = 0; istate < nstate; istate++) { - *output = state[istate]; - output++; - } - } - } - -} - From 39b967577e8e642f8da2c1d36277c0a6f47f0b6b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Oct 2017 21:54:41 -0700 Subject: [PATCH 318/396] updating travis script --- .travis.yml | 4 ++++ Makefile | 9 +++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 541fa3a4..c324f88e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,5 +12,9 @@ warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? 
- R -e 'install.packages(c("Rcpp", "intervals"), repos="http://cloud.r-project.org")' + - cd C-software + - git submodule init + - git submodule update + - make src - make Rcpp - cd selectiveInference diff --git a/Makefile b/Makefile index 5580814b..671099fc 100644 --- a/Makefile +++ b/Makefile @@ -3,14 +3,15 @@ Rcpp: - rm -f selectiveInference/R/RcppExports.R Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" -install: Rcpp - cp C-software/src/* selectiveInference/src +install: Rcpp src R CMD INSTALL selectiveInference -build: - cp C-software/src/* selectiveInference/src +build: src R CMD build selectiveInference +src: + cp C-software/src/* selectiveInference/src + check: Rcpp build R CMD build selectiveInference R CMD check selectiveInference_1.2.2.tar.gz # fix this to be a script variable \ No newline at end of file From cd918b1fa35e4721dafcc20e97ffc882a1edf21a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 11 Oct 2017 07:38:07 -0700 Subject: [PATCH 319/396] variable rename --- selectiveInference/R/funs.fixedCox.R | 12 ++++++------ selectiveInference/R/funs.fixedLogit.R | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index 2fa8c083..9f35c950 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -29,7 +29,7 @@ if( sum(status==0)+sum(status==1)!=length(y)) stop("status vector must have valu vars=which(m) if(sum(m)>0){ bhat=beta[beta!=0] #penalized coefs just for active variables - s2=sign(bhat) + sign_bhat=sign(bhat) #check KKT @@ -40,7 +40,7 @@ if(sum(m)>0){ res=residuals(aaa,type="score") if(!is.matrix(res)) res=matrix(res,ncol=1) scor=colSums(res) - g=(scor+lambda*s2)/(2*lambda) + g=(scor+lambda*sign_bhat)/(2*lambda) # cat(c(g,lambda,tol.kkt),fill=T) if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", @@ -49,9 +49,9 @@ scor=colSums(res) # Hessian of partial likelihood at the LASSO solution MM=vcov(aaa) -bbar=(bhat+lambda*MM%*%s2) -A1=-(mydiag(s2)) -b1= -(mydiag(s2)%*%MM)%*%s2*lambda +bbar=(bhat+lambda*MM%*%sign_bhat) +A1=-(mydiag(sign_bhat)) +b1= -(mydiag(sign_bhat)%*%MM)%*%sign_bhat*lambda temp=max(A1%*%bbar-b1) @@ -63,7 +63,7 @@ b1= -(mydiag(s2)%*%MM)%*%s2*lambda # the one sided p-values are a bit off for(jj in 1:length(bbar)){ - vj=rep(0,length(bbar));vj[jj]=s2[jj] + vj=rep(0,length(bbar));vj[jj]=sign_bhat[jj] junk=TG.pvalue(bbar, A1, b1, vj,MM) diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 4ab33980..75a31e0a 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -32,7 +32,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet m=beta[-1]!=0 #active set bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars - s2=sign(bhat) + sign_bhat=sign(bhat) lam2m=diag(c(0,rep(lambda,sum(m)))) @@ -66,14 +66,14 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet # MM=solve(t(xxm)%*%w%*%xxm) MM=solve(scale(t(xxm),F,1/ww)%*%xxm) gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized - # at exact LASSO solution it should be s2[-1] + # at exact LASSO solution it should be sign_bhat[-1] dbeta = MM %*% gm - # bbar=(bhat+lam2m%*%MM%*%s2) # JT: this is wrong, shouldn't use sign of intercept anywhere... 
+ # bbar=(bhat+lam2m%*%MM%*%sign_bhat) # JT: this is wrong, shouldn't use sign of intercept anywhere... bbar = bhat - dbeta - A1=-(mydiag(s2))[-1,] - b1= (s2 * dbeta)[-1] + A1=-(mydiag(sign_bhat))[-1,] + b1= (sign_bhat * dbeta)[-1] tol.poly = 0.01 if (max((A1 %*% bbar) - b1) > tol.poly) @@ -87,7 +87,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet for(jj in 1:sum(m)){ - vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] + vj=c(rep(0,sum(m)+1));vj[jj+1]=sign_bhat[jj+1] # compute p-values junk=TG.pvalue(bbar, A1, b1, vj, MM) pv[jj] = junk$pv From ac2bbc82abd7a7e530f44a1be54d01f5ca940c22 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 25 Oct 2017 14:06:10 -0700 Subject: [PATCH 320/396] NF: density functions for randomized LASSO --- selectiveInference/src/randomized_lasso.c | 201 ++++++++++++++++++++++ selectiveInference/src/randomized_lasso.h | 42 +++++ 2 files changed, 243 insertions(+) create mode 100644 selectiveInference/src/randomized_lasso.c create mode 100644 selectiveInference/src/randomized_lasso.h diff --git a/selectiveInference/src/randomized_lasso.c b/selectiveInference/src/randomized_lasso.c new file mode 100644 index 00000000..ac55cf48 --- /dev/null +++ b/selectiveInference/src/randomized_lasso.c @@ -0,0 +1,201 @@ +#include // for fabs + +// Augmented density for randomized LASSO after +// Gaussian randomization + +// Described in https://arxiv.org/abs/1609.05609 + +// Gaussian is product of IID N(0, noise_scale^2) density +// Evaluated at A_D D + A_O O + h + +// Laplace is product of IID Laplace with scale noise_scale +// Also evaluated at A_D D + A_O O + h + +double log_density_gaussian(double noise_scale, // Scale of randomization + int ndim, // Number of features -- "p" + int ninternal, // Dimension of internal data representation often 1 + int noptimization, // Dimension of optimization variables -- "p" + double *internal_linear, // A_D -- linear part for data + double *internal_state, // D -- data state + double *optimization_linear, // A_O -- linear part for optimization variables + double *optimization_state, // O -- optimization state + double *offset) // h -- offset in affine transform -- "p" dimensional +{ + int irow, icol; + double denom = 2 * noise_scale * noise_scale; + double value = 0; + double reconstruction = 0; + double *offset_ptr; + double *internal_linear_ptr; + double *internal_state_ptr; + double *optimization_linear_ptr; + double *optimization_state_ptr; + + for (irow=0; irow Date: Wed, 25 Oct 2017 16:35:52 -0700 Subject: [PATCH 321/396] T to TRUE --- selectiveInference/R/funs.fixed.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index be314af1..66a30887 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -6,7 +6,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"), intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, - type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + type=c("partial", "full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) { @@ -150,7 +150,7 @@ fixedLassoInf <- function(x, y, beta, ci = tailarea = matrix(0,k,2) if (type=="full" & p > n) { - if (intercept == T) { + if (intercept == TRUE) { pp=p+1 Xint <- cbind(rep(1,n),x) # indices of selected predictors From 49d0b67884454543899680b91db1fed6c67e29a0 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 
2017 13:08:05 -0700 Subject: [PATCH 322/396] updated wide C code -- can now do ridge as well as variable weights -- ready to solve randomized LASSO --- selectiveInference/R/funs.fixed.R | 40 +++- selectiveInference/src/Rcpp-debias.cpp | 35 +++- selectiveInference/src/debias.h | 22 ++- selectiveInference/src/quadratic_program.c | 108 +++++++---- .../src/quadratic_program_wide.c | 176 ++++++++++++------ selectiveInference/src/randomized_lasso.c | 13 -- 6 files changed, 270 insertions(+), 124 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 66a30887..aaceaaad 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -189,8 +189,10 @@ fixedLassoInf <- function(x, y, beta, } M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) + # vector which is offset for testing debiased beta's null_value <- (((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) + if (intercept == T) { M = M[-1,] # remove intercept row null_value = null_value[-1] # remove intercept element @@ -238,12 +240,23 @@ fixedLassoInf <- function(x, y, beta, tailarea[j,] = a$tailarea } - out = list(type=type,lambda=lambda,pv=pv,ci=ci, - tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, - vars=vars,sign=sign_vars,sigma=sigma,alpha=alpha, - sd=sigma*sqrt(rowSums(vmat^2)), - coef0=vmat%*%y, - call=this.call) + out = list(type=type, + lambda=lambda, + pv=pv, + ci=ci, + tailarea=tailarea, + vlo=vlo, + vup=vup, + vmat=vmat, + y=y, + vars=vars, + sign=sign_vars, + sigma=sigma, + alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call) + class(out) = "fixedLassoInf" return(out) } @@ -374,7 +387,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep row, mu, linesearch=TRUE, # do a linesearch? - scaling_factor=1.2, # multiplicative factor for linesearch + scaling_factor=1.5, # multiplicative factor for linesearch max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? 
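The hunks below thread the new stopping flags and the vector of Lagrange multipliers through to solve_QP and solve_QP_wide. As a hedged sketch of the linesearch these calls sit inside (solve_row is a stand-in name for the solve_QP / solve_QP_wide call, not a function in the package):

    mu_try = mu
    for (itry in 1:max_try) {
        fit = solve_row(mu_try)            # stand-in for the solve_QP / solve_QP_wide call
        if (fit$kkt_check) break           # accept as soon as the KKT conditions hold
        mu_try = mu_try * scaling_factor   # relax the feasibility parameter (1.5 after this patch)
    }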
@@ -420,11 +433,15 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, - max_active) + max_active, + FALSE, # objective_stop + FALSE, # kkt_stop + TRUE) # param_stop } else { Xsoln = rep(0, nrow(Xinfo)) result = solve_QP_wide(Xinfo, # this is a design matrix - mu, + rep(mu, p), # vector of Lagrange multipliers + 0, # ridge_term max_iter, soln, linear_func, @@ -434,7 +451,10 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, - max_active) + max_active, + FALSE, # objective_stop + FALSE, # kkt_stop + TRUE) # param_stop } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index ce8bb156..24bbae88 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -15,7 +15,10 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, - int max_active + int max_active, + int objective_stop, + int kkt_stop, + int param_stop ) { int nrow = Sigma.nrow(); // number of features @@ -28,6 +31,8 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::NumericVector Sigma_diag(nrow); double *sigma_diag_p = Sigma_diag.begin(); + Rcpp::NumericVector theta_old(nrow); + for (irow=0; irow= max_active); diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index ebcbc933..052af7a1 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -9,13 +9,17 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ double *gradient_ptr, /* nndef times theta */ int *ever_active_ptr, /* Ever active set: 1-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in nndef */ + int nfeature, /* How many features in nndef */ double bound, /* feasibility parameter */ double *theta, /* current value */ + double *theta_old, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active); /* Upper limit for size of active set -- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? */ + int param_stop); /* Break based on parameter convergence? */ int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* nndef times theta + linear_func */ @@ -33,12 +37,17 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int *nactive_ptr, /* Size of ever active set */ int ncase, /* How many rows in X */ int nfeature, /* How many columns in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ + double *theta_old_ptr, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active); /* Upper limit for size of active set -- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? 
*/ + int param_stop); /* Break based on parameter convergence? */ int check_KKT_wide(double *theta_ptr, /* current theta */ double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ @@ -48,8 +57,9 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ - double tol); /* precision for checking KKT conditions */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double tol); /* precision for checking KKT conditions */ void update_gradient_wide(double *gradient_ptr, /* X^TX/ncase times theta + linear_func */ double *X_theta_ptr, /* Current fitted values */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index d9bd0170..1bc7fa34 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -92,19 +92,19 @@ int update_ever_active_qp(int coord, int check_KKT_qp(double *theta_ptr, /* current theta */ double *gradient_ptr, /* nndef times theta + linear_func */ - int nrow, /* how many rows in nndef */ + int nfeature, /* how many features in nndef */ double bound, /* Lagrange multipler for \ell_1 */ double tol) /* precision for checking KKT conditions */ { // First check inactive - int irow; + int ifeature; double *theta_ptr_tmp, *gradient_ptr_tmp; double gradient; - for (irow=0; irow 1.e-6 * (fabs(value) + fabs(old_value))) { delta = value - old_value; - nndef_ptr_tmp = ((double *) nndef_ptr + coord * nrow); + nndef_ptr_tmp = ((double *) nndef_ptr + coord * nfeature); gradient_ptr_tmp = ((double *) gradient_ptr); - for (icol=0; icol= max_active) { @@ -380,12 +417,12 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ // Check relative decrease of objective - if (check_objective) { + if (objective_stop) { new_value = objective_qp(nndef_ptr, linear_func_ptr, ever_active_ptr, nactive_ptr, - nrow, + nfeature, bound, theta); @@ -394,6 +431,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ } old_value = new_value; } + } return(iter); } diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 3beb578a..c6cb9f3f 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -4,7 +4,7 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 // where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf @@ -19,11 +19,13 @@ double objective_wide(double *X_theta_ptr, /* Fitted values */ int *nactive_ptr, /* Size of ever active set */ int ncase, /* how many rows in X */ int nfeature, /* how many columns in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr) /* current value */ { int icase, iactive; double value = 0; + double *bound_ptr_tmp; double *X_theta_ptr_tmp = X_theta_ptr; double *linear_func_ptr_tmp = linear_func_ptr; double *theta_ptr_tmp; @@ -55,8 
+57,12 @@ double objective_wide(double *X_theta_ptr, /* Fitted values */ // The \ell_1 term - value += bound * fabs((*theta_ptr_tmp)); + bound_ptr_tmp = ((double *) bound_ptr + active_feature); + value += (*bound_ptr_tmp) * fabs((*theta_ptr_tmp)); + // The ridge term + + value += 0.5 * ridge_term * (*theta_ptr_tmp) * (*theta_ptr_tmp); } return(value); @@ -167,34 +173,39 @@ int check_KKT_wide(double *theta_ptr, /* current theta */ int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double tol) /* precision for checking KKT conditions */ { // First check inactive int ifeature; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; double gradient; for (ifeature=0; ifeature 0) && (fabs(gradient + bound) > tol * bound)) { + if ((*theta_ptr_tmp != 0) && (bound != 0)) { // these coordinates of gradients should be equal to -bound + + if ((*theta_ptr_tmp > 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { return(0); } - + } - else { + else if (bound != 0) { if (fabs(gradient) > (1. + tol) * bound) { return(0); } @@ -214,13 +225,16 @@ int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0- int *need_update_ptr, /* Which coordinates need to be updated? */ int nfeature, /* how many columns in X */ int ncase, /* how many rows in X */ - double bound, /* Lagrange multipler for \ell_1 */ + double *bound_ptr, /* Lagrange multipliers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ double tol) /* precision for checking KKT conditions */ { // First check inactive int iactive; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; double gradient; int nactive = *nactive_ptr; int active_feature; @@ -230,23 +244,26 @@ int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0- active_feature_ptr = ((int *) ever_active_ptr + iactive); active_feature = *active_feature_ptr - 1; // Ever-active is 1-based + theta_ptr_tmp = ((double *) theta_ptr + active_feature); + bound_ptr_tmp = ((double *) bound_ptr + active_feature); + bound = *bound_ptr_tmp; // Compute this coordinate of the gradient gradient = compute_gradient_coord(gradient_ptr, X_theta_ptr, X_ptr, linear_func_ptr, need_update_ptr, active_feature, ncase); - if (*theta_ptr_tmp != 0) { // these coordinates of gradients should be equal to -bound + if ((*theta_ptr_tmp != 0) && (bound != 0)) { // these coordinates of gradients should be equal to -bound - if ((*theta_ptr_tmp > 0) && (fabs(gradient + bound) > tol * bound)) { + if ((*theta_ptr_tmp > 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { return(0); } } - else { + else if (bound != 0) { if (fabs(gradient) > (1. 
+ tol) * bound) { return(0); } @@ -266,7 +283,8 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ int *need_update_ptr, /* Whether a gradient coordinate needs update or not */ int ncase, /* How many rows in X */ int nfeature, /* How many rows in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ int coord, /* which coordinate to update: 0-based */ int is_active) /* Is this coord in ever_active */ @@ -280,6 +298,8 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ double *X_theta_ptr_tmp; int *need_update_ptr_tmp; double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; int ifeature, icase; double *diagonal_ptr = ((double *) nndef_diag_ptr + coord); @@ -290,6 +310,9 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ theta_ptr_tmp = ((double *) theta_ptr + coord); old_value = *theta_ptr_tmp; + bound_ptr_tmp = ((double *) bound_ptr + coord); + bound = *bound_ptr_tmp; + // The coord entry of gradient_ptr term has a diagonal term in it: // (X^TX)[coord, coord] * theta[coord] / ncase // This removes it. @@ -298,17 +321,17 @@ double update_one_coord_wide(double *X_ptr, /* A design matrix*/ // Now soft-threshold the coord entry of theta - // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| - // with q=diagonal_entry and l=linear_term + // Objective is t \mapsto (q+eps)/2 * t^2 + l * t + bound |t| + + // with q=diagonal_entry and l=linear_term and eps=ridge_Term // With a negative linear term, solution should be // positive if (linear_term < -bound) { - value = (-linear_term - bound) / diagonal_entry; + value = (-linear_term - bound) / (diagonal_entry + ridge_term); } else if (linear_term > bound) { - value = -(linear_term - bound) / diagonal_entry; + value = -(linear_term - bound) / (diagonal_entry + ridge_term); } // Add to active set if necessary @@ -363,24 +386,36 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int *nactive_ptr, /* Size of ever active set */ int ncase, /* How many rows in X */ int nfeature, /* How many columns in X */ - double bound, /* feasibility parameter */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ double *theta_ptr, /* current value */ + double *theta_old_ptr, /* previous value */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ - int max_active) /* Upper limit for size of active set -- otherwise break */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int objective_stop, /* Break based on convergence of objective value? */ + int kkt_stop, /* Break based on KKT? */ + int param_stop) /* Break based on parameter convergence? 
*/ { int iter = 0; + int iter_old = 1; int ifeature = 0; int iactive = 0; int *active_ptr; - int check_objective = 1; + double old_value, new_value; int niter_active = 5; int iter_active; - if (check_objective) { + double norm_diff = 1.; + double norm_last = 1.; + double delta; + double threshold = 1.e-2; + double *theta_ptr_tmp, *theta_old_ptr_tmp; + + if (objective_stop) { old_value = objective_wide(X_theta_ptr, linear_func_ptr, @@ -388,8 +423,10 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX nactive_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr); + new_value = old_value; } @@ -412,7 +449,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr, *active_ptr - 1, // Ever-active is 1-based 1); @@ -431,7 +469,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, nfeature, ncase, - bound, + bound_ptr, + ridge_term, kkt_tol) == 1) { break; } @@ -440,17 +479,20 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check KKT - if (check_KKT_wide(theta_ptr, - gradient_ptr, - X_theta_ptr, - X_ptr, - linear_func_ptr, - need_update_ptr, - nfeature, - ncase, - bound, - kkt_tol) == 1) { - break; + if (kkt_stop) { + if (check_KKT_wide(theta_ptr, + gradient_ptr, + X_theta_ptr, + X_ptr, + linear_func_ptr, + need_update_ptr, + nfeature, + ncase, + bound_ptr, + ridge_term, + kkt_tol) == 1) { + break; + } } // Update all variables @@ -467,7 +509,8 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX need_update_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr, ifeature, 0); @@ -475,19 +518,45 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check KKT - if (check_KKT_wide(theta_ptr, - gradient_ptr, - X_theta_ptr, - X_ptr, - linear_func_ptr, - need_update_ptr, - nfeature, - ncase, - bound, - kkt_tol) == 1) { - break; + if (kkt_stop) { + if (check_KKT_wide(theta_ptr, + gradient_ptr, + X_theta_ptr, + X_ptr, + linear_func_ptr, + need_update_ptr, + nfeature, + ncase, + bound_ptr, + ridge_term, + kkt_tol) == 1) { + break; + } + } + + // Check based on norm -- from Adel's debiasing code + + if (param_stop) { + if (iter == 2 * iter_old) { + iter_old = iter; + norm_diff = 0; + norm_last = 0; + for (ifeature=0; ifeature= max_active) { @@ -496,14 +565,15 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX // Check relative decrease of objective - if (check_objective) { + if (objective_stop) { new_value = objective_wide(X_theta_ptr, linear_func_ptr, ever_active_ptr, nactive_ptr, ncase, nfeature, - bound, + bound_ptr, + ridge_term, theta_ptr); if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { diff --git a/selectiveInference/src/randomized_lasso.c b/selectiveInference/src/randomized_lasso.c index ac55cf48..123c81de 100644 --- a/selectiveInference/src/randomized_lasso.c +++ b/selectiveInference/src/randomized_lasso.c @@ -126,8 +126,6 @@ double log_density_gaussian_conditional(double noise_scale, // Scale double denom = 2 * noise_scale * noise_scale; double reconstruction = 0; double *offset_ptr; - double *internal_linear_ptr; - double *internal_state_ptr; double *optimization_linear_ptr; double *optimization_state_ptr; @@ -164,8 +162,6 @@ double log_density_laplace_conditional(double noise_scale, // Scale double value = 0; double reconstruction = 0; double *offset_ptr; - double *internal_linear_ptr; - double 
*internal_state_ptr; double *optimization_linear_ptr; double *optimization_state_ptr; @@ -176,15 +172,6 @@ double log_density_laplace_conditional(double noise_scale, // Scale offset_ptr = ((double *) offset + irow); reconstruction = *offset_ptr; - - // Internal (i.e. data) contribution - for (icol=0; icol<ninternal; icol++) {

From: Jonathan Taylor Date: Thu, 26 Oct 2017 13:08:37 -0700 Subject: [PATCH 323/396] removing machine generated files --- selectiveInference/R/RcppExports.R | 19 ------ selectiveInference/src/RcppExports.cpp | 92 -------------------------- 2 files changed, 111 deletions(-) delete mode 100644 selectiveInference/R/RcppExports.R delete mode 100644 selectiveInference/src/RcppExports.cpp diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R deleted file mode 100644 index f5ebee43..00000000 --- a/selectiveInference/R/RcppExports.R +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active) -} - -solve_QP_wide <- function(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) { - .Call('_selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active) -} - -update1_ <- function(Q2, w, m, k) { - .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) -} - -downdate1_ <- function(Q1, R, j0, m, n) { - .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) -} - diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp deleted file mode 100644 index 02a77413..00000000 --- a/selectiveInference/src/RcppExports.cpp +++ /dev/null @@ -1,92 +0,0 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#include <Rcpp.h> - -using namespace Rcpp; - -// solve_QP -Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); - Rcpp::traits::input_parameter< double >::type bound(boundSEXP); - Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); - Rcpp::traits::input_parameter<
Rcpp::IntegerVector >::type nactive(nactiveSEXP); - Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); - Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); - Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; -END_RCPP -} -// solve_QP_wide -Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::NumericVector X_theta, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, int max_active); -RcppExport SEXP _selectiveInference_solve_QP_wide(SEXP XSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP X_thetaSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP, SEXP max_activeSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type X(XSEXP); - Rcpp::traits::input_parameter< double >::type bound(boundSEXP); - Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type X_theta(X_thetaSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); - Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); - Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); - Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); - Rcpp::traits::input_parameter< int >::type max_active(max_activeSEXP); - rcpp_result_gen = Rcpp::wrap(solve_QP_wide(X, bound, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, max_active)); - return rcpp_result_gen; -END_RCPP -} -// update1_ -Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); -RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); - Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); - Rcpp::traits::input_parameter< int >::type m(mSEXP); - Rcpp::traits::input_parameter< int >::type k(kSEXP); - rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); - return rcpp_result_gen; -END_RCPP -} -// downdate1_ -Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); -RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); - Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); - Rcpp::traits::input_parameter< int >::type j0(j0SEXP); - Rcpp::traits::input_parameter< int >::type m(mSEXP); - Rcpp::traits::input_parameter< int >::type n(nSEXP); - rcpp_result_gen = 
Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); - return rcpp_result_gen; -END_RCPP -} - -static const R_CallMethodDef CallEntries[] = { - {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 11}, - {"_selectiveInference_solve_QP_wide", (DL_FUNC) &_selectiveInference_solve_QP_wide, 12}, - {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, - {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_selectiveInference(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} From 74803104aef4efbbc0126a1a4d8033c61f608a82 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 14:30:59 -0700 Subject: [PATCH 324/396] WIP: function to fit randomized lasso --- selectiveInference/R/funs.randomized.R | 64 ++++++++++++++++++++++++++ tests/test_randomized.R | 14 ++++++ 2 files changed, 78 insertions(+) create mode 100644 selectiveInference/R/funs.randomized.R create mode 100644 tests/test_randomized.R diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R new file mode 100644 index 00000000..01e6aa40 --- /dev/null +++ b/selectiveInference/R/funs.randomized.R @@ -0,0 +1,64 @@ +# Functions to fit and "infer" about parameters in the +# randomized LASSO +# +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2 + +fit_randomized_lasso = function(X, + y, + lam, + noise_scale, + ridge_term, + noise_type=c('gaussian', 'laplace'), + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + objective_tol=1.e-8, # tolerance for relative decrease in objective + objective_stop=FALSE, + kkt_stop=TRUE, + param_stop=TRUE) +{ + + n = nrow(X); p = ncol(X) + + noise_type = match.arg(noise_type) + + if (noise_type == 'gaussian') { + D = Norm(mean=0, sd=noise_scale) + } + else if (noise_type == 'laplace') { + D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. + } + perturb_ = distr::r(D)(p) + + lam = as.numeric(lam) + if (length(lam) == 1) { + lam = rep(lam, p) + } + if (length(lam) != p) { + stop("Lagrange parameter should be single float or of length ncol(X)") + } + + soln = rep(0, p) + Xsoln = rep(0, n) + linear_func = (- t(X) %*% y - perturb_) + gradient = 1. 
* linear_func + ever_active = rep(0, p) + nactive = as.integer(0) + + result = solve_QP_wide(X, # design matrix + lam, # vector of Lagrange multipliers + ridge_term / n, # ridge_term + max_iter, + soln, + linear_func, + gradient, + Xsoln, + ever_active, + nactive, + kkt_tol, + objective_tol, + p, + objective_stop, # objective_stop + kkt_stop, # kkt_stop + param_stop) # param_stop + return(result) +} diff --git a/tests/test_randomized.R b/tests/test_randomized.R new file mode 100644 index 00000000..e4e35a12 --- /dev/null +++ b/tests/test_randomized.R @@ -0,0 +1,14 @@ +library(selectiveInference) + +test = function() { + + n = 100; p = 50 + X = matrix(rnorm(n * p), n, p) + y = rnorm(n) + lam = 20 / sqrt(n) + noise_scale = 0.01 * sqrt(n) + ridge_term = .1 / sqrt(n) + fit_randomized_lasso(X, y, lam, noise_scale, ridge_term) +} + +print(test()) From 38b2020b36ef81da2fe0fec8f265f3effc23610f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 21:36:18 -0700 Subject: [PATCH 325/396] check that solution is same as old code -- currently failing --- tests/test_debiasing.R | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/test_debiasing.R diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R new file mode 100644 index 00000000..27bd6214 --- /dev/null +++ b/tests/test_debiasing.R @@ -0,0 +1,27 @@ +library(selectiveInference) +source('oldcode.R') + +n = 500; p = 50 + +X = matrix(rnorm(n * p), n, p) +S = t(X) %*% X / n + +mu = 7.791408e-02 + +A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000) +A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000) + +B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000) + +C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[1:5,] +C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[1:5,] + +par(mfrow=c(2,3)) +plot(A1[1,], C1[1,]) +plot(A1[1,], B1[1,]) +plot(B1[1,], C1[1,]) + +plot(A1[1,], A2[1,]) +plot(B1[1,], B2[1,]) +plot(C1[1,], C2[1,]) From dc423de5cf6094e18a986b1305b269aa159c422d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:17:39 -0700 Subject: [PATCH 326/396] bug found in qp solver -- look at tests/test_QP.R --- selectiveInference/NAMESPACE | 2 +- selectiveInference/R/funs.fixed.R | 5 + selectiveInference/R/funs.randomized.R | 2 + selectiveInference/src/Rcpp-debias.cpp | 4 + selectiveInference/src/debias.h | 2 + selectiveInference/src/quadratic_program.c | 4 +- .../src/quadratic_program_wide.c | 6 +- tests/test_QP.R | 15 ++ tests/test_debiasing.R | 172 +++++++++++++++++- 9 files changed, 204 insertions(+), 8 deletions(-) create mode 100644 tests/test_QP.R diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index d72d56a9..c7d08a1e 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -44,4 +44,4 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") importFrom("Rcpp", "sourceCpp") - +importFrom("distr", "Norm", "DExp") diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index aaceaaad..54903162 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -327,6 +327,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? 
max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective ) { @@ -363,6 +364,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d warn_kkt=FALSE, max_iter=max_iter, kkt_tol=kkt_tol, + parameter_tol=parameter_tol, objective_tol=objective_tol) if (warn_kkt && (!output$kkt_check)) { @@ -393,6 +395,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective ) { @@ -433,6 +436,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, + parameter_tol, max_active, FALSE, # objective_stop FALSE, # kkt_stop @@ -451,6 +455,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep nactive, kkt_tol, objective_tol, + parameter_tol, max_active, FALSE, # objective_stop FALSE, # kkt_stop diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 01e6aa40..25a0b95a 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -11,6 +11,7 @@ fit_randomized_lasso = function(X, noise_type=c('gaussian', 'laplace'), max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-8, # tolerance for relative convergence of parameter objective_tol=1.e-8, # tolerance for relative decrease in objective objective_stop=FALSE, kkt_stop=TRUE, @@ -56,6 +57,7 @@ fit_randomized_lasso = function(X, nactive, kkt_tol, objective_tol, + parameter_tol, p, objective_stop, # objective_stop kkt_stop, # kkt_stop diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 24bbae88..9cda705a 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -15,6 +15,7 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, + double parameter_tol, int max_active, int objective_stop, int kkt_stop, @@ -52,6 +53,7 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, maxiter, kkt_tol, objective_tol, + parameter_tol, max_active, objective_stop, kkt_stop, @@ -92,6 +94,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol, + double parameter_tol, int max_active, int objective_stop, int kkt_stop, @@ -142,6 +145,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, maxiter, kkt_tol, objective_tol, + parameter_tol, max_active, objective_stop, kkt_stop, diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 052af7a1..d3db26d7 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -16,6 +16,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, 
/* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ @@ -44,6 +45,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index 1bc7fa34..822ddf53 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -273,6 +273,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? */ @@ -292,7 +293,6 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ double norm_diff = 1.; double norm_last = 1.; double delta; - double threshold = 1.e-2; double *theta_ptr, *theta_old_ptr; if (objective_stop) { @@ -403,7 +403,7 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ norm_diff = sqrt(norm_diff); norm_last = sqrt(norm_last); - if (norm_diff < threshold * norm_last) { + if (norm_diff < parameter_tol * norm_last) { break; } } diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index c6cb9f3f..3e4bdb09 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -4,7 +4,7 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2 - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 // where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf @@ -393,6 +393,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX int maxiter, /* max number of iterations */ double kkt_tol, /* precision for checking KKT conditions */ double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ int max_active, /* Upper limit for size of active set -- otherwise break */ int objective_stop, /* Break based on convergence of objective value? */ int kkt_stop, /* Break based on KKT? 
*/ @@ -412,7 +413,6 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX double norm_diff = 1.; double norm_last = 1.; double delta; - double threshold = 1.e-2; double *theta_ptr_tmp, *theta_old_ptr_tmp; if (objective_stop) { @@ -552,7 +552,7 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX norm_diff = sqrt(norm_diff); norm_last = sqrt(norm_last); - if (norm_diff < threshold * norm_last) { + if (norm_diff < parameter_tol * norm_last) { break; } } diff --git a/tests/test_QP.R b/tests/test_QP.R new file mode 100644 index 00000000..cf5ca646 --- /dev/null +++ b/tests/test_QP.R @@ -0,0 +1,15 @@ +library(selectiveInference) +### Test + +n = 100; p = 50 + +X = matrix(rnorm(n * p), n, p) +Y = rnorm(n) +lam = 2 + +soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 1.e-12, 0)$soln +G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) +soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] + +print(soln1) +print(soln2) \ No newline at end of file diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R index 27bd6214..b1fdf24c 100644 --- a/tests/test_debiasing.R +++ b/tests/test_debiasing.R @@ -1,7 +1,143 @@ library(selectiveInference) -source('oldcode.R') -n = 500; p = 50 + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(mu)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + mu.stop <- 0; + try.no <- 1; + incr <- 0; + while ((mu.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + mu.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + mu <- mu*resol; + } else { + incr <- 0; + mu <- mu/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + mu <- mu*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + mu.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + mu <- mu/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + mu <- mu*resol; + beta <- last.beta; + mu.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + mu0 <- rho/(1+rho); + beta <- rep(0,p); + + #if (mu >= mu0){ + # beta[i] <- (1-mu0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-mu0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, 
maxiter, diff.norm2, threshold * last.norm2, threshold, mu)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Test + +n = 100; p = 50 X = matrix(rnorm(n * p), n, p) S = t(X) %*% X / n @@ -25,3 +161,35 @@ plot(B1[1,], C1[1,]) plot(A1[1,], A2[1,]) plot(B1[1,], B2[1,]) plot(C1[1,], C2[1,]) + +print(c('A', sum(A1[1,] == 0))) +print(c('B', sum(B1[1,] == 0))) +print(c('C', sum(C1[1,] == 0))) + +## Are our points feasible + +feasibility = function(S, soln, j, mu) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(max(abs(G)), mu)) +} + +print(c('feasibility A', feasibility(S, A1[1,], 1, mu))) +print(c('feasibility B', feasibility(S, B1[1,], 1, mu))) +print(c('feasibility C', feasibility(S, C1[1,], 1, mu))) + +active_KKT = function(S, soln, j, mu) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(G[soln != 0] * sign(soln)[soln != 0], mu)) +} + +print(c('active_KKT A', active_KKT(S, A1[1,], 1, mu))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, mu))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, mu))) + + From 71ff6e3080c6c93390fb6d074ef22085fbd036b0 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:19:20 -0700 Subject: [PATCH 327/396] allowing randomization to be 0 in solver --- selectiveInference/R/funs.randomized.R | 18 +++++++++++------- tests/test_QP.R | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 25a0b95a..b79e2fb8 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -22,14 +22,18 @@ fit_randomized_lasso = function(X, noise_type = match.arg(noise_type) - if (noise_type == 'gaussian') { - D = Norm(mean=0, sd=noise_scale) + if (noise_scale > 0) { + if (noise_type == 'gaussian') { + D = Norm(mean=0, sd=noise_scale) + } + else if (noise_type == 'laplace') { + D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. + } + perturb_ = distr::r(D)(p) + } else { + perturb_ = rep(0, p) } - else if (noise_type == 'laplace') { - D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. 
- } - perturb_ = distr::r(D)(p) - + lam = as.numeric(lam) if (length(lam) == 1) { lam = rep(lam, p) diff --git a/tests/test_QP.R b/tests/test_QP.R index cf5ca646..4aebec37 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -7,7 +7,7 @@ X = matrix(rnorm(n * p), n, p) Y = rnorm(n) lam = 2 -soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 1.e-12, 0)$soln +soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 0, 0)$soln G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] From de8874bc22db314e2efeb99a34ed226aeea2745e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 22:25:32 -0700 Subject: [PATCH 328/396] cosmetic edit --- selectiveInference/src/Rcpp-debias.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 9cda705a..5c181848 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -112,12 +112,13 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, Rcpp::IntegerVector need_update(nfeature); - // Extract the diagonal + Rcpp::NumericVector theta_old(nfeature); + + // Extract the diagonal -- divide by ncase + Rcpp::NumericVector nndef_diag(nfeature); double *nndef_diag_p = nndef_diag.begin(); - Rcpp::NumericVector theta_old(nfeature); - for (ifeature=0; ifeature Date: Thu, 26 Oct 2017 22:46:48 -0700 Subject: [PATCH 329/396] LASSO solver is right up to scale --- tests/test_QP.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_QP.R b/tests/test_QP.R index 4aebec37..61c4d539 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -9,7 +9,8 @@ lam = 2 soln1 = selectiveInference:::fit_randomized_lasso(X, Y, lam, 0, 0)$soln G = glmnet(X, Y, intercept=FALSE, standardize=FALSE) -soln2 = coef(G, s=1/n, exact=TRUE, x=X, y=Y)[-1] +soln2 = coef(G, s=lam/n, exact=TRUE, x=X, y=Y)[-1] print(soln1) -print(soln2) \ No newline at end of file +print(soln2) +plot(soln1, soln2) From 2f1686ee2c627061765f1aca1f0b533202eaf0a2 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 26 Oct 2017 23:55:49 -0700 Subject: [PATCH 330/396] BF: fixing Xsoln incorrectly set in wide solver --- selectiveInference/R/funs.fixed.R | 24 +++++++++++++------ selectiveInference/R/funs.randomized.R | 4 ++-- .../src/quadratic_program_wide.c | 2 +- tests/test_QP.R | 2 +- tests/test_debiasing.R | 13 ++++++---- 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 54903162..22a31e5a 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -326,6 +326,9 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? max_iter=100, # how many iterations for each optimization problem + kkt_stop=TRUE, # stop based on KKT conditions? + parameter_stop=TRUE, # stop based on relative convergence of parameter? + objective_stop=TRUE, # stop based on relative decrease in objective? 
kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective @@ -363,6 +366,9 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d max_try=max_try, warn_kkt=FALSE, max_iter=max_iter, + kkt_stop=kkt_stop, + parameter_stop=parameter_stop, + objective_stop=objective_stop, kkt_tol=kkt_tol, parameter_tol=parameter_tol, objective_tol=objective_tol) @@ -394,6 +400,9 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? max_iter=100, # how many iterations for each optimization problem + kkt_stop=TRUE, # stop based on KKT conditions? + parameter_stop=TRUE, # stop based on relative convergence of parameter? + objective_stop=TRUE, # stop based on relative decrease in objective? kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-4, # tolerance for relative convergence of parameter objective_tol=1.e-8 # tolerance for relative decrease in objective @@ -423,6 +432,8 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep last_output = NULL + Xsoln = rep(0, n) + while (counter_idx < max_try) { if (!is_wide) { @@ -438,11 +449,10 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, parameter_tol, max_active, - FALSE, # objective_stop - FALSE, # kkt_stop - TRUE) # param_stop + objective_stop, + kkt_stop, + parameter_stop) } else { - Xsoln = rep(0, nrow(Xinfo)) result = solve_QP_wide(Xinfo, # this is a design matrix rep(mu, p), # vector of Lagrange multipliers 0, # ridge_term @@ -457,9 +467,9 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, parameter_tol, max_active, - FALSE, # objective_stop - FALSE, # kkt_stop - TRUE) # param_stop + objective_stop, + kkt_stop, + parameter_stop) } diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index b79e2fb8..7bf60080 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -44,13 +44,13 @@ fit_randomized_lasso = function(X, soln = rep(0, p) Xsoln = rep(0, n) - linear_func = (- t(X) %*% y - perturb_) + linear_func = (- t(X) %*% y - perturb_) / n gradient = 1. 
* linear_func ever_active = rep(0, p) nactive = as.integer(0) result = solve_QP_wide(X, # design matrix - lam, # vector of Lagrange multipliers + lam / n, # vector of Lagrange multipliers ridge_term / n, # ridge_term max_iter, soln, diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 3e4bdb09..3546fcda 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -4,7 +4,7 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n - l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n + l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 // where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf diff --git a/tests/test_QP.R b/tests/test_QP.R index 61c4d539..17642259 100644 --- a/tests/test_QP.R +++ b/tests/test_QP.R @@ -1,7 +1,7 @@ library(selectiveInference) ### Test -n = 100; p = 50 +n = 80; p = 50 X = matrix(rnorm(n * p), n, p) Y = rnorm(n) diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R index b1fdf24c..e2743fea 100644 --- a/tests/test_debiasing.R +++ b/tests/test_debiasing.R @@ -144,16 +144,19 @@ S = t(X) %*% X / n mu = 7.791408e-02 -A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000) -A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000) +tol = 1.e-12 -B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000) -B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000) +A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) + +B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[1:5,] C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[1:5,] par(mfrow=c(2,3)) + plot(A1[1,], C1[1,]) plot(A1[1,], B1[1,]) plot(B1[1,], C1[1,]) @@ -185,6 +188,8 @@ active_KKT = function(S, soln, j, mu) { E = rep(0, p) E[j] = 1 G = S %*% soln - E + print(which(soln != 0)) + print(G[j]) return(c(G[soln != 0] * sign(soln)[soln != 0], mu)) } From 6444472cd41456ce627f05986ec4222d39f1f049 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 00:03:09 -0700 Subject: [PATCH 331/396] gradient not current? 
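debiasingRow now also returns result$gradient straight from the solver, but coordinates that are
not revisited after the last sweep may hold stale values. A minimal check, assuming the smooth
part of the wide dual objective is 0.5 * ||X theta||^2 / n + sum(l * theta) as stated in
quadratic_program_wide.c; check_gradient is an illustrative helper, not package API:

check_gradient = function(X, soln, gradient, linear_func) {
    # recompute the smooth-part gradient t(X) %*% X %*% soln / n + l from scratch
    fresh = as.numeric(t(X) %*% (X %*% soln)) / nrow(X) + linear_func
    # a large discrepancy suggests the stored gradient was not current
    max(abs(fresh - gradient))
}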
---
 selectiveInference/R/funs.fixed.R | 3 ++-
 tests/test_debiasing.R | 13 +++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R
index 22a31e5a..8d3f7279 100644
--- a/selectiveInference/R/funs.fixed.R
+++ b/selectiveInference/R/funs.fixed.R
@@ -525,7 +525,8 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep
   }
 
   return(list(soln=result$soln,
-              kkt_check=result$kkt_check))
+              kkt_check=result$kkt_check,
+              gradient=result$gradient))
 
 }
 
diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R
index e2743fea..50b43d27 100644
--- a/tests/test_debiasing.R
+++ b/tests/test_debiasing.R
@@ -146,14 +146,15 @@ mu = 7.791408e-02
 
 tol = 1.e-12
 
-A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol)
-A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol)
+rows = c(1:2)
+A1 = debiasingMatrix(S, FALSE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol)
+A2 = debiasingMatrix(S / n, FALSE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol)
 
-B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol)
-B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol)
+B1 = debiasingMatrix(X, TRUE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol)
+B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol)
 
-C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[1:5,]
-C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[1:5,]
+C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[rows,]
+C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[rows,]
 
 par(mfrow=c(2,3))
 

From 9f2684462758fdafd5871fd748ce032a873209cd Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Fri, 27 Oct 2017 00:07:30 -0700
Subject: [PATCH 332/396] print R2 from lm

---
 tests/test_QP.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_QP.R b/tests/test_QP.R
index 17642259..79638e0e 100644
--- a/tests/test_QP.R
+++ b/tests/test_QP.R
@@ -14,3 +14,4 @@ soln2 = coef(G, s=lam/n, exact=TRUE, x=X, y=Y)[-1]
 print(soln1)
 print(soln2)
 plot(soln1, soln2)
+print(summary(lm(soln1 ~ soln2)))
\ No newline at end of file

From 9c7bfcce31fbcf0bf87d381bceb6fe3d42029e04 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Fri, 27 Oct 2017 09:47:31 -0700
Subject: [PATCH 333/396] without linesearch we now agree with Adel's code at fixed mu

---
 selectiveInference/DESCRIPTION | 2 +-
 selectiveInference/NAMESPACE | 2 +-
 selectiveInference/R/funs.fixed.R | 29 ++++++----
 selectiveInference/R/funs.randomized.R | 31 +++++-----
 tests/test_debiasing.R | 80 +++++++++++++-------------
 5 files changed, 75 insertions(+), 69 deletions(-)

diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION
index d9026221..fad072df 100644
--- a/selectiveInference/DESCRIPTION
+++ b/selectiveInference/DESCRIPTION
@@ -9,7 +9,7 @@ Maintainer: Rob Tibshirani
 Depends:
     glmnet,
     intervals,
-    survival
+    survival,
 Suggests:
     Rmpfr
 Description: New tools for post-selection inference, for use with forward

diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE
index c7d08a1e..d72d56a9 100644
--- a/selectiveInference/NAMESPACE
+++ b/selectiveInference/NAMESPACE
@@ -44,4 +44,4 @@ importFrom("stats", 
dnorm, lsfit, pexp, pnorm, predict, importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") importFrom("Rcpp", "sourceCpp") -importFrom("distr", "Norm", "DExp") + diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 8d3f7279..19284871 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -319,7 +319,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d nsample, rows, verbose=FALSE, - mu=NULL, # starting value of mu + bound=NULL, # starting value of bound linesearch=TRUE, # do a linesearch? scaling_factor=1.5, # multiplicative factor for linesearch max_active=NULL, # how big can active set get? @@ -342,8 +342,8 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d p = ncol(Xinfo); M = matrix(0, length(rows), p); - if (is.null(mu)) { - mu = (1/sqrt(nsample)) * qnorm(1-(0.1/(p^2))) + if (is.null(bound)) { + bound = (1/sqrt(nsample)) * qnorm(1-(0.1/(p^2))) } xperc = 0; @@ -359,7 +359,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d output = debiasingRow(Xinfo, # could be X or t(X) %*% X / n depending on is_wide is_wide, row, - mu, + bound, linesearch=linesearch, scaling_factor=scaling_factor, max_active=max_active, @@ -393,7 +393,7 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n depending on is_wide is_wide, row, - mu, + bound, linesearch=TRUE, # do a linesearch? scaling_factor=1.5, # multiplicative factor for linesearch max_active=NULL, # how big can active set get? @@ -414,9 +414,11 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep max_active = min(nrow(Xinfo), ncol(Xinfo)) } + # Initialize variables soln = rep(0, p) + soln = as.numeric(soln) ever_active = rep(0, p) ever_active[1] = row # 1-based ever_active = as.integer(ever_active) @@ -432,13 +434,16 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep last_output = NULL - Xsoln = rep(0, n) + if (is_wide) { + n = nrow(Xinfo) + Xsoln = as.numeric(rep(0, n)) + } while (counter_idx < max_try) { if (!is_wide) { result = solve_QP(Xinfo, # this is non-neg-def matrix - mu, + bound, max_iter, soln, linear_func, @@ -453,9 +458,9 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep kkt_stop, parameter_stop) } else { - result = solve_QP_wide(Xinfo, # this is a design matrix - rep(mu, p), # vector of Lagrange multipliers - 0, # ridge_term + result = solve_QP_wide(Xinfo, # this is a design matrix + as.numeric(rep(bound, p)), # vector of Lagrange multipliers + 0, # ridge_term max_iter, soln, linear_func, @@ -493,13 +498,13 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep if ((iter < (max_iter+1)) && (counter_idx > 1)) { break; # we've found a feasible point and solved the problem } - mu = mu * scaling_factor; + bound = bound * scaling_factor; } else { # trying to drop the bound parameter further if ((iter == (max_iter + 1)) && (counter_idx > 1)) { result = last_output; # problem seems infeasible because we didn't solve it break; # so we revert to previously found solution } - mu = mu / scaling_factor; + bound = bound / scaling_factor; } # If the active set has grown to a certain size diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 7bf60080..3a171f79 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -3,19 +3,19 @@ # # min 1/2 || y - 
\beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2 -fit_randomized_lasso = function(X, - y, - lam, - noise_scale, - ridge_term, - noise_type=c('gaussian', 'laplace'), - max_iter=100, # how many iterations for each optimization problem - kkt_tol=1.e-4, # tolerance for the KKT conditions - parameter_tol=1.e-8, # tolerance for relative convergence of parameter - objective_tol=1.e-8, # tolerance for relative decrease in objective - objective_stop=FALSE, - kkt_stop=TRUE, - param_stop=TRUE) +randomizedLASSO = function(X, + y, + lam, + noise_scale, + ridge_term, + noise_type=c('gaussian', 'laplace'), + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-8, # tolerance for relative convergence of parameter + objective_tol=1.e-8, # tolerance for relative decrease in objective + objective_stop=FALSE, + kkt_stop=TRUE, + param_stop=TRUE) { n = nrow(X); p = ncol(X) @@ -24,12 +24,11 @@ fit_randomized_lasso = function(X, if (noise_scale > 0) { if (noise_type == 'gaussian') { - D = Norm(mean=0, sd=noise_scale) + perturb_ = rnorm(p) * noise_scale } else if (noise_type == 'laplace') { - D = DExp(rate = 1 / noise_scale) # D is a Laplace distribution with rate = 1. + perturb_ = rexp(p) * (2 * rbinom(p, 1, 0.5) - 1) * noise_scale } - perturb_ = distr::r(D)(p) } else { perturb_ = rep(0, p) } diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R index 50b43d27..8e81b169 100644 --- a/tests/test_debiasing.R +++ b/tests/test_debiasing.R @@ -2,9 +2,9 @@ library(selectiveInference) ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { +InverseLinfty <- function(sigma, n, resol=1.5, bound=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { isgiven <- 1; - if (is.null(mu)){ + if (is.null(bound)){ isgiven <- 0; } @@ -19,43 +19,43 @@ InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e print(paste(xperc,"% done",sep="")); } } if (isgiven==0){ - mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + bound <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); } - mu.stop <- 0; + bound.stop <- 0; try.no <- 1; incr <- 0; - while ((mu.stop != 1)&&(try.no<10)){ + while ((bound.stop != 1)&&(try.no<10)){ last.beta <- beta - output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + output <- InverseLinftyOneRow(sigma, i, bound, maxiter=maxiter, threshold=threshold) beta <- output$optsol iter <- output$iter if (isgiven==1){ - mu.stop <- 1 + bound.stop <- 1 } else{ if (try.no==1){ if (iter == (maxiter+1)){ incr <- 1; - mu <- mu*resol; + bound <- bound*resol; } else { incr <- 0; - mu <- mu/resol; + bound <- bound/resol; } } if (try.no > 1){ if ((incr == 1)&&(iter == (maxiter+1))){ - mu <- mu*resol; + bound <- bound*resol; } if ((incr == 1)&&(iter < (maxiter+1))){ - mu.stop <- 1; + bound.stop <- 1; } if ((incr == 0)&&(iter < (maxiter+1))){ - mu <- mu/resol; + bound <- bound/resol; } if ((incr == 0)&&(iter == (maxiter+1))){ - mu <- mu*resol; + bound <- bound*resol; beta <- last.beta; - mu.stop <- 1; + bound.stop <- 1; } } } @@ -66,14 +66,14 @@ InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e return(M) } -InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { +InverseLinftyOneRow <- function ( sigma, i, bound, maxiter=50, threshold=1e-10) { p <- nrow(sigma); rho <- max(abs(sigma[i,-i])) / sigma[i,i]; - mu0 <- 
rho/(1+rho); + bound0 <- rho/(1+rho); beta <- rep(0,p); - #if (mu >= mu0){ - # beta[i] <- (1-mu0)/sigma[i,i]; + #if (bound >= bound0){ + # beta[i] <- (1-bound0)/sigma[i,i]; # returnlist <- list("optsol" = beta, "iter" = 0); # return(returnlist); #} @@ -82,7 +82,7 @@ InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { last.norm2 <- 1; iter <- 1; iter.old <- 1; - beta[i] <- (1-mu0)/sigma[i,i]; + beta[i] <- (1-bound0)/sigma[i,i]; beta.old <- beta; sigma.tilde <- sigma; diag(sigma.tilde) <- 0; @@ -95,7 +95,7 @@ InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { v <- vs[j]; if (j==i) v <- v+1; - beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + beta[j] <- SoftThreshold(v,bound)/sigma[j,j]; if (oldval != beta[j]){ vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; } @@ -112,7 +112,7 @@ InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { # vs <- -sigma.tilde%*%beta; } - # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, mu)) + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, bound)) } @@ -142,19 +142,21 @@ n = 100; p = 50 X = matrix(rnorm(n * p), n, p) S = t(X) %*% X / n -mu = 7.791408e-02 +debiasing_bound = 7.791408e-02 tol = 1.e-12 -rows = c(1:2) -A1 = debiasingMatrix(S, FALSE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) -A2 = debiasingMatrix(S / n, FALSE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +rows = as.integer(c(1:2)) +print('here') +print(rows) +A1 = debiasingMatrix(S, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) -B1 = debiasingMatrix(X, TRUE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) -B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, mu=mu, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol) +A2 = debiasingMatrix(S / n, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B1 = debiasingMatrix(X, TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) -C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[rows,] -C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[rows,] +C1 = InverseLinfty(S, n, bound=debiasing_bound, maxiter=1000)[rows,] +C2 = InverseLinfty(S / n, n, bound=debiasing_bound, maxiter=1000)[rows,] par(mfrow=c(2,3)) @@ -172,30 +174,30 @@ print(c('C', sum(C1[1,] == 0))) ## Are our points feasible -feasibility = function(S, soln, j, mu) { +feasibility = function(S, soln, j, debiasing_bound) { p = nrow(S) E = rep(0, p) E[j] = 1 G = S %*% soln - E - return(c(max(abs(G)), mu)) + return(c(max(abs(G)), debiasing_bound)) } -print(c('feasibility A', feasibility(S, A1[1,], 1, mu))) -print(c('feasibility B', feasibility(S, B1[1,], 1, mu))) -print(c('feasibility C', feasibility(S, C1[1,], 1, mu))) +print(c('feasibility A', feasibility(S, A1[1,], 1, debiasing_bound))) +print(c('feasibility B', feasibility(S, B1[1,], 1, debiasing_bound))) +print(c('feasibility C', feasibility(S, C1[1,], 1, debiasing_bound))) -active_KKT = function(S, soln, j, mu) { +active_KKT = function(S, soln, j, debiasing_bound) { p = nrow(S) E = rep(0, p) E[j] = 1 G = S %*% soln - E print(which(soln != 0)) print(G[j]) - 
return(c(G[soln != 0] * sign(soln)[soln != 0], mu)) + return(c(G[soln != 0] * sign(soln)[soln != 0], debiasing_bound)) } -print(c('active_KKT A', active_KKT(S, A1[1,], 1, mu))) -print(c('active_KKT B', active_KKT(S, B1[1,], 1, mu))) -print(c('active_KKT C', active_KKT(S, C1[1,], 1, mu))) +print(c('active_KKT A', active_KKT(S, A1[1,], 1, debiasing_bound))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, debiasing_bound))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, debiasing_bound))) From c2460a162cab1db17958f02c71cfcdab6234da8a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 10:03:45 -0700 Subject: [PATCH 334/396] fixing documentation --- selectiveInference/NAMESPACE | 1 + selectiveInference/man/debiasingMatrix.Rd | 27 +++++++++-- selectiveInference/src/quadratic_program.c | 43 +++++++++-------- .../src/quadratic_program_wide.c | 46 ++++++++++--------- 4 files changed, 71 insertions(+), 46 deletions(-) diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index d72d56a9..e7b1e800 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -43,5 +43,6 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") +importFrom("stats", "rbinom", "rexp") importFrom("Rcpp", "sourceCpp") diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd index 4da925e6..b95c75b1 100644 --- a/selectiveInference/man/debiasingMatrix.Rd +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -16,14 +16,18 @@ debiasingMatrix(Xinfo, nsample, rows, verbose=FALSE, - mu=NULL, + bound=NULL, linesearch=TRUE, scaling_factor=1.5, max_active=NULL, max_try=10, warn_kkt=FALSE, max_iter=100, + kkt_stop=TRUE, + parameter_stop=TRUE, + objective_stop=TRUE, kkt_tol=1.e-4, + parameter_tol=1.e-4, objective_tol=1.e-8) } \arguments{ @@ -38,7 +42,7 @@ matrix of interest is t(X) %*% X / nrow(X). } \item{nsample}{ Number of samples used in forming the cross-covariance matrix. -Used for default value of the bound parameter mu. +Used for default value of the bound parameter. } \item{rows}{ Which rows of the approximate inverse to compute. @@ -46,7 +50,7 @@ Which rows of the approximate inverse to compute. \item{verbose}{ Print out progress as rows are being computed. } -\item{mu}{ +\item{bound}{ Initial bound parameter for each row. Will be changed if linesearch is TRUE. } @@ -72,10 +76,25 @@ descent algorithm. How many full iterations to run of the coordinate descent for each value of the bound parameter. } +\item{kkt_stop}{ +If TRUE, check to stop coordinate descent when KKT conditions are approximately satisfied. +} +\item{parameter_stop}{ +If TRUE, check to stop coordinate descent based on relative convergence of parameter vector, +checked at geometrically spaced iterations 2^k. +} +\item{objective_stop}{ +If TRUE, check to stop coordinate descent based on relative decrease of objective value, +checked at geometrically spaced iterations 2^k. +} \item{kkt_tol}{ Tolerance value for assessing whether KKT conditions for solving the dual problem and feasibility of the original problem. } +\item{parameter_tol}{ +Tolerance value for assessing convergence of the problem using relative +convergence of the parameter. +} \item{objective_tol}{ Tolerance value for assessing convergence of the problem using relative decrease of the objective. 
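For orientation, a small usage sketch consistent with the synopsis and arguments above; the
inputs are illustrative and not taken from the package examples:

library(selectiveInference)
set.seed(1)
n = 50; p = 100
X = matrix(rnorm(n * p), n, p)
# wide regime: pass the design matrix itself and set is_wide = TRUE
M_wide = debiasingMatrix(X, TRUE, n, rows=1:3)
# otherwise pass the cross-covariance t(X) %*% X / n with is_wide = FALSE
S = t(X) %*% X / n
M_narrow = debiasingMatrix(S, FALSE, n, rows=1:3)
# each row of the result approximates the corresponding row of a
# (regularized) inverse of S, as in display (4) of Javanmard and Montanari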
@@ -86,7 +105,7 @@ This function computes an approximate inverse as described in Javanmard and Montanari (2013), specifically display (4). The problem is solved by considering a dual problem which has an objective similar to a LASSO problem and is solvable -by coordinate descent. For some values of mu the original +by coordinate descent. For some values of bound the original problem may not be feasible, in which case the dual problem has no solution. An attempt to detect this is made by stopping when the active set grows quite large, determined by max_active. diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index 822ddf53..1f7fcb3a 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -385,10 +385,12 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ } } - // Check based on norm -- from Adel's debiasing code - if (param_stop) { - if (iter == 2 * iter_old) { + if (iter == 2 * iter_old) { // Geometric iterations from Adel's code + + // Check based on norm + + if (param_stop) { iter_old = iter; norm_diff = 0; norm_last = 0; @@ -407,6 +409,24 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ break; } } + + // Check relative decrease of objective + + if (objective_stop) { + new_value = objective_qp(nndef_ptr, + linear_func_ptr, + ever_active_ptr, + nactive_ptr, + nfeature, + bound, + theta); + + if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { + break; + } + old_value = new_value; + } + } // Check size of active set @@ -415,23 +435,6 @@ int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ break; } - // Check relative decrease of objective - - if (objective_stop) { - new_value = objective_qp(nndef_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - nfeature, - bound, - theta); - - if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { - break; - } - old_value = new_value; - } - } return(iter); } diff --git a/selectiveInference/src/quadratic_program_wide.c b/selectiveInference/src/quadratic_program_wide.c index 3546fcda..41e29cec 100644 --- a/selectiveInference/src/quadratic_program_wide.c +++ b/selectiveInference/src/quadratic_program_wide.c @@ -534,10 +534,11 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX } } - // Check based on norm -- from Adel's debiasing code + if (iter == 2 * iter_old) { // Geometric iterations from Adel's code - if (param_stop) { - if (iter == 2 * iter_old) { + // Check based on norm + + if (param_stop) { iter_old = iter; norm_diff = 0; norm_last = 0; @@ -556,32 +557,33 @@ int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX break; } } + + // Check relative decrease of objective + + if (objective_stop) { + new_value = objective_wide(X_theta_ptr, + linear_func_ptr, + ever_active_ptr, + nactive_ptr, + ncase, + nfeature, + bound_ptr, + ridge_term, + theta_ptr); + + if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { + break; + } + old_value = new_value; + } } + // Check size of active set if (*nactive_ptr >= max_active) { break; } - // Check relative decrease of objective - - if (objective_stop) { - new_value = objective_wide(X_theta_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - ncase, - nfeature, - bound_ptr, - ridge_term, - theta_ptr); - - if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { - break; - } - old_value = 
new_value; - } - } return(iter); } From ce15b68f64132e2929530d92f3a84326d7d59a5d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 10:29:46 -0700 Subject: [PATCH 335/396] checking comparison example again -- looks good --- selectiveInference/R/funs.fixed.R | 13 +- selectiveInference/man/debiasingMatrix.Rd | 4 +- tests/debiased_lasso/comparison_scaled.R | 78 ++ tests/debiased_lasso/comparison_unscaled.R | 78 ++ tests/debiased_lasso/javanmard_montanari.R | 770 ++++++++++++++++++++ tests/{ => debiased_lasso}/test_debiasing.R | 0 tests/debiased_lasso/test_debiasing_wide.R | 202 +++++ 7 files changed, 1138 insertions(+), 7 deletions(-) create mode 100644 tests/debiased_lasso/comparison_scaled.R create mode 100644 tests/debiased_lasso/comparison_unscaled.R create mode 100644 tests/debiased_lasso/javanmard_montanari.R rename tests/{ => debiased_lasso}/test_debiasing.R (100%) create mode 100644 tests/debiased_lasso/test_debiasing_wide.R diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 19284871..7c8318d9 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -8,7 +8,7 @@ fixedLassoInf <- function(x, y, beta, sigma=NULL, alpha=0.1, type=c("partial", "full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE, - linesearch.try=10) { + linesearch.try=10, offset_correction=TRUE) { family = match.arg(family) this.call = match.call() @@ -197,6 +197,9 @@ fixedLassoInf <- function(x, y, beta, M = M[-1,] # remove intercept row null_value = null_value[-1] # remove intercept element } + if (!offset_correction) { + null_value = 0 * null_value + } } else if (type=="partial" || p > n) { xa = x[,vars,drop=F] M = pinv(crossprod(xa)) %*% t(xa) @@ -325,13 +328,13 @@ debiasingMatrix = function(Xinfo, # could be X or t(X) %*% X / n d max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? - max_iter=100, # how many iterations for each optimization problem + max_iter=50, # how many iterations for each optimization problem kkt_stop=TRUE, # stop based on KKT conditions? parameter_stop=TRUE, # stop based on relative convergence of parameter? objective_stop=TRUE, # stop based on relative decrease in objective? kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-4, # tolerance for relative convergence of parameter - objective_tol=1.e-8 # tolerance for relative decrease in objective + objective_tol=1.e-4 # tolerance for relative decrease in objective ) { @@ -399,13 +402,13 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep max_active=NULL, # how big can active set get? max_try=10, # how many steps in linesearch? warn_kkt=FALSE, # warn if KKT does not seem to be satisfied? - max_iter=100, # how many iterations for each optimization problem + max_iter=50, # how many iterations for each optimization problem kkt_stop=TRUE, # stop based on KKT conditions? parameter_stop=TRUE, # stop based on relative convergence of parameter? objective_stop=TRUE, # stop based on relative decrease in objective? 
kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-4, # tolerance for relative convergence of parameter - objective_tol=1.e-8 # tolerance for relative decrease in objective + objective_tol=1.e-4 # tolerance for relative decrease in objective ) { p = ncol(Xinfo) diff --git a/selectiveInference/man/debiasingMatrix.Rd b/selectiveInference/man/debiasingMatrix.Rd index b95c75b1..6a348506 100644 --- a/selectiveInference/man/debiasingMatrix.Rd +++ b/selectiveInference/man/debiasingMatrix.Rd @@ -22,13 +22,13 @@ debiasingMatrix(Xinfo, max_active=NULL, max_try=10, warn_kkt=FALSE, - max_iter=100, + max_iter=50, kkt_stop=TRUE, parameter_stop=TRUE, objective_stop=TRUE, kkt_tol=1.e-4, parameter_tol=1.e-4, - objective_tol=1.e-8) + objective_tol=1.e-4) } \arguments{ \item{Xinfo}{ diff --git a/tests/debiased_lasso/comparison_scaled.R b/tests/debiased_lasso/comparison_scaled.R new file mode 100644 index 00000000..e0c0a6a2 --- /dev/null +++ b/tests/debiased_lasso/comparison_scaled.R @@ -0,0 +1,78 @@ +source('javanmard_montanari.R') + +############################################## + +# Runs nsims simulations under the global null, computing p-values +# using both the old code (slow one using Adel's code) and the new +# code (faster using Jon's code), and produces qq-plots for both. +# Runing 50 sims takes about 10-15 mins because old code is slow, so +# feel free to lower nsims if you want + + +library(selectiveInference) +library(glmnet) + +# set.seed(424) + +n=100 +p=200 + +sigma=.5 + +theor_lambda = sigma * sqrt(2 * log(p)) +lambda=c(0.25, 0.5, 1, 0.8 * theor_lambda, theor_lambda) + +for (j in c(3,4,5,1,2)) { + +thresh = 1e-10 + +beta=rep(0,p) +type="full" + +nsim = 20 + +scaling = sqrt(n) +pvs_old = c() +pvs_new <- c() +pvs_old_0 = c() # don't add the offset correction +pvs_new_0 = c() # don't add the offset correction +for (i in 1:nsim) { + cat(i,fill=T) + x = matrix(rnorm(n*p),n,p) + x = scale(x,T,T) / scaling + mu = x%*%beta + y=mu+sigma*rnorm(n) + + # first run glmnet + gfit=glmnet(x,y,intercept=F,standardize=F,thresh=thresh) + + bhat = coef(gfit, s=lambda[j]/(sqrt(n) * scaling), exact=TRUE,x=x,y=y)[-1] + + if(sum(bhat != 0) > 0) { + + # compute fixed lambda p-values and selection intervals + + aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) + bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) + cc = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) + dd = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) + pvs_new <- c(pvs_new, aa$pv, recursive=TRUE) + pvs_old <- c(pvs_old, bb$pv,recursive=TRUE) + pvs_new_0 <- c(pvs_new_0, cc$pv, recursive=TRUE) + pvs_old_0 <- c(pvs_old_0, dd$pv, recursive=TRUE) + + cat() + } +} + +#check uniformity + +png(paste('comparison_scaled', j, '.png', sep='')) +plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values') +plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE) +plot(ecdf(pvs_old_0), pch=23, col='red', add=TRUE) +plot(ecdf(pvs_new_0), pch=24, col='black', add=TRUE) +abline(0,1) +legend("bottomright", legend=c("Old","New", "Old 0", "New 0"), pch=c(23,24,23,24), pt.bg=c("green","purple","red","black")) +dev.off() +} \ No newline at end of file diff --git a/tests/debiased_lasso/comparison_unscaled.R b/tests/debiased_lasso/comparison_unscaled.R new file mode 100644 index 00000000..3bb408e3 --- /dev/null +++ 
diff --git a/tests/debiased_lasso/comparison_unscaled.R b/tests/debiased_lasso/comparison_unscaled.R
new file mode 100644
index 00000000..3bb408e3
--- /dev/null
+++ b/tests/debiased_lasso/comparison_unscaled.R
@@ -0,0 +1,78 @@
+source('javanmard_montanari.R')
+
+##############################################
+
+# Runs nsims simulations under the global null, computing p-values
+# using both the old code (the slow one, using Adel's code) and the new
+# code (faster, using Jon's code), and produces ECDF plots for both.
+# Running 50 sims takes about 10-15 mins because the old code is slow, so
+# feel free to lower nsims if you want
+
+
+library(selectiveInference)
+library(glmnet)
+
+# set.seed(424)
+
+n=100
+p=200
+
+sigma=.5
+
+theor_lambda = sigma * sqrt(2 * log(p))
+lambda=c(0.25, 0.5, 1, 0.8 * theor_lambda, theor_lambda)
+
+for (j in c(3,4,5,1,2)) {
+
+thresh = 1e-10
+
+beta=rep(0,p)
+type="full"
+
+nsim = 20
+
+scaling = sqrt(n)
+pvs_old = c()
+pvs_new <- c()
+pvs_old_0 = c() # don't add the offset correction
+pvs_new_0 = c() # don't add the offset correction
+for (i in 1:nsim) {
+    cat(i,fill=T)
+    x = matrix(rnorm(n*p),n,p)
+    x = scale(x,T,T) / scaling
+    mu = x%*%beta
+    y=mu+sigma*rnorm(n)
+
+    # first run glmnet
+    gfit=glmnet(x,y,intercept=F,standardize=F,thresh=thresh)
+
+    bhat = coef(gfit, s=lambda[j]/(sqrt(n) * scaling), exact=TRUE,x=x,y=y)[-1]
+
+    if(sum(bhat != 0) > 0) {
+
+        # compute fixed lambda p-values and selection intervals
+
+        aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type)
+        bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type)
+        cc = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE)
+        dd = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE)
+        pvs_new <- c(pvs_new, aa$pv, recursive=TRUE)
+        pvs_old <- c(pvs_old, bb$pv,recursive=TRUE)
+        pvs_new_0 <- c(pvs_new_0, cc$pv, recursive=TRUE)
+        pvs_old_0 <- c(pvs_old_0, dd$pv, recursive=TRUE)
+
+        cat()
+    }
+}
+
+#check uniformity
+
+png(paste('comparison_unscaled', j, '.png', sep=''))
+plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values')
+plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE)
+plot(ecdf(pvs_old_0), pch=23, col='red', add=TRUE)
+plot(ecdf(pvs_new_0), pch=24, col='black', add=TRUE)
+abline(0,1)
+legend("bottomright", legend=c("Old","New", "Old 0", "New 0"), pch=c(23,24,23,24), pt.bg=c("green","purple","red","black"))
+dev.off()
+}
\ No newline at end of file
diff --git a/tests/debiased_lasso/javanmard_montanari.R b/tests/debiased_lasso/javanmard_montanari.R
new file mode 100644
index 00000000..09f33558
--- /dev/null
+++ b/tests/debiased_lasso/javanmard_montanari.R
@@ -0,0 +1,770 @@
+# First part is only functions from the old code.
At the bottom is +# the bit of code that actually compares the old vs new code + +###################################################### + +### Old code (using Adel's R code) + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(mu)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + mu.stop <- 0; + try.no <- 1; + incr <- 0; + while ((mu.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + mu.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + mu <- mu*resol; + } else { + incr <- 0; + mu <- mu/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + mu <- mu*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + mu.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + mu <- mu/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + mu <- mu*resol; + beta <- last.beta; + mu.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + mu0 <- rho/(1+rho); + beta <- rep(0,p); + + #if (mu >= mu0){ + # beta[i] <- (1-mu0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-mu0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, mu)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Functions borrowed from selective Inference (only fixedLassoInf and fixedLasso.poly are modified) + +# Special linear time order function, works only when x +# is a scrambled vector of integers. + +Order <- function(x) { + n = length(x) + o = numeric(n) + o[x] = Seq(1,n) + return(o) +} + +# Returns a sequence of integers from a to b if a <= b, +# otherwise nothing. You have no idea how important this +# function is... + +Seq <- function(a, b, ...) { + if (a<=b) return(seq(a,b,...)) + else return(numeric(0)) +} + +# Returns the sign of x, with Sign(0)=1. 
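# A quick gloss on the convention above (a reading of the code, not text
# from the original patch): base R's sign() maps 0 to 0, whereas Sign()
# below maps 0 to +1, so downstream sign bookkeeping never sees a 0. E.g.
#   sign(c(-2, 0, 3))   # -1  0  1
#   Sign(c(-2, 0, 3))   # -1  1  1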
+
+Sign <- function(x) {
+  return(-1+2*(x>=0))
+}
+
+##############################
+
+# Centering and scaling convenience function
+
+standardize <- function(x, y, intercept, normalize) {
+  x = as.matrix(x)
+  y = as.numeric(y)
+  n = nrow(x)
+  p = ncol(x)
+
+  if (intercept) {
+    bx = colMeans(x)
+    by = mean(y)
+    x = scale(x,bx,FALSE)
+    y = y-mean(y)
+  } else {
+    bx = rep(0,p)
+    by = 0
+  }
+  if (normalize) {
+    sx = sqrt(colSums(x^2))
+    x = scale(x,FALSE,sx)
+  } else {
+    sx = rep(1,p)
+  }
+
+  return(list(x=x,y=y,bx=bx,by=by,sx=sx))
+}
+
+##############################
+
+# Interpolation function to get coefficients
+
+coef.interpolate <- function(betas, s, knots, dec=TRUE) {
+  # Sort the s values
+  o = order(s,dec=dec)
+  s = s[o]
+
+  k = length(s)
+  mat = matrix(rep(knots,each=k),nrow=k)
+  if (dec) b = s >= mat
+  else b = s <= mat
+  blo = max.col(b,ties.method="first")
+  bhi = pmax(blo-1,1)
+
+  i = bhi==blo
+  p = numeric(k)
+  p[i] = 0
+  p[!i] = ((s-knots[blo])/(knots[bhi]-knots[blo]))[!i]
+
+  beta = t((1-p)*t(betas[,blo,drop=FALSE]) + p*t(betas[,bhi,drop=FALSE]))
+  colnames(beta) = as.character(round(s,3))
+  rownames(beta) = NULL
+
+  # Return in original order
+  o = order(o)
+  return(beta[,o,drop=FALSE])
+}
+
+##############################
+
+checkargs.xy <- function(x, y) {
+  if (missing(x)) stop("x is missing")
+  if (is.null(x) || !is.matrix(x)) stop("x must be a matrix")
+  if (missing(y)) stop("y is missing")
+  if (is.null(y) || !is.numeric(y)) stop("y must be numeric")
+  if (ncol(x) == 0) stop("There must be at least one predictor [must have ncol(x) > 0]")
+  if (checkcols(x)) stop("x cannot have duplicate columns")
+  if (length(y) == 0) stop("There must be at least one data point [must have length(y) > 0]")
+  if (length(y)!=nrow(x)) stop("Dimensions don't match [length(y) != nrow(x)]")
+}
+
+checkargs.misc <- function(sigma=NULL, alpha=NULL, k=NULL,
+                           gridrange=NULL, gridpts=NULL, griddepth=NULL,
+                           mult=NULL, ntimes=NULL,
+                           beta=NULL, lambda=NULL, tol.beta=NULL, tol.kkt=NULL,
+                           bh.q=NULL) {
+
+  if (!is.null(sigma) && sigma <= 0) stop("sigma must be > 0")
+  if (!is.null(lambda) && lambda < 0) stop("lambda must be >= 0")
+  if (!is.null(alpha) && (alpha <= 0 || alpha >= 1)) stop("alpha must be between 0 and 1")
+  if (!is.null(k) && length(k) != 1) stop("k must be a single number")
+  if (!is.null(k) && (k < 1 || k != floor(k))) stop("k must be an integer >= 1")
+  if (!is.null(gridrange) && (length(gridrange) != 2 || gridrange[1] > gridrange[2]))
+    stop("gridrange must be an interval of the form c(a,b) with a <= b")
+  if (!is.null(gridpts) && (gridpts < 20 || gridpts != round(gridpts)))
+    stop("gridpts must be an integer >= 20")
+  if (!is.null(griddepth) && (griddepth > 10 || griddepth != round(griddepth)))
+    stop("griddepth must be an integer <= 10")
+  if (!is.null(mult) && mult < 0) stop("mult must be >= 0")
+  if (!is.null(ntimes) && (ntimes <= 0 || ntimes != round(ntimes)))
+    stop("ntimes must be an integer > 0")
+  if (!is.null(beta) && sum(beta!=0)==0) stop("Value of lambda too large, beta is zero")
+  # if (!is.null(lambda) && length(lambda) != 1) stop("lambda must be a single number")
+  if (!is.null(lambda) && length(lambda) != 1 && length(lambda) != length(beta)) stop("lambda must be a single number or equal to the length of beta")
+  if (!is.null(lambda) && lambda < 0) stop("lambda must be >=0")
+  if (!is.null(tol.beta) && tol.beta <= 0) stop("tol.beta must be > 0")
+  if (!is.null(tol.kkt) && tol.kkt <= 0) stop("tol.kkt must be > 0")
+}
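# The duplicate-column check defined just below (checkcols) relies on a
# probability-one trick: project all columns of A onto a single random
# vector; two projections tie only when the columns are identical (up to
# an event of probability zero). A hedged mini-example, with a made-up A:
#   A = cbind(1:4, 4:1, 1:4)               # columns 1 and 3 coincide
#   b = rnorm(nrow(A))
#   any(diff(sort(t(A) %*% b)) == 0)       # TRUE -- duplicate detected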
+
+# Make sure that no two columns of A are the same
+# (this works with probability one).
+
+checkcols <- function(A) {
+  b = rnorm(nrow(A))
+  a = sort(t(A)%*%b)
+  if (any(diff(a)==0)) return(TRUE)
+  return(FALSE)
+}
+
+estimateSigma <- function(x, y, intercept=TRUE, standardize=TRUE) {
+  checkargs.xy(x,rep(0,nrow(x)))
+  if(nrow(x)<10) stop("Number of observations must be at least 10 to run estimateSigma")
+  cvfit=cv.glmnet(x,y,intercept=intercept,standardize=standardize)
+  lamhat=cvfit$lambda.min
+  fit=glmnet(x,y,standardize=standardize)
+  yhat=predict(fit,x,s=lamhat)
+  nz=sum(predict(fit,s=lamhat, type="coef")!=0)
+  sigma=sqrt(sum((y-yhat)^2)/(length(y)-nz-1))
+  return(list(sigmahat=sigma, df=nz))
+}
+
+# Update the QR factorization, after a column has been
+# added. Here Q1 is m x n, Q2 is m x k, and R is n x n.
+
+updateQR <- function(Q1,Q2,R,col) {
+  m = nrow(Q1)
+  n = ncol(Q1)
+  k = ncol(Q2)
+
+  a = .C("update1",
+         Q2=as.double(Q2),
+         w=as.double(t(Q2)%*%col),
+         m=as.integer(m),
+         k=as.integer(k),
+         dup=FALSE,
+         package="selectiveInference")
+
+  Q2 = matrix(a$Q2,nrow=m)
+  w = c(t(Q1)%*%col,a$w)
+
+  # Re-structure: delete a column from Q2, add one to
+  # Q1, and expand R
+  Q1 = cbind(Q1,Q2[,1])
+  Q2 = Q2[,-1,drop=FALSE]
+  R = rbind(R,rep(0,n))
+  R = cbind(R,w[Seq(1,n+1)])
+
+  return(list(Q1=Q1,Q2=Q2,R=R))
+}
+
+# Moore-Penrose pseudo inverse for symmetric matrices
+
+pinv <- function(A, tol=.Machine$double.eps) {
+  e = eigen(A)
+  v = Re(e$vec)
+  d = Re(e$val)
+  d[d > tol] = 1/d[d > tol]
+  d[d < tol] = 0
+  if (length(d)==1) return(v*d*v)
+  else return(v %*% diag(d) %*% t(v))
+}
+
+##############################
+
+# Assuming that grid is in sorted order from smallest to largest,
+# and vals are monotonically increasing function values over the
+# grid, returns the grid end points such that the corresponding
+# vals are approximately equal to {val1, val2}
+
+grid.search <- function(grid, fun, val1, val2, gridpts=100, griddepth=2) {
+  n = length(grid)
+  vals = fun(grid)
+
+  ii = which(vals >= val1)
+  jj = which(vals <= val2)
+  if (length(ii)==0) return(c(grid[n],Inf))  # All vals < val1
+  if (length(jj)==0) return(c(-Inf,grid[1])) # All vals > val2
+  # RJT: the above logic is correct ... but for simplicity, instead,
+  # we could just return c(-Inf,Inf)
+
+  i1 = min(ii); i2 = max(jj)
+  if (i1==1) lo = -Inf
+  else lo = grid.bsearch(grid[i1-1],grid[i1],fun,val1,gridpts,
+                         griddepth-1,below=TRUE)
+  if (i2==n) hi = Inf
+  else hi = grid.bsearch(grid[i2],grid[i2+1],fun,val2,gridpts,
+                         griddepth-1,below=FALSE)
+  return(c(lo,hi))
+}
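# How grid.search() above (with grid.bsearch() below) gets used downstream:
# poly.int() inverts the monotone truncated-Gaussian pivot in its mean
# parameter, finding where it crosses alpha/2 and 1-alpha/2 to form the
# selective confidence interval. A hedged stand-in pivot, monotone
# increasing in mu, assuming grid.search() has been sourced:
#   fun = function(mu) pnorm(mu - 1.5)
#   grid.search(seq(-10, 10, length = 100), fun, 0.05, 0.95)
#   # approximately c(1.5 + qnorm(0.05), 1.5 + qnorm(0.95))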
+
+# Repeated binary search to find the point x in the interval [left, right]
+# that satisfies f(x) approx equal to val. If below=TRUE, then we seek
+# x such that the above holds and f(x) <= val; else we seek f(x) >= val.
+
+grid.bsearch <- function(left, right, fun, val, gridpts=100, griddepth=1, below=TRUE) {
+  n = gridpts
+  depth = 1
+
+  while (depth <= griddepth) {
+    grid = seq(left,right,length=n)
+    vals = fun(grid)
+
+    if (below) {
+      ii = which(vals >= val)
+      if (length(ii)==0) return(grid[n])   # All vals < val (shouldn't happen)
+      if ((i0=min(ii))==1) return(grid[1]) # All vals > val (shouldn't happen)
+      left = grid[i0-1]
+      right = grid[i0]
+    }
+
+    else {
+      ii = which(vals <= val)
+      if (length(ii)==0) return(grid[1])   # All vals > val (shouldn't happen)
+      if ((i0=max(ii))==n) return(grid[n]) # All vals < val (shouldn't happen)
+      left = grid[i0]
+      right = grid[i0+1]
+    }
+
+    depth = depth+1
+  }
+
+  return(ifelse(below, left, right))
+}
+
+# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector
+
+tnorm.surv <- function(z, mean, sd, a, b, bits=NULL) {
+  z = max(min(z,b),a)
+
+  # Check silly boundary cases
+  p = numeric(length(mean))
+  p[mean==-Inf] = 0
+  p[mean==Inf] = 1
+
+  # Try the multi precision floating point calculation first
+  o = is.finite(mean)
+  mm = mean[o]
+  pp = mpfr.tnorm.surv(z,mm,sd,a,b,bits)
+
+  # If there are any NAs, then settle for an approximation
+  oo = is.na(pp)
+  if (any(oo)) pp[oo] = bryc.tnorm.surv(z,mm[oo],sd,a,b)
+
+  p[o] = pp
+  return(p)
+}
+
+# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector, using
+# multi precision floating point calculations thanks to the Rmpfr package
+
+mpfr.tnorm.surv <- function(z, mean=0, sd=1, a, b, bits=NULL) {
+  # If bits is not NULL, then we are supposed to be using Rmpfr
+  # (note that this will fail if Rmpfr is not installed; but
+  # by the time this function is being executed, this should
+  # have been properly checked at a higher level; and if Rmpfr
+  # is not installed, bits would have been previously set to NULL)
+  if (!is.null(bits)) {
+    z = Rmpfr::mpfr((z-mean)/sd, precBits=bits)
+    a = Rmpfr::mpfr((a-mean)/sd, precBits=bits)
+    b = Rmpfr::mpfr((b-mean)/sd, precBits=bits)
+    return(as.numeric((Rmpfr::pnorm(b)-Rmpfr::pnorm(z))/
+                      (Rmpfr::pnorm(b)-Rmpfr::pnorm(a))))
+  }
+
+  # Else, just use standard floating point calculations
+  z = (z-mean)/sd
+  a = (a-mean)/sd
+  b = (b-mean)/sd
+  return((pnorm(b)-pnorm(z))/(pnorm(b)-pnorm(a)))
+}
+
+# Returns Prob(Z>z | Z in [a,b]), where mean can be a vector, based on
+# A UNIFORM APPROXIMATION TO THE RIGHT NORMAL TAIL INTEGRAL, W Bryc
+# Applied Mathematics and Computation
+# Volume 127, Issues 2-3, 15 April 2002, Pages 365--374
+# https://math.uc.edu/~brycw/preprint/z-tail/z-tail.pdf
+
+bryc.tnorm.surv <- function(z, mean=0, sd=1, a, b) {
+  z = (z-mean)/sd
+  a = (a-mean)/sd
+  b = (b-mean)/sd
+  n = length(mean)
+
+  term1 = exp(z*z)
+  o = a > -Inf
+  term1[o] = ff(a[o])*exp(-(a[o]^2-z[o]^2)/2)
+  term2 = rep(0,n)
+  oo = b < Inf
+  term2[oo] = ff(b[oo])*exp(-(b[oo]^2-z[oo]^2)/2)
+  p = (ff(z)-term2)/(term1-term2)
+
+  # Sometimes the approximation can give wacky p-values,
+  # outside of [0,1] ..
+  #p[p<0 | p>1] = NA
+  p = pmin(1,pmax(0,p))
+  return(p)
+}
+
+ff <- function(z) {
+  return((z^2+5.575192695*z+12.7743632)/
+         (z^3*sqrt(2*pi)+14.38718147*z*z+31.53531977*z+2*12.77436324))
+}
+
+############## MODIFIED FUNCTIONS ###############
+
+# Lasso inference function (for fixed lambda).
Note: here we are providing inference +# for the solution of +# min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 + +oldFixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, + sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE, offset_correction=TRUE) { + + family = match.arg(family) + this.call = match.call() + type = match.arg(type) + + if(family=="binomial") { + if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + else if(family=="cox") { + if(type!="partial") stop("Only type= partial allowed with Cox family") + out=fixedCoxLassoInf(x,y,status,beta,lambda,alpha=alpha, type="partial",tol.beta=tol.beta, + tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) + return(out) + } + + else{ + + + + checkargs.xy(x,y) + if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") + if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (type == "full") { + if (p > n) { + # need intercept (if there is one) for debiased lasso + hbeta = beta + if (intercept == T) { + if (length(beta) != p + 1) { + stop("Since type='full', p > n, and intercept=TRUE, beta must have length equal to ncol(x)+1") + } + # remove intercept if included + beta = beta[-1] + } else if (length(beta) != p) { + stop("Since family='gaussian', type='full' and intercept=FALSE, beta must have length equal to ncol(x)") + } + } + } else if (length(beta) != p) { + stop("Since family='gaussian' and type='partial', beta must have length equal to ncol(x)") + } + + checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, + gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) + if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { + warning("Package Rmpfr is not installed, reverting to standard precision") + bits = NULL + } + + # If glmnet was run with an intercept term, center x and y + if (intercept==TRUE) { + obj = standardize(x,y,TRUE,FALSE) + x = obj$x + y = obj$y + } + + # Check the KKT conditions + g = t(x)%*%(y-x%*%beta) / lambda + if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances)")) + + tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) + # print(tol.coef) + vars = which(abs(beta) > tol.coef) + # print(beta) + # print(vars) + if(length(vars)==0){ + cat("Empty model",fill=T) + return() + } + if (any(sign(g[vars]) != sign(beta[vars]))) + warning(paste("Solution beta does not satisfy the KKT conditions", + "(to within specified tolerances). You might try rerunning", + "glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Get lasso polyhedral region, of form Gy >= u + if (type == 'full' & p > n) out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) + else out = fixedLasso.poly(x,y,beta,lambda,vars) + G = out$G + u = out$u + + # Check polyhedral region + tol.poly = 0.01 + if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + stop(paste("Polyhedral constraints not satisfied; you must recompute beta", + "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", + "and check whether the specified value of lambda is too small", + "(beyond the grid of values visited by glmnet).", + "You might also try rerunning glmnet with a lower setting of the", + "'thresh' parameter, for a more accurate convergence.")) + + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + k = length(vars) + pv = vlo = vup = numeric(k) + vmat = matrix(0,k,n) + ci = tailarea = matrix(0,k,2) + sign = numeric(k) + + if (type=="full" & p > n) { + if (intercept == T) { + pp=p+1 + Xint <- cbind(rep(1,n),x) + # indices of selected predictors + S = c(1,vars + 1) + notS = which(abs(beta) <= tol.coef) + 1 + } else { + pp=p + Xint <- x + # indices of selected predictors + S = vars + notS = which(abs(beta) <= tol.coef) + } + + + XS = Xint[,S] + hbetaS = hbeta[S] + + # Reorder so that active set S is first + Xordered = Xint[,c(S,notS,recursive=T)] + + hsigma <- 1/n*(t(Xordered)%*%Xordered) + hsigmaS <- 1/n*(t(XS)%*%XS) # hsigma[S,S] + hsigmaSinv <- pinv(hsigmaS) # solve(hsigmaS) + + # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R + htheta <- InverseLinfty(hsigma, n, verbose=FALSE) + + # 0-padding matrix + FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) + ithetasigma = (diag(pp)-(htheta%*%hsigma)) + + M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) + # vector which is offset for testing debiased beta's + meanoffset <- -(((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) + if (intercept == T) { + M = M[-1,] # remove intercept row + meanoffset = meanoffset[-1] # remove intercept element + } + if (offset_correction == FALSE) { + meanoffset = 0 * meanoffset + } + } else if (type=="partial" || p > n) { + xa = x[,vars,drop=F] + M = pinv(crossprod(xa)) %*% t(xa) + meanoffset = rep(0,k) + } else { + M = pinv(crossprod(x)) %*% t(x) + M = M[vars,,drop=F] + meanoffset = rep(0,k) + } + + for (j in 1:k) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + vj = M[j,] + mj = sqrt(sum(vj^2)) + vj = vj / mj # Standardize (divide by norm of vj) + sign[j] = sign(sum(vj*y)) + vj = sign[j] * vj + + a = poly.pval(y,G,u,vj,offset=meanoffset[j],sigma,bits) + pv[j] = a$pv + vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) + vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) + + a = poly.int(y,G,u,vj,offset=meanoffset[j],sigma,alpha,gridrange=gridrange, + flip=(sign[j]==-1),bits=bits) + ci[j,] = a$int * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + sd=sigma*sqrt(rowSums(vmat^2)), + coef0=vmat%*%y, + call=this.call,M=M) + class(out) = "fixedLassoInf" + return(out) + } +} + + +fixedLasso.poly= + function(x, y, beta, lambda, a, inactive = FALSE) { + xa = x[,a,drop=F] + xac = x[,!a,drop=F] + xai = pinv(crossprod(xa)) + xap = xai %*% t(xa) + za = sign(beta[a]) + if (length(za)>1) dz = diag(za) + if (length(za)==1) dz = matrix(za,1,1) + + if (inactive) { + P = diag(1,nrow(xa)) - xa %*% xap + + G = -rbind( + 1/lambda * t(xac) %*% P, + -1/lambda * t(xac) 
%*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + 1 - t(xac) %*% t(xap) %*% za, + 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + } else { + G = -rbind( + # 1/lambda * t(xac) %*% P, + # -1/lambda * t(xac) %*% P, + -dz %*% xap + ) + lambda2=lambda + if(length(lambda)>1) lambda2=lambda[a] + u = -c( + # 1 - t(xac) %*% t(xap) %*% za, + # 1 + t(xac) %*% t(xap) %*% za, + -lambda2 * dz %*% xai %*% za) + } + + return(list(G=G,u=u)) + } + + +# Main p-value function + +poly.pval <- function(y, G, u, v, sigma, offset=0, bits=NULL) { + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + pv = tnorm.surv(z,0-offset,sd,vlo,vup,bits) + return(list(pv=pv,vlo=vlo,vup=vup)) +} + +# Main confidence interval function + +poly.int <- function(y, G, u, v, sigma, alpha, offset=0, gridrange=c(-100,100), + gridpts=100, griddepth=2, flip=FALSE, bits=NULL) { + + z = sum(v*y) + vv = sum(v^2) + sd = sigma*sqrt(vv) + + rho = G %*% v / vv + vec = (u - G %*% y + rho*z) / rho + vlo = suppressWarnings(max(vec[rho>0])) + vup = suppressWarnings(min(vec[rho<0])) + + xg = seq(gridrange[1]*sd,gridrange[2]*sd,length=gridpts) + fun = function(x) { tnorm.surv(z,x-offset,sd,vlo,vup,bits) } + + int = grid.search(xg,fun,alpha/2,1-alpha/2,gridpts,griddepth) + tailarea = c(fun(int[1]),1-fun(int[2])) + + if (flip) { + int = -int[2:1] + tailarea = tailarea[2:1] + } + + return(list(int=int,tailarea=tailarea)) +} diff --git a/tests/test_debiasing.R b/tests/debiased_lasso/test_debiasing.R similarity index 100% rename from tests/test_debiasing.R rename to tests/debiased_lasso/test_debiasing.R diff --git a/tests/debiased_lasso/test_debiasing_wide.R b/tests/debiased_lasso/test_debiasing_wide.R new file mode 100644 index 00000000..62801da9 --- /dev/null +++ b/tests/debiased_lasso/test_debiasing_wide.R @@ -0,0 +1,202 @@ +library(selectiveInference) + + +## Approximates inverse covariance matrix theta +InverseLinfty <- function(sigma, n, resol=1.5, bound=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) { + isgiven <- 1; + if (is.null(bound)){ + isgiven <- 0; + } + + p <- nrow(sigma); + M <- matrix(0, p, p); + xperc = 0; + xp = round(p/10); + for (i in 1:p) { + if ((i %% xp)==0){ + xperc = xperc+10; + if (verbose) { + print(paste(xperc,"% done",sep="")); } + } + if (isgiven==0){ + bound <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2))); + } + bound.stop <- 0; + try.no <- 1; + incr <- 0; + while ((bound.stop != 1)&&(try.no<10)){ + last.beta <- beta + output <- InverseLinftyOneRow(sigma, i, bound, maxiter=maxiter, threshold=threshold) + beta <- output$optsol + iter <- output$iter + if (isgiven==1){ + bound.stop <- 1 + } + else{ + if (try.no==1){ + if (iter == (maxiter+1)){ + incr <- 1; + bound <- bound*resol; + } else { + incr <- 0; + bound <- bound/resol; + } + } + if (try.no > 1){ + if ((incr == 1)&&(iter == (maxiter+1))){ + bound <- bound*resol; + } + if ((incr == 1)&&(iter < (maxiter+1))){ + bound.stop <- 1; + } + if ((incr == 0)&&(iter < (maxiter+1))){ + bound <- bound/resol; + } + if ((incr == 0)&&(iter == (maxiter+1))){ + bound <- bound*resol; + beta <- last.beta; + bound.stop <- 1; + } + } + } + try.no <- try.no+1 + } + M[i,] <- beta; + } + return(M) +} + +InverseLinftyOneRow <- function ( sigma, i, bound, maxiter=50, threshold=1e-10) { + p <- nrow(sigma); + rho <- max(abs(sigma[i,-i])) / sigma[i,i]; + bound0 <- rho/(1+rho); + beta <- rep(0,p); + + 
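# (A reading of the routine, not text from the original patch: the loop
# below is coordinate descent on the penalized Javanmard--Montanari row
# program
#     min_beta  (1/2) beta' sigma beta - beta[i] + bound * ||beta||_1,
# whose KKT conditions force ||sigma %*% beta - e_i||_inf <= bound; each
# coordinate update is a soft-thresholding step, e.g.
#     SoftThreshold( 1.7, 0.5)   # 1.2
#     SoftThreshold(-0.3, 0.5)   # 0
# using SoftThreshold() as defined later in this file.)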
#if (bound >= bound0){ + # beta[i] <- (1-bound0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} + + diff.norm2 <- 1; + last.norm2 <- 1; + iter <- 1; + iter.old <- 1; + beta[i] <- (1-bound0)/sigma[i,i]; + beta.old <- beta; + sigma.tilde <- sigma; + diag(sigma.tilde) <- 0; + vs <- -sigma.tilde%*%beta; + + while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ + + for (j in 1:p){ + oldval <- beta[j]; + v <- vs[j]; + if (j==i) + v <- v+1; + beta[j] <- SoftThreshold(v,bound)/sigma[j,j]; + if (oldval != beta[j]){ + vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; + } + } + + iter <- iter + 1; + if (iter==2*iter.old){ + d <- beta - beta.old; + diff.norm2 <- sqrt(sum(d*d)); + last.norm2 <-sqrt(sum(beta*beta)); + iter.old <- iter; + beta.old <- beta; + #if (iter>10) + # vs <- -sigma.tilde%*%beta; + } + + # print(c(iter, maxiter, diff.norm2, threshold * last.norm2, threshold, bound)) + + } + + returnlist <- list("optsol" = beta, "iter" = iter) + return(returnlist) +} + +SoftThreshold <- function( x, lambda ) { + # + # Standard soft thresholding + # + if (x>lambda){ + return (x-lambda);} + else { + if (x< (-lambda)){ + return (x+lambda);} + else { + return (0); } + } +} + + +### Test + +n = 100; p = 250 + +X = matrix(rnorm(n * p), n, p) +S = t(X) %*% X / n + +debiasing_bound = 0.2 + +tol = 1.e-12 + +rows = as.integer(c(1:2)) + +A1 = debiasingMatrix(S, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +A2 = debiasingMatrix(S / n, FALSE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B1 = debiasingMatrix(X, TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) +B2 = debiasingMatrix(X / sqrt(n), TRUE, n, rows, bound=debiasing_bound, max_iter=1000, kkt_tol=tol, objective_tol=tol, parameter_tol=tol, linesearch=FALSE) + +C1 = InverseLinfty(S, n, bound=debiasing_bound, maxiter=1000)[rows,] +C2 = InverseLinfty(S / n, n, bound=debiasing_bound, maxiter=1000)[rows,] + +par(mfrow=c(2,3)) + +plot(A1[1,], C1[1,]) +plot(A1[1,], B1[1,]) +plot(B1[1,], C1[1,]) + +plot(A1[1,], A2[1,]) +plot(B1[1,], B2[1,]) +plot(C1[1,], C2[1,]) + +print(c('A', sum(A1[1,] == 0))) +print(c('B', sum(B1[1,] == 0))) +print(c('C', sum(C1[1,] == 0))) + +## Are our points feasible + +feasibility = function(S, soln, j, debiasing_bound) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + return(c(max(abs(G)), debiasing_bound)) +} + +print(c('feasibility A', feasibility(S, A1[1,], 1, debiasing_bound))) +print(c('feasibility B', feasibility(S, B1[1,], 1, debiasing_bound))) +print(c('feasibility C', feasibility(S, C1[1,], 1, debiasing_bound))) + +active_KKT = function(S, soln, j, debiasing_bound) { + p = nrow(S) + E = rep(0, p) + E[j] = 1 + G = S %*% soln - E + print(which(soln != 0)) + print(G[j]) + return(c(G[soln != 0] * sign(soln)[soln != 0], debiasing_bound)) +} + +print(c('active_KKT A', active_KKT(S, A1[1,], 1, debiasing_bound))) +print(c('active_KKT B', active_KKT(S, B1[1,], 1, debiasing_bound))) +print(c('active_KKT C', active_KKT(S, C1[1,], 1, debiasing_bound))) + + +print(summary(lm(A1[1,] ~ C1[1,]))) \ No newline at end of file From e01d8f19052271141cfb10ffa4e9c52ea98a5e0b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 27 Oct 2017 10:36:58 -0700 Subject: [PATCH 336/396] removing the offset_correction which was just for the comparison --- 
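The offset_correction switch is dropped again below; it existed only so the comparison scripts could toggle the debiasing offset on and off. For intuition, a hedged, self-contained sketch of how such an offset enters the truncated-Gaussian pivot (mirroring tnorm.surv/poly.pval above; the offset value 0.3 is made up):

tn_surv = function(z, mean, sd, a, b)  # Prob(Z > z | Z in [a, b])
    (pnorm(b, mean, sd) - pnorm(z, mean, sd)) /
    (pnorm(b, mean, sd) - pnorm(a, mean, sd))
tn_surv(1.2, 0, 1, 0.5, 3)      # null mean 0: no offset correction
tn_surv(1.2, -0.3, 1, 0.5, 3)   # null mean shifted to 0 - offset, offset = 0.3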
selectiveInference/R/funs.fixed.R | 5 +---- tests/debiased_lasso/comparison_scaled.R | 13 +++---------- tests/debiased_lasso/comparison_unscaled.R | 13 +++---------- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 7c8318d9..d5e3d648 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -8,7 +8,7 @@ fixedLassoInf <- function(x, y, beta, sigma=NULL, alpha=0.1, type=c("partial", "full"), tol.beta=1e-5, tol.kkt=0.1, gridrange=c(-100,100), bits=NULL, verbose=FALSE, - linesearch.try=10, offset_correction=TRUE) { + linesearch.try=10) { family = match.arg(family) this.call = match.call() @@ -197,9 +197,6 @@ fixedLassoInf <- function(x, y, beta, M = M[-1,] # remove intercept row null_value = null_value[-1] # remove intercept element } - if (!offset_correction) { - null_value = 0 * null_value - } } else if (type=="partial" || p > n) { xa = x[,vars,drop=F] M = pinv(crossprod(xa)) %*% t(xa) diff --git a/tests/debiased_lasso/comparison_scaled.R b/tests/debiased_lasso/comparison_scaled.R index e0c0a6a2..e296bd69 100644 --- a/tests/debiased_lasso/comparison_scaled.R +++ b/tests/debiased_lasso/comparison_scaled.R @@ -19,10 +19,9 @@ p=200 sigma=.5 -theor_lambda = sigma * sqrt(2 * log(p)) -lambda=c(0.25, 0.5, 1, 0.8 * theor_lambda, theor_lambda) +lambda=c(0.25, 0.5, 1) -for (j in c(3,4,5,1,2)) { +for (j in c(3,2,1)) { thresh = 1e-10 @@ -54,12 +53,8 @@ for (i in 1:nsim) { aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) - cc = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) - dd = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) pvs_new <- c(pvs_new, aa$pv, recursive=TRUE) pvs_old <- c(pvs_old, bb$pv,recursive=TRUE) - pvs_new_0 <- c(pvs_new_0, cc$pv, recursive=TRUE) - pvs_old_0 <- c(pvs_old_0, dd$pv, recursive=TRUE) cat() } @@ -70,9 +65,7 @@ for (i in 1:nsim) { png(paste('comparison_scaled', j, '.png', sep='')) plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values') plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE) -plot(ecdf(pvs_old_0), pch=23, col='red', add=TRUE) -plot(ecdf(pvs_new_0), pch=24, col='black', add=TRUE) abline(0,1) -legend("bottomright", legend=c("Old","New", "Old 0", "New 0"), pch=c(23,24,23,24), pt.bg=c("green","purple","red","black")) +legend("bottomright", legend=c("Old", "New"), pch=c(23,24), pt.bg=c("green","purple")) dev.off() } \ No newline at end of file diff --git a/tests/debiased_lasso/comparison_unscaled.R b/tests/debiased_lasso/comparison_unscaled.R index 3bb408e3..eebda685 100644 --- a/tests/debiased_lasso/comparison_unscaled.R +++ b/tests/debiased_lasso/comparison_unscaled.R @@ -19,10 +19,9 @@ p=200 sigma=.5 -theor_lambda = sigma * sqrt(2 * log(p)) -lambda=c(0.25, 0.5, 1, 0.8 * theor_lambda, theor_lambda) +lambda=c(0.25, 0.5, 1) -for (j in c(3,4,5,1,2)) { +for (j in c(3,2,1)) { thresh = 1e-10 @@ -54,12 +53,8 @@ for (i in 1:nsim) { aa = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) bb = oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type) - cc = fixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) - dd = 
oldFixedLassoInf(x,y,bhat,lambda[j]*sqrt(n) / scaling,intercept=F,sigma=sigma,type=type, offset_correction=FALSE) pvs_new <- c(pvs_new, aa$pv, recursive=TRUE) pvs_old <- c(pvs_old, bb$pv,recursive=TRUE) - pvs_new_0 <- c(pvs_new_0, cc$pv, recursive=TRUE) - pvs_old_0 <- c(pvs_old_0, dd$pv, recursive=TRUE) cat() } @@ -70,9 +65,7 @@ for (i in 1:nsim) { png(paste('comparison_unscaled', j, '.png', sep='')) plot(ecdf(pvs_old), pch=23, col='green', xlim=c(0,1), ylim=c(0,1), main='ECDF of p-values') plot(ecdf(pvs_new), pch=24, col='purple', add=TRUE) -plot(ecdf(pvs_old_0), pch=23, col='red', add=TRUE) -plot(ecdf(pvs_new_0), pch=24, col='black', add=TRUE) abline(0,1) -legend("bottomright", legend=c("Old","New", "Old 0", "New 0"), pch=c(23,24,23,24), pt.bg=c("green","purple","red","black")) +legend("bottomright", legend=c("Old", "New"), pch=c(23,24), pt.bg=c("green","purple")) dev.off() } \ No newline at end of file From eedf0fde5bf16648e5dde7f3703f5eacf59928e8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 28 Oct 2017 11:13:25 -0700 Subject: [PATCH 337/396] created the matrices for the affine transform, wrapper for calling Gaussian density --- selectiveInference/R/funs.fixed.R | 3 +- selectiveInference/R/funs.randomized.R | 34 ++++ selectiveInference/src/Rcpp-randomized.cpp | 40 +++++ selectiveInference/src/randomized_lasso.c | 2 + selectiveInference/src/randomized_lasso.c~ | 188 +++++++++++++++++++++ selectiveInference/src/randomized_lasso.h | 2 +- selectiveInference/src/randomized_lasso.h~ | 42 +++++ 7 files changed, 308 insertions(+), 3 deletions(-) create mode 100644 selectiveInference/src/Rcpp-randomized.cpp create mode 100644 selectiveInference/src/randomized_lasso.c~ create mode 100644 selectiveInference/src/randomized_lasso.h~ diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index d5e3d648..13d8b5e6 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -435,8 +435,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep last_output = NULL if (is_wide) { - n = nrow(Xinfo) - Xsoln = as.numeric(rep(0, n)) + Xsoln = as.numeric(rep(0, nrow(Xinfo))) } while (counter_idx < max_try) { diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 3a171f79..1da73b2d 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -37,6 +37,7 @@ randomizedLASSO = function(X, if (length(lam) == 1) { lam = rep(lam, p) } + if (length(lam) != p) { stop("Lagrange parameter should be single float or of length ncol(X)") } @@ -65,5 +66,38 @@ randomizedLASSO = function(X, objective_stop, # objective_stop kkt_stop, # kkt_stop param_stop) # param_stop + + + sign_soln = sign(result$soln) + + unpenalized = lam == 0 + active = !unpenalized * (sign_soln != 0) + inactive = !unpenzlied * (sign_soln == 0) + + unpenalized_set = which(unpenalized) + active_set = which(active) + inactive_set = which(inactive) + + coef_term = t(X) %*% X[,c(unpenalized_set, # the coefficients + active_set)] + coef_term = coef_term %*% diag(c(rep(1, sum(unpenalized)), sign_soln[active])) # coefficients are non-negative + coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term + + subgrad_term = cbind(matrix(0, sum(inactive), sum(active) + sum(unpenalized)), + diag(rep(1, sum(inactive)))) + linear_term = rbind(coef_term, + subgrad_term) + + offset_term = rep(0, p) + offset_term[active] = lam[active] * sign_soln[active] + + + + 
list(active_set = active_set, + inactive_set = inactive_set, + unpenalized_set = unpenalized_set, + sign_soln = sign_soln) + + return(result) } diff --git a/selectiveInference/src/Rcpp-randomized.cpp b/selectiveInference/src/Rcpp-randomized.cpp new file mode 100644 index 00000000..7887f327 --- /dev/null +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -0,0 +1,40 @@ +#include // need to include the main Rcpp header file +#include // where densities are defined + +// [[Rcpp::export]] +Rcpp::NumericVector log_density_gaussian_(double noise_scale, // Scale of randomization + Rcpp::NumericMatrix internal_linear, // A_D -- linear part for data + Rcpp::NumericMatrix internal_state, // D -- data state -- matrix of shape (nopt, npts) + Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables + Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) + Rcpp::NumericMatrix offset) { // h -- offset in affine transform -- "p" dimensional + + int npt = internal_state.ncol(); // Function is vectorized + if (optimization_state.ncol() != npt) { // Assuming each column is an internal or opt state because arrays are column major + Rcpp::stop("Number of optimization samples should equal the number of (internally represented) data."); + } + + int ndim = optimization_linear.nrow(); + if (internal_linear.nrow() != ndim) { + Rcpp::stop("Dimension of optimization range should be the same as the dimension of the data range."); + } + int ninternal = internal_linear.ncol(); + int noptimization = optimization_linear.ncol(); + + Rcpp::NumericVector result(npt); + + int ipt; + for (ipt=0; ipt // for fabs + +// Augmented density for randomized LASSO after +// Gaussian randomization + +// Described in https://arxiv.org/abs/1609.05609 + +// Gaussian is product of IID N(0, noise_scale^2) density +// Evaluated at A_D D + A_O O + h + +// Laplace is product of IID Laplace with scale noise_scale +// Also evaluated at A_D D + A_O O + h + +double log_density_gaussian(double noise_scale, // Scale of randomization + int ndim, // Number of features -- "p" + int ninternal, // Dimension of internal data representation often 1 + int noptimization, // Dimension of optimization variables -- "p" + double *internal_linear, // A_D -- linear part for data + double *internal_state, // D -- data state + double *optimization_linear, // A_O -- linear part for optimization variables + double *optimization_state, // O -- optimization state + double *offset) // h -- offset in affine transform -- "p" dimensional +{ + int irow, icol; + double denom = 2 * noise_scale * noise_scale; + double value = 0; + double reconstruction = 0; + double *offset_ptr; + double *internal_linear_ptr; + double *internal_state_ptr; + double *optimization_linear_ptr; + double *optimization_state_ptr; + + for (irow=0; irow Date: Sat, 28 Oct 2017 11:13:48 -0700 Subject: [PATCH 338/396] unwanted files --- selectiveInference/src/randomized_lasso.c~ | 188 --------------------- selectiveInference/src/randomized_lasso.h~ | 42 ----- 2 files changed, 230 deletions(-) delete mode 100644 selectiveInference/src/randomized_lasso.c~ delete mode 100644 selectiveInference/src/randomized_lasso.h~ diff --git a/selectiveInference/src/randomized_lasso.c~ b/selectiveInference/src/randomized_lasso.c~ deleted file mode 100644 index 123c81de..00000000 --- a/selectiveInference/src/randomized_lasso.c~ +++ /dev/null @@ -1,188 +0,0 @@ -#include // for fabs - -// Augmented density for randomized LASSO after -// Gaussian 
randomization - -// Described in https://arxiv.org/abs/1609.05609 - -// Gaussian is product of IID N(0, noise_scale^2) density -// Evaluated at A_D D + A_O O + h - -// Laplace is product of IID Laplace with scale noise_scale -// Also evaluated at A_D D + A_O O + h - -double log_density_gaussian(double noise_scale, // Scale of randomization - int ndim, // Number of features -- "p" - int ninternal, // Dimension of internal data representation often 1 - int noptimization, // Dimension of optimization variables -- "p" - double *internal_linear, // A_D -- linear part for data - double *internal_state, // D -- data state - double *optimization_linear, // A_O -- linear part for optimization variables - double *optimization_state, // O -- optimization state - double *offset) // h -- offset in affine transform -- "p" dimensional -{ - int irow, icol; - double denom = 2 * noise_scale * noise_scale; - double value = 0; - double reconstruction = 0; - double *offset_ptr; - double *internal_linear_ptr; - double *internal_state_ptr; - double *optimization_linear_ptr; - double *optimization_state_ptr; - - for (irow=0; irow Date: Sat, 28 Oct 2017 11:16:12 -0700 Subject: [PATCH 339/396] wrapper for conditional density --- selectiveInference/src/Rcpp-randomized.cpp | 25 ++++++++++++++++++++++ tests/{ => randomized}/test_randomized.R | 0 2 files changed, 25 insertions(+) rename tests/{ => randomized}/test_randomized.R (100%) diff --git a/selectiveInference/src/Rcpp-randomized.cpp b/selectiveInference/src/Rcpp-randomized.cpp index 7887f327..23f07978 100644 --- a/selectiveInference/src/Rcpp-randomized.cpp +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -38,3 +38,28 @@ Rcpp::NumericVector log_density_gaussian_(double noise_scale, return(result); } + +// [[Rcpp::export]] +Rcpp::NumericVector log_density_gaussian_conditional_(double noise_scale, // Scale of randomization + Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables + Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) + Rcpp::NumericMatrix offset) { // h -- offset in affine transform -- "p" dimensional + + int npt = optimization_state.ncol(); // Function is vectorized + int ndim = optimization_linear.nrow(); + int noptimization = optimization_linear.ncol(); + + Rcpp::NumericVector result(npt); + + int ipt; + for (ipt=0; ipt Date: Sat, 28 Oct 2017 11:49:40 -0700 Subject: [PATCH 340/396] forming internal affine transform --- selectiveInference/R/funs.randomized.R | 58 +++++++++++++++++++------- tests/randomized/test_randomized.R | 5 ++- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 1da73b2d..3b104239 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -71,33 +71,61 @@ randomizedLASSO = function(X, sign_soln = sign(result$soln) unpenalized = lam == 0 - active = !unpenalized * (sign_soln != 0) - inactive = !unpenzlied * (sign_soln == 0) + active = (!unpenalized) & (sign_soln != 0) + inactive = (!unpenalized) & (sign_soln == 0) unpenalized_set = which(unpenalized) active_set = which(active) inactive_set = which(inactive) - coef_term = t(X) %*% X[,c(unpenalized_set, # the coefficients - active_set)] + # affine transform for optimization variables + + E = c(unpenalized_set, active_set) + I = inactive_set + X_E = X[,E] + X_I = X[,I] + L_E = t(X) %*% X[,E] + + coef_term = L_E coef_term = coef_term %*% diag(c(rep(1, 
sum(unpenalized)), sign_soln[active])) # coefficients are non-negative coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term - subgrad_term = cbind(matrix(0, sum(inactive), sum(active) + sum(unpenalized)), - diag(rep(1, sum(inactive)))) - linear_term = rbind(coef_term, + subgrad_term = matrix(0, p, sum(inactive)) # for subgrad + for (i in 1:sum(inactive)) { + subgrad_term[inactive_set[i], i] = 1 + } + + linear_term = cbind(coef_term, subgrad_term) offset_term = rep(0, p) offset_term[active] = lam[active] * sign_soln[active] - + opt_transform = list(linear_term=linear_term, + offset_term=offset_term) + + # affine transform for internal (data) variables + # for now just use parametric in terms of + # (\bar{\beta}_E, X_{-E}^T(y-X_E\bar{\beta}_E) + # + # we have to reconstruct -X^TY from this pair + # + + active_term = -L_E # for \bar{\beta}_E + + inactive_term = -subgrad_term + linear_term = cbind(active_term, + inactive_term) + offset_term = rep(0, p) + internal_transform = list(linear_term = linear_term, + offset_term = offset_term) + + return(list(active_set = active_set, + inactive_set = inactive_set, + unpenalized_set = unpenalized_set, + sign_soln = sign_soln, + opt_transform = opt_transform, + internal_transform = internal_transform + )) - list(active_set = active_set, - inactive_set = inactive_set, - unpenalized_set = unpenalized_set, - sign_soln = sign_soln) - - - return(result) } diff --git a/tests/randomized/test_randomized.R b/tests/randomized/test_randomized.R index e4e35a12..305139bc 100644 --- a/tests/randomized/test_randomized.R +++ b/tests/randomized/test_randomized.R @@ -8,7 +8,8 @@ test = function() { lam = 20 / sqrt(n) noise_scale = 0.01 * sqrt(n) ridge_term = .1 / sqrt(n) - fit_randomized_lasso(X, y, lam, noise_scale, ridge_term) + selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) } -print(test()) +A=test() +#print(test()) From c175a8b192b56a1ea1ad9fabe688461f459f770f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 28 Oct 2017 12:03:00 -0700 Subject: [PATCH 341/396] vectorized densities evaluate OK -- need to check results --- selectiveInference/R/funs.randomized.R | 2 +- selectiveInference/src/Rcpp-randomized.cpp | 4 ++-- tests/randomized/test_randomized.R | 27 +++++++++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 3b104239..92b871eb 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -124,7 +124,7 @@ randomizedLASSO = function(X, inactive_set = inactive_set, unpenalized_set = unpenalized_set, sign_soln = sign_soln, - opt_transform = opt_transform, + optimization_transform = opt_transform, internal_transform = internal_transform )) diff --git a/selectiveInference/src/Rcpp-randomized.cpp b/selectiveInference/src/Rcpp-randomized.cpp index 23f07978..d6e85175 100644 --- a/selectiveInference/src/Rcpp-randomized.cpp +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -7,7 +7,7 @@ Rcpp::NumericVector log_density_gaussian_(double noise_scale, Rcpp::NumericMatrix internal_state, // D -- data state -- matrix of shape (nopt, npts) Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) - Rcpp::NumericMatrix offset) { // h -- offset in affine transform -- "p" dimensional + Rcpp::NumericVector offset) { // h -- 
offset in affine transform -- "p" dimensional int npt = internal_state.ncol(); // Function is vectorized if (optimization_state.ncol() != npt) { // Assuming each column is an internal or opt state because arrays are column major @@ -43,7 +43,7 @@ Rcpp::NumericVector log_density_gaussian_(double noise_scale, Rcpp::NumericVector log_density_gaussian_conditional_(double noise_scale, // Scale of randomization Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) - Rcpp::NumericMatrix offset) { // h -- offset in affine transform -- "p" dimensional + Rcpp::NumericVector offset) { // h -- offset in affine transform -- "p" dimensional int npt = optimization_state.ncol(); // Function is vectorized int ndim = optimization_linear.nrow(); diff --git a/tests/randomized/test_randomized.R b/tests/randomized/test_randomized.R index 305139bc..87a30439 100644 --- a/tests/randomized/test_randomized.R +++ b/tests/randomized/test_randomized.R @@ -1,6 +1,6 @@ library(selectiveInference) -test = function() { +smoke_test = function() { n = 100; p = 50 X = matrix(rnorm(n * p), n, p) @@ -10,6 +10,27 @@ test = function() { ridge_term = .1 / sqrt(n) selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) } +A = smoke_test() -A=test() -#print(test()) +density_test = function() { + + random_lasso = smoke_test() + p = nrow(random_lasso$internal_transform$linear_term) + internal_state = matrix(rnorm(p * 20), p, 20) + optimization_state = matrix(rnorm(p * 20), p, 20) + offset = rnorm(p) + + selectiveInference:::log_density_gaussian_(10., + random_lasso$internal_transform$linear_term, + internal_state, + random_lasso$optimization_transform$linear_term, + optimization_state, + offset) + + selectiveInference:::log_density_gaussian_conditional_(10., + random_lasso$optimization_transform$linear_term, + optimization_state, + offset) +} + +density_test() From c940537211e217b3dbeaa103699475e0ed67e7f1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 28 Oct 2017 12:16:27 -0700 Subject: [PATCH 342/396] added laplace densities --- selectiveInference/src/Rcpp-randomized.cpp | 63 ++++++++++++++ tests/randomized/test_randomized.R | 95 +++++++++++++++++++--- 2 files changed, 145 insertions(+), 13 deletions(-) diff --git a/selectiveInference/src/Rcpp-randomized.cpp b/selectiveInference/src/Rcpp-randomized.cpp index d6e85175..b2b2cd23 100644 --- a/selectiveInference/src/Rcpp-randomized.cpp +++ b/selectiveInference/src/Rcpp-randomized.cpp @@ -63,3 +63,66 @@ Rcpp::NumericVector log_density_gaussian_conditional_(double noise_scale, return(result); } + +// [[Rcpp::export]] +Rcpp::NumericVector log_density_laplace_(double noise_scale, // Scale of randomization + Rcpp::NumericMatrix internal_linear, // A_D -- linear part for data + Rcpp::NumericMatrix internal_state, // D -- data state -- matrix of shape (nopt, npts) + Rcpp::NumericMatrix optimization_linear, // A_O -- linear part for optimization variables + Rcpp::NumericMatrix optimization_state, // O -- optimization state -- matrix of shape (ninternal, npts) + Rcpp::NumericVector offset) { // h -- offset in affine transform -- "p" dimensional + + int npt = internal_state.ncol(); // Function is vectorized + if (optimization_state.ncol() != npt) { // Assuming each column is an internal or opt state because arrays are column major + Rcpp::stop("Number of optimization samples should equal the number of (internally represented) 
data."); + } + + int ndim = optimization_linear.nrow(); + if (internal_linear.nrow() != ndim) { + Rcpp::stop("Dimension of optimization range should be the same as the dimension of the data range."); + } + int ninternal = internal_linear.ncol(); + int noptimization = optimization_linear.ncol(); + + Rcpp::NumericVector result(npt); + + int ipt; + for (ipt=0; ipt Date: Mon, 30 Oct 2017 22:05:21 -0700 Subject: [PATCH 343/396] BF: variable rename --- selectiveInference/R/funs.fixedCox.R | 2 +- selectiveInference/R/funs.fixedLogit.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixedCox.R b/selectiveInference/R/funs.fixedCox.R index 9f35c950..5cbe3301 100644 --- a/selectiveInference/R/funs.fixedCox.R +++ b/selectiveInference/R/funs.fixedCox.R @@ -73,7 +73,7 @@ b1= -(mydiag(sign_bhat)%*%MM)%*%sign_bhat*lambda vup[jj]=junk$vup sd[jj]=junk$sd - junk2=TG.interval(bbar, A1, b1, vj, MM, alpha, flip=(s2[jj]==-1)) + junk2=TG.interval(bbar, A1, b1, vj, MM, alpha, flip=(sign_bhat[jj]==-1)) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 75a31e0a..60b6451a 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -96,7 +96,7 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet vup[jj]=junk$vup sd[jj]=junk$sd - junk2=TG.interval(bbar, A1, b1, vj, MM,alpha=alpha, flip=(s2[jj+1]==-1)) + junk2=TG.interval(bbar, A1, b1, vj, MM,alpha=alpha, flip=(sign_bhat[jj+1]==-1)) ci[jj,]=junk2$int tailarea[jj,] = junk2$tailarea From 2917d6eb71f156c7c50e8cc1d05204ef4a7ee74e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Oct 2017 22:22:47 -0700 Subject: [PATCH 344/396] BF: forgot to cd up directory --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index c324f88e..18769f7b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ before_install: - cd C-software - git submodule init - git submodule update + - cd .. 
- make src - make Rcpp - cd selectiveInference From 63e60df3b3a886fd3337707a6f09a4c1c52237be Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 30 Oct 2017 22:50:04 -0700 Subject: [PATCH 345/396] changing order of stop arguments --- selectiveInference/R/funs.fixed.R | 4 ++-- selectiveInference/src/Rcpp-debias.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 13d8b5e6..5df84d3c 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -453,8 +453,8 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, parameter_tol, max_active, - objective_stop, kkt_stop, + objective_stop, parameter_stop) } else { result = solve_QP_wide(Xinfo, # this is a design matrix @@ -471,8 +471,8 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, parameter_tol, max_active, - objective_stop, kkt_stop, + objective_stop, parameter_stop) } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 5c181848..112a6a80 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -17,8 +17,8 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double objective_tol, double parameter_tol, int max_active, - int objective_stop, int kkt_stop, + int objective_stop, int param_stop ) { @@ -55,8 +55,8 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, objective_tol, parameter_tol, max_active, - objective_stop, kkt_stop, + objective_stop, param_stop); // Check whether feasible @@ -96,8 +96,8 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, double objective_tol, double parameter_tol, int max_active, - int objective_stop, int kkt_stop, + int objective_stop, int param_stop ) { @@ -148,8 +148,8 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, objective_tol, parameter_tol, max_active, - objective_stop, kkt_stop, + objective_stop, param_stop); // Check whether feasible From f7744835149e57c4656740794e32e8c6670e28fe Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 31 Oct 2017 20:46:13 -0700 Subject: [PATCH 346/396] unused import --- selectiveInference/NAMESPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 7a56d450..0de5d3b1 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -45,4 +45,3 @@ importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") importFrom("stats", "rbinom", "rexp") importFrom("Rcpp", "sourceCpp") -importFrom("distr", "Norm", "DExp") From da1b9b559f04a2a1f700f8209662a5dec8443587 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 31 Oct 2017 20:46:50 -0700 Subject: [PATCH 347/396] column major ordering in C library --- C-software | 2 +- selectiveInference/R/funs.fixed.R | 21 --- selectiveInference/src/Rcpp-debias.cpp | 6 +- selectiveInference/src/randomized_lasso.c | 188 ---------------------- selectiveInference/src/randomized_lasso.h | 42 ----- 5 files changed, 6 insertions(+), 253 deletions(-) delete mode 100644 selectiveInference/src/randomized_lasso.c delete mode 100644 selectiveInference/src/randomized_lasso.h diff --git a/C-software b/C-software index a3d9a172..963ef1fd 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit a3d9a1723ce94cb430b5dfd3e058fd708a6bae7f +Subproject commit 963ef1fd4fa3b9599a0d24c6cc4882dff2204725 diff --git a/selectiveInference/R/funs.fixed.R 
b/selectiveInference/R/funs.fixed.R index c679922e..5df84d3c 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -405,11 +405,7 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_stop=TRUE, # stop based on relative decrease in objective? kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-4, # tolerance for relative convergence of parameter -<<<<<<< HEAD - objective_tol=1.e-8 # tolerance for relative decrease in objective -======= objective_tol=1.e-4 # tolerance for relative decrease in objective ->>>>>>> 232760d6aef5182e040b82e30555f4af5ad6803c ) { p = ncol(Xinfo) @@ -457,16 +453,6 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, parameter_tol, max_active, -<<<<<<< HEAD - FALSE, # objective_stop - FALSE, # kkt_stop - TRUE) # param_stop - } else { - Xsoln = rep(0, nrow(Xinfo)) - result = solve_QP_wide(Xinfo, # this is a design matrix - rep(mu, p), # vector of Lagrange multipliers - 0, # ridge_term -======= kkt_stop, objective_stop, parameter_stop) @@ -474,7 +460,6 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep result = solve_QP_wide(Xinfo, # this is a design matrix as.numeric(rep(bound, p)), # vector of Lagrange multipliers 0, # ridge_term ->>>>>>> 232760d6aef5182e040b82e30555f4af5ad6803c max_iter, soln, linear_func, @@ -486,15 +471,9 @@ debiasingRow = function (Xinfo, # could be X or t(X) %*% X / n dep objective_tol, parameter_tol, max_active, -<<<<<<< HEAD - FALSE, # objective_stop - FALSE, # kkt_stop - TRUE) # param_stop -======= kkt_stop, objective_stop, parameter_stop) ->>>>>>> 232760d6aef5182e040b82e30555f4af5ad6803c } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 112a6a80..75ff1afd 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -101,6 +101,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, int param_stop ) { + int column_major = 1; // R has matrices in column major order int ncase = X.nrow(); // number of cases int nfeature = X.ncol(); // number of features @@ -139,6 +140,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, (int *) nactive.begin(), ncase, nfeature, + column_major, (double *) bound.begin(), ridge_term, (double *) theta.begin(), @@ -162,6 +164,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, (int *) need_update.begin(), nfeature, ncase, + column_major, (double *) bound.begin(), ridge_term, kkt_tol); @@ -176,7 +179,8 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, (double *) linear_func.begin(), (int *) need_update.begin(), nfeature, - ncase); + ncase, + column_major); return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, diff --git a/selectiveInference/src/randomized_lasso.c b/selectiveInference/src/randomized_lasso.c deleted file mode 100644 index 123c81de..00000000 --- a/selectiveInference/src/randomized_lasso.c +++ /dev/null @@ -1,188 +0,0 @@ -#include // for fabs - -// Augmented density for randomized LASSO after -// Gaussian randomization - -// Described in https://arxiv.org/abs/1609.05609 - -// Gaussian is product of IID N(0, noise_scale^2) density -// Evaluated at A_D D + A_O O + h - -// Laplace is product of IID Laplace with scale noise_scale -// Also evaluated at A_D D + A_O O + h - -double log_density_gaussian(double noise_scale, // Scale of randomization - int ndim, // Number of features -- "p" - int ninternal, // Dimension of internal data representation often 1 - int 
noptimization, // Dimension of optimization variables -- "p"
- double *internal_linear, // A_D -- linear part for data
- double *internal_state, // D -- data state
- double *optimization_linear, // A_O -- linear part for optimization variables
- double *optimization_state, // O -- optimization state
- double *offset) // h -- offset in affine transform -- "p" dimensional
-{
- int irow, icol;
- double denom = 2 * noise_scale * noise_scale;
- double value = 0;
- double reconstruction = 0;
- double *offset_ptr;
- double *internal_linear_ptr;
- double *internal_state_ptr;
- double *optimization_linear_ptr;
- double *optimization_state_ptr;
-
- for (irow=0; irow<ndim; irow++) {
[remainder of the randomized_lasso.c deletion truncated]

Date: Tue, 31 Oct 2017 20:49:18 -0700
Subject: [PATCH 348/396] tests are in different directory

---
 tests/test_debiasing.R | 195 ----------------------------------------
 tests/test_randomized.R | 14 ---
 2 files changed, 209 deletions(-)
 delete mode 100644 tests/test_debiasing.R
 delete mode 100644 tests/test_randomized.R

diff --git a/tests/test_debiasing.R b/tests/test_debiasing.R
deleted file mode 100644
index b1fdf24c..00000000
--- a/tests/test_debiasing.R
+++ /dev/null
@@ -1,195 +0,0 @@
-library(selectiveInference)
-
-
-## Approximates inverse covariance matrix theta
-InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-10, verbose = TRUE) {
- isgiven <- 1;
- if (is.null(mu)){
- isgiven <- 0;
- }
-
- p <- nrow(sigma);
- M <- matrix(0, p, p);
- xperc = 0;
- xp = round(p/10);
- for (i in 1:p) {
- if ((i %% xp)==0){
- xperc = xperc+10;
- if (verbose) {
- print(paste(xperc,"% done",sep="")); }
- }
- if (isgiven==0){
- mu <- (1/sqrt(n)) * qnorm(1-(0.1/(p^2)));
- }
- mu.stop <- 0;
- try.no <- 1;
- incr <- 0;
- while ((mu.stop != 1)&&(try.no<10)){
- last.beta <- beta
- output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold)
- beta <- output$optsol
- iter <- output$iter
- if (isgiven==1){
- mu.stop <- 1
- }
- else{
- if (try.no==1){
- if (iter == (maxiter+1)){
- incr <- 1;
- mu <- mu*resol;
- } else {
- incr <- 0;
- mu <- mu/resol;
- }
- }
- if (try.no > 1){
- if ((incr == 1)&&(iter == (maxiter+1))){
- mu <- mu*resol;
- }
- if ((incr == 1)&&(iter < (maxiter+1))){
- mu.stop <- 1;
- }
- if ((incr == 0)&&(iter < (maxiter+1))){
- mu <- mu/resol;
- }
- if ((incr == 0)&&(iter == (maxiter+1))){
- mu <- mu*resol;
- beta <- last.beta;
- mu.stop <- 1;
- }
- }
- }
- try.no <- try.no+1
- }
- M[i,] <- beta;
- }
- return(M)
-}
-
-InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-10) {
- p <- nrow(sigma);
- rho <- max(abs(sigma[i,-i])) / sigma[i,i];
- mu0 <- rho/(1+rho);
- beta <- rep(0,p);
-
- #if (mu >= mu0){
- # beta[i] <- (1-mu0)/sigma[i,i];
- # returnlist <- list("optsol" = beta, "iter" = 0);
- # return(returnlist);
- #}
-
- diff.norm2 <- 1;
- last.norm2 <- 1;
- iter <- 1;
- iter.old <- 1;
- beta[i] <- (1-mu0)/sigma[i,i];
- beta.old <- beta;
- sigma.tilde <- sigma;
- diag(sigma.tilde) <- 0;
- vs <- -sigma.tilde%*%beta;
-
- while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){
-
- for (j in 1:p){
- oldval <- beta[j];
- v <- vs[j];
- if (j==i)
- v <- v+1;
- beta[j] <- SoftThreshold(v,mu)/sigma[j,j];
- if (oldval != beta[j]){
- vs <- vs + (oldval-beta[j])*sigma.tilde[,j];
- }
- }
-
- iter <- iter + 1;
- if (iter==2*iter.old){
- d <- beta - beta.old;
- diff.norm2 <- sqrt(sum(d*d));
- last.norm2 <-sqrt(sum(beta*beta));
- iter.old <- iter;
- beta.old <- beta;
- #if (iter>10)
- # vs <- -sigma.tilde%*%beta;
- }
-
- # print(c(iter, maxiter, diff.norm2, threshold *
last.norm2, threshold, mu)) - - } - - returnlist <- list("optsol" = beta, "iter" = iter) - return(returnlist) -} - -SoftThreshold <- function( x, lambda ) { - # - # Standard soft thresholding - # - if (x>lambda){ - return (x-lambda);} - else { - if (x< (-lambda)){ - return (x+lambda);} - else { - return (0); } - } -} - - -### Test - -n = 100; p = 50 - -X = matrix(rnorm(n * p), n, p) -S = t(X) %*% X / n - -mu = 7.791408e-02 - -A1 = debiasingMatrix(S, FALSE, n, 1:5, mu=mu, max_iter=1000) -A2 = debiasingMatrix(S / n, FALSE, n, 1:5, mu=mu, max_iter=1000) - -B1 = debiasingMatrix(X, TRUE, n, 1:5, mu=mu, max_iter=1000) -B2 = debiasingMatrix(X / sqrt(n), TRUE, n, 1:5, mu=mu, max_iter=1000) - -C1 = InverseLinfty(S, n, mu=mu, maxiter=1000)[1:5,] -C2 = InverseLinfty(S / n, n, mu=mu, maxiter=1000)[1:5,] - -par(mfrow=c(2,3)) -plot(A1[1,], C1[1,]) -plot(A1[1,], B1[1,]) -plot(B1[1,], C1[1,]) - -plot(A1[1,], A2[1,]) -plot(B1[1,], B2[1,]) -plot(C1[1,], C2[1,]) - -print(c('A', sum(A1[1,] == 0))) -print(c('B', sum(B1[1,] == 0))) -print(c('C', sum(C1[1,] == 0))) - -## Are our points feasible - -feasibility = function(S, soln, j, mu) { - p = nrow(S) - E = rep(0, p) - E[j] = 1 - G = S %*% soln - E - return(c(max(abs(G)), mu)) -} - -print(c('feasibility A', feasibility(S, A1[1,], 1, mu))) -print(c('feasibility B', feasibility(S, B1[1,], 1, mu))) -print(c('feasibility C', feasibility(S, C1[1,], 1, mu))) - -active_KKT = function(S, soln, j, mu) { - p = nrow(S) - E = rep(0, p) - E[j] = 1 - G = S %*% soln - E - return(c(G[soln != 0] * sign(soln)[soln != 0], mu)) -} - -print(c('active_KKT A', active_KKT(S, A1[1,], 1, mu))) -print(c('active_KKT B', active_KKT(S, B1[1,], 1, mu))) -print(c('active_KKT C', active_KKT(S, C1[1,], 1, mu))) - - diff --git a/tests/test_randomized.R b/tests/test_randomized.R deleted file mode 100644 index e4e35a12..00000000 --- a/tests/test_randomized.R +++ /dev/null @@ -1,14 +0,0 @@ -library(selectiveInference) - -test = function() { - - n = 100; p = 50 - X = matrix(rnorm(n * p), n, p) - y = rnorm(n) - lam = 20 / sqrt(n) - noise_scale = 0.01 * sqrt(n) - ridge_term = .1 / sqrt(n) - fit_randomized_lasso(X, y, lam, noise_scale, ridge_term) -} - -print(test()) From ae9aec856a4d6fdf77847dfa13d7139949dbff57 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 2 Nov 2017 14:53:58 -0700 Subject: [PATCH 349/396] WIP: have sample from optimization density, importance weight -- need to write pivot function --- .travis.yml | 2 +- selectiveInference/DESCRIPTION | 1 + selectiveInference/NAMESPACE | 1 + selectiveInference/R/funs.randomized.R | 97 +++++++++++++++++++++++++- tests/randomized/test_randomized.R | 14 ++++ 5 files changed, 112 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 18769f7b..76ab515c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ addons: warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? 
- - R -e 'install.packages(c("Rcpp", "intervals"), repos="http://cloud.r-project.org")' + - R -e 'install.packages(c("Rcpp", "intervals", "adaptMCMC", "glmnet"), repos="http://cloud.r-project.org")' - cd C-software - git submodule init - git submodule update diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index fad072df..c7656636 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -10,6 +10,7 @@ Depends: glmnet, intervals, survival, + adaptMCMC, Suggests: Rmpfr Description: New tools for post-selection inference, for use with forward diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 0de5d3b1..e8f36e0d 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -45,3 +45,4 @@ importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") importFrom("stats", "rbinom", "rexp") importFrom("Rcpp", "sourceCpp") +importFrom("adaptMCMC", "MCMC") diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 3ca22b41..07af4682 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -67,7 +67,7 @@ randomizedLASSO = function(X, objective_stop, # objective_stop kkt_stop, # kkt_stop param_stop) # param_stop - + sign_soln = sign(result$soln) unpenalized = lam == 0 @@ -78,6 +78,14 @@ randomizedLASSO = function(X, active_set = which(active) inactive_set = which(inactive) + # observed opt state + + observed_scalings = abs(result$soln)[active] + observed_unpen = result$soln[unpenalized] + observed_subgrad = result$gradient[inactive] + + observed_opt_state = c(observed_unpen, observed_scalings, observed_subgrad) + # affine transform for optimization variables E = c(unpenalized_set, active_set) @@ -120,12 +128,97 @@ randomizedLASSO = function(X, internal_transform = list(linear_term = linear_term, offset_term = offset_term) + # density for sampling optimization variables + + observed_raw = -t(X) %*% Y + inactive_lam = lam[inactive_set] + inactive_start = sum(unpenalized) + sum(active) + active_start = sum(unpenalized) + + # XXX only for Gaussian so far + + log_optimization_density = function(opt_state + ) { + + + if ((sum(abs(opt_state[(inactive_start + 1):p]) > inactive_lam) > 0) || + (sum(opt_state[(active_start+1):inactive_start] < 0) > 0)) { + return(-Inf) + } + + D = log_density_gaussian_conditional_(noise_scale, + opt_transform$linear_term, + as.matrix(opt_state), + observed_raw) + return(D) + } + return(list(active_set = active_set, inactive_set = inactive_set, unpenalized_set = unpenalized_set, sign_soln = sign_soln, optimization_transform = opt_transform, - internal_transform = internal_transform + internal_transform = internal_transform, + log_optimization_density = log_optimization_density, + observed_opt_state = observed_opt_state, + observed_raw = observed_raw )) } + +sample_opt_variables = function(randomizedLASSO_obj, jump_scale, nsample=10000) { + return(MCMC(randomizedLASSO_obj$log_optimization_density, + nsample, + randomizedLASSO_obj$observed_opt_state, + acc.rate=0.2, + scale=jump_scale)) +} + +# Carry out a linear decompositon of an internal +# representation with respect to a target + +# Returns an affine transform into raw coordinates (i.e. 
\omega or randomization coordinates) + +linear_decomposition = function(observed_target, + observed_internal, + var_target, + cov_target_internal, + internal_transform) { + var_target = as.matrix(var_target) + if (nrow(var_target) == 1) { + nuisance = observed_internal - cov_target_internal * observed_target / var_target + target_linear = internal_transform$linear_part %*% cov_target_internal / var_target + } else { + nuisance = observed_internal - cov_target_internal %*% solve(var_target) %*% observed_target + target_linear = internal_transform$linear_part %*% cov_target_internal %*% solve(var_target) + } + target_offset = internal_transform$linear_part %*% nuisance + internal_transform$offset + return(list(linear_term=target_linear, + offset_term=target_offset)) +} + +# XXX only for Gaussian so far + +importance_weight = function(noise_scale, + target_sample, + opt_sample, + opt_transform, + target_transform, + observed_raw) { + + log_num = log_density_gaussian_(noise_scale, + target_transform$linear_term, + as.matrix(target_sample), + optimization_transform$linear_term, + as.matrix(opt_state), + target_transform$offset_term + optimization_transform$offset_term) + + log_den = log_density_gaussian_conditional_(noise_scale, + opt_transform$linear_term, + as.matrix(opt_sample), + observed_raw) + W = log_num - log_den + W = W - max(W) + return(exp(W)) +} + diff --git a/tests/randomized/test_randomized.R b/tests/randomized/test_randomized.R index b9b1750d..eef0eaaa 100644 --- a/tests/randomized/test_randomized.R +++ b/tests/randomized/test_randomized.R @@ -12,6 +12,20 @@ smoke_test = function() { } A = smoke_test() +sampler_test = function() { + + n = 100; p = 50 + X = matrix(rnorm(n * p), n, p) + y = rnorm(n) + lam = 20 / sqrt(n) + noise_scale = 0.01 * sqrt(n) + ridge_term = .1 / sqrt(n) + obj = selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) + S = selectiveInference:::sample_opt_variables(obj, jump_scale=rep(1/sqrt(n), p), nsample=10000) + return(S$samples[2001:10000,]) +} +B = sampler_test() + gaussian_density_test = function() { noise_scale = 10. From 177ab78e2af2060083b327c8b82854b0c937651d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 2 Nov 2017 17:13:12 -0700 Subject: [PATCH 350/396] update to C-software, using a compiler define for column major --- C-software | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/C-software b/C-software index 963ef1fd..158c64d8 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 963ef1fd4fa3b9599a0d24c6cc4882dff2204725 +Subproject commit 158c64d8d81fbcf434869c0c68f5bb7a4a9cdf5a From 1d60dfaf95a8263c6ec25e41d54b693a51eb6043 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 2 Nov 2017 17:13:30 -0700 Subject: [PATCH 351/396] update to C-software, using a compiler define for column major --- selectiveInference/src/Makevars | 4 ++-- selectiveInference/src/Rcpp-debias.cpp | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars index 2adf9a63..22c4c7da 100644 --- a/selectiveInference/src/Makevars +++ b/selectiveInference/src/Makevars @@ -1,5 +1,5 @@ -PKG_CFLAGS= -I. -PKG_CPPFLAGS= -I. +PKG_CFLAGS= -I. -DCOLUMN_MAJOR_ORDER +PKG_CPPFLAGS= -I. -DCOLUMN_MAJOR_ORDER PKG_LIBS=-L. 
$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o quadratic_program.o quadratic_program_wide.o diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 75ff1afd..112a6a80 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -101,7 +101,6 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, int param_stop ) { - int column_major = 1; // R has matrices in column major order int ncase = X.nrow(); // number of cases int nfeature = X.ncol(); // number of features @@ -140,7 +139,6 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, (int *) nactive.begin(), ncase, nfeature, - column_major, (double *) bound.begin(), ridge_term, (double *) theta.begin(), @@ -164,7 +162,6 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, (int *) need_update.begin(), nfeature, ncase, - column_major, (double *) bound.begin(), ridge_term, kkt_tol); @@ -179,8 +176,7 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, (double *) linear_func.begin(), (int *) need_update.begin(), nfeature, - ncase, - column_major); + ncase); return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, From 3ce4ea2044095a34be9203de7ed9c2cd43964dee Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Sun, 5 Nov 2017 14:59:24 -0800 Subject: [PATCH 352/396] pivots running but weights all zero --- selectiveInference/R/RcppExports.R | 35 +++++++ selectiveInference/R/funs.randomized.R | 14 +-- tests/randomized/test_instances.R | 121 +++++++++++++++++++++++++ tests/randomized/test_randomized.R | 1 + 4 files changed, 164 insertions(+), 7 deletions(-) create mode 100644 selectiveInference/R/RcppExports.R create mode 100644 tests/randomized/test_instances.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R new file mode 100644 index 00000000..e927a3af --- /dev/null +++ b/selectiveInference/R/RcppExports.R @@ -0,0 +1,35 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { + .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) +} + +solve_QP_wide <- function(X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { + .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) +} + +update1_ <- function(Q2, w, m, k) { + .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) +} + +downdate1_ <- function(Q1, R, j0, m, n) { + .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) +} + +log_density_gaussian_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_gaussian_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, 
offset) +} + +log_density_gaussian_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_gaussian_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) +} + +log_density_laplace_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_laplace_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) +} + +log_density_laplace_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_laplace_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) +} + diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 07af4682..bf5f89d8 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -130,7 +130,7 @@ randomizedLASSO = function(X, # density for sampling optimization variables - observed_raw = -t(X) %*% Y + observed_raw = -t(X) %*% y inactive_lam = lam[inactive_set] inactive_start = sum(unpenalized) + sum(active) active_start = sum(unpenalized) @@ -187,12 +187,12 @@ linear_decomposition = function(observed_target, var_target = as.matrix(var_target) if (nrow(var_target) == 1) { nuisance = observed_internal - cov_target_internal * observed_target / var_target - target_linear = internal_transform$linear_part %*% cov_target_internal / var_target + target_linear = internal_transform$linear_term %*% cov_target_internal / var_target[1,1] } else { nuisance = observed_internal - cov_target_internal %*% solve(var_target) %*% observed_target - target_linear = internal_transform$linear_part %*% cov_target_internal %*% solve(var_target) + target_linear = internal_transform$linear_term %*% cov_target_internal %*% solve(var_target) } - target_offset = internal_transform$linear_part %*% nuisance + internal_transform$offset + target_offset = internal_transform$linear_term %*% nuisance + internal_transform$offset_term return(list(linear_term=target_linear, offset_term=target_offset)) } @@ -209,9 +209,9 @@ importance_weight = function(noise_scale, log_num = log_density_gaussian_(noise_scale, target_transform$linear_term, as.matrix(target_sample), - optimization_transform$linear_term, - as.matrix(opt_state), - target_transform$offset_term + optimization_transform$offset_term) + opt_transform$linear_term, + as.matrix(opt_sample), + target_transform$offset_term + opt_transform$offset_term) log_den = log_density_gaussian_conditional_(noise_scale, opt_transform$linear_term, diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R new file mode 100644 index 00000000..3119e0aa --- /dev/null +++ b/tests/randomized/test_instances.R @@ -0,0 +1,121 @@ +#library(devtools) +#devtools::install_github('jonathan-taylor/R-selective/selectiveInference') +library(selectiveInference, lib.loc='/Users/Jelena/anaconda/lib/R/library') + + +gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, + random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){ + if (!is.na(seed)){ + set.seed(seed) + } + + if (is.na(X)){ + X = sqrt(1-rho)*matrix(rnorm(n*p),n) + sqrt(rho)*matrix(rep(rnorm(n), p), nrow = n) + X = scale(X)/sqrt(n) + } + beta = rep(0, p) + if (s>0){ + beta[1:s] = seq(3, 6, length.out=s) + } + beta = 
sample(beta)
+ if (random_signs==TRUE & s>0){
+ signs = sample(c(-1,1), s, replace = TRUE)
+ beta = beta * signs
+ }
+ y = X %*% beta + rnorm(n)*sigma
+ result = list(X=X,y=y,beta=beta)
+ return(result)
+}
+
+
+run_instance = function(n, p, s){
+ rho=0.3
+ lam=1.3
+ sigma=1
+ data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma)
+ X=data$X
+ print(dim(X))
+ y=data$y
+ ridge_term=sd(y)/sqrt(n)
+ noise_scale = sd(y)/2
+ lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term)
+
+ active_set = lasso_soln$active_set
+ inactive_set = lasso_soln$inactive_set
+ observed_raw = lasso_soln$observed_raw
+ opt_linear = lasso_soln$optimization_transform$linear_term
+ opt_offset = lasso_soln$optimization_transform$offset_term
+ observed_opt_state = lasso_soln$observed_opt_state
+
+ nactive = length(active_set)
+ print(paste("nactive",nactive))
+ B = opt_linear[,1:nactive]
+ beta_offset = observed_raw+opt_offset
+ if (nactive<p){
+ beta_offset = beta_offset + opt_linear[,(nactive+1):p] %*% observed_opt_state[(nactive+1):p]
+ }
+ opt_transform = list(linear_term=B, offset_term = beta_offset)
+ reduced_B = chol(t(B) %*% B)
+ reduced_beta_offset = solve(t(reduced_B)) %*% (t(B) %*% beta_offset)
+
+ log_condl_optimization_density = function(opt_state) {
+ if (sum(opt_state < 0) > 0) {
+ return(-Inf)
+ }
+ D = selectiveInference:::log_density_gaussian_conditional_(noise_scale,
+ reduced_B,
+ as.matrix(observed_opt_state[1:nactive]),
+ reduced_beta_offset)
+ return(D)
+ }
+ lasso_soln$log_optimization_density = log_condl_optimization_density
+ lasso_soln$observed_opt_state = observed_opt_state[1:nactive]
+ S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), nactive), nsample=10000)
+ beta_samples = S$samples[2001:10000,]
+ print(paste("dim beta samples", dim(beta_samples)))
+
+ X_E=X[, active_set]
+ X_minusE=X[, inactive_set]
+ target_cov = solve(t(X_E)%*%X_E)*sigma^2
+ cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) * sigma^2
+ observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y
+ observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target))
+ internal_transform = lasso_soln$internal_transform
+
+ pivots = rep(0, nactive)
+ for (i in 1:nactive){
+ target_transform = selectiveInference:::linear_decomposition(observed_target[i],
+ observed_internal,
+ target_cov[i,i],
+ cov_target_internal[,i],
+ internal_transform)
+ target_sample = rnorm(nrow(beta_samples)) * sqrt(target_cov[i,i])
+
+ weights = selectiveInference:::importance_weight(noise_scale,
+ t(as.matrix(target_sample)),
+ t(beta_samples),
+ opt_transform,
+ target_transform,
+ observed_raw)
+
+ pivots[i] = mean((target_sample<observed_target[i])*weights)/mean(weights)
+ print(pivots[i])
+ }
+
+ return(pivots)
+}
+
+collect_instances = function(n,p,s, nsim=1){
+ sample_pivots = NULL
+ for (i in 1:nsim){
+ result = run_instance(n,p,s)
+ sample_pivots = c(sample_pivots, result)
+ }
+ plot(ecdf(sample_pivots), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf")
+ abline(0, 1, lty=2)
+ dev.off()
+}
+
+collect_instances(n=100, p=20, s=0)

Date: Sun, 5 Nov 2017 15:46:10 -0800
Subject: [PATCH 353/396] reorg

---
 tests/randomized/test_instances.R | 94 ++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 33 deletions(-)

diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R
index 3119e0aa..1c125f2e 100644
--- a/tests/randomized/test_instances.R
+++ b/tests/randomized/test_instances.R
@@ -27,18 +27,7 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA,
 return(result)
 }

-
-run_instance = function(n, p, s){
- rho=0.3
- lam=1.
- sigma=1
- data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma)
- X=data$X
- print(dim(X))
- y=data$y
- ridge_term=sd(y)/sqrt(n)
- noise_scale = sd(y)/2
- lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term)
+conditional_density = function(noise_scale, lasso_soln){

 active_set = lasso_soln$active_set
 inactive_set = lasso_soln$inactive_set
 observed_raw = lasso_soln$observed_raw
 opt_linear = lasso_soln$optimization_transform$linear_term
 opt_offset = lasso_soln$optimization_transform$offset_term
@@ -48,9 +37,9 @@ run_instance = function(n, p, s){
 observed_opt_state = lasso_soln$observed_opt_state

 nactive = length(active_set)
- print(paste("nactive",nactive))
 B = opt_linear[,1:nactive]
 beta_offset = observed_raw+opt_offset
+
p=length(observed_opt_state) if (nactive 0) { - return(-Inf) - } - D = selectiveInference:::log_density_gaussian_conditional_(noise_scale, - reduced_B, - as.matrix(observed_opt_state[1:nactive]), - reduced_beta_offset) - return(D) + + if (sum(opt_state < 0) > 0) { + return(-Inf) } + D = selectiveInference:::log_density_gaussian_conditional_(noise_scale, + reduced_B, + as.matrix(observed_opt_state[1:nactive]), + reduced_beta_offset) + return(D) + } lasso_soln$log_optimization_density = log_condl_optimization_density lasso_soln$observed_opt_state = observed_opt_state[1:nactive] + lasso_soln$optimization_transform = opt_transform + return(lasso_soln) +} + + + +run_instance = function(n, p, s){ + rho=0.3 + lam=1. + sigma=1 + data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma) + X=data$X + print(dim(X)) + y=data$y + ridge_term=sd(y)/sqrt(n) + noise_scale = sd(y)/2 + lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) + active_set = lasso_soln$active_set + inactive_set = lasso_soln$inactive_set + nactive = length(active_set) + print(paste("nactive", nactive)) + + lasso_soln = conditional_density(noise_scale, lasso_soln) + S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), nactive), nsample=10000) - beta_samples = S$samples[2001:10000,] - print(paste("dim beta samples", dim(beta_samples))) + opt_samples = S$samples[2001:10000,] + print(paste("dim opt samples", toString(dim(opt_samples)))) + + observed_raw = lasso_soln$observed_raw + opt_linear = lasso_soln$optimization_transform$linear_term + opt_offset = lasso_soln$optimization_transform$offset_term + observed_opt_state = lasso_soln$observed_opt_state + opt_transform = lasso_soln$optimization_transform X_E=X[, active_set] X_minusE=X[, inactive_set] target_cov = solve(t(X_E)%*%X_E)*sigma^2 - cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) * sigma^2 + cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target)) internal_transform = lasso_soln$internal_transform @@ -91,16 +110,20 @@ run_instance = function(n, p, s){ target_cov[i,i], cov_target_internal[,i], internal_transform) - target_sample = rnorm(nrow(beta_samples)) * sqrt(target_cov[i,i]) - weights = selectiveInference:::importance_weight(noise_scale, - t(as.matrix(target_sample)), - t(beta_samples), + target_sample = rnorm(nrow(opt_samples)) * sqrt(target_cov[i,i]) + + pivot = function(candidate){ + weights = selectiveInference:::importance_weight(noise_scale, + t(as.matrix(target_sample))+candidate, + t(opt_samples), opt_transform, target_transform, observed_raw) - - pivots[i] = mean((target_sample Date: Sun, 5 Nov 2017 18:17:50 -0800 Subject: [PATCH 354/396] segfault error --- tests/randomized/test_instances.R | 46 ++++++++++++++++--------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 1c125f2e..55807fd7 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -66,28 +66,16 @@ conditional_density = function(noise_scale, lasso_soln){ } - -run_instance = function(n, p, s){ - rho=0.3 - lam=1. 
- sigma=1 - data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma) - X=data$X - print(dim(X)) - y=data$y - ridge_term=sd(y)/sqrt(n) - noise_scale = sd(y)/2 +run_instance = function(X,y,sigma, lam, noise_scale, ridge_term){ + n=nrow(X) + p=ncol(X) lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) active_set = lasso_soln$active_set inactive_set = lasso_soln$inactive_set nactive = length(active_set) print(paste("nactive", nactive)) - lasso_soln = conditional_density(noise_scale, lasso_soln) - - S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), nactive), nsample=10000) - opt_samples = S$samples[2001:10000,] - print(paste("dim opt samples", toString(dim(opt_samples)))) + #lasso_soln = conditional_density(noise_scale, lasso_soln) observed_raw = lasso_soln$observed_raw opt_linear = lasso_soln$optimization_transform$linear_term @@ -95,6 +83,11 @@ run_instance = function(n, p, s){ observed_opt_state = lasso_soln$observed_opt_state opt_transform = lasso_soln$optimization_transform + dim=length(observed_opt_state) + S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) + opt_samples = S$samples[2001:10000,] + print(paste("dim opt samples", toString(dim(opt_samples)))) + X_E=X[, active_set] X_minusE=X[, inactive_set] target_cov = solve(t(X_E)%*%X_E)*sigma^2 @@ -124,26 +117,35 @@ run_instance = function(n, p, s){ } pivots[i] = pivot(0) - print(pivots[i]) } - + print(paste("pivots", toString(pivots))) return(pivots) } -collect_instances = function(n,p,s, nsim=1){ +collect_results = function(n,p,s, nsim=2){ + rho=0.3 + lam=1. + sigma=1 sample_pivots = NULL for (i in 1:nsim){ - result = run_instance(n,p,s) + data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma) + X=data$X + print(dim(X)) + y=data$y + ridge_term=sd(y)/sqrt(n) + noise_scale = sd(y)/2 + result = run_instance(X,y,sigma, lam, noise_scale, ridge_term) sample_pivots = c(sample_pivots, result) } + jpeg('pivots.jpg') plot(ecdf(sample_pivots), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf") abline(0, 1, lty=2) dev.off() } - -collect_instances(n=100, p=20, s=0) +set.seed(1) +collect_results(n=100, p=20, s=0) From 68e27f4895a84f59118cdd9f29e2c788261d1659 Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Sun, 5 Nov 2017 18:56:06 -0800 Subject: [PATCH 355/396] bug in weights --- selectiveInference/R/funs.randomized.R | 2 +- tests/randomized/test_instances.R | 25 +++++++++++-------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index bf5f89d8..4fc357c6 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -216,7 +216,7 @@ importance_weight = function(noise_scale, log_den = log_density_gaussian_conditional_(noise_scale, opt_transform$linear_term, as.matrix(opt_sample), - observed_raw) + observed_raw+opt_transform$offset_term) W = log_num - log_den W = W - max(W) return(exp(W)) diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 55807fd7..91673da5 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -30,7 +30,6 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, conditional_density = function(noise_scale, lasso_soln){ active_set = lasso_soln$active_set - inactive_set = lasso_soln$inactive_set observed_raw = lasso_soln$observed_raw opt_linear = 
lasso_soln$optimization_transform$linear_term opt_offset = lasso_soln$optimization_transform$offset_term @@ -41,15 +40,14 @@ conditional_density = function(noise_scale, lasso_soln){ beta_offset = observed_raw+opt_offset p=length(observed_opt_state) if (nactive 0) { return(-Inf) } @@ -77,25 +75,24 @@ run_instance = function(X,y,sigma, lam, noise_scale, ridge_term){ #lasso_soln = conditional_density(noise_scale, lasso_soln) - observed_raw = lasso_soln$observed_raw - opt_linear = lasso_soln$optimization_transform$linear_term - opt_offset = lasso_soln$optimization_transform$offset_term - observed_opt_state = lasso_soln$observed_opt_state - opt_transform = lasso_soln$optimization_transform - - dim=length(observed_opt_state) + dim=length(lasso_soln$observed_opt_state) + print(paste("chain dim", dim)) S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) opt_samples = S$samples[2001:10000,] print(paste("dim opt samples", toString(dim(opt_samples)))) X_E=X[, active_set] X_minusE=X[, inactive_set] - target_cov = solve(t(X_E)%*%X_E)*sigma^2 + target_cov = solve(t(X_E) %*% X_E)*sigma^2 cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target)) internal_transform = lasso_soln$internal_transform + observed_opt_state = lasso_soln$observed_opt_state + opt_transform = lasso_soln$optimization_transform + observed_raw = lasso_soln$observed_raw + pivots = rep(0, nactive) for (i in 1:nactive){ target_transform = selectiveInference:::linear_decomposition(observed_target[i], @@ -122,7 +119,7 @@ run_instance = function(X,y,sigma, lam, noise_scale, ridge_term){ return(pivots) } -collect_results = function(n,p,s, nsim=2){ +collect_results = function(n,p,s, nsim=1){ rho=0.3 lam=1. 
sigma=1 From 0fa7c4e17bfdb2af1a436f2b11a2d0a2824a355d Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Sun, 5 Nov 2017 19:24:41 -0800 Subject: [PATCH 356/396] condl bug fixed --- selectiveInference/R/funs.randomized.R | 3 +-- tests/randomized/test_instances.R | 10 ++++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 4fc357c6..0d1ae81b 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -145,11 +145,10 @@ randomizedLASSO = function(X, (sum(opt_state[(active_start+1):inactive_start] < 0) > 0)) { return(-Inf) } - D = log_density_gaussian_conditional_(noise_scale, opt_transform$linear_term, as.matrix(opt_state), - observed_raw) + observed_raw+opt_transform$offset_term) return(D) } diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 91673da5..3d4bc310 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -37,7 +37,7 @@ conditional_density = function(noise_scale, lasso_soln){ nactive = length(active_set) B = opt_linear[,1:nactive] - beta_offset = observed_raw+opt_offset + beta_offset = opt_offset p=length(observed_opt_state) if (nactive Date: Mon, 6 Nov 2017 11:31:28 -0800 Subject: [PATCH 357/396] added ci --- tests/randomized/test_instances.R | 31 ++++++++++++++++++++++++------ tests/randomized/test_randomized.R | 1 - 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 3d4bc310..54bbd59a 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -65,7 +65,7 @@ conditional_density = function(noise_scale, lasso_soln){ } -run_instance = function(X,y,sigma, lam, noise_scale, ridge_term){ +randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){ n=nrow(X) p=ncol(X) lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) @@ -74,7 +74,7 @@ run_instance = function(X,y,sigma, lam, noise_scale, ridge_term){ nactive = length(active_set) print(paste("nactive", nactive)) - lasso_soln = conditional_density(noise_scale, lasso_soln) + #lasso_soln = conditional_density(noise_scale, lasso_soln) dim=length(lasso_soln$observed_opt_state) print(paste("chain dim", dim)) @@ -93,6 +93,7 @@ run_instance = function(X,y,sigma, lam, noise_scale, ridge_term){ observed_raw = lasso_soln$observed_raw pivots = rep(0, nactive) + ci = matrix(0, nactive, 2) for (i in 1:nactive){ target_transform = selectiveInference:::linear_decomposition(observed_target[i], observed_internal, @@ -110,11 +111,24 @@ run_instance = function(X,y,sigma, lam, noise_scale, ridge_term){ observed_raw) return(mean((target_sample Date: Mon, 6 Nov 2017 11:37:10 -0800 Subject: [PATCH 358/396] removed local import --- selectiveInference/R/RcppExports.R | 35 ------------------------------ tests/randomized/test_instances.R | 5 +---- 2 files changed, 1 insertion(+), 39 deletions(-) delete mode 100644 selectiveInference/R/RcppExports.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R deleted file mode 100644 index e927a3af..00000000 --- a/selectiveInference/R/RcppExports.R +++ /dev/null @@ -1,35 +0,0 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, 
objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) -} - -solve_QP_wide <- function(X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { - .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) -} - -update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) -} - -downdate1_ <- function(Q1, R, j0, m, n) { - .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) -} - -log_density_gaussian_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_gaussian_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) -} - -log_density_gaussian_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_gaussian_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) -} - -log_density_laplace_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_laplace_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) -} - -log_density_laplace_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_laplace_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) -} - diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 54bbd59a..9ae0afd1 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -1,7 +1,4 @@ -#library(devtools) -#devtools::install_github('jonathan-taylor/R-selective/selectiveInference') -library(selectiveInference, lib.loc='/Users/Jelena/anaconda/lib/R/library') - +library(selectiveInference) gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){ From 71b6586287a1cb980d439492548dcec7fbee18d1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 6 Nov 2017 17:26:54 -0800 Subject: [PATCH 359/396] BF: segfault seems fixed -- ensured all functions have some order of ncase, nfeature --- selectiveInference/src/Rcpp-debias.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 112a6a80..f5160ba2 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -160,8 +160,8 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, (double *) X.begin(), (double *) linear_func.begin(), (int *) need_update.begin(), - nfeature, ncase, + nfeature, (double *) bound.begin(), ridge_term, 
kkt_tol); @@ -175,8 +175,8 @@ Rcpp::List solve_QP_wide(Rcpp::NumericMatrix X, (double *) X.begin(), (double *) linear_func.begin(), (int *) need_update.begin(), - nfeature, - ncase); + ncase, + nfeature); return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, From e0ee2a7f583fa0c1b785ba7180a89098c5887cbe Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 6 Nov 2017 17:27:42 -0800 Subject: [PATCH 360/396] jelena's inference functions --- selectiveInference/R/funs.randomized.R | 105 +++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 0d1ae81b..93a8dbbf 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -221,3 +221,108 @@ importance_weight = function(noise_scale, return(exp(W)) } +### Jelena's functions + +conditional_density = function(noise_scale, lasso_soln){ + + active_set = lasso_soln$active_set + observed_raw = lasso_soln$observed_raw + opt_linear = lasso_soln$optimization_transform$linear_term + opt_offset = lasso_soln$optimization_transform$offset_term + observed_opt_state = lasso_soln$observed_opt_state + + nactive = length(active_set) + B = opt_linear[,1:nactive] + beta_offset = opt_offset + p=length(observed_opt_state) + if (nactive 0) { + return(-Inf) + } + D = selectiveInference:::log_density_gaussian_conditional_(noise_scale, + reduced_B, + as.matrix(opt_state), + reduced_beta_offset) + return(D) + } + lasso_soln$log_optimization_density = log_condl_optimization_density + lasso_soln$observed_opt_state = observed_opt_state[1:nactive] + lasso_soln$optimization_transform = opt_transform + return(lasso_soln) +} + + +randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){ + n=nrow(X) + p=ncol(X) + lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) + active_set = lasso_soln$active_set + inactive_set = lasso_soln$inactive_set + nactive = length(active_set) + print(paste("nactive", nactive)) + + #lasso_soln = conditional_density(noise_scale, lasso_soln) + + dim=length(lasso_soln$observed_opt_state) + print(paste("chain dim", dim)) + S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) + opt_samples = S$samples[2001:10000,] + print(paste("dim opt samples", toString(dim(opt_samples)))) + + X_E=X[, active_set] + X_minusE=X[, inactive_set] + target_cov = solve(t(X_E) %*% X_E)*sigma^2 + cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) + observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y + observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target)) + internal_transform = lasso_soln$internal_transform + opt_transform = lasso_soln$optimization_transform + observed_raw = lasso_soln$observed_raw + + pivots = rep(0, nactive) + ci = matrix(0, nactive, 2) + for (i in 1:nactive){ + target_transform = selectiveInference:::linear_decomposition(observed_target[i], + observed_internal, + target_cov[i,i], + cov_target_internal[,i], + internal_transform) + target_sample = rnorm(nrow(opt_samples)) * sqrt(target_cov[i,i]) + + pivot = function(candidate){ + weights = selectiveInference:::importance_weight(noise_scale, + t(as.matrix(target_sample))+candidate, + t(opt_samples), + opt_transform, + target_transform, + observed_raw) + return(mean((target_sample Date: Mon, 6 Nov 2017 18:03:36 -0800 Subject: [PATCH 361/396] BF: seems ncase and nfeature were 
swapped in some places in C code --- C-software | 2 +- tests/randomized/test_instances.R | 112 ++---------------------------- 2 files changed, 5 insertions(+), 109 deletions(-) diff --git a/C-software b/C-software index 158c64d8..ec6a954d 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 158c64d8d81fbcf434869c0c68f5bb7a4a9cdf5a +Subproject commit ec6a954d6b335439115e961abde91fa5a07a3669 diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 9ae0afd1..3ee447fc 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -7,7 +7,7 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, } if (is.na(X)){ - X = sqrt(1-rho)*matrix(rnorm(n*p),n) + sqrt(rho)*matrix(rep(rnorm(n), p), nrow = n) + X = sqrt(1-rho)*matrix(rnorm(n*p),n, p) + sqrt(rho)*matrix(rep(rnorm(n), p), nrow = n) X = scale(X)/sqrt(n) } beta = rep(0, p) @@ -24,119 +24,15 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, return(result) } -conditional_density = function(noise_scale, lasso_soln){ - - active_set = lasso_soln$active_set - observed_raw = lasso_soln$observed_raw - opt_linear = lasso_soln$optimization_transform$linear_term - opt_offset = lasso_soln$optimization_transform$offset_term - observed_opt_state = lasso_soln$observed_opt_state - - nactive = length(active_set) - B = opt_linear[,1:nactive] - beta_offset = opt_offset - p=length(observed_opt_state) - if (nactive 0) { - return(-Inf) - } - D = selectiveInference:::log_density_gaussian_conditional_(noise_scale, - reduced_B, - as.matrix(opt_state), - reduced_beta_offset) - return(D) - } - lasso_soln$log_optimization_density = log_condl_optimization_density - lasso_soln$observed_opt_state = observed_opt_state[1:nactive] - lasso_soln$optimization_transform = opt_transform - return(lasso_soln) -} - - -randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){ - n=nrow(X) - p=ncol(X) - lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) - active_set = lasso_soln$active_set - inactive_set = lasso_soln$inactive_set - nactive = length(active_set) - print(paste("nactive", nactive)) - - #lasso_soln = conditional_density(noise_scale, lasso_soln) - - dim=length(lasso_soln$observed_opt_state) - print(paste("chain dim", dim)) - S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) - opt_samples = S$samples[2001:10000,] - print(paste("dim opt samples", toString(dim(opt_samples)))) - - X_E=X[, active_set] - X_minusE=X[, inactive_set] - target_cov = solve(t(X_E) %*% X_E)*sigma^2 - cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) - observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y - observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target)) - internal_transform = lasso_soln$internal_transform - opt_transform = lasso_soln$optimization_transform - observed_raw = lasso_soln$observed_raw - - pivots = rep(0, nactive) - ci = matrix(0, nactive, 2) - for (i in 1:nactive){ - target_transform = selectiveInference:::linear_decomposition(observed_target[i], - observed_internal, - target_cov[i,i], - cov_target_internal[,i], - internal_transform) - target_sample = rnorm(nrow(opt_samples)) * sqrt(target_cov[i,i]) - - pivot = function(candidate){ - weights = selectiveInference:::importance_weight(noise_scale, - t(as.matrix(target_sample))+candidate, - t(opt_samples), - opt_transform, - target_transform, - observed_raw) - 
return(mean((target_sample Date: Mon, 6 Nov 2017 18:14:48 -0800 Subject: [PATCH 362/396] minor cleanup -- doesn't seem to use conditional_density? --- selectiveInference/R/funs.randomized.R | 39 ++++++++++---------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 93a8dbbf..e0a1eec4 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -221,9 +221,7 @@ importance_weight = function(noise_scale, return(exp(W)) } -### Jelena's functions - -conditional_density = function(noise_scale, lasso_soln){ +conditional_density = function(noise_scale, lasso_soln) { active_set = lasso_soln$active_set observed_raw = lasso_soln$observed_raw @@ -241,7 +239,7 @@ conditional_density = function(noise_scale, lasso_soln){ opt_transform = list(linear_term=B, offset_term = beta_offset) reduced_B = chol(t(B) %*% B) - beta_offset = beta_offset+observed_raw + beta_offset = beta_offset + observed_raw reduced_beta_offset = solve(t(reduced_B)) %*% (t(B) %*% beta_offset) log_condl_optimization_density = function(opt_state) { @@ -260,26 +258,23 @@ conditional_density = function(noise_scale, lasso_soln){ return(lasso_soln) } +randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term){ -randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){ - n=nrow(X) - p=ncol(X) - lasso_soln=selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) + n = nrow(X) + p = ncol(X) + lasso_soln = selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) active_set = lasso_soln$active_set inactive_set = lasso_soln$inactive_set nactive = length(active_set) - print(paste("nactive", nactive)) - - #lasso_soln = conditional_density(noise_scale, lasso_soln) - - dim=length(lasso_soln$observed_opt_state) + + dim = length(lasso_soln$observed_opt_state) print(paste("chain dim", dim)) S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) opt_samples = S$samples[2001:10000,] print(paste("dim opt samples", toString(dim(opt_samples)))) - X_E=X[, active_set] - X_minusE=X[, inactive_set] + X_E = X[, active_set] + X_minusE = X[, inactive_set] target_cov = solve(t(X_E) %*% X_E)*sigma^2 cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y @@ -288,7 +283,7 @@ randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){ opt_transform = lasso_soln$optimization_transform observed_raw = lasso_soln$observed_raw - pivots = rep(0, nactive) + pvalus = rep(0, nactive) ci = matrix(0, nactive, 2) for (i in 1:nactive){ target_transform = selectiveInference:::linear_decomposition(observed_target[i], @@ -300,7 +295,7 @@ randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){ pivot = function(candidate){ weights = selectiveInference:::importance_weight(noise_scale, - t(as.matrix(target_sample))+candidate, + t(as.matrix(target_sample)) + candidate, t(opt_samples), opt_transform, target_transform, @@ -314,15 +309,11 @@ randomized_inference = function(X,y,sigma, lam, noise_scale, ridge_term){ rootL = function(candidate){ return (pivot(observed_target[i]+candidate)-(1+level)/2) } - pivots[i] = pivot(0) + pvalues[i] = pivot(0) line_min = -10*sd(target_sample) line_max = 10*sd(target_sample) ci[i,1] = uniroot(rootU, c(line_min, line_max))$root+observed_target[i] - ci[i,2] = uniroot(rootL,c(line_min, 
line_max))$root+observed_target[i] - } - print(paste("pivots", toString(pivots))) - for (i in 1:nactive){ - print(paste("CIs", toString(ci[i,]))) + ci[i,2] = uniroot(rootL, c(line_min, line_max))$root+observed_target[i] } - return(list(pivots=pivots, ci=ci)) + return(list(pvalues=pvalues, ci=ci)) } From 07b6bb705d291f88e9ff9baaa01fc73969af1b6a Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Mon, 6 Nov 2017 22:50:13 -0800 Subject: [PATCH 363/396] coverages --- selectiveInference/R/RcppExports.R | 35 ++++++++++++++++++++++++++ selectiveInference/R/funs.randomized.R | 27 +++++++++++++------- tests/randomized/test_instances.R | 30 +++++++++++++--------- 3 files changed, 71 insertions(+), 21 deletions(-) create mode 100644 selectiveInference/R/RcppExports.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R new file mode 100644 index 00000000..e927a3af --- /dev/null +++ b/selectiveInference/R/RcppExports.R @@ -0,0 +1,35 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { + .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) +} + +solve_QP_wide <- function(X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { + .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) +} + +update1_ <- function(Q2, w, m, k) { + .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) +} + +downdate1_ <- function(Q1, R, j0, m, n) { + .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) +} + +log_density_gaussian_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_gaussian_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) +} + +log_density_gaussian_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_gaussian_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) +} + +log_density_laplace_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_laplace_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) +} + +log_density_laplace_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_laplace_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) +} + diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 
e0a1eec4..78c3a876 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -258,7 +258,7 @@ conditional_density = function(noise_scale, lasso_soln) { return(lasso_soln) } -randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term){ +randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level=0.9){ n = nrow(X) p = ncol(X) @@ -283,7 +283,7 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term){ opt_transform = lasso_soln$optimization_transform observed_raw = lasso_soln$observed_raw - pvalus = rep(0, nactive) + pvalues = rep(0, nactive) ci = matrix(0, nactive, 2) for (i in 1:nactive){ target_transform = selectiveInference:::linear_decomposition(observed_target[i], @@ -300,9 +300,8 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term){ opt_transform, target_transform, observed_raw) - return(mean((target_sampletrue_beta[i]){ + coverage[i]=1 + } + print(paste("ci", toString(result$ci[i,]))) + } + sample_pvalues = c(sample_pvalues, result$pvalues) + sample_coverage = c(sample_coverage, coverage) } - + print(paste("coverage", mean(sample_coverage))) jpeg('pivots.jpg') - plot(ecdf(sample_pivots), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf") + plot(ecdf(sample_pvalues), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf") abline(0, 1, lty=2) dev.off() } From 1115635aa730bf05c7901bcfbaa62c1248058b6f Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Tue, 7 Nov 2017 11:10:11 -0800 Subject: [PATCH 364/396] subgrad condition option added --- selectiveInference/R/funs.randomized.R | 9 +++++++-- tests/randomized/test_instances.R | 6 +++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 78c3a876..95b2a7a0 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -258,7 +258,8 @@ conditional_density = function(noise_scale, lasso_soln) { return(lasso_soln) } -randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level=0.9){ +randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, + condition_subgrad=FALSE, level=0.9){ n = nrow(X) p = ncol(X) @@ -266,7 +267,11 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level active_set = lasso_soln$active_set inactive_set = lasso_soln$inactive_set nactive = length(active_set) - + + if (condition_subgrad==TRUE){ + lasso_soln=conditional_density(noise_scale,lasso_soln) + } + dim = length(lasso_soln$observed_opt_state) print(paste("chain dim", dim)) S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 7bbc54ab..380323d5 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -25,8 +25,8 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, } -collect_results = function(n,p,s, nsim=10, level=0.9){ - rho=0. +collect_results = function(n,p,s, nsim=100, level=0.9){ + rho=0.3 lam=1. 
sigma=1 sample_pvalues = c() @@ -38,7 +38,7 @@ collect_results = function(n,p,s, nsim=10, level=0.9){ beta=data$beta ridge_term=sd(y)/sqrt(n) noise_scale = sd(y)/2 - result = selectiveInference:::randomized_inference(X,y,sigma,lam,noise_scale,ridge_term, level) + result = selectiveInference:::randomized_inference(X,y,sigma,lam,noise_scale,ridge_term, TRUE, level) true_beta = beta[result$active_set] coverage = rep(0, nrow(result$ci)) for (i in 1:nrow(result$ci)){ From e2f45859d8dad938dbdc9dacf0bb1f3f7a277c98 Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Tue, 7 Nov 2017 11:43:44 -0800 Subject: [PATCH 365/396] bug --- selectiveInference/R/funs.randomized.R | 9 +++------ tests/randomized/test_instances.R | 6 +++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 95b2a7a0..37b81218 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -258,8 +258,7 @@ conditional_density = function(noise_scale, lasso_soln) { return(lasso_soln) } -randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, - condition_subgrad=FALSE, level=0.9){ +randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level=0.9){ n = nrow(X) p = ncol(X) @@ -268,9 +267,7 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, inactive_set = lasso_soln$inactive_set nactive = length(active_set) - if (condition_subgrad==TRUE){ - lasso_soln=conditional_density(noise_scale,lasso_soln) - } + lasso_soln=conditional_density(noise_scale,lasso_soln) dim = length(lasso_soln$observed_opt_state) print(paste("chain dim", dim)) @@ -296,7 +293,7 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, target_cov[i,i], cov_target_internal[,i], internal_transform) - target_sample = rnorm(nrow(opt_samples)) * sqrt(target_cov[i,i]) + target_sample = rnorm(nrow(as.matrix(opt_samples))) * sqrt(target_cov[i,i]) pivot = function(candidate){ weights = selectiveInference:::importance_weight(noise_scale, diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 380323d5..4e8d5292 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -25,9 +25,9 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, } -collect_results = function(n,p,s, nsim=100, level=0.9){ +collect_results = function(n,p,s, nsim=1, level=0.9){ rho=0.3 - lam=1. + lam=2. 
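# The coverage bookkeeping in collect_results reduces to one indicator per
# confidence interval; a tiny self-contained sketch of the same check, using
# made-up intervals (ci_toy) and truth (beta_toy) rather than package output:
ci_toy = rbind(c(-1, 1), c(0.5, 2), c(-3, -2))
beta_toy = c(0, 1, 0)
covered = (ci_toy[, 1] < beta_toy) & (ci_toy[, 2] > beta_toy)
mean(covered)  # empirical coverage, here 2/3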
sigma=1 sample_pvalues = c() sample_coverage = c() @@ -38,7 +38,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9){ beta=data$beta ridge_term=sd(y)/sqrt(n) noise_scale = sd(y)/2 - result = selectiveInference:::randomized_inference(X,y,sigma,lam,noise_scale,ridge_term, TRUE, level) + result = selectiveInference:::randomized_inference(X,y,sigma,lam,noise_scale,ridge_term, level) true_beta = beta[result$active_set] coverage = rep(0, nrow(result$ci)) for (i in 1:nrow(result$ci)){ From df777e381fa9f9b3630fc33ea161f1719fa4a394 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 7 Nov 2017 11:55:21 -0800 Subject: [PATCH 366/396] using some default arguments now -- bug in C code for densities --- selectiveInference/R/RcppExports.R | 35 ------------ selectiveInference/R/funs.randomized.R | 73 ++++++++++++++++++-------- tests/randomized/test_instances.R | 4 +- tests/randomized/test_randomized.R | 4 +- 4 files changed, 54 insertions(+), 62 deletions(-) delete mode 100644 selectiveInference/R/RcppExports.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R deleted file mode 100644 index e927a3af..00000000 --- a/selectiveInference/R/RcppExports.R +++ /dev/null @@ -1,35 +0,0 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) -} - -solve_QP_wide <- function(X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { - .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) -} - -update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) -} - -downdate1_ <- function(Q1, R, j0, m, n) { - .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) -} - -log_density_gaussian_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_gaussian_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) -} - -log_density_gaussian_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_gaussian_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) -} - -log_density_laplace_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_laplace_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) -} - -log_density_laplace_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { - 
.Call('selectiveInference_log_density_laplace_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) -} - diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 95b2a7a0..e065a5e7 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -3,11 +3,11 @@ # # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2 -randomizedLASSO = function(X, +randomizedLasso = function(X, y, lam, - noise_scale, - ridge_term, + noise_scale=NULL, + ridge_term=NULL, noise_type=c('gaussian', 'laplace'), max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions @@ -20,6 +20,21 @@ randomizedLASSO = function(X, n = nrow(X); p = ncol(X) + mean_diag = mean(apply(X^2, 2, sum)) + + # default ridge term + + if (is.null(ridge_term)) { + ridge_term = sqrt(mean_diag) * sd(y) / sqrt(n) + } + + # default noise level + + if (is.null(noise_scale)) { + noise_scale = 0.5 * sd(y) * sqrt(mean_diag) + } + + print(c(noise_scale, ridge_term)) noise_type = match.arg(noise_type) if (noise_scale > 0) { @@ -246,10 +261,10 @@ conditional_density = function(noise_scale, lasso_soln) { if (sum(opt_state < 0) > 0) { return(-Inf) } - D = selectiveInference:::log_density_gaussian_conditional_(noise_scale, - reduced_B, - as.matrix(opt_state), - reduced_beta_offset) + D = log_density_gaussian_conditional_(noise_scale, + reduced_B, + as.matrix(opt_state), + reduced_beta_offset) return(D) } lasso_soln$log_optimization_density = log_condl_optimization_density @@ -258,12 +273,18 @@ conditional_density = function(noise_scale, lasso_soln) { return(lasso_soln) } -randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, - condition_subgrad=FALSE, level=0.9){ +randomizedLassoInf = function(X, + y, + lam, + sigma=NULL, + noise_scale=NULL, + ridge_term=NULL, + condition_subgrad=TRUE, + level=0.9) { n = nrow(X) p = ncol(X) - lasso_soln = selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term) + lasso_soln = randomizedLasso(X, y, lam, noise_scale, ridge_term) active_set = lasso_soln$active_set inactive_set = lasso_soln$inactive_set nactive = length(active_set) @@ -274,12 +295,20 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, dim = length(lasso_soln$observed_opt_state) print(paste("chain dim", dim)) - S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) + S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) opt_samples = S$samples[2001:10000,] print(paste("dim opt samples", toString(dim(opt_samples)))) X_E = X[, active_set] X_minusE = X[, inactive_set] + + # if no sigma given, use OLS estimate + + if (is.null(sigma)) { + lm_y = lm(y ~ X[,E] - 1) + sigma = sum(resid(lm_y)^2 / lm_y$df.resid) + } + print(c(sigma, 'sigma')) target_cov = solve(t(X_E) %*% X_E)*sigma^2 cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y @@ -291,20 +320,20 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, pvalues = rep(0, nactive) ci = matrix(0, nactive, 2) for (i in 1:nactive){ - target_transform = selectiveInference:::linear_decomposition(observed_target[i], - observed_internal, - target_cov[i,i], - cov_target_internal[,i], - internal_transform) + target_transform = 
linear_decomposition(observed_target[i],
+                                                 observed_internal,
+                                                 target_cov[i,i],
+                                                 cov_target_internal[,i],
+                                                 internal_transform)
     target_sample = rnorm(nrow(opt_samples)) * sqrt(target_cov[i,i])
     pivot = function(candidate){
-      weights = selectiveInference:::importance_weight(noise_scale,
-                                                       t(as.matrix(target_sample)) + candidate,
-                                                       t(opt_samples),
-                                                       opt_transform,
-                                                       target_transform,
-                                                       observed_raw)
+      weights = importance_weight(noise_scale,
+                                  t(as.matrix(target_sample)) + candidate,
+                                  t(opt_samples),
+                                  opt_transform,
+                                  target_transform,
+                                  observed_raw)
       return(mean((target_sample+candidate < observed_target[i])*weights)/mean(weights))
Date: Tue, 7 Nov 2017 11:58:06 -0800
Subject: [PATCH 367/396] dealing with zero length active set

---
 selectiveInference/R/funs.randomized.R |  6 +++++-
 tests/randomized/test_instances.R      | 30 +++++++++++++++-----------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index 37b81218..3d8ddec2 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -264,6 +264,9 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level
   p = ncol(X)
   lasso_soln = selectiveInference:::randomizedLASSO(X, y, lam, noise_scale, ridge_term)
   active_set = lasso_soln$active_set
+  if (length(active_set)==0){
+    return (list(active_set=active_set, pvalues=c(), ci=c()))
+  }
   inactive_set = lasso_soln$inactive_set
   nactive = length(active_set)
@@ -293,8 +296,9 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level
                                                  target_cov[i,i],
                                                  cov_target_internal[,i],
                                                  internal_transform)
+
     target_sample = rnorm(nrow(as.matrix(opt_samples))) * sqrt(target_cov[i,i])
-
+    print(length(target_sample))
     pivot = function(candidate){
       weights = selectiveInference:::importance_weight(noise_scale,
                                                        t(as.matrix(target_sample)) + candidate,
diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R
index 4e8d5292..baafb158 100644
--- a/tests/randomized/test_instances.R
+++ b/tests/randomized/test_instances.R
@@ -39,22 +39,26 @@ collect_results = function(n,p,s, nsim=1, level=0.9){
     ridge_term=sd(y)/sqrt(n)
     noise_scale = sd(y)/2
     result = selectiveInference:::randomized_inference(X,y,sigma,lam,noise_scale,ridge_term, level)
-    true_beta = beta[result$active_set]
-    coverage = rep(0, nrow(result$ci))
-    for (i in 1:nrow(result$ci)){
-      if (result$ci[i,1]<true_beta[i] & result$ci[i,2]>true_beta[i]){
-        coverage[i]=1
+    if (length(result$active_set)>0){
+      true_beta = beta[result$active_set]
+      coverage = rep(0, nrow(result$ci))
+      for (i in 1:nrow(result$ci)){
+        if (result$ci[i,1]<true_beta[i] & result$ci[i,2]>true_beta[i]){
+          coverage[i]=1
+        }
+        print(paste("ci", toString(result$ci[i,])))
       }
-      print(paste("ci", toString(result$ci[i,])))
+      sample_pvalues = c(sample_pvalues, result$pvalues)
+      sample_coverage = c(sample_coverage, coverage)
     }
-    sample_pvalues = c(sample_pvalues, result$pvalues)
-    sample_coverage = c(sample_coverage, coverage)
   }
-  print(paste("coverage", mean(sample_coverage)))
-  jpeg('pivots.jpg')
-  plot(ecdf(sample_pvalues), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf")
-  abline(0, 1, lty=2)
-  dev.off()
+  if (length(sample_coverage)>0){
+    print(paste("coverage", mean(sample_coverage)))
+    jpeg('pivots.jpg')
+    plot(ecdf(sample_pvalues), xlim=c(0,1), main="Empirical CDF of null p-values", xlab="p-values", ylab="ecdf")
+    abline(0, 1, lty=2)
+    dev.off()
+  }
 }

 set.seed(1)

From ab91969dfed3f76417f157faabfb7013d3295c71 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Tue, 7 Nov 2017
13:18:30 -0800
Subject: [PATCH 368/396] now not running after trying to set default arguments

---
 C-software                             | 2 +-
 selectiveInference/R/funs.randomized.R | 2 ++
 tests/randomized/test_instances.R      | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/C-software b/C-software
index ec6a954d..8d06a251 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit ec6a954d6b335439115e961abde91fa5a07a3669
+Subproject commit 8d06a251820689db939246e3362475bf4c56801e
diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index e065a5e7..cd67f6dc 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -65,6 +65,7 @@ randomizedLasso = function(X,
     ever_active = rep(0, p)
     nactive = as.integer(0)

+    print('here')
     result = solve_QP_wide(X,                  # design matrix
                            lam / n,            # vector of Lagrange multipliers
                            ridge_term / n,     # ridge_term
@@ -85,6 +86,7 @@ randomizedLasso = function(X,

     sign_soln = sign(result$soln)

+    print('now')
     unpenalized = lam == 0
     active = (!unpenalized) & (sign_soln != 0)
     inactive = (!unpenalized) & (sign_soln == 0)
diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R
index 404312ef..02f69845 100644
--- a/tests/randomized/test_instances.R
+++ b/tests/randomized/test_instances.R
@@ -36,7 +36,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9){
     X=data$X
     y=data$y
     beta=data$beta
-    result = selectiveInference:::randomizedLassoInf(X, y, sigma, lam, level=level)
+    result = selectiveInference:::randomizedLassoInf(X, y, lam, level=level)
     true_beta = beta[result$active_set]
     coverage = rep(0, nrow(result$ci))
     for (i in 1:nrow(result$ci)){

From 5b4bc08d6ccf2c411a0a4fb81f8d1a346afb154b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Tue, 7 Nov 2017 13:23:03 -0800
Subject: [PATCH 369/396] BF: estimate of sigma

---
 selectiveInference/R/funs.randomized.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index cd67f6dc..c6b7a239 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -307,8 +307,8 @@ randomizedLassoInf = function(X,
     # if no sigma given, use OLS estimate

     if (is.null(sigma)) {
-        lm_y = lm(y ~ X[,E] - 1)
-        sigma = sum(resid(lm_y)^2 / lm_y$df.resid)
+        lm_y = lm(y ~ X_E - 1)
+        sigma = sqrt(sum(resid(lm_y)^2) / lm_y$df.resid)
     }
     print(c(sigma, 'sigma'))
     target_cov = solve(t(X_E) %*% X_E)*sigma^2

From 46856e1a87fae22dde84b7a07da8c59772161ff8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Tue, 7 Nov 2017 13:25:19 -0800
Subject: [PATCH 370/396] changing arguments

---
 selectiveInference/R/funs.randomized.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index c6b7a239..71a6a71b 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -286,7 +286,7 @@ randomizedLassoInf = function(X,

     n = nrow(X)
     p = ncol(X)
-    lasso_soln = randomizedLasso(X, y, lam, noise_scale, ridge_term)
+    lasso_soln = randomizedLasso(X, y, lam, noise_scale=noise_scale, ridge_term=ridge_term)
     active_set = lasso_soln$active_set
     inactive_set = lasso_soln$inactive_set
     nactive = length(active_set)

From d1ebca35bfc8e03f0bd6f527dc6f688e07953139 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Tue, 7 Nov 2017 14:25:13 -0800
Subject: [PATCH 371/396] BF: noise_scale was NULL

---
 C-software                                 |  2 +-
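# Patch 371 below swaps the C density calls for plain R linear algebra: the
# randomization is Gaussian, so the log-density of omega = A %*% state + b is
# -||A state + b||^2 / (2 noise_scale^2) up to an additive constant. A
# standalone sketch of that computation (A and b here are hypothetical; note
# the package version drops the constant factor of 1/2):
log_gauss_randomization = function(A, state, b, noise_scale) {
  resid = A %*% state + b
  -sum(resid^2) / (2 * noise_scale^2)
}
log_gauss_randomization(diag(2), c(1, -1), c(0, 1), noise_scale = 0.5)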
selectiveInference/R/funs.randomized.R | 96 ++++++++++++++-------- selectiveInference/src/Rcpp-randomized.cpp | 3 +- tests/randomized/test_instances.R | 2 +- 4 files changed, 67 insertions(+), 36 deletions(-) diff --git a/C-software b/C-software index 8d06a251..225d8274 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 8d06a251820689db939246e3362475bf4c56801e +Subproject commit 225d8274ec54eea0ccf14d4ef27297dd699b54e5 diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 71a6a71b..869b277f 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -65,7 +65,6 @@ randomizedLasso = function(X, ever_active = rep(0, p) nactive = as.integer(0) - print('here') result = solve_QP_wide(X, # design matrix lam / n, # vector of Lagrange multipliers ridge_term / n, # ridge_term @@ -86,7 +85,6 @@ randomizedLasso = function(X, sign_soln = sign(result$soln) - print('now') unpenalized = lam == 0 active = (!unpenalized) & (sign_soln != 0) inactive = (!unpenalized) & (sign_soln == 0) @@ -156,16 +154,17 @@ randomizedLasso = function(X, log_optimization_density = function(opt_state ) { - - if ((sum(abs(opt_state[(inactive_start + 1):p]) > inactive_lam) > 0) || (sum(opt_state[(active_start+1):inactive_start] < 0) > 0)) { return(-Inf) } - D = log_density_gaussian_conditional_(noise_scale, - opt_transform$linear_term, - as.matrix(opt_state), - observed_raw+opt_transform$offset_term) + + A = opt_transform$linear_term %*% opt_state + observed_raw + opt_transform$offset_term + D = -apply(A^2, 2, sum) / noise_scale^2 +# D = log_density_gaussian_conditional_(noise_scale, +# opt_transform$linear_term, +# as.matrix(opt_state), +# observed_raw + opt_transform$offset_term) return(D) } @@ -177,7 +176,8 @@ randomizedLasso = function(X, internal_transform = internal_transform, log_optimization_density = log_optimization_density, observed_opt_state = observed_opt_state, - observed_raw = observed_raw + observed_raw = observed_raw, + noise_scale = noise_scale )) } @@ -222,17 +222,31 @@ importance_weight = function(noise_scale, target_transform, observed_raw) { - log_num = log_density_gaussian_(noise_scale, - target_transform$linear_term, - as.matrix(target_sample), - opt_transform$linear_term, - as.matrix(opt_sample), - target_transform$offset_term + opt_transform$offset_term) - - log_den = log_density_gaussian_conditional_(noise_scale, - opt_transform$linear_term, - as.matrix(opt_sample), - observed_raw+opt_transform$offset_term) + use_C_code = FALSE + if (!use_C_code) { + A = (opt_transform$linear_term %*% opt_sample + + target_transform$linear_term %*% target_sample) + A = apply(A, 2, function(x) {return(x + target_transform$offset_term + opt_transform$offset_term)}) + log_num = -apply(A^2, 2, sum) / noise_scale^2 + } else { + log_num = log_density_gaussian_(noise_scale, + target_transform$linear_term, + as.matrix(target_sample), + opt_transform$linear_term, + as.matrix(opt_sample), + target_transform$offset_term + opt_transform$offset_term) + } + + if (!use_C_code) { + A = opt_transform$linear_term %*% opt_sample + A = apply(A, 2, function(x) {return(x + observed_raw + opt_transform$offset_term)}) + log_den = -apply(A^2, 2, sum) / noise_scale^2 + } else { + log_den = log_density_gaussian_conditional_(noise_scale, + opt_transform$linear_term, + as.matrix(opt_sample), + observed_raw+opt_transform$offset_term) + } W = log_num - log_den W = W - max(W) return(exp(W)) @@ -263,11 +277,19 @@ conditional_density = 
function(noise_scale, lasso_soln) { if (sum(opt_state < 0) > 0) { return(-Inf) } - D = log_density_gaussian_conditional_(noise_scale, - reduced_B, - as.matrix(opt_state), - reduced_beta_offset) - return(D) + + use_C_code = FALSE + if (!use_C_code) { + A = reduced_B %*% as.matrix(opt_state) + reduced_beta_offset + A = apply(A, 2, function(x) {x + reduced_beta_offset}) + log_den = -apply(A^2, 2, sum) / noise_scale^2 + } else { + log_den = log_density_gaussian_conditional_(noise_scale, + reduced_B, + as.matrix(opt_state), + reduced_beta_offset) + } + return(log_den) } lasso_soln$log_optimization_density = log_condl_optimization_density lasso_soln$observed_opt_state = observed_opt_state[1:nactive] @@ -282,7 +304,9 @@ randomizedLassoInf = function(X, noise_scale=NULL, ridge_term=NULL, condition_subgrad=TRUE, - level=0.9) { + level=0.9, + nsample=10000, + burnin=2000) { n = nrow(X) p = ncol(X) @@ -291,14 +315,20 @@ randomizedLassoInf = function(X, inactive_set = lasso_soln$inactive_set nactive = length(active_set) - if (condition_subgrad==TRUE){ - lasso_soln=conditional_density(noise_scale,lasso_soln) - } + noise_scale = lasso_soln$noise_scale # set to default value in randomizedLasso + + if (condition_subgrad==TRUE){ + lasso_soln=conditional_density(noise_scale, lasso_soln) + } dim = length(lasso_soln$observed_opt_state) print(paste("chain dim", dim)) - S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) - opt_samples = S$samples[2001:10000,] + +# print(lasso_soln) + + + S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=nsample) + opt_samples = S$samples[(burnin+1):nsample,] print(paste("dim opt samples", toString(dim(opt_samples)))) X_E = X[, active_set] @@ -331,12 +361,12 @@ randomizedLassoInf = function(X, pivot = function(candidate){ weights = importance_weight(noise_scale, - t(as.matrix(target_sample)) + candidate, + t(as.matrix(target_sample) + candidate), t(opt_samples), opt_transform, target_transform, observed_raw) - return(mean((target_sample+candidate // need to include the main Rcpp header file #include // where densities are defined - +#include // [[Rcpp::export]] Rcpp::NumericVector log_density_gaussian_(double noise_scale, // Scale of randomization Rcpp::NumericMatrix internal_linear, // A_D -- linear part for data @@ -53,6 +53,7 @@ Rcpp::NumericVector log_density_gaussian_conditional_(double noise_scale, int ipt; for (ipt=0; ipt Date: Tue, 7 Nov 2017 14:31:09 -0800 Subject: [PATCH 372/396] seems to be running now with default arguments --- C-software | 2 +- selectiveInference/R/funs.randomized.R | 14 ++++---------- tests/randomized/test_instances.R | 1 + 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/C-software b/C-software index 225d8274..563bf1aa 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 225d8274ec54eea0ccf14d4ef27297dd699b54e5 +Subproject commit 563bf1aa370b55f8343693224717047f1df0d0c3 diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 869b277f..55bb0f29 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -34,7 +34,6 @@ randomizedLasso = function(X, noise_scale = 0.5 * sd(y) * sqrt(mean_diag) } - print(c(noise_scale, ridge_term)) noise_type = match.arg(noise_type) if (noise_scale > 0) { @@ -222,7 +221,7 @@ importance_weight = function(noise_scale, target_transform, observed_raw) { - use_C_code = FALSE + use_C_code = TRUE if (!use_C_code) { A = (opt_transform$linear_term %*% 
opt_sample + target_transform$linear_term %*% target_sample) @@ -278,7 +277,7 @@ conditional_density = function(noise_scale, lasso_soln) { return(-Inf) } - use_C_code = FALSE + use_C_code = TRUE if (!use_C_code) { A = reduced_B %*% as.matrix(opt_state) + reduced_beta_offset A = apply(A, 2, function(x) {x + reduced_beta_offset}) @@ -321,15 +320,10 @@ randomizedLassoInf = function(X, lasso_soln=conditional_density(noise_scale, lasso_soln) } - dim = length(lasso_soln$observed_opt_state) - print(paste("chain dim", dim)) + ndim = length(lasso_soln$observed_opt_state) -# print(lasso_soln) - - - S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=nsample) + S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), ndim), nsample=nsample) opt_samples = S$samples[(burnin+1):nsample,] - print(paste("dim opt samples", toString(dim(opt_samples)))) X_E = X[, active_set] X_minusE = X[, inactive_set] diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 54b219b7..e3e98166 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -47,6 +47,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9){ } sample_pvalues = c(sample_pvalues, result$pvalues) sample_coverage = c(sample_coverage, coverage) + print(paste("coverage", mean(sample_coverage))) } print(paste("coverage", mean(sample_coverage))) jpeg('pivots.jpg') From 8187b536f94e4845f6b73d7a56e1f3eb7d444396 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 7 Nov 2017 14:35:31 -0800 Subject: [PATCH 373/396] forgot one bit of C code to use --- selectiveInference/R/funs.randomized.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 55bb0f29..2591b097 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -158,12 +158,16 @@ randomizedLasso = function(X, return(-Inf) } - A = opt_transform$linear_term %*% opt_state + observed_raw + opt_transform$offset_term - D = -apply(A^2, 2, sum) / noise_scale^2 -# D = log_density_gaussian_conditional_(noise_scale, -# opt_transform$linear_term, -# as.matrix(opt_state), -# observed_raw + opt_transform$offset_term) + use_C_code = TRUE + if (!use_C_code) { + A = opt_transform$linear_term %*% opt_state + observed_raw + opt_transform$offset_term + D = -apply(A^2, 2, sum) / noise_scale^2 + } else { + D = log_density_gaussian_conditional_(noise_scale, + opt_transform$linear_term, + as.matrix(opt_state), + observed_raw + opt_transform$offset_term) + } return(D) } From 6dae27c96cdfeef4ca8e7238fc83c4e16dc86eb7 Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Tue, 7 Nov 2017 14:36:53 -0800 Subject: [PATCH 374/396] weights running in |E|+1 dimensions --- selectiveInference/R/funs.randomized.R | 20 ++++++++++++++++---- tests/randomized/test_instances.R | 4 ++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 3d8ddec2..df53da53 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -296,16 +296,28 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level target_cov[i,i], cov_target_internal[,i], internal_transform) - + + target_opt_linear = cbind(target_transform$linear_term, opt_transform$linear_term) + reduced_target_opt_linear = chol(t(target_opt_linear) %*% target_opt_linear) + target_linear = 
reduced_target_opt_linear[,1]
+      temp = solve(t(reduced_target_opt_linear)) %*% t(target_opt_linear)
+      target_offset = temp %*% target_transform$offset_term
+      target_transform = list(linear_term = as.matrix(target_linear), offset_term = target_offset)
+      print(dim(reduced_target_opt_linear))
+      opt_linear = reduced_target_opt_linear[,2:ncol(reduced_target_opt_linear)]
+      opt_offset = temp %*% opt_transform$offset_term
+      opt_transform_reduced = list(linear_term = as.matrix(opt_linear), offset_term = opt_offset)
+
+      raw = target_transform$linear_term * observed_target[i] +target_transform$offset_term
+
     target_sample = rnorm(nrow(as.matrix(opt_samples))) * sqrt(target_cov[i,i])
     pivot = function(candidate){
       weights = selectiveInference:::importance_weight(noise_scale,
                                                        t(as.matrix(target_sample)) + candidate,
                                                        t(opt_samples),
-                                                       opt_transform,
+                                                       opt_transform_reduced,
                                                        target_transform,
-                                                       observed_raw)
+                                                       raw)
       return(mean((target_sample+candidate < observed_target[i])*weights)/mean(weights))
Date: Tue, 7 Nov 2017 15:31:04 -0800
Subject: [PATCH 375/396] BF: opt_transform linear_term was not cast as matrix
 in conditional_density

---
 selectiveInference/R/funs.randomized.R | 33 ++++++++++++++++-------
 tests/randomized/test_instances.R      |  6 ++---
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index 2591b097..de16951c 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -109,7 +109,13 @@ randomizedLasso = function(X,
     L_E = t(X) %*% X[,E]
     coef_term = L_E
-    coef_term = coef_term %*% diag(c(rep(1, sum(unpenalized)), sign_soln[active])) # coefficients are non-negative
+    signs_ = c(rep(1, sum(unpenalized)), sign_soln[active])
+    if (length(signs_) == 1) {
+        coef_term = coef_term * signs_
+    } else {
+        coef_term = coef_term %*% diag(signs_) # scalings are non-negative
+    }
+
     coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term

     subgrad_term = matrix(0, p, sum(inactive)) # for subgrad
@@ -151,10 +157,10 @@ randomizedLasso = function(X,

     # XXX only for Gaussian so far

-    log_optimization_density = function(opt_state
-    ) {
+    log_optimization_density = function(opt_state) {
+
         if ((sum(abs(opt_state[(inactive_start + 1):p]) > inactive_lam) > 0) ||
-            (sum(opt_state[(active_start+1):inactive_start] < 0) > 0)) {
+            (sum(opt_state[(active_start + 1):inactive_start] < 0) > 0)) {
             return(-Inf)
         }
@@ -188,12 +194,7 @@ randomizedLasso = function(X,
 sample_opt_variables = function(randomizedLASSO_obj, jump_scale, nsample=10000) {
      return(MCMC(randomizedLASSO_obj$log_optimization_density,
                  nsample,
-                 randomizedLASSO_obj$observed_opt_state,
+                 randomizedLASSO_obj$observed_opt_state,
                  acc.rate=0.2,
                  scale=jump_scale))
 }
@@ -232,6 +238,7 @@ importance_weight = function(noise_scale,
         A = apply(A, 2, function(x) {return(x + target_transform$offset_term + opt_transform$offset_term)})
         log_num = -apply(A^2, 2, sum) / noise_scale^2
     } else {
+
         log_num = log_density_gaussian_(noise_scale,
                                         target_transform$linear_term,
                                         as.matrix(target_sample),
@@ -264,10 +271,11 @@ conditional_density = function(noise_scale, lasso_soln) {
     observed_opt_state = lasso_soln$observed_opt_state
     nactive = length(active_set)
-    B = opt_linear[,1:nactive]
+    B = opt_linear[,1:nactive,drop=FALSE]
     beta_offset = opt_offset
-    p=length(observed_opt_state)
-    if (nactive
Date: Tue, 7 Nov 2017 16:00:25 -0800
Subject: [PATCH 376/396] working instance

---
 selectiveInference/R/funs.randomized.R | 16 ++++++++++------
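# The chol() reduction above relies on ||M s + c||^2 depending on M only
# through t(M) %*% M: with R = chol(t(M) %*% M) and
# c_red = solve(t(R)) %*% t(M) %*% c, the quadratic ||R s + c_red||^2 matches
# ||M s + c||^2 up to a constant in s, which is what lets the sampler run in
# |E|+1 dimensions. A quick numerical check with random inputs (all names are
# local to this sketch):
set.seed(3)
M = matrix(rnorm(20 * 3), 20, 3); s = rnorm(3); c0 = rnorm(20)
R = chol(t(M) %*% M)
c_red = solve(t(R)) %*% t(M) %*% c0
sum((M %*% s + c0)^2) - sum((R %*% s + c_red)^2)  # equals the constant below
sum(c0^2) - sum(c_red^2)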
tests/randomized/test_instances.R | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index df53da53..99da66d4 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -95,6 +95,7 @@ randomizedLASSO = function(X, L_E = t(X) %*% X[,E] coef_term = L_E + print(active_set) coef_term = coef_term %*% diag(c(rep(1, sum(unpenalized)), sign_soln[active])) # coefficients are non-negative coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term @@ -274,8 +275,9 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level dim = length(lasso_soln$observed_opt_state) print(paste("chain dim", dim)) - S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=10000) - opt_samples = S$samples[2001:10000,] + nsample=4000 + S = selectiveInference:::sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), dim), nsample=nsample) + opt_samples = S$samples[2001:nsample,] print(paste("dim opt samples", toString(dim(opt_samples)))) X_E = X[, active_set] @@ -290,6 +292,7 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level pvalues = rep(0, nactive) ci = matrix(0, nactive, 2) + for (i in 1:nactive){ target_transform = selectiveInference:::linear_decomposition(observed_target[i], observed_internal, @@ -303,7 +306,6 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level temp = solve(t(reduced_target_opt_linear)) %*% t(target_opt_linear) target_offset = temp %*% target_transform$offset_term target_transform = list(linear_term = as.matrix(target_linear), offset_term = target_offset) - print(dim(reduced_target_opt_linear)) opt_linear = reduced_target_opt_linear[,2:ncol(reduced_target_opt_linear)] opt_offset = temp %*% opt_transform$offset_term opt_transform_reduced = list(linear_term = as.matrix(opt_linear), offset_term = opt_offset) @@ -324,11 +326,13 @@ randomized_inference = function(X, y, sigma, lam, noise_scale, ridge_term, level return (pivot(observed_target[i]+candidate)-(1-level)/2) } rootL = function(candidate){ - return (pivot(observed_target[i]+candidate)-(1+level)/2) + return(pivot(observed_target[i]+candidate)-(1+level)/2) } pvalues[i] = pivot(0) - line_min = -20*sd(target_sample) - line_max = 20*sd(target_sample) + line_min = -10*sd(target_sample) + line_max = 10*sd(target_sample) + print(rootU(line_min)) + print(rootU(line_max)) if (rootU(line_min)*rootU(line_max)<0){ ci[i,2] = uniroot(rootU, c(line_min, line_max))$root+observed_target[i] } else{ diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 4b6df00a..8c353c98 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -25,9 +25,9 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA, } -collect_results = function(n,p,s, nsim=100, level=0.9){ +collect_results = function(n,p,s, nsim=20, level=0.9){ rho=0.3 - lam=1. 
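# uniroot() fails unless the function changes sign on the bracket, which is
# why the sign checks above guard each confidence-limit search. A generic
# guarded wrapper in the same spirit (safe_root is a hypothetical helper, not
# package code):
safe_root = function(f, lower, upper, fallback = NA) {
  if (f(lower) * f(upper) < 0) uniroot(f, c(lower, upper))$root else fallback
}
safe_root(function(x) x^2 - 2, 0, 2)  # about 1.414
safe_root(function(x) x^2 + 1, 0, 2)  # no sign change on [0,2], returns NA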
+ lam=1.2 sigma=1 sample_pvalues = c() sample_coverage = c() From 571cf823d4da8fcb099984b92af79e07d4b3beae Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 7 Nov 2017 16:16:10 -0800 Subject: [PATCH 377/396] export of randomized and man page --- selectiveInference/NAMESPACE | 3 +- selectiveInference/man/fixedLassoInf.Rd | 2 +- selectiveInference/man/randomizedLassoInf.Rd | 159 +++++++++++++++++++ 3 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 selectiveInference/man/randomizedLassoInf.Rd diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index e8f36e0d..da23e38e 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -14,7 +14,8 @@ export(lar,fs, TG.pvalue, TG.limits, TG.interval, - debiasingMatrix + debiasingMatrix, + randomizedLassoInf ) S3method("coef", "lar") diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 445e72a7..ac7ca213 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -113,7 +113,7 @@ is not allowed. Note that the coefficients and standard errors reported are unregularized. Eg for the Gaussian, they are the usual least squares estimates and standard errors -for the model fit to the actice set from the lasso. +for the model fit to the active set from the lasso. } \value{ \item{type}{Type of coefficients tested (partial or full)} diff --git a/selectiveInference/man/randomizedLassoInf.Rd b/selectiveInference/man/randomizedLassoInf.Rd new file mode 100644 index 00000000..35d0dc45 --- /dev/null +++ b/selectiveInference/man/randomizedLassoInf.Rd @@ -0,0 +1,159 @@ +\name{randomizedLassoInf} +\alias{randomizedLassoInf} + +\title{ +Inference for the randomized lasso, with a fixed lambda +} +\description{ +Compute p-values and confidence intervals based on selecting +an active set with the randomized lasso, at a +fixed value of the tuning parameter lambda and using Gaussian +randomization. +} +\usage{ +randomizedLassoInf(X, + y, + lam, + sigma=NULL, + noise_scale=NULL, + ridge_term=NULL, + condition_subgrad=TRUE, + level=0.9, + nsample=10000, + burnin=2000, + max_iter=100, + kkt_tol=1.e-4, + parameter_tol=1.e-8, + objective_tol=1.e-8, + objective_stop=FALSE, + kkt_stop=TRUE, + param_stop=TRUE) +} +\arguments{ + \item{X}{ +Matrix of predictors (n by p); +} + \item{y}{ +Vector of outcomes (length n) +} + \item{lam}{ +Value of lambda used to compute beta. See the above warning + Be careful! This function uses the "standard" lasso objective + \deqn{ + 1/2 \|y - x \beta\|_2^2 + \lambda \|\beta\|_1. + } + In contrast, glmnet multiplies the first term by a factor of 1/n. + So after running glmnet, to extract the beta corresponding to a value lambda, + you need to use \code{beta = coef(obj, s=lambda/n)[-1]}, + where obj is the object returned by glmnet (and [-1] removes the intercept, + which glmnet always puts in the first component) +} +\item{sigma}{ +Estimate of error standard deviation. If NULL (default), this is estimated +using the mean squared residual of the full least squares based on +selected active set. +} +\item{noise_scale}{ +Scale of Gaussian noise added to objective. Default is +0.5 * sd(y) times the sqrt of the mean of the trace of X^TX. +} +\item{ridge_term}{ +A small "elastic net" or ridge penalty is added to ensure +the randomized problem has a solution. +0.5 * sd(y) times the sqrt of the mean of the trace of X^TX divided by +sqrt(n). 
+} +\item{condition_subgrad}{ +In forming selective confidence intervals and p-values should we condition +on the inactive coordinates of the subgradient as well? +Default is TRUE. +} +\item{level} +{ +Level for confidence intervals. +} +\item{nsample} +{ +Number of samples of optimization variables to sample. +} +\item{burnin} +{ +How many samples of optimization variable to discard (should be less than nsample). +} +\item{max_iter} +{ +How many rounds of updates used of coordinate descent in solving randomized +LASSO. +} +\item{kkt_tol}{ +Tolerance for checking convergence based on KKT conditions. +} +\item{parameter_tol}{ +Tolerance for checking convergence based on convergence +of parameters. +} +\item{objective_tol}{ +Tolerance for checking convergence based on convergence +of objective value. +} +\item{kkt_stop}{ +Should we use KKT check to determine when to stop? +} +\item{parameter_tol}{ +Should we use convergence of parameters to determine when to stop? +} +\item{objective_tol}{ +Should we use convergence of objective value to determine when to stop? +} +} + +\details{ +This function computes selective p-values and confidence intervals for a +randomized version of the lasso, +given a fixed value of the tuning parameter lambda. + +} +\value{ +\item{type}{Type of coefficients tested (partial or full)} +\item{lambda}{Value of tuning parameter lambda used} +\item{pv}{One-sided P-values for active variables, uses the fact we have conditioned on the sign.} +\item{ci}{Confidence intervals} +\item{tailarea}{Realized tail areas (lower and upper) for each confidence interval} +\item{vlo}{Lower truncation limits for statistics} +\item{vup}{Upper truncation limits for statistics} +\item{vmat}{Linear contrasts that define the observed statistics} +\item{y}{Vector of outcomes} +\item{vars}{Variables in active set} +\item{sign}{Signs of active coefficients} +\item{alpha}{Desired coverage (alpha/2 in each tail)} +\item{sigma}{Value of error standard deviation (sigma) used} +\item{call}{The call to lassoInf} +} + +\references{ +Xiaoying Tian, and Jonathan Taylor (2015). +Selective inference with a randomized response. arxiv.org:1507.06739 + +Xiaoying Tian, Snigdha Panigrahi, Jelena Markovic, Nan Bi and Jonathan Taylor (2016). +Selective inference after solving a convex problem. 
+arxiv:1609.05609 + +} +\author{Jelena Markovic, Jonathan Taylor} + +\examples{ +set.seed(43) +n = 50 +p = 10 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x = scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +result = randomizedLassoInf(X, y, lam) + +} + From 9adbe9b8d7ff0db6b4c3e44bbab3d298c7be1a45 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 7 Nov 2017 16:16:45 -0800 Subject: [PATCH 378/396] adding resid to namespace --- selectiveInference/NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index da23e38e..11a834e1 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -41,7 +41,7 @@ import(intervals) import(survival) importFrom("graphics", abline, axis, matplot) importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, - qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) + qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq, resid) importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") importFrom("stats", "rbinom", "rexp") From eda1482654c33abf23ac0cc6218e4a69266b9ea4 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 7 Nov 2017 16:22:11 -0800 Subject: [PATCH 379/396] fixing to make check pass --- selectiveInference/R/funs.randomized.R | 24 +++++++++++++++++-- selectiveInference/man/randomizedLassoInf.Rd | 25 +++++++++----------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index de16951c..357941a6 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -317,12 +317,32 @@ randomizedLassoInf = function(X, condition_subgrad=TRUE, level=0.9, nsample=10000, - burnin=2000) { + burnin=2000, + max_iter=100, # how many iterations for each optimization problem + kkt_tol=1.e-4, # tolerance for the KKT conditions + parameter_tol=1.e-8, # tolerance for relative convergence of parameter + objective_tol=1.e-8, # tolerance for relative decrease in objective + objective_stop=FALSE, + kkt_stop=TRUE, + param_stop=TRUE) + { n = nrow(X) p = ncol(X) - lasso_soln = randomizedLasso(X, y, lam, noise_scale=noise_scale, ridge_term=ridge_term) + lasso_soln = randomizedLasso(X, + y, + lam, + noise_scale=noise_scale, + ridge_term=ridge_term, + max_iter=max_iter, + kkt_tol=kkt_tol, + parameter_tol=parameter_tol, + objective_tol=objective_tol, + objective_stop=objective_stop, + kkt_stop=kkt_stop, + param_stop=param_stop) + active_set = lasso_soln$active_set inactive_set = lasso_soln$inactive_set nactive = length(active_set) diff --git a/selectiveInference/man/randomizedLassoInf.Rd b/selectiveInference/man/randomizedLassoInf.Rd index 35d0dc45..15e70f09 100644 --- a/selectiveInference/man/randomizedLassoInf.Rd +++ b/selectiveInference/man/randomizedLassoInf.Rd @@ -68,20 +68,16 @@ In forming selective confidence intervals and p-values should we condition on the inactive coordinates of the subgradient as well? Default is TRUE. } -\item{level} -{ +\item{level}{ Level for confidence intervals. } -\item{nsample} -{ +\item{nsample}{ Number of samples of optimization variables to sample. } -\item{burnin} -{ +\item{burnin}{ How many samples of optimization variable to discard (should be less than nsample). } -\item{max_iter} -{ +\item{max_iter}{ How many rounds of updates used of coordinate descent in solving randomized LASSO. } @@ -99,10 +95,10 @@ of objective value. 
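# The man page above warns that glmnet scales the squared-error loss by 1/n,
# so a lambda on this package's "standard" scale corresponds to s = lam / n
# in glmnet. A hedged sketch of extracting a matching coefficient vector
# (runs only if glmnet is installed; all object names here are illustrative):
if (requireNamespace("glmnet", quietly = TRUE)) {
  set.seed(44)
  n = 50; p = 10
  X = scale(matrix(rnorm(n * p), n, p))
  y = rnorm(n)
  lam = 0.5 * n  # lambda on the standard lasso scale used in this package
  gfit = glmnet::glmnet(X, y, standardize = FALSE, intercept = FALSE)
  beta_lam = coef(gfit, s = lam / n)[-1]  # drop the intercept component
}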
\item{kkt_stop}{ Should we use KKT check to determine when to stop? } -\item{parameter_tol}{ +\item{parameter_stop}{ Should we use convergence of parameters to determine when to stop? } -\item{objective_tol}{ +\item{objective_stop}{ Should we use convergence of objective value to determine when to stop? } } @@ -145,13 +141,14 @@ arxiv:1609.05609 set.seed(43) n = 50 p = 10 -sigma = 1 +sigma = 0.2 +lam = 0.5 -x = matrix(rnorm(n*p),n,p) -x = scale(x,TRUE,TRUE) +X = matrix(rnorm(n*p), n, p) +X = scale(X, TRUE, TRUE) beta = c(3,2,rep(0,p-2)) -y = x\%*\%beta + sigma*rnorm(n) +y = X\%*\%beta + sigma*rnorm(n) result = randomizedLassoInf(X, y, lam) From 3ca443199d474f740a14f2c252cd181c771dd2e7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 7 Nov 2017 16:23:34 -0800 Subject: [PATCH 380/396] param_stop --- selectiveInference/R/funs.randomized.R | 8 ++++---- selectiveInference/man/randomizedLassoInf.Rd | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 357941a6..ae8a463e 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -15,7 +15,7 @@ randomizedLasso = function(X, objective_tol=1.e-8, # tolerance for relative decrease in objective objective_stop=FALSE, kkt_stop=TRUE, - param_stop=TRUE) + parameter_stop=TRUE) { n = nrow(X); p = ncol(X) @@ -80,7 +80,7 @@ randomizedLasso = function(X, p, objective_stop, # objective_stop kkt_stop, # kkt_stop - param_stop) # param_stop + parameter_stop) # param_stop sign_soln = sign(result$soln) @@ -324,7 +324,7 @@ randomizedLassoInf = function(X, objective_tol=1.e-8, # tolerance for relative decrease in objective objective_stop=FALSE, kkt_stop=TRUE, - param_stop=TRUE) + parameter_stop=TRUE) { n = nrow(X) @@ -341,7 +341,7 @@ randomizedLassoInf = function(X, objective_tol=objective_tol, objective_stop=objective_stop, kkt_stop=kkt_stop, - param_stop=param_stop) + parameter_stop=parameter_stop) active_set = lasso_soln$active_set inactive_set = lasso_soln$inactive_set diff --git a/selectiveInference/man/randomizedLassoInf.Rd b/selectiveInference/man/randomizedLassoInf.Rd index 15e70f09..7ce8fb7b 100644 --- a/selectiveInference/man/randomizedLassoInf.Rd +++ b/selectiveInference/man/randomizedLassoInf.Rd @@ -145,7 +145,7 @@ sigma = 0.2 lam = 0.5 X = matrix(rnorm(n*p), n, p) -X = scale(X, TRUE, TRUE) +X = scale(X, TRUE, TRUE) / sqrt(n-1) beta = c(3,2,rep(0,p-2)) y = X\%*\%beta + sigma*rnorm(n) From 690f0ea59ea4eb1ddf52fa0415584def81af1e14 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 7 Nov 2017 18:19:35 -0800 Subject: [PATCH 381/396] fixing function signature --- selectiveInference/man/randomizedLassoInf.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/man/randomizedLassoInf.Rd b/selectiveInference/man/randomizedLassoInf.Rd index 7ce8fb7b..18b9611a 100644 --- a/selectiveInference/man/randomizedLassoInf.Rd +++ b/selectiveInference/man/randomizedLassoInf.Rd @@ -27,7 +27,7 @@ randomizedLassoInf(X, objective_tol=1.e-8, objective_stop=FALSE, kkt_stop=TRUE, - param_stop=TRUE) + parameter_stop=TRUE) } \arguments{ \item{X}{ From 6c1a123fdeee79427b7638939f24111cead6565e Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Tue, 7 Nov 2017 22:59:25 -0800 Subject: [PATCH 382/396] added mean and cov of a gaussian --- selectiveInference/R/RcppExports.R | 35 ++++++++++++++++++++++++++ selectiveInference/R/funs.randomized.R | 10 +++++++- 2 files changed, 44 insertions(+), 1 
deletion(-) create mode 100644 selectiveInference/R/RcppExports.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R new file mode 100644 index 00000000..e927a3af --- /dev/null +++ b/selectiveInference/R/RcppExports.R @@ -0,0 +1,35 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { + .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) +} + +solve_QP_wide <- function(X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { + .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) +} + +update1_ <- function(Q2, w, m, k) { + .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) +} + +downdate1_ <- function(Q1, R, j0, m, n) { + .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) +} + +log_density_gaussian_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_gaussian_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) +} + +log_density_gaussian_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_gaussian_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) +} + +log_density_laplace_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_laplace_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) +} + +log_density_laplace_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { + .Call('selectiveInference_log_density_laplace_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) +} + diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 6914fd7d..192f6489 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -239,7 +239,6 @@ importance_weight = function(noise_scale, A = apply(A, 2, function(x) {return(x + target_transform$offset_term + opt_transform$offset_term)}) log_num = -apply(A^2, 2, sum) / noise_scale^2 } else { - log_num = log_density_gaussian_(noise_scale, target_transform$linear_term, as.matrix(target_sample), @@ -262,6 +261,15 @@ importance_weight = function(noise_scale, W = W - max(W) return(exp(W)) } + +get_mean_cov = function(noise_scale, linear_term, offset_term){ + temp = solve(t(linear_term) %*% linear_term) + cov = noise_scale^2*temp + mean = temp %*% linear_term %*% 
offset_term + return(list(mean=mean, cov=cov)) +} + + conditional_density = function(noise_scale, lasso_soln) { From d2a3b9bf075fb76ace427831416e51cf8d322bb8 Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Tue, 7 Nov 2017 23:04:22 -0800 Subject: [PATCH 383/396] transpose --- selectiveInference/R/funs.randomized.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 192f6489..757b5554 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -265,7 +265,7 @@ importance_weight = function(noise_scale, get_mean_cov = function(noise_scale, linear_term, offset_term){ temp = solve(t(linear_term) %*% linear_term) cov = noise_scale^2*temp - mean = temp %*% linear_term %*% offset_term + mean = temp %*% t(linear_term) %*% offset_term return(list(mean=mean, cov=cov)) } From e2efbdcbe5b8902113ce34e9d9847ebd7c72077e Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Wed, 8 Nov 2017 18:15:24 -0800 Subject: [PATCH 384/396] working! bugs fixed. new sampler --- selectiveInference/R/funs.randomized.R | 83 +++++++++++++++------- selectiveInference/R/sampler.R | 96 ++++++++++++++++++++++++++ tests/randomized/test_instances.R | 53 ++++++++++++-- tests/randomized/test_sampler.R | 16 +++++ 4 files changed, 217 insertions(+), 31 deletions(-) create mode 100644 selectiveInference/R/sampler.R create mode 100644 tests/randomized/test_sampler.R diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 757b5554..da64f99c 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -33,11 +33,15 @@ randomizedLasso = function(X, if (is.null(noise_scale)) { noise_scale = 0.5 * sd(y) * sqrt(mean_diag) } - + + print(paste("ridge term", ridge_term)) + print(paste("noise scale", noise_scale)) + noise_type = match.arg(noise_type) if (noise_scale > 0) { if (noise_type == 'gaussian') { + set.seed(1) perturb_ = rnorm(p) * noise_scale } else if (noise_type == 'laplace') { @@ -65,8 +69,8 @@ randomizedLasso = function(X, nactive = as.integer(0) result = solve_QP_wide(X, # design matrix - lam / n, # vector of Lagrange multipliers - ridge_term / n, # ridge_term + lam / n, # vector of Lagrange multipliers + ridge_term / n, # ridge_term max_iter, soln, linear_func, @@ -76,12 +80,12 @@ randomizedLasso = function(X, nactive, kkt_tol, objective_tol, - parameter_tol, + parameter_tol, p, - objective_stop, # objective_stop - kkt_stop, # kkt_stop - parameter_stop) # param_stop - + objective_stop, # objective_stop + kkt_stop, # kkt_stop + parameter_stop) # param_stop + sign_soln = sign(result$soln) unpenalized = lam == 0 @@ -96,7 +100,11 @@ randomizedLasso = function(X, observed_scalings = abs(result$soln)[active] observed_unpen = result$soln[unpenalized] - observed_subgrad = result$gradient[inactive] + observed_subgrad = -n*result$gradient[inactive] + + if (length(which(abs(observed_subgrad)>lam[1]))){ + print("subgradient eq not satisfied") + } observed_opt_state = c(observed_unpen, observed_scalings, observed_subgrad) @@ -111,14 +119,15 @@ randomizedLasso = function(X, coef_term = L_E signs_ = c(rep(1, sum(unpenalized)), sign_soln[active]) + + coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term + if (length(signs_) == 1) { - coef_term = coef_term * signs_ + coef_term = coef_term * signs_ } else { - coef_term = coef_term %*% diag(signs_) # scaligns are non-negative + coef_term 
= coef_term %*% diag(signs_) # scaligns are non-negative } - - coef_term[active,] = coef_term[active,] + ridge_term * diag(rep(1, sum(active))) # ridge term - + subgrad_term = matrix(0, p, sum(inactive)) # for subgrad for (i in 1:sum(inactive)) { subgrad_term[inactive_set[i], i] = 1 @@ -155,7 +164,8 @@ randomizedLasso = function(X, inactive_lam = lam[inactive_set] inactive_start = sum(unpenalized) + sum(active) active_start = sum(unpenalized) - + + # XXX only for Gaussian so far log_optimization_density = function(opt_state) { @@ -185,9 +195,11 @@ randomizedLasso = function(X, optimization_transform = opt_transform, internal_transform = internal_transform, log_optimization_density = log_optimization_density, - observed_opt_state = observed_opt_state, + observed_opt_state = observed_opt_state, observed_raw = observed_raw, - noise_scale = noise_scale + noise_scale = noise_scale, + soln = result$soln, + perturb = perturb_ )) } @@ -314,19 +326,22 @@ conditional_density = function(noise_scale, lasso_soln) { lasso_soln$log_optimization_density = log_condl_optimization_density lasso_soln$observed_opt_state = observed_opt_state[1:nactive] lasso_soln$optimization_transform = opt_transform - return(lasso_soln) + reduced_opt_transform =list(linear_term = reduced_B, offset_term = reduced_beta_offset) + return(list(lasso_soln=lasso_soln, + reduced_opt_transform = reduced_opt_transform)) } randomizedLassoInf = function(X, y, lam, + sampler="A", sigma=NULL, noise_scale=NULL, ridge_term=NULL, condition_subgrad=TRUE, level=0.9, - nsample=10000, - burnin=2000, + nsample=10000, + burnin=2000, max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-8, # tolerance for relative convergence of parameter @@ -353,22 +368,35 @@ randomizedLassoInf = function(X, parameter_stop=parameter_stop) active_set = lasso_soln$active_set - if (length(active_set)==0){ + nactive = length(active_set) + print(paste("nactive", nactive)) + if (nactive==0){ return (list(active_set=active_set, pvalues=c(), ci=c())) } inactive_set = lasso_soln$inactive_set - nactive = length(active_set) + noise_scale = lasso_soln$noise_scale # set to default value in randomizedLasso if (condition_subgrad==TRUE){ - lasso_soln=conditional_density(noise_scale, lasso_soln) + condl_lasso=conditional_density(noise_scale, lasso_soln) + lasso_soln = condl_lasso$lasso_soln + reduced_opt_transform = condl_lasso$reduced_opt_transform } ndim = length(lasso_soln$observed_opt_state) - - S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), ndim), nsample=nsample) - opt_samples = as.matrix(S$samples[(burnin+1):nsample,,drop=FALSE]) + + if (sampler =="R"){ + S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), ndim), nsample=nsample) + opt_samples = as.matrix(S$samples[(burnin+1):nsample,,drop=FALSE]) + } else if (sampler == "A"){ + opt_samples = gaussian_sampler(noise_scale, + lasso_soln$observed_opt_state, + reduced_opt_transform$linear_term, + reduced_opt_transform$offset_term, + nsamples=nsample) + opt_sample = opt_samples[(burnin+1):nsample,] + } X_E = X[, active_set] X_minusE = X[, inactive_set] @@ -458,3 +486,6 @@ randomizedLassoInf = function(X, } return(list(active_set=active_set, pvalues=pvalues, ci=ci)) } + + + diff --git a/selectiveInference/R/sampler.R b/selectiveInference/R/sampler.R new file mode 100644 index 00000000..48efccc0 --- /dev/null +++ b/selectiveInference/R/sampler.R @@ -0,0 +1,96 @@ + +log_concave_sampler = function(negative_log_density, + 
grad_negative_log_density, + constraints, + observed, + nsamples){ + #print(constraints) + constraints = as.matrix(constraints) + dim = nrow(constraints) + + get_poisson_process = function(state){ + pos = as.matrix(state$pos) + velocity = as.matrix(state$velocity) + neg_velocity = velocity<0 + pos_velocity = velocity>0 + tau_min = 0 + tau_max = 10 + if (sum(neg_velocity)>0){ + R = (-constraints[neg_velocity,1]+pos[neg_velocity])/(-velocity[neg_velocity]) + tau_max = min(tau_max, min(R)) + L = (-constraints[neg_velocity,2]+pos[neg_velocity])/(-velocity[neg_velocity]) + tau_min = max(tau_min, max(L)) + } + if (sum(pos_velocity)>0){ + R = (constraints[pos_velocity,2]-pos[pos_velocity])/velocity[pos_velocity] + tau_max = min(tau_max, min(R)) + L = (constraints[pos_velocity,1]-pos[pos_velocity])/velocity[pos_velocity] + tau_min = max(tau_min, max(L)) + } + + f=function(t){as.numeric(t(velocity) %*% grad_negative_log_density(pos+velocity*t))} + tau_star = tau_max + if (f(tau_min)*f(tau_max)<0){ + tau_star = uniroot(f, c(tau_min, tau_max))$root + } else{ + if (negative_log_density(pos+velocity*tau_min)0){ @@ -61,7 +104,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=TRUE, l } set.seed(1) -collect_results(n=200, p=100, s=0, lam=2) - - +collect_results(n=100, p=2000, s=0, lam=3) +#test_randomized_lasso() +#test_KKT() diff --git a/tests/randomized/test_sampler.R b/tests/randomized/test_sampler.R new file mode 100644 index 00000000..e19a53fd --- /dev/null +++ b/tests/randomized/test_sampler.R @@ -0,0 +1,16 @@ + +test_log_concave_sampler = function(){ + samples = log_concave_sampler(negative_log_density= function(x){x^2/2}, + grad_negative_log_density=function(x){x}, + constraints = t(as.matrix(c(2,3))), + observed = 2, nsamples=10000) + mean(samples) + hist(samples) +} + + +test_gaussian_sampler =function(){ + samples = gaussian_sampler(1, 1, 1, 0,10000) + mean(samples) + hist(samples) +} From e739255b7e190f4de4f4c75338455656f58f3a6d Mon Sep 17 00:00:00 2001 From: Jelena Markovic Date: Wed, 8 Nov 2017 21:33:12 -0800 Subject: [PATCH 385/396] active set zero --- tests/randomized/test_instances.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 9e2e0d2a..7f525a40 100644 --- a/tests/randomized/test_instances.R +++ b/tests/randomized/test_instances.R @@ -63,7 +63,7 @@ test_KKT=function(){ collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=TRUE, lam=1.2){ - rho=0. 
+  rho=0.3
   sigma=1
   sample_pvalues = c()
   sample_coverage = c()
@@ -80,9 +80,9 @@ collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=TRUE, l
                                    burnin=1000,
                                    nsample=5000,
                                    condition_subgrad=condition_subgrad)
-    true_beta = beta[result$active_set]
-    coverage = rep(0, nrow(result$ci))
     if (length(result$active_set)>0){
+      true_beta = beta[result$active_set]
+      coverage = rep(0, nrow(result$ci))
       for (i in 1:nrow(result$ci)){
         if (result$ci[i,1]<true_beta[i] & result$ci[i,2]>true_beta[i]){
           coverage[i]=1
@@ -104,7 +104,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=TRUE, l
 }
 
 set.seed(1)
-collect_results(n=100, p=2000, s=0, lam=3)
+collect_results(n=100, p=2000, s=0, lam=2.5)
 #test_randomized_lasso()
 #test_KKT()
 

From b4fd8b521b4f5981250bc749fbfdf1e05df3faae Mon Sep 17 00:00:00 2001
From: Jelena Markovic
Date: Wed, 8 Nov 2017 23:45:39 -0800
Subject: [PATCH 386/396] removed set seed when gen randomization

---
 selectiveInference/R/funs.randomized.R | 1 -
 tests/randomized/test_instances.R      | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index da64f99c..a086df9f 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -41,7 +41,6 @@ randomizedLasso = function(X,
 
     if (noise_scale > 0) {
         if (noise_type == 'gaussian') {
-            set.seed(1)
             perturb_ = rnorm(p) * noise_scale
         }
         else if (noise_type == 'laplace') {
diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R
index 7f525a40..7a89dd7d 100644
--- a/tests/randomized/test_instances.R
+++ b/tests/randomized/test_instances.R
@@ -76,7 +76,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=TRUE, l
                                    lam=lam,
                                    sigma=sigma,
                                    level=level,
-                                   sampler = "A",
+                                   sampler = "R",
                                    burnin=1000,
                                    nsample=5000,
                                    condition_subgrad=condition_subgrad)

From d2438c9512802d506f41208c1e54412073079145 Mon Sep 17 00:00:00 2001
From: Jelena Markovic
Date: Thu, 9 Nov 2017 12:23:05 -0800
Subject: [PATCH 387/396] amir sampler working with subgrad condition false as well

---
 selectiveInference/R/funs.randomized.R | 25 +++++++++++++++++--------
 selectiveInference/R/sampler.R         | 10 ++++++----
 tests/randomized/test_instances.R      |  6 +++---
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index a086df9f..f3c3aacc 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -374,14 +374,23 @@ randomizedLassoInf = function(X,
     }
     inactive_set = lasso_soln$inactive_set
-    noise_scale = lasso_soln$noise_scale # set to default value in randomizedLasso
-
-    if (condition_subgrad==TRUE){
+
+    constraints = matrix(0,nactive,2)
+    constraints[,2] = Inf
+    if (condition_subgrad==TRUE){
        condl_lasso=conditional_density(noise_scale, lasso_soln)
        lasso_soln = condl_lasso$lasso_soln
-       reduced_opt_transform = condl_lasso$reduced_opt_transform
-    }
+       cur_opt_transform = condl_lasso$reduced_opt_transform
+    } else{
+       if (nactive Date: Thu, 9 Nov 2017 15:41:32 -0800
Subject: [PATCH 388/396] logistic

---
 selectiveInference/R/funs.randomized.R | 67 ++++++++++++++++++++------
 tests/randomized/test_instances.R      | 31 ++++++++----
 2 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index f3c3aacc..ee414f4f 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -6,6 +6,7
@@ randomizedLasso = function(X, y, lam, + family="gaussian", noise_scale=NULL, ridge_term=NULL, noise_type=c('gaussian', 'laplace'), @@ -17,7 +18,6 @@ randomizedLasso = function(X, kkt_stop=TRUE, parameter_stop=TRUE) { - n = nrow(X); p = ncol(X) mean_diag = mean(apply(X^2, 2, sum)) @@ -86,7 +86,7 @@ randomizedLasso = function(X, parameter_stop) # param_stop sign_soln = sign(result$soln) - + unpenalized = lam == 0 active = (!unpenalized) & (sign_soln != 0) inactive = (!unpenalized) & (sign_soln == 0) @@ -113,8 +113,21 @@ randomizedLasso = function(X, I = inactive_set X_E = X[,E] X_I = X[,I] - L_E = t(X) %*% X[,E] - + + if (family=="binomial"){ + unpen_reg = glm(y~X_E-1, family="binomial") + unpen_est = unpen_reg$coefficients + pi_fn = function(beta){ + temp = X_E %*% as.matrix(beta) + return(as.vector(exp(temp)/(1+exp(temp)))) # n-dimensional + } + pi_vec = pi_fn(unpen_est) + W_E = diag(pi_vec*(1-pi_vec)) + } else if (family=="gaussian"){ + W_E = diag(rep(1,n)) + } + L_E = t(X) %*% W_E %*% X[,E] + coef_term = L_E signs_ = c(rep(1, sum(unpenalized)), sign_soln[active]) @@ -158,8 +171,12 @@ randomizedLasso = function(X, offset_term = offset_term) # density for sampling optimization variables - + observed_raw = -t(X) %*% y + if (family=="binomial"){ + beta_E = result$soln[active_set] + observed_raw = observed_raw+t(X)%*%pi_fn(beta_E)-L_E %*% beta_E + } inactive_lam = lam[inactive_set] inactive_start = sum(unpenalized) + sum(active) active_start = sum(unpenalized) @@ -333,6 +350,7 @@ conditional_density = function(noise_scale, lasso_soln) { randomizedLassoInf = function(X, y, lam, + family="gaussian", sampler="A", sigma=NULL, noise_scale=NULL, @@ -352,10 +370,11 @@ randomizedLassoInf = function(X, n = nrow(X) p = ncol(X) - + lasso_soln = randomizedLasso(X, y, lam, + family=family, noise_scale=noise_scale, ridge_term=ridge_term, max_iter=max_iter, @@ -410,17 +429,32 @@ randomizedLassoInf = function(X, X_E = X[, active_set] X_minusE = X[, inactive_set] - # if no sigma given, use OLS estimate - + + + if (family=="gaussian"){ + lm_y = lm(y ~ X_E - 1) + sigma_resid = sqrt(sum(resid(lm_y)^2) / lm_y$df.resid) + observed_target = lm_y$coefficients + W_E = diag(rep(1,n)) + observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target)) + } else if (family=="binomial"){ + glm_y = glm(y~X_E-1) + sigma_resid = sqrt(sum(resid(glm_y)^2) / glm_y$df.resid) + observed_target = as.matrix(glm_y$coefficients) + temp = X_E%*%observed_target + pi_vec = exp(temp)/(1+exp(temp)) + observed_internal = c(observed_target, t(X_minusE) %*% (y-pi_vec)) + W_E=diag(as.vector(pi_vec *(1-pi_vec))) + } + + # if no sigma given, use the estimate + if (is.null(sigma)) { - lm_y = lm(y ~ X_E - 1) - sigma = sqrt(sum(resid(lm_y)^2) / lm_y$df.resid) + sigma = sigma_resid } - - target_cov = solve(t(X_E) %*% X_E)*sigma^2 + + target_cov = solve(t(X_E) %*% W_E %*% X_E)*sigma^2 cov_target_internal = rbind(target_cov, matrix(0, nrow=p-nactive, ncol=nactive)) - observed_target = solve(t(X_E) %*% X_E) %*% t(X_E) %*% y - observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target)) internal_transform = lasso_soln$internal_transform opt_transform = lasso_soln$optimization_transform observed_raw = lasso_soln$observed_raw @@ -495,5 +529,10 @@ randomizedLassoInf = function(X, return(list(active_set=active_set, pvalues=pvalues, ci=ci)) } + + + + + diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R index 2255109d..dc93ec51 100644 --- a/tests/randomized/test_instances.R +++ 
b/tests/randomized/test_instances.R
@@ -1,7 +1,7 @@
 library(selectiveInference)
 
-gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA,
-                    random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){
+get_instance = function(n, p, s, sigma=1, rho=0, signal=6, family="gaussian",
+                X=NA, random_signs=TRUE, scale=TRUE, center=TRUE, seed=NA){
   if (!is.na(seed)){
     set.seed(seed)
   }
@@ -19,11 +19,20 @@ gaussian_instance = function(n, p, s, sigma=1, rho=0, signal=6, X=NA,
     signs = sample(c(-1,1), s, replace = TRUE)
     beta = beta * signs
   }
-  y = X %*% beta + rnorm(n)*sigma
+  mu = X %*% beta
+  if (family=="gaussian"){
+    y = mu + rnorm(n)*sigma
+  } else if (family=="binomial"){
+    prob = exp(mu)/(1+exp(mu))
+    y= rbinom(n,1, prob)
+  }
   result = list(X=X,y=y,beta=beta)
   return(result)
 }
+
+
+
 test_randomized_lasso = function(n=100,p=200,s=0){
   set.seed(1)
   data = gaussian_instance(n=n,p=p,s=s, rho=0.3, sigma=3)
@@ -61,27 +70,29 @@ test_KKT=function(){
 
-collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=FALSE, lam=1.2){
+collect_results = function(n,p,s, nsim=100, level=0.9,
+                           family = "gaussian",
+                           condition_subgrad=FALSE, lam=1.2){
   rho=0.3
   sigma=1
   sample_pvalues = c()
   sample_coverage = c()
   for (i in 1:nsim){
-    data = gaussian_instance(n=n,p=p,s=s, rho=rho, sigma=sigma)
+    data = get_instance(n=n,p=p,s=s, rho=rho, sigma=sigma, family=family)
     X=data$X
     y=data$y
-    beta=data$beta
     result = selectiveInference:::randomizedLassoInf(X, y,
-                                   lam=lam,
+                                   lam,
+                                   family = family,
+                                   sampler = "A",
                                    sigma=sigma,
                                    level=level,
-                                   sampler = "A",
                                    burnin=1000,
                                    nsample=5000,
                                    condition_subgrad=condition_subgrad)
     if (length(result$active_set)>0){
-      true_beta = beta[result$active_set]
+      true_beta = data$beta[result$active_set]
       coverage = rep(0, nrow(result$ci))
       for (i in 1:nrow(result$ci)){
        if (result$ci[i,1]<true_beta[i] & result$ci[i,2]>true_beta[i]){
@@ -104,7 +115,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9, condition_subgrad=FALSE,
 }
 
 set.seed(1)
-collect_results(n=100, p=20, s=0, lam=1.2)
+collect_results(n=100, p=20, s=0, lam=0.5)
 #test_randomized_lasso()
 #test_KKT()

From 157c1db486e8d9df877a0cfc7a0221d60776da39 Mon Sep 17 00:00:00 2001
From: Jelena Markovic
Date: Thu, 9 Nov 2017 16:02:53 -0800
Subject: [PATCH 389/396] E of zero length

---
 selectiveInference/R/funs.randomized.R | 5 +++++
 tests/randomized/test_instances.R      | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R
index ee414f4f..9f2e4e28 100644
--- a/selectiveInference/R/funs.randomized.R
+++ b/selectiveInference/R/funs.randomized.R
@@ -114,6 +114,10 @@ randomizedLasso = function(X,
     X_E = X[,E]
     X_I = X[,I]
 
+    if (length(E)==0){
+        return(list(active_set=c()))
+    }
+
     if (family=="binomial"){
         unpen_reg = glm(y~X_E-1, family="binomial")
         unpen_est = unpen_reg$coefficients
@@ -126,6 +130,7 @@ randomizedLasso = function(X,
     } else if (family=="gaussian"){
         W_E = diag(rep(1,n))
     }
+
     L_E = t(X) %*% W_E %*% X[,E]
 
     coef_term = L_E
diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R
index dc93ec51..d5c3eca1 100644
--- a/tests/randomized/test_instances.R
+++ b/tests/randomized/test_instances.R
@@ -71,7 +71,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9,
-                           family = "gaussian",
+                           family = "binomial",
                            condition_subgrad=FALSE, lam=1.2){
   rho=0.3
@@ -115,7 +115,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9,
 }
 
 set.seed(1)
-collect_results(n=100, p=20, s=0, lam=0.5)
+collect_results(n=100, p=20, s=0, lam=0.8)
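
# The coverage bookkeeping in collect_results above can be stated more
# compactly. The helper below is an illustrative sketch, not part of the
# patch (coverage_rate is an invented name); an interval counts as covering
# when it straddles the true coefficient.
coverage_rate = function(ci, true_beta) {
    # ci: matrix of confidence limits (lower, upper); true_beta: true coefficients
    mean(ci[, 1] < true_beta & ci[, 2] > true_beta)
}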
#test_randomized_lasso() #test_KKT() From 1dfbea8b3596f2f56c9f1ee7e5b29925e13f58cd Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 9 Nov 2017 21:59:39 -0800 Subject: [PATCH 390/396] fixing man page so check passes --- selectiveInference/R/RcppExports.R | 35 -------------------- selectiveInference/R/funs.randomized.R | 17 +++++----- selectiveInference/man/randomizedLassoInf.Rd | 5 +++ 3 files changed, 13 insertions(+), 44 deletions(-) delete mode 100644 selectiveInference/R/RcppExports.R diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R deleted file mode 100644 index e927a3af..00000000 --- a/selectiveInference/R/RcppExports.R +++ /dev/null @@ -1,35 +0,0 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { - .Call('selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) -} - -solve_QP_wide <- function(X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) { - .Call('selectiveInference_solve_QP_wide', PACKAGE = 'selectiveInference', X, bound, ridge_term, maxiter, theta, linear_func, gradient, X_theta, ever_active, nactive, kkt_tol, objective_tol, parameter_tol, max_active, kkt_stop, objective_stop, param_stop) -} - -update1_ <- function(Q2, w, m, k) { - .Call('selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) -} - -downdate1_ <- function(Q1, R, j0, m, n) { - .Call('selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) -} - -log_density_gaussian_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_gaussian_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) -} - -log_density_gaussian_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_gaussian_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) -} - -log_density_laplace_ <- function(noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_laplace_', PACKAGE = 'selectiveInference', noise_scale, internal_linear, internal_state, optimization_linear, optimization_state, offset) -} - -log_density_laplace_conditional_ <- function(noise_scale, optimization_linear, optimization_state, offset) { - .Call('selectiveInference_log_density_laplace_conditional_', PACKAGE = 'selectiveInference', noise_scale, optimization_linear, optimization_state, offset) -} - diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index f3c3aacc..f954e1cc 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -34,9 +34,6 @@ randomizedLasso = function(X, noise_scale = 0.5 * sd(y) * sqrt(mean_diag) } - print(paste("ridge term", ridge_term)) - print(paste("noise 
scale", noise_scale)) - noise_type = match.arg(noise_type) if (noise_scale > 0) { @@ -333,14 +330,14 @@ conditional_density = function(noise_scale, lasso_soln) { randomizedLassoInf = function(X, y, lam, - sampler="A", sigma=NULL, noise_scale=NULL, ridge_term=NULL, condition_subgrad=TRUE, level=0.9, - nsample=10000, - burnin=2000, + sampler=c("norejection", "adaptMCMC"), + nsample=10000, + burnin=2000, max_iter=100, # how many iterations for each optimization problem kkt_tol=1.e-4, # tolerance for the KKT conditions parameter_tol=1.e-8, # tolerance for relative convergence of parameter @@ -368,7 +365,7 @@ randomizedLassoInf = function(X, active_set = lasso_soln$active_set nactive = length(active_set) - print(paste("nactive", nactive)) + if (nactive==0){ return (list(active_set=active_set, pvalues=c(), ci=c())) } @@ -394,10 +391,12 @@ randomizedLassoInf = function(X, ndim = length(lasso_soln$observed_opt_state) - if (sampler =="R"){ + sampler = match.arg(sampler) + + if (sampler == "adaptMCMC"){ S = sample_opt_variables(lasso_soln, jump_scale=rep(1/sqrt(n), ndim), nsample=nsample) opt_samples = as.matrix(S$samples[(burnin+1):nsample,,drop=FALSE]) - } else if (sampler == "A"){ + } else if (sampler == "norejection") { opt_samples = gaussian_sampler(noise_scale, lasso_soln$observed_opt_state, cur_opt_transform$linear_term, diff --git a/selectiveInference/man/randomizedLassoInf.Rd b/selectiveInference/man/randomizedLassoInf.Rd index 18b9611a..e0f9d444 100644 --- a/selectiveInference/man/randomizedLassoInf.Rd +++ b/selectiveInference/man/randomizedLassoInf.Rd @@ -19,6 +19,7 @@ randomizedLassoInf(X, ridge_term=NULL, condition_subgrad=TRUE, level=0.9, + sampler=c("norejection", "adaptMCMC"), nsample=10000, burnin=2000, max_iter=100, @@ -71,6 +72,10 @@ Default is TRUE. \item{level}{ Level for confidence intervals. } +\item{sampler}{ +Which sampler to use -- default is a no-rejection sampler. Otherwise +use MCMC from the adaptMCMC package. +} \item{nsample}{ Number of samples of optimization variables to sample. 
} From b21260afe2e57afcc4053628591a71393463e3e2 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 9 Nov 2017 22:01:14 -0800 Subject: [PATCH 391/396] renaming file --- selectiveInference/R/{sampler.R => funs.sampler.R} | 1 + 1 file changed, 1 insertion(+) rename selectiveInference/R/{sampler.R => funs.sampler.R} (97%) diff --git a/selectiveInference/R/sampler.R b/selectiveInference/R/funs.sampler.R similarity index 97% rename from selectiveInference/R/sampler.R rename to selectiveInference/R/funs.sampler.R index c9e8ace3..3f7f5fd9 100644 --- a/selectiveInference/R/sampler.R +++ b/selectiveInference/R/funs.sampler.R @@ -1,3 +1,4 @@ +# A no-rejection MCMC algorithm Jelena and Amir have been working on log_concave_sampler = function(negative_log_density, grad_negative_log_density, From 183bc3abb230db8a828bf8db9f168dc1afde63e5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 9 Nov 2017 22:03:47 -0800 Subject: [PATCH 392/396] stop instead of print --- selectiveInference/R/funs.randomized.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index f954e1cc..5c5f3d11 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -98,8 +98,8 @@ randomizedLasso = function(X, observed_unpen = result$soln[unpenalized] observed_subgrad = -n*result$gradient[inactive] - if (length(which(abs(observed_subgrad)>lam[1]))){ - print("subgradient eq not satisfied") + if (sum(abs(observed_subgrad)>lam*(1.001)) > 0){ + stop("subgradient eq not satisfied") } observed_opt_state = c(observed_unpen, observed_scalings, observed_subgrad) From 42cd878bddf45a090b1ec54a0ca3aed7e4537bfa Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 9 Nov 2017 22:16:35 -0800 Subject: [PATCH 393/396] WIP: adding logistic option, need to solve logistic problem --- selectiveInference/R/funs.randomized.R | 27 ++++++++++---------- selectiveInference/man/randomizedLassoInf.Rd | 4 +++ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 1dc835cb..020698dc 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -6,7 +6,7 @@ randomizedLasso = function(X, y, lam, - family="gaussian", + family=c("gaussian","binomial"), noise_scale=NULL, ridge_term=NULL, noise_type=c('gaussian', 'laplace'), @@ -18,6 +18,8 @@ randomizedLasso = function(X, kkt_stop=TRUE, parameter_stop=TRUE) { + family = match.arg(family) + n = nrow(X); p = ncol(X) mean_diag = mean(apply(X^2, 2, sum)) @@ -65,8 +67,8 @@ randomizedLasso = function(X, nactive = as.integer(0) result = solve_QP_wide(X, # design matrix - lam / n, # vector of Lagrange multipliers - ridge_term / n, # ridge_term + lam / n, # vector of Lagrange multipliers + ridge_term / n, # ridge_term max_iter, soln, linear_func, @@ -177,7 +179,7 @@ randomizedLasso = function(X, observed_raw = -t(X) %*% y if (family=="binomial"){ beta_E = result$soln[active_set] - observed_raw = observed_raw+t(X)%*%pi_fn(beta_E)-L_E %*% beta_E + observed_raw = observed_raw + t(X)%*%pi_fn(beta_E) - L_E %*% beta_E } inactive_lam = lam[inactive_set] inactive_start = sum(unpenalized) + sum(active) @@ -213,11 +215,11 @@ randomizedLasso = function(X, optimization_transform = opt_transform, internal_transform = internal_transform, log_optimization_density = log_optimization_density, - observed_opt_state = observed_opt_state, + observed_opt_state = 
observed_opt_state, observed_raw = observed_raw, - noise_scale = noise_scale, - soln = result$soln, - perturb = perturb_ + noise_scale = noise_scale, + soln = result$soln, + perturb = perturb_ )) } @@ -352,8 +354,7 @@ conditional_density = function(noise_scale, lasso_soln) { randomizedLassoInf = function(X, y, lam, - family=c("gaussian", "logistic"), - sampler=c("norejection", "adaptMCMC"), + family=c("gaussian", "binomial"), sigma=NULL, noise_scale=NULL, ridge_term=NULL, @@ -436,15 +437,13 @@ randomizedLassoInf = function(X, X_E = X[, active_set] X_minusE = X[, inactive_set] - - - if (family=="gaussian"){ + if (family == "gaussian") { lm_y = lm(y ~ X_E - 1) sigma_resid = sqrt(sum(resid(lm_y)^2) / lm_y$df.resid) observed_target = lm_y$coefficients W_E = diag(rep(1,n)) observed_internal = c(observed_target, t(X_minusE) %*% (y-X_E%*% observed_target)) - } else if (family=="binomial"){ + } else if (family == "binomial") { glm_y = glm(y~X_E-1) sigma_resid = sqrt(sum(resid(glm_y)^2) / glm_y$df.resid) observed_target = as.matrix(glm_y$coefficients) diff --git a/selectiveInference/man/randomizedLassoInf.Rd b/selectiveInference/man/randomizedLassoInf.Rd index e0f9d444..351596e1 100644 --- a/selectiveInference/man/randomizedLassoInf.Rd +++ b/selectiveInference/man/randomizedLassoInf.Rd @@ -14,6 +14,7 @@ randomization. randomizedLassoInf(X, y, lam, + family=c("gaussian", "binomial"), sigma=NULL, noise_scale=NULL, ridge_term=NULL, @@ -49,6 +50,9 @@ Value of lambda used to compute beta. See the above warning where obj is the object returned by glmnet (and [-1] removes the intercept, which glmnet always puts in the first component) } +\item{family}{ +Response type: "gaussian" (default), "binomial". +} \item{sigma}{ Estimate of error standard deviation. If NULL (default), this is estimated using the mean squared residual of the full least squares based on From af3b818cb75b43f1dbc53c1e621fb6a7994027f7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 10 Nov 2017 08:43:56 -0800 Subject: [PATCH 394/396] fixing empty inactive set indexing problem --- selectiveInference/R/funs.randomized.R | 40 +++++++++++++++----------- tests/randomized/test_instances.R | 6 ++-- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/selectiveInference/R/funs.randomized.R b/selectiveInference/R/funs.randomized.R index 020698dc..5be79386 100644 --- a/selectiveInference/R/funs.randomized.R +++ b/selectiveInference/R/funs.randomized.R @@ -78,11 +78,11 @@ randomizedLasso = function(X, nactive, kkt_tol, objective_tol, - parameter_tol, + parameter_tol, p, - objective_stop, # objective_stop - kkt_stop, # kkt_stop - parameter_stop) # param_stop + objective_stop, # objective_stop + kkt_stop, # kkt_stop + parameter_stop) # param_stop sign_soln = sign(result$soln) @@ -100,7 +100,7 @@ randomizedLasso = function(X, observed_unpen = result$soln[unpenalized] observed_subgrad = -n*result$gradient[inactive] - if (sum(abs(observed_subgrad)>lam*(1.001)) > 0){ + if (sum(abs(observed_subgrad)>lam[inactive]*(1.001)) > 0){ stop("subgradient eq not satisfied") } @@ -144,14 +144,17 @@ randomizedLasso = function(X, coef_term = coef_term %*% diag(signs_) # scaligns are non-negative } - subgrad_term = matrix(0, p, sum(inactive)) # for subgrad - for (i in 1:sum(inactive)) { - subgrad_term[inactive_set[i], i] = 1 - } - - linear_term = cbind(coef_term, - subgrad_term) - + if (sum(inactive) > 0) { + subgrad_term = matrix(0, p, sum(inactive)) # for subgrad + for (i in 1:sum(inactive)) { + subgrad_term[inactive_set[i], i] = 1 + } + + 
linear_term = cbind(coef_term,
+                            subgrad_term)
+    } else {
+        linear_term = coef_term
+    }
 
     offset_term = rep(0, p)
     offset_term[active] = lam[active] * sign_soln[active]
@@ -167,9 +170,14 @@ randomizedLasso = function(X,
     active_term = -L_E   # for \bar{\beta}_E
 
-    inactive_term = -subgrad_term
-    linear_term = cbind(active_term,
-                        inactive_term)
+    if (sum(inactive) > 0) {
+        inactive_term = -subgrad_term
+        linear_term = cbind(active_term,
+                            inactive_term)
+    } else {
+        linear_term = active_term
+    }
+
     offset_term = rep(0, p)
     internal_transform = list(linear_term = linear_term,
                               offset_term = offset_term)
diff --git a/tests/randomized/test_instances.R b/tests/randomized/test_instances.R
index d5c3eca1..8155e5f0 100644
--- a/tests/randomized/test_instances.R
+++ b/tests/randomized/test_instances.R
@@ -85,7 +85,7 @@ collect_results = function(n,p,s, nsim=100, level=0.9,
     result = selectiveInference:::randomizedLassoInf(X, y, lam,
                                    family = family,
-                                   sampler = "A",
+                                   sampler = "adaptMCMC",
                                    sigma=sigma,
                                    level=level,
                                    burnin=1000,
@@ -114,8 +114,8 @@ collect_results = function(n,p,s, nsim=100, level=0.9,
   }
 }
 
-set.seed(1)
-collect_results(n=500, p=200, s=0, lam=0.8)
+#set.seed(1)
+collect_results(n=500, p=200, s=0, lam=0.8)
 #test_randomized_lasso()
 #test_KKT()

From 9de1b7c4f7b9544262a7168d1717241841742888 Mon Sep 17 00:00:00 2001
From: tibs
Date: Tue, 14 Nov 2017 21:25:27 -0800
Subject: [PATCH 395/396] rob changed NAMESPACE

---
 selectiveInference/NAMESPACE | 1 +
 1 file changed, 1 insertion(+)

diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE
index 11a834e1..f6854c52 100644
--- a/selectiveInference/NAMESPACE
+++ b/selectiveInference/NAMESPACE
@@ -15,6 +15,7 @@ export(lar,fs,
        TG.limits,
        TG.interval,
        debiasingMatrix,
+       randomizedLasso,
        randomizedLassoInf
        )

From a6c495b17941ea6a8d518e15a6450750cf256e1b Mon Sep 17 00:00:00 2001
From: Corey Brier
Date: Mon, 9 Apr 2018 13:32:37 -0400
Subject: [PATCH 396/396] Fixed handling of numeric variables in factorDesign

---
 selectiveInference/R/funs.groupfs.R | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/selectiveInference/R/funs.groupfs.R b/selectiveInference/R/funs.groupfs.R
index b2c04472..9b3ac6f3 100644
--- a/selectiveInference/R/funs.groupfs.R
+++ b/selectiveInference/R/funs.groupfs.R
@@ -684,33 +684,32 @@ scaleGroups <- function(x, index, center = TRUE, normalize = TRUE) {
 #' pvals = groupfsInf(fit)
 #' }
 factorDesign <- function(df) {
-    factor.inds <- sapply(df[1,], is.factor)
+    factor.inds <- sapply(df[1, ], is.factor)
     factor.labels <- which(factor.inds)
     nfacs <- sum(factor.inds)
-    nlevs <- sapply(df[1,factor.inds], function(fac) nlevels(fac))
+    nlevs <- sapply(df[1, factor.inds], function(fac) nlevels(fac))
     totnlevs <- sum(nlevs)
     num.num = indcounter = ncol(df) - nfacs
-    x <- matrix(nrow=nrow(df), ncol = totnlevs + num.num)
+    x <- matrix(NA_real_, nrow = nrow(df), ncol = totnlevs + num.num)
     colnames(x) <- 1:ncol(x)
     index <- integer(ncol(x))
-    varnames <- character(ncol(df))
+
     if (num.num > 0) {
-        x[,1:num.num] <- df[, !factor.inds]
-        varnames[1:num.num] = colnames(x)[1:num.num] <- colnames(df)[1:num.num]
+        x[, 1:num.num] <- as.matrix(df[, !factor.inds, drop = FALSE])
+        colnames(x)[1:num.num] <- colnames(df)[!factor.inds]
         index[1:num.num] <- 1:num.num
-        indcounter <- indcounter + num.num - 1
     }
+
     for (j in 1:nfacs) {
-        submat <- model.matrix(~ df[, factor.labels[j]] - 1)
-        indcounter <- indcounter+1
-        submatinds <- indcounter:(indcounter+nlevs[j]-1)
+        submat <- model.matrix(~df[, factor.labels[j]] -
1) + indcounter <- indcounter + 1 + submatinds <- indcounter:(indcounter + nlevs[j] - 1) indcounter <- indcounter + nlevs[j] - 1 - colnames(x)[submatinds] <- paste0(colnames(df)[num.num + j], ":", 1:nlevs[j]) - varnames[num.num + j] <- colnames(df)[num.num + j] - x[,submatinds] <- submat + colnames(x)[submatinds] <- paste0(colnames(df)[factor.inds][j], ":", 1:nlevs[j]) + x[, submatinds] <- submat index[submatinds] <- num.num + j } - attr(x, "varnames") <- varnames + attr(x, "varnames") <- c(colnames(df)[!factor.inds], colnames(df)[factor.inds]) return(list(x = x, index = index)) }
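
A quick illustration of what the factorDesign fix above changes: column names
and index entries are now taken from the logical factor.inds mask, so numeric
columns no longer have to precede the factor columns in the data frame. The
sketch below is illustrative only (the data frame and its column names are
invented); the returned x and index are the arguments groupfs expects, as in
the roxygen example above.

library(selectiveInference)
set.seed(1)
df = data.frame(group = factor(sample(c("a", "b", "c"), 20, replace = TRUE)),
                age = rnorm(20),
                bmi = rnorm(20))
fd = factorDesign(df)
colnames(fd$x)  # "age" "bmi" "group:1" "group:2" "group:3"
fd$index        # 1 2 3 3 3: each column of fd$x mapped back to its source variable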