From 0764a80738313bcf31446abb2476c4b437b6e234 Mon Sep 17 00:00:00 2001 From: Pasquale Di Carlo <36417405+pdicarl3@users.noreply.github.com> Date: Sat, 17 Aug 2019 09:19:32 -0400 Subject: [PATCH 1/2] Update blockwiseModulesC.R --- R/blockwiseModulesC.R | 3662 +---------------------------------------- 1 file changed, 28 insertions(+), 3634 deletions(-) diff --git a/R/blockwiseModulesC.R b/R/blockwiseModulesC.R index 2b898a8..5963a88 100644 --- a/R/blockwiseModulesC.R +++ b/R/blockwiseModulesC.R @@ -1,3634 +1,28 @@ - -#Copyright (C) 2008 Peter Langfelder - -#This program is free software; you can redistribute it and/or -#modify it under the terms of the GNU General Public License -#as published by the Free Software Foundation; either version 2 -#of the License, or (at your option) any later version. - -#This program is distributed in the hope that it will be useful, -#but WITHOUT ANY WARRANTY; without even the implied warranty of -#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -#GNU General Public License for more details. - -#You should have received a copy of the GNU General Public License -#along with this program; if not, write to the Free Software -#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -# In this version the blocks are chosen by pre-clustering. - -#========================================================================================================== -# -# TOM similarity via a call to a compiled code. -# -#========================================================================================================== - -TOMsimilarityFromExpr = function( - datExpr, weights = NULL, - corType = "pearson", networkType = "unsigned", - power = 6, TOMType = "signed", TOMDenom = "min", - maxPOutliers = 1, - quickCor = 0, - pearsonFallback = "individual", - cosineCorrelation = FALSE, - replaceMissingAdjacencies = FALSE, - suppressTOMForZeroAdjacencies = FALSE, - suppressNegativeTOM = FALSE, - useInternalMatrixAlgebra = FALSE, - nThreads = 0, - verbose = 1, indent = 0) -{ - corTypeC = as.integer(pmatch(corType, .corTypes)-1); - if (is.na(corTypeC)) - stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", "))) - - TOMTypeC = as.integer(pmatch(TOMType, .TOMTypes)-1); - if (is.na(TOMTypeC)) - stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", "))) - - TOMDenomC = as.integer(pmatch(TOMDenom, .TOMDenoms)-1); - if (is.na(TOMDenomC)) - stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", "))) - - if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1."); - if (quickCor < 0) stop("quickCor must be positive."); - if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1."); - - fallback = as.integer(pmatch(pearsonFallback, .pearsonFallbacks)); - if (is.na(fallback)) - stop(paste("Unrecognized 'pearsonFallback'. Recognized values are (unique abbreviations of)\n", - paste(.pearsonFallbacks, collapse = ", "))) - - if (nThreads < 0) stop("nThreads must be positive."); - if (is.null(nThreads) || (nThreads==0)) nThreads = .useNThreads(); - - if ( (power<1) | (power>30) ) stop("power must be between 1 and 30."); - - networkTypeC = as.integer(charmatch(networkType, .networkTypes)-1); - if (is.na(networkTypeC)) - stop(paste("Unrecognized networkType argument.", - "Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", "))); - dimEx = dim(datExpr); - if (length(dimEx)!=2) stop("datExpr has incorrect dimensions.") - if (length(weights) > 0) - { - if (!isTRUE(all.equal(dimEx, dim(weights)))) - stop("When 'weights' are given, they must have the same dimensions as 'datExpr'.") - if (any(!is.finite(weights))) - { - if (verbose > 0) - warning("Found non-finite weights. The corresponding data points will be removed."); - weights[!is.finite(weights)] = NA; - } - } - nGenes = dimEx[2]; - nSamples = dimEx[1]; - warn = as.integer(0); - - datExpr = as.matrix(datExpr); - if (length(weights) > 0) weights = as.matrix(weights) else weights = NULL; - - tom = .Call("tomSimilarity_call", datExpr, weights, - as.integer(corTypeC), as.integer(networkTypeC), as.double(power), - as.integer(TOMTypeC), as.integer(TOMDenomC), - as.double(maxPOutliers), as.double(quickCor), - as.integer(fallback), as.integer(cosineCorrelation), - as.integer(replaceMissingAdjacencies), - as.integer(suppressTOMForZeroAdjacencies), - as.integer(suppressNegativeTOM), - as.integer(useInternalMatrixAlgebra), - warn, - as.integer(nThreads), as.integer(verbose), as.integer(indent), PACKAGE = "WGCNA"); - - diag(tom) = 1; - return (tom); -} - - - -#====================================================================================================== -# -# TOMsimilarity (from adjacency) -# -#=================================================================================================== - -TOMsimilarity = function(adjMat, TOMType = "unsigned", TOMDenom = "min", - suppressTOMForZeroAdjacencies = FALSE, suppressNegativeTOM = FALSE, - useInternalMatrixAlgebra = FALSE, verbose = 1, indent = 0) -{ - TOMTypeC = pmatch(TOMType, .TOMTypes)-1; - if (is.na(TOMTypeC)) - stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", "))) - - if (TOMTypeC == 0) - stop("'TOMType' cannot be 'none' for this function."); - - TOMDenomC = pmatch(TOMDenom, .TOMDenoms)-1; - if (is.na(TOMDenomC)) - stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", "))) - - checkAdjMat(adjMat, min = if (TOMTypeC==2) -1 else 0, max = 1); - - if (any(is.na(adjMat))) adjMat[is.na(adjMat)] = 0; - #if (any(diag(adjMat)!=1)) diag(adjMat) = 1; <--- this is now done in compiled code - - #nGenes = dim(adjMat)[1]; - - #tom = matrix(0, nGenes, nGenes); - - #tomResult = .C("tomSimilarityFromAdj", as.double(as.matrix(adjMat)), as.integer(nGenes), - # as.integer(TOMTypeC), - # as.integer(TOMDenomC), - # tom = as.double(tom), as.integer(verbose), as.integer(indent), PACKAGE = "WGCNA") - #tom[,] = tomResult$tom; - #diag(tom) = 1; - #rm(tomResult); collectGarbage(); - tom = .Call("tomSimilarityFromAdj_call", as.matrix(adjMat), - as.integer(TOMTypeC), - as.integer(TOMDenomC), - as.integer(suppressTOMForZeroAdjacencies), - as.integer(suppressNegativeTOM), - as.integer(useInternalMatrixAlgebra), - as.integer(verbose), as.integer(indent), PACKAGE = "WGCNA") - gc(); - - tom; -} - -#========================================================================================================== -# -# TOMdist (from adjacency) -# -#========================================================================================================== - -TOMdist = function(adjMat, TOMType = "unsigned", TOMDenom = "min", - suppressTOMForZeroAdjacencies = FALSE, suppressNegativeTOM = FALSE, - useInternalMatrixAlgebra = FALSE, verbose = 1, indent = 0) -{ - 1-TOMsimilarity(adjMat, TOMType, TOMDenom, suppressTOMForZeroAdjacencies = suppressTOMForZeroAdjacencies, - suppressNegativeTOM = suppressNegativeTOM, - useInternalMatrixAlgebra = useInternalMatrixAlgebra, verbose = verbose, indent = indent) -} - - -#========================================================================================================== -# -# blockwiseModules -# -#========================================================================================================== -# Function to calculate modules and eigengenes from all genes. - -blockwiseModules = function( - # Input data - datExpr, - weights = NULL, - - # Data checking options - checkMissingData = TRUE, - - # Options for splitting data into blocks - blocks = NULL, - maxBlockSize = 5000, - blockSizePenaltyPower = 5, - nPreclusteringCenters = as.integer(min(ncol(datExpr)/20, 100*ncol(datExpr)/maxBlockSize)), - randomSeed = 54321, - - # load TOM from previously saved file? - - loadTOM = FALSE, - - # Network construction arguments: correlation options - - corType = "pearson", - maxPOutliers = 1, - quickCor = 0, - pearsonFallback = "individual", - cosineCorrelation = FALSE, - - # Adjacency function options - - power = 6, - networkType = "unsigned", - replaceMissingAdjacencies = FALSE, - - # Topological overlap options - - TOMType = "signed", - TOMDenom = "min", - suppressTOMForZeroAdjacencies = FALSE, - suppressNegativeTOM = FALSE, - - # Saving or returning TOM - - getTOMs = NULL, - saveTOMs = FALSE, - saveTOMFileBase = "blockwiseTOM", - - # Basic tree cut options - - deepSplit = 2, - detectCutHeight = 0.995, - minModuleSize = min(20, ncol(datExpr)/2 ), - - # Advanced tree cut options - - maxCoreScatter = NULL, minGap = NULL, - maxAbsCoreScatter = NULL, minAbsGap = NULL, - minSplitHeight = NULL, minAbsSplitHeight = NULL, - useBranchEigennodeDissim = FALSE, - minBranchEigennodeDissim = mergeCutHeight, - - stabilityLabels = NULL, - stabilityCriterion = c("Individual fraction", "Common fraction"), - minStabilityDissim = NULL, - pamStage = TRUE, pamRespectsDendro = TRUE, - - # Gene reassignment, module trimming, and module "significance" criteria - - reassignThreshold = 1e-6, - minCoreKME = 0.5, - minCoreKMESize = minModuleSize/3, - minKMEtoStay = 0.3, - - # Module merging options - - mergeCutHeight = 0.15, - impute = TRUE, - trapErrors = FALSE, - - # Output options - - numericLabels = FALSE, - - # Options controlling behaviour - - nThreads = 0, - useInternalMatrixAlgebra = FALSE, - useCorOptionsThroughout = TRUE, - verbose = 0, indent = 0, - ...) -{ - spaces = indentSpaces(indent); - - if (verbose>0) - printFlush(paste(spaces, "Calculating module eigengenes block-wise from all genes")); - - seedSaved = FALSE; - if (!is.null(randomSeed)) - { - if (exists(".Random.seed")) - { - seedSaved = TRUE; - savedSeed = .Random.seed - } - set.seed(randomSeed); - } - - intCorType = pmatch(corType, .corTypes); - if (is.na(intCorType)) - stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", "))) - - intTOMType = pmatch(TOMType, .TOMTypes); - if (is.na(intTOMType)) - stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", "))) - - TOMDenomC = pmatch(TOMDenom, .TOMDenoms)-1; - if (is.na(TOMDenomC)) - stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", "))) - - if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1."); - if (quickCor < 0) stop("quickCor must be positive."); - if (nThreads < 0) stop("nThreads must be positive."); - if (is.null(nThreads) || (nThreads==0)) nThreads = .useNThreads(); - - if ( (power<1) | (power>30) ) stop("power must be between 1 and 30."); - # if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1."); - - intNetworkType = charmatch(networkType, .networkTypes); - if (is.na(intNetworkType)) - stop(paste("Unrecognized networkType argument.", - "Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", "))); - - fallback = pmatch(pearsonFallback, .pearsonFallbacks) - if (is.na(fallback)) - stop(spaste("Unrecognized value '", pearsonFallback, "' of argument 'pearsonFallback'.", - "Recognized values are (unique abbreviations of)\n", - paste(.pearsonFallbacks, collapse = ", "))) - - - datExpr = as.matrix(datExpr); - dimEx = dim(datExpr); - if (length(dimEx)!=2) stop("datExpr has incorrect dimensions.") - weights = .checkAndScaleWeights(weights, datExpr, scaleByMax = FALSE); - haveWeights = length(weights)>0; - - nGenes = dimEx[2]; - nSamples = dimEx[1]; - allLabels = rep(0, nGenes); - AllMEs = NULL; - allLabelIndex = NULL; - - originalSampleNames = rownames(datExpr); - originalGeneNames = colnames(datExpr); - - #if (maxBlockSize >= floor(sqrt(2^31)) ) - # stop("'maxBlockSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.") - - if (!is.null(blocks) && (length(blocks)!=nGenes)) - stop("Input error: the length of 'geneRank' does not equal the number of genes in given 'datExpr'."); - - if (!is.null(getTOMs)) - warning("getTOMs is deprecated, please use saveTOMs instead."); - - # Check data for genes and samples that have too many missing values - - if (checkMissingData) - { - gsg = goodSamplesGenes(datExpr, weights = weights, verbose = verbose - 1, indent = indent + 1) - if (!gsg$allOK) - { - datExpr = datExpr[gsg$goodSamples, gsg$goodGenes]; - if (haveWeights) weights = weights[gsg$goodSamples, gsg$goodGenes]; - } - nGGenes = sum(gsg$goodGenes); - nGSamples = sum(gsg$goodSamples); - } else { - nGGenes = nGenes; - nGSamples = nSamples; - gsg = list(goodSamples = rep(TRUE, nSamples), goodGenes = rep(TRUE, nGenes), allOK = TRUE); - } - - if (any(is.na(datExpr))) - { - datExpr.scaled.imputed = t(impute.knn(t(scale(datExpr)))$data) - } else - datExpr.scaled.imputed = scale(datExpr); - - corFnc = .corFnc[intCorType]; - corOptions = .corOptionList[[intCorType]]; - - corFncAcceptsWeights = intCorType==1; - - if (useCorOptionsThroughout) - corOptions = c(corOptions, list(cosine = cosineCorrelation)); - # Do not add the quick argument here. The calculations carried out here will not be slow anyway. - if (intCorType==2 && useCorOptionsThroughout ) - corOptions = c(corOptions, list(maxPOutliers = maxPOutliers, pearsonFallback = pearsonFallback)); - - signed = networkType %in% c("signed", "signed hybrid"); - - # Set up advanced tree cut methods - - otherArgs = list(...); - - if (useBranchEigennodeDissim) - { - branchSplitFnc = list("branchEigengeneDissim"); - externalSplitOptions = list(list( corFnc = corFnc, corOptions = corOptions, - signed = signed)); - nExternalBranchSplitFnc = 1; - externalSplitFncNeedsDistance = FALSE; - minExternalSplit = minBranchEigennodeDissim; - } else { - branchSplitFnc = list(); - externalSplitOptions = list() - externalSplitFncNeedsDistance = logical(0); - nExternalBranchSplitFnc = 0; - minExternalSplit = numeric(0); - } - - if (!is.null(stabilityLabels)) - { - stabilityCriterion = match.arg(stabilityCriterion); - branchSplitFnc = c(branchSplitFnc, - if (stabilityCriterion=="Individual fraction") - "branchSplitFromStabilityLabels.individualFraction" else "branchSplitFromStabilityLabels"); - minExternalSplit = c(minExternalSplit, minStabilityDissim); - externalSplitFncNeedsDistance = c(externalSplitFncNeedsDistance, FALSE); - print(dim(stabilityLabels)); - externalSplitOptions = c(externalSplitOptions, list(list(stabilityLabels = stabilityLabels))) - } - - if ("useBranchSplit" %in% names(otherArgs)) - { - if (otherArgs$useBranchSplit) - { - nExternalBranchSplitFnc = nExternalBranchSplitFnc + 1; - branchSplitFnc[[nExternalBranchSplitFnc]] = "branchSplit" - externalSplitOptions[[nExternalBranchSplitFnc]] = list(discardProp = 0.08, minCentralProp = 0.75, - nConsideredPCs = 3, signed = signed, getDetails = FALSE); - externalSplitFncNeedsDistance[nExternalBranchSplitFnc] = FALSE; - minExternalSplit[ nExternalBranchSplitFnc] = otherArgs$minBranchSplit; - } - } - - # Split data into blocks if needed - - if (is.null(blocks)) - { - if (nGGenes > maxBlockSize) - { - if (verbose>1) printFlush(paste(spaces, "....pre-clustering genes to determine blocks..")); - clustering = projectiveKMeans(datExpr, preferredSize = maxBlockSize, - checkData = FALSE, - sizePenaltyPower = blockSizePenaltyPower, - nCenters = nPreclusteringCenters, - randomSeed = randomSeed, - verbose = verbose-2, indent = indent + 1); - gBlocks = .orderLabelsBySize(clustering$clusters) - if (verbose > 2) { printFlush("Block sizes:"); print(table(gBlocks)); } - } else - gBlocks = rep(1, nGGenes); - blocks = rep(NA, nGenes); - blocks[gsg$goodGenes] = gBlocks; - } else { - gBlocks = blocks[gsg$goodGenes]; - } - - blockLevels = as.numeric(levels(factor(gBlocks))); - blockSizes = table(gBlocks) - - if (any(blockSizes > sqrt(2^31)-1)) - printFlush(spaste(spaces, - "Found block(s) with size(s) larger than limit of 'int' indexing.\n", - spaces, " Support for such large blocks is experimental; please report\n", - spaces, " any issues to Peter.Langfelder@gmail.com.")); - - nBlocks = length(blockLevels); - - # Initialize various variables - - dendros = list(); - - TOMFiles = rep("", nBlocks); - blockGenes = list(); - - maxUsedLabel = 0; - for (blockNo in 1:nBlocks) - { - if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, ".")); - - blockGenes[[blockNo]] = c(1:nGenes)[gsg$goodGenes][gBlocks==blockLevels[blockNo]]; - block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]]; - selExpr = as.matrix(datExpr[, block]); - if (haveWeights) selWeights = weights[, block]; - nBlockGenes = length(block); - TOMFiles[blockNo] = spaste(saveTOMFileBase, "-block.", blockNo, ".RData"); - if (loadTOM) - { - if (verbose > 2) - printFlush(paste(spaces, " ..loading TOM for block", blockNo, "from file", TOMFiles[blockNo])); - x = try(load(file =TOMFiles[blockNo]), silent = TRUE); - if (x!="TOM") - { - loadTOM = FALSE - printFlush(spaste("Loading of TOM in block ", blockNo, " failed:\n file ", - TOMFiles[blockNo], - "\n either does not exist or does not contain the object 'TOM'.\n", - " Will recalculate TOM.")); - } else if (!inherits(TOM, "dist")) { - printFlush(spaste("TOM file ", TOMFiles[blockNo], - " does not contain object of the right type or size.\n", - " Will recalculate TOM.")) - } else { - size.1 = attr(TOM, "Size"); - if (length(size.1)!=1 || size.1!=nBlockGenes) - { - printFlush(spaste("TOM file ", TOMFiles[blockNo], - " does not contain object of the right type or size.\n", - " Will recalculate TOM.")) - loadTOM = FALSE - } else { - tom = as.matrix(TOM); - rm(TOM); - collectGarbage(); - } - } - } - - if (!loadTOM) - { - # Calculate TOM by calling a custom C function: - callVerb = max(0, verbose - 1); callInd = indent + 2; - CcorType = intCorType - 1; - CnetworkType = intNetworkType - 1; - CTOMType = intTOMType -1; - - warn = as.integer(0); - tom = .Call("tomSimilarity_call", selExpr, weights, - as.integer(CcorType), as.integer(CnetworkType), as.double(power), as.integer(CTOMType), - as.integer(TOMDenomC), - as.double(maxPOutliers), - as.double(quickCor), - as.integer(fallback), - as.integer(cosineCorrelation), - as.integer(replaceMissingAdjacencies), - as.integer(suppressTOMForZeroAdjacencies), - as.integer(suppressNegativeTOM), - as.integer(useInternalMatrixAlgebra), - warn, as.integer(nThreads), - as.integer(callVerb), as.integer(callInd), PACKAGE = "WGCNA"); - - # FIXME: warn if necessary - - if (saveTOMs) - { - TOM = as.dist(tom); - TOMFiles[blockNo] = paste(saveTOMFileBase, "-block.", blockNo, ".RData", sep=""); - if (verbose > 2) - printFlush(paste(spaces, " ..saving TOM for block", blockNo, "into file", TOMFiles[blockNo])); - save(TOM, file =TOMFiles[blockNo]); - rm (TOM) - collectGarbage(); - } - } - dissTom = 1-tom; - rm(tom); - collectGarbage(); - if (verbose>2) printFlush(paste(spaces, "....clustering..")); - - dendros[[blockNo]] = fastcluster::hclust(as.dist(dissTom), method = "average") - - if (verbose>2) printFlush(paste(spaces, "....detecting modules..")); - datExpr.scaled.imputed.block = datExpr.scaled.imputed[, block]; - if (nExternalBranchSplitFnc > 0) for (extBSFnc in 1:nExternalBranchSplitFnc) - externalSplitOptions[[extBSFnc]]$expr = datExpr.scaled.imputed.block; - - collectGarbage(); - - blockLabels = try(cutreeDynamic(dendro = dendros[[blockNo]], - deepSplit = deepSplit, - cutHeight = detectCutHeight, minClusterSize = minModuleSize, - method = "hybrid", distM = dissTom, - maxCoreScatter = maxCoreScatter, minGap = minGap, - maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap, - minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight, - externalBranchSplitFnc = branchSplitFnc, - minExternalSplit = minExternalSplit, - externalSplitOptions = externalSplitOptions, - externalSplitFncNeedsDistance = externalSplitFncNeedsDistance, - assumeSimpleExternalSpecification = FALSE, - - pamStage = pamStage, pamRespectsDendro = pamRespectsDendro, - verbose = verbose-3, indent = indent + 2), silent = FALSE); - collectGarbage(); - if (verbose > 8) - { - labels0 = blockLabels - if (interactive()) - plotDendroAndColors(dendros[[blockNo]], labels2colors(blockLabels), dendroLabels = FALSE, - main = paste("Block", blockNo), - rowText = blockLabels, textPositions = 1, rowTextAlignment = "center"); - if (FALSE) - plotDendroAndColors(dendros[[blockNo]], labels2colors(allLabels), dendroLabels = FALSE, - main = paste("Block", blockNo)); - } - if (class(blockLabels)=='try-error') - { - if (verbose>0) - { - printFlush(paste(spaces, "*** cutreeDynamic returned the following error:\n", - spaces, blockLabels, spaces, - "Stopping the module detection here.")); - } else - warning(paste("blockwiseModules: cutreeDynamic returned the following error:\n", - " ", blockLabels, "---> Continuing with next block. ")); - next; - } - if (sum(blockLabels>0)==0) - { - if (verbose>1) - { - printFlush(paste(spaces, "No modules detected in block", blockNo)); - } - blockNo = blockNo + 1; - next; - } - - blockLabels[blockLabels>0] = blockLabels[blockLabels>0] + maxUsedLabel; - maxUsedLabel = max(blockLabels); - - if (verbose>2) printFlush(paste(spaces, "....calculating module eigengenes..")); - MEs = try(moduleEigengenes(selExpr[, blockLabels!=0], blockLabels[blockLabels!=0], impute = impute, - # subHubs = TRUE, trapErrors = FALSE, - verbose = verbose - 3, indent = indent + 2), silent = TRUE); - if (class(MEs)=='try-error') - { - if (trapErrors) - { - if (verbose>0) { - printFlush(paste(spaces, "*** moduleEigengenes failed with the following message:")); - printFlush(paste(spaces, " ", MEs)); - printFlush(paste(spaces, " ---> Stopping module detection here.")); - } else - warning(paste("blockwiseModules: moduleEigengenes failed with the following message:", - "\n ", MEs, "---> Continuing with next block. ")); - next; - } else stop(MEs); - } - - #propMEs = as.data.frame(MEs$eigengenes[, names(MEs$eigengenes)!="ME0"]); - - propMEs = MEs$eigengenes; - blockLabelIndex = as.numeric(substring(names(propMEs), 3)); - - deleteModules = NULL; - changedModules = NULL; - - # Check modules: make sure that of the genes present in the module, at least a minimum number - # have a correlation with the eigengene higher than a given cutoff. - - if (verbose>2) printFlush(paste(spaces, "....checking kME in modules..")); - for (mod in 1:ncol(propMEs)) - { - modGenes = (blockLabels==blockLabelIndex[mod]); - KME = do.call(corFnc, - c(list(selExpr[, modGenes], propMEs[, mod]), - if (corFncAcceptsWeights) list( - weights.x = if (haveWeights) weights[, modGenes] else NULL, - weights.y = NULL) else NULL, - corOptions)); - if (intNetworkType==1) KME = abs(KME); - if (sum(KME>minCoreKME) < minCoreKMESize) - { - blockLabels[modGenes] = 0; - deleteModules = c(deleteModules, mod); - if (verbose>3) - printFlush(paste(spaces, " ..deleting module ", mod, ": of ", sum(modGenes), - " total genes in the module\n only ", sum(KME>minCoreKME), - " have the requisite high correlation with the eigengene.", sep="")); - } else if (sum(KME0) - { - # Remove genes whose KME is too low: - if (verbose > 2) - printFlush(paste(spaces, " ..removing", sum(KME0) < minModuleSize) - { - deleteModules = c(deleteModules, mod); - blockLabels[modGenes] = 0; - if (verbose>3) - printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod], - ": not enough genes in the module after removal of low KME genes.", sep="")); - } else { - changedModules = union(changedModules, blockLabelIndex[mod]); - } - } - } - - # Remove modules that are to be removed - - if (!is.null(deleteModules)) - { - propMEs = propMEs[, -deleteModules, drop = FALSE]; - modGenes = is.finite(match(blockLabels, blockLabelIndex[deleteModules])); - blockLabels[modGenes] = 0; - modAllGenes = is.finite(match(allLabels, blockLabelIndex[deleteModules])); - allLabels[modAllGenes] = 0; - blockLabelIndex = blockLabelIndex[-deleteModules]; - } - - # Check if any modules are left - - if (sum(blockLabels>0)==0) - { - if (verbose>1) - { - printFlush(paste(spaces, "No significant modules detected in block", blockNo)); - } - blockNo = blockNo + 1; - next; - } - - # Update allMEs and allLabels - - if (is.null(AllMEs)) - { - AllMEs = propMEs; - } else - AllMEs = cbind(AllMEs, propMEs); - - allLabelIndex = c(allLabelIndex, blockLabelIndex); - - assigned = block[blockLabels!=0]; - allLabels[gsg$goodGenes][assigned] = blockLabels[blockLabels!=0]; - - rm(dissTom); - collectGarbage(); - - #if (blockNo < nBlocks) - #{ - # leftoverBlockGenes = block[allLabels[block]==0]; - # nLeftBG = length(leftoverBlockGenes); - # blocksLeft = c((blockNo+1):nBlocks); - # blockSizes = as.vector(table(blocks))[blocksLeft]; - # blocksOpen = blocksLeft[blockSizes < maxBlockSize]; - # nBlocksOpen = length(blocksOpen); - # if ((nLeftBG>0) && (nBlocksOpen>0)) - # { - # openSizes = blockSizes[blocksOpen]; - # centers = matrix(0, nGSamples, nBlocksOpen); - # for (cen in 1:nBlocksOpen) - # centers[, cen] = svd(datExpr[, blocks==blocksOpen[cen]], nu=1, nv=0)$u[,1]; - # dst = geneCenterDist(datExpr[, block], centers); - # rowMat = matrix(c(1:nrow(dst)), nrow(dst), ncol(dst)); - # colMat = matrix(c(1:ncol(dst)), nrow(dst), ncol(dst), byrow = TRUE); - # dstOrder = order(as.vector(dst)); - # while ((nLeftBG>0) && (nBlocksOpen>0)) - # { - # gene = colMat[dstOrder[1]]; - # rowMatV = as.vector(rowMat); - # colMatV = as.vector(colMat); - - - blockNo = blockNo + 1; - } - - # Check whether any of the already assigned genes should be re-assigned - - deleteModules = NULL; - goodLabels = allLabels[gsg$goodGenes]; - reassignIndex = rep(FALSE, length(goodLabels)); - if (sum(goodLabels!=0) > 0) - { - propLabels = goodLabels[goodLabels!=0]; - assGenes = (c(1:nGenes)[gsg$goodGenes])[goodLabels!=0]; - KME = do.call(match.fun(corFnc), - c(list(datExpr[, goodLabels!=0], AllMEs), - if (corFncAcceptsWeights) list( - weights.x = if(haveWeights) weights[, goodLabels!=0] else NULL, - weights.y = NULL) else NULL, - corOptions)); - if (intNetworkType == 1) KME = abs(KME) - nMods = ncol(AllMEs); - for (mod in 1:nMods) - { - modGenes = c(1:length(propLabels))[propLabels==allLabelIndex[mod]]; - KMEmodule = KME[modGenes, mod]; - KMEbest = apply(KME[modGenes, , drop = FALSE], 1, max); - candidates = (KMEmodule < KMEbest); - candidates[!is.finite(candidates)] = FALSE; - - if (FALSE) - { - modDiss = dissTom[goodLabels==allLabelIndex[mod], goodLabels==allLabelIndex[mod]]; - mod.k = colSums(modDiss); - boxplot(mod.k~candidates) - } - - if (sum(candidates) > 0) - { - pModule = corPvalueFisher(KMEmodule[candidates], nSamples); - whichBest = apply(KME[modGenes[candidates], , drop = FALSE], 1, which.max); - pBest = corPvalueFisher(KMEbest[candidates], nSamples); - reassign = ifelse(is.finite(pBest/pModule), (pBest/pModule < reassignThreshold), FALSE); - if (sum(reassign)>0) - { - if (verbose > 2) - printFlush(paste(spaces, " ..reassigning", sum(reassign), - "genes from module", mod, "to modules with higher KME.")); - allLabels[assGenes[modGenes[candidates][reassign]]] = whichBest[reassign]; - changedModules = union(changedModules, whichBest[reassign]); - if (length(modGenes)-sum(reassign) < minModuleSize) - { - deleteModules = c(deleteModules, mod); - } else - changedModules = union(changedModules, mod); - } - } - } - } - - # Remove modules that are to be removed - - if (!is.null(deleteModules)) - { - AllMEs = AllMEs[, -deleteModules, drop = FALSE]; - genes = is.finite(match(allLabels, allLabelIndex[deleteModules])); - allLabels[genes] = 0; - allLabelIndex = allLabelIndex[-deleteModules]; - goodLabels = allLabels[gsg$goodGenes]; - } - - if (verbose>1) printFlush(paste(spaces, "..merging modules that are too close..")); - if (numericLabels) { - colors = allLabels - } else { - colors = labels2colors(allLabels) - } - mergedAllColors = colors; - MEsOK = TRUE; - mergedMods = try(mergeCloseModules(datExpr, colors[gsg$goodGenes], cutHeight = mergeCutHeight, - relabel = TRUE, # trapErrors = FALSE, - corFnc = corFnc, corOptions = corOptions, - impute = impute, - verbose = verbose-2, indent = indent + 2), silent = TRUE); - if (class(mergedMods)=='try-error') - { - warning(paste("blockwiseModules: mergeCloseModules failed with the following error message:\n ", - mergedMods, "\n--> returning unmerged colors.\n")); - MEs = try(moduleEigengenes(datExpr, colors[gsg$goodGenes], # subHubs = TRUE, trapErrors = FALSE, - impute = impute, - verbose = verbose-3, indent = indent+3), silent = TRUE); - if (class(MEs) == 'try-error') - { - if (!trapErrors) stop(MEs); - if (verbose>0) - { - printFlush(paste(spaces, "*** moduleEigengenes failed with the following error message:")); - printFlush(paste(spaces, " ", MEs)); - printFlush(paste(spaces, "*** returning no module eigengenes.\n")); - } else - warning(paste("blockwiseModules: moduleEigengenes failed with the following error message:\n ", - MEs, "\n--> returning no module eigengenes.\n")); - allSampleMEs = NULL; - MEsOK = FALSE; - } else { - if (sum(!MEs$validMEs)>0) - { - mergedAllColors[gsg$goodGenes] = MEs$validColors; - MEs = MEs$eigengenes[, MEs$validMEs]; - } else MEs = MEs$eigengenes; - allSampleMEs = as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(MEs))); - allSampleMEs[gsg$goodSamples, ] = MEs[,]; - names(allSampleMEs) = names(MEs); - rownames(allSampleMEs) = originalSampleNames; - } - } else { - mergedAllColors[gsg$goodGenes] = mergedMods$colors; - allSampleMEs = as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(mergedMods$newMEs))); - allSampleMEs[gsg$goodSamples, ] = mergedMods$newMEs[,]; - names(allSampleMEs) = names(mergedMods$newMEs); - rownames(allSampleMEs) = originalSampleNames; - } - - if (seedSaved) .Random.seed <<- savedSeed; - - if (!saveTOMs) TOMFiles = NULL; - - names(colors) = originalGeneNames; - names(mergedAllColors) = originalGeneNames; - list(colors = mergedAllColors, - unmergedColors = colors, - MEs = allSampleMEs, - goodSamples = gsg$goodSamples, - goodGenes = gsg$goodGenes, - dendrograms = dendros, - TOMFiles = TOMFiles, - blockGenes = blockGenes, - blocks = blocks, - MEsOK = MEsOK); -} - -#================================================================================== -# -# Helper functions -# -#================================================================================== - -# order labels by size - -.orderLabelsBySize = function(labels, exclude = NULL) -{ - levels.0 = sort(unique(labels)); - levels = levels.0[ !levels.0 %in% exclude] - levels.excl = levels.0 [levels.0 %in% exclude] - rearrange = labels %in% levels; - tab = table(labels [ rearrange ]); - rank = rank(-tab, ties.method = "first"); - - oldOrder = c(levels.excl, names(tab)); - newOrder = c(levels.excl, names(tab)[rank]); - if (is.numeric(labels)) newOrder = as.numeric(newOrder); - - newOrder[ match(labels, oldOrder) ] -} - -#====================================================================================================== -# -# Re-cut trees for blockwiseModules -# -#====================================================================================================== - -recutBlockwiseTrees = function(datExpr, - goodSamples, goodGenes, - blocks, - TOMFiles, - dendrograms, - corType = "pearson", - networkType = "unsigned", - deepSplit = 2, - detectCutHeight = 0.995, minModuleSize = min(20, ncol(datExpr)/2 ), - maxCoreScatter = NULL, minGap = NULL, - maxAbsCoreScatter = NULL, minAbsGap = NULL, - minSplitHeight = NULL, minAbsSplitHeight = NULL, - - useBranchEigennodeDissim = FALSE, - minBranchEigennodeDissim = mergeCutHeight, - - pamStage = TRUE, pamRespectsDendro = TRUE, - # minKMEtoJoin =0.7, - minCoreKME = 0.5, minCoreKMESize = minModuleSize/3, - minKMEtoStay = 0.3, - reassignThreshold = 1e-6, - mergeCutHeight = 0.15, impute = TRUE, - trapErrors = FALSE, numericLabels = FALSE, - verbose = 0, indent = 0, ...) -{ - spaces = indentSpaces(indent); - - #if (verbose>0) - # printFlush(paste(spaces, "Calculating module eigengenes block-wise from all genes")); - cutreeLabels = list() - intCorType = pmatch(corType, .corTypes); - if (is.na(intCorType)) - stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", "))) - - # if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1."); - - intNetworkType = charmatch(networkType, .networkTypes); - if (is.na(intNetworkType)) - stop(paste("Unrecognized networkType argument.", - "Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", "))); - - dimEx = dim(datExpr); - if (length(dimEx)!=2) stop("datExpr has incorrect dimensions.") - - nGenes = dimEx[2]; - nSamples = dimEx[1]; - allLabels = rep(0, nGenes); - AllMEs = NULL; - allLabelIndex = NULL; - - originalSampleNames = rownames(datExpr); - originalGeneNames = colnames(datExpr); - - if (length(blocks)!=nGenes) - stop("Input error: the length of 'geneRank' does not equal the number of genes in given 'datExpr'."); - - # Check data for genes and samples that have too many missing values - - nGGenes = sum(goodGenes) - nGSamples = sum(goodSamples); - gsg = list(goodSamples = goodSamples, goodGenes = goodGenes, - allOK =(sum(!goodSamples) + sum(!goodGenes) == 0)); - - if (!gsg$allOK) datExpr = datExpr[goodSamples, goodGenes]; - - gBlocks = blocks[gsg$goodGenes]; - blockLevels = as.numeric(levels(factor(gBlocks))); - blockSizes = table(gBlocks) - nBlocks = length(blockLevels); - - datExpr.scaled.imputed = t(impute.knn(t(scale(datExpr)))$data) - if (any(is.na(datExpr))) - datExpr.scaled.imputed = t(impute.knn(t(scale(datExpr)))$data) - - corFnc = .corFnc[intCorType]; - corOptions = list(use = 'p'); - - signed = networkType %in% c("signed", "signed hybrid"); - # Set up advanced tree cut methods - - otherArgs = list(...); - - if (useBranchEigennodeDissim) - { - branchSplitFnc = list("branchEigengeneDissim"); - externalSplitOptions = list(list( corFnc = corFnc, corOptions = corOptions, - signed = signed)); - externalSplitFncNeedsDistance = FALSE; - nExternalBranchSplitFnc = 1; - minExternalSplit = minBranchEigennodeDissim; - } else { - branchSplitFnc = list(); - externalSplitOptions = list(list()) - externalSplitFncNeedsDistance = logical(0); - nExternalBranchSplitFnc = 0; - minExternalSplit = numeric(0); - } - - if ("useBranchSplit" %in% names(otherArgs)) - { - if (otherArgs$useBranchSplit) - { - nExternalBranchSplitFnc = nExternalBranchSplitFnc + 1; - branchSplitFnc[[nExternalBranchSplitFnc]] = "branchSplit" - externalSplitOptions[[nExternalBranchSplitFnc]] = list(discardProp = 0.08, minCentralProp = 0.75, - nConsideredPCs = 3, signed = signed, getDetails = FALSE); - minExternalSplit[ nExternalBranchSplitFnc] = otherArgs$minBranchSplit; - externalSplitFncNeedsDistance[ nExternalBranchSplitFnc] = FALSE; - } - } - - # Initialize various variables - - blockNo = 1; - maxUsedLabel = 0; - while (blockNo <= nBlocks) - { - if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, ".")); - - block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]]; - selExpr = as.matrix(datExpr[, block]); - nBlockGenes = length(block); - - TOM = NULL; # Will be loaded below; this gets rid of warnings form Rcheck. - - xx = try(load(TOMFiles[blockNo]), silent = TRUE); - if (class(xx)=='try-error') - { - printFlush(paste("************\n File name", TOMFiles[blockNo], - "appears invalid: the load function returned the following error:\n ", - xx)); - stop(); - } - if (xx!='TOM') - stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate variable.")); - - if (class(TOM)!="dist") - stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate distance structure.")); - - dissTom = as.matrix(1-TOM); - - if (verbose>2) printFlush(paste(spaces, "....detecting modules..")); - - datExpr.scaled.imputed.block = datExpr.scaled.imputed[, block]; - if (nExternalBranchSplitFnc > 0) for (extBSFnc in 1:nExternalBranchSplitFnc) - externalSplitOptions[[extBSFnc]]$expr = datExpr.scaled.imputed.block; - - blockLabels = try(cutreeDynamic(dendro = dendrograms[[blockNo]], - deepSplit = deepSplit, - cutHeight = detectCutHeight, minClusterSize = minModuleSize, - method ="hybrid", - maxCoreScatter = maxCoreScatter, minGap = minGap, - maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap, - minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight, - - externalBranchSplitFnc = branchSplitFnc, - minExternalSplit = minExternalSplit, - externalSplitOptions = externalSplitOptions, - externalSplitFncNeedsDistance = externalSplitFncNeedsDistance, - assumeSimpleExternalSpecification = FALSE, - - pamStage = pamStage, pamRespectsDendro = pamRespectsDendro, - distM = dissTom, - verbose = verbose-3, indent = indent + 2), silent = TRUE); - collectGarbage(); - cutreeLabels[[blockNo]] = blockLabels; - - if (class(blockLabels)=='try-error') - { - if (verbose>0) - { - printFlush(paste(spaces, "*** cutreeDynamic returned the following error:\n", - spaces, blockLabels, spaces, - "Stopping the module detection here.")); - } else - warning(paste("blockwiseModules: cutreeDynamic returned the following error:\n", - " ", blockLabels, "---> Continuing with next block. ")); - next; - } - if (sum(blockLabels>0)==0) - { - if (verbose>1) - { - printFlush(paste(spaces, "No modules detected in block", blockNo)); - } - blockNo = blockNo + 1; - next; - } - - blockLabels[blockLabels>0] = blockLabels[blockLabels>0] + maxUsedLabel; - maxUsedLabel = max(blockLabels); - - if (verbose>2) printFlush(paste(spaces, "....calculating module eigengenes..")); - MEs = try(moduleEigengenes(selExpr[, blockLabels!=0], blockLabels[blockLabels!=0], impute = impute, - # subHubs = TRUE, trapErrors = FALSE, - verbose = verbose - 3, indent = indent + 2), silent = TRUE); - if (class(MEs)=='try-error') - { - if (trapErrors) - { - if (verbose>0) { - printFlush(paste(spaces, "*** moduleEigengenes failed with the following message:")); - printFlush(paste(spaces, " ", MEs)); - printFlush(paste(spaces, " ---> Stopping module detection here.")); - } else - warning(paste("blockwiseModules: moduleEigengenes failed with the following message:", - "\n ", MEs, "---> Continuing with next block. ")); - next; - } else stop(MEs); - } - - #propMEs = as.data.frame(MEs$eigengenes[, names(MEs$eigengenes)!="ME0"]); - - propMEs = MEs$eigengenes; - blockLabelIndex = as.numeric(substring(names(propMEs), 3)); - - deleteModules = NULL; - changedModules = NULL; - - # Check modules: make sure that of the genes present in the module, at least a minimum number - # have a correlation with the eigengene higher than a given cutoff. - - if (verbose>2) printFlush(paste(spaces, "....checking modules for statistical meaningfulness..")); - for (mod in 1:ncol(propMEs)) - { - modGenes = (blockLabels==blockLabelIndex[mod]); - corEval = parse(text = paste(corFnc, "(selExpr[, modGenes], propMEs[, mod]", - prepComma(.corOptions[intCorType]), ")")); - KME = as.vector(eval(corEval)); - if (intNetworkType==1) KME = abs(KME); - if (sum(KME>minCoreKME) < minCoreKMESize) - { - blockLabels[modGenes] = 0; - deleteModules = c(deleteModules, mod); - if (verbose>3) - printFlush(paste(spaces, " ..deleting module ", mod, ": of ", sum(modGenes), - " total genes in the module\n only ", sum(KME>minCoreKME), - " have the requisite high correlation with the eigengene.", sep="")); - } else if (sum(KME0) - { - # Remove genes whose KME is too low: - if (verbose > 2) - printFlush(paste(spaces, " ..removing", sum(KME0) < minModuleSize) - { - deleteModules = c(deleteModules, mod); - blockLabels[modGenes] = 0; - if (verbose>3) - printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod], - ": not enough genes in the module after removal of low KME genes.", sep="")); - } else { - changedModules = union(changedModules, blockLabelIndex[mod]); - } - } - } - - # Remove modules that are to be removed - - if (!is.null(deleteModules)) - { - propMEs = propMEs[, -deleteModules, drop = FALSE]; - modGenes = is.finite(match(blockLabels, blockLabelIndex[deleteModules])); - blockLabels[modGenes] = 0; - modAllGenes = is.finite(match(allLabels, blockLabelIndex[deleteModules])); - allLabels[modAllGenes] = 0; - blockLabelIndex = blockLabelIndex[-deleteModules]; - } - - # Check if any modules are left - - if (sum(blockLabels>0)==0) - { - if (verbose>1) - { - printFlush(paste(spaces, "No significant modules detected in block", blockNo)); - } - blockNo = blockNo + 1; - next; - } - - # Update allMEs and allLabels - - if (is.null(AllMEs)) - { - AllMEs = propMEs; - } else - AllMEs = cbind(AllMEs, propMEs); - - allLabelIndex = c(allLabelIndex, blockLabelIndex); - - assigned = block[blockLabels!=0]; - allLabels[assigned] = blockLabels[blockLabels!=0]; - - rm(dissTom); - collectGarbage(); - - blockNo = blockNo + 1; - } - - # Check whether any of the already assigned genes should be re-assigned - - deleteModules = NULL; - goodLabels = allLabels[gsg$goodGenes]; - if (sum(goodLabels!=0) > 0) - { - propLabels = goodLabels[goodLabels!=0]; - assGenes = (c(1:nGenes)[gsg$goodGenes])[goodLabels!=0]; - corEval = parse(text = paste(corFnc, "(datExpr[, goodLabels!=0], AllMEs", - prepComma(.corOptions[intCorType]), ")")); - KME = eval(corEval); - if (intNetworkType == 1) KME = abs(KME) - nMods = ncol(AllMEs); - for (mod in 1:nMods) - { - modGenes = c(1:length(propLabels))[propLabels==allLabelIndex[mod]]; - KMEmodule = KME[modGenes, mod]; - KMEbest = apply(KME[modGenes, , drop = FALSE], 1, max); - candidates = (KMEmodule < KMEbest); - candidates[!is.finite(candidates)] = FALSE; - if (sum(candidates) > 0) - { - pModule = corPvalueFisher(KMEmodule[candidates], nSamples); - whichBest = apply(KME[modGenes[candidates], , drop = FALSE], 1, which.max); - pBest = corPvalueFisher(KMEbest[candidates], nSamples); - reassign = ifelse(is.finite(pBest/pModule), (pBest/pModule < reassignThreshold), FALSE); - if (sum(reassign)>0) - { - if (verbose > 2) - printFlush(paste(spaces, " ..reassigning", sum(reassign), - "genes from module", mod, "to modules with higher KME.")); - allLabels[assGenes[modGenes[candidates][reassign]]] = whichBest[reassign]; - changedModules = union(changedModules, whichBest[reassign]); - if (length(modGenes)-sum(reassign) < minModuleSize) - { - deleteModules = c(deleteModules, mod); - } else - changedModules = union(changedModules, mod); - } - } - } - } - - # Remove modules that are to be removed - - if (!is.null(deleteModules)) - { - AllMEs = AllMEs[, -deleteModules, drop = FALSE]; - genes = is.finite(match(allLabels, allLabelIndex[deleteModules])); - allLabels[genes] = 0; - allLabelIndex = allLabelIndex[-deleteModules]; - goodLabels = allLabels[gsg$goodGenes]; - } - - if (verbose>1) printFlush(paste(spaces, "..merging modules that are too close..")); - if (numericLabels) { - colors = allLabels - } else { - colors = labels2colors(allLabels) - } - mergedAllColors = colors; - MEsOK = TRUE; - mergedMods = try(mergeCloseModules(datExpr, colors[gsg$goodGenes], cutHeight = mergeCutHeight, - relabel = TRUE, # trapErrors = FALSE, - impute = impute, - verbose = verbose-2, indent = indent + 2), silent = TRUE); - if (class(mergedMods)=='try-error') - { - warning(paste("blockwiseModules: mergeCloseModules failed with the following error message:\n ", - mergedMods, "\n--> returning unmerged colors.\n")); - MEs = try(moduleEigengenes(datExpr, colors[gsg$goodGenes], # subHubs = TRUE, trapErrors = FALSE, - impute = impute, - verbose = verbose-3, indent = indent+3), silent = TRUE); - if (class(MEs) == 'try-error') - { - if (!trapErrors) stop(MEs); - if (verbose>0) - { - printFlush(paste(spaces, "*** moduleEigengenes failed with the following error message:")); - printFlush(paste(spaces, " ", MEs)); - printFlush(paste(spaces, "*** returning no module eigengenes.\n")); - } else - warning(paste("blockwiseModules: moduleEigengenes failed with the following error message:\n ", - MEs, "\n--> returning no module eigengenes.\n")); - allSampleMEs = NULL; - MEsOK = FALSE; - } else { - if (sum(!MEs$validMEs)>0) - { - colors[gsg$goodGenes] = MEs$validColors; - MEs = MEs$eigengenes[, MEs$validMEs]; - } else MEs = MEs$eigengenes; - allSampleMEs = as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(MEs))); - allSampleMEs[gsg$goodSamples, ] = MEs[,]; - names(allSampleMEs) = names(MEs); - } - } else { - mergedAllColors[gsg$goodGenes] = mergedMods$colors; - allSampleMEs = as.data.frame(matrix(NA, nrow = nSamples, ncol = ncol(mergedMods$newMEs))); - allSampleMEs[gsg$goodSamples, ] = mergedMods$newMEs[,]; - names(allSampleMEs) = names(mergedMods$newMEs); - rownames(allSampleMEs) = originalSampleNames; - } - - names(colors) = originalGeneNames; - names(mergedAllColors) = originalGeneNames; - list(colors = mergedAllColors, - unmergedColors = colors, - cutreeLabels = cutreeLabels, - MEs = allSampleMEs, - #goodSamples = gsg$goodSamples, - #goodGenes = gsg$goodGenes, - #dendrograms = dendrograms, - #TOMFiles = TOMFiles, - #blockGenes = blockGenes, - MEsOK = MEsOK); -} - -#========================================================================================================== -# -# blockwiseIndividualTOMs -# -#========================================================================================================== - -# This function calculates and saves blockwise topological overlaps for a given multi expression data. The -# argument blocks can be given to specify blocks, or the blocks can be omitted and will be calculated if -# necessary. - -# Note on naming of output files: %s will translate into set number, %N into set name (if given in -# multiExpr), %b into block number. - -.substituteTags = function(format, tags, replacements) -{ - nTags = length(tags); - if (length(replacements)!= nTags) stop("Length of tags and replacements must be the same."); - - for (t in 1:nTags) - format = gsub(as.character(tags[t]), as.character(replacements[t]), format, fixed = TRUE); - - format; -} - -.processFileName = function(format, setNumber, setNames, blockNumber, analysisName = "") -{ - # The following is a workaround around empty (NULL) setNames. Replaces the name with the setNumber. - if (is.null(setNames)) setNames = rep(setNumber, setNumber) - - .substituteTags(format, c("%s", "%N", "%b", "%a"), - c(setNumber, setNames[setNumber], blockNumber, analysisName)); -} - -blockwiseIndividualTOMs = function( - multiExpr, - multiWeights = NULL, - - # Data checking options - checkMissingData = TRUE, - - # Blocking options - blocks = NULL, - maxBlockSize = 5000, - blockSizePenaltyPower = 5, - nPreclusteringCenters = NULL, - randomSeed = 54321, - - # Network construction arguments: correlation options - corType = "pearson", - maxPOutliers = 1, - quickCor = 0, - pearsonFallback = "individual", - cosineCorrelation = FALSE, - - # Adjacency function options - power = 6, - networkType = "unsigned", - checkPower = TRUE, - replaceMissingAdjacencies = FALSE, - - # Topological overlap options - TOMType = "unsigned", - TOMDenom = "min", - suppressTOMForZeroAdjacencies = FALSE, - suppressNegativeTOM = FALSE, - - # Save individual TOMs? If not, they will be returned in the session. - saveTOMs = TRUE, - individualTOMFileNames = "individualTOM-Set%s-Block%b.RData", - - # General options - nThreads = 0, - useInternalMatrixAlgebra = FALSE, - verbose = 2, indent = 0) -{ - spaces = indentSpaces(indent); - - dataSize = checkSets(multiExpr, checkStructure = TRUE); - - if (dataSize$structureOK) - { - nSets = dataSize$nSets; - nGenes = dataSize$nGenes; - multiFormat = TRUE; - } else { - multiExpr = multiData(multiExpr); - multiWeights = multiData(multiWeights); - nSets = dataSize$nSets; - nGenes = dataSize$nGenes; - multiFormat = FALSE; - } - .checkAndScaleMultiWeights(multiWeights, multiExpr, scaleByMax = FALSE); - - if (length(power)!=1) - { - if (length(power)!=nSets) - stop("Invalid arguments: Length of 'power' must equal number of sets given in 'multiExpr'."); - } else { - power = rep(power, nSets); - } - - #if (maxBlockSize >= floor(sqrt(2^31)) ) - # stop("'maxBlockSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.") - - if (!is.null(blocks) && (length(blocks)!=nGenes)) - stop("Input error: length of 'blocks' must equal number of genes in 'multiExpr'."); - - if (verbose>0) - printFlush(paste(spaces, "Calculating topological overlaps block-wise from all genes")); - - intCorType = pmatch(corType, .corTypes); - if (is.na(intCorType)) - stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", "))) - - intTOMType = pmatch(TOMType, .TOMTypes); - if (is.na(intTOMType)) - stop(paste("Invalid 'TOMType'. Recognized values are", paste(.TOMTypes, collapse = ", "))) - - TOMDenomC = pmatch(TOMDenom, .TOMDenoms)-1; - if (is.na(TOMDenomC)) - stop(paste("Invalid 'TOMDenom'. Recognized values are", paste(.TOMDenoms, collapse = ", "))) - - if ( checkPower & ((sum(power<1)>0) | (sum(power>50)>0) ) ) stop("power must be between 1 and 50."); - - intNetworkType = charmatch(networkType, .networkTypes); - if (is.na(intNetworkType)) - stop(paste("Unrecognized networkType argument.", - "Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", "))); - - if ( (maxPOutliers < 0) | (maxPOutliers > 1)) stop("maxPOutliers must be between 0 and 1."); - if (quickCor < 0) stop("quickCor must be positive."); - - fallback = pmatch(pearsonFallback, .pearsonFallbacks) - if (is.na(fallback)) - stop(paste("Unrecognized 'pearsonFallback'. Recognized values are (unique abbreviations of)\n", - paste(.pearsonFallbacks, collapse = ", "))) - - if (nThreads < 0) stop("nThreads must be positive."); - if (is.null(nThreads) || (nThreads==0)) nThreads = .useNThreads(); - - nSamples = dataSize$nSamples; - - # Check data for genes and samples that have too many missing values - - if (checkMissingData) - { - gsg = goodSamplesGenesMS(multiExpr, verbose = verbose - 1, indent = indent + 1) - if (!gsg$allOK) - { - multiExpr = mtd.subset(multiExpr, gsg$goodSamples, gsg$goodGenes); - if (!is.null(multiWeights)) multiWeights = mtd.subset(multiWeights, gsg$goodSamples, gsg$goodGenes); - } - } else { - gsg = list(goodGenes = rep(TRUE, nGenes), goodSamples = lapply(nSamples, function(n) rep(TRUE, n)), - allOK = TRUE); - } - - nGGenes = sum(gsg$goodGenes); - nGSamples = rep(0, nSets); - for (set in 1:nSets) nGSamples[set] = sum(gsg$goodSamples[[set]]); - - if (is.null(blocks)) - { - if (nGGenes > maxBlockSize) - { - if (verbose>1) printFlush(paste(spaces, "....pre-clustering genes to determine blocks..")); - clustering = consensusProjectiveKMeans(multiExpr, preferredSize = maxBlockSize, - sizePenaltyPower = blockSizePenaltyPower, checkData = FALSE, - nCenters = nPreclusteringCenters, - randomSeed = randomSeed, - verbose = verbose-2, indent = indent + 1); - gBlocks = .orderLabelsBySize(clustering$clusters); - } else - gBlocks = rep(1, nGGenes); - blocks = rep(NA, nGenes); - blocks[gsg$goodGenes] = gBlocks; - } else { - gBlocks = blocks[gsg$goodGenes]; - } - - blockLevels = as.numeric(levels(factor(gBlocks))); - blockSizes = table(gBlocks) - nBlocks = length(blockLevels); - - if (any(blockSizes > sqrt(2^31)-1)) - printFlush(spaste(spaces, - "Found block(s) with size(s) larger than limit of 'int' indexing.\n", - spaces, " Support for such large blocks is experimental; please report\n", - spaces, " any issues to Peter.Langfelder@gmail.com.")); - - # check file names for uniqueness - - actualFileNames = NULL; - if (saveTOMs) - { - actualFileNames = matrix("", nSets, nBlocks); - for (set in 1:nSets) for (b in 1:nBlocks) - actualFileNames[set, b] = .processFileName(individualTOMFileNames, set, names(multiExpr), b); - - rownames(actualFileNames) = spaste("Set.", c(1:nSets)); - colnames(actualFileNames) = spaste("Block.", c(1:nBlocks)); - if (length(unique(as.vector(actualFileNames))) < nSets * nBlocks) - { - printFlush("Error: File names for (some) set/block combinations are not unique:"); - print(actualFileNames); - stop("File names must be unique."); - } - } - - # Initialize various variables - - blockGenes = list(); - blockNo = 1; - collectGarbage(); - setTomDS = list(); - # Here's where the analysis starts - for (blockNo in 1:nBlocks) - { - if (verbose>1 && nBlocks > 1) printFlush(paste(spaces, "..Working on block", blockNo, ".")); - # Select the block genes - block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]]; - nBlockGenes = length(block); - blockGenes[[blockNo]] = c(1:nGenes)[gsg$goodGenes][gBlocks==blockLevels[blockNo]]; - errorOccurred = FALSE; - - # Set up file names or memory space to hold the set TOMs - if (!saveTOMs) - { - setTomDS[[blockNo]] = array(0, dim = c(nBlockGenes*(nBlockGenes-1)/2, nSets)); - } - - # For each set: calculate and save TOM - - for (set in 1:nSets) - { - if (verbose>2) printFlush(paste(spaces, "....Working on set", set)) - selExpr = as.matrix(multiExpr[[set]]$data[, block]); - if (!is.null(multiWeights)) selWeights = as.matrix(multiWeights[[set]]$data[, block]) else - selWeights = NULL; - - # Calculate TOM by calling a custom C function: - callVerb = max(0, verbose - 1); callInd = indent + 2; - CcorType = intCorType - 1; - CnetworkType = intNetworkType - 1; - CTOMType = intTOMType - 1; - # tempExpr = as.double(as.matrix(selExpr)); - warn = 0L; - - tom = .Call("tomSimilarity_call", selExpr, selWeights, - as.integer(CcorType), as.integer(CnetworkType), as.double(power[set]), as.integer(CTOMType), - as.integer(TOMDenomC), - as.double(maxPOutliers), - as.double(quickCor), - as.integer(fallback), - as.integer(cosineCorrelation), - as.integer(replaceMissingAdjacencies), - as.integer(suppressTOMForZeroAdjacencies), - as.integer(suppressNegativeTOM), - as.integer(useInternalMatrixAlgebra), - warn, as.integer(nThreads), - as.integer(callVerb), as.integer(callInd), PACKAGE = "WGCNA"); - - # FIXME: warn if necessary - - tomDS = as.dist(tom); - # dim(tom) = c(nBlockGenes, nBlockGenes); - rm(tom); - - # Save the calculated TOM either to disk in chunks or to memory. - if (saveTOMs) - { - save(tomDS, file = actualFileNames[set, blockNo]); - } else { - setTomDS[[blockNo]] [, set] = tomDS[]; - } - } - rm(tomDS); collectGarbage(); - } - - if (!multiFormat) - { - gsg$goodSamples = gsg$goodSamples[[1]]; - } - - list(actualTOMFileNames = actualFileNames, - TOMSimilarities = if(!saveTOMs) setTomDS else NULL, - blocks = blocks, - blockGenes = blockGenes, - goodSamplesAndGenes = gsg, - nGGenes = nGGenes, - gBlocks = gBlocks, - nThreads = nThreads, - saveTOMs = saveTOMs, - intNetworkType = intNetworkType, - intCorType = intCorType, - nSets = nSets, - setNames = names(multiExpr) - ) -} - -#========================================================================================================== -# -# lowerTri2matrix -# -#========================================================================================================== - -lowerTri2matrix = function(x, diag = 1) -{ - if (class(x)=="dist") - { - mat = as.matrix(x) - } else { - n = length(x); - n1 = (1 + sqrt(1 + 8*n))/2 - if (floor(n1)!=n1) stop("Input length does not translate into matrix"); - mat = matrix(0, n1, n1); - mat[lower.tri(mat)] = x; - mat = mat + t(mat); - } - diag(mat) = diag; - mat; -} - -#========================================================================================================== -# -# blockwiseConsensusModules -# -#========================================================================================================== - - -.checkComponents = function(object, names) -{ - objNames = names(object); - inObj = names %in% objNames; - if (!all(inObj)) - stop(".checkComponents: object is missing the following components:\n", - paste(names[!inObj], collapse = ", ")); -} - -# Function to calculate consensus modules and eigengenes from all genes. - -blockwiseConsensusModules = function( - multiExpr, - - # Data checking options - checkMissingData = TRUE, - - # Blocking options - blocks = NULL, - maxBlockSize = 5000, - blockSizePenaltyPower = 5, - nPreclusteringCenters = NULL, - randomSeed = 54321, - - # individual TOM information - - individualTOMInfo = NULL, - useIndivTOMSubset = NULL, - - # Network construction arguments: correlation options - corType = "pearson", - maxPOutliers = 1, - quickCor = 0, - pearsonFallback = "individual", - cosineCorrelation = FALSE, - - # Adjacency function options - power = 6, - networkType = "unsigned", - checkPower = TRUE, - replaceMissingAdjacencies = FALSE, - - # Topological overlap options - - TOMType = "unsigned", - TOMDenom = "min", - suppressNegativeTOM = FALSE, - - # Save individual TOMs? - - saveIndividualTOMs = TRUE, - individualTOMFileNames = "individualTOM-Set%s-Block%b.RData", - - # Consensus calculation options: network calibration - - networkCalibration = c("single quantile", "full quantile", "none"), - - ## Save scaled TOMs? <-- leave this option for users willing to run consensusTOM on its own - #saveScaledIndividualTOMs = FALSE, - #scaledIndividualTOMFilePattern = "scaledIndividualTOM-Set%s-Block%b.RData", - - # Simple quantile calibration options - - calibrationQuantile = 0.95, - sampleForCalibration = TRUE, sampleForCalibrationFactor = 1000, - getNetworkCalibrationSamples = FALSE, - - # Consensus definition - - consensusQuantile = 0, - useMean = FALSE, - setWeights = NULL, - - # Saving the consensus TOM - - saveConsensusTOMs = FALSE, - consensusTOMFilePattern = "consensusTOM-block.%b.RData", - - # Internal handling of TOMs - - useDiskCache = TRUE, chunkSize = NULL, - cacheBase = ".blockConsModsCache", - cacheDir = ".", - - # Alternative consensus TOM input from a previous calculation - - consensusTOMInfo = NULL, - - # Basic tree cut options - - deepSplit = 2, - detectCutHeight = 0.995, minModuleSize = 20, - checkMinModuleSize = TRUE, - - # Advanced tree cut opyions - - maxCoreScatter = NULL, minGap = NULL, - maxAbsCoreScatter = NULL, minAbsGap = NULL, - minSplitHeight = NULL, minAbsSplitHeight = NULL, - - useBranchEigennodeDissim = FALSE, - minBranchEigennodeDissim = mergeCutHeight, - - stabilityLabels = NULL, - minStabilityDissim = NULL, - - pamStage = TRUE, pamRespectsDendro = TRUE, - - # Gene joining and removal from a module, and module "significance" criteria - - reassignThresholdPS = 1e-4, - trimmingConsensusQuantile = consensusQuantile, - # minKMEtoJoin =0.7, - minCoreKME = 0.5, minCoreKMESize = minModuleSize/3, - minKMEtoStay = 0.2, - - # Module eigengene calculation options - - impute = TRUE, - trapErrors = FALSE, - - # Module merging options - - equalizeQuantilesForModuleMerging = FALSE, - quantileSummaryForModuleMerging = "mean", - mergeCutHeight = 0.15, - mergeConsensusQuantile = consensusQuantile, - - - # Output options - - numericLabels = FALSE, - - # General options - nThreads = 0, - verbose = 2, indent = 0, - ...) -{ - spaces = indentSpaces(indent); - - dataSize = checkSets(multiExpr); - nSets = dataSize$nSets; - nGenes = dataSize$nGenes; - - if (length(power)!=1) - { - if (length(power)!=nSets) - stop("Invalid arguments: Length of 'power' must equal number of sets given in 'multiExpr'."); - } else { - power = rep(power, nSets); - } - - seedSaved = FALSE; - if (!is.null(randomSeed)) - { - if (exists(".Random.seed")) - { - seedSaved = TRUE; - savedSeed = .Random.seed - } - set.seed(randomSeed); - } - - if ( (consensusQuantile < 0) | (consensusQuantile > 1) ) - stop("'consensusQuantile' must be between 0 and 1."); - - if (checkMinModuleSize & (minModuleSize > nGenes/2)) - { - minModuleSize = nGenes/2; - warning("blockwiseConsensusModules: minModuleSize appeared too large and was lowered to", - minModuleSize, - ". If you insist on keeping the original minModuleSize, set checkMinModuleSize = FALSE."); - } - - if (verbose>0) - printFlush(paste(spaces, "Calculating consensus modules and module eigengenes", - "block-wise from all genes")); - - originalGeneNames = mtd.colnames(multiExpr); - originalSampleNames = mtd.apply(multiExpr, rownames); - - branchSplitFnc = NULL; - minBranchDissimilarities = numeric(0); - externalSplitFncNeedsDistance = logical(0); - if (useBranchEigennodeDissim) - { - branchSplitFnc = "mtd.branchEigengeneDissim"; - minBranchDissimilarities = minBranchEigennodeDissim; - externalSplitFncNeedsDistance = FALSE; - } - - if (!is.null(stabilityLabels)) - { - branchSplitFnc = c(branchSplitFnc, "branchSplitFromStabilityLabels"); - minBranchDissimilarities = c(minBranchDissimilarities, minStabilityDissim); - externalSplitFncNeedsDistance = c(externalSplitFncNeedsDistance, FALSE); - } - - # Basic checks on consensusTOMInfo - - if (!is.null(consensusTOMInfo)) - { - - .checkComponents(consensusTOMInfo, c("saveConsensusTOMs", "individualTOMInfo", "goodSamplesAndGenes")); - - if (length(consensusTOMInfo$individualTOMInfo$blocks)!=nGenes) - stop("Inconsistent number of genes in 'consensusTOMInfo$individualTOMInfo$blocks'."); - - if (!is.null(consensusTOMInfo$consensusQuantile) && - (consensusQuantile!=consensusTOMInfo$consensusQuantile) ) - warning(immediate. = TRUE, - "blockwiseConsensusModules: given (possibly default) 'consensusQuantile' is different\n", - "from the value recorded in 'consensusTOMInfo'. This is normally undesirable and may\n", - "indicate a mistake in the function call."); - } - - # Handle "other arguments" - - args = list(...); - if (is.null(args$reproduceBranchEigennodeQuantileError)) - { - reproduceBranchEigennodeQuantileError = FALSE; - } else reproduceBranchEigennodeQuantileError = args$reproduceBranchEigennodeQuantileError; - - # If topological overlaps weren't calculated yet, calculate them. - - removeIndividualTOMsOnExit = FALSE; - nBlocks.0 = length(unique(blocks)); - - if (is.null(individualTOMInfo)) - { - if (is.null(consensusTOMInfo)) - { - individualTOMInfo = blockwiseIndividualTOMs(multiExpr = multiExpr, - checkMissingData = checkMissingData, - blocks = blocks, - maxBlockSize = maxBlockSize, - blockSizePenaltyPower = blockSizePenaltyPower, - nPreclusteringCenters = nPreclusteringCenters, - randomSeed = randomSeed, - corType = corType, - maxPOutliers = maxPOutliers, - quickCor = quickCor, - pearsonFallback = pearsonFallback, - cosineCorrelation = cosineCorrelation, - power = power, - networkType = networkType, - replaceMissingAdjacencies= replaceMissingAdjacencies, - TOMType = TOMType, - TOMDenom = TOMDenom, - suppressNegativeTOM = suppressNegativeTOM, - saveTOMs = useDiskCache | nBlocks.0>1, - individualTOMFileNames = individualTOMFileNames, - nThreads = nThreads, - verbose = verbose, indent = indent); - removeIndividualTOMsOnExit = TRUE; - } else - individualTOMInfo = consensusTOMInfo$individualTOMInfo; - } - - if (is.null(useIndivTOMSubset)) - { - if (individualTOMInfo$nSets != nSets) - stop(paste("Number of sets in individualTOMInfo and in multiExpr do not agree.\n", - " To use a subset of individualTOMInfo, set useIndivTOMSubset appropriately.")); - - useIndivTOMSubset = c(1:nSets); - } - - if (length(useIndivTOMSubset)!=nSets) - stop("Length of 'useIndivTOMSubset' must equal the number of sets in 'multiExpr'"); - - if (length(unique(useIndivTOMSubset))!=nSets) - stop("Entries of 'useIndivTOMSubset' must be unique"); - - if (any(useIndivTOMSubset<1) | any(useIndivTOMSubset>individualTOMInfo$nSets)) - stop("All entries of 'useIndivTOMSubset' must be between 1 and the number of sets in individualTOMInfo"); - - # if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1."); - - intNetworkType = individualTOMInfo$intNetworkType; - intCorType = individualTOMInfo$intCorType; - - corFnc = match.fun(.corFnc[intCorType]); - corOptions = list(use = 'p'); - - fallback = pmatch(pearsonFallback, .pearsonFallbacks) - - nSamples = dataSize$nSamples; - - allLabels = rep(0, nGenes); - allLabelIndex = NULL; - - # Restrict data to goodSamples and goodGenes - - gsg = individualTOMInfo$goodSamplesAndGenes; - - # Restrict gsg to used sets - - gsg$goodSamples = gsg$goodSamples[useIndivTOMSubset]; - if (!gsg$allOK) - multiExpr = mtd.subset(multiExpr, gsg$goodSamples, gsg$goodGenes); - - # prepare scaled and imputed multiExpr. - multiExpr.scaled = mtd.apply(multiExpr, scale); - hasMissing = unlist(multiData2list(mtd.apply(multiExpr, function(x) { any(is.na(x)) }))); - # Impute those that have missing data - multiExpr.scaled.imputed = mtd.mapply(function(x, doImpute) - { if (doImpute) t(impute.knn(t(x))$data) else x }, - multiExpr.scaled, hasMissing); - - nGGenes = sum(gsg$goodGenes); - nGSamples = rep(0, nSets); - for (set in 1:nSets) nGSamples[set] = sum(gsg$goodSamples[[ set ]]); - - blocks = individualTOMInfo$blocks; - gBlocks = individualTOMInfo$gBlocks; - - blockLevels = sort(unique(gBlocks)); - blockSizes = table(gBlocks) - nBlocks = length(blockLevels); - - if (is.null(chunkSize)) chunkSize = as.integer(.largestBlockSize/nSets) - - reassignThreshold = reassignThresholdPS^nSets; - - consMEs = vector(mode = "list", length = nSets); - dendros = list(); - - maxUsedLabel = 0; - collectGarbage(); - # Here's where the analysis starts - - removeConsensusTOMOnExit = FALSE; - if (is.null(consensusTOMInfo) && (nBlocks==1 || saveConsensusTOMs || getNetworkCalibrationSamples)) - { - consensusTOMInfo = consensusTOM( - individualTOMInfo = individualTOMInfo, - useIndivTOMSubset = useIndivTOMSubset, - - networkCalibration = networkCalibration, - saveCalibratedIndividualTOMs = FALSE, - - calibrationQuantile = calibrationQuantile, - sampleForCalibration = sampleForCalibration, - sampleForCalibrationFactor = sampleForCalibrationFactor, - getNetworkCalibrationSamples = getNetworkCalibrationSamples, - - consensusQuantile = consensusQuantile, - useMean = useMean, - setWeights = setWeights, - - # Return options - saveConsensusTOMs = saveConsensusTOMs, - consensusTOMFilePattern = consensusTOMFilePattern, - returnTOMs = nBlocks==1, - - # Internal handling of TOMs - useDiskCache = useDiskCache, - chunkSize = chunkSize, - cacheBase = cacheBase, - cacheDir = cacheDir, - verbose = verbose, indent = indent); - removeConsensusTOMOnExit = !saveConsensusTOMs; - } - - blockwiseConsensusCalculation = is.null(consensusTOMInfo); - - for (blockNo in 1:nBlocks) - { - if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, ".")); - # Select block genes - block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]]; - nBlockGenes = length(block); - - selExpr = mtd.subset(multiExpr, , block); - errorOccurred = FALSE; - - if (blockwiseConsensusCalculation) - { - # This code is only reached if input saveConsensusTOMs is FALSE and there are at least 2 blocks. - consensusTOMInfo = consensusTOM( - individualTOMInfo = individualTOMInfo, - useIndivTOMSubset = useIndivTOMSubset, - - useBlocks = blockNo, - - networkCalibration = networkCalibration, - saveCalibratedIndividualTOMs = FALSE, - - calibrationQuantile = calibrationQuantile, - sampleForCalibration = sampleForCalibration, - sampleForCalibrationFactor = sampleForCalibrationFactor, - getNetworkCalibrationSamples = FALSE, - - consensusQuantile = consensusQuantile, - useMean = useMean, - setWeights = setWeights, - - saveConsensusTOMs = FALSE, - returnTOMs = TRUE, - - useDiskCache = useDiskCache, - chunkSize = chunkSize, - cacheBase = cacheBase, - cacheDir = cacheDir); - - consTomDS = consensusTOMInfo$consensusTOM[[1]]; - # Remove the consensus TOM from the structure. - consensusTOMInfo$consensusTOM[[1]] = NULL; - consensusTOMInfo$consensusTOM = NULL; - } else { - if (consensusTOMInfo$saveConsensusTOMs) - { - consTomDS = .loadObject(file = consensusTOMInfo$TOMFiles[blockNo], - size = nBlockGenes * (nBlockGenes-1)/2); - } else - consTomDS = consensusTOMInfo$consensusTOM[[blockNo]]; - } - - # Temporary "cast" so fastcluster::hclust doesn't complain about non-integer size. - - attr(consTomDS, "Size") = as.integer(attr(consTomDS, "Size")); - - consTomDS = 1-consTomDS; - - collectGarbage(); - - if (verbose>2) printFlush(paste(spaces, "....clustering and detecting modules..")); - errorOccured = FALSE; - dendros[[blockNo]] = fastcluster::hclust(consTomDS, method = "average"); - if (verbose > 8) - { - if (interactive()) - plot(dendros[[blockNo]], labels = FALSE, main = paste("Block", blockNo)); - } - - externalSplitOptions = list(); - e.index = 1; - if (useBranchEigennodeDissim) - { - externalSplitOptions[[e.index]] = list(multiExpr = mtd.subset(multiExpr.scaled.imputed,, block), - corFnc = corFnc, corOptions = corOptions, - consensusQuantile = consensusQuantile, - signed = networkType %in% c("signed", "signed hybrid"), - reproduceQuantileError = reproduceBranchEigennodeQuantileError); - e.index = e.index +1; - } - if (!is.null(stabilityLabels)) - { - externalSplitOptions[[e.index]] = list(stabilityLabels = stabilityLabels); - e.index = e.index + 1; - } - collectGarbage(); - #blockLabels = try(cutreeDynamic(dendro = dendros[[blockNo]], - blockLabels = cutreeDynamic(dendro = dendros[[blockNo]], - distM = as.matrix(consTomDS), - deepSplit = deepSplit, - cutHeight = detectCutHeight, minClusterSize = minModuleSize, - method ="hybrid", - maxCoreScatter = maxCoreScatter, minGap = minGap, - maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap, - minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight, - - externalBranchSplitFnc = branchSplitFnc, - minExternalSplit = minBranchDissimilarities, - externalSplitOptions = externalSplitOptions, - externalSplitFncNeedsDistance = externalSplitFncNeedsDistance, - assumeSimpleExternalSpecification = FALSE, - - pamStage = pamStage, pamRespectsDendro = pamRespectsDendro, - verbose = verbose-3, indent = indent + 2) - #verbose = verbose-3, indent = indent + 2), silent = TRUE); - if (verbose > 8) - { - print(table(blockLabels)); - if (interactive()) - plotDendroAndColors(dendros[[blockNo]], labels2colors(blockLabels), dendroLabels = FALSE, - main = paste("Block", blockNo)); - } - if (class(blockLabels)=='try-error') - { - (if (verbose>0) printFlush else warning) - (paste(spaces, "blockwiseConsensusModules: cutreeDynamic failed:\n ", spaces, - blockLabels, "\n", spaces, " Error occured in block", blockNo, "\n", - spaces, " Continuing with next block. ")); - next; - } else { - blockLabels[blockLabels>0] = blockLabels[blockLabels>0] + maxUsedLabel; - maxUsedLabel = max(blockLabels); - } - if (sum(blockLabels>0)==0) - { - if (verbose>1) - { - printFlush(paste(spaces, "No modules detected in block", blockNo, - "--> continuing with next block.")) - } - next; - } - - # Calculate eigengenes for this batch - - if (verbose>2) printFlush(paste(spaces, "....calculating eigengenes..")); - blockAssigned = c(1:nBlockGenes)[blockLabels!=0]; - blockLabelIndex = as.numeric(levels(as.factor(blockLabels[blockAssigned]))); - blockConsMEs = try(multiSetMEs(selExpr, universalColors = blockLabels, - excludeGrey = TRUE, grey = 0, impute = impute, - # trapErrors = TRUE, returnValidOnly = TRUE, - verbose = verbose-4, indent = indent + 3), silent = TRUE); - if (class(blockConsMEs)=='try-error') - { - if (verbose>0) - { - printFlush(paste(spaces, "*** multiSetMEs failed with the message:")); - printFlush(paste(spaces, " ", blockConsMEs)); - printFlush(paste(spaces, "*** --> Ending module detection here")); - } else warning(paste("blocwiseConsensusModules: multiSetMEs failed with the message: \n", - " ", blockConsMEs, "\n--> continuing with next block.")); - next; - } - - deleteModules = NULL; - changedModules = NULL; - - # find genes whose closest module eigengene has cor higher than minKMEtoJoin and assign them - # Removed - should not change blocks before clustering them - #unassGenes = c(c(1:nGGenes)[-block][allLabels[-block]==0], block[blockLabels==0]); - #if (length(unassGenes) > 0) - #{ - #blockKME = array(0, dim = c(length(unassGenes), ncol(blockConsMEs[[1]]$data), nSets)); - #corEval = parse(text = paste(.corFnc[intCorType], - #"( multiExpr[[set]]$data[, unassGenes], blockConsMEs[[set]]$data,", - #.corOptions[intCorType], ")")) - #for (set in 1:nSets) blockKME[, , set] = eval(corEval); - #if (intNetworkType==1) blockKME = abs(blockKME); - #consKME = as.matrix(apply(blockKME, c(1,2), min)); - #consKMEmax = apply(consKME, 1, max); - #closestModule = blockLabelIndex[apply(consKME, 1, which.max)]; - #assign = (consKMEmax >= minKMEtoJoin ); - #if (sum(assign>0)) - #{ - #allLabels[unassGenes[assign]] = closestModule[assign]; - #changedModules = union(changedModules, closestModule[assign]); - #} - #rm(blockKME, consKME, consKMEmax); - #} - - collectGarbage(); - - # Check modules: make sure that of the genes present in the module, at least a minimum number - # have a correlation with the eigengene higher than a given cutoff, and that all member genes have - # the required minimum consensus KME - - if (verbose>2) - printFlush(paste(spaces, "....checking consensus modules for statistical meaningfulness..")); - - for (mod in 1:ncol(blockConsMEs[[1]]$data)) - { - modGenes = (blockLabels==blockLabelIndex[mod]); - KME = matrix(0, nrow = sum(modGenes), ncol = nSets); - corEval = parse(text = paste(.corFnc[intCorType], - "( selExpr[[set]]$data[, modGenes], blockConsMEs[[set]]$data[, mod]", - prepComma(.corOptions[intCorType]), ")")) - for (set in 1:nSets) KME[, set] = as.vector(eval(corEval)); - if (intNetworkType==1) KME = abs(KME); - consKME = apply(KME, 1, quantile, probs = trimmingConsensusQuantile, names = FALSE, na.rm = TRUE); - if (sum(consKME>minCoreKME) < minCoreKMESize) - { - blockLabels[modGenes] = 0; - deleteModules = union(deleteModules, mod); - if (verbose>3) - printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod], - ": of ", sum(modGenes), - " total genes in the module only ", sum(consKME>minCoreKME), - " have the requisite high correlation with the eigengene in all sets.", sep="")); - } else if (sum(consKME0) - { - if (verbose > 3) - printFlush(paste(spaces, " ..removing", sum(consKME0) < minModuleSize) - { - deleteModules = union(deleteModules, mod); - blockLabels[modGenes] = 0; - if (verbose>3) - printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod], - ": not enough genes in the module after removal of low KME genes.", sep="")); - } else { - changedModules = union(changedModules, blockLabelIndex[mod]); - } - } - } - - # Remove marked modules - - if (!is.null(deleteModules)) - { - for (set in 1:nSets) blockConsMEs[[set]]$data = blockConsMEs[[set]]$data[, -deleteModules]; - modGenes = is.finite(match(blockLabels, blockLabelIndex[deleteModules])); - blockLabels[modGenes] = 0; - modAllGenes = is.finite(match(allLabels, blockLabelIndex[deleteModules])); - allLabels[modAllGenes] = 0; - blockLabelIndex = blockLabelIndex[-deleteModules]; - } - - # Check whether there's anything left - if (sum(blockLabels>0)==0) - { - if (verbose>1) - { - printFlush(paste(spaces, " ..No significant modules detected in block", blockNo)) - printFlush(paste(spaces, " ..continuing with next block.")); - } - next; - } - - # Update module eigengenes - - for (set in 1:nSets) - if (is.null(dim(blockConsMEs[[set]]$data))) - dim(blockConsMEs[[set]]$data) = c(length(blockConsMEs[[set]]$data), 1); - - if (is.null(consMEs[[1]])) - { - for (set in 1:nSets) consMEs[[set]] = list(data = blockConsMEs[[set]]$data); - } else for (set in 1:nSets) - consMEs[[set]]$data = cbind(consMEs[[set]]$data, blockConsMEs[[set]]$data); - - # Update allLabels - - allLabelIndex = c(allLabelIndex, blockLabelIndex); - allLabels[gsg$goodGenes][block[blockAssigned]] = blockLabels[blockAssigned]; - - collectGarbage(); - - } - - # Check whether any of the already assigned genes (in this or previous blocks) should be re-assigned - - if (verbose>2) - printFlush(paste(spaces, "....checking for genes that should be reassigned..")); - - deleteModules = NULL; - goodLabels = allLabels[gsg$goodGenes]; - if (sum(goodLabels!=0) > 0) - { - propLabels = goodLabels[goodLabels!=0]; - assGenes = (c(1:nGenes)[gsg$goodGenes])[goodLabels!=0]; - corEval = parse(text = paste(.corFnc[intCorType], - "(multiExpr[[set]]$data[, goodLabels!=0], consMEs[[set]]$data", - prepComma(.corOptions[intCorType]), ")")); - nMods = ncol(consMEs[[1]]$data); - lpValues = array(0, dim = c(length(propLabels), nMods, nSets)); - sumSign = array(0, dim = c(length(propLabels), nMods)); - if (verbose>3) - printFlush(paste(spaces, "......module membership p-values..")); - for (set in 1:nSets) - { - KME = eval(corEval); - if (intNetworkType == 1) KME = abs(KME) - lpValues[,,set] = -2*log(corPvalueFisher(KME, nGSamples[set], twoSided = FALSE)); - sumSign = sumSign + sign(KME); - } - if (verbose>3) - printFlush(paste(spaces, "......module membership scores..")); - scoreAll = as.matrix(apply(lpValues, c(1,2), sum)) * (nSets + sumSign)/(2*nSets); - scoreAll[!is.finite(scoreAll)] = 0.001 # This low should be enough - bestScore = apply(scoreAll, 1, max); - if (intNetworkType==1) sumSign = abs(sumSign); - if (verbose>3) - { - cat(paste(spaces, "......individual modules..")); - pind = initProgInd(); - } - for (mod in 1:nMods) - { - modGenes = c(1:length(propLabels))[propLabels==allLabelIndex[mod]]; - scoreMod = scoreAll[modGenes, mod]; - candidates = (bestScore[modGenes] > scoreMod); - candidates[!is.finite(candidates)] = FALSE; - if (sum(candidates) > 0) - { - pModule = pchisq(scoreMod[candidates], nSets, log.p = TRUE) - whichBest = apply(scoreAll[modGenes[candidates], ], 1, which.max); - pBest = pchisq(bestScore[modGenes[candidates]], nSets, log.p = TRUE); - reassign = ifelse(is.finite(pBest - pModule), - ( (pBest - pModule) < log(reassignThreshold) ), - FALSE); - if (sum(reassign)>0) - { - allLabels[assGenes[modGenes[candidates][reassign]]] = whichBest[reassign]; - changedModules = union(changedModules, whichBest[reassign]); - if (length(modGenes)-sum(reassign) < minModuleSize) - { - deleteModules = union(deleteModules, mod); - } else - changedModules = union(changedModules, mod); - } - } - if (verbose > 3) pind = updateProgInd(mod/nMods, pind); - } - rm(lpValues, sumSign, scoreAll); - if (verbose > 3) printFlush(""); - } - - # Remove marked modules - - if (!is.null(deleteModules)) - { - # for (set in 1:nSets) consMEs[[set]]$data = consMEs[[set]]$data[, -deleteModules]; - modGenes = is.finite(match(allLabels, allLabelIndex[deleteModules])); - allLabels[modGenes] = 0; - # allLabelIndex = allLabelIndex[-deleteModules]; - } - - if (verbose>1) printFlush(paste(spaces, "..merging consensus modules that are too close..")); - #print(table(allLabels)); - #print(is.numeric(allLabels)) - if (numericLabels) { - colors = allLabels - } else { - colors = labels2colors(allLabels) - } - mergedColors = colors; - mergedMods = try(mergeCloseModules(multiExpr, colors[gsg$goodGenes], - equalizeQuantiles = equalizeQuantilesForModuleMerging, - quantileSummary = quantileSummaryForModuleMerging, - consensusQuantile = mergeConsensusQuantile, - cutHeight = mergeCutHeight, - relabel = TRUE, impute = impute, - verbose = verbose-2, indent = indent + 2), silent = TRUE); - if (class(mergedMods)=='try-error') - { - if (verbose>0) - { - printFlush(paste(spaces, 'blockwiseConsensusModules: mergeCloseModule failed with this message:\n', - spaces, ' ', mergedMods, spaces, - '---> returning unmerged consensus modules')); - } else warning(paste('blockwiseConsensusModules: mergeCloseModule failed with this message:\n ', - mergedMods, '---> returning unmerged consensus modules')); - MEs = try(multiSetMEs(multiExpr, universalColors = colors[gsg$goodGenes] - # trapErrors = TRUE, returnValidOnly = TRUE - ), silent = TRUE); - if (class(MEs)=='try-error') - { - warning(paste('blockwiseConsensusModules: ME calculation failed with this message:\n ', - MEs, '---> returning empty module eigengenes')); - allSampleMEs = NULL; - } else { - if (!MEs[[1]]$allOK) mergedColors[gsg$goodGenes] = MEs[[1]]$validColors; - allSampleMEs = vector(mode = "list", length = nSets); - names(allSampleMEs) = names(multiExpr); - for (set in 1:nSets) - { - allSampleMEs[[set]] = - list(data = as.data.frame(matrix(NA, nrow = nSamples[set], ncol = ncol(MEs[[set]]$data)))); - allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] = MEs[[set]]$data[,]; - names(allSampleMEs[[set]]$data) = names(MEs[[set]]$data); - rownames(allSampleMEs[[set]]$data) = originalSampleNames[[set]]$data; - } - } - } else { - mergedColors[gsg$goodGenes] = mergedMods$colors; - allSampleMEs = vector(mode = "list", length = nSets); - names(allSampleMEs) = names(multiExpr); - for (set in 1:nSets) - { - allSampleMEs[[set]] = - list(data = as.data.frame(matrix(NA, nrow = nSamples[set], - ncol = ncol(mergedMods$newMEs[[1]]$data)))); - allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] = mergedMods$newMEs[[set]]$data[,]; - names(allSampleMEs[[set]]$data) = names(mergedMods$newMEs[[set]]$data); - rownames(allSampleMEs[[set]]$data) = originalSampleNames[[set]]$data; - } - } - - if (seedSaved) .Random.seed <<- savedSeed; - - if (removeConsensusTOMOnExit) - { - .checkAndDelete(consensusTOMInfo$TOMFiles); - consensusTOMInfo$TOMFiles = NULL; - } - - if (removeIndividualTOMsOnExit) - { - .checkAndDelete(individualTOMInfo$actualTOMFileNames); - individualTOMInfo$actualTOMFileNames = NULL; - } - - # Under no circumstances return consensus TOM or individual TOM similarities within the returned list. - consensusTOMInfo$consensusTOM = NULL; - individualTOMInfo$TOMSimilarities = NULL - - names(mergedColors) = names(colors) = originalGeneNames; - list(colors = mergedColors, - unmergedColors = colors, - multiMEs = allSampleMEs, - goodSamples = gsg$goodSamples, - goodGenes = gsg$goodGenes, - dendrograms = dendros, - TOMFiles = consensusTOMInfo$TOMFiles, - blockGenes = individualTOMInfo$blockGenes, - blocks = blocks, - originCount = consensusTOMInfo$originCount, - networkCalibrationSamples = consensusTOMInfo$networkCalibrationSamples, - individualTOMInfo = individualTOMInfo, - consensusTOMInfo = if (saveConsensusTOMs) consensusTOMInfo else NULL, - consensusQuantile = consensusQuantile - ) -} - - -#========================================================================================================== -# -# recutConsensusTrees -# -#========================================================================================================== - -recutConsensusTrees = function(multiExpr, - goodSamples, goodGenes, - blocks, - TOMFiles, - dendrograms, - corType = "pearson", - networkType = "unsigned", - deepSplit = 2, - detectCutHeight = 0.995, minModuleSize = 20, - checkMinModuleSize = TRUE, - maxCoreScatter = NULL, minGap = NULL, - maxAbsCoreScatter = NULL, minAbsGap = NULL, - minSplitHeight = NULL, minAbsSplitHeight = NULL, - - useBranchEigennodeDissim = FALSE, - minBranchEigennodeDissim = mergeCutHeight, - - pamStage = TRUE, pamRespectsDendro = TRUE, - # minKMEtoJoin =0.7, - trimmingConsensusQuantile = 0, - minCoreKME = 0.5, minCoreKMESize = minModuleSize/3, - minKMEtoStay = 0.2, - reassignThresholdPS = 1e-4, - mergeCutHeight = 0.15, - mergeConsensusQuantile = trimmingConsensusQuantile, - impute = TRUE, - trapErrors = FALSE, - numericLabels = FALSE, - verbose = 2, indent = 0) -{ - spaces = indentSpaces(indent); - - dataSize = checkSets(multiExpr); - nSets = dataSize$nSets; - nGenes = dataSize$nGenes; - nSamples = dataSize$nSamples; - - originalGeneNames = mtd.colnames(multiExpr); - originalSampleNames = mtd.apply(multiExpr, rownames); - - if (length(blocks)!=nGenes) - stop("Input error: length of 'blocks' must equal number of genes in 'multiExpr'."); - - #if (verbose>0) - # printFlush(paste(spaces, "Calculating consensus modules and module eigengenes", - # "block-wise from all genes")); - - intCorType = pmatch(corType, .corTypes); - if (is.na(intCorType)) - stop(paste("Invalid 'corType'. Recognized values are", paste(.corTypes, collapse = ", "))) - - # if ( (minKMEtoJoin >1) | (minKMEtoJoin <0) ) stop("minKMEtoJoin must be between 0 and 1."); - - intNetworkType = charmatch(networkType, .networkTypes); - if (is.na(intNetworkType)) - stop(paste("Unrecognized networkType argument.", - "Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", "))); - - allLabels = rep(0, nGenes); - allLabelIndex = NULL; - - corFnc = match.fun(.corFnc[intCorType]); - corOptions = list(use = 'p'); - - # Get rid of bad genes and bad samples - - gsg = list(goodGenes = goodGenes, goodSamples = goodSamples, allOK = TRUE); - - gsg$allOK = (sum(!gsg$goodGenes)==0); - nGGenes = sum(gsg$goodGenes); - nGSamples = rep(0, nSets); - for (set in 1:nSets) - { - nGSamples[set] = sum(gsg$goodSamples[[set]]); - gsg$allOK = gsg$allOK && (sum(!gsg$goodSamples[[set]])==0); - } - - if (!gsg$allOK) - for (set in 1:nSets) - multiExpr[[set]]$data = multiExpr[[set]]$data[gsg$goodSamples[[set]], gsg$goodGenes]; - - gBlocks = blocks[gsg$goodGenes]; - - blockLevels = as.numeric(levels(factor(gBlocks))); - blockSizes = table(gBlocks) - nBlocks = length(blockLevels); - - reassignThreshold = reassignThresholdPS^nSets; - - # prepare scaled and imputed multiExpr. - multiExpr.scaled = mtd.apply(multiExpr, scale); - hasMissing = unlist(multiData2list(mtd.apply(multiExpr, function(x) { any(is.na(x)) }))); - # Impute those that have missing data - multiExpr.scaled.imputed = mtd.mapply(function(x, doImpute) - { if (doImpute) t(impute.knn(t(x))$data) else x }, - multiExpr.scaled, hasMissing); - if (useBranchEigennodeDissim) - { - branchSplitFnc = "mtd.branchEigengeneDissim"; - } else - branchSplitFnc = NULL; - - # Initialize various variables - - consMEs = vector(mode = "list", length = nSets); - - blockNo = 1; - maxUsedLabel = 0; - collectGarbage(); - # Here's where the analysis starts - - while (blockNo <= nBlocks) - { - if (verbose>1) printFlush(paste(spaces, "..Working on block", blockNo, ".")); - # Select most connected genes - block = c(1:nGGenes)[gBlocks==blockLevels[blockNo]]; - nBlockGenes = length(block); - selExpr = vector(mode = "list", length = nSets); - for (set in 1:nSets) - selExpr[[set]] = list(data = multiExpr[[set]]$data[, block]); - - # This is how TOMs are saved: - #if (saveTOMs) - #{ - # TOMFiles[blockNo] = paste(saveTOMFileBase, "-block.", blockNo, ".RData", sep=""); - # save(consTomDS, file = TOMFiles[blockNo]); - #} - #consTomDS = 1-consTomDS; - #collectGarbage(); - - xx = try(load(TOMFiles[blockNo]), silent = TRUE); - if (class(xx)=='try-error') - { - printFlush(paste("************\n File name", TOMFiles[blockNo], - "appears invalid: the load function returned the following error:\n ", - xx)); - stop(); - } - if (xx!='consTomDS') - stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate variable.")); - - if (class(consTomDS)!="dist") - stop(paste("The file", TOMFiles[blockNo], "does not contain the appopriate distance structure.")); - - consTomDS = 1-consTomDS; - collectGarbage(); - - if (verbose>2) printFlush(paste(spaces, "....clustering and detecting modules..")); - errorOccured = FALSE; - - blockLabels = try(cutreeDynamic(dendro = dendrograms[[blockNo]], - deepSplit = deepSplit, - cutHeight = detectCutHeight, minClusterSize = minModuleSize, - method ="hybrid", - maxCoreScatter = maxCoreScatter, minGap = minGap, - maxAbsCoreScatter = maxAbsCoreScatter, minAbsGap = minAbsGap, - minSplitHeight = minSplitHeight, minAbsSplitHeight = minAbsSplitHeight, - - externalBranchSplitFnc = if (useBranchEigennodeDissim) - branchSplitFnc else NULL, - minExternalSplit = minBranchEigennodeDissim, - externalSplitOptions = list(multiExpr = mtd.subset(multiExpr.scaled.imputed, , block), - corFnc = corFnc, corOptions = corOptions, - consensusQuantile = mergeConsensusQuantile, - signed = networkType %in% c("signed", "signed hybrid")), - externalSplitFncNeedsDistance = FALSE, - - pamStage = pamStage, pamRespectsDendro = pamRespectsDendro, - distM = as.matrix(consTomDS), - verbose = verbose-3, indent = indent + 2), silent = TRUE); - if (class(blockLabels)=='try-error') - { - (if (verbose>0) printFlush else warning) - (paste(spaces, "blockwiseConsensusModules: cutreeDynamic failed:\n ", - blockLabels, "\nError occured in block", blockNo, "\nContinuing with next block.")); - next; - } else { - blockLabels[blockLabels>0] = blockLabels[blockLabels>0] + maxUsedLabel; - maxUsedLabel = max(blockLabels); - } - if (sum(blockLabels>0)==0) - { - if (verbose>1) - { - printFlush(paste(spaces, "No modules detected in block", blockNo)) - printFlush(paste(spaces, " Continuing with next block.")) - } - next; - } - - # Calculate eigengenes for this batch - - if (verbose>2) printFlush(paste(spaces, "....calculating eigengenes..")); - blockAssigned = c(1:nBlockGenes)[blockLabels!=0]; - blockLabelIndex = as.numeric(levels(as.factor(blockLabels[blockAssigned]))); - blockConsMEs = try(multiSetMEs(selExpr, universalColors = blockLabels, - excludeGrey = TRUE, grey = 0, impute = impute, - # trapErrors = TRUE, returnValidOnly = TRUE, - verbose = verbose-4, indent = indent + 3), silent = TRUE); - if (class(blockConsMEs)=='try-error') - { - if (verbose>0) - { - printFlush(paste(spaces, "*** multiSetMEs failed with the message:")); - printFlush(paste(spaces, " ", blockConsMEs)); - printFlush(paste(spaces, "*** --> Ending module detection here")); - } else warning(paste("blocwiseConsensusModules: multiSetMEs failed with the message: \n", - " ", blockConsMEs, "\n--> Continuing with next block.")); - next; - } - - deleteModules = NULL; - changedModules = NULL; - - # find genes whose closest module eigengene has cor higher than minKMEtoJoin and assign them - # Removed - should not change blocks before clustering them - #unassGenes = c(c(1:nGGenes)[-block][allLabels[-block]==0], block[blockLabels==0]); - #if (length(unassGenes) > 0) - #{ - #blockKME = array(0, dim = c(length(unassGenes), ncol(blockConsMEs[[1]]$data), nSets)); - #corEval = parse(text = paste(.corFnc[intCorType], - #"( multiExpr[[set]]$data[, unassGenes], blockConsMEs[[set]]$data,", - #.corOptions[intCorType], ")")) - #for (set in 1:nSets) blockKME[, , set] = eval(corEval); - #if (intNetworkType==1) blockKME = abs(blockKME); - #consKME = as.matrix(apply(blockKME, c(1,2), min)); - #consKMEmax = apply(consKME, 1, max); - #closestModule = blockLabelIndex[apply(consKME, 1, which.max)]; - #assign = (consKMEmax >= minKMEtoJoin ); - #if (sum(assign>0)) - #{ - #allLabels[unassGenes[assign]] = closestModule[assign]; - #changedModules = union(changedModules, closestModule[assign]); - #} - #rm(blockKME, consKME, consKMEmax); - #} - - collectGarbage(); - - # Check modules: make sure that of the genes present in the module, at least a minimum number - # have a correlation with the eigengene higher than a given cutoff, and that all member genes have - # the required minimum consensus KME - - if (verbose>2) - printFlush(paste(spaces, "....checking consensus modules for statistical meaningfulness..")); - - for (mod in 1:ncol(blockConsMEs[[1]]$data)) - { - modGenes = (blockLabels==blockLabelIndex[mod]); - KME = matrix(0, nrow = sum(modGenes), ncol = nSets); - corEval = parse(text = paste(.corFnc[intCorType], - "( selExpr[[set]]$data[, modGenes], blockConsMEs[[set]]$data[, mod]", - prepComma(.corOptions[intCorType]), ")")) - for (set in 1:nSets) KME[, set] = as.vector(eval(corEval)); - if (intNetworkType==1) KME = abs(KME); - consKME = apply(KME, 1, quantile, probs = trimmingConsensusQuantile, na.rm = TRUE); - if (sum(consKME>minCoreKME) < minCoreKMESize) - { - blockLabels[modGenes] = 0; - deleteModules = union(deleteModules, mod); - if (verbose>3) - printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod], - ": of ", sum(modGenes), - " total genes in the module only ", sum(consKME>minCoreKME), - " have the requisite high correlation with the eigengene in all sets.", sep="")); - } else if (sum(consKME0) - { - if (verbose > 3) - printFlush(paste(spaces, " ..removing", sum(consKME0) < minModuleSize) - { - deleteModules = union(deleteModules, mod); - blockLabels[modGenes] = 0; - if (verbose>3) - printFlush(paste(spaces, " ..deleting module ",blockLabelIndex[mod], - ": not enough genes in the module after removal of low KME genes.", sep="")); - } else { - changedModules = union(changedModules, blockLabelIndex[mod]); - } - } - } - - # Remove marked modules - - if (!is.null(deleteModules)) - { - for (set in 1:nSets) blockConsMEs[[set]]$data = blockConsMEs[[set]]$data[, -deleteModules]; - modGenes = is.finite(match(blockLabels, blockLabelIndex[deleteModules])); - blockLabels[modGenes] = 0; - modAllGenes = is.finite(match(allLabels, blockLabelIndex[deleteModules])); - allLabels[modAllGenes] = 0; - blockLabelIndex = blockLabelIndex[-deleteModules]; - } - - # Check whether there's anything left - if (sum(blockLabels>0)==0) - { - if (verbose>1) - { - printFlush(paste(spaces, " No significant modules detected in block", blockNo)) - printFlush(paste(spaces, " Continuing with next block.")); - } - next; - } - - # Update module eigengenes - - for (set in 1:nSets) - if (is.null(dim(blockConsMEs[[set]]$data))) - dim(blockConsMEs[[set]]$data) = c(length(blockConsMEs[[set]]$data), 1); - - if (is.null(consMEs[[1]])) - { - for (set in 1:nSets) consMEs[[set]] = list(data = blockConsMEs[[set]]$data); - } else for (set in 1:nSets) - consMEs[[set]]$data = cbind(consMEs[[set]]$data, blockConsMEs[[set]]$data); - - # Update allLabels - - allLabelIndex = c(allLabelIndex, blockLabelIndex); - allLabels[block[blockAssigned]] = blockLabels[blockAssigned]; - - collectGarbage(); - - blockNo = blockNo + 1; - - } - - # Check whether any of the already assigned genes (in this or previous blocks) should be re-assigned - - if (verbose>2) - printFlush(paste(spaces, "....checking for genes that should be reassigned..")); - - deleteModules = NULL; - goodLabels = allLabels[gsg$goodGenes]; - if (sum(goodLabels!=0) > 0) - { - propLabels = goodLabels[goodLabels!=0]; - assGenes = (c(1:nGenes)[gsg$goodGenes])[goodLabels!=0]; - corEval = parse(text = paste(.corFnc[intCorType], - "(multiExpr[[set]]$data[, goodLabels!=0], consMEs[[set]]$data", - prepComma(.corOptions[intCorType]), ")")); - nMods = ncol(consMEs[[1]]$data); - lpValues = array(0, dim = c(length(propLabels), nMods, nSets)); - sumSign = array(0, dim = c(length(propLabels), nMods)); - if (verbose>3) - printFlush(paste(spaces, "......module membership p-values..")); - for (set in 1:nSets) - { - KME = eval(corEval); - if (intNetworkType == 1) KME = abs(KME) - lpValues[,,set] = -2*log(corPvalueFisher(KME, nGSamples[set], twoSided = FALSE)); - sumSign = sumSign + sign(KME); - } - if (verbose>3) - printFlush(paste(spaces, "......module membership scores..")); - scoreAll = as.matrix(apply(lpValues, c(1,2), sum)) * (nSets + sumSign)/(2*nSets); - scoreAll[!is.finite(scoreAll)] = 0.001 # This low should be enough - bestScore = apply(scoreAll, 1, max); - if (intNetworkType==1) sumSign = abs(sumSign); - if (verbose>3) - { - cat(paste(spaces, "......individual modules..")); - pind = initProgInd(); - } - for (mod in 1:nMods) - { - modGenes = c(1:length(propLabels))[propLabels==allLabelIndex[mod]]; - scoreMod = scoreAll[modGenes, mod]; - candidates = (bestScore[modGenes] > scoreMod); - candidates[!is.finite(candidates)] = FALSE; - if (sum(candidates) > 0) - { - pModule = pchisq(scoreMod[candidates], nSets, log.p = TRUE) - whichBest = apply(scoreAll[modGenes[candidates], ], 1, which.max); - pBest = pchisq(bestScore[modGenes[candidates]], nSets, log.p = TRUE); - reassign = ifelse(is.finite(pBest - pModule), - ( (pBest - pModule) < log(reassignThreshold) ), - FALSE); - if (sum(reassign)>0) - { - allLabels[assGenes[modGenes[candidates][reassign]]] = whichBest[reassign]; - changedModules = union(changedModules, whichBest[reassign]); - if (length(modGenes)-sum(reassign) < minModuleSize) - { - deleteModules = union(deleteModules, mod); - } else - changedModules = union(changedModules, mod); - } - } - if (verbose > 3) pind = updateProgInd(mod/nMods, pind); - } - rm(lpValues, sumSign, scoreAll); - if (verbose > 3) printFlush(""); - } - - # Remove marked modules - - if (!is.null(deleteModules)) - { - # for (set in 1:nSets) consMEs[[set]]$data = consMEs[[set]]$data[, -deleteModules]; - modGenes = is.finite(match(allLabels, allLabelIndex[deleteModules])); - allLabels[modGenes] = 0; - # allLabelIndex = allLabelIndex[-deleteModules]; - } - - if (verbose>1) printFlush(paste(spaces, "..merging consensus modules that are too close..")); - #print(table(allLabels)); - #print(is.numeric(allLabels)) - if (numericLabels) { - colors = allLabels - } else { - colors = labels2colors(allLabels) - } - mergedColors = colors; - mergedMods = try(mergeCloseModules(multiExpr, colors[gsg$goodGenes], - consensusQuantile = mergeConsensusQuantile, - cutHeight = mergeCutHeight, - relabel = TRUE, impute = impute, - verbose = verbose-2, indent = indent + 2), silent = TRUE); - if (class(mergedMods)=='try-error') - { - if (verbose>0) - { - printFlush(paste(spaces, 'blockwiseConsensusModules: mergeCloseModule failed with this message:\n', - spaces, ' ', mergedMods, spaces, - '---> returning unmerged consensus modules')); - } else warning(paste('blockwiseConsensusModules: mergeCloseModule failed with this message:\n ', - mergedMods, '---> returning unmerged consensus modules')); - MEs = try(multiSetMEs(multiExpr, universalColors = colors[gsg$goodGenes] - # trapErrors = TRUE, returnValidOnly = TRUE - ), silent = TRUE); - if (class(MEs)=='try-error') - { - warning(paste('blockwiseConsensusModules: ME calculation failed with this message:\n ', - MEs, '---> returning empty module eigengenes')); - allSampleMEs = NULL; - } else { - if (!MEs[[1]]$allOK) mergedColors[gsg$goodGenes] = MEs[[1]]$validColors; - allSampleMEs = vector(mode = "list", length = nSets); - names(allSampleMEs) = names(multiExpr); - for (set in 1:nSets) - { - allSampleMEs[[set]] = - list(data = as.data.frame(matrix(NA, nrow = nSamples[set], ncol = ncol(MEs[[set]]$data)))); - allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] = MEs[[set]]$data[,]; - names(allSampleMEs[[set]]$data) = names(MEs[[set]]$data); - rownames(allSampleMEs[[set]]$data) = originalSampleNames[[set]]$data; - } - } - } else { - mergedColors[gsg$goodGenes] = mergedMods$colors; - allSampleMEs = vector(mode = "list", length = nSets); - names(allSampleMEs) = names(multiExpr); - for (set in 1:nSets) - { - allSampleMEs[[set]] = - list(data = as.data.frame(matrix(NA, nrow = nSamples[set], - ncol = ncol(mergedMods$newMEs[[1]]$data)))); - allSampleMEs[[set]]$data[gsg$goodSamples[[set]], ] = mergedMods$newMEs[[set]]$data[,]; - names(allSampleMEs[[set]]$data) = names(mergedMods$newMEs[[set]]$data); - rownames(allSampleMEs[[set]]$data) = originalSampleNames[[set]]$data; - } - } - - names(mergedColors) = names(colors) = originalGeneNames; - list(colors = mergedColors, - unmergedColors = colors, - multiMEs = allSampleMEs - # goodSamples = gsg$goodSamples, - # goodGenes = gsg$goodGenes, - # dendrograms = dendros, - # blockGenes = blockGenes, - # originCount = originCount, - # TOMFiles = TOMFiles - ); -} - -#=================================================================================================================== -# -# Preliminary clustering via projective k-means -# -#=================================================================================================================== - -projectiveKMeans = function ( - datExpr, - preferredSize = 5000, - nCenters = as.integer(min(ncol(datExpr)/20, preferredSize^2/ncol(datExpr))), - sizePenaltyPower = 4, - networkType = "unsigned", - randomSeed = 54321, - checkData = TRUE, - imputeMissing = TRUE, - maxIterations = 1000, - verbose = 0, indent = 0 ) -{ - - centerGeneDist = function(centers, oldDst = NULL, changed = c(1:nCenters), - blockSize = 50000, verbose = 0, spaces = "") - { - if (is.null(oldDst)) - { - oldDst = array(0, c(nCenters, nGenes)); - changed = c(1:nCenters); - } - dstAll = oldDst; - nChanged = length(changed) - - nBlocks = ceiling(ncol(datExpr)/blockSize); - blocks = allocateJobs(ncol(datExpr), nBlocks); - - if (verbose > 5) - pind = initProgInd(spaste(spaces, " ..centerGeneDist: ")); - - for (b in 1:nBlocks) - { - if (intNetworkType==1) - { - dst = 1-abs(cor(centers[, changed], datExpr[, blocks[[b]] ])); - } else { - dst = 1-cor(centers[, changed], datExpr[, blocks[[b]] ]); - } - dstAll[changed, blocks[[b]]] = dst; - if (verbose > 5) pind = updateProgInd(b/nBlocks, pind); - } - dstAll; - } - - memberCenterDist = function(dst, membership, sizes = NULL, changed = c(1:nCenters), oldMCDist = NULL) - { - if (is.null(oldMCDist)) - { - changed = c(1:nCenters); - oldMCDist = rep(0, nGenes); - } - centerDist = oldMCDist; - if (!is.null(changed)) - { - if (is.null(sizes)) sizes = table(membership) - if (length(sizes)!=nCenters) - { - sizes2 = rep(0, nCenters); - sizes2[as.numeric(names(sizes))] = sizes; - sizes = sizes2; - } - if (is.finite(sizePenaltyPower)) - { - sizeCorrections = (sizes/preferredSize)^sizePenaltyPower; - sizeCorrections[sizeCorrections < 1] = 1; - } else { - sizeCorrections = rep(1, length(sizes)); - sizeCorrections[sizes > preferredSize] = Inf; - } - for (cen in changed) if (sizes[cen]!=0) - { - if (is.finite(sizeCorrections[cen])) - { - centerDist[membership==cen] = dst[cen, membership==cen] * sizeCorrections[cen]; - } else - centerDist[membership==cen] = 10 + dst[cen, membership==cen]; - } - } - centerDist; - } - - spaces = indentSpaces(indent); - if (verbose > 0) - printFlush(paste(spaces, "Projective K-means:")); - - datExpr = scale(as.matrix(datExpr)); - - if (any(is.na(datExpr))) - { - if (imputeMissing) - { - printFlush(spaste(spaces, "projectiveKMeans: imputing missing data in 'datExpr'.\n", - "To reproduce older results, use 'imputeMissing = FALSE'. ")); - datExpr = t(impute.knn(t(datExpr))$data); - } else { - printFlush(spaste(spaces, "projectiveKMeans: there are missing data in 'datExpr'.\n", - "SVD will not work; will use a weighted mean approximation.")); - } - } - - #if (preferredSize >= floor(sqrt(2^31)) ) - # stop("'preferredSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.") - - if (preferredSize >= sqrt(2^31)-1) - printFlush(spaste( - spaces, " Support for blocks larger than sqrt(2^31) is experimental; please report\n", - spaces, " any issues to Peter.Langfelder@gmail.com.")); - - if (exists(".Random.seed")) - { - seedSaved = TRUE; - savedSeed = .Random.seed - } else seedSaved = FALSE; - set.seed(randomSeed); - - nAllSamples = nrow(datExpr); - nAllGenes = ncol(datExpr); - - intNetworkType = charmatch(networkType, .networkTypes); - if (is.na(intNetworkType)) - stop(paste("Unrecognized networkType argument.", - "Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", "))); - - if (verbose > 1) - printFlush(paste(spaces, "..using", nCenters, "centers.")); - - # Check data for genes and samples that have too many missing values - - if (checkData) - { - if (verbose > 0) - printFlush(paste(spaces, "..checking data for excessive number of missing values..")); - gsg = goodSamplesGenes(datExpr, verbose = verbose -1, indent = indent + 1) - if (!gsg$allOK) datExpr = datExpr[gsg$goodSamples, gsg$goodGenes]; - } - nGenes = ncol(datExpr); - nSamples = nrow(datExpr); - - datExpr[is.na(datExpr)] = 0; - - centers = matrix(0, nSamples, nCenters); - - randGeneIndex = sample(nGenes, size = nGenes); - temp = rep(c(1:nCenters), times = ceiling(nGenes/nCenters)); - membership = temp[randGeneIndex]; - - if (verbose > 0) - printFlush(paste(spaces, "..k-means clustering..")); - - changed = c(1:nCenters); dst = NULL; centerDist = NULL; - iteration = 0; - while (!is.null(changed) && (iteration < maxIterations)) - { - iteration = iteration + 1; - if (verbose > 1) printFlush(paste(spaces, " ..iteration", iteration)); - clusterSizes = table(membership); - if (verbose > 5) pind = initProgInd(paste(spaces, " ....calculating centers: ")) - for (cen in sort(changed)) - { - centers[, cen] = .alignedFirstPC(datExpr[, membership==cen], verbose = verbose-2, - indent = indent+2); - if (verbose > 5) pind = updateProgInd(cen/nCenters, pind); - } - if (verbose > 5) { pind = updateProgInd(1, pind); printFlush("")} - - if (verbose > 5) printFlush(paste(spaces, " ....calculating center to gene distances")); - dst = centerGeneDist(centers, dst, changed, verbose = verbose, spaces = spaces); - centerDist = memberCenterDist(dst, membership, clusterSizes, changed, centerDist); - - nearestDist = rep(0, nGenes); - nearest = rep(0, nGenes); - if (verbose > 5) printFlush(paste(spaces, " ....finding nearest center for each gene")); - minRes = .Call("minWhich_call", dst, 0L, PACKAGE = "WGCNA") - nearestDist = minRes$min; - nearest = minRes$which; - - if (sum(centerDist>nearestDist)>0) - { - proposedMemb = nearest; - accepted = FALSE; - while (!accepted && (sum(proposedMemb!=membership)>0)) - { - if (verbose > 2) - cat(paste(spaces, " ..proposing to move", sum(proposedMemb!=membership), "genes")); - moved = c(1:nGenes)[proposedMemb!=membership]; - newCentDist = memberCenterDist(dst, proposedMemb); - gotWorse = newCentDist[moved] > centerDist[moved] - if (sum(!is.finite(gotWorse))>0) - warning("Have NAs in gotWorse."); - if (sum(gotWorse)==0) - { - accepted = TRUE; - if (verbose > 2) printFlush(paste("..move accepted.")); - } else { - if (verbose > 2) printFlush(paste("..some genes got worse. Trying again.")); - ord = order(centerDist[moved[gotWorse]] - newCentDist[moved[gotWorse]]) - n = ceiling(length(ord)*3/5); - proposedMemb[moved[gotWorse][ord[c(1:n)]]] = membership[moved[gotWorse][ord[c(1:n)]]]; - } - } - if (accepted) - { - propSizes = table(proposedMemb); - keep = as.numeric(names(propSizes)); - centers = centers[, keep]; - dst = dst[keep, ]; - changedAll = union(membership[moved], proposedMemb[moved]); - changedKeep = changedAll[is.finite(match(changedAll, keep))]; - changed = rank(changedKeep); # This is a way to make say 1,3,4 into 1,2,3 - membership = as.numeric(as.factor(proposedMemb)); - if ( (verbose > 1) && (sum(keep) < nCenters)) - printFlush(paste(spaces, " ..dropping", nCenters - sum(keep), - "centers because ther clusters are empty.")); - nCenters = length(keep); - } else { - changed = NULL; - if (verbose > 2) printFlush(paste("Could not find a suitable move to improve the clustering.")); - } - } else { - changed = NULL; - if (verbose > 2) - printFlush(paste("Clustering is stable: all genes are closest to their assigned center.")); - } - if (verbose > 5) - { - printFlush("Sizes of biggest preliminary clusters:"); - order = order(-as.numeric(clusterSizes)); - print(as.numeric(clusterSizes)[order[1:min(100, length(order))]]); - } - } - - if (verbose > 2 & verbose <6) - { - printFlush("Sizes of preliminary clusters:"); - print(clusterSizes); - } - - - merged = sizeRestrictedClusterMerge( - datExpr, - clusters = membership, - clusterSizes = clusterSizes, - centers = centers, - maxSize = preferredSize, - networkType = networkType, - verbose = verbose, - indent = indent); - - list(clusters = merged$clusters, centers = merged$centers); -} - - -sizeRestrictedClusterMerge = function( - datExpr, - clusters, - clusterSizes = NULL, - centers = NULL, - maxSize, - networkType = "unsigned", - verbose = 0, - indent = 0) -{ - - spaces = indentSpaces(indent); - intNetworkType = charmatch(networkType, .networkTypes); - if (is.null(clusterSizes)) - { - clusterSizes = table(clusters); - centers = NULL; - } - - nCenters = length(clusterSizes); - nSamples = nrow(datExpr); - - if (is.null(centers)) - { - centers = matrix(0, nSamples, nCenters); - for (i in 1:nCenters) - { - if (clusterSizes[i] > 1) - { - centers[, i] = .alignedFirstPC(datExpr[, clusters==i], verbose = verbose-2, - indent = indent+2) - } else centers[, i] = scale(datExpr[, clusters==i])/(sum(is.finite(datExpr[, i]))-1) - } - } - - if (verbose > 0) printFlush(paste(spaces, "..merging smaller clusters...")); - small = (clusterSizes < maxSize); # typically all clusters will be below the max size, but is not a requirement. - if (intNetworkType==1) - { - clustDist = 1-abs(cor(centers[, small])); - } else { - clustDist = 1-cor(centers[, small]); - } - diag(clustDist) = 10; - - # merge nearby clusters if their sizes allow merging - done = FALSE; - while (!done & (sum(small)>1)) - { - smallIndex = c(1:nCenters)[small] - nSmall = sum(small); - distOrder = order(as.vector(clustDist))[seq(from=2, to = nSmall * (nSmall-1), by=2)]; - i = 1; canMerge = FALSE; - while (i <= length(distOrder) && (!canMerge)) - { - jj = as.integer( (distOrder[i]-1)/nSmall + 1); - ii = distOrder[i] - (jj-1) * nSmall - whichI = smallIndex[ii] - whichJ = smallIndex[jj]; - canMerge = sum(clusterSizes[c(whichI, whichJ)]) <= maxSize; - i = i + 1; - } - if (canMerge) - { - clusters[clusters==whichJ] = whichI; - clusterSizes[whichI] = clusterSizes[whichI] + clusterSizes[whichJ]; - centers[, whichI] = .alignedFirstPC(datExpr[, clusters==whichI], verbose = verbose-2, - indent = indent+2); - nCenters = nCenters -1; - if (verbose > 3) - printFlush(paste(spaces, " ..merged clusters", whichI, "and", whichJ, - "whose combined size is", clusterSizes[whichI])); - clusters[clusters>whichJ] = clusters[clusters>whichJ] - 1; - centers = centers[ , -whichJ, drop = FALSE]; - clusterSizes = clusterSizes[-whichJ]; - small = clusterSizes < maxSize; - if (sum(small) > 1) # Only update if there are at least 2 small clusters left - { - clustDist = clustDist[-jj, -jj, drop = FALSE]; - if (clusterSizes[whichI] < maxSize) - { - cr1 = c(cor(centers[, small], centers[, whichI])); - clustDist[, ii] = clustDist[ii, ] = if (intNetworkType==1) 1-abs(cr1) else 1-cr1; - clustDist[ii, ii] = 10; - } else { - clustDist = clustDist[-ii, -ii, drop = FALSE] - } - } - } else done = TRUE; - } - list(clusters = clusters, centers = centers); -} - -#====================================================================================================== -# -# Consensus preliminary partitioning -# -#====================================================================================================== - -consensusProjectiveKMeans = function ( - multiExpr, - preferredSize = 5000, - nCenters = NULL, - sizePenaltyPower = 4, - networkType = "unsigned", - randomSeed = 54321, - checkData = TRUE, - imputeMissing = TRUE, - useMean = (length(multiExpr) > 3), - maxIterations = 1000, - verbose = 0, indent = 0 ) -{ - centerGeneDist = function(centers, oldDst = NULL, changed = c(1:nCenters)) - { - if (is.null(oldDst)) - { - oldDst = array(0, c(nCenters, nGenes)); - changed = c(1:nCenters); - } - dstAll = oldDst; - nChanged = length(changed) - if (nChanged!=0) - { - dstX = array(0, c(nSets, nChanged, nGenes)); - for (set in 1:nSets) - if (intNetworkType==1) - { - dstX[set, , ] = 1-abs(cor(centers[[set]]$data[, changed], multiExpr[[set]]$data)); - } else { - dstX[set, , ] = 1-cor(centers[[set]]$data[, changed], multiExpr[[set]]$data); - } - if (nSets==1) - { - dstAll[changed, ] = dstX[1, , ]; - } else { - #dst = array(0, c(nChanged, nGenes)); - if (useMean) - { - dstAll[changed, ] = base::colMeans(dstX, dims = 1); - } else { - dstAll[changed, ] = -minWhichMin(-dstX, byRow = FALSE, dims = 1)$min; - } - } - } - dstAll; - } - - memberCenterDist = function(dst, membership, sizes = NULL, changed = c(1:nCenters), oldMCDist = NULL) - { - if (is.null(oldMCDist)) - { - changed = c(1:nCenters); - oldMCDist = rep(0, nGenes); - } - centerDist = oldMCDist; - if (!is.null(changed)) - { - if (is.null(sizes)) sizes = table(membership) - if (length(sizes)!=nCenters) - { - sizes2 = rep(0, nCenters); - sizes2[as.numeric(names(sizes))] = sizes; - sizes = sizes2; - } - if (is.finite(sizePenaltyPower)) - { - sizeCorrections = (sizes/preferredSize)^sizePenaltyPower; - sizeCorrections[sizeCorrections < 1] = 1; - } else { - sizeCorrections = rep(1, length(sizes)); - sizeCorrections[sizes > preferredSize] = Inf; - } - for (cen in changed) if (sizes[cen]!=0) - { - if (is.finite(sizeCorrections[cen])) - { - centerDist[membership==cen] = dst[cen, membership==cen] * sizeCorrections[cen]; - } else - centerDist[membership==cen] = 10 + dst[cen, membership==cen]; - } - } - centerDist; - } - - spaces = indentSpaces(indent); - - if (verbose > 0) - printFlush(paste(spaces, "Consensus projective K-means:")); - - allSize = checkSets(multiExpr); - nSamples = allSize$nSamples; - nGenes = allSize$nGenes; - nSets = allSize$nSets; - - #if (preferredSize >= floor(sqrt(2^31)) ) - # stop("'preferredSize must be less than ", floor(sqrt(2^31)), ". Please decrease it and try again.") - - if (preferredSize >= sqrt(2^31)-1) - printFlush(spaste( - spaces, " Support for blocks larger than sqrt(2^31) is experimental; please report\n", - spaces, " any issues to Peter.Langfelder@gmail.com.")); - - if (exists(".Random.seed")) - { - seedSaved = TRUE; - savedSeed = .Random.seed - } else seedSaved = FALSE; - set.seed(randomSeed); - - if (is.null(nCenters)) nCenters = as.integer(min(nGenes/20, 100 * nGenes/preferredSize)); - - if (verbose > 1) - printFlush(paste(spaces, "..using", nCenters, "centers.")); - - intNetworkType = charmatch(networkType, .networkTypes); - if (is.na(intNetworkType)) - stop(paste("Unrecognized networkType argument.", - "Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", "))); - - for (set in 1:nSets) - multiExpr[[set]]$data = scale(as.matrix(multiExpr[[set]]$data)); - - # Check data for genes and samples that have too many missing values - if (checkData) - { - if (verbose > 0) - printFlush(paste(spaces, "..checking data for excessive number of missing values..")); - gsg = goodSamplesGenesMS(multiExpr, verbose = verbose - 1, indent = indent + 1); - for (set in 1:nSets) - { - if (!gsg$allOK) - multiExpr[[set]]$data = scale(multiExpr[[set]]$data[gsg$goodSamples[[set]], gsg$goodGenes]); - } - } - - anyNA = mtd.apply(multiExpr, function(x) any(is.na(x)), mdaSimplify = TRUE); - if (imputeMissing && any(anyNA)) - { - if (verbose > 0) - printFlush(paste(spaces, "..imputing missing data..")); - multiExpr[anyNA] = mtd.apply(multiExpr[anyNA], function(x) t(impute.knn(t(x))$data), mdaVerbose = verbose>1) - } else if (any(anyNA)) - { - printFlush(paste(spaces, "Found missing data. These will be replaced by zeros;\n", - spaces, " for a better replacement, use 'imputeMissing=TRUE'.")); - for (set in 1:nSets) - multiExpr[[set]]$data[is.na(multiExpr[[set]]$data)] = 0; - } - - setSize = checkSets(multiExpr); - nGenes = setSize$nGenes; - nSamples = setSize$nSamples; - - centers = vector(mode="list", length = nSets); - for (set in 1:nSets) - centers[[set]] = list(data = matrix(0, nSamples[set], nCenters)); - - randGeneIndex = sample(nGenes, size = nGenes); - temp = rep(c(1:nCenters), times = ceiling(nGenes/nCenters)); - membership = temp[randGeneIndex]; - - if (verbose > 0) - printFlush(paste(spaces, "..consensus k-means clustering..")); - - changed = c(1:nCenters); dst = NULL; - iteration = 0; - centerDist = NULL; - while (!is.null(changed) && (iteration < maxIterations)) - { - iteration = iteration + 1; - if (verbose > 1) printFlush(paste(spaces, " ..iteration", iteration)); - clusterSizes = table(membership); - for (set in 1:nSets) for (cen in changed) - centers[[set]]$data[, cen] = .alignedFirstPC(multiExpr[[set]]$data[, membership==cen], - verbose = verbose-2, indent = indent+2); - dst = centerGeneDist(centers, dst, changed); - centerDist = memberCenterDist(dst, membership, clusterSizes, changed, centerDist); - - changed = NULL; - minRes = minWhichMin(dst); - nearestDist = minRes$min; - nearest = minRes$which; - - if (sum(centerDist>nearestDist)>0) - { - proposedMemb = nearest; - accepted = FALSE; - while (!accepted && (sum(proposedMemb!=membership)>0)) - { - if (verbose > 2) - cat(paste(spaces, " ..proposing to move", sum(proposedMemb!=membership), "genes")); - moved = c(1:nGenes)[proposedMemb!=membership]; - newCentDist = memberCenterDist(dst, proposedMemb); - gotWorse = newCentDist[moved] > centerDist[moved] - if (sum(gotWorse)==0) - { - accepted = TRUE; - if (verbose > 2) printFlush(paste("..move accepted.")); - } else { - if (verbose > 2) printFlush(paste("..some genes got worse. Trying again.")); - ord = order(centerDist[moved[gotWorse]] - newCentDist[moved[gotWorse]]) - n = ceiling(length(ord)*3/5); - proposedMemb[moved[gotWorse][ord[c(1:n)]]] = membership[moved[gotWorse][ord[c(1:n)]]]; - } - } - if (accepted) - { - propSizes = table(proposedMemb); - keep = as.numeric(names(propSizes)); - for (set in 1:nSets) - centers[[set]]$data = centers[[set]]$data[, keep]; - dst = dst[keep, ]; - changedAll = union(membership[moved], proposedMemb[moved]); - changedKeep = changedAll[is.finite(match(changedAll, keep))]; - changed = rank(changedKeep); # This is a way to make say 1,3,4 into 1,2,3 - membership = as.numeric(as.factor(proposedMemb)); - if ( (verbose > 1) && (sum(keep) < nCenters)) - printFlush(paste(spaces, " ..dropping", nCenters - sum(keep), - "centers because ther clusters are empty.")); - nCenters = length(keep); - } else { - changed = NULL; - if (verbose > 2) printFlush(paste("Could not find a suitable move to improve the clustering.")); - } - } else { - changed = NULL; - if (verbose > 2) - printFlush(paste("Clustering is stable: all genes are closest to their assigned center.")); - } - } - - consCenterDist = function(centers, select) - { - if (is.logical(select)) - { - nC = sum(select); - } else { - nC = length(select); - } - distX = array(0, dim=c(nSets, nC, nC)); - for (set in 1:nSets) - { - if (intNetworkType==1) - { - distX[set, , ] = 1-abs(cor(centers[[set]]$data[, select])); - } else { - distX[set, , ] = 1-cor(centers[[set]]$data[, select]); - } - } - if (nSets==1) { - distX[1, , ] - } else - -minWhichMin(-distX, byRow = FALSE, dims = 1)$min; - } - - unmergedMembership = membership; - unmergedCenters = centers; - # merge nearby clusters if their sizes allow merging - if (verbose > 0) printFlush(paste(spaces, "..merging smaller clusters...")); - clusterSizes = as.vector(table(membership)); - small = (clusterSizes < preferredSize); - done = FALSE; - while (!done & (sum(small)>1)) - { - smallIndex = c(1:nCenters)[small] - nSmall = sum(small); - clustDist = consCenterDist(centers, smallIndex); - - diag(clustDist) = 10; - distOrder = order(as.vector(clustDist))[seq(from=2, to = nSmall * (nSmall-1), by=2)]; - i = 1; canMerge = FALSE; - while (i <= length(distOrder) && (!canMerge)) - { - whichJ = smallIndex[as.integer( (distOrder[i]-1)/nSmall + 1)]; - whichI = smallIndex[distOrder[i] - (whichJ-1) * nSmall]; - canMerge = sum(clusterSizes[c(whichI, whichJ)]) < preferredSize; - i = i + 1; - } - if (canMerge) - { - membership[membership==whichJ] = whichI; - clusterSizes[whichI] = sum(clusterSizes[c(whichI, whichJ)]); - #if (verbose > 1) - # printFlush(paste(spaces, " ..merged clusters", whichI, "and", whichJ, - # "whose combined size is", clusterSizes[whichI])); - for (set in 1:nSets) - centers[[set]]$data[, whichI] = .alignedFirstPC(multiExpr[[set]]$data[, membership==whichI], - verbose = verbose-2, indent = indent+2); - for (set in 1:nSets) - centers[[set]]$data = centers[[set]]$data[,-whichJ]; - membership[membership>whichJ] = membership[membership>whichJ] - 1; - nCenters = nCenters -1; - clusterSizes = as.vector(table(membership)); - small = (clusterSizes < preferredSize); - if (verbose > 3) - printFlush(paste(spaces, " ..merged clusters", whichI, "and", whichJ, - "whose combined size is", clusterSizes[whichI])); - } else done = TRUE; - } - - if (checkData) - { - membershipAll = rep(NA, allSize$nGenes); - membershipAll[gsg$goodGenes] = membership; - } else - membershipAll = membership; - - if (seedSaved) .Random.seed <<- savedSeed; - - return( list(clusters = membershipAll, centers = centers, unmergedClusters = unmergedMembership, - unmergedCenters = unmergedCenters) ); -} - +# Hello, + +# I am doing co-expression network with WGCNA on RNA-seq data (70-200 samples). +# While using blockwiseModules() function, I obtain modules smaller than the module size cut-off. +# Namely, minModuleSize <= 20, I got modules with just 1, 2, 4, genes. +# It seems a kind of exception in the function. + +# How can this be possible? + +# Further, I can say that this happens more frequently when the soft-thresholding power < 4 (unsigned, signed hybrid networks) or power < 9 (signed networks) even if the soft-threshold criterion is satisfied. The occurrence for larger powers is negligible, still some exceptions remain. + +# Here the code, the most is default: + +net = blockwiseModules(input_train, + power = beta_value, + networkType = NT, + randomSeed = seed, + minModuleSize = 20, + corType = "bicor", + mergeCutHeight = 0.15, + pamStage = TRUE, + pamRespectsDendro = FALSE, + TOMType = "signed", + saveTOMs = FALSE, + maxBlockSize = 5000, + numericLabels = TRUE, + nThreads = 4, + verbose = 3) From 9f67cb26f97c25a5f07a2ebcb4f90b69af997d60 Mon Sep 17 00:00:00 2001 From: Pasquale Di Carlo <36417405+pdicarl3@users.noreply.github.com> Date: Sat, 17 Aug 2019 09:22:57 -0400 Subject: [PATCH 2/2] Rename blockwiseModulesC.R to WGCNA blockwiseModules() output too small modules --- ...dulesC.R => WGCNA blockwiseModules() output too small modules} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{blockwiseModulesC.R => WGCNA blockwiseModules() output too small modules} (100%) diff --git a/R/blockwiseModulesC.R b/R/WGCNA blockwiseModules() output too small modules similarity index 100% rename from R/blockwiseModulesC.R rename to R/WGCNA blockwiseModules() output too small modules