Skip to content

Commit 22a6df2

Browse files
committedJun 7, 2016
Add files via upload
Added a few R functions
1 parent b3cbef6 commit 22a6df2

File tree

3 files changed

+89
-0
lines changed

3 files changed

+89
-0
lines changed
 

‎cor.prob.R

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
## Correlation matrix with p-values. See http://goo.gl/nahmV for documentation of this function
2+
cor.prob <- function (X, dfr = nrow(X) - 2) {
3+
R <- cor(X, use="pairwise.complete.obs")
4+
above <- row(R) < col(R)
5+
r2 <- R[above]^2
6+
Fstat <- r2 * dfr/(1 - r2)
7+
R[above] <- 1 - pf(Fstat, 1, dfr)
8+
R[row(R) == col(R)] <- NA
9+
R
10+
}

‎exploratory_analysis.R

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#Produce a histogram for each variable in a dataset.
2+
all.hist <- function(x){
3+
sapply(colnames(x), function(i)hist(x[,i], main = i,
4+
xlab="Value", col="lightblue"))
5+
}
6+
7+
#Produce a kernel density plot w/normal curve for each
8+
#variable in a dataset.
9+
all.density.plot <- function(x){
10+
sapply(colnames(x), function(i)plot(density(x[,i]),
11+
main = i, xlab="Value", col="red", lwd=2))
12+
}
13+
14+
#Produce a scatterplot matrix with histograms
15+
#on the diagonal and correlation statistics in the
16+
#upper panel using the base function pairs()
17+
#http://handlesman.blogspot.com/2011/03/matrix-plot-with-confidence-intervals.html
18+
19+
## put histograms on the diagonal
20+
panel.hist <- function(x, ...)
21+
{
22+
usr <- par("usr"); on.exit(par(usr))
23+
par(usr = c(usr[1:2], 0, 1.5) )
24+
h <- hist(x, plot = FALSE)
25+
breaks <- h$breaks; nB <- length(breaks)
26+
y <- h$counts; y <- y/max(y)
27+
rect(breaks[-nB], 0, breaks[-1], y, col="lavender", ...)
28+
}
29+
30+
## put correlations & 95% CIs on the upper panels,
31+
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...)
32+
{
33+
usr <- par("usr"); on.exit(par(usr))
34+
par(usr = c(0, 1, 0, 1))
35+
r <- cor(x, y,use="complete.obs")
36+
txt <- format(c(r, 0.123456789), digits=digits)[1]
37+
prefix <- "r = "
38+
rc <- cor.test(x,y)
39+
rci <- rc$conf.int
40+
txt2 <- format(c(rci, 0.123456789), digits=digits)[1]
41+
txt3 <- format(c(rci, 0.123456789), digits=digits)[2]
42+
prefix2 <- "\nCI = "
43+
txt <- paste(prefix, txt, prefix2, txt2, ", ", txt3, sep="")
44+
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
45+
text(0.5, 0.5, txt, cex = 1)
46+
}
47+
48+
#Combine helper functions
49+
pairs(iris[1:4], lower.panel=panel.smooth, cex = .8, pch = 21, bg="steelblue",
50+
diag.panel=panel.hist, cex.labels = 1.2, font.labels=2, upper.panel=panel.cor)
51+

‎hyperG_nestedLists.r

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
hyperG <- function(list1, list2){
2+
library(reshape2)
3+
uni1 <- sum(sapply(list1, length))
4+
uni2 <- sum(sapply(list1, length))
5+
universe <- max(c(uni1,uni2))
6+
universe=1904
7+
intersects <- sapply(list1, function(x) sapply(list2, function(y) length(intersect(x,y))))
8+
len1 <- t(as.data.frame(lapply(list1, length)))
9+
len2 <- t(as.data.frame(lapply(list2, length)))
10+
out <- cbind(melt(intersects), len_Var1=rep(len1, each=length(len2)), len_Var2=rep(len2, times=length(len1)))
11+
out <- subset(out, len_Var1 >= 5 & len_Var2 >=5)
12+
pvalue <- apply(out[,3:5], 1, function(x) 1-phyper(x[1],x[2],universe-x[2],x[3]))
13+
FDR <- p.adjust(pvalue, method='fdr')
14+
out <- cbind(out, pvalue, FDR)
15+
colnames(out) <- c('Var1', 'Var2', 'Overlap', 'len_Var1', 'len_Var2', 'pvalue', 'FDR')
16+
out
17+
}
18+
19+
#Probability of getting 100 or more white balls in a sample of size 400 from an urn
20+
#with 3000 white balls and 12000 black balls
21+
# white balls retrieved = overlap-1 (e.g. 100 or more)
22+
# total white balls = cluster size
23+
# total black balls = universe - cluster size
24+
# total draws = category size
25+
#cluster/category size are interchangable, but universe must always substact the former
26+
#1-phyper(white balls, total white balls, black balls, size)
27+
#1-phyper(99, 3000, 12000, 400)
28+
#1-phyper(overlap-1, cluster size, universe-cluster size, category size)

0 commit comments

Comments
 (0)