From cc2d53c5c64232fc1dc890b82c597d5fdfd289da Mon Sep 17 00:00:00 2001
From: rsquaredin
Date: Sat, 27 Jan 2018 13:35:35 +0530
Subject: [PATCH 1/6] fixes #9

---
 R/infer-levene-test.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/infer-levene-test.R b/R/infer-levene-test.R
index 076ad10..0cd1a85 100644
--- a/R/infer-levene-test.R
+++ b/R/infer-levene-test.R
@@ -62,14 +62,14 @@ infer_levene_test <- function(variable, ...) UseMethod('infer_levene_test')
 
 #' @export
 #' @rdname infer_levene_test
-infer_levene_test.default <- function(variable, ..., group_var = NA,
+infer_levene_test.default <- function(variable, ..., group_var = NULL,
                                       trim.mean = 0.1) {
 
   varname <- deparse(substitute(variable))
 
-  suppressWarnings(
-    if (is.na(group_var)) {
+
+  if (is.null(group_var)) {
 
     if (is.data.frame(variable)) {
       z <- as.list(variable)
@@ -96,7 +96,7 @@ infer_levene_test.default <- function(variable, ..., group_var = NA,
 
   }
 
-  )
+
 
   if (!is.factor(group_var)) {
     group_var <- as.factor(group_var)

From 4c28857b4a7dd0e424f845abc93dc82ec3f7996d Mon Sep 17 00:00:00 2001
From: rsquaredin
Date: Sat, 27 Jan 2018 13:44:28 +0530
Subject: [PATCH 2/6] ignore hex sticker

---
 .Rbuildignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.Rbuildignore b/.Rbuildignore
index 42fa68a..f5f4fad 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -11,3 +11,4 @@
 ^NEWS$
 ^CONDUCT\.md$
 ^.*\.jpg$
+^hex_inferr\.png$

From b6be49640dbafe4f524055167260789ea67e7601 Mon Sep 17 00:00:00 2001
From: rsquaredin
Date: Sat, 27 Jan 2018 13:44:49 +0530
Subject: [PATCH 3/6] modify deprecated function

---
 R/infer-levene-test.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/infer-levene-test.R b/R/infer-levene-test.R
index 0cd1a85..e8f8b11 100644
--- a/R/infer-levene-test.R
+++ b/R/infer-levene-test.R
@@ -129,7 +129,7 @@ infer_levene_test.default <- function(variable, ..., group_var = NULL,
 #' @rdname infer_levene_test
 #' @usage NULL
 #'
-levene_test <- function(variable, ..., group_var = NA,
+levene_test <- function(variable, ..., group_var = NULL,
                         trim.mean = 0.1) {
 
   .Deprecated("infer_levene_test()")

From cb6af5cf3d8a17ade4ad72cef4fab02cf47bad67 Mon Sep 17 00:00:00 2001
From: rsquaredin
Date: Sat, 27 Jan 2018 13:52:03 +0530
Subject: [PATCH 4/6] update documentation

---
 man/infer_levene_test.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/infer_levene_test.Rd b/man/infer_levene_test.Rd
index 312356f..ab435b4 100644
--- a/man/infer_levene_test.Rd
+++ b/man/infer_levene_test.Rd
@@ -10,7 +10,7 @@
 \usage{
 infer_levene_test(variable, ...)
 
-\method{infer_levene_test}{default}(variable, ..., group_var = NA,
+\method{infer_levene_test}{default}(variable, ..., group_var = NULL,
   trim.mean = 0.1)
 
 \method{infer_levene_test}{lm}(variable, ...)

From 0ed1230684f6a9f3cab80d981565888da1095b79 Mon Sep 17 00:00:00 2001
From: rsquaredin
Date: Sat, 27 Jan 2018 15:14:37 +0530
Subject: [PATCH 5/6] update levene test documentation in website

---
 docs/reference/infer_levene_test.html | 86 ++++++++++++++++++---------
 1 file changed, 58 insertions(+), 28 deletions(-)

diff --git a/docs/reference/infer_levene_test.html b/docs/reference/infer_levene_test.html
index 4e29cb1..5d16c90 100644
--- a/docs/reference/infer_levene_test.html
+++ b/docs/reference/infer_levene_test.html

[The hunks to this file touch the page <head> and pkgdown template markup; the
markup itself is not recoverable from this extraction. The rendered content of
the updated reference page appears below.]
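Why PATCH 1/6 trades the is.na() guard for is.null(): an illustrative sketch by
the editor (not part of any patch). is.na() is vectorised, so as soon as a real
grouping vector is supplied the old check produces a multi-element condition
that had to be wrapped in suppressWarnings(); is.null() always returns a single
logical, which is why NULL works better as the "not supplied" default.

    grp <- c(1, 1, 2, 2)

    is.na(grp)     # FALSE FALSE FALSE FALSE -- one value per element, not one flag
    is.null(grp)   # FALSE -- always a single value, safe inside if ()
    is.null(NULL)  # TRUE  -- the "argument not supplied" case under the new default

    # With the old default (group_var = NA), `if (is.na(group_var))` warns that
    # "the condition has length > 1" once a grouping vector is passed (newer R
    # versions treat this as an error), hence the suppressWarnings() wrapper
    # that PATCH 1/6 removes.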
Levene's test for equality of variances — infer_levene_test

infer_levene_test reports Levene's robust test statistic for the equality of
variances and the two statistics proposed by Brown and Forsythe that replace
the mean in Levene's formula with alternative location estimators. The first
alternative replaces the mean with the median. The second alternative replaces
the mean with the 10% trimmed mean.

Usage

infer_levene_test(variable, ...)

# S3 method for default
infer_levene_test(variable, ..., group_var = NULL,
  trim.mean = 0.1)

# S3 method for lm
infer_levene_test(variable, ...)

# S3 method for formula
infer_levene_test(variable, data, ...)
Arguments

variable     a numeric vector or formula or object of class lm

...          numeric vectors

group_var    a grouping variable

trim.mean    trimmed mean

data         a data frame
Value

infer_levene_test returns an object of class "infer_levene_test".
An object of class "infer_levene_test" is a list containing the
following components:

bf      Brown and Forsythe f statistic
p_bf    p-value for Brown and Forsythe f statistic
lev     Levene's f statistic
p_lev   p-value for Levene's f statistic
bft     Brown and Forsythe f statistic using trimmed mean
p_bft   p-value for Brown and Forsythe f statistic using trimmed mean
avgs    mean for each level of the grouping variable
sds     standard deviations for each level of the grouping variable
avg     combined mean
sd      combined standard deviation
n       number of observations
n_df    numerator degrees of freedom
d_df    denominator degrees of freedom
levs    levels of the grouping variable
lens    number of observations for each level of the grouping variable
type    alternative hypothesis
Deprecated Function

levene_test() has been deprecated. Instead use infer_levene_test().
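For example, the documented components can be pulled straight from the returned
list. An illustrative sketch by the editor (not part of the reference page),
using the hsb data bundled with inferr:

    library(inferr)

    result <- infer_levene_test(hsb$read, group_var = hsb$race)

    result$lev    # Levene's f statistic (mean-based)
    result$p_lev  # its p-value
    result$bf     # Brown and Forsythe f statistic (median-based)
    result$p_bf   # its p-value
    result$n_df   # numerator degrees of freedom
    result$d_df   # denominator degrees of freedom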

References

Bland, M. 2000. An Introduction to Medical Statistics. 3rd ed. Oxford: Oxford
University Press.

Brown, M. B., and A. B. Forsythe. 1974. Robust tests for the equality of
variances. Journal of the American Statistical Association 69: 364–367.

Carroll, R. J., and H. Schneider. 1985. A note on Levene's tests for equality
of variances. Statistics and Probability Letters 3: 191–194.
From 8d0b6ad26bd90b54e4c2c394e2e4b8c1447eaabd Mon Sep 17 00:00:00 2001
From: rsquaredin
Date: Sat, 27 Jan 2018 15:14:55 +0530
Subject: [PATCH 6/6] fixes #11

---
 R/infer-binom-test.R                 |   5 +-
 R/infer-output.R                     |  48 ++---
 docs/articles/index.html             |  11 +-
 docs/articles/intro.html             | 245 ++++++++++++-----------
 docs/reference/index.html            | 285 ++++++++++++++++-----------
 docs/reference/infer_binom_calc.html |  79 +++++---
 man/infer_binom_calc.Rd              |   2 -
 tests/testthat/test-binom.R          |  61 +++---
 8 files changed, 401 insertions(+), 335 deletions(-)

diff --git a/R/infer-binom-test.R b/R/infer-binom-test.R
index b1483a7..5fe03cc 100644
--- a/R/infer-binom-test.R
+++ b/R/infer-binom-test.R
@@ -16,10 +16,8 @@
 #' \item{exp_k}{expected number of successes}
 #' \item{obs_p}{assumed probability of success}
 #' \item{exp_p}{expected probability of success}
-#' \item{ik}{the largest number <= \code{exp_k} such that Pr(k = ik) <= Pr(k = kobs)}
 #' \item{lower}{lower one sided p value}
 #' \item{upper}{upper one sided p value}
-#' \item{two_tail}{two sided p value}
 #' @section Deprecated Functions:
 #' \code{binom_calc()} and \code{binom_test()} have been deprecated. Instead use
 #' \code{infer_binom_cal()} and \code{infer_binom_test()}.
@@ -59,8 +57,7 @@ infer_binom_calc.default <- function(n, success, prob = 0.5, ...) {
 
   k <- binom_comp(n, success, prob)
 
   out <- list(n = n, k = k$k, exp_k = k$exp_k, obs_p = k$obs_p,
-    exp_p = k$exp_p, ik = k$ik, lower = k$lower, upper = k$upper,
-    two_tail = k$two_tail)
+    exp_p = k$exp_p, lower = k$lower, upper = k$upper)
 
   class(out) <- 'infer_binom_calc'
   return(out)

diff --git a/R/infer-output.R b/R/infer-output.R
index f8ea359..90b423d 100644
--- a/R/infer-output.R
+++ b/R/infer-output.R
@@ -81,8 +81,8 @@ print_binom <- function(data) {
 
   # test summary widths
   w6 <- nchar('Lower')
-  w7 <- nchar(paste0('Pr(k <= ', data$ik, ' or k >= ', data$k, ')'))
-  w8 <- nchar(paste0('Pr(k <= ', data$k, ' or k >= ', data$ik, ')'))
+  w7 <- nchar(paste0('Pr(k <= ', data$k, ' or k >= ', data$k, ')'))
+  w8 <- nchar(paste0('Pr(k <= ', data$k, ' or k >= ', data$k, ')'))
   w9 <- 8
   w10 <- sum(w6, w7, w9, 9)
   w11 <- sum(w6, w8, w9, 9)
@@ -97,17 +97,17 @@ print_binom <- function(data) {
     cat(" ", format('Tail', width = w6, justify = 'left'), fs(), format('Prob', width = w8, justify = 'centre'), fs(),
       format('p-value', width = w9, justify = 'centre'),'\n')
     cat(" ", rep("-", w11), sep = "", '\n')
-    cat(" ", format('Lower', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$k, ')'), width = w8, justify = 'left'), fs(),
-      format(data$lower, width = w9, justify = 'centre'),'\n')
-    cat(" ", format('Upper', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$k, ')'), width = w8, justify = 'left'), fs(),
-      format(data$upper, width = w9, justify = 'centre'),'\n')
-    if (data$ik < 0) {
-      cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$ik, ')'), width = w8, justify = 'left'), fs(),
-        format(data$two_tail, width = w9, justify = 'centre'),'\n')
-    } else {
-      cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$k, ' or k >= ', data$ik, ')'), width = w8, justify = 'left'), fs(),
-        format(data$two_tail, width = w9, justify = 'centre'),'\n')
-    }
+    cat(" ", format('Lower', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$k, ')'), width = w8, justify = 'centre'), fs(),
+      format(as.character(data$lower), width = w9, justify = 'centre'),'\n')
+    cat(" ", format('Upper', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$k, ')'), width = w8, justify = 'centre'), fs(),
+      format(as.character(data$upper), width = w9, justify = 'centre'),'\n')
+    # if (data$ik < 0) {
+    #   cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$ik, ')'), width = w8, justify = 'left'), fs(),
+    #     format(data$two_tail, width = w9, justify = 'centre'),'\n')
+    # } else {
+    #   cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$k, ' or k >= ', data$ik, ')'), width = w8, justify = 'left'), fs(),
+    #     format(data$two_tail, width = w9, justify = 'centre'),'\n')
+    # }
     cat(" ", rep("-", w11), sep = "", '\n')
 
   } else {
@@ -117,17 +117,17 @@ print_binom <- function(data) {
     cat(" ", format('Tail', width = w6, justify = 'left'), fs(), format('Prob', width = w7, justify = 'centre'), fs(),
       format('p-value', width = w9, justify = 'centre'),'\n')
     cat(" ", rep("-", w10), sep = "", '\n')
-    cat(" ", format('Lower', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$k, ')'), width = w7, justify = 'left'), fs(),
-      format(data$lower, width = w9, justify = 'centre'),'\n')
-    cat(" ", format('Upper', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$k, ')'), width = w7, justify = 'left'), fs(),
-      format(data$upper, width = w9, justify = 'centre'),'\n')
-    if (data$ik < 0) {
-      cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$k, ')'), width = w7, justify = 'left'), fs(),
-        format(data$two_tail, width = w9, justify = 'centre'),'\n')
-    } else {
-      cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$ik, ' or k >= ', data$k, ')'), width = w7, justify = 'left'), fs(),
-        format(data$two_tail, width = w9, justify = 'centre'),'\n')
-    }
+    cat(" ", format('Lower', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$k, ')'), width = w7, justify = 'centre'), fs(),
+      format(as.character(data$lower), width = w9, justify = 'centre'),'\n')
+    cat(" ", format('Upper', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$k, ')'), width = w7, justify = 'centre'), fs(),
+      format(as.character(data$upper), width = w9, justify = 'centre'),'\n')
+# if (data$ik < 0) {
+#   cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$k, ')'), width = w7, justify = 'left'), fs(),
+#     format(data$two_tail, width = w9, justify = 'centre'),'\n')
+# } else {
+#   cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$ik, ' or k >= ', data$k, ')'), width = w7, justify = 'left'), fs(),
+#     format(data$two_tail, width = w9, justify = 'centre'),'\n')
+# }
     cat(" ", rep("-", w10), sep = "", '\n')
 
   }

diff --git a/docs/articles/index.html b/docs/articles/index.html
index 24ec612..05f74d0 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html

[The hunks to this file touch the page <head> and pkgdown template markup,
which is not recoverable from this extraction.]
diff --git a/docs/articles/intro.html b/docs/articles/intro.html
index 358b652..fc674a0 100644
--- a/docs/articles/intro.html
+++ b/docs/articles/intro.html

[The first hunks touch the page <head> and template markup ("Introduction to
inferr • inferr") and are not recoverable from this extraction. The rendered
content of the updated vignette follows.]

2017-12-12

These tests are described in more detail in the following sections.

- One Sample t Test

+One Sample t Test

A one sample t-test is used to determine whether a sample of observations comes from a population with a specific mean. The observations must be continuous, independent of each other, approximately normally distributed and should not contain any outliers.
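The example below calls infer_os_t_test(); as an illustrative cross-check (an editor's sketch, not part of the vignette), the same hypothesis can be examined with base R:

    library(inferr)
    t.test(hsb$write, mu = 50)   # base-R one-sample t-test of write against mu = 50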

- Example

+Example

Using the hsb data, test whether the average of write differs significantly from 50.

-
infer_os_t_test(hsb$write, mu = 50, type = 'all')
+
infer_os_t_test(hsb$write, mu = 50, type = 'all')
##                               One-Sample Statistics                               
 ## ---------------------------------------------------------------------------------
 ##  Variable    Obs     Mean     Std. Err.    Std. Dev.    [95% Conf. Interval] 
@@ -138,14 +138,14 @@ 

- Paired t test

+Paired t test

A paired (samples) t-test is used when you want to compare the means between two related groups of observations on some continuous dependent variable. In a paired sample test, each subject or entity is measured twice. It can be used to evaluate the effectiveness of training programs or treatments. If the dependent variable is dichotomous, use the McNemar test.
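Base R offers a quick cross-check of the paired example shown below; an illustrative sketch (editor's example, not part of the vignette):

    library(inferr)

    t.test(hsb$read, hsb$write, paired = TRUE)                         # two-sided
    t.test(hsb$read, hsb$write, paired = TRUE, alternative = "less")   # lower tail, as below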

- Examples

+Examples

Using the hsb data, test whether the mean of read is equal to the mean of write.

-
# Lower Tail Test
-infer_ts_paired_ttest(hsb$read, hsb$write, alternative = 'less')
+
# Lower Tail Test
+infer_ts_paired_ttest(hsb$read, hsb$write, alternative = 'less')
##                          Paired Samples Statistics                          
 ## ---------------------------------------------------------------------------
 ## Variables    Obs    Mean     Std. Err.    Std. Dev.    [95% Conf. Interval] 
@@ -172,8 +172,8 @@ 

## --------------------------------------- ## read - write -0.873 199 0.192 ## ---------------------------------------

-
# Test all alternatives
-infer_ts_paired_ttest(hsb$read, hsb$write, alternative = 'all')
+
# Test all alternatives
+infer_ts_paired_ttest(hsb$read, hsb$write, alternative = 'all')
##                          Paired Samples Statistics                          
 ## ---------------------------------------------------------------------------
 ## Variables    Obs    Mean     Std. Err.    Std. Dev.    [95% Conf. Interval] 
@@ -199,7 +199,7 @@ 

- Two Independent Sample t Test

+Two Independent Sample t Test

An independent samples t-test is used to compare the means of a normally distributed continuous dependent variable for two unrelated groups. The dependent variable must be approximately normally distributed, and the cases/subjects in the two groups must be different, i.e. a subject in one group cannot also be a subject of the other group. It can be used to answer whether:

  • average number of products produced by two machines differ significantly?
  • @@ -207,11 +207,11 @@

- Example

+Example

Using the hsb data, test whether the mean for write is the same for males and females.

-
hsb2 <- inferr::hsb
-hsb2$female <- as.factor(hsb2$female)
-infer_ts_ind_ttest(hsb2, 'female', 'write', alternative = 'all')
+
hsb2 <- inferr::hsb
+hsb2$female <- as.factor(hsb2$female)
+infer_ts_ind_ttest(hsb2, 'female', 'write', alternative = 'all')
##                               Group Statistics                                
 ## -----------------------------------------------------------------------------
 ##   Group       Obs     Mean     Std. Err.    Std. Dev.    [95% Conf. Interval] 
@@ -252,14 +252,14 @@ 

- One Sample Test of Proportion

+One Sample Test of Proportion

One sample test of proportion compares the proportion in one group to a specified population proportion.

- Examples

+Examples

Using hsb data, test whether the proportion of females is 50%.

-
# Using Variables
-infer_os_prop_test(as.factor(hsb$female), prob = 0.5)
+
# Using Variables
+infer_os_prop_test(as.factor(hsb$female), prob = 0.5)
##      Test Statistics      
 ## -------------------------
 ## Sample Size           200 
@@ -275,8 +275,8 @@ 

## 1 109 100 9.00 0.90 ## -----------------------------------------------------------------

Using Calculator

-
# Calculator
-infer_os_prop_test(200, prob = 0.5, phat = 0.3)
+
# Calculator
+infer_os_prop_test(200, prob = 0.5, phat = 0.3)
##      Test Statistics       
 ## --------------------------
 ## Sample Size            200 
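The z statistic in this calculator example can be reproduced by hand with the usual large-sample formula (an illustrative sketch by the editor; the formula is the textbook one and is not transcribed from the package source):

    phat <- 0.3
    p0   <- 0.5
    n    <- 200

    z <- (phat - p0) / sqrt(p0 * (1 - p0) / n)
    z                    # -5.656854
    2 * pnorm(-abs(z))   # two-sided p-value, roughly 1.5e-08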
@@ -295,17 +295,17 @@ 

- Two Sample Test of Proportion

+Two Sample Test of Proportion

Two sample test of proportion performs tests on the equality of proportions using large-sample statistics. It tests that a categorical variable has the same proportion within two groups or that two variables have the same proportion.

- Examples

+Examples

- Using Variables

+Using Variables

Using the treatment data, test equality of proportion of two treatments

-
# Using Variables
-infer_ts_prop_test(var1 = treatment$treatment1, var2 = treatment$treatment2, alternative = 'all')
+
# Using Variables
+infer_ts_prop_test(var1 = treatment$treatment1, var2 = treatment$treatment2, alternative = 'all')
##     Test Statistics      
 ## ------------------------
 ## Sample Size           50 
@@ -316,10 +316,10 @@ 

- Use Grouping Variable

+Use Grouping Variable

Using the treatment2 data, test whether outcome has same proportion for male and female

-
# Using Grouping Variable
-infer_ts_prop_grp(var = treatment2$outcome, group = treatment2$female, alternative = 'all')
+
# Using Grouping Variable
+infer_ts_prop_grp(var = treatment2$outcome, group = treatment2$female, alternative = 'all')
##     Test Statistics      
 ## ------------------------
 ## Sample Size           91 
@@ -330,10 +330,10 @@ 

- Using Calculator

+Using Calculator

Test whether the same proportion of people from two batches will pass a review exam for a training program. In the first batch of 30 participants, 30% passed the review, whereas in the second batch of 25 participants, 50% passed the review.

-
# Calculator
-infer_ts_prop_calc(n1 = 30, n2 = 25, p1 = 0.3, p2 = 0.5, alternative = 'all')
+
# Calculator
+infer_ts_prop_calc(n1 = 30, n2 = 25, p1 = 0.3, p2 = 0.5, alternative = 'all')
##      Test Statistics      
 ## -------------------------
 ## Sample Size            30 
@@ -346,7 +346,7 @@ 

- One Sample Variance Test

+One Sample Variance Test

The one sample variance comparison test compares the standard deviation (variance) to a hypothesized value. It determines whether the standard deviation of a population is equal to a hypothesized value. It can be used to answer the following questions:

  • Is the variance equal to some pre-determined threshold value?
  • @@ -355,10 +355,10 @@

- Examples

+Examples

Using the mtcars data, compare the standard deviation of mpg to a hypothesized value.

-
# Lower Tail Test
-infer_os_var_test(mtcars$mpg, 0.3, alternative = 'less')
+
# Lower Tail Test
+infer_os_var_test(mtcars$mpg, 0.3, alternative = 'less')
##                             One-Sample Statistics                             
 ## -----------------------------------------------------------------------------
 ##  Variable    Obs     Mean      Std. Err.    Std. Dev.    [95% Conf. Interval] 
@@ -377,8 +377,8 @@ 

## ---------------------------------------- ## mpg 12511.436 31 1.0000 ## ----------------------------------------

-
# Test all alternatives
-infer_os_var_test(mtcars$mpg, 0.3, alternative = 'all')
+
# Test all alternatives
+infer_os_var_test(mtcars$mpg, 0.3, alternative = 'all')
##                             One-Sample Statistics                             
 ## -----------------------------------------------------------------------------
 ##  Variable    Obs     Mean      Std. Err.    Std. Dev.    [95% Conf. Interval] 
@@ -395,17 +395,17 @@ 

- Two Sample Variance Test

+Two Sample Variance Test

The two sample variance comparison test checks the equality of standard deviations (variances). It tests whether the standard deviation of a continuous variable is the same within two groups, or whether the standard deviations of two continuous variables are equal.
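The comparison rests on the ratio of the two sample variances, which under the null hypothesis of equal variances follows an F distribution. An illustrative base-R sketch of the grouping-variable example below (editor's example; the numerator/denominator ordering may differ from the package output):

    auto   <- mtcars$mpg[mtcars$am == 0]
    manual <- mtcars$mpg[mtcars$am == 1]

    var(manual) / var(auto)    # ratio of sample variances
    var.test(manual, auto)     # base-R F test of equal variances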

- Example

+Example

- Use Grouping Variable

+Use Grouping Variable

Using the mtcars data, compare the standard deviation in miles per gallon for automatic and manual vehicles.

-
# Using Grouping Variable
-infer_ts_var_test(mtcars$mpg, group_var = mtcars$am, alternative = 'all')
+
# Using Grouping Variable
+infer_ts_var_test(mtcars$mpg, group_var = mtcars$am, alternative = 'all')
##                Variance Ratio Test                 
 ## --------------------------------------------------
 ##   Group      Obs    Mean     Std. Err.    Std. Dev. 
@@ -434,10 +434,10 @@ 

- Using Variables

+Using Variables

Using the hsb data, compare the standard deviation of reading and writing scores.

-
# Using Variables
-infer_ts_var_test(hsb$read, hsb$write, alternative = 'all')
+
# Using Variables
+infer_ts_var_test(hsb$read, hsb$write, alternative = 'all')
##                Variance Ratio Test                 
 ## --------------------------------------------------
 ##   Group      Obs    Mean     Std. Err.    Std. Dev. 
@@ -468,14 +468,14 @@ 

- Binomial Probability Test

+Binomial Probability Test

A one sample binomial test allows us to test whether the proportion of successes on a two-level categorical dependent variable significantly differs from a hypothesized value.

- Examples

+Examples

Using the hsb data, test whether the proportion of females and males are equal.

-
# Using variables
-infer_binom_test(as.factor(hsb$female), prob = 0.5)
+
# Using variables
+infer_binom_test(as.factor(hsb$female), prob = 0.5)
##              Binomial Test              
 ##  ---------------------------------------
 ##   Group     N     Obs. Prop    Exp. Prop 
@@ -485,19 +485,18 @@ 

## --------------------------------------- ## ## -## Test Summary -## --------------------------------------------- -## Tail Prob p-value -## --------------------------------------------- -## Lower Pr(k <= 109) 0.910518 -## Upper Pr(k >= 109) 0.114623 -## Two Pr(k <= 91 or k >= 109) 0.229247 -## ---------------------------------------------

+## Test Summary +## ---------------------------------------------- +## Tail Prob p-value +## ---------------------------------------------- +## Lower Pr(k <= 109) 0.910518 +## Upper Pr(k >= 109) 0.114623 +## ----------------------------------------------

- Using Calculator

-
# calculator
-infer_binom_calc(32, 16, prob = 0.5)
+Using Calculator +
# calculator
+infer_binom_calc(32, 16, prob = 0.5)
##             Binomial Test              
 ##  --------------------------------------
 ##   Group    N     Obs. Prop    Exp. Prop 
@@ -511,22 +510,21 @@ 

## -------------------------------------------- ## Tail Prob p-value ## -------------------------------------------- -## Lower Pr(k <= 16) 0.569975 -## Upper Pr(k >= 16) 0.569975 -## Two Pr(k <= 15 or k >= 16) 1 +## Lower Pr(k <= 16) 0.569975 +## Upper Pr(k >= 16) 0.569975 ## --------------------------------------------
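These tail probabilities can be reproduced with base R's binomial distribution function, assuming (as the labels Pr(k <= 16) and Pr(k >= 16) suggest) that they are exact binomial tails — an illustrative check by the editor:

    pbinom(16, size = 32, prob = 0.5)                       # Pr(k <= 16), about 0.569975
    pbinom(15, size = 32, prob = 0.5, lower.tail = FALSE)   # Pr(k >= 16), about 0.569975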

- ANOVA

+ANOVA

The one-way analysis of variance (ANOVA) is used to determine whether there are any statistically significant differences between the means of two or more independent (unrelated) groups. It tests the null hypothesis that samples in two or more groups are drawn from populations with the same mean values. It cannot tell you which specific groups differed significantly from each other, only that at least two groups did, and it can be used only for numerical data.
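As an illustrative cross-check of the example below (an editor's sketch, not part of the vignette), the same one-way ANOVA can be fitted with base R:

    library(inferr)
    summary(aov(write ~ as.factor(prog), data = hsb))   # base-R equivalent of the example below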

- Examples

+Examples

Using the hsb data, test whether the mean of write differs between the three program types.

-
infer_oneway_anova(hsb, 'write', 'prog')
+
infer_oneway_anova(hsb, 'write', 'prog')
##                                 ANOVA                                  
 ## ----------------------------------------------------------------------
 ##                    Sum of                                             
@@ -552,15 +550,15 @@ 

- Chi Square Goodness of Fit Test

+Chi Square Goodness of Fit Test

A chi-square goodness of fit test allows us to compare the observed sample distribution with an expected probability distribution. It tests whether the observed proportions for a categorical variable differ from hypothesized proportions. The proportions of cases expected in each group of the categorical variable may be equal or unequal. It can be applied to any univariate distribution for which you can calculate the cumulative distribution function. It is applied to binned data, and the value of the chi-square statistic depends on how the data are binned. For the chi-square approximation to be valid, the sample size must be sufficiently large.
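The statistic is the familiar sum of (observed - expected)^2 / expected over the groups. A worked check against the first example below (the observed race counts are an assumption by the editor, chosen because they reproduce the reported value):

    observed <- c(24, 11, 20, 145)   # assumed counts of race in hsb
    expected <- c(20, 20, 20, 140)   # hypothesized counts from the example

    sum((observed - expected)^2 / expected)   # 5.028571 on 4 - 1 = 3 d.f.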

- Example

+Example

Using the hsb data, test whether the observed proportions for race differs significantly from the hypothesized proportions.

-
# basic example
-race <- as.factor(hsb$race)
-infer_chisq_gof_test(race, c(20, 20, 20 , 140))
+
##     Test Statistics     
 ## -----------------------
 ## Chi-Square       5.0286 
@@ -579,10 +577,10 @@ 

## -----------------------------------------------------------------

- Continuity Correction

-
# using continuity correction
-race <- as.factor(hsb$race)
-infer_chisq_gof_test(race, c(20, 20, 20 , 140), correct = TRUE)
+Continuity Correction +
##     Test Statistics     
 ## -----------------------
 ## Chi-Square       4.3821 
@@ -604,13 +602,13 @@ 

- Chi Square Test of Independence

+Chi Square Test of Independence

A chi-square test is used when you want to test if there is a significant relationship between two nominal (categorical) variables.

- Examples

+Examples

Using the hsb data, test if there is a relationship between the type of school attended (schtyp) and students’ gender (female).

-
infer_chisq_assoc_test(as.factor(hsb$female), as.factor(hsb$schtyp))
+
infer_chisq_assoc_test(as.factor(hsb$female), as.factor(hsb$schtyp))
##                Chi Square Statistics                 
 ## 
 ## Statistics                     DF    Value      Prob 
@@ -624,7 +622,7 @@ 

## Cramer's V 0.0153 ## ----------------------------------------------------

Using the hsb data, test if there is a relationship between the type of school attended (schtyp) and students’ socio economic status (ses).

-
infer_chisq_assoc_test(as.factor(hsb$schtyp), as.factor(hsb$ses))
+
infer_chisq_assoc_test(as.factor(hsb$schtyp), as.factor(hsb$ses))
##                Chi Square Statistics                 
 ## 
 ## Statistics                     DF    Value      Prob 
@@ -639,17 +637,17 @@ 

- Levene’s Test

+Levene’s Test

Levene’s test is used to determine if k samples have equal variances. It is less sensitive to departures from normality and is an alternative to Bartlett’s test. This test returns Levene’s robust test statistic and the two statistics proposed by Brown and Forsythe that replace the mean in Levene’s formula with alternative location estimators. The first alternative replaces the mean with the median and the second alternative replaces the mean with the 10% trimmed mean.
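For reference, the statistic has the usual Levene/Brown-Forsythe form (standard textbook presentation, not transcribed from the package source):

    W = \frac{N - k}{k - 1}\,
        \frac{\sum_{i=1}^{k} n_i\,(\bar{Z}_{i\cdot} - \bar{Z}_{\cdot\cdot})^2}
             {\sum_{i=1}^{k}\sum_{j=1}^{n_i} (Z_{ij} - \bar{Z}_{i\cdot})^2},
    \qquad Z_{ij} = \lvert Y_{ij} - \tilde{Y}_i \rvert

where \tilde{Y}_i is the i-th group mean (Levene), its median, or its 10% trimmed mean (the two Brown-Forsythe variants), and W is referred to an F distribution with k - 1 and N - k degrees of freedom.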

- Examples

+Examples

- Use Grouping Variable

+Use Grouping Variable

Using the hsb data, test whether variance in reading score is same across race.

-
# Using Grouping Variable
-infer_levene_test(hsb$read, group_var = hsb$race)
+
# Using Grouping Variable
+infer_levene_test(hsb$read, group_var = hsb$race)
##            Summary Statistics             
 ## Levels    Frequency    Mean     Std. Dev  
 ## -----------------------------------------
@@ -672,10 +670,10 @@ 

- Using Variables

+Using Variables

Using the hsb data, test whether variance is equal for reading, writing and social studies scores.

-
# Using Variables
-infer_levene_test(hsb$read, hsb$write, hsb$socst)
+
# Using Variables
+infer_levene_test(hsb$read, hsb$write, hsb$socst)
##            Summary Statistics             
 ## Levels    Frequency    Mean     Std. Dev  
 ## -----------------------------------------
@@ -697,11 +695,11 @@ 

- Use Simple Linear Model

+Use Simple Linear Model

Using the hsb data, test whether variance in reading score is same for male and female students.

-
# Using Linear Regression Model
-m <- lm(read ~ female, data = hsb)
-infer_levene_test(m)
+
##            Summary Statistics             
 ## Levels    Frequency    Mean     Std. Dev  
 ## -----------------------------------------
@@ -722,10 +720,10 @@ 

- Using Formula

+Using Formula

Using the hsb data, test whether variance in reading score is same across school types.

-
# Using Formula
-infer_levene_test(as.formula(paste0('read ~ schtyp')), hsb)
+
# Using Formula
+infer_levene_test(as.formula(paste0('read ~ schtyp')), hsb)
##            Summary Statistics             
 ## Levels    Frequency    Mean     Std. Dev  
 ## -----------------------------------------
@@ -748,13 +746,13 @@ 

- Cochran’s Q Test

+Cochran’s Q Test

Cochran’s Q test is an extension to the McNemar test for related samples that provides a method for testing for differences between three or more matched sets of frequencies or proportions. It is a procedure for testing if the proportions of 3 or more dichotomous variables are equal in some population. These outcome variables have been measured on the same people or other statistical units.

- Example

+Example

The exam data set contains scores of 15 students for three exams (exam1, exam2, exam3). Test if three exams are equally difficult.

- +
##    Test Statistics     
 ## ----------------------
 ## N                   15 
@@ -766,7 +764,7 @@ 

- McNemar Test

+McNemar Test

The McNemar test is a non-parametric test created by Quinn McNemar and first published in Psychometrika in 1947. It is similar to a paired t test but is applied to a dichotomous dependent variable. It is used to test whether a statistically significant change in proportions has occurred on a dichotomous trait at two time points in the same population. It can be used to answer whether:

  • two products are equally appealing?
  • @@ -775,11 +773,11 @@

- Examples

+Examples

Using the hsb data, test if the proportion of students in himath and hiread group is equal.

-
himath <- ifelse(hsb$math > 60, 1, 0)
-hiread <- ifelse(hsb$read > 60, 1, 0)
-infer_mcnemar_test(table(himath, hiread))
+
##            Controls 
 ## ---------------------------------
 ## Cases       0       1       Total 
@@ -814,7 +812,7 @@ 

## odds ratio 1.1667 ## ----------------------

Perform the above test using matrix as input.

-
infer_mcnemar_test(matrix(c(135, 18, 21, 26), nrow = 2))
+
infer_mcnemar_test(matrix(c(135, 18, 21, 26), nrow = 2))
##            Controls 
 ## ---------------------------------
 ## Cases       0       1       Total 
@@ -852,17 +850,17 @@ 

- Runs Test for Randomness

+Runs Test for Randomness

Runs Test can be used to decide if a data set is from a random process. It tests whether observations of a sequence are serially independent i.e. whether they occur in a random order by counting how many runs there are above and below a threshold. A run is defined as a series of increasing values or a series of decreasing values. The number of increasing, or decreasing, values is the length of the run. By default, the median is used as the threshold. A small number of runs indicates positive serial correlation; a large number indicates negative serial correlation.
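The z statistic reported in the examples is the usual normal approximation for the number of runs R in a sequence with n_1 values above and n_2 values below the threshold (standard formula, not transcribed from the package source):

    \mu_R = \frac{2 n_1 n_2}{n_1 + n_2} + 1, \qquad
    \sigma_R^2 = \frac{2 n_1 n_2\,(2 n_1 n_2 - n_1 - n_2)}{(n_1 + n_2)^2\,(n_1 + n_2 - 1)}, \qquad
    z = \frac{R - \mu_R}{\sigma_R}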

- Examples

+Examples

We will use the runs test to check regression residuals for serial correlation.

-
# linear regression
-reg <- lm(mpg ~ disp, data = mtcars)
-
-# basic example
-infer_runs_test(residuals(reg))
+
## Runs Test
 ##  Total Cases:  32 
 ##  Test Value :  -0.9630856 
@@ -873,8 +871,8 @@ 

## Variance (Runs): 7.741935 ## z Statistic: -2.156386 ## p-value: 0.03105355

-
# drop values equal to threshold
-infer_runs_test(residuals(reg), drop = TRUE)
+
## Runs Test
 ##  Total Cases:  32 
 ##  Test Value :  -0.9630856 
@@ -885,8 +883,8 @@ 

## Variance (Runs): 7.741935 ## z Statistic: -2.156386 ## p-value: 0.03105355

-
# recode data in binary format
-infer_runs_test(residuals(reg), split = TRUE)
+
# recode data in binary format
+infer_runs_test(residuals(reg), split = TRUE)
## Runs Test
 ##  Total Cases:  32 
 ##  Test Value :  -0.9630856 
@@ -897,8 +895,8 @@ 

## Variance (Runs): 7.741935 ## z Statistic: -2.156386 ## p-value: 0.03105355

-
# use mean as threshold
-infer_runs_test(residuals(reg), mean = TRUE)
+
# use mean as threshold
+infer_runs_test(residuals(reg), mean = TRUE)
## Runs Test
 ##  Total Cases:  32 
 ##  Test Value :  -1.12757e-16 
@@ -909,8 +907,8 @@ 

## Variance (Runs): 7.189642 ## z Statistic: -2.027896 ## p-value: 0.04257089

-
# threshold to be used for counting runs
-infer_runs_test(residuals(reg), threshold = 0)
+
## Runs Test
 ##  Total Cases:  32 
 ##  Test Value :  0 
@@ -925,7 +923,7 @@ 

- Credits

+Credits

The examples and the data set used in the vignette are borrowed from the sources listed below:

  • What statistical analysis should I use? UCLA: Statistical Consulting Group. from http://www.ats.ucla.edu/stat/mult_pkg/whatstat/