Linear-Regression-Class/Week2-Inference (1).R at master · Ali93NY/Linear-Regression-Class · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Simple Linear Regression Inference

# Example from Lecture 1

x = 1:5
y = c(1, 1, 2, 2, 4)

reg = lm(y~x)
summary(reg)

# TasK: Test whether or not there is a linear association between
# advertising expenditures and sales using alpha = 0.05

# State the hypotheses:
# Ho: beta1 = 0 vs. Ha: beta1 not equal 0

# Note that summary provides the SE, test statistics and p-values
summary(reg)$coefficients

# To obtain just the SE of b1:
(SEb1 = summary(reg)$coefficients[2,2])


# How do we obtain the critical value from the t-table?
qt(0.975,3)
# Note we had to provide the left area corresponding to right area alpha/2
# df = n - 2 = 5 - 2 = 3

# State the decision rule:
# Reject H0 if |t_test| > crit.value

# Conclusion: Since 3.66 > 3.18 we reject Ho and conclude beta1 is significant, that is not equal 0
# Report p-value
summary(reg)$coefficients[2,4]

# We could have reached the same conclusion using the p-value:
# Since 0.035 < 0.05 we reject Ho

# Exercise: repeat the above for alpha = 0.01
alpha = 0.01
qt(1-alpha/2,3)

# Conclusion: Since |3.656| is not greater than 5.84
# we don't reject Ho


# Task: Test Ha: beta1 > 0
# p-value is
0.0354/2
# [1] 0.0177
# Or
summary(reg)$coefficients[2,4]/2

# Conclusion: Since 0.0177 < 0.05 we reject Ho and conclude beta1 > 0

# TasK: Obtain 95% CI for beta1 and interpret it

# Manually from the output:

lower = reg$coeff[2] - qt(0.975,3)*SEb1
lower
upper = reg$coeff[2] + qt(0.975,3)*SEb1
upper

# The 95% CI for beta1 is (0.09, 1.31)

# Interpretation: we are 95% confident that for each extra $100 in advertising
# sales increase by as little as $90 to as much as $1310

# Using built-in funcion
confint(reg)

# Or just for beta 1
confint(reg)[2,]


# Correlation (manual)
x.bar = mean(x)
y.bar = mean(y)
SSxy = sum((x-x.bar)*(y-y.bar))
SSxx = sum((x-x.bar)^2)
SSyy = sum((y-y.bar)^2)

(r = SSxy/sqrt(SSxx*SSyy))
# Note: Strong positive linear relationship

# Easier:
cor(x,y)

# Test the significance of the correlation
# and CI for population correlation coefficient rho:
cor.test(x,y)
# Note it is equivalent to the regression slope test (Same p-value and test statistic)
# 95% CI for rho is (0.11, 0.99)

# R-squared
r^2

# Or from the regression output:
summary(reg)$r.squared

# Predict y when x = 4
-0.1 + 0.7*4


newdata =data.frame(x=4)
# 95% CI
predict(reg, newdata, interval = "conf", level=0.95)
#fit      lwr      upr
#1 2.7 1.919478 3.480522

# 95% PI
predict(reg, newdata, interval = "predict", level=0.95)

# ANOVA
anova(reg)

## Fire damage example

data = read.table(file.choose(), header=T)
attach(data)
reg = lm(Damage ~ Distance)
summary(reg)

# Test if damage increases when distance increases

# Ho: beta1 = 0 vs. Ha: beta1 > 0
summary(reg)$coef[2,4]/2
# p-value = 6.239e-09
# Conclusion: Since p-value < 0.05 we reject Ho
# That is, fire damage increases as distance increases

# Confindence intervals for the coefficients
confint(reg)

plot(Distance, Damage)
abline(reg)

newdata = data.frame(Distance = 3.5)
predict(reg, newdata, interval = "confidence", level = 0.95)


predict(reg, newdata, interval = "predict", level = 0.95)