diff --git a/Assignment7.Rmd b/Assignment7.Rmd
index 105cbdf..984b710 100644
--- a/Assignment7.Rmd
+++ b/Assignment7.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "Assignment 7 - Answers"
-author: "Charles Lang"
-date: "11/30/2016"
+author: "Shu-Yi Hsu"
+date: "12/17/2016"
 output: html_document
 ---
 
@@ -11,42 +11,88 @@ In the following assignment you will be looking at data from one level of an
 
 #Upload data
 ```{r}
-
+D1 <- read.csv("online.data.csv")
+View(D1)
 ```
 
 #Visualization
 ```{r}
 #Start by creating histograms of the distributions for all variables (#HINT: look up "facet" in the ggplot documentation)
-#Then visualize the relationships between variables
+library(ggplot2)
+library(tidyr)
+
+#Recode level.up to 0/1 so every variable can be treated as numeric
+D1$level.up <- ifelse(D1$level.up == "yes", 1, 0)
+View(D1)
+D2 <- D1[, -1] #drop the id column
+View(D2)
+Dlong <- gather(D2, "vars", "value", 1:6)
+Dlong$value <- as.numeric(Dlong$value)
+ggplot(Dlong, aes(value, ..density..)) + geom_histogram(binwidth = 0.1) + facet_wrap(~vars, scales = "free")
+
+#Then visualize the relationships between variables
+cor(D2)
+pairs(D2)
+qplot(x = forum.posts, y = pre.test.score, data = D2, main = "pre-test")
+qplot(x = forum.posts, y = post.test.score, data = D2, main = "post-test")
+qplot(x = forum.posts, y = av.assignment.score, data = D2, main = "Average assignment")
+qplot(x = av.assignment.score, y = post.test.score, data = D2, main = "post-test vs. average assignment")
 #Try to capture an intuition about the data and the relationships
 ```
 
 #Classification tree
 ```{r}
 #Create a classification tree that predicts whether a student "levels up" in the online course using three variables of your choice (As we did last time, set all controls to their minimums)
-
+#install.packages("rpart.plot") #run once if the package is not yet installed
+library(rpart)
+library(rpart.plot)
+str(D1)
+#Make sure the count variables are numeric before fitting the tree
+D1$messages <- as.numeric(D1$messages)
+D1$forum.posts <- as.numeric(D1$forum.posts)
+tree1 <- rpart(level.up ~ messages + forum.posts + pre.test.score, method = "class", data = D1)
 #Plot and generate a CP table for your tree
-
+printcp(tree1)
+rpart.plot(tree1)
 #Generate a probability value that represents the probability that a student levels up based on your classification tree
-D1$pred <- predict(rp, type = "prob")[,2]#Last class we used type = "class" which predicted the classification for us, this time we are using type = "prob" to see the probability that our classififcation is based on.
+D1$pred <- predict(tree1, type = "prob")[,2]
+#Last class we used type = "class", which predicted the classification for us; this time we use type = "prob" to see the probability on which our classification is based.
 ```
 
 ## Part II
 #Now you can generate the ROC curve for your model. You will need to install the package ROCR to do this.
 ```{r}
 library(ROCR)
 
 #Plot the curve
 pred.detail <- prediction(D1$pred, D1$level.up)
 plot(performance(pred.detail, "tpr", "fpr"))
 abline(0, 1, lty = 2)
 
 #Calculate the Area Under the Curve
-unlist(slot(performance(Pred2,"auc"), "y.values"))#Unlist liberates the AUC value from the "performance" object created by ROCR
+unlist(slot(performance(pred.detail, "auc"), "y.values"))
+#Unlist liberates the AUC value from the "performance" object created by ROCR
 
 #Now repeat this process, but using the variables you did not use for the previous model and compare the plots & results of your two models. Which one do you think was the better model? Why?
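+
+#Model 2: swap messages for av.assignment.score (forum.posts and pre.test.score are reused), then repeat the fit, CP table, ROC plot, and AUC steps from above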
+tree2 <- rpart(level.up ~ forum.posts + av.assignment.score + pre.test.score, method = "class", control = rpart.control(minsplit = 1, minbucket = 1, cp = 0.001), data = D1)
+printcp(tree2)
+post(tree2, file = "tree2.ps", title = "Level Up - Tree 2")
+D1$pred2 <- predict(tree2, type = "prob")[,2]
+pred.detail2 <- prediction(D1$pred2, D1$level.up)
+plot(performance(pred.detail2, "tpr", "fpr"))
+abline(0, 1, lty = 2)
+
+unlist(slot(performance(pred.detail2, "auc"), "y.values")) #0.997
+
 ```
 
 ## Part III
 #Thresholds
@@ -54,28 +100,30 @@ unlist(slot(performance(Pred2,"auc"), "y.values"))#Unlist liberates the AUC valu
 #Look at the ROC plot for your first model. Based on this plot choose a probability threshold that balances capturing the most correct predictions against false positives. Then generate a new variable in your data set that classifies each student according to your chosen threshold.
 
-threshold.pred1 <-
-
+D1$threshold.pred1 <- ifelse(D1$pred > 0.1, 1, 0)
 
 #Now generate three diagnostics:
+#(cell counts below are read off table1, generated further down)
-D1$accuracy.model1 <-
+D1$accuracy.model1 <- (571 + 398)/(571 + 29 + 2 + 398) #0.969
 
-D1$precision.model1 <-
+D1$precision.model1 <- 571/(571 + 2) #0.9965
 
-D1$recall.model1 <-
+D1$recall.model1 <- 571/(571 + 29) #0.9517
 
 #Finally, calculate Kappa for your model according to:
 
 #First generate the table of comparisons
 table1 <- table(D1$level.up, D1$threshold.pred1)
 
 #Convert to matrix
 matrix1 <- as.matrix(table1)
 
 #Calculate kappa
-kappa(matrix1, exact = TRUE)/kappa(matrix1)
+kappa(matrix1, exact = TRUE)/kappa(matrix1) # =1.103
 
 #Now choose a different threshold value and repeat these diagnostics. What conclusions can you draw about your two thresholds?
+
 ```
 
 ### To Submit Your Assignment
diff --git a/tree2.ps b/tree2.ps
new file mode 100644
index 0000000..3d80f79
Binary files /dev/null and b/tree2.ps differ
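
A note on the kappa step above: base R's `kappa()` estimates the condition number of a matrix, not Cohen's kappa, which is why the reported value (1.103) exceeds 1, the maximum possible for an agreement statistic. A minimal sketch of Cohen's kappa computed by hand, taking the cell counts from the model-1 diagnostics above (571 and 398 correct, 29 and 2 incorrect):

```r
#Cohen's kappa from the model-1 confusion table
#(rows = actual level.up, columns = thresholded prediction)
conf <- matrix(c(571, 2, 29, 398), nrow = 2)
po <- sum(diag(conf)) / sum(conf)                       #observed agreement: 0.969
pe <- sum(rowSums(conf) * colSums(conf)) / sum(conf)^2  #chance agreement: 0.5146
(po - pe) / (1 - pe)                                    #Cohen's kappa: ~0.936
```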