diff --git a/screenshoot.png b/screenshoot.png
new file mode 100644
index 0000000..75ed3ef
Binary files /dev/null and b/screenshoot.png differ
diff --git a/sql-project.Rmd b/sql-project.Rmd
index 99a7974..d5d4dcf 100644
--- a/sql-project.Rmd
+++ b/sql-project.Rmd
@@ -1,6 +1,6 @@
 ---
 title: "sql-workshop"
-author: "Charles Lang"
+author: "Xuechun Li"
 output: html_document
 ---
 
@@ -16,7 +16,7 @@ library(RMySQL)
 db_user <- 'admin'
 db_password <- 'testsql!'
 db_name <- 'oudb'
-db_host <- 'PASTE YOUR ENDPOINT HERE'
+db_host <- 'database-1.cljbhi3mo9lt.us-east-2.rds.amazonaws.com' # NOTE(review): endpoint (and the password above) are committed in plain text; consider Sys.getenv()
 db_port <- 3306
 
 mydb <- dbConnect(MySQL(), user = db_user, password = db_password, dbname = db_name, host = db_host, port = db_port)
@@ -55,6 +55,15 @@ dbReadTable(mydb, 'studentInfo')
 
 #EXERCISE 1
 #Make two toy data sets with at least three variables and at least 30 rows each in them. Have a mix of numeric and character variables. Transfer these dataframes to your SQL database using the DBI commands. Name the tables whatever you like.
+# install.packages("dplyr")
+library(dplyr)
+D1 <- select(studentInfo,"id_student","gender","highest_education","studied_credits")
+D2 <- select(studentInfo,"id_student","region","imd_band","disability")
+# overwrite = TRUE replaces any Table1/Table2 left over from a previous run
+dbWriteTable(mydb, "Table1", D1, overwrite = TRUE)
+dbWriteTable(mydb,"Table2", D2, overwrite = TRUE)
+dbReadTable(mydb, 'Table1')
+dbReadTable(mydb, 'Table2')
 ```
 
 ## Getting into SQL - READING
@@ -83,8 +92,14 @@ dbGetQuery(mydb, "SELECT COUNT(*) FROM studentAssessment WHERE score > 50 AND id
 
 #EXERCISE 2
 #Read one of your toy data tables, make sure the output is ordered in descending order, you rename one of the variables and the output is limited to the first 20 rows.
+dbGetQuery(mydb, "SELECT id_student AS 'Student ID',
+  gender, highest_education,studied_credits FROM Table1 ORDER BY studied_credits
+  DESC LIMIT 20;")
+
 #Read the other table according to a condition of one of the variables.
 
+dbGetQuery(mydb, "SELECT * FROM Table2 WHERE region = 'North Western Region' ;")
+
 ```
 
 ## Getting into SQL - UPDATING
@@ -123,7 +138,17 @@ dbGetQuery(mydb, "SELECT * FROM studentAssessment ORDER BY id_student LIMIT 10;"
 
 #EXERCISE 3
 #Insert a new row in one of your toy data tables leaving one variable empty. Change one value in your other table. Display your new tables. Delete the row you edited and the row you inserted.
+dbGetQuery(mydb, "INSERT INTO Table1 (id_student, gender, highest_education)
+  VALUES ('13579', 'F', 'Lower Than A Level');") # studied_credits is deliberately omitted so it stays empty
+dbGetQuery(mydb, "SELECT * FROM Table1 WHERE id_student = 13579;") # show the row that was just inserted
+dbGetQuery(mydb, "ALTER TABLE Table1 DROP COLUMN row_names;")
+dbReadTable(mydb, 'Table1')
+dbGetQuery(mydb, "UPDATE Table2 SET disability = 'S' WHERE id_student = 11391;")
+dbReadTable(mydb, 'Table2')
+
 
+dbGetQuery(mydb, "DELETE FROM Table1 WHERE id_student = 13579 AND studied_credits IS NULL;") # target the inserted row by id, not by a credits value
+dbGetQuery(mydb, "DELETE FROM Table2 WHERE disability = 'S';") # 'S' is the marker value written by the UPDATE above
 ```
 
 ## Add/Deleting Table
@@ -159,6 +184,19 @@ dbGetQuery(mydb, "DROP TABLE IF EXISTS test;") #No error since it is only if it
 
 #EXERCISE 4
 #Create a table that is exactly the same as your first toy data table but this time use SQL commands. Display your new table. Then delete the original table.
+dbGetQuery(mydb, "DROP TABLE IF EXISTS new_table1;") # lets this chunk be re-run without a 'table already exists' error
+dbGetQuery(mydb,"CREATE TABLE new_table1 (
+  id_student INTEGER,
+  gender TEXT,
+  highest_education TEXT,
+  studied_credits INTEGER
+  );")
+dbListTables(mydb)
+dbGetQuery(mydb,"INSERT INTO new_table1 (id_student, gender, highest_education,studied_credits) SELECT id_student, gender, highest_education,studied_credits FROM Table1;")
+
+dbReadTable(mydb, 'new_table1')
+dbGetQuery(mydb, "DROP TABLE Table1;")
+dbListTables(mydb)
 ```
 
 # NULL Value
@@ -212,6 +250,21 @@ dbGetQuery(mydb,"INSERT INTO test2 (score, student) VALUES (NULL, 'A');")
 
 #EXERCISE 5
 #Recreate one of your toy data tables with the constraint that for one of the integer variablesthe default value will be zero. Test your table by inserting some empty values. Display your new tables. Then delete your table.
+dbGetQuery(mydb,"CREATE TABLE new_table2 (
+  id_student INTEGER,
+  gender TEXT,
+  highest_education TEXT,
+  studied_credits INTEGER DEFAULT 0
+  );")
+
+dbGetQuery(mydb,"INSERT INTO new_table2 (id_student, gender, highest_education, studied_credits) VALUES ('1', 'M', 'A Level or Equivalent','300');")
+# studied_credits is omitted in the next insert so the DEFAULT 0 constraint is actually exercised
+dbGetQuery(mydb,"INSERT INTO new_table2 (id_student, gender, highest_education) VALUES ('12345', 'F', 'HE Qualification');")
+
+
+dbReadTable(mydb, 'new_table2')
+dbGetQuery(mydb, "DROP TABLE new_table2;")
+dbListTables(mydb)
 ```
 
 
@@ -227,6 +280,10 @@ dbGetQuery(mydb, "ALTER TABLE studentAssessment DROP COLUMN email;")
 
 #EXERCISE 6
 #Add a column to one of your toy data tables with a default value of 3. Display your new table. Delete this column.
+
+dbGetQuery(mydb, "ALTER TABLE Table2 ADD years INTEGER DEFAULT 3 ")
+dbReadTable(mydb, 'Table2')
+dbGetQuery(mydb, "ALTER TABLE Table2 DROP COLUMN years;")
 ```
 
 
@@ -247,7 +304,26 @@ dbGetQuery(mydb, "DROP TABLE IF EXISTS test3;")
 
 #EXERCISE 7
 #Create a new table with four variables and a primary key that is a sequential id value.
+dbGetQuery(mydb, "DROP TABLE IF EXISTS Table3;") # start from a fresh table so the chunk can be re-run
+dbGetQuery(mydb,"CREATE TABLE Table3 (
+  id_student INTEGER AUTO_INCREMENT PRIMARY KEY,
+  name TEXT,
+  score INTEGER,
+  age INTEGER
+  );")
+# id_student is not supplied in the inserts below: AUTO_INCREMENT generates the sequential key
+dbGetQuery(mydb,"INSERT INTO Table3 (
+  name,
+  score,
+  age) VALUES ('Grace','95','18');")
+
+dbGetQuery(mydb,"INSERT INTO Table3 (
+  name,
+  score,
+  age) VALUES ('Veronica','90','20');")
+
 
+dbGetQuery(mydb, "SELECT * FROM Table3;")
 ```
 
 ## Filtering (WHERE)
@@ -276,8 +352,11 @@ dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE regio
 dbGetQuery(mydb, "SELECT id_student, gender, region FROM studentInfo WHERE region IN ('Wales','Ireland');")
 
 #EXERCISE 8
+
 #Query one of your original toy data tables, for two different conditions.
 
+dbReadTable(mydb, 'Table2')
+dbGetQuery(mydb, "SELECT region, disability FROM Table2 WHERE region IN ('Wales','Scotland') AND disability = 'Y' ;")
 ```
 
 ## Removing Duplicates
@@ -289,6 +368,11 @@ dbGetQuery(mydb, "SELECT DISTINCT region, gender FROM studentInfo;")
 
 #EXERCISE 9
 #Insert a duplicate row into one of your toy data tables. Then query the table without including duplicates.
+dbReadTable(mydb, 'Table2')
+dbGetQuery(mydb,"INSERT INTO Table2 (id_student, region, imd_band, disability) VALUES ('28400', 'Scotland', '20-30%','N');")
+# DISTINCT over the four data columns only (SELECT * would also compare row_names)
+dbGetQuery(mydb, "SELECT DISTINCT id_student, region, imd_band, disability FROM Table2;")
+
 ```
 
 ## Conditional Expressions (non-standard)
@@ -360,6 +444,26 @@ dbGetQuery(mydb, "SELECT * FROM left_table
 
 #EXERCISE 10
 # Create a common id variable in your two toy data tables. Then join those tables so that your query returns all the values from one table and only those that match from the other.
+dbGetQuery(mydb, "SELECT * FROM new_table1;")
+dbGetQuery(mydb, "SELECT * FROM Table2;")
+# id_student is the common id shared by both tables
+dbGetQuery(mydb,"SELECT l.id_student AS new_table1, r.id_student AS Table2
+  FROM new_table1 AS l
+  JOIN Table2 AS r ON l.id_student = r.id_student") # inner join: only ids present in both tables
+dbGetQuery(mydb,"SELECT l.id_student AS new_table1, r.id_student AS Table2
+  FROM new_table1 AS l
+  RIGHT JOIN Table2 AS r ON l.id_student = r.id_student") # every Table2 row, plus matches from new_table1
+dbGetQuery(mydb,"SELECT l.id_student AS new_table1, r.id_student AS Table2
+  FROM new_table1 AS l
+  LEFT JOIN Table2 AS r ON l.id_student = r.id_student") # every new_table1 row, plus matches from Table2
+
+dbGetQuery(mydb, "ALTER TABLE Table2 DROP COLUMN row_names;") # leaves Table2 with four columns, the same count as new_table1, as UNION requires
+dbReadTable(mydb, 'new_table1')
+dbReadTable(mydb, 'Table2')
+
+dbGetQuery(mydb, "SELECT * FROM new_table1
+  UNION
+  SELECT * FROM Table2;")
 ```
 ```{r}
 #Now disconnect from your database