Skip to content

Commit 8e8dc2f

Browse files
authored
New demos for OML4R (from AskTOM Session from October 28, 2020) (#131)
* New OML4Py Notebook demos and updates to OML4SQL Notebooks New OML4Py Notebook demos and updates to OML4SQL Notebooks * New Demos for OML4R New Demos for Oracle Machine Learning for R
1 parent cf81bbf commit 8e8dc2f

File tree

3 files changed

+281
-1
lines changed

3 files changed

+281
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
#########################################################################
2+
##
3+
## Oracle Machine Learning for R Tour
4+
##
5+
## Copyright (c) 2020 Oracle Corporation
6+
##
7+
## The Universal Permissive License (UPL), Version 1.0
8+
##
9+
## https://oss.oracle.com/licenses/upl/
10+
##
11+
###########################################################################
12+
# In this Tour, we explore the different features of OML4R
13+
14+
rm(list=ls())
15+
16+
#-----------------------
17+
# TRANSPARENCY LAYER
18+
#-----------------------
19+
20+
library(ORE)
21+
options(ore.warn.order=FALSE)
22+
ore.connect(user="rquser",
23+
conn_string="ORCLPDB",
24+
host="localhost",
25+
password="rquser",
26+
all=TRUE)
27+
28+
#-- What tables are in the database schema we connected to?
29+
30+
ore.ls()
31+
32+
class(NARROW)
33+
colnames(NARROW)
34+
dim(NARROW)
35+
36+
summary(NARROW[,1:5])
37+
38+
#-- retrieve data from the database
39+
40+
narrow <- ore.pull(NARROW)
41+
class(narrow)
42+
43+
str(narrow) # data.frame
44+
45+
str(NARROW) # ore.frame proxy object
46+
47+
NARROW@dataQry # underlying data query for proxy object
48+
49+
#-- Column selection using standard R syntax
50+
51+
colnames(ONTIME_S)
52+
dim(ONTIME_S)
53+
54+
df <- ONTIME_S[,c("YEAR","DEST","ARRDELAY")]
55+
class(df) # an ore.frame proxy object
56+
dim(df)
57+
58+
head(df)
59+
head(ONTIME_S[,c(1,4,23)]) # project columns using column indexes
60+
head(ONTIME_S[,-(5:26)]) # exlcude columns using column indexes
61+
62+
#-- Row and column filtering
63+
64+
df1 <- df[df$ARRDELAY>20 | df$DEST=="BOS",1:3]
65+
head(df1,6)
66+
67+
#-- Aggregation
68+
69+
# How many flights per destination?
70+
71+
aggdata <- aggregate(ONTIME_S$DEST,
72+
by = list(ONTIME_S$DEST),
73+
FUN = length)
74+
names(aggdata) <- c("Destination","FlightCnt")
75+
class(aggdata)
76+
head(aggdata)
77+
78+
79+
#-- Overloaded dplyr using OREdplyr on ore.frames
80+
81+
library(OREdplyr) # load OREdplyr explicitly to use
82+
83+
select(ONTIME_S, YEAR, DEST, ARRDELAY, DEPDELAY) %>% head() # select columns
84+
85+
colnames(ONTIME_S)
86+
res <- select(ONTIME_S, -CANCELLED,-CANCELLATIONCODE, -DIVERTED) %>% head() # exclude columns
87+
colnames(res)
88+
89+
dim(ONTIME_S)
90+
filter(ONTIME_S, MONTH == 1, DAYOFMONTH == 1) %>% dim() # filter rows
91+
filter(ONTIME_S, DEPDELAY > 240) %>% dim()
92+
93+
# Group mean arrival delay by airline
94+
tbl_avg <- ONTIME_S %>%
95+
group_by(UNIQUECARRIER) %>%
96+
summarise(avgArrDelay = round(mean(ARRDELAY, na.rm = TRUE), digits=2)) %>%
97+
arrange(.$avgArrDelay)
98+
head(tbl_avg,10)
99+
tail(tbl_avg)
100+
101+
#-- Join / merge data
102+
103+
df1 <- data.frame(x1=1:5, y1=letters[1:5]) # create two data.frames
104+
df2 <- data.frame(x2=5:1, y2=letters[11:15])
105+
merge (df1, df2, by.x="x1", by.y="x2") # merge the data.frames
106+
107+
ore.drop(table="TEST_DF1")
108+
ore.drop(table="TEST_DF2")
109+
ore.create(df1, table="TEST_DF1") # create tables from the same data.frames
110+
ore.create(df2, table="TEST_DF2")
111+
merge (TEST_DF1, TEST_DF2, by.x="x1", by.y="x2") # merge the ore.frames
112+
113+
# using OREdplyr
114+
res <- TEST_DF1 %>% left_join(TEST_DF2, by=c("x1"="x2"))
115+
res
116+
117+
#-- Overloaded graphics functions
118+
119+
# Generate boxplot of airline flight delay by day of week
120+
delay <- ONTIME_S$ARRDELAY
121+
dayofweek <- ONTIME_S$DAYOFWEEK
122+
bd <- split(delay, dayofweek)
123+
boxplot(bd, notch = TRUE, col = "red", cex = 0.5, # statistics computed in-database
124+
outline = FALSE, axes = FALSE,
125+
main = "Airline Flight Delay by Day of Week",
126+
ylab = "Delay (minutes)", xlab = "Day of Week")
127+
axis(1, at=1:7, labels=c("Monday", "Tuesday", "Wednesday", "Thursday",
128+
"Friday", "Saturday", "Sunday"))
129+
axis(2)
130+
131+
#---------------------------------------------
132+
# Machine Learning
133+
#---------------------------------------------
134+
135+
#-- Classification using ore.odmNB
136+
137+
data(titanic3,package="PASWR")
138+
139+
t3 <- ore.push(titanic3) # create ore.frame proxy object as temporary table
140+
class(t3)
141+
142+
t3$survived <- ifelse(t3$survived == 1, "Yes", "No") # recoding
143+
144+
n.rows <- nrow(t3)
145+
set.seed(seed=6218945)
146+
random.sample <- sample(1:n.rows, ceiling(n.rows/2)) # generate sample indexes
147+
148+
t3.train <- t3[random.sample,] # train/test sampling using row indexing
149+
t3.test <- t3[setdiff(1:n.rows,random.sample),]
150+
151+
class(t3.train) # ore.frame proxy object
152+
153+
priors <- c(0.4, 0.6)
154+
names(priors) <- c("Yes", "No")
155+
156+
nb <- ore.odmNB(survived ~ pclass+sex+age+fare+embarked, t3.train, class.priors=priors)
157+
158+
nb.res <- predict (nb, t3.test,"survived")
159+
160+
head(nb.res,10)
161+
162+
# Compute the confusion matrix in-database
163+
with(nb.res, table(survived,PREDICTION, dnn = c("Actual","Predicted")))
164+
165+
#-----------------------
166+
# EMBEDDED R EXECUTION
167+
#-----------------------
168+
169+
# Random Red Dots
170+
171+
RandomRedDots <- function(numDots=100){
172+
id <- 1:10
173+
print(plot( 1:numDots, rnorm(numDots), pch = 21,
174+
bg = "red", cex = 2 ))
175+
data.frame(id=id, val=id / 100)
176+
}
177+
178+
RandomRedDots(100)
179+
180+
dev.off()
181+
res <- NULL
182+
res <- ore.doEval(RandomRedDots, numDots=200)
183+
res
184+
185+
ore.scriptDrop("RandomRedDots")
186+
ore.scriptCreate("RandomRedDots",RandomRedDots)
187+
dev.off()
188+
ore.doEval(FUN.NAME="RandomRedDots")
189+
190+
191+
#-- Go to SQL Developer in script '~/OML4R/OML4R Vignettes.sql' and invoke function from SQL
192+
193+
194+
#-- Group Apply
195+
196+
# Build one linear model per destination to predict arrival delay
197+
198+
ONTIME_S$DEST <- substr(as.character(ONTIME_S$DEST),1,3)
199+
DAT <- ONTIME_S[ONTIME_S$DEST %in% c("BOS","SFO","LAX","ORD","ATL","PHX","DEN"),]
200+
dim(DAT)
201+
202+
modList <- ore.groupApply(X=DAT,
203+
INDEX=DAT$DEST,
204+
function(dat) {
205+
lm(ARRDELAY ~ DISTANCE + DEPDELAY, dat)
206+
})
207+
length(modList)
208+
summary(modList$BOS) # return model for BOS
209+
summary(modList$SFO) # return model for SFO
210+
211+
# housekeeping
212+
213+
rm(list=ls())
214+
dev.off()
215+
216+
ore.drop(table="TEST_DF1")
217+
ore.drop(table="TEST_DF2")
218+
219+
ore.scriptDrop("RandomRedDots")
220+
221+
ore.disconnect()
222+
223+
################################################
224+
## End of Script
225+
################################################
226+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
--###########################################################################
2+
--##
3+
--## Oracle Machine Learning for R
4+
--##
5+
--## Demo Script for OML4R Tour
6+
--##
7+
--## Copyright (c) 2020 Oracle Corporation
8+
--##
9+
--## The Universal Permissive License (UPL), Version 1.0
10+
--##
11+
--## https://oss.oracle.com/licenses/upl/
12+
--##
13+
--###########################################################################
14+
15+
--# OML4R Tour
16+
17+
-- Random Red Dots
18+
-- Generate images of random red dots and a simple data.frame
19+
begin
20+
sys.rqScriptDrop('RandomRedDots2');
21+
sys.rqScriptCreate('RandomRedDots2',
22+
'function(num_dots_1=100, num_dots_2=10){
23+
id <- 1:10
24+
plot( 1:num_dots_1, rnorm(num_dots_1), pch = 21,
25+
bg = "red", cex = 2, main="Random Red Dots" )
26+
plot( 1:num_dots_2, rnorm(num_dots_2), pch = 21,
27+
bg = "red", cex = 2, main="Random Red Dots" )
28+
data.frame(id=id, val=id / 100)
29+
}');
30+
end;
31+
/
32+
33+
-- Return image only as PNG BLOB, one per image per row
34+
-- Structured content not returned with PNG option
35+
select ID, IMAGE
36+
from table(rqEval( NULL,'PNG','RandomRedDots2'));
37+
38+
-- Return structured data only by specifying table definition
39+
select id, val
40+
from table(rqEval( NULL,'select 1 id, 1 val from dual','RandomRedDots2'));
41+
42+
-- Return structured and image content within XML string
43+
select dbms_lob.substr( value, 4000, 1 )
44+
from table(rqEval(NULL, 'XML', 'RandomRedDots2'));
45+
46+
-- Pass arguments to change number of dots
47+
select ID, IMAGE
48+
from table(rqEval(cursor(select 500 "num_dots_1", 800 "num_dots_2" from dual),
49+
'PNG', 'RandomRedDots2'));
50+
51+
--#####################################################
52+
--## End of Script
53+
--#####################################################
54+

machine-learning/r/oml4r/oml4r-vignette-04-clustering.r

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ tail(predict(km.mod1,X,type=c("class","raw"),supplemental.cols=c("x","y")),3) #
9393
tail(predict(km.mod1,X,type="raw",supplemental.cols=c("x","y")),3) # ask for only raw probabilities with supp data
9494

9595
###################################################
96-
# Use Oracle Orthoginal Partitioning Clustering
96+
# Use Oracle Orthogonal Partitioning Clustering
9797
# density-based algorithm
9898
###################################################
9999

0 commit comments

Comments
 (0)