diff --git a/DESCRIPTION b/DESCRIPTION
index a57f69c..e6fb34b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -4,8 +4,8 @@ Title: Interface with Azure Machine Learning Datasets, Experiments and Web Servi
Description: Functions and datasets to support Azure Machine Learning. This
allows you to interact with datasets, as well as publish and consume R functions
as API services.
-Version: 0.2.11
-Date: 2016-07-01
+Version: 0.2.12
+Date: 2016-07-13
Authors@R: c(
person("Andrie", "de Vries", role=c("aut", "cre"), email="adevries@microsoft.com"),
person(family="Microsoft Corporation", role="cph"),
diff --git a/NAMESPACE b/NAMESPACE
index 50f979b..7bc74c0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,4 +1,4 @@
-# Generated by roxygen2 (4.1.1): do not edit by hand
+# Generated by roxygen2: do not edit by hand
S3method(print,Datasets)
S3method(print,Experiments)
diff --git a/R/consume.R b/R/consume.R
index caa3b54..a7f758f 100644
--- a/R/consume.R
+++ b/R/consume.R
@@ -29,6 +29,7 @@
#' @export
#'
#' @inheritParams refresh
+#' @inheritParams publishWebService
#' @param endpoint Either an AzureML web service endpoint returned by \code{\link{publishWebService}}, \code{\link{endpoints}}, or simply an AzureML web service from \code{\link{services}}; in the latter case the default endpoint for the service will be used.
#' @param ... variable number of requests entered as lists in key-value format; optionally a single data frame argument.
#' @param globalParam global parameters entered as a list, default value is an empty list
@@ -43,7 +44,7 @@
#' @family consumption functions
#' @importFrom jsonlite fromJSON
#' @example inst/examples/example_publish.R
-consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "output1", tries = 5)
+consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "output1", .retry = 5)
{
if(is.Service(endpoint))
{
@@ -73,7 +74,7 @@ consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "outpu
}
}
# Make API call with parameters
- result <- callAPI(apiKey, requestUrl, requestsLists, globalParam, retryDelay, tries = tries)
+ result <- callAPI(apiKey, requestUrl, requestsLists, globalParam, retryDelay, .retry = .retry)
if(inherits(result, "error")) stop("AzureML returned an error code")
# Access output by converting from JSON into list and indexing into Results
@@ -104,14 +105,14 @@ consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "outpu
# @param keyvalues the data to be passed to the web service
# @param globalParam the global parameters for the web service
# @param retryDelay number of seconds to wait after failing (max 3 tries) to try again
-# @param tries the number of retry attempts
+# @param .retry the number of retry attempts
# @return result the response
#
# @importFrom jsonlite toJSON
# @importFrom curl handle_setheaders new_handle handle_setopt curl_fetch_memory
# @keywords internal
callAPI <- function(apiKey, requestUrl, keyvalues, globalParam,
- retryDelay=10, tries = 5) {
+ retryDelay=10, .retry = 5) {
# Set number of tries and HTTP status to 0
result <- NULL
# Construct request payload
@@ -136,7 +137,7 @@ callAPI <- function(apiKey, requestUrl, keyvalues, globalParam,
postfields = body
)
)
- r <- try_fetch(requestUrl, h, delay = retryDelay, tries = tries)
+ r <- try_fetch(requestUrl, h, no_retry_on = 400, delay = retryDelay, .retry = .retry)
result <- fromJSON(rawToChar(r$content))
if(r$status_code >= 400) {
stop(paste(capture.output(result), collapse="\n"))
diff --git a/R/datasets.R b/R/datasets.R
index df7a2f9..ee73f18 100644
--- a/R/datasets.R
+++ b/R/datasets.R
@@ -268,7 +268,7 @@ delete.datasets <- function(ws, name, host){
curl_escape(ws$id),
curl_escape(familyId)
)
- z <- try_fetch(uri, h, tries = 3, delay = 2)
+ z <- try_fetch(uri, h, .retry = 3, delay = 2)
z$status_code
}
status_code <- vapply(datasets$FamilyId, delete_one, FUN.VALUE = numeric(1), USE.NAMES = FALSE)
diff --git a/R/discover.R b/R/discover.R
index 6bc08b9..b5a733a 100644
--- a/R/discover.R
+++ b/R/discover.R
@@ -86,43 +86,15 @@ discoverSchema <- function(helpURL, scheme = "https",
# Accesses the names of the columns in the example
# and stores it in a list of column names
-# columnNames <- vector("list", length = length(inputExample))
-# columnNames <- list()
-# for(i in seq_along(inputExample)) {
-# columnNames[[i]] = names(inputExample)[i]
-# }
columnNames <- lapply(seq_along(inputExample), function(i)names(inputExample[i]))
- # Uses multiple nested loops to access the various paths in the
- # swagger document and find the execution path
- foundExecPath = FALSE
- pathNo = 0
- execPathNo = -1
- for(execPath in swagger$paths) {
- pathNo = pathNo + 1
- for(operationpath in execPath) {
- for(operation in operationpath) {
- # Goes through the characteristcs in every operation e.g. operationId
- for(charac in operation) {
- # Finds the path in which the
- # operationId (characteristic of the path) == execute
- # and sets the execution path number
- if(charac[1] == "execute")
- {
- #Sets found execution path to true
- foundExecPath = TRUE
- execPathNo = pathNo
- break
- }
- }
- }
- }
+ execPathNo <- grep("/execute\\?", names(swagger$paths))
+  if(length(execPathNo) == 1) {
+    executePath <- names(swagger$paths)[[execPathNo]]
+  } else {
+    executePath <- "Path not found"
}
- # Stores the execution path
- executePath <- if(foundExecPath) names(swagger$paths)[[execPathNo]]
- else "Path not found"
-
# Constructs the request URL with the parameters as well as execution path found.
# The separator is set to an empty string
requestUrl <- paste0(scheme,"://", host,
diff --git a/R/fetch.R b/R/fetch.R
index 641c1cc..498dd90 100644
--- a/R/fetch.R
+++ b/R/fetch.R
@@ -67,7 +67,7 @@ validate_response <- function(r){
# @param uri the uri to fetch
# @param handle a curl handle
# @param retry_on HTTP status codes that result in retry
-# @param tries number of tries before failing
+# @param .retry number of tries before failing
# @param delay in seconds between retries, subject to exponent
# @param exponent increment each successive delay by delay^exponent
# @param no_message_threshold Only show messages if delay is greater than this limit
@@ -76,20 +76,26 @@ validate_response <- function(r){
# @return the result of curl_fetch_memory(uri, handle)
#
try_fetch <- function(uri, handle,
- retry_on = c(400, 401, 440, 503, 504, 509),
- tries = 6,
+ retry_on = c(400, 401, 440, 503, 504, 509),
+ no_retry_on,
+ .retry = 6,
delay = 1, exponent = 2,
no_message_threshold = 1)
{
r = curl_fetch_memory(uri, handle)
- # if(r$status_code == 400) validate_response(r)
+ # if(r$status_code == 400){
+ # validate_response(r)
+ # }
+ if(!missing(no_retry_on) && !is.null(no_retry_on)){
+ retry_on <- setdiff(retry_on, no_retry_on)
+ }
if(!(r$status_code %in% retry_on)) {
validate_response(r)
return(r)
}
collisions = 1
printed_message <- FALSE
- while(collisions < (tries)) {
+ while(collisions < (.retry)) {
r = curl_fetch_memory(uri, handle)
if(!(r$status_code %in% retry_on)) {
validate_response(r)
diff --git a/R/internal.R b/R/internal.R
index 60103cd..53c7c81 100644
--- a/R/internal.R
+++ b/R/internal.R
@@ -39,7 +39,7 @@ get_datasets <- function(ws) {
h = new_handle()
handle_setheaders(h, .list = ws$.headers)
uri <- sprintf("%s/workspaces/%s/datasources", ws$.studioapi, ws$id)
- r <- try_fetch(uri = uri, handle = h, delay = 0.25, tries = 3)
+ r <- try_fetch(uri = uri, handle = h, delay = 0.25, .retry = 3)
msg <- paste("No results returned from datasets(ws).",
"Please check your workspace credentials and api_endpoint are correct.")
@@ -89,7 +89,7 @@ get_experiments <- function(ws) {
h = new_handle()
handle_setheaders(h, .list=ws$.headers)
uri = sprintf("%s/workspaces/%s/experiments", ws$.studioapi, ws$id)
- r <- try_fetch(uri = uri, handle = h, delay = 0.25, tries = 3)
+ r <- try_fetch(uri = uri, handle = h, delay = 0.25, .retry = 3)
msg <- paste("No results returned from experiments(ws).",
"Please check your workspace credentials and api_endpoint are correct.")
diff --git a/R/publish.R b/R/publish.R
index 6dd3f33..6d685e0 100644
--- a/R/publish.R
+++ b/R/publish.R
@@ -29,6 +29,7 @@
#' @export
#'
#' @inheritParams refresh
+#' @inheritParams workspace
#' @param fun a function to publish; the function must have at least one argument.
#' @param name name of the new web service; ignored when \code{serviceId} is specified (when updating an existing web service).
#'
@@ -43,6 +44,7 @@
#' @param serviceId optional Azure web service ID; use to update an existing service (see Note below).
#' @param host optional Azure regional host, defaulting to the global \code{management_endpoint} set in \code{\link{workspace}}
#' @param data.frame \code{TRUE} indicates that the function \code{fun} accepts a data frame as input and returns a data frame output; automatically set to \code{TRUE} when \code{inputSchema} is a data frame.
+#' @param .retry number of tries before failing
#'
#' @return A data.frame describing the new service endpoints, cf. \code{\link{endpoints}}. The output can be directly used by the \code{\link{consume}} function.
#'
@@ -83,11 +85,12 @@
#' @importFrom uuid UUIDgenerate
#' @importFrom curl new_handle handle_setheaders handle_setopt
publishWebService <- function(ws, fun, name,
- inputSchema, outputSchema, `data.frame`=FALSE,
- export=character(0),
- noexport=character(0),
+ inputSchema, outputSchema,
+ `data.frame` = FALSE,
+ export = character(0),
+ noexport = character(0),
packages,
- version="3.1.0",
+ version = "3.1.0",
serviceId,
host = ws$.management_endpoint,
.retry = 3)
@@ -130,18 +133,7 @@ publishWebService <- function(ws, fun, name,
}
}
- # inputSchema <- azureSchema(inputSchema)
- # outputSchema <- azureSchema(outputSchema)
- # if(`data.frame`) {
- # if(length(formals(fun)) != 1) {
- # stop("when data.frame=TRUE fun must only take one data.frame argument")
- # }
- # } else {
- # if(length(formals(fun)) != length(inputSchema)) {
- # stop("length(inputSchema) does not match the number of function arguments")
- # }
- # }
-
+
### Get and encode the dependencies
if(missing(packages)) packages=NULL
@@ -191,7 +183,7 @@ publishWebService <- function(ws, fun, name,
)
handle_setheaders(h, .list = httpheader)
handle_setopt(h, .list = opts)
- r = try_fetch(publishURL, handle = h, tries = .retry)
+ r = try_fetch(publishURL, handle = h, .retry = .retry)
result = rawToChar(r$content)
if(r$status_code >= 400) stop(result)
newService = fromJSON(result)
diff --git a/R/services.R b/R/services.R
index 329b7ad..347e83a 100644
--- a/R/services.R
+++ b/R/services.R
@@ -75,7 +75,7 @@ services <- function(ws, service_id, name, host = ws$.management_endpoint)
}
uri <- sprintf("%s/workspaces/%s/webservices%s", host, ws$id, service_id)
- r <- try_fetch(uri = uri, handle = h, delay = 0.25, tries = 3)
+ r <- try_fetch(uri = uri, handle = h, delay = 0.25, .retry = 3)
# if(inherits(r, "error")){
# msg <- paste("No results returned from datasets(ws).",
# "Please check your workspace credentials and api_endpoint are correct.")
diff --git a/R/workspace.R b/R/workspace.R
index dd7ccee..45efc86 100644
--- a/R/workspace.R
+++ b/R/workspace.R
@@ -73,22 +73,51 @@ default_api <- function(api_endpoint = "https://studioapi.azureml.net"){
#' \if{html}{\figure{authorization_token.png}{options: width="60\%" alt="Figure: authorization_token.png"}}
#' \if{latex}{\figure{authorizationToken.pdf}{options: width=7cm}}
#'
-#' @section Using a \code{settings.json} file:
+#'
+#' @section Using a settings.json file:
#' If any of the \code{id}, \code{auth}, \code{api_endpoint} or \code{management_endpoint} arguments are missing, the function attempts to read values from the \code{config} file with JSON format:
#' \preformatted{
#' {"workspace":{
#' "id": "enter your AzureML workspace id here",
#' "authorization_token": "enter your AzureML authorization token here",
#' "api_endpoint": "https://studioapi.azureml.net",
+#' }}
+#' }
+#'
+#' To explicitly add the management endpoint in the JSON file, use:
+#' \preformatted{
+#' {"workspace":{
+#' "id": "enter your AzureML workspace id here",
+#' "authorization_token": "enter your AzureML authorization token here",
+#' "api_endpoint": "https://studioapi.azureml.net",
#' "management_endpoint": "https://management.azureml.net"
#' }}
#' }
+#'
+#' @section Using a workspace in different Azure Machine Learning regions:
+#'
+#' By default, the Azure Machine Learning workspace is located in US South Central, but it is possible to create a workspace in different regions, including Europe West and Asia Southeast.
+#'
+#' To use a workspace in Asia Southeast, you can modify the api endpoint line in the JSON file:
+#' \preformatted{
+#' {"workspace": {
+#' "api_endpoint": ["https://asiasoutheast.studio.azureml.net"]
+#' }}
+#' }
+#'
+#' Similarly, for a workspace in Europe West:
+#' \preformatted{
+#' {"workspace": {
+#' "api_endpoint": ["https://europewest.studio.azureml.net"]
+#' }}
+#' }
+#'
#'
#'
#' @param id Optional workspace id from ML studio -> settings -> WORKSPACE ID. See the section "Finding your AzureML credentials" for more details.
#' @param auth Optional authorization token from ML studio -> settings -> AUTHORIZATION TOKENS. See the section "Finding your AzureML credentials" for more details.
-#' @param api_endpoint Optional AzureML API web service URI. Defaults to \code{https://studioap.azureml.net} if not provided and not specified in config. See note.
+#' @param api_endpoint Optional AzureML API web service URI. Defaults to \code{https://studioapi.azureml.net} if not provided and not specified in config. See note.
#' @param management_endpoint Optional AzureML management web service URI. Defaults to \code{https://management.azureml.net} if not provided and not specified in config. See note.
#' @param config Optional settings file containing id and authorization info. Used if any of the other arguments are missing. The default config file is \code{~/.azureml/settings.json}, but you can change this location by setting \code{options(AzureML.config = "newlocation")}. See the section "Using a settings.json file" for more details.
#' @param ... ignored
diff --git a/R/wrapper.R b/R/wrapper.R
index 3919115..9eb9ed2 100644
--- a/R/wrapper.R
+++ b/R/wrapper.R
@@ -21,47 +21,56 @@
# THE SOFTWARE.
-# `wrapper` is the expression executed in the AzureML R environment. The
-# publishWebService function sets up the environment "exportenv" from which
-# this expression follows.
+# `wrapper` is the expression executed in the AzureML R environment. The publishWebService function sets up the environment "exportenv" from which this expression follows.
-wrapperFunction <- function(){
- inputDF <- maml.mapInputPort(1)
- load('src/env.RData')
- if(!is.null(exportenv$..packages))
- {
- lapply(exportenv$..packages, function(pkg){
- if(!require(pkg, character.only = TRUE, quietly = TRUE))
- install.packages(pkg,
- repos = paste0('file:///', getwd(), '/src/packages'),
- lib = getwd()
- )
- })
- .libPaths(new = getwd())
- lapply(exportenv$..packages, require,
- quietly = TRUE, character.only=TRUE)
- }
- parent.env(exportenv) = globalenv()
-
- attach(exportenv, warn.conflicts = FALSE)
- if(..data.frame){
- outputDF <- as.data.frame(..fun(inputDF))
- colnames(outputDF) <- ..output_names
- } else {
- outputDF <- matrix(nrow = nrow(inputDF),
- ncol = length(..output_names)
- )
- outputDF <- as.data.frame(outputDF)
- names(outputDF) <- ..output_names
- for(i in 1:nrow(inputDF)){
- outputDF[i, ] <- do.call('..fun', inputDF[i, ])
- }
- }
- maml.mapOutputPort("outputDF")
-}
+# Note that exposing wrapperFunction() and wrapper causes R CMD build failures.
+# The workaround is to comment out the wrapper function and replace it with the
+# equivalent string literal below. To update the string, uncomment and edit the function.
-wrapper <- paste(as.character(body(wrapperFunction)[-1]),
- collapse = "\n")
+
+### --- Do not remove this commented-out code ----------------------------------
+
+# wrapperFunction <- function(){
+# inputDF <- maml.mapInputPort(1)
+# load('src/env.RData')
+# if(!is.null(exportenv$..packages))
+# {
+# lapply(exportenv$..packages, function(pkg){
+# if(!require(pkg, character.only = TRUE, quietly = TRUE))
+# install.packages(pkg,
+# repos = paste0('file:///', getwd(), '/src/packages'),
+# lib = getwd()
+# )
+# })
+# .libPaths(new = getwd())
+# lapply(exportenv$..packages, require,
+# quietly = TRUE, character.only=TRUE)
+# }
+# parent.env(exportenv) = globalenv()
+#
+# attach(exportenv, warn.conflicts = FALSE)
+# if(..data.frame){
+# outputDF <- as.data.frame(..fun(inputDF))
+# colnames(outputDF) <- ..output_names
+# } else {
+# outputDF <- matrix(nrow = nrow(inputDF),
+# ncol = length(..output_names)
+# )
+# outputDF <- as.data.frame(outputDF)
+# names(outputDF) <- ..output_names
+# for(i in 1:nrow(inputDF)){
+# outputDF[i, ] <- do.call('..fun', inputDF[i, ])
+# }
+# }
+# maml.mapOutputPort("outputDF")
+# }
+#
+# wrapper <- paste(as.character(body(wrapperFunction)[-1]),
+# collapse = "\n")
+
+### --- End of Do not remove ---------------------------------------------------
+
+wrapper <- "inputDF <- maml.mapInputPort(1)\nload(\"src/env.RData\")\nif (!is.null(exportenv$..packages)) {\n lapply(exportenv$..packages, function(pkg) {\n if (!require(pkg, character.only = TRUE, quietly = TRUE)) \n install.packages(pkg, repos = paste0(\"file:///\", getwd(), \"/src/packages\"), lib = getwd())\n })\n .libPaths(new = getwd())\n lapply(exportenv$..packages, require, quietly = TRUE, character.only = TRUE)\n}\nparent.env(exportenv) = globalenv()\nattach(exportenv, warn.conflicts = FALSE)\nif (..data.frame) {\n outputDF <- as.data.frame(..fun(inputDF))\n colnames(outputDF) <- ..output_names\n} else {\n outputDF <- matrix(nrow = nrow(inputDF), ncol = length(..output_names))\n outputDF <- as.data.frame(outputDF)\n names(outputDF) <- ..output_names\n for (i in 1:nrow(inputDF)) {\n outputDF[i, ] <- do.call(\"..fun\", inputDF[i, ])\n }\n}\nmaml.mapOutputPort(\"outputDF\")"
@@ -74,8 +83,20 @@ wrapper <- paste(as.character(body(wrapperFunction)[-1]),
# @examples
# foo <- function(dat)head(dat, 10)
# test_wrapper(foo, iris)
-test_wrapper <- function(fun = function(x)head(x, 3), inputDF = iris, `data.frame` = TRUE)
+test_wrapper <- function(fun = function(x)head(x, 3), inputDF, `data.frame` = TRUE)
{
+ if(missing(inputDF) || is.null(inputDF)){
+    # replicate the first 6 rows of iris
+ # this is a workaround to pass R CMD check
+ iris <- data.frame(
+ Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4),
+ Sepal.Width = c(3.5, 3, 3.2, 3.1, 3.6, 3.9),
+ Petal.Length = c(1.4, 1.4, 1.3, 1.5, 1.4, 1.7),
+ Petal.Width = c(0.2, 0.2, 0.2, 0.2, 0.2, 0.4),
+ Species = factor(rep(1, 6), levels = 1:3, labels = c("setosa", "versicolor", "virginica"))
+ )
+ inputDF <- iris
+ }
exportenv = new.env()
maml.mapInputPort <- function(x) as.data.frame(inputDF)
maml.mapOutputPort <- function(x) get(x)
diff --git a/inst/doc/getting_started.Rmd b/inst/doc/getting_started.Rmd
index d051904..6a8f846 100644
--- a/inst/doc/getting_started.Rmd
+++ b/inst/doc/getting_started.Rmd
@@ -51,7 +51,7 @@ in the package cover the following topics:
## Getting Started
-To get started, please navigate to [AzureML Studio](http://studio.azureml.net)
+To get started, please navigate to [AzureML Studio](https://studio.azureml.net)
and create a free account (not guest) or use your existing AzureML account.
After logging in, under the "Settings" tab, copy and paste your Workspace ID
from the "Name" sub-tab into your R console. From the "Authorization Tokens"
@@ -68,13 +68,13 @@ your AzureML sessions.
## Obtaining AzureML Credentials
Before using the package, it is necessary to first obtain the security
-credentials to your Azure Machine Learning workspace. You can find this be
-logging in at [https://studio.azureml.net](Azure ML web site). If you do not
+credentials to your Azure Machine Learning workspace. You can find this by
+logging in at the [AzureML Studio](https://studio.azureml.net). If you do not
have an account, you can create a free account (not guest) to use these APIs.
Once logged in, you will be brought to the Studio landing page. Using the
left-hand menu, navigate to the 'Settings' tab to find your Workspace ID. Note
-this, or copy it into your R session and store it is a variable, e.g. myWsID.
+this, or copy it into your R session and store it as a variable, e.g. `myWsID`.
diff --git a/inst/doc/getting_started.html b/inst/doc/getting_started.html
index da14d57..4760244 100644
--- a/inst/doc/getting_started.html
+++ b/inst/doc/getting_started.html
@@ -9,20 +9,21 @@
-
+
+
To get started, please navigate to AzureML Studio and create a free account (not guest) or use your existing AzureML account. After logging in, under the “Settings” tab, copy and paste your Workspace ID from the “Name” sub-tab into your R console. From the “Authorization Tokens” sub-tab, copy your Primary Authorization Token into your R console. You will need this information to access all package functionality.
+To get started, please navigate to AzureML Studio and create a free account (not guest) or use your existing AzureML account. After logging in, under the “Settings” tab, copy and paste your Workspace ID from the “Name” sub-tab into your R console. From the “Authorization Tokens” sub-tab, copy your Primary Authorization Token into your R console. You will need this information to access all package functionality.
The package defines a Workspace
class that represents an AzureML work space. Most of the functions in the package refer to a Workspace object directly or indirectly. Use the workspace()
function to create Workspace objects, either by explicitly specifying an AzureML workspace ID and authorization token. Workspace objects are simply R environments that actively cache details about your AzureML sessions.
Before using the package, it is necessary to first obtain the security credentials to your Azure Machine Learning workspace. You can find this be logging in at https://studio.azureml.net. If you do not have an account, you can create a free account (not guest) to use these APIs.
-Once logged in, you will be brought to the Studio landing page. Using the left-hand menu, navigate to the ‘Settings’ tab to find your Workspace ID. Note this, or copy it into your R session and store it is a variable, e.g. myWsID.
+Before using the package, it is necessary to first obtain the security credentials to your Azure Machine Learning workspace. You can find this by logging in at the AzureML Studio. If you do not have an account, you can create a free account (not guest) to use these APIs.
+Once logged in, you will be brought to the Studio landing page. Using the left-hand menu, navigate to the ‘Settings’ tab to find your Workspace ID. Note this, or copy it into your R session and store it as a variable, e.g. myWsID
.
Next, within the ‘Settings’ tab, use the overhead menu to navigate to the ‘Authorization Tokens’ tab and similarly note your Primary Authorization Token.
Use this package to upload and download datasets to and from AzureML, to interrogate experiments, to publish R functions as AzureML web services, and to run R data through existing web services and retrieve the output.
-Install the development version of the package directly from GitHub with:
-# Install devtools
-if(!require("devtools")) install.packages("devtools")
-devtools::install_github("RevolutionAnalytics/azureml")
-The package depends on:
-jsonlite
curl
miniCRAN
base64enc
uuid
Some of the examples use data and functions in:
-lme4
ggplot2
AzureML provides an interface to publish web services on Microsoft Azure Machine Learning (Azure ML) from your local R environment. The main functions in the package cover the following topics:
-To get started, please navigate to AzureML Studio and create a free account (not guest) or use your existing AzureML account. After logging in, under the “Settings” tab, copy and paste your Workspace ID from the “Name” sub-tab into your R console. From the “Authorization Tokens” sub-tab, copy your Primary Authorization Token into your R console. You will need this information to access all package functionality.
-The package defines a Workspace
class that represents an AzureML work space. Most of the functions in the package refer to a Workspace object directly or indirectly. Use the workspace()
function to create Workspace objects, either by explicitly specifying an AzureML workspace ID and authorization token. Workspace objects are simply R environments that actively cache details about your AzureML sessions.
Before using the package, it is necessary to first obtain the security credentials to your Azure Machine Learning workspace. You can find this by logging in at the AzureML Studio. If you do not have an account, you can create a free account (not guest) to use these APIs.
-Once logged in, you will be brought to the Studio landing page. Using the left-hand menu, navigate to the ‘Settings’ tab to find your Workspace ID. Note this, or copy it into your R session and store it is a variable, e.g. myWsID
.
Next, within the ‘Settings’ tab, use the overhead menu to navigate to the ‘Authorization Tokens’ tab and similarly note your Primary Authorization Token.
-library(AzureML)
-ws <- workspace(
- id = "your workspace ID",
- auth = "your authorization token"
-)
-or alternatively create a file in ~/.azureml/settings.json
with the JSON structure (api_endpoint
and management_endpoint
are optional):
{"workspace": {
- "id" : "test_id",
- "authorization_token" : "test_token",
- "api_endpoint" : "api_endpoint",
- "management_endpoint" : "management_endpoint"
-}}
-See ?workspace
for more details.
The datasets()
, experiments()
, and services()
functions return data frames that contain information about those objects available in the workspace.
The package caches R data frame objects describing available datasets, experiments and services in the workspace environment. That cache can be refreshed at any time with the refresh()
function. The data frame objects make it relatively easy to sort and filter the datasets, experiments, and services in arbitrary ways. The functions also include filtering options for specific and common filters, like looking up something by name.
Use the download.datasets()
and upload.dataset()
functions to download or upload data between R and your Azure workspace. The download.intermediate.dataset()
function can download ephemeral data from a port in an experiment that is not explicitly stored in your Azure workspace.
Use delete.datasets()
to remove and delete datasets from the workspace.
The endpoints()
function describes Azure web service endpoints, and works with supporting help functions like endpointHelp()
.
The publishWebService()
function publishes a custom R function as an AzureML web service, available for use by any client. The updateWebService()
and deleteWebServce()
update or delete existing web services, respectively.
Use the consume()
function to evaluate an Azure ML web service with new data uploaded to AzureML from your R environment.
Work with the AzureML package begins by defining a workspace object. The example below uses the configured workspace ID and authorization token in the ~/.azureml/settings.json
file. Alternatively specify these settings explicitly in the workspace()
function as outlined above. All of the examples require this step.
library(AzureML)
-ws <- workspace()
-ws
-## AzureML Workspace
-## Workspace ID : dd01c7e4a424432c9a9f83142d5cfec4
-## API endpoint : https://studio.azureml.net
-AzureML is a web service and sometimes operations can’t immediately proceed due to rate limiting or other factors. When this kind of thing occurs, the AzureML R package presents a warning and retries the service a few times before giving up with an error.
-AzureML datasets correspond more or less to R data frames. The AzureML package defines four basic dataset operations: list, upload, download, and delete.
-The following example illustrates listing available datasets in your workspace.
-head(datasets(ws)) # Or, equivalently: head(ws$datasets)
-## Name DataTypeId Size
-## 1 Flight_On_Time_Performance_July_October_2013.csv GenericCSV 100506313
-## 2 TestReadFromBlob Dataset 8102
-## 3 New York weather GenericCSV 116989
-## 4 airquality GenericTSV 2901
-## 5 dataset-test-upload-2015-11-17--22-12-47 GenericTSV 2901
-## 6 dataset-test-upload-2015-11-17--22-14-37 GenericTSV 2901
-## ...
-## 1 ...
-## 2 ...
-## 3 ...
-## 4 ...
-## 5 ...
-## 6 ...
-## ----------------------------------------------
-## AzureML datasets data.frame variables include:
-## [1] "VisualizeEndPoint" "SchemaEndPoint" "SchemaStatus"
-## [4] "Id" "DataTypeId" "Name"
-## [7] "Description" "FamilyId" "ResourceUploadId"
-## [10] "SourceOrigin" "Size" "CreatedDate"
-## [13] "Owner" "ExperimentId" "ClientVersion"
-## [16] "PromotedFrom" "UploadedFromFilename" "ServiceVersion"
-## [19] "IsLatest" "Category" "DownloadLocation"
-## [22] "IsDeprecated" "Culture" "Batch"
-## [25] "CreatedDateTicks"
-The list of datasets is presented as an a R data frame with class Datasets
. Its print method shows a summary of the datasets, along with all of the available variables. Use any normal R data frame operation to manipulate the datasets. For example, to see the “Owner” value of each dataset:
head(ws$datasets$Owner, n=20)
-## [1] "apdevries" "apdevries"
-## [3] "apdevries" "R"
-## [5] "R" "R"
-## [7] "R" "R"
-## [9] "R" "R"
-## [11] "R" "R"
-## [13] "R" "Microsoft Corporation"
-## [15] "Microsoft Corporation" "Microsoft Corporation"
-## [17] "Microsoft Corporation" "Microsoft Corporation"
-## [19] "Microsoft Corporation" "Microsoft Corporation"
-The next example illustrates downloading a specific dataset named “Airport Codes Dataset” from AzureML to your R session. This dataset is presented by AzureML as a “Generic CSV” dataset, and will be parsed by R’s read.table()
function. (Other formats are parsed by an appropriate parser, for example read.arff()
.) The example illustrates passing additional arguments to the read.table()
function used to parse the data from AzureML in this case.
airports <- download.datasets(ws, name = "Airport Codes Dataset", quote="\"")
-head(airports)
-## airport_id city state name
-## 1 10165 Adak Island AK Adak
-## 2 10299 Anchorage AK Ted Stevens Anchorage International
-## 3 10304 Aniak AK Aniak Airport
-## 4 10754 Barrow AK Wiley Post/Will Rogers Memorial
-## 5 10551 Bethel AK Bethel Airport
-## 6 10926 Cordova AK Merle K Mudhole Smith
-You can use download.datasets()
to download more than one dataset as a time, returning the results in a list of data frames.
Use the upload.dataset()
function to upload R data frames to AzureML.
upload.dataset(airquality, ws, name = "Air quality")
-## Name DataTypeId Size ...
-## 1 Air quality GenericTSV 2901 ...
-## ----------------------------------------------
-## AzureML datasets data.frame variables include:
-## [1] "VisualizeEndPoint" "SchemaEndPoint" "SchemaStatus"
-## [4] "Id" "DataTypeId" "Name"
-## [7] "Description" "FamilyId" "ResourceUploadId"
-## [10] "SourceOrigin" "Size" "CreatedDate"
-## [13] "Owner" "ExperimentId" "ClientVersion"
-## [16] "PromotedFrom" "UploadedFromFilename" "ServiceVersion"
-## [19] "IsLatest" "Category" "DownloadLocation"
-## [22] "IsDeprecated" "Culture" "Batch"
-## [25] "CreatedDateTicks"
-# Let's see what we've got:
-head(download.datasets(ws, name = "Air quality"))
-## Ozone Solar.R Wind Temp Month Day
-## 1 41 190 7.4 67 5 1
-## 2 36 118 8.0 72 5 2
-## 3 12 149 12.6 74 5 3
-## 4 18 313 11.5 62 5 4
-## 5 NA NA 14.3 56 5 5
-## 6 28 NA 14.9 66 5 6
-Delete one or more AzureML datasets with delete.datasets()
:
delete.datasets(ws, name="Air quality")
-## Request failed with status 400. Waiting 2.3 seconds before retry
-## ...
-## Name Deleted status_code
-## 1 Air quality TRUE 204
-Use the experiments()
function or simply use the ws$experiments
data frame object directly to list details about experiments in your AzureML workspace. The experiments()
function optionally filters experiments by ownership.
e <- experiments(ws, filter = "samples")
-head(e)
-## Description CreationTime ...
-## 1 Sample 6: Train, Test, Evaluate for Regression: 2015-08-27 21:34:57 ...
-## 2 Text Classification: Step 2 of 5, text preproces 2015-08-27 21:39:38 ...
-## 3 Quantile Regression: Car price prediction 2015-08-27 21:37:39 ...
-## 4 Multiclass Classification: News categorization 2015-08-27 21:36:23 ...
-## 5 Neural Network: Basic convolution 2015-08-27 21:36:48 ...
-## 6 Text Classification: Step 3B of 5, unigrams TF-I 2015-08-27 21:39:49 ...
-## -------------------------------------------------
-## AzureML experiments data.frame variables include:
-## [1] "ExperimentId"
-## [2] "Description"
-## [3] "Etag"
-## [4] "Creator"
-## [5] "IsArchived"
-## [6] "JobId"
-## [7] "VersionId"
-## [8] "RunId"
-## [9] "OriginalExperimentDocumentationLink"
-## [10] "Summary"
-## [11] "Category"
-## [12] "Tags"
-## [13] "StatusCode"
-## [14] "StatusDetail"
-## [15] "CreationTime"
-## [16] "StartTime"
-## [17] "EndTime"
-## [18] "Metadata"
-The ws$experiments
object is just an R data frame with class Experiments
. Its print method shows a summary of the available experiments, but it can otherwise be manipulated like a normal R data frame.
The list of experiments in your workspace is cached in the workspace environment. Use the refresh()
function to explicitly update the cache at any time, for example:
refresh(ws, "experiments")
-The AzureML package helps you to publish R functions as AzureML web services that can be consumed anywhere. You can also use the AzureML package to run R data through an existing web service and collect the output.
-The publishWebService()
publishes an R function as an AzureML web service. Consider this simple example R function:
add <- function(x, y) {
- x + y
-}
-Use the function publishWebService()
to publish the function as a service named “AzureML-vignette-silly”:
ws <- workspace()
-api <- publishWebService(
- ws,
- fun = add,
- name = "AzureML-vignette-silly",
- inputSchema = list(
- x = "numeric",
- y = "numeric"
- ),
- outputSchema = list(
- ans = "numeric"
- )
-)
-The example publishes a function of two scalar numeric arguments, returning a single numeric scalar output value. Note that we explicitly define the web service input and output schema in the example. See the examples below for more flexible ways of defining web services with functions of data frames.
-The result of publishWebService()
is an Endpoint
object, really just an R data frame with two elements: a list containing the details of the newly created web service, and a list of the endpoints of the web service. From here, you can pass the information on to another user, or use the information to use the web service from R:
class(api)
-## [1] "Endpoint" "data.frame"
-names(api)
-## [1] "Name" "Description"
-## [3] "CreationTime" "WorkspaceId"
-## [5] "WebServiceId" "HelpLocation"
-## [7] "PrimaryKey" "SecondaryKey"
-## [9] "ApiLocation" "PreventUpdate"
-## [11] "MaxConcurrentCalls" "DiagnosticsTraceLevel"
-## [13] "ThrottleLevel"
-The web service created is identical to a web service published through the Azure Machine Learning Studio. From the response, you can get the Web Service’s URL, API Key and Help Page URL, as shown above. The first two are needed to make calls to the web service. The latter has the sample code, sample request and other information for consuming the API from client apps such as mobile and web applications.
-The new web service will show up on the ‘Web Services’ tab of the Studio interface, and the service will have a help page for each endpoint, e.g.
-Note that AzureML allows multiple services to have the same name.
-(helpPageUrl <- api$HelpLocation)
-## [1] "https://studio.azureml.net/apihelp/workspaces/dd01c7e4a424432c9a9f83142d5cfec4/webservices/10b27c1cc68d11e59a9e9fda44a82b45/endpoints/4ce411c50ec3486db0a5c191af0309c1"
-Once published, you can update a web service using the updateWebService()
or publishWebService()
functions. The updateWebService()
function is just an alias for publishWebService()
, except that the argument serviceId
is compulsory.
api <- updateWebService(
- ws,
- fun = function(x, y) x - y,
- inputSchema = list(
- x = "numeric",
- y = "numeric"
- ),
- outputSchema = list(
- ans = "numeric"
- ),
- serviceId = api$WebServiceId # <<-- Required to update!
-)
-The “AzureML-vignette-silly” service now subtracts two numbers instead of adding them.
-Use the services()
function to list in detail all of the available services in your AzureML workspace, or filter by web service name as shown below:
(webservices <- services(ws, name = "AzureML-vignette-silly"))
-## Id Name Description
-## 22 0118e986c68c11e5bf7d7d3f8709ee1b AzureML-vignette-silly <NA>
-## 24 7cca5998c68c11e581cfcf4167576cd7 AzureML-vignette-silly <NA>
-## 26 10b27c1cc68d11e59a9e9fda44a82b45 AzureML-vignette-silly <NA>
-## CreationTime WorkspaceId
-## 22 2016-01-29T13:27:14.153Z dd01c7e4a424432c9a9f83142d5cfec4
-## 24 2016-01-29T13:30:42.485Z dd01c7e4a424432c9a9f83142d5cfec4
-## 26 2016-01-29T13:34:50.512Z dd01c7e4a424432c9a9f83142d5cfec4
-## DefaultEndpointName EndpointCount
-## 22 default 1
-## 24 default 1
-## 26 default 1
-Given a service, use the endpoints()
function to list the AzureML service endpoints for the service:
ep <- endpoints(ws, webservices[1, ])
-class(ep)
-## [1] "Endpoint" "data.frame"
-names(ep)
-## [1] "Name" "Description"
-## [3] "CreationTime" "WorkspaceId"
-## [5] "WebServiceId" "HelpLocation"
-## [7] "PrimaryKey" "SecondaryKey"
-## [9] "ApiLocation" "PreventUpdate"
-## [11] "MaxConcurrentCalls" "DiagnosticsTraceLevel"
-## [13] "ThrottleLevel"
-The returned Endpoints
object contains all the information needed to consume a web service. The endpointHelp()
function returns detailed information about an endpoint including its input and output schema and URI.
Use the consume()
function to send data to your newly published web service API for scoring.
df <- data.frame(
- x = 1:5,
- y = 6:10
-)
-s <- services(ws, name = "AzureML-vignette-silly")
-s <- tail(s, 1) # use the last published function, in case of duplicate function names
-ep <- endpoints(ws, s)
-consume(ep, df)
-## ans
-## 1 -5
-## 2 -5
-## 3 -5
-## 4 -5
-## 5 -5
-Alternatively, the endpoint primary key and API location can be found on the help page for that specific endpoint, which can be found on Azure Machine Learning Studio. Using the Help Page URL, you can access sample code to build clients that can consume this web service in real time to make predictions.
-Use deleteWebservice()
to remove a webservice endpoint that you no longer need or want (like these silly examples):
deleteWebService(ws, name = "AzureML-vignette-silly")
-The simplest and perhaps most useful way to define a web service uses functions that take a single data frame argument and return a vector or data frame of results. The next example trains a generalized boosted regression model using the gbm package, publishes the model as a web service with name “AzureML-vignette-gbm”, and runs example data through the model for prediction using the consume()
function.
library(AzureML)
-library(MASS)
-library(gbm)
-## Loading required package: survival
-## Loading required package: lattice
-## Loading required package: splines
-## Loading required package: parallel
-## Loaded gbm 2.1.1
-ws <- workspace()
-test <- Boston[1:5, 1:13]
-
-set.seed(123)
-gbm1 <- gbm(medv ~ .,
- distribution = "gaussian",
- n.trees = 5000,
- interaction.depth = 8,
- n.minobsinnode = 1,
- shrinkage = 0.01,
- cv.folds = 5,
- data = Boston,
- n.cores = 1) # You can set this to n.cores = NULL to use all cores
-best.iter <- gbm.perf(gbm1, method="cv", plot=FALSE)
-
-mypredict <- function(newdata)
-{
- require(gbm)
- predict(gbm1, newdata, best.iter)
-}
-
-# Example use of the prediction function
-print(mypredict(test))
-## [1] 24.54431 21.15155 33.88859 34.06615 34.93906
-# Publish the service
-ep <- publishWebService(ws = ws, fun = mypredict, name = "AzureML-vignette-gbm",
- inputSchema = test)
-
-# Consume test data, comparing with result above
-print(consume(ep, test))
-## Request failed with status 401. Waiting 6.7 seconds before retry
-## .......
-## ans
-## 1 24.54431
-## 2 21.15155
-## 3 33.88859
-## 4 34.06615
-## 5 34.93906
-Notice that we don’t need to explicitly specify the inputSchema
or outputSchema
arguments when working with functions that use data frame I/O. When finished with this example, we can delete the example service with:
deleteWebService(ws, "AzureML-vignette-gbm")
-Try to use the data frame I/O interface as illustrated in the last example above. It’s simpler and more robust than using functions of scalars or lists and exhibits faster execution for large data sets.
-Use require()
in your function to explicitly load required packages.
The publishWebService()
function uses codetools to bundle objects required by your function following R lexical scoping rules. The previous example, for instance, uses the best.iter
and gbm1
variables inside of the mypredict()
function. publishWebService()
identified that and included their definitions in the R environment in which the function is evaluated in AzureML. Fine-grained control over the export of variables is provided by the publishWebService()
function in case you need it (see the help page for details).
Use the packages
option of publishWebService()
to explicitly bundle required packages and their dependencies (but not suggested dependencies) using miniCRAN. This lets you upload packages to AzureML that may not otherwise be available in that environment already, using the correct R version and platform used by AzureML.
Be aware that the version of R running in AzureML may not be the same as the version of R that you are running locally. That means that some packages might not be available, or sometimes package behavior in the AzureML version of R might be different from what you observe locally. This is generally more of an issue for cutting-edge packages.
-JSON is used to transfer data between your local R environment and the R services running in AzureML — numeric values experience a change of base, which can lead to a small loss of precision in some circumstances. If you really, really need to move binary objects between your local R session and the AzureML R service, you might try base64 encoding the data, for example.
-