Angel-ML · Aug 27, 2019
diff --git a/‎README.md
+3-66 b/‎README.md
+3-66
diff --git a/‎angelml/bin/spark-on-angel-env.sh
+5-6 b/‎angelml/bin/spark-on-angel-env.sh
+5-6
diff --git a/‎angelml/pom.xml
+4-4 b/‎angelml/pom.xml
+4-4
diff --git a/‎angelml/src/main/scala/org/apache/spark/angel/examples/AutoJsonRunnerExample.scala
-2 b/‎angelml/src/main/scala/org/apache/spark/angel/examples/AutoJsonRunnerExample.scala
-2
diff --git a/‎angelml/src/main/scala/org/apache/spark/angel/examples/JsonRunnerExamples.scala
+5-4 b/‎angelml/src/main/scala/org/apache/spark/angel/examples/JsonRunnerExamples.scala
+5-4
diff --git a/‎angelml/src/main/scala/org/apache/spark/angel/examples/oneline_learning/FTRLExample.scala ‎angelml/src/main/scala/org/apache/spark/angel/examples/online_learning/FTRLExample.scala
+1-1 b/‎angelml/src/main/scala/org/apache/spark/angel/examples/oneline_learning/FTRLExample.scala ‎angelml/src/main/scala/org/apache/spark/angel/examples/online_learning/FTRLExample.scala
+1-1
diff --git a/‎angelml/src/main/scala/org/apache/spark/angel/examples/oneline_learning/FtrlFMExample.scala ‎angelml/src/main/scala/org/apache/spark/angel/examples/online_learning/FtrlFMExample.scala
+1-1 b/‎angelml/src/main/scala/org/apache/spark/angel/examples/oneline_learning/FtrlFMExample.scala ‎angelml/src/main/scala/org/apache/spark/angel/examples/online_learning/FtrlFMExample.scala
+1-1
diff --git a/‎core/pom.xml
+3-3 b/‎core/pom.xml
+3-3
diff --git a/‎core/src/main/scala/com/tencent/angel/sona/util/ConfUtils.scala
+32-19 b/‎core/src/main/scala/com/tencent/angel/sona/util/ConfUtils.scala
+32-19
diff --git a/‎dist/pom.xml
+1-1 b/‎dist/pom.xml
+1-1
diff --git a/‎docs/algo/ftrl_fm_sona_en.md
+117 b/‎docs/algo/ftrl_fm_sona_en.md
+117
diff --git a/‎docs/algo/ftrl_lr_sona_en.md
+118 b/‎docs/algo/ftrl_lr_sona_en.md
+118
diff --git a/‎docs/line_en.md
+1-1 b/‎docs/line_en.md
+1-1
diff --git a/‎docs/tutorials/sona_quick_start.md
+141 b/‎docs/tutorials/sona_quick_start.md
+141
diff --git a/‎examples/pom.xml
-225 b/‎examples/pom.xml
-225
diff --git a/‎examples/src/main/scala/org/apache/spark/examples/JsonRunner.scala
-98 b/‎examples/src/main/scala/org/apache/spark/examples/JsonRunner.scala
-98
diff --git a/‎pom.xml
+2-2 b/‎pom.xml
+2-2
@@ -48,71 +48,7 @@ Figure 4 provides an example of running distributed machine learning algorithms
 
 ## Quick Start
 SONA supports three types of runtime models: YARN, K8s and Local. The local mode enable it easy to debug. 
-
-The SONA job is essentially a Spark Application with an associated Angel-PS application. 
-After the job is successfully submitted, there will be two separate Applications on the cluster, 
-one is the Spark Application and the other is the Angel-PS Application. The two Applications are not coupled. 
-If the SONA job is deleted, users are required to kill both the Spark and Angel-PS Applications manually.
-
-```bash
-#! /bin/bash
-- cd angel-<version>-bin/bin; 
-- ./SONA-example
-```
-
-The context of the submit scripts is as following:
-```bash
-#! /bin/bash
-source ./spark-on-angel-env.sh
-$SPARK_HOME/bin/spark-submit \
-    --master yarn-cluster \
-    --conf spark.ps.jars=$SONA_ANGEL_JARS \
-    --conf spark.ps.instances=10 \
-    --conf spark.ps.cores=2 \
-    --conf spark.ps.memory=6g \
-    --queue g_teg_angel-offline \
-    --jars $SONA_SPARK_JARS \
-    --name "BreezeSGD-spark-on-angel" \
-    --driver-memory 10g \
-    --num-executors 10 \
-    --executor-cores 2 \
-    --executor-memory 4g \
-    --class com.tencent.angel.spark.examples.ml.BreezeSGD \
-    ./../lib/spark-on-angel-examples-${ANGEL_VERSION}.jar
-```
-
-Users are encouraged to program instead of just using bash script. here is an example: 
-```scala
-import com.tencent.angel.sona.core.DriverContext
-import org.apache.spark.angel.ml.classification.AngelClassifier
-import org.apache.spark.angel.ml.feature.LabeledPoint
-import org.apache.spark.angel.ml.linalg.Vectors
-import org.apache.spark.SparkConf
-import org.apache.spark.sql.{DataFrameReader, SparkSession}
-
-val spark = SparkSession.builder()
-  .master("local[2]")
-  .appName("AngelClassification")
-  .getOrCreate()
-
-val libsvm = spark.read.format("libsvmex")
-val dummy = spark.read.format("dummy")
-
-val trainData = libsvm.load("./data/angel/census/census_148d_train.libsvm")
-
-val classifier = new AngelClassifier()
-  .setModelJsonFile("./angelml/src/test/jsons/daw.json")
-  .setNumClass(2)
-  .setNumBatch(10)
-  .setMaxIter(2)
-  .setLearningRate(0.1)
-  .setNumField(13)
-
-val model = classifier.fit(trainData)
-
-model.write.overwrite().save("trained_models/daw")
-```
-
+[sona quick start](./docs/tutorials/sona_quick_start.md)
 
 ## Algorithms
 - machine learning algorithms:
@@ -124,6 +60,8 @@ model.write.overwrite().save("trained_models/daw")
         - [Robust Regression](docs/algo/robust_sona_en.md)
         - [Gradient Boosting Decision Tree](docs/GBDT.md)
         - [Hyper-Parameter Tuning](docs/AutoML.md)
+        - [FTRL](docs/algo/ftrl_lr_sona_en.md)
+        - [FTRL-FM](docs/algo/ftrl_fm_sona_en.md)
     + Deep Learning Methods
         - [Deep Neural Network(DNN)](docs/algo/dnn_sona_en.md)
         - [Mix Logistic Regression(MLR)](docs/algo/mlr_sona_en.md)
@@ -147,4 +85,3 @@ model.write.overwrite().save("trained_models/daw")
 ## References
 
 ## Other Resources
-
@@ -12,18 +12,17 @@ export HADOOP_HOME=
 export SPARK_HOME=
 export SONA_HOME=
 export SONA_HDFS_HOME=
-export SONA_VERSION=0.1.0-SNAPSHOT
-export ANGEL_VERSION=3.0.0-SNAPSHOT
-export ANGEL_UTILS_VERSION=0.1.0-SNAPSHOT
+export SONA_VERSION=0.1.0
+export ANGEL_VERSION=3.0.0
+export ANGEL_UTILS_VERSION=0.1.1
 
 
 scala_jar=scala-library-2.11.8.jar
-angel_ps_external_jar=fastutil-7.1.0.jar,htrace-core-2.05.jar,sizeof-0.3.0.jar,kryo-shaded-4.0.0.jar,minlog-1.3.0.jar,memory-0.8.1.jar,commons-pool-1.6.jar,netty-all-4.1.1.Final.jar,hll-1.6.0.jar
+angel_ps_external_jar=fastutil-7.1.0.jar,htrace-core-2.05.jar,sizeof-0.3.0.jar,kryo-shaded-4.0.0.jar,minlog-1.3.0.jar,memory-0.8.1.jar,commons-pool-1.6.jar,netty-all-4.1.17.Final.jar,hll-1.6.0.jar
 angel_ps_jar=angel-format-${ANGEL_UTILS_VERSION}.jar,angel-mlcore-${ANGEL_UTILS_VERSION}.jar,angel-ps-core-${ANGEL_VERSION}.jar,angel-ps-mllib-${ANGEL_VERSION}.jar,angel-ps-psf-${ANGEL_VERSION}.jar,angel-math-${ANGEL_UTILS_VERSION}.jar,angel-ps-graph-${ANGEL_VERSION}.jar
 
 sona_jar=core-${SONA_VERSION}.jar,angelml-${SONA_VERSION}.jar
-sona_external_jar=fastutil-7.1.0.jar,htrace-core-2.05.jar,sizeof-0.3.0.jar,kryo-shaded-4.0.0.jar,minlog-1.3.0.jar,memory-0.8.1.jar,commons-pool-1.6.jar,netty-all-4.1.1.Final.jar,hll-1.6.0.jar,jniloader-1.1.jar,native_system-java-1.1.jar,arpack_combined_all-0.1.jar,core-1.1.2.jar,netlib-native_ref-linux-armhf-1.1-natives.jar,netlib-native_ref-linux-i686-1.1-natives.jar,netlib-native_ref-linux-x86_64-1.1-natives.jar,netlib-native_system-linux-armhf-1.1-natives.jar,netlib-native_system-linux-i686-1.1-natives.jar,netlib-native_system-linux-x86_64-1.1-natives.jar,jettison-1.4.0.jar,json4s-native_2.11-3.2.11.jar
-#sona_external_jar=fastutil-7.1.0.jar,htrace-core-2.05.jar,sizeof-0.3.0.jar,kryo-shaded-4.0.0.jar,minlog-1.3.0.jar,memory-0.8.1.jar,commons-pool-1.6.jar,netty-all-4.1.1.Final.jar,hll-1.6.0.jar,json4s-jackson_2.11-3.4.2.jar
+sona_external_jar=fastutil-7.1.0.jar,htrace-core-2.05.jar,sizeof-0.3.0.jar,kryo-shaded-4.0.0.jar,minlog-1.3.0.jar,memory-0.8.1.jar,commons-pool-1.6.jar,netty-all-4.1.17.Final.jar,hll-1.6.0.jar,jniloader-1.1.jar,native_system-java-1.1.jar,arpack_combined_all-0.1.jar,core-1.1.2.jar,netlib-native_ref-linux-armhf-1.1-natives.jar,netlib-native_ref-linux-i686-1.1-natives.jar,netlib-native_ref-linux-x86_64-1.1-natives.jar,netlib-native_system-linux-armhf-1.1-natives.jar,netlib-native_system-linux-i686-1.1-natives.jar,netlib-native_system-linux-x86_64-1.1-natives.jar,jettison-1.4.0.jar,json4s-native_2.11-3.2.11.jar
 
 dist_jar=${angel_ps_external_jar},${angel_ps_jar},${scala_jar},${sona_jar}
 local_jar=${sona_external_jar},${angel_ps_jar},${sona_jar}
 
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark.angel</groupId>
     <artifactId>sona</artifactId>
-    <version>0.1.0-SNAPSHOT</version>
+    <version>0.1.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -57,7 +57,7 @@
     <dependency>
       <groupId>com.tencent.angel</groupId>
       <artifactId>angel-format</artifactId>
-      <version>0.1.0-SNAPSHOT</version>
+      <version>0.1.1</version>
       <exclusions>
         <exclusion>
           <groupId>org.json4s</groupId>
@@ -76,7 +76,7 @@
     <dependency>
       <groupId>com.tencent.angel</groupId>
       <artifactId>angel-mlcore</artifactId>
-      <version>0.1.0-SNAPSHOT</version>
+      <version>0.1.1</version>
       <exclusions>
         <exclusion>
           <groupId>org.json4s</groupId>
@@ -91,7 +91,7 @@
     <dependency>
       <groupId>com.tencent.angel</groupId>
       <artifactId>angel-automl</artifactId>
-      <version>0.1.0-SNAPSHOT</version>
+      <version>0.1.0</version>
     </dependency>
     <dependency>
       <groupId>com.tencent.angel</groupId>
 
@@ -67,8 +67,6 @@ object AutoJsonRunnerExample {
 
   def main(args: Array[String]): Unit = {
 
-    val defaultInput = "hdfs://tl-nn-tdw.tencent-distribute.com:54310/user/tdw_rachelsun/joyjxu/angel-test/daw_data/census_148d_train.libsvm"
-    val defaultOutput = "hdfs://tl-nn-tdw.tencent-distribute.com:54310/user/tdw_rachelsun/joyjxu/trained_models"
     val defaultJsonFile = "No json file parsed..."
     val defaultDataFormat = "org.apache.spark.ml.source.libsvm.LibSVMFileFormat"
 
 
@@ -51,17 +51,18 @@ object JsonRunnerExamples {
     val dataFormat = params.getOrElse("dataFormat", "libsvm")//libsvm,dummy
     val actionType = params.getOrElse("actionType", "train")
     val jsonFile = params.getOrElse("jsonFile", "")
-    val input = params.get("data").get
-    val modelPath = params.get("modelPath").get
-    val predict = params.get("predictPath").get
+    val input = params.getOrElse("data", "")
+    val modelPath = params.getOrElse("modelPath", "")
+    val predict = params.getOrElse("predictPath", "")
     val numBatch = params.getOrElse("numBatch", "10").toInt
     val maxIter = params.getOrElse("maxIter", "2").toInt
     val lr = params.getOrElse("lr", "0.1").toFloat
     val numField = params.getOrElse("numField", "13").toInt
     val task = params.getOrElse("task", "classification")
+    val master = params.getOrElse("master", "yarn-cluster")
 
     val spark = SparkSession.builder()
-      .master("yarn-cluster")
+      .master(master)
       .appName("AngelClassification")
       .getOrCreate()
 
 
@@ -14,7 +14,7 @@
  * the License.
  *
  */
-package org.apache.spark.angel.examples.oneline_learning
+package org.apache.spark.angel.examples.online_learning
 
 import com.tencent.angel.conf.AngelConf
 import com.tencent.angel.ml.math2.utils.{LabeledData, RowType}
 
@@ -14,7 +14,7 @@
  * the License.
  *
  */
-package org.apache.spark.angel.examples.oneline_learning
+package org.apache.spark.angel.examples.online_learning
 
 import com.tencent.angel.conf.AngelConf
 import com.tencent.angel.ml.math2.utils.RowType
 
@@ -25,7 +25,7 @@
   <parent>
     <groupId>org.apache.spark.angel</groupId>
     <artifactId>sona</artifactId>
-    <version>0.1.0-SNAPSHOT</version>
+    <version>0.1.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -36,7 +36,7 @@
     <dependency>
       <groupId>com.tencent.angel</groupId>
       <artifactId>angel-format</artifactId>
-      <version>0.1.0-SNAPSHOT</version>
+      <version>0.1.1</version>
       <exclusions>
         <exclusion>
           <groupId>io.netty</groupId>
@@ -51,7 +51,7 @@
     <dependency>
       <groupId>com.tencent.angel</groupId>
       <artifactId>angel-mlcore</artifactId>
-      <version>0.1.0-SNAPSHOT</version>
+      <version>0.1.1</version>
       <exclusions>
         <exclusion>
           <groupId>org.json4s</groupId>
 
@@ -1,19 +1,19 @@
-/*
- * Tencent is pleased to support the open source community by making Angel available.
- *
- * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- *
- * https://opensource.org/licenses/Apache-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- *
- */
+/*
+ * Tencent is pleased to support the open source community by making Angel available.
+ *
+ * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/Apache-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ */
 package com.tencent.angel.sona.util
 
 import java.util.UUID
@@ -100,11 +100,12 @@ object ConfUtils extends CompatibleLogging {
     val appName = conf.get("spark.app.name") + "-ps"
     val queue = conf.get("spark.yarn.queue", "root.default")
 
-    /** mode: YARN or LOCAL */
+    /** mode: YARN , KUBERNETES or LOCAL */
     val master = conf.getOption("spark.master")
     val isLocal = if (master.isEmpty || master.get.toLowerCase.startsWith("local")) true else false
     val deployMode = if (isLocal) "LOCAL" else conf.get("spark.ps.mode", DEFAULT_ANGEL_DEPLOY_MODE)
 
+    val masterMem = conf.getSizeAsGb("spark.angel.master.memory", "2g").toInt
     val psNum = conf.getInt("spark.ps.instances", 1)
     val psCores = conf.getInt("spark.ps.cores", 1)
     val psMem = conf.getSizeAsGb("spark.ps.memory", "4g").toInt
@@ -147,7 +148,13 @@ object ConfUtils extends CompatibleLogging {
     hadoopConf.set(ANGEL_ACTION_TYPE, "train")
     hadoopConf.set(ANGEL_SAVE_MODEL_PATH, tempPath)
 
+    if (deployMode == "KUBERNETES") {
+      hadoopConf.set(ANGEL_KUBERNETES_MASTER, master.get.substring("k8s://".length))
+    }
+
     // Setting resource
+    hadoopConf.setInt(ANGEL_AM_MEMORY_GB, masterMem)
+
     hadoopConf.setInt(ANGEL_PS_NUMBER, psNum)
     hadoopConf.setInt(ANGEL_PS_CPU_VCORES, psCores)
     hadoopConf.setInt(ANGEL_PS_MEMORY_GB, psMem)
@@ -165,9 +172,15 @@ object ConfUtils extends CompatibleLogging {
     hadoopConf.setInt(ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS, 100000000)
     hadoopConf.set(ANGEL_LOG_PATH, tempPath)
 
-    // add user resource files
-    addUserResourceFiles(conf, hadoopConf)
+    if (deployMode != "KUBERNETES") {
+      // add user resource files
+      addUserResourceFiles(conf, hadoopConf)
+    }
 
+    // Some other settings
+    conf.getAllWithPrefix("spark.angel").foreach {
+      case (key, value) => hadoopConf.set(s"angel$key", value)
+    }
     hadoopConf
   }
 
 
@@ -24,7 +24,7 @@
   <parent>
     <groupId>org.apache.spark.angel</groupId>
     <artifactId>sona</artifactId>
-    <version>0.1.0-SNAPSHOT</version>
+    <version>0.1.0</version>
   </parent>
 
   <artifactId>sona-dist</artifactId>
 
@@ -0,0 +1,117 @@
+# Training Factorization Machine with FTRL on Spark on Angel
+
+> FM (Factorization Machine) is an algorithm based on matrix decomposition that can make predictions on any real-valued feature vector.
+
+> Its main advantages include: 
+
+- can handle highly sparse data; 
+- linear computational complexity
+
+> FTRL (Follow-the-regularized-leader) is an optimization algorithm which is widely deployed in online learning. Employing FTRL is easy in Spark-on-Angel, and you can train a model with billions, even tens of billions, of dimensions once you have enough machines.
+
+Here, we will use FTRL Optimizer to update the parameters of FM.
+
+If you are not familiar with programming on Spark-on-Angel, please first refer to the [Programming Guide for Spark-on-Angel](https://github.com/Angel-ML/angel/blob/master/docs/programmers_guide/spark_on_angel_programing_guide_en.md).
+
+## Factorization Model
+
+![model](http://latex.codecogs.com/png.latex?\dpi{150}\hat{y}(x)=b+\sum_{i=1}^n{w_ix_i}+\sum_{i=1}^{n-1}\sum_{j=i+1}^n<v_i,v_j>x_ix_j)
+
+where ![](http://latex.codecogs.com/png.latex?\dpi{100}\inline%20<v_i,v_j>) is the dot product of two k-dimensional vectors:
+
+![dot](http://latex.codecogs.com/png.latex?\dpi{150}\inline%20<v_i,v_j>=\sum_{f=1}^kv_{i,f}\cdot%20v_{j,f})
+
+model parameters:
+![parameter](http://latex.codecogs.com/png.latex?\dpi{100}\inline%20b\in%20R,w\in%20R^n,V\in%20R^{n\times%20k})
+, where n is the number of features, ![](http://latex.codecogs.com/png.latex?\dpi{100}\inline%20v_i) represents feature i as a vector of k factors, and k is a hyperparameter that determines the dimensionality of the factorization.
+
+
+## Using the FTRL-FM
+
+```scala
+
+import com.tencent.angel.ml.math2.utils.RowType
+import org.apache.spark.angel.ml.online_learning.FtrlFM
+
+// allocate a ftrl optimizer with (lambda1, lambda2, alpha, beta)
+val optim = new FtrlFM(lambda1, lambda2, alpha, beta)
+// initializing the model
+optim.init(dim, factor)
+```
+
+There are four hyper-parameters for the FTRL optimizer: lambda1, lambda2, alpha and beta. We allocate an FTRL optimizer with these four hyper-parameters. The next step is to initialize a FtrlFM model. FtrlFM has two matrices, `first` and `second`: `first` contains z, n and w, in which z and n are used to initialize or update the parameter w of FM; `second` contains z, n and v, in which z and n are used to initialize or update the parameter v of FM. In the code above, we allocate `first` as a sparse distributed matrix with 3 rows and dim columns, and `second` as a sparse distributed matrix with 3 * factor rows and dim columns.
+
+### set the dimension
+In the scenario of online learning, the feature index can range over (int.min, int.max) and is usually generated by a hash function. In Spark-on-Angel, you can set dim=-1 when your feature index ranges over (int.min, int.max) and rowType is sparse. If the feature index ranges over [0, n), you can set dim=n.
+
+
+## Training with Spark
+
+### loading data
+Use the RDD interface to load the data and parse it into vectors.
+
+```scala
+val data = sc.textFile(input).repartition(partNum)
+      .map(s => (DataLoader.parseIntFloat(s, dim), DataLoader.parseLabel(s, false)))
+      .map {
+        f =>
+          f._1.setY(f._2)
+          f._1
+      }
+```
+### training model
+
+```scala
+val size = data.count()
+for (epoch <- 1 to numEpoch) {
+    val totalLoss = data.mapPartitions {
+        case iterator =>
+        // for each partition
+          val loss = iterator
+            .sliding(batchSize, batchSize)
+            .zipWithIndex
+            .map(f => optim.optimize(f._2, f._1.toArray)).sum
+          Iterator.single(loss)
+    }.sum()
+    println(s"epoch=$epoch loss=${totalLoss / size}")
+}
+```
+
+
+### saving model
+
+```scala
+output = "hdfs://xxx"
+optim.weight
+optim.save(output + "/back")
+optim.saveWeight(output)
+```
+
+### Submit Command
+
+```shell
+source ./bin/spark-on-angel-env.sh
+ 
+$SPARK_HOME/bin/spark-submit \
+    --master yarn-cluster \
+    --conf spark.yarn.allocation.am.maxMemory=55g \
+    --conf spark.yarn.allocation.executor.maxMemory=55g \
+    --conf spark.driver.maxResultSize=20g \
+    --conf spark.kryoserializer.buffer.max=2000m\
+    --conf spark.ps.jars=$SONA_ANGEL_JARS \
+    --conf spark.ps.instances=1 \
+    --conf spark.ps.cores=2 \
+    --conf spark.ps.memory=5g \
+    --conf spark.ps.log.level=INFO \
+    --conf spark.offline.evaluate=200\
+    --jars $SONA_SPARK_JARS  \
+    --name "FTRLFM on Spark-on-Angel" \
+    --driver-memory 5g \
+    --num-executors 5 \
+    --executor-cores 2 \
+    --executor-memory 2g \
+    --class org.apache.spark.angel.examples.online_learning.FtrlFMExample \
+    ./lib/angelml-${SONA_VERSION}.jar \
+    input:$input modelPath:$model dim:$dim batchSize:$batchSize actionType:train factor:5
+```
+[detail parameters](../../angelml/src/main/scala/org/apache/spark/angel/examples/online_learning/FtrlFMExample.scala) 
@@ -0,0 +1,118 @@
+# Training Logistic Regression with FTRL on Spark on Angel
+
+FTRL (Follow-the-regularized-leader) is an optimization algorithm which is widely deployed in online learning. Employing FTRL is easy in Spark-on-Angel, and you can train a model with billions, even tens of billions, of dimensions once you have enough machines.
+
+If you are not familiar with programming on Spark-on-Angel, please first refer to the [Programming Guide for Spark-on-Angel](https://github.com/Angel-ML/angel/blob/master/docs/programmers_guide/spark_on_angel_programing_guide_en.md).
+
+## FTRL Optimizer
+
+The FTRL algorithm combines the advantages of both the FOBOS and RDA algorithms. It not only retains the high precision of FOBOS, but also produces the better sparsity of RDA at the cost of a small loss in precision.
+The update formula for the feature weight of the algorithm (Reference 1) is:
+
+![](../imgs/ftrl_lr_w.png)
+
+where ![](http://latex.codecogs.com/png.latex?\dpi{100}\inline%20G^{(1:t)}=\sum_{s=1}^t{G^{s}}) represents the accumulated gradient of the loss function over the first t iterations.
+
+The update formula for the feature weight of the algorithm can be decomposed into N independent scalar minimization problems for each dimension of feature weight.
+
+![](../imgs/ftrl_lr_w_update.png)
+
+![](../imgs/ftrl_lr_d_t.png)
+
+where the ![](http://latex.codecogs.com/png.latex?\dpi{100}\inline%20{z_i}) and ![](http://latex.codecogs.com/png.latex?\dpi{100}\inline%20{n_i}) are updated as follows:
+
+![](http://latex.codecogs.com/png.latex?\dpi{100}\inline%20{z_i}^{(t)}={z_i}^{(t-1)}\+{g_i}^t-\(\frac{1}{{\eta_i}^{(t)}}\-\frac{1}{{\eta_i}^{(t-1)}}\){w_i}^{(t)})
+
+![](http://latex.codecogs.com/png.latex?\dpi{100}\inline%20{n_i}^{(t)}={n_i}^{(t-1)}\+({g_i}^{(t)})^2)
+
+
+## Using the FTRL Optimizer
+
+```scala
+
+import com.tencent.angel.ml.math2.utils.RowType
+import org.apache.spark.angel.ml.online_learning.FTRL
+
+// allocate a ftrl optimizer with (lambda1, lambda2, alpha, beta)
+val optim = new FTRL(lambda1, lambda2, alpha, beta)
+// initializing the model
+optim.init(dim)
+```
+
+There are four hyper-parameters for the FTRL optimizer: lambda1, lambda2, alpha and beta. We allocate an FTRL optimizer with these four hyper-parameters. The next step is to initialize an FTRL model. There are three vectors for FTRL: z, n and w. In the code above, we allocate a sparse distributed matrix with 3 rows and dim columns.
+
+### set the dimension
+In the scenario of online learning, the feature index can range over (long.min, long.max) and is usually generated by a hash function. In Spark-on-Angel, you can set dim=-1 when your feature index ranges over (long.min, long.max) and rowType is sparse. If the feature index ranges over [0, n), you can set dim=n.
+
+## Training with Spark
+
+### loading data
+Use the RDD interface to load the data and parse it into vectors.
+
+```scala
+val data = sc.textFile(input).repartition(partNum)
+      .map(s => (DataLoader.parseLongDouble(s, dim), DataLoader.parseLabel(s, false)))
+      .map {
+        f =>
+          f._1.setY(f._2)
+          f._1
+      }
+```
+### training model
+
+```scala
+val size = data.count()
+for (epoch <- 1 to numEpoch) {
+    val totalLoss = data.mapPartitions {
+        case iterator =>
+        // for each partition
+          val loss = iterator
+            .sliding(batchSize, batchSize)
+            .map(f => optim.optimize(f.toArray)).sum
+          Iterator.single(loss)
+    }.sum()
+    println(s"epoch=$epoch loss=${totalLoss / size}")
+}
+```
+
+
+### saving model
+
+```scala
+output = "hdfs://xxx"
+optim.weight
+optim.saveWeight(output)
+optim.save(output + "/back")
+```
+
+### Submit Command
+
+```shell
+source ./bin/spark-on-angel-env.sh
+ 
+$SPARK_HOME/bin/spark-submit \
+    --master yarn-cluster \
+    --conf spark.yarn.allocation.am.maxMemory=55g \
+    --conf spark.yarn.allocation.executor.maxMemory=55g \
+    --conf spark.driver.maxResultSize=20g \
+    --conf spark.kryoserializer.buffer.max=2000m\
+    --conf spark.ps.jars=$SONA_ANGEL_JARS \
+    --conf spark.ps.instances=1 \
+    --conf spark.ps.cores=2 \
+    --conf spark.ps.memory=5g \
+    --conf spark.ps.log.level=INFO \
+    --conf spark.offline.evaluate=200\
+    --jars $SONA_SPARK_JARS  \
+    --name "FTRL on Spark-on-Angel" \
+    --driver-memory 5g \
+    --num-executors 5 \
+    --executor-cores 2 \
+    --executor-memory 2g \
+    --class org.apache.spark.angel.examples.online_learning.FTRLExample \
+    ./lib/angelml-${SONA_VERSION}.jar \
+    input:$input modelPath:$model dim:$dim batchSize:$batchSize actionType:train
+```
+[detail parameters](../../angelml/src/main/scala/org/apache/spark/angel/examples/online_learning/FTRLExample.scala) 
+
+## References
+1. H. Brendan McMahan, Gary Holt, D. Sculley, Michael Young. Ad Click Prediction: a View from the Trenches. KDD’13, August 11–14, 2013
@@ -93,6 +93,6 @@ $SPARK_HOME/bin/spark-submit \
   --executor-cores 4 \
   --executor-memory 10g \
   --class org.apache.spark.angel.examples.graph.LINEExample2 \
-  ./lib/angelml-0.1.0-SNAPSHOT.jar
+  ./lib/angelml-0.1.0.jar \
   input:$input output:$output embedding:128 negative:5 epoch:100 stepSize:0.01 batchSize:1000 numParts:2 subSample:false remapping:false order:2 interval:5
 ```
@@ -0,0 +1,141 @@
+# SONA(Spark on Angel) Quick Start 
+
+The SONA job is essentially a Spark Application with an associated Angel-PS application. 
+After the job is successfully submitted, there will be two separate Applications on the cluster, 
+one is the Spark Application and the other is the Angel-PS Application. The two Applications are not coupled. 
+If the SONA job is deleted, users are required to kill both the Spark and Angel-PS Applications manually.
+
+## Deployment Process
+
+1. **Install Spark**
+2. **Install SONA**
+	1. unzip sona-\<version\>-bin.zip
+	2. set these environment variables: `SPARK_HOME`, `SONA_HOME`, `SONA_HDFS_HOME` in sona-\<version\>-bin/bin/spark-on-angel-env.sh
+	3. put the extracted folder `sona-<version>-bin` into `SONA_HDFS_HOME`
+
+3. **Configure Environment Variables**
+
+	- import the environment script: `source ./spark-on-angel-env.sh`
+	- configure the jar package locations: `--conf spark.ps.jars=$SONA_ANGEL_JARS` and `--jars $SONA_SPARK_JARS`
+
+## Submit Task
+
+After writing your SONA program, package it, and then use the `spark-submit` script to submit the task.
+
+
+## Run Example (Logistic Regression)
+
+```bash
+#! /bin/bash
+- cd sona-<version>-bin/bin; 
+- ./SONA-example
+```
+
+The script is as follows:
+
+```bash
+#!/bin/bash
+
+source ./spark-on-angel-env.sh
+
+$SPARK_HOME/bin/spark-submit \
+    --master yarn-cluster \
+    --conf spark.ps.jars=$SONA_ANGEL_JARS \
+    --conf spark.ps.instances=10 \
+    --conf spark.ps.cores=2 \
+    --conf spark.ps.memory=6g \
+    --jars $SONA_SPARK_JARS\
+    --name "LR-spark-on-angel" \
+    --files <logreg.json path> \
+    --driver-memory 10g \
+    --num-executors 10 \
+    --executor-cores 2 \
+    --executor-memory 4g \
+    --class org.apache.spark.angel.examples.JsonRunnerExamples \
+    ./../lib/angelml-${SONA_VERSION}.jar \
+    data:<input_path> \
+    modelPath:<output_path> \
+    jsonFile:./logreg.json \
+    lr:0.1
+```
+
+> Attention: the Angel PS parameters must be set: `spark.ps.instances`, `spark.ps.cores`, `spark.ps.memory`.
+> `--files <logreg.json path>` uploads your local JSON file; here `<logreg.json path>` is the local path of the JSON file (such as xx/xx/logreg.json).
+> `jsonFile:./logreg.json` tells the job to use the JSON file you uploaded.
+> Resources such as executors, driver and PS depend on your dataset.
+
+
+## LR Json Example 
+
+- [detail json](https://github.com/Angel-ML/angel/blob/master/docs/basic/json_conf_en.md)
+- [data](https://github.com/Angel-ML/angel/tree/master/data/a9a/a9a_123d_train.libsvm)
+
+```json
+{
+  "data": {
+    "format": "libsvm",
+    "indexrange": 123,
+    "validateratio": 0.1,
+    "sampleratio": 1.0
+  },
+  "train": {
+    "epoch": 10,
+    "lr": 0.5
+  },
+  "model": {
+    "modeltype": "T_DENSE_SPARSE"
+  },
+  "default_optimizer": {
+    "type": "momentum",
+    "momentum": 0.9,
+    "reg2": 0.001
+  },
+  "layers": [
+    {
+      "name": "wide",
+      "type": "simpleinputlayer",
+      "outputdim": 1,
+      "transfunc": "identity"
+    },
+    {
+      "name": "simplelosslayer",
+      "type": "simplelosslayer",
+      "lossfunc": "logloss",
+      "inputlayer": "wide"
+    }
+  ]
+}
+
+```
+
+Users are encouraged to write their own programs instead of just using the bash script. Here is an example: 
+
+```scala
+import com.tencent.angel.sona.core.DriverContext
+import org.apache.spark.angel.ml.classification.AngelClassifier
+import org.apache.spark.angel.ml.feature.LabeledPoint
+import org.apache.spark.angel.ml.linalg.Vectors
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{DataFrameReader, SparkSession}
+
+val spark = SparkSession.builder()
+  .master("local[2]")
+  .appName("AngelClassification")
+  .getOrCreate()
+
+val libsvm = spark.read.format("libsvmex")
+val dummy = spark.read.format("dummy")
+
+val trainData = libsvm.load("./data/angel/a9a/a9a_123d_train.libsvm")
+
+val classifier = new AngelClassifier()
+  .setModelJsonFile("./angelml/src/test/jsons/logreg.json")
+  .setNumClass(2)
+  .setNumBatch(10)
+  .setMaxIter(2)
+  .setLearningRate(0.1)
+
+val model = classifier.fit(trainData)
+
+model.write.overwrite().save("trained_models/lr")
+```
@@ -21,7 +21,7 @@
     <modelVersion>4.0.0</modelVersion>
     <groupId>org.apache.spark.angel</groupId>
     <artifactId>sona</artifactId>
-    <version>0.1.0-SNAPSHOT</version>
+    <version>0.1.0</version>
     <packaging>pom</packaging>
     <name>Spark On Angel Project Parent POM</name>
     <url>http://spark.apache.org/</url>
@@ -36,7 +36,7 @@
         <scala.binary.version>2.11</scala.binary.version>
         <scala.version>2.11.8</scala.version>
         <spark.version>2.3.0</spark.version>
-        <angel.version>3.0.0-SNAPSHOT</angel.version>
+        <angel.version>3.0.0</angel.version>
         <breeze.version>0.13</breeze.version>
         <!-- Modules that copy jars to the build directory should do so under this location. -->
         <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
Original file line number	Diff line number	Diff line change
`@@ -14,7 +14,7 @@`
`14`	`14`	`* the License.`
`15`	`15`	`*`
`16`	`16`	`*/`
`17`		`-package org.apache.spark.angel.examples.oneline_learning`
	`17`	`+package org.apache.spark.angel.examples.online_learning`
`18`	`18`
`19`	`19`	`import com.tencent.angel.conf.AngelConf`
`20`	`20`	`import com.tencent.angel.ml.math2.utils.{LabeledData, RowType}`