add DJL text similarity example

paulk-asert · paulk-asert · commit 92df1adcee23 · 2022-08-02T21:33:37.000+10:00
diff --git a/docs/images/textsimularityheatmap.png b/docs/images/textsimularityheatmap.png
diff --git a/subprojects/LanguageProcessingDjl/LanguageProcessingDjl.gradle b/subprojects/LanguageProcessingDjl/LanguageProcessingDjl.gradle
@@ -0,0 +1,42 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Build script for the LanguageProcessingDjl subproject: applies the Groovy and
+// application plugins and wires the 'run' task to the UniversalSentenceEncoder script.
+apply plugin: 'groovy'
+apply plugin: 'application'
+
+repositories {
+    mavenCentral()
+}
+
+// Name of the class to launch; reused for mainClass and the 'run' task description below.
+ext.appName = 'UniversalSentenceEncoder'
+
+application {
+    mainClass = appName
+}
+
+tasks.named('run').configure {
+    description = "Run $appName as a JVM application/Groovy script"
+}
+
+// NOTE(review): djlVersion, groovy4Version, smileVersion and slf4jVersion are not
+// defined in this file; presumably they come from the root project's properties.
+dependencies {
+    implementation "ai.djl:api:$djlVersion"
+    implementation "org.apache.groovy:groovy:$groovy4Version"
+    implementation "com.github.haifengl:smile-plot:$smileVersion"
+    implementation "com.github.haifengl:smile-math:$smileVersion"
+    // The TensorFlow engine, model zoo and native binaries are needed only at run time.
+    runtimeOnly "ai.djl.tensorflow:tensorflow-engine:$djlVersion"
+    runtimeOnly "ai.djl.tensorflow:tensorflow-model-zoo:$djlVersion"
+    // NOTE(review): version is hard-coded here, unlike the other DJL artifacts —
+    // confirm that 2.4.1 is the native build matching $djlVersion.
+    runtimeOnly "ai.djl.tensorflow:tensorflow-native-auto:2.4.1"
+    runtimeOnly "org.slf4j:slf4j-jdk14:$slf4jVersion"
+}
diff --git a/subprojects/LanguageProcessingDjl/README.md b/subprojects/LanguageProcessingDjl/README.md
@@ -0,0 +1,30 @@
+<!--
+SPDX-License-Identifier: Apache-2.0
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# Language processing with DJL and TensorFlow
+
+Neural networks with numerous layers of nodes allow for more complex, rich and _deeper_ processing and understanding.
+This example computes sentence embeddings and uses them to measure the semantic similarity between sentences.
+It uses a pre-trained model and the
+[Deep Java Library](https://djl.ai/) backed by the
+[TensorFlow](https://www.tensorflow.org/) engine.
+
+![Semantic textual similarity heatmap](../../docs/images/textsimularityheatmap.png)
+
+Groovy code examples can be found in the [src/main/groovy](src/main/groovy) subdirectory.
+If you have opened the repo in IntelliJ (or your favourite IDE) you should be able to execute the examples directly in the IDE.
+
+__Requirements__: The code has been tested on JDK8, JDK11 and JDK17.
diff --git a/subprojects/LanguageProcessingDjl/src/main/groovy/UniversalSentenceEncoder.groovy b/subprojects/LanguageProcessingDjl/src/main/groovy/UniversalSentenceEncoder.groovy
@@ -0,0 +1,81 @@
+import ai.djl.Application
+import ai.djl.ndarray.NDArrays
+import ai.djl.ndarray.NDList
+import ai.djl.repository.zoo.Criteria
+import ai.djl.training.util.ProgressBar
+import ai.djl.translate.NoBatchifyTranslator
+import ai.djl.translate.TranslatorContext
+import smile.plot.swing.Heatmap
+import smile.plot.swing.Palette
+
+import static smile.math.MathEx.dot
+
+/*
+ * An example of inference using a universal sentence encoder model from TensorFlow Hub.
+ * For more info see: https://tfhub.dev/google/universal-sentence-encoder/4
+ * Inspired by: https://github.com/deepjavalibrary/djl/blob/master/examples/src/main/java/ai/djl/examples/inference/UniversalSentenceEncoder.java
+ */
+
+/**
+ * Translator that sends an array of sentences to the model as one stacked input
+ * and returns one row of doubles per entry along the first axis of the output.
+ */
+class MyTranslator implements NoBatchifyTranslator<String[], double[][]> {
+    /** Creates one NDArray per input string and wraps the stacked result in a single-element NDList. */
+    @Override
+    NDList processInput(TranslatorContext ctx, String[] raw) {
+        var manager = ctx.getNDManager()
+        var perSentence = raw.collect { String text -> manager.create(text) }
+        var stacked = NDArrays.stack(new NDList(perSentence))
+        new NDList(stacked)
+    }
+
+    /** Slices the single output array along its first axis and converts each slice to a row of doubles. */
+    @Override
+    double[][] processOutput(TranslatorContext ctx, NDList list) {
+        var output = list.singletonOrThrow()
+        long rowCount = output.getShape().get(0)
+        var rows = (0..<rowCount).collect { idx -> output.get(idx).toFloatArray() }
+        rows as double[][]
+    }
+}
+
+/**
+ * Loads the universal sentence encoder model through DJL's TensorFlow engine
+ * and runs it over the given sentences using MyTranslator.
+ *
+ * @param inputs the sentences to pass to the predictor
+ * @return the predictor's result for the inputs (typed as double[][] in the criteria)
+ */
+def predict(String[] inputs) {
+    var encoderCriteria = Criteria.builder()
+        .optApplication(Application.NLP.TEXT_EMBEDDING)
+        .setTypes(String[], double[][])
+        .optModelUrls("https://storage.googleapis.com/tfhub-modules/google/universal-sentence-encoder/4.tar.gz")
+        .optTranslator(new MyTranslator())
+        .optEngine("TensorFlow")
+        .optProgress(new ProgressBar())
+        .build()
+    // Model and predictor are both released when the try block exits.
+    try (var encoder = encoderCriteria.loadModel(); var session = encoder.newPredictor()) {
+        return session.predict(inputs)
+    }
+}
+// Sample sentences: four about exercise followed by four about plants/food.
+String[] inputs = [
+    "Cycling is low impact and great for cardio",
+    "Swimming is low impact and good for fitness",
+    // Fixed typo: was "Palates" (sense of taste), which changes the sentence's meaning.
+    "Pilates is good for fitness and flexibility",
+    "Weights are good for strength and fitness",
+    "Orchids can be tricky to grow",
+    "Sunflowers are fun to grow",
+    "Radishes are easy to grow",
+    "The taste of radishes grows on you after a while",
+]
+var k = inputs.size()
+
+// One embedding row per input sentence.
+var embeddings = predict(inputs)
+
+// z[i][j] holds the inner product of the embeddings of sentences i and j,
+// used here as the pairwise similarity score.
+def z = new double[k][k]
+for (i in 0..<k) {
+    println "Embedding for: ${inputs[i]}\n${Arrays.toString(embeddings[i])}"
+    for (j in 0..<k) {
+        z[i][j] = dot(embeddings[i], embeddings[j])
+    }
+}
+
+// Show the k x k similarity matrix as a heatmap, labelled by sentence on both
+// axes, using a reversed 20-colour heat palette.
+new Heatmap(inputs, inputs, z, Palette.heat(20).reverse()).canvas().with {
+    title = 'Semantic textual similarity'
+    setAxisLabels('', '')
+    window()
+}