Merge from development #836, #837, #838, #839 (#840)

Eyal-Danieli · yonishelach · web-flow · commit 217fe7420b87 · 2025-01-05T09:26:58.000+02:00
* fix PyYAML loading (#837) * [text to audio generator] Replaced bark with openai tts models (#836) * [Text to audio generator] Add speech engine (#838) * [text to audio generator] Replaced bark with openai tts models * [text to audio generator] Fix base url env var * fix version * Add speech engine * after review * [auto-trainer] update test requirements (#839) * [Build] Fix html links, Add <function>.html as source in documentation * Update CI temporarily and update index * [XGB-Custom] Fix test artifact key name * [XGB-Serving][XGB-Test][XGB-Trainer] Fix tests - artifact key * [Build] Install python 3.9 when testing (#618) * [Build] Update python version in CI (#620) * [Build] Install python 3.9 when testing * [Build] Update python version in CI * . * Revert "[Build] Update python version in CI (#620)" (#621) This reverts commit 0cd1f15. * Revert "[Build] Install python 3.9 when testing (#618)" (#619) This reverts commit 3301415. * [Build] Build with python 3.9 (#622) * [Build] Build with python 3.9 * . * Update requirements.txt --------- Co-authored-by: Yonatan Shelach <92271540+yonishelach@users.noreply.github.com>
diff --git a/auto_trainer/requirements.txt b/auto_trainer/requirements.txt
@@ -1,4 +1,4 @@
 pandas
-scikit-learn
-xgboost
-plotly
+scikit-learn<1.4.0
+xgboost<2.0.0
+plotly
diff --git a/cli/item_yaml.py b/cli/item_yaml.py
@@ -40,7 +40,7 @@ def update_functions_yaml(root_directory: str,
         if (inner_dir / item_yaml).exists():
             path = str(inner_dir)+"/"+item_yaml
             stream = open(path, 'r')
-            data = yaml.load(stream)
+            data = yaml.load(stream=stream, Loader=yaml.FullLoader)
             if version:
                 data['version'] = version
             if mlrun_version:
diff --git a/cli/test_suite.py b/cli/test_suite.py
@@ -599,7 +599,7 @@ def clean_pipenv(directory: str):
 # load item yaml
 def load_item(path):
     with open(path, 'r') as stream:
-        data = yaml.load(stream)
+        data = yaml.load(stream=stream, Loader=yaml.FullLoader)
     return data
 
 
diff --git a/text_to_audio_generator/function.yaml b/text_to_audio_generator/function.yaml
diff --git a/text_to_audio_generator/item.yaml b/text_to_audio_generator/item.yaml
@@ -13,7 +13,7 @@ labels:
   author: yonatans
 maintainers: []
 marketplaceType: ''
-mlrunVersion: 1.5.1
+mlrunVersion: 1.7.1
 name: text_to_audio_generator
 platformVersion: 3.5.3
 spec:
@@ -22,8 +22,8 @@ spec:
   image: mlrun/mlrun
   kind: job
   requirements:
-    - bark
     - torchaudio
+    - pydub
 url: ''
-version: 1.2.0
+version: 1.3.0
 test_valid: True
diff --git a/text_to_audio_generator/requirements.txt b/text_to_audio_generator/requirements.txt
@@ -1,2 +1,4 @@
 bark
-torchaudio>=2.1.0
+torchaudio>=2.1.0
+openai>=1.58.0
+pydub
diff --git a/text_to_audio_generator/test_text_to_audio_generator.py b/text_to_audio_generator/test_text_to_audio_generator.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import mlrun
+import os
 import tempfile
+
+import mlrun
 import pytest
 
 
@@ -31,6 +33,7 @@ def test_generate_multi_speakers_audio(file_format, bits_per_sample):
                     "v2/en_speaker_0",
                     "v2/en_speaker_1",
                 ],
+                "engine": "bark",
                 "use_small_models": True,
                 "use_gpu": False,
                 "offload_cpu": True,
@@ -45,6 +48,42 @@ def test_generate_multi_speakers_audio(file_format, bits_per_sample):
             ],
             artifact_path=test_directory,
         )
-    assert function_run.error == "Run state (completed) is not in error state"
+    assert function_run.error == ""
     for key in ["audio_files", "audio_files_dataframe", "text_to_speech_errors"]:
         assert key in function_run.outputs and function_run.outputs[key] is not None
+
+
+@pytest.mark.skipif(
+    # Both variables are required, so a single missing one is enough to skip.
+    condition=os.getenv("OPENAI_API_BASE") is None
+    or os.getenv("OPENAI_API_KEY") is None,
+    reason="OpenAI API key and base URL are required to run this test",
+)
+@pytest.mark.parametrize("file_format,bits_per_sample", [("wav", 8), ("mp3", None)])
+def test_generate_multi_speakers_audio_openai(file_format, bits_per_sample):
+    """Generate multi-speaker audio with the OpenAI engine and check the run outputs."""
+    text_to_audio_generator_function = mlrun.import_function("function.yaml")
+    with tempfile.TemporaryDirectory() as test_directory:
+        function_run = text_to_audio_generator_function.run(
+            handler="generate_multi_speakers_audio",
+            inputs={"data_path": "data/test_data.txt"},
+            params={
+                "output_directory": test_directory,
+                "speakers": {"Agent": 0, "Client": 1},
+                "available_voices": ["alloy", "echo"],
+                "engine": "openai",
+                "file_format": file_format,
+                "bits_per_sample": bits_per_sample,
+            },
+            local=True,
+            returns=[
+                "audio_files: path",
+                "audio_files_dataframe: dataset",
+                "text_to_speech_errors: file",
+            ],
+            artifact_path=test_directory,
+        )
+    # The run is expected to report an empty error string on success.
+    assert function_run.error == ""
+    for key in ["audio_files", "audio_files_dataframe", "text_to_speech_errors"]:
+        assert key in function_run.outputs and function_run.outputs[key] is not None
diff --git a/text_to_audio_generator/text_to_audio_generator.ipynb b/text_to_audio_generator/text_to_audio_generator.ipynb
@@ -31,10 +31,7 @@
    "id": "bb20c4a6-f362-40e6-8f73-9145953959ec",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "import mlrun\n",
-    "import tempfile"
-   ]
+   "source": "import mlrun"
   },
   {
    "cell_type": "code",
@@ -322,12 +319,10 @@
     "        \"output_directory\": \"./out\",\n",
     "        \"speakers\": {\"Agent\": 0, \"Client\": 1},\n",
     "        \"available_voices\": [\n",
-    "            \"v2/en_speaker_0\",\n",
-    "            \"v2/en_speaker_1\",\n",
+    "            \"alloy\",\n",
+    "            \"echo\",\n",
     "        ],\n",
-    "        \"use_small_models\": True,\n",
-    "        \"use_gpu\": False,\n",
-    "        \"offload_cpu\": True,\n",
+    "        \"engine\": \"openai\",\n",
     "        \"file_format\": \"mp3\",\n",
     "        # \"bits_per_sample\": 8,\n",
     "    },\n",
diff --git a/text_to_audio_generator/text_to_audio_generator.py b/text_to_audio_generator/text_to_audio_generator.py