*array = @[];
+ self.recognizer.commands = array;
+RecognizerDelegate *recognizerDelegate ;
+void _initLogCallback(logCallbackFunc callback) {
+ recognizerDelegate = [[RecognizerDelegate alloc] init];
+ logCallback = callback;
+ if(logCallback != NULL && recognizerDelegate.didEnd == NO){
+ // sendLog([@"Mac: Speech is ready" UTF8String]);
+ }
+void _startDictation(){
+ // sendLog([@"Mac: Speech is started" UTF8String]);
+ while (recognizerDelegate.didEnd == NO) {
+ [[NSRunLoop currentRunLoop] runUntilDate:[NSDate dateWithTimeIntervalSinceNow:1.0]];
+ }
+ // sendLog([@"Mac: Speech is ended" UTF8String]);
+ [recognizerDelegate.recognizer stopListening];
+void _endDictation(){
+ recognizerDelegate.didEnd = YES;
+void _addCommand(const char *string){
+ [recognizerDelegate addCommand: [NSString stringWithUTF8String:string]];
+void _clearCommand(){
+ [recognizerDelegate clearCommand];
+// swift-tools-version:5.1
+// The swift-tools-version declares the minimum version of Swift required to build this package.
+import PackageDescription
+let package = Package(
+ name: "NSSpeechTest",
+ dependencies: [
+ // Dependencies declare other packages that this package depends on.
+ // .package(url: /* package url */, from: "1.0.0"),
+ ],
+ targets: [
+ // Targets are the basic building blocks of a package. A target can define a module or a test suite.
+ // Targets can depend on other targets in this package, and on products in packages which this package depends on.
+ .target(
+ name: "NSSpeechTest",
+ dependencies: []),
+ .testTarget(
+ name: "NSSpeechTestTests",
+ dependencies: ["NSSpeechTest"]),
+ ]
+# NSSpeechTest
+author : jotaro shigeyama
+A Swift package to quickly test your Mac's dictation from the command line.
+Tested on Apple Swift version 5.1.3.
+## Usage
+Run : `swift run` in the root directory of this project.
+By default this recognizes the commands `rock paper scissors`, but you can easily change them in `main.swift`.
+## Troubleshooting
+> macOS won't dictate and instead shows a dialog that says: "download 0 byte ..."
+Quick solution: in this case you can still use the US(UK) or US(Australia) dictation kit from System Preferences -> Accessibility -> Voice Control.
+# NSSpeechTest
+import Foundation
+import AppKit
+class Dispatcher: NSObject, NSSpeechRecognizerDelegate {
+ var stop: Bool
+ override init () {stop = false}
+ func speechRecognizer(_ sender: NSSpeechRecognizer, didRecognizeCommand command: String)
+ {
+ print("command: \(command)")
+ }
+// Create the delegate and a recognizer limited to a fixed set of commands.
+let dispatcher = Dispatcher()
+let recognizer = NSSpeechRecognizer()!
+recognizer.delegate = dispatcher
+recognizer.commands = ["apple","orange","banana"]
+recognizer.displayedCommandsTitle = "test"
+
+// Pump the current run loop in 0.1 s slices so delegate callbacks can be
+// delivered; stop when the run loop reports it could not run or when the
+// dispatcher's stop flag is set.
+let loop = RunLoop.current
+let mode = loop.currentMode ?? .default
+var keepPumping = true
+while keepPumping {
+    let didRun = loop.run(mode: mode, before: Date(timeIntervalSinceNow: 0.1))
+    keepPumping = didRun && !dispatcher.stop
+}
\ No newline at end of file
+import XCTest
+import NSSpeechTestTests
+var tests = [XCTestCaseEntry]()
+tests += NSSpeechTestTests.allTests()
+import XCTest
+import class Foundation.Bundle
+final class NSSpeechTestTests: XCTestCase {
+ /// Smoke test: the built `NSSpeechTest` executable launches and keeps running.
+ ///
+ /// The executable pumps a run loop until its dispatcher's stop flag is set and
+ /// does not print "Hello, world!", so the SwiftPM template test (wait for exit,
+ /// then compare stdout) would block forever. Instead the process is started,
+ /// given a short grace period, checked for liveness, and then terminated.
+ func testExample() throws {
+     // Some of the APIs that we use below are available in macOS 10.13 and above.
+     guard #available(macOS 10.13, *) else {
+         return
+     }
+
+     let binaryURL = productsDirectory.appendingPathComponent("NSSpeechTest")
+
+     let process = Process()
+     process.executableURL = binaryURL
+     // Capture stdout so recognizer output does not pollute the test log.
+     process.standardOutput = Pipe()
+
+     try process.run()
+
+     // A crash during start-up ends the process inside this window.
+     Thread.sleep(forTimeInterval: 0.5)
+     XCTAssertTrue(process.isRunning, "NSSpeechTest exited right after launch")
+
+     // The executable never exits on its own; stop it so the test can finish.
+     if process.isRunning {
+         process.terminate()
+     }
+     process.waitUntilExit()
+ }
+ /// Directory that holds the built products.
+ ///
+ /// On macOS this is the parent directory of the first loaded bundle whose path
+ /// ends in ".xctest"; the process traps if no such bundle is loaded. On other
+ /// platforms it is the main bundle's URL.
+ var productsDirectory: URL {
+ #if os(macOS)
+     let testBundle = Bundle.allBundles.first { candidate in
+         candidate.bundlePath.hasSuffix(".xctest")
+     }
+     guard let found = testBundle else {
+         fatalError("couldn't find the products directory")
+     }
+     return found.bundleURL.deletingLastPathComponent()
+ #else
+     return Bundle.main.bundleURL
+ #endif
+ }
+ static var allTests = [
+ ("testExample", testExample),
+ ]
+import XCTest
+#if !canImport(ObjectiveC)
+public func allTests() -> [XCTestCaseEntry] {
+ return [
+ testCase(NSSpeechTestTests.allTests),
+ ]
+ "dependencies": {
+ "com.unity.collab-proxy": "1.2.16",
+ "com.unity.ext.nunit": "1.0.0",
+ "com.unity.ide.rider": "1.0.8",
+ "com.unity.ide.visualstudio": "1.0.11",
+ "com.unity.ide.vscode": "1.0.7",
+ "com.unity.test-framework": "1.0.16",
+ "com.unity.textmeshpro": "2.0.1",
+ "com.unity.timeline": "1.1.0",
+ "com.unity.ugui": "1.0.0",
+ "com.unity.modules.ai": "1.0.0",
+ "com.unity.modules.androidjni": "1.0.0",
+ "com.unity.modules.animation": "1.0.0",
+ "com.unity.modules.assetbundle": "1.0.0",
+ "com.unity.modules.audio": "1.0.0",
+ "com.unity.modules.cloth": "1.0.0",
+ "com.unity.modules.director": "1.0.0",
+ "com.unity.modules.imageconversion": "1.0.0",
+ "com.unity.modules.imgui": "1.0.0",
+ "com.unity.modules.jsonserialize": "1.0.0",
+ "com.unity.modules.particlesystem": "1.0.0",
+ "com.unity.modules.physics": "1.0.0",
+ "com.unity.modules.physics2d": "1.0.0",
+ "com.unity.modules.screencapture": "1.0.0",
+ "com.unity.modules.terrain": "1.0.0",
+ "com.unity.modules.terrainphysics": "1.0.0",
+ "com.unity.modules.tilemap": "1.0.0",
+ "com.unity.modules.ui": "1.0.0",
+ "com.unity.modules.uielements": "1.0.0",
+ "com.unity.modules.umbra": "1.0.0",
+ "com.unity.modules.unityanalytics": "1.0.0",
+ "com.unity.modules.unitywebrequest": "1.0.0",
+ "com.unity.modules.unitywebrequestassetbundle": "1.0.0",
+ "com.unity.modules.unitywebrequestaudio": "1.0.0",
+ "com.unity.modules.unitywebrequesttexture": "1.0.0",
+ "com.unity.modules.unitywebrequestwww": "1.0.0",
+ "com.unity.modules.vehicles": "1.0.0",
+ "com.unity.modules.video": "1.0.0",
+ "com.unity.modules.vr": "1.0.0",
+ "com.unity.modules.wind": "1.0.0",
+ "com.unity.modules.xr": "1.0.0"
+ }
+A ready-to-use Unity plugin for Speech Input/Output using native Speech API of both Apple macOS and MS Windows.
+Author: Jotaro Shigeyama and Thijs Roumen
+## Features
+- Custom command speech input
+- Support for various speech speeds and languages.
+- Async/await-based interaction design.
+SpeechOut speechOut = new SpeechOut();
+SpeechIn speechIn = new SpeechIn(OnRecognized);
+void Start(){
+ Dialog();
+async void Dialog(){
+ await speechOut.Speak("Hello!");
+ await speechIn.Listen(new string[] { "Hello", "Hi", "Hey" });
+ await speechOut.Speak("How are you doing?");
+ await speechIn.Listen(new string[] { "I'm fine", "Nah", "I'm Sick" });
+ //...
+This project repo contains
+- Unity project (this repo, tested with v.2019.3.0a8)
+- XCode project NSSpeechForUnity (author: Jotaro Shigeyama)
+- Visual Studio project WindowsVoiceProject originally from here (https://chadweisshaar.com/blog/2015/07/02/microsoft-speech-for-unity/)
+- Unity project contains pre-built `.dll` and `.bundle` from above source project.
+## Installation
+This plugin works on, and has been tested on, macOS Catalina or later and Windows 10 (Windows 8 is not supported).
+### OS setup
+Right now English / German / Japanese are supported. You need to install necessary language module from your OS setting.
+Derzeit werden Englisch / Deutsch / Japanisch unterstützt. Sie müssen das erforderliche Sprachmodul in Ihrer OS-einstellung installieren.
+### Unity
+Simply copy all Scripts and Plugins (and Scenes, if you need them) into your own Unity project.
+### Potential installation issue for macOS
+Some macOS users will experience broken speech input due to missing dictation kits, mostly because of a bug on Apple's side. If you encounter an issue with speech input, please try the following.
+- make sure your native voice command system works (System Preferences > Accessibility > Voice Command, enabling voice command will invoke macOS system voice command input windows.)
+ - Go to "Preferences > Accessibility > Sound Control > Language" and install language pack.
+- If macOS still misbehaves: try switching your OS language to another one, then try to install your desired voice command module (macOS will prompt you to download the missing dictation model).
+- Make sure your macOS is the latest version
+- Try rebooting the system.
+## dev-Installation
+- XCode (mac)
+- Visual Studio (win, latest Windows SDK required)
+This repository contains a git submodule.
+` git clone --recursive https://github.com/HassoPlattnerInstituteHCI/SwiftVoiceCommandTest.git`
+### Testing your NSSpeechRecognizer
+tested on Apple Swift version 5.1.3
+cd NSSpeechTest
+swift run
+### Using in your own Unity package
+- Build NSSpeechForUnity in XCode.
+- Copy generated `.bundle` file in `Assets/Plugins` of your Unity project.
+### Modifying voice command dictionary
+See `MyMacSpeechScript.cs`.
+## Documentation
+coming soon.
+Your feedback is welcome!
+using UnityEngine;
+using System.Collections;
+using System.Runtime.InteropServices;
+using System.Text;
+public class WindowsVoice : MonoBehaviour {
+ [DllImport("WindowsVoice")]
+ public static extern void initSpeech();
+ [DllImport("WindowsVoice")]
+ public static extern void destroySpeech();
+ [DllImport("WindowsVoice")]
+ public static extern void addToSpeechQueue(string s);
+ [DllImport("WindowsVoice")]
+ public static extern void statusMessage(StringBuilder str, int length);
+ public static WindowsVoice theVoice = null;
+ // Claims the shared voice slot and starts the native speech engine, but only
+ // when no other instance currently holds the slot.
+ void OnEnable()
+ {
+     // A voice is already registered: do nothing (this component is not destroyed).
+     if (theVoice != null)
+         return;
+
+     theVoice = this;
+     initSpeech();
+ }
+ // Queues the fixed phrase "Testing" for speech output.
+ public void test() => speak("Testing");
+ // Forwards msg to the native speech queue.
+ public void speak(string msg)
+ {
+     addToSpeechQueue(msg);
+ }
+ // Shuts the native speech engine down when the instance that owns the shared
+ // voice slot is destroyed; other instances leave the engine untouched.
+ void OnDestroy()
+ {
+     if (theVoice != this)
+         return;
+
+     Debug.Log("Destroying speech");
+     destroySpeech();
+     Debug.Log("Speech destroyed");
+     theVoice = null;
+ }
+ // Fetches the native plugin's current status text through a 40-character buffer.
+ public static string GetStatusMessage()
+ {
+     const int bufferSize = 40;
+     var buffer = new StringBuilder(bufferSize);
+     statusMessage(buffer, bufferSize);
+     return buffer.ToString();
+ }
diff --git a/WindowsVoiceProject/WindowsVoice.h b/WindowsVoiceProject/WindowsVoice.h
+#define DLL_API __declspec(dllexport)
+#define DLL_API __declspec(dllimport)
+namespace WindowsVoice {
+ extern "C" {
+ DLL_API void __cdecl initSpeech();
+ DLL_API void __cdecl addToSpeechQueue(const char* text);
+ DLL_API void __cdecl clearSpeechQueue();
+ DLL_API void __cdecl destroySpeech();
+ DLL_API void __cdecl statusMessage(char* msg, int msgLen);
+ }
+ std::mutex theMutex;
+ std::list theSpeechQueue;
+ std::thread* theSpeechThread = nullptr;
+ bool shouldTerminate = false;
+ std::wstring theStatusMessage;
\ No newline at end of file
+ Debug
+ Win32
+ Debug
+ x64
+ Release
+ Win32
+ Release
+ x64
+ {AC8E5BA2-5F13-4C97-A35E-069E01781E85}
+ WindowsVoice
+ 10.0.14393.0
+ DynamicLibrary
+ true
+ v142
+ MultiByte
+ DynamicLibrary
+ true
+ v141
+ MultiByte
+ DynamicLibrary
+ false
+ v142
+ true
+ MultiByte
+ DynamicLibrary
+ false
+ v142
+ true
+ MultiByte
+ .dll
+ .dll
+ $(SolutionDir)\libs
+ .dll
+ .dll
+ $(SolutionDir)\libs\
+ Level3
+ Disabled
+ true
+ DLL_EXPORTS;_WINDLL;%(PreprocessorDefinitions)
+ true
+ Level3
+ Disabled
+ true
+ true
+ Level3
+ MaxSpeed
+ true
+ true
+ true
+ DLL_EXPORTS;_WINDLL;%(PreprocessorDefinitions)
+ true
+ true
+ true
+ Level3
+ MaxSpeed
+ true
+ true
+ true
+ DLL_EXPORTS;_WINDLL;%(PreprocessorDefinitions)
+ MultiThreaded
+ true
+ true
+ true
\ No newline at end of file
diff --git a/WindowsVoiceProject/dllmain.cpp b/WindowsVoiceProject/dllmain.cpp
+#include "pch.h"
+#include "WindowsVoice.h"
+namespace WindowsVoice {
+ void speechThreadFunc()
+ {
+ ISpVoice * pVoice = NULL;
+ {
+ theStatusMessage = L"Failed to initialize COM for Voice.";
+ return;
+ }
+ HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice);
+ if (!SUCCEEDED(hr))
+ {
+ LPSTR pText = 0;
+ LocalFree(pText);
+ theStatusMessage = L"Failed to create Voice instance.";
+ return;
+ }
+ theStatusMessage = L"Speech ready.";
+ //std::cout << "Speech ready.\n";
+ wchar_t* priorText = nullptr;
+ while (!shouldTerminate)
+ {
+ wchar_t* wText = NULL;
+ if (!theSpeechQueue.empty())
+ {
+ theMutex.lock();
+ wText = theSpeechQueue.front();
+ theSpeechQueue.pop_front();
+ theMutex.unlock();
+ }
+ if (wText)
+ {
+ if (priorText == nullptr || lstrcmpW(wText, priorText) != 0)
+ {
+ pVoice->Speak(wText, SPF_IS_XML, NULL);
+ Sleep(250);
+ delete[] priorText;
+ priorText = wText;
+ }
+ else
+ delete[] wText;
+ }
+ else
+ {
+ delete[] priorText;
+ priorText = nullptr;
+ Sleep(50);
+ }
+ }
+ pVoice->Release();
+ SPVOICESTATUS voiceStatus;
+ wchar_t* priorText = nullptr;
+ while (!shouldTerminate)
+ {
+ pVoice->GetStatus(&voiceStatus, NULL);
+ if (voiceStatus.dwRunningState == SPRS_IS_SPEAKING)
+ {
+ if (priorText == nullptr)
+ theStatusMessage = L"Error: SPRS_IS_SPEAKING but text is NULL";
+ else
+ {
+ theStatusMessage = L"Speaking: ";
+ theStatusMessage.append(priorText);
+ if (!theSpeechQueue.empty())
+ {
+ theMutex.lock();
+ if (lstrcmpW(theSpeechQueue.front(), priorText) == 0)
+ {
+ delete[] theSpeechQueue.front();
+ theSpeechQueue.pop_front();
+ }
+ theMutex.unlock();
+ }
+ }
+ }
+ else
+ {
+ theStatusMessage = L"Waiting.";
+ if (priorText != NULL)
+ {
+ delete[] priorText;
+ priorText = NULL;
+ }
+ if (!theSpeechQueue.empty())
+ {
+ theMutex.lock();
+ priorText = theSpeechQueue.front();
+ theSpeechQueue.pop_front();
+ theMutex.unlock();
+ //priorText = "" + priorText + "";
+ //priorText = wcscat((wchar_t *)"", wcscat(priorText,(wchar_t *)""));
+ pVoice->Speak(priorText, SPF_IS_XML | SPF_ASYNC, NULL);
+ }
+ }
+ Sleep(50);
+ }
+ pVoice->Pause();
+ pVoice->Release();
+ theStatusMessage = L"Speech thread terminated.";
+ }
+ // Converts a UTF-8 string to a newly allocated wide string and appends it to
+ // the speech queue. The queue takes ownership of the buffer, which must be
+ // released with delete[].
+ //
+ // text: null-terminated UTF-8 string; a null pointer is ignored.
+ void addToSpeechQueue(const char* text)
+ {
+   if (!text)
+     return;
+
+   // Ask the API how many wide characters are needed (terminator included)
+   // instead of assuming one wide character per input byte.
+   const int wideLen = ::MultiByteToWideChar(CP_UTF8, 0, text, -1, NULL, 0);
+   if (wideLen <= 0)
+     return; // conversion not possible: nothing to queue
+
+   wchar_t* wText = new wchar_t[wideLen];
+   if (::MultiByteToWideChar(CP_UTF8, 0, text, -1, wText, wideLen) == 0)
+   {
+     // Do not queue a buffer with undefined contents.
+     delete[] wText;
+     return;
+   }
+
+   std::lock_guard<std::mutex> lock(theMutex);
+   theSpeechQueue.push_back(wText);
+ }
+ // Discards every pending utterance. Queue entries are buffers allocated with
+ // new[] by addToSpeechQueue, so each one is freed before the list is emptied;
+ // clearing the list alone would leak them.
+ void clearSpeechQueue()
+ {
+   std::lock_guard<std::mutex> lock(theMutex);
+   for (wchar_t* pending : theSpeechQueue)
+     delete[] pending;
+   theSpeechQueue.clear();
+ }
+ // Starts the speech worker thread unless one already exists. The terminate
+ // flag is reset in either case; the status message records which path ran.
+ void initSpeech()
+ {
+   shouldTerminate = false;
+
+   const bool alreadyStarted = (theSpeechThread != nullptr);
+   if (!alreadyStarted)
+   {
+     theStatusMessage = L"Starting Windows Voice.";
+     theSpeechThread = new std::thread(WindowsVoice::speechThreadFunc);
+   }
+   else
+   {
+     theStatusMessage = L"Windows Voice thread already started.";
+   }
+ }
+ // Stops the speech worker thread and releases everything it owned.
+ // Does nothing except update the status message when no thread is running.
+ void destroySpeech()
+ {
+   if (theSpeechThread == nullptr)
+   {
+     theStatusMessage = L"Speach thread already destroyed or not started.";
+     return;
+   }
+   theStatusMessage = L"Destroying speech.";
+
+   // Ask the worker loop to finish, then wait for it.
+   shouldTerminate = true;
+   theSpeechThread->join();
+
+   // Entries still queued are new[]-allocated buffers that nobody will consume
+   // any more; free them before emptying the list so they are not leaked.
+   {
+     std::lock_guard<std::mutex> lock(theMutex);
+     for (wchar_t* pending : theSpeechQueue)
+       delete[] pending;
+     theSpeechQueue.clear();
+   }
+
+   delete theSpeechThread;
+   theSpeechThread = nullptr;
+
+   // NOTE(review): the worker's "Failed to initialize COM for Voice." status
+   // suggests COM is initialized on the worker thread; if so, this call on the
+   // caller's thread is unbalanced and belongs at the end of the worker - confirm.
+   CoUninitialize();
+   theStatusMessage = L"Speech destroyed.";
+ }
+ // Copies the current status message into a caller-supplied buffer as a
+ // null-terminated multibyte string.
+ //
+ // msg:    destination buffer; ignored when null.
+ // msgLen: size of msg in bytes, terminator included; ignored when <= 0.
+ //
+ // A status text longer than the buffer is truncated. Passing msgLen as the
+ // conversion count (as before) makes wcstombs_s invoke the invalid-parameter
+ // handler whenever the text does not fit, which by default aborts the process.
+ void statusMessage(char* msg, int msgLen)
+ {
+   if (msg == nullptr || msgLen <= 0)
+     return;
+
+   size_t converted = 0;
+   wcstombs_s(&converted, msg, static_cast<size_t>(msgLen), theStatusMessage.c_str(), _TRUNCATE);
+ }
+BOOL APIENTRY DllMain(HMODULE, DWORD ul_reason_for_call, LPVOID)
+ switch (ul_reason_for_call)
+ {
+ break;
+ }
+ return TRUE;
\ No newline at end of file
diff --git a/WindowsVoiceProject/pch.cpp b/WindowsVoiceProject/pch.cpp
+// pch.cpp
+// Include the standard header and generate the precompiled header.
+#include "pch.h"
diff --git a/WindowsVoiceProject/pch.h b/WindowsVoiceProject/pch.h
+// pch.h
+// Header for standard system include files.
+#pragma once
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
+// Windows Header Files: