Improve documentation and examples #4

Merged
merged 1 commit into from Aug 7, 2024
11 changes: 8 additions & 3 deletions examples/recognize_from_bbc_word_radio.rs
@@ -23,18 +23,23 @@ async fn main() {
.await
.expect("Failed to connect to Azure");

let radio_stream =
create_audio_stream("https://stream.live.vc.bbcmedia.co.uk/bbc_world_service").await;
let mut events = client
.recognize(
radio_stream,
create_audio_stream("https://stream.live.vc.bbcmedia.co.uk/bbc_world_service").await,
recognizer::ContentType::Mpeg,
recognizer::Details::stream("mac", "stream"),
)
.await
.expect("Failed to recognize");

while let Some(event) = events.next().await {
// You will need to wait for some time before the first recognition arrives.
// This is mainly because the speakers talk fast and the recognition
// waits for a pause in the speech before wrapping up the sentence.

// Currently it is not possible to fine-tune the silence timeouts and other
// parameters, but this will be implemented in the future.

if let Ok(recognizer::Event::Recognized(_, result, ..)) = event {
tracing::info!("Recognized: {:?}", result.text);
}
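The create_audio_stream helper used by this example is collapsed in the diff above. As a rough sketch of what an HTTP-based implementation could look like (assuming the reqwest crate with its stream feature enabled; this is not the PR's actual helper):

use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::{Stream, StreamExt};

// Hypothetical sketch: forwards the body of an HTTP audio stream as raw byte chunks.
async fn create_audio_stream(url: &str) -> impl Stream<Item = Vec<u8>> {
    let (tx, rx) = tokio::sync::mpsc::channel(1024);
    let response = reqwest::get(url).await.expect("Failed to connect to the radio stream");
    tokio::spawn(async move {
        // Box::pin so the body stream is Unpin and can be polled with next().
        let mut body = Box::pin(response.bytes_stream());
        while let Some(Ok(chunk)) = body.next().await {
            if tx.send(chunk.to_vec()).await.is_err() {
                break; // the receiver was dropped, stop forwarding
            }
        }
    });
    ReceiverStream::new(rx)
}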
64 changes: 0 additions & 64 deletions examples/recognize_from_file.rs

This file was deleted.

42 changes: 24 additions & 18 deletions examples/recognize_from_microphone.rs
@@ -3,42 +3,46 @@ use azure_speech::Auth;
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::SampleFormat as CPALSampleFormat;
use std::env;
use std::error::Error;
use tokio_stream::wrappers::ReceiverStream;
use tokio_stream::StreamExt;
use tokio_stream::{Stream, StreamExt};

#[tokio::main]
async fn main() -> azure_speech::Result<()> {
async fn main() -> Result<(), Box<dyn Error>> {
tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.init();

// More information on the configuration can be found in the simple example.

let auth = Auth::from_subscription(
env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
env::var("AZURE_SUBSCRIPTION_KEY").expect("Subscription set on AZURE_SUBSCRIPTION_KEY env"),
);

let (rx, stream) = listen_from_default_input().await;
let config = recognizer::Config::default();

let client = recognizer::Client::connect(
auth,
recognizer::Config::default().set_detect_languages(
vec![recognizer::Language::ItIt],
recognizer::LanguageDetectMode::Continuous,
),
)
.await?;
let client = recognizer::Client::connect(auth, config)
.await
.expect("to connect to azure");

// Using this utility, we create an audio stream from the default input device.
// The audio headers are sent first, followed by the audio data.
// As the audio is raw PCM, the WAV content type is used.
// (A sketch of such a header is shown after this file's diff.)
let (stream, microphone) = listen_from_default_input().await;

microphone.play().expect("play failed");

let mut events = client
.recognize(
ReceiverStream::new(rx),
stream,
recognizer::ContentType::Wav,
recognizer::Details::stream("mac", "stream"),
)
.await?;

stream.play().expect("play failed");
.await
.expect("to recognize");

tracing::info!("Starting to listen...");
tracing::info!("... Starting to listen from microphone ...");

while let Some(event) = events.next().await {
if let Ok(recognizer::Event::Recognized(_, result, _, _, _)) = event {
@@ -51,7 +55,9 @@ async fn main() -> azure_speech::Result<()> {
Ok(())
}

async fn listen_from_default_input() -> (tokio::sync::mpsc::Receiver<Vec<u8>>, cpal::Stream) {
// This utility function creates a stream from the default input device.
// The audio headers are sent first, then the audio data.
async fn listen_from_default_input() -> (impl Stream<Item = Vec<u8>>, cpal::Stream) {
let host = cpal::default_host();
let device = host
.default_input_device()
@@ -176,5 +182,5 @@ async fn listen_from_default_input() -> (tokio::sync::mpsc::Receiver<Vec<u8>>, cpal::Stream) {
}
.expect("Failed to build input stream");

(rx, stream)
(ReceiverStream::new(rx), stream)
}
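The collapsed part of listen_from_default_input above is where the WAV header is sent before the raw samples. As an illustrative sketch of such a header (assuming 16-bit PCM; this is not the crate's actual helper), a minimal 44-byte RIFF/WAVE header can be built like this:

// Hypothetical sketch: builds a 44-byte WAV header for a live 16-bit PCM stream.
fn wav_header(sample_rate: u32, channels: u16) -> Vec<u8> {
    let bits_per_sample: u16 = 16;
    let byte_rate = sample_rate * channels as u32 * (bits_per_sample as u32 / 8);
    let block_align = channels * (bits_per_sample / 8);
    let mut header = Vec::with_capacity(44);
    header.extend_from_slice(b"RIFF");
    header.extend_from_slice(&0u32.to_le_bytes()); // total size, unknown for a live stream
    header.extend_from_slice(b"WAVE");
    header.extend_from_slice(b"fmt ");
    header.extend_from_slice(&16u32.to_le_bytes()); // fmt chunk size
    header.extend_from_slice(&1u16.to_le_bytes()); // audio format: PCM
    header.extend_from_slice(&channels.to_le_bytes());
    header.extend_from_slice(&sample_rate.to_le_bytes());
    header.extend_from_slice(&byte_rate.to_le_bytes());
    header.extend_from_slice(&block_align.to_le_bytes());
    header.extend_from_slice(&bits_per_sample.to_le_bytes());
    header.extend_from_slice(b"data");
    header.extend_from_slice(&0u32.to_le_bytes()); // data size, unknown for a live stream
    header
}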
103 changes: 103 additions & 0 deletions examples/recognize_simple.rs
@@ -0,0 +1,103 @@
use azure_speech::recognizer;
use azure_speech::stream::{Stream, StreamExt};
use azure_speech::Auth;
use std::env;
use std::error::Error;
use std::path::Path;
use tokio::fs::File;
use tokio::io::{AsyncReadExt, BufReader};
use tokio_stream::wrappers::ReceiverStream;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.init();

// Add your Azure region and subscription key to the environment variables.
// In this version, only subscription-key authentication is supported.
// Other authentication methods are on the roadmap.
let auth = Auth::from_subscription(
env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
env::var("AZURE_SUBSCRIPTION_KEY").expect("Subscription set on AZURE_SUBSCRIPTION_KEY env"),
);

// Set the configuration for the recognizer.
//
// The default configuration recognizes the en-US language, uses the
// Conversation mode, and requests the simple output format.
// You can change these with the Config struct and its methods
// (a customization sketch follows this file's diff).
let config = recognizer::Config::default();

let client = recognizer::Client::connect(auth, config)
.await
.expect("to connect to azure");

let mut stream = client
.recognize(
// Here is your input audio stream. The audio headers need to be present
// if required by the content type used.
// In this example the content type is Wav, so the headers are required at the start of the stream.
// Generally, when you read a file, the headers are already present.
// If you are creating a stream from a microphone, you need to add the headers yourself.
// Check the microphone example for more details.
create_audio_stream("tests/audios/examples_sample_files_turn_on_the_lamp.wav").await, // Try also the mp3 version of the file.
// Here is the content type of the audio stream.
recognizer::ContentType::Wav, // Be sure to set it correctly.
// The type of the source. You can use unknown, file, or stream.
// See the method documentation for more information.
recognizer::Details::file(),
)
.await
.expect("to recognize");

while let Some(event) = stream.next().await {
// Each event is a part of the recognition process.
match event {
// For example, the Recognized event gives you the result of the recognition.
Ok(recognizer::Event::Recognized(
request_id,
result,
offset,
duration,
raw_message,
)) => {
tracing::info!("Recognized session: {:?}", request_id);
tracing::info!("Result: {:?}", result);
tracing::info!("Offset: {:?}", offset);
tracing::info!("Duration: {:?}", duration);
// The raw message is the raw JSON message received from the service.
// You can use it to extract more information if needed.
tracing::info!("Raw message: {:?}", raw_message);
}
_ => {
tracing::info!("Event: {:?}", event);
}
}
}

tracing::info!("Completed!");

Ok(())
}

async fn create_audio_stream(path: impl AsRef<Path>) -> impl Stream<Item = Vec<u8>> {
let (tx, rx) = tokio::sync::mpsc::channel(1024);
let file = File::open(path).await.expect("Failed to open file");
let mut reader = BufReader::new(file);

tokio::spawn(async move {
let mut chunk = vec![0; 4096];
while let Ok(n) = reader.read(&mut chunk).await {
if n == 0 {
break;
}
// Send only the bytes actually read; `chunk` may contain stale data past `n`.
if tx.send(chunk[..n].to_vec()).await.is_err() {
tracing::error!("Error sending data");
break;
}
}
drop(tx);
});

ReceiverStream::new(rx)
}
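As noted in the comments above, the defaults can be changed through the Config methods. For example, the continuous language-detection setter that this PR removes from the microphone example could be applied here instead:

// Detect Italian continuously instead of the en-US default; this setter
// appears in the configuration removed from recognize_from_microphone.rs.
let config = recognizer::Config::default().set_detect_languages(
    vec![recognizer::Language::ItIt],
    recognizer::LanguageDetectMode::Continuous,
);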
61 changes: 61 additions & 0 deletions examples/synthesize_callbacks.rs
@@ -0,0 +1,61 @@
use azure_speech::{synthesizer, Auth};
use std::env;
use std::error::Error;
use tokio_stream::StreamExt;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.init();

let auth = Auth::from_subscription(
env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
env::var("AZURE_SUBSCRIPTION_KEY").expect("Subscription set on AZURE_SUBSCRIPTION_KEY env"),
);

let config = synthesizer::Config::default()
.on_synthesising(|request_id, audio| {
tracing::info!(
"Callback - request: {:?}: Synthesising bytes {:?} ",
request_id,
audio.len()
);
})
.on_synthesised(|request_id| {
tracing::info!("Callback - request: {:?}: Synthesised", request_id);
})
.on_error(|request_id, error| {
tracing::info!("Callback - request: {:?}: Error {:?}", request_id, error);
})
.on_audio_metadata(|request_id, metadata| {
tracing::info!(
"Callback - request: {:?}: Audio metadata {:?}",
request_id,
metadata
);
})
.on_session_start(|request_id| {
tracing::info!("Callback - request: {:?}: Session started", request_id);
})
.on_session_end(|request_id| {
tracing::info!("Callback - request: {:?}: Session ended", request_id);
});

let client = synthesizer::Client::connect(auth, config)
.await
.expect("to connect to azure");

// You can use the stream and the callbacks at the same time
// (see the sketch after this file's diff).
let mut stream = client
// Here you put the text to synthesize.
.synthesize("Hello World!")
.await
.expect("to synthesize");

while let Some(event) = stream.next().await {
tracing::info!("Synthesizer Event: {:?}", event);
}

Ok(())
}
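Since the callbacks above already report every stage, the returned stream can also simply be drained. A minimal sketch, assuming you only need the callbacks:

// Drive the synthesis to completion; the callbacks handle all the reporting.
let mut stream = client.synthesize("Hello World!").await.expect("to synthesize");
while stream.next().await.is_some() {}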
58 changes: 58 additions & 0 deletions examples/synthesize_simple.rs
@@ -0,0 +1,58 @@
use azure_speech::{synthesizer, Auth};
use std::env;
use std::error::Error;
use tokio_stream::StreamExt;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
tracing_subscriber::fmt()
.with_max_level(tracing::Level::INFO)
.init();

// Add your Azure region and subscription key to the environment variables.
// In this version, only subscription-key authentication is supported.
// Other authentication methods are on the roadmap.
let auth = Auth::from_subscription(
env::var("AZURE_REGION").expect("Region set on AZURE_REGION env"),
env::var("AZURE_SUBSCRIPTION_KEY").expect("Subscription set on AZURE_SUBSCRIPTION_KEY env"),
);

// Set the configuration for the synthesizer.
//
// The default configuration produces Riff16Khz16BitMonoPcm audio chunks,
// but you can change this using the .with_output_format(AudioFormat) method.
//
// It will use the en-US language and the EnUsJennyNeural voice.
// You can change these with the Config struct and its methods.
let config = synthesizer::Config::default();

let client = synthesizer::Client::connect(auth, config)
.await
.expect("to connect to azure");

let mut stream = client
// Here you put the text to synthesize.
.synthesize("Hello World!")
.await
.expect("to synthesize");

while let Some(event) = stream.next().await {
// Each event is a part of the synthesis process.
match event {
Ok(synthesizer::Event::Synthesising(request_id, audio)) => {
// Here you can use the audio to create your output.
// The audio is a Vec<u8> containing one audio chunk.
// You can write it to a file, play it, or send it to a speaker
// (see the sketch after this file's diff).
tracing::info!(
"Synthesizer: Synthesising {:?} len: {:?}",
request_id,
audio.len()
);
}
// This will print many events to the console.
_ => tracing::info!("Synthesizer: Event {:?}", event),
}
}

Ok(())
}
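A common next step is persisting those chunks. A minimal sketch, assuming the default Riff16Khz16BitMonoPcm output (where, presumably, the RIFF header arrives with the first audio data) and tokio's async file I/O:

use tokio::io::AsyncWriteExt;

// Append every synthesized chunk to a .wav file as it arrives.
let mut file = tokio::fs::File::create("hello.wav").await?;
while let Some(event) = stream.next().await {
    if let Ok(synthesizer::Event::Synthesising(_, audio)) = event {
        file.write_all(&audio).await?;
    }
}
file.flush().await?;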