image channels as generic

kornia · Feb 6, 2024 · f2169ba · f2169ba
1 parent 296a914
commit f2169ba
Show file tree

Hide file tree

Showing 16 changed files with 442 additions and 399 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -11,7 +11,7 @@ license-file = "LICENSE"
 [dependencies]
 num-traits = "0.2.17"
 image = { version = "0.24.5" }
-turbojpeg = {version = "0.5.2"}
+turbojpeg = {version = "1.0.0"}
 ndarray = { version = "0.15.6", features = ["rayon"] }
 # this is experimental and only used for benchmarking, so it's optional
 # consider removing it in the future.
@@ -29,6 +29,6 @@ candle = ["candle-core"]
 name = "color_benchmark"
 harness = false
 
-[[bench]]
-name = "resize_benchmark"
-harness = false
+#[[bench]]
+#name = "resize_benchmark"
+#harness = false
diff --git a/benches/color_benchmark.rs b/benches/color_benchmark.rs
@@ -2,7 +2,7 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criteri
 
 use kornia_rs::color as F;
 use kornia_rs::image::{Image, ImageSize};
-use ndarray::{s, stack, Axis};
+use ndarray::s;
 
 #[cfg(feature = "candle")]
 use candle_core::{DType, Device, Storage, Tensor};
@@ -11,27 +11,24 @@ use candle_core::{DType, Device, Storage, Tensor};
 use std::ops::Deref;
 
 // vanilla version
-fn gray_iter(image: Image) -> Image {
-    let height = image.image_size().height;
-    let width = image.image_size().width;
-    let mut gray_image = Image::new(ImageSize { width, height }, vec![0; width * height * 3]);
-    for y in 0..height {
-        for x in 0..width {
+fn gray_iter(image: &Image<u8, 3>) -> Image<u8, 1> {
+    let data = vec![0u8; image.image_size().width * image.image_size().height];
+    let mut gray_image = Image::new(image.image_size(), data).unwrap();
+    for y in 0..image.height() {
+        for x in 0..image.width() {
             let r = image.data[[y, x, 0]];
             let g = image.data[[y, x, 1]];
             let b = image.data[[y, x, 2]];
             let gray_pixel = (76. * r as f32 + 150. * g as f32 + 29. * b as f32) / 255.;
             gray_image.data[[y, x, 0]] = gray_pixel as u8;
-            gray_image.data[[y, x, 1]] = gray_pixel as u8;
-            gray_image.data[[y, x, 2]] = gray_pixel as u8;
         }
     }
     gray_image
 }
 
-fn gray_vec(image: Image) -> Image {
+fn gray_vec(image: &Image<u8, 3>) -> Image<u8, 1> {
     // convert to f32
-    let mut image_f32 = image.data.mapv(|x| x as f32);
+    let mut image_f32 = image.data_ref().mapv(|x| x as f32);
 
     // get channels
     let mut binding = image_f32.view_mut();
@@ -42,14 +39,7 @@ fn gray_vec(image: Image) -> Image {
     let gray_f32 = (&r * 76.0 + &g * 150.0 + &b * 29.0) / 255.0;
     let gray_u8 = gray_f32.mapv(|x| x as u8);
 
-    // TODO: ideally we stack the channels. Not working yet.
-    let gray_stacked = match stack(Axis(2), &[gray_u8.view(), gray_u8.view(), gray_u8.view()]) {
-        Ok(gray_stacked) => gray_stacked,
-        Err(err) => {
-            panic!("Error stacking channels: {}", err);
-        }
-    };
-    Image { data: gray_stacked }
+    Image::new(image.image_size(), gray_u8.into_raw_vec()).unwrap()
 }
 
 #[cfg(feature = "candle")]
@@ -92,7 +82,7 @@ fn gray_candle(image: Image) -> Image {
     Image::from_shape_vec([shape.0, shape.1, shape.2], data)
 }
 
-fn gray_image_crate(image: Image) -> Image {
+fn gray_image_crate(image: &Image<u8, 3>) -> Image<u8, 1> {
     let image_data = image.data.as_slice().unwrap();
     let rgb = image::RgbImage::from_raw(
         image.image_size().width as u32,
@@ -104,10 +94,7 @@ fn gray_image_crate(image: Image) -> Image {
 
     let image_gray = image_crate.grayscale();
 
-    Image::from_shape_vec(
-        [image_gray.height() as usize, image_gray.width() as usize, 1],
-        image_gray.into_bytes(),
-    )
+    Image::new(image.image_size(), image_gray.into_bytes()).unwrap()
 }
 
 fn bench_grayscale(c: &mut Criterion) {
@@ -117,18 +104,18 @@ fn bench_grayscale(c: &mut Criterion) {
     for (width, height) in image_sizes {
         let id = format!("{}x{}", width, height);
         let image_data = vec![0u8; width * height * 3];
-        let image = Image::from_shape_vec([height, width, 3], image_data);
+        let image = Image::new(ImageSize { width, height }, image_data).unwrap();
         group.bench_with_input(BenchmarkId::new("zip", &id), &image, |b, i| {
             b.iter(|| F::gray_from_rgb(black_box(i)))
         });
         group.bench_with_input(BenchmarkId::new("iter", &id), &image, |b, i| {
-            b.iter(|| gray_iter(black_box(i.clone())))
+            b.iter(|| gray_iter(black_box(&i.clone())))
         });
         group.bench_with_input(BenchmarkId::new("vec", &id), &image, |b, i| {
-            b.iter(|| gray_vec(black_box(i.clone())))
+            b.iter(|| gray_vec(black_box(&i.clone())))
         });
         group.bench_with_input(BenchmarkId::new("image_crate", &id), &image, |b, i| {
-            b.iter(|| gray_image_crate(black_box(i.clone())))
+            b.iter(|| gray_image_crate(black_box(&i.clone())))
         });
         #[cfg(feature = "candle")]
         group.bench_with_input(BenchmarkId::new("candle", &id), &image, |b, i| {

diff --git a/examples/binarize.rs b/examples/binarize.rs
@@ -0,0 +1,37 @@
+use kornia_rs::io::functions as F;
+use rerun;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // read the image
+    let image_path = std::path::Path::new("tests/data/dog.jpeg");
+    let image = F::read_image_jpeg(image_path)?;
+
+    // convert the image to grayscale
+    let gray = kornia_rs::color::gray_from_rgb(&image)?;
+    let gray_viz = gray.clone();
+
+    // binarize the image
+    let gray_bin = kornia_rs::threshold::threshold_binary(&gray, 127, 255)?;
+
+    // Option 1: convert the grayscale image to floating point
+    let gray_f32 = gray.cast::<f32>()?;
+
+    // Option 2: convert and normalize the grayscale image to floating point
+    // let gray_f32 = gray.cast_and_scale::<f32>(1.0 / 255.0, 0.0)?;
+
+    // normalize the image between 0 and 1
+    let gray_f32 = kornia_rs::normalize::normalize_mean_std(&gray_f32, &[0.0], &[255.0])?;
+
+    // binarize the image as floating point
+    let gray_f32 = kornia_rs::threshold::threshold_binary(&gray_f32, 0.5, 1.0)?;
+
+    // create a Rerun recording stream
+    let rec = rerun::RecordingStreamBuilder::new("Kornia App").connect()?;
+
+    let _ = rec.log("image", &rerun::Image::try_from(image.data())?);
+    let _ = rec.log("gray", &rerun::Image::try_from(gray_viz.data())?);
+    let _ = rec.log("gray_bin", &rerun::Image::try_from(gray_bin.data())?);
+    let _ = rec.log("gray_f32", &rerun::Image::try_from(gray_f32.data())?);
+
+    Ok(())
+}
diff --git a/examples/distance_transform.rs b/examples/distance_transform.rs
diff --git a/examples/hello_world.rs b/examples/hello_world.rs
@@ -3,7 +3,7 @@ use kornia_rs::io::functions as F;
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // read the image
     let image_path = std::path::Path::new("tests/data/dog.jpeg");
-    let image = F::read_image_jpeg(image_path);
+    let image = F::read_image_jpeg(image_path)?;
 
     println!("Image size: {:?}", image.image_size());
     Ok(())

diff --git a/examples/imgproc.rs b/examples/imgproc.rs
@@ -4,10 +4,10 @@ use rerun;
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // read the image
     let image_path = std::path::Path::new("tests/data/dog.jpeg");
-    let image = F::read_image_jpeg(image_path);
+    let image = F::read_image_jpeg(image_path)?;
 
     // convert the image to grayscale
-    let gray = kornia_rs::color::gray_from_rgb(&image);
+    let gray = kornia_rs::color::gray_from_rgb(&image)?;
 
     let gray_resize = kornia_rs::resize::resize(
         &gray,
@@ -26,9 +26,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let rec = rerun::RecordingStreamBuilder::new("Kornia App").connect()?;
 
     // log the images
-    let _ = rec.log("image", &rerun::Image::try_from(image.data)?);
-    let _ = rec.log("gray", &rerun::Image::try_from(gray.data)?);
-    let _ = rec.log("gray_resize", &rerun::Image::try_from(gray_resize.data)?);
+    let _ = rec.log("image", &rerun::Image::try_from(image.data())?);
+    let _ = rec.log("gray", &rerun::Image::try_from(gray.data())?);
+    let _ = rec.log("gray_resize", &rerun::Image::try_from(gray_resize.data())?);
 
     Ok(())
 }
diff --git a/examples/rerun_viz.rs b/examples/rerun_viz.rs
@@ -4,13 +4,13 @@ use rerun;
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // read the image
     let image_path = std::path::Path::new("tests/data/dog.jpeg");
-    let image = F::read_image_jpeg(image_path);
+    let image = F::read_image_jpeg(image_path)?;
 
     // create a Rerun recording stream
     let rec = rerun::RecordingStreamBuilder::new("Kornia App").connect()?;
 
     // log the image
-    let _ = rec.log("image", &rerun::Image::try_from(image.data)?);
+    let _ = rec.log("image", &rerun::Image::try_from(image.data())?);
 
     Ok(())
 }
diff --git a/src/color.rs b/src/color.rs
@@ -1,5 +1,6 @@
 use crate::image::Image;
 use ndarray::{Array3, Zip};
+use num_traits::{Num, NumCast};
 
 // TODO: ideally we want something like this:
 // let rgb: Image<u8, RGB> = load_image("image.jpg");
@@ -20,9 +21,9 @@ use ndarray::{Array3, Zip};
 /// The grayscale image.
 ///
 /// Precondition: the input image must have 3 channels.
-pub fn gray_from_rgb<T>(image: &Image<T, 3>) -> Result<Image<T, 1>, String>
+pub fn gray_from_rgb<T>(image: &Image<T, 3>) -> Result<Image<T, 1>, std::io::Error>
 where
-    T: Clone + Default + Send + Sync + num_traits::NumCast + std::fmt::Debug + 'static,
+    T: Copy + Clone + Default + Send + Sync + num_traits::NumCast + std::fmt::Debug + 'static,
 {
     assert_eq!(image.num_channels(), 3);
 
@@ -36,15 +37,15 @@ where
         .and(image.data.rows())
         .par_for_each(|mut out, inp| {
             assert_eq!(inp.len(), 3);
-            //let r = inp[0] as f32;
-            //let g = inp[1] as f32;
-            //let b = inp[2] as f32;
-            //let gray = (76. * r + 150. * g + 29. * b) / 255.;
+            let r = NumCast::from(inp[0]).unwrap_or(0.0);
+            let g = NumCast::from(inp[1]).unwrap_or(0.0);
+            let b = NumCast::from(inp[2]).unwrap_or(0.0);
+            let gray = (76. * r + 150. * g + 29. * b) / 255.;
 
-            //out[0] = gray as u8;
+            out[0] = NumCast::from(gray).unwrap_or(T::default());
         });
 
-    Ok(Image { data: output });
+    Ok(output)
 }
 
 #[cfg(test)]