Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 157 additions & 163 deletions yuv/src/bt601.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,88 +2,6 @@

use lazy_static::lazy_static;

fn clamped_index(width: i32, height: i32, x: i32, y: i32) -> usize {
(x.clamp(0, width - 1) + (y.clamp(0, height - 1) * width)) as usize
}

fn unclamped_index(width: i32, x: i32, y: i32) -> usize {
(x + y * width) as usize
}

fn sample_chroma_for_luma(
chroma: &[u8],
chroma_width: usize,
chroma_height: usize,
luma_x: usize,
luma_y: usize,
clamp: bool,
) -> u8 {
let width = chroma_width as i32;
let height = chroma_height as i32;

let sample_00;
let sample_01;
let sample_10;
let sample_11;

if clamp {
let chroma_x = if luma_x == 0 {
-1
} else {
(luma_x as i32 - 1) / 2
};
let chroma_y = if luma_y == 0 {
-1
} else {
(luma_y as i32 - 1) / 2
};

sample_00 = chroma
.get(clamped_index(width, height, chroma_x, chroma_y))
.copied()
.unwrap_or(0) as u16;
sample_10 = chroma
.get(clamped_index(width, height, chroma_x + 1, chroma_y))
.copied()
.unwrap_or(0) as u16;
sample_01 = chroma
.get(clamped_index(width, height, chroma_x, chroma_y + 1))
.copied()
.unwrap_or(0) as u16;
sample_11 = chroma
.get(clamped_index(width, height, chroma_x + 1, chroma_y + 1))
.copied()
.unwrap_or(0) as u16;
} else {
let chroma_x = (luma_x as i32 - 1) / 2;
let chroma_y = (luma_y as i32 - 1) / 2;

let base = unclamped_index(width, chroma_x, chroma_y);
sample_00 = chroma.get(base).copied().unwrap_or(0) as u16;
sample_10 = chroma.get(base + 1).copied().unwrap_or(0) as u16;
sample_01 = chroma.get(base + chroma_width).copied().unwrap_or(0) as u16;
sample_11 = chroma.get(base + chroma_width + 1).copied().unwrap_or(0) as u16;
}

let interp_left = luma_x % 2 != 0;
let interp_top = luma_y % 2 != 0;

let mut sample: u16 = 0;
sample += sample_00 * if interp_left { 3 } else { 1 };
sample += sample_10 * if interp_left { 1 } else { 3 };

sample += sample_01 * if interp_left { 3 } else { 1 };
sample += sample_11 * if interp_left { 1 } else { 3 };

sample += sample_00 * if interp_top { 3 } else { 1 };
sample += sample_01 * if interp_top { 1 } else { 3 };

sample += sample_10 * if interp_top { 3 } else { 1 };
sample += sample_11 * if interp_top { 1 } else { 3 };

((sample + 8) / 16) as u8
}

/// Precomputes and stores the linear functions for converting YUV (YCb'Cr' to be precise)
/// colors to RGB (sRGB-like, with gamma) colors, in signed 12.4 fixed-point integer format.
///
Expand Down Expand Up @@ -153,19 +71,20 @@ lazy_static! {

#[inline]
fn yuv_to_rgb(yuv: (u8, u8, u8), luts: &LUTs) -> (u8, u8, u8) {
let (y, cb, cr) = yuv;

// We rely on the optimizers in rustc/LLVM to eliminate the bounds checks when indexing
// into the fixed 256-long arrays in `luts` with indices coming in as `u8` parameters.
// This is crucial for performance, as this function runs in a fairly tight loop, on all pixels.
// I verified that this is actually happening, see here: https://rust.godbolt.org/z/vWzesYzbq
// And benchmarking showed no time difference from an `unsafe` + `get_unchecked()` solution.

let y = luts.y_to_gray[yuv.0 as usize];
let gray = luts.y_to_gray[y as usize];

// The `(... + 8) >> 4` parts convert back from 12.4 fixed-point to `u8` with correct rounding.
// (At least for positive numbers - any negative numbers that might occur will be clamped to 0 anyway.)
let r = (y + luts.cr_to_r[yuv.2 as usize] + 8) >> 4;
let g = (y + luts.cr_to_g[yuv.2 as usize] + luts.cb_to_g[yuv.1 as usize] + 8) >> 4;
let b = (y + luts.cb_to_b[yuv.1 as usize] + 8) >> 4;
let r = (gray + luts.cr_to_r[cr as usize] + 8) >> 4;
let g = (gray + luts.cr_to_g[cr as usize] + luts.cb_to_g[cb as usize] + 8) >> 4;
let b = (gray + luts.cb_to_b[cb as usize] + 8) >> 4;

(
r.clamp(0, 255) as u8,
Expand All @@ -174,101 +93,87 @@ fn yuv_to_rgb(yuv: (u8, u8, u8), luts: &LUTs) -> (u8, u8, u8) {
)
}

#[inline]
fn convert_and_write_pixel(
yuv: (u8, u8, u8),
rgba: &mut Vec<u8>,
width: usize,
x_pos: usize,
y_pos: usize,
luts: &LUTs,
) {
let (r, g, b) = yuv_to_rgb(yuv, luts);

let base = (x_pos + y_pos * width) * 4;
rgba[base] = r;
rgba[base + 1] = g;
rgba[base + 2] = b;
}

/// Convert YUV 4:2:0 data into RGB 1:1:1 data.
/// Convert planar YUV 4:2:0 data into interleaved RGBA 8888 data.
///
/// This function yields an RGBA picture with the same number of pixels as were
/// provided in the `y` picture. The `b` and `r` pictures will be resampled at
/// this stage, and the resulting picture will have color components mixed.
/// provided in the `y` picture. The `chroma_b` and `chroma_r` samples are
/// simply reused without any interpolation for all four corresponding pixels.
/// This is not the most correct, or nicest, but it's what Flash Player does.
///
/// Preconditions:
/// - `y.len()` must be an integer multiple of `y_width`
/// - `chroma_b.len()` and `chroma_r.len()` must both be integer multiples of `br_width`
/// - `chroma_b` and `chroma_r` must be the same size
/// - `br_width` must be half of `y_width`, rounded up
/// - With `y_height` computed as `y.len() / y_width`, and `br_height` as `chroma_b.len() / br_width`:
/// `br_height` must be half of `y_height`, rounded up
pub fn yuv420_to_rgba(
y: &[u8],
chroma_b: &[u8],
chroma_r: &[u8],
y_width: usize,
br_width: usize,
) -> Vec<u8> {
// Shortcut for the no-op case to avoid all kinds of overflows below
if y.is_empty() {
debug_assert_eq!(chroma_b.len(), 0);
debug_assert_eq!(chroma_r.len(), 0);
debug_assert_eq!(y_width, 0);
debug_assert_eq!(br_width, 0);
return vec![];
}

debug_assert_eq!(y.len() % y_width, 0);
debug_assert_eq!(chroma_b.len() % br_width, 0);
debug_assert_eq!(chroma_r.len() % br_width, 0);
debug_assert_eq!(chroma_b.len(), chroma_r.len());

let y_height = y.len() / y_width;
let br_height = chroma_b.len() / br_width;

// prefilling with 255, so the tight loop won't need to write to the alpha channel
let mut rgba = vec![255; y.len() * 4];
// the + 1 is for rounding odd numbers up
debug_assert_eq!((y_width + 1) / 2, br_width);
debug_assert_eq!((y_height + 1) / 2, br_height);

let mut rgba = vec![0; y.len() * 4];
let rgba_stride = y_width * 4; // 4 bytes per pixel, interleaved

// making sure that the "is it initialized already?" check is only done once per frame by getting a direct reference
let luts: &LUTs = &*LUTS;

// do the bulk of the pixels faster, with no clamping, leaving out the edges
for y_pos in 1..y_height - 1 {
for x_pos in 1..y_width - 1 {
let y_sample = y.get(x_pos + y_pos * y_width).copied().unwrap_or(0);
let b_sample =
sample_chroma_for_luma(chroma_b, br_width, br_height, x_pos, y_pos, false);
let r_sample =
sample_chroma_for_luma(chroma_r, br_width, br_height, x_pos, y_pos, false);

convert_and_write_pixel(
(y_sample, b_sample, r_sample),
&mut rgba,
y_width,
x_pos,
y_pos,
luts,
);
// Iteration is done in a row-major order to fit the slice layouts.
for luma_rowindex in 0..y_height {
let chroma_rowindex = luma_rowindex / 2;

let y_row = &y[luma_rowindex * y_width..(luma_rowindex + 1) * y_width];
let cb_row = &chroma_b[chroma_rowindex * br_width..(chroma_rowindex + 1) * br_width];
let cr_row = &chroma_r[chroma_rowindex * br_width..(chroma_rowindex + 1) * br_width];
let rgba_row = &mut rgba[luma_rowindex * rgba_stride..(luma_rowindex + 1) * rgba_stride];

// Iterating on 2 pixels at a time, leaving off the last one if width is odd.
let y_iter = y_row.chunks_exact(2);
let cb_iter = cb_row.iter();
let cr_iter = cr_row.iter();
// Similar to how Y is iterated on, but with 4 channels per pixel
let rgba_iter = rgba_row.chunks_exact_mut(8);

for (((y, cb), cr), rgba) in y_iter.zip(cb_iter).zip(cr_iter).zip(rgba_iter) {
let rgb0 = yuv_to_rgb((y[0], *cb, *cr), luts);
let rgb1 = yuv_to_rgb((y[1], *cb, *cr), luts);
// The output alpha values are fixed
rgba.copy_from_slice(&[rgb0.0, rgb0.1, rgb0.2, 255, rgb1.0, rgb1.1, rgb1.2, 255]);
}
}

// doing the sides with clamping
for y_pos in 0..y_height {
for x_pos in [0, y_width - 1].iter() {
let y_sample = y.get(x_pos + y_pos * y_width).copied().unwrap_or(0);
let b_sample =
sample_chroma_for_luma(chroma_b, br_width, br_height, *x_pos, y_pos, true);
let r_sample =
sample_chroma_for_luma(chroma_r, br_width, br_height, *x_pos, y_pos, true);

convert_and_write_pixel(
(y_sample, b_sample, r_sample),
&mut rgba,
y_width,
*x_pos,
y_pos,
luts,
);
}
}
// On odd wide pictures, the last pixel is not covered by the iteration above,
// but is included in y_row and rgba_row.
if y_width % 2 == 1 {
let y = y_row.last().unwrap();
let cb = cb_row.last().unwrap();
let cr = cr_row.last().unwrap();

let rgb = yuv_to_rgb((*y, *cb, *cr), luts);

// doing the top and bottom edges with clamping
for y_pos in [0, y_height - 1].iter() {
for x_pos in 0..y_width {
let y_sample = y.get(x_pos + y_pos * y_width).copied().unwrap_or(0);
let b_sample =
sample_chroma_for_luma(chroma_b, br_width, br_height, x_pos, *y_pos, true);
let r_sample =
sample_chroma_for_luma(chroma_r, br_width, br_height, x_pos, *y_pos, true);

convert_and_write_pixel(
(y_sample, b_sample, r_sample),
&mut rgba,
y_width,
x_pos,
*y_pos,
luts,
);
rgba_row[rgba_stride - 4..rgba_stride].copy_from_slice(&[rgb.0, rgb.1, rgb.2, 255])
}
}

Expand Down Expand Up @@ -413,3 +318,92 @@ fn test_rgb_yuv_rgb_roundtrip_sanity() {
assert!((rgb.2 as i32 - rgb2.2 as i32).abs() <= 1);
}
}

#[test]
fn test_yuv420_to_rgba() {
// empty picture
assert_eq!(yuv420_to_rgba(&[], &[], &[], 0, 0), vec![0u8; 0]);

// a single pixel picture
assert_eq!(
yuv420_to_rgba(&[125u8], &[128u8], &[128u8], 1, 1),
vec![127u8, 127u8, 127u8, 255u8]
);

// a 2x2 grey picture with a single chroma sample (well, one Cb and one Cr)
#[rustfmt::skip]
assert_eq!(
yuv420_to_rgba(&[125u8, 125u8, 125u8, 125u8], &[128u8], &[128u8], 2, 1),
vec![
127u8, 127u8, 127u8, 255u8, 127u8, 127u8, 127u8, 255u8,
127u8, 127u8, 127u8, 255u8, 127u8, 127u8, 127u8, 255u8,
]
);

// a 2x2 black-and-white checkerboard picture
#[rustfmt::skip]
assert_eq!(
yuv420_to_rgba(&[16u8, 235u8, 235u8, 16u8], &[128u8], &[128u8], 2, 1),
vec![
0u8, 0u8, 0u8, 255u8, 255u8, 255u8, 255u8, 255u8,
255u8, 255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 255u8,
]
);

// a 3x2 picture, black on the left, white on the right, grey in the middle
#[rustfmt::skip]
assert_eq!(
yuv420_to_rgba(&[0u8, 125u8, 235u8, 0u8, 125u8, 235u8], &[128u8, 128u8, ], &[128u8, 128u8,], 3, 2),
vec![
0u8, 0u8, 0u8, 255u8, 127u8, 127u8, 127u8, 255u8, 255u8, 255u8, 255u8, 255u8,
0u8, 0u8, 0u8, 255u8, 127u8, 127u8, 127u8, 255u8, 255u8, 255u8, 255u8, 255u8,
]
);

// notes:
// (81, 90, 240) is full red in YUV
// (145, 54, 34) is full green in YUV

// A 3x3 picture, red on the top, green on the bottom.
#[rustfmt::skip]
assert_eq!(
yuv420_to_rgba(
&[ 81u8, 81u8, 81u8,
125u8, 125u8, 125u8,
145u8, 145u8, 145u8],
&[ 90u8, 90u8,
54u8, 54u8],
&[240u8, 240u8,
34u8, 34u8],
3, 2),
vec![
254u8, 0u8, 0u8, 255u8, 254u8, 0u8, 0u8, 255u8, 254u8, 0u8, 0u8, 255u8, // red, with rounding error
255u8, 51u8, 50u8, 255u8, 255u8, 51u8, 50u8, 255u8, 255u8, 51u8, 50u8, 255u8, // orangish
0u8, 255u8, 1u8, 255u8, 0u8, 255u8, 1u8, 255u8, 0u8, 255u8, 1u8, 255u8, // green, with rounding error
]
);
// The middle row looks fairly off when converted back to YUV: should be (125, 90, 240), but is (112, 97, 218)
// However, when converted back again to RGB, these are (255, 51, 50) and (255, 51, 49), respectively. So, close enough.

// A 3x3 picture, red on the left, green on the right. Transpose of the above.
#[rustfmt::skip]
assert_eq!(
yuv420_to_rgba(
&[ 81u8, 125u8, 145u8,
81u8, 125u8, 145u8,
81u8, 125u8, 145u8],
&[ 90u8, 54u8,
90u8, 54u8],
&[240u8, 34u8,
240u8, 34u8],
3, 2),
vec![
254u8, 0u8, 0u8, 255u8, 255u8, 51u8, 50u8, 255u8, 0u8, 255u8, 1u8, 255u8,
254u8, 0u8, 0u8, 255u8, 255u8, 51u8, 50u8, 255u8, 0u8, 255u8, 1u8, 255u8,
254u8, 0u8, 0u8, 255u8, 255u8, 51u8, 50u8, 255u8, 0u8, 255u8, 1u8, 255u8,
]
);

// The middle row/column of pixels use the top/left row/column of chroma samples:
assert_eq!(yuv_to_rgb((125, 90, 240), &LUTS), (255, 51, 50));
}