Skip to content

Commit

Permalink
Remove obsolete 8-bit buffers for bitdepth of 8
Browse files: browse the repository at this point in the history
  • Loading branch information
barrbrain committed Mar 8, 2019
1 parent 9baf1d6 commit 0a35174
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 103 deletions.
103 changes: 31 additions & 72 deletions src/mc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ const SUBPEL_FILTERS: [[[i32; SUBPEL_FILTER_SIZE]; 16]; 6] = [
mod nasm {
use super::*;
use crate::plane::*;
use crate::util::*;

use std::mem;

Expand Down Expand Up @@ -255,44 +254,23 @@ mod nasm {
height: usize, col_frac: i32, row_frac: i32, mode_x: FilterMode,
mode_y: FilterMode, bit_depth: usize
) {
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
{
if is_x86_feature_detected!("avx2") && bit_depth == 8 {
let mut dst8: AlignedArray<[u8; 128 * 128]> =
UninitializedAlignedArray();
let mut src8: [u8; (128 + 7) * (128 + 7)] =
unsafe { mem::uninitialized() };
unsafe {
convert_slice_2d(
src8.as_mut_ptr(),
width + 7,
src.go_left(3).go_up(3).as_ptr(),
src.plane.cfg.stride,
width + 7,
height + 7
);
select_put_fn_avx2(mode_x, mode_y)(
dst8.array.as_mut_ptr(),
width as isize,
src8[(width + 7) * 3 + 3..].as_ptr(),
(width + 7) as isize,
width as i32,
height as i32,
col_frac,
row_frac
);
let dst_stride = dst.plane.cfg.stride;
convert_slice_2d(
dst.as_mut_ptr(),
dst_stride,
dst8.array.as_ptr(),
width,
width,
height
);
}
return;
if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
debug_assert!(bit_depth == 8);
let dst_stride = dst.plane.cfg.stride as isize;
let src_stride = src.plane.cfg.stride as isize;
unsafe {
select_put_fn_avx2(mode_x, mode_y)(
dst.as_mut_ptr() as *mut _,
dst_stride,
src.as_ptr() as *const _,
src_stride,
width as i32,
height as i32,
col_frac,
row_frac
);
}
return;
}
super::native::put_8tap(
dst, src, width, height, col_frac, row_frac, mode_x, mode_y, bit_depth,
Expand All @@ -304,66 +282,47 @@ mod nasm {
col_frac: i32, row_frac: i32, mode_x: FilterMode, mode_y: FilterMode,
bit_depth: usize
) {
if is_x86_feature_detected!("avx2") && bit_depth == 8 {
let mut src8: [u8; (128 + 7) * (128 + 7)] =
unsafe { mem::uninitialized() };
if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
debug_assert!(bit_depth == 8);
let src_stride = src.plane.cfg.stride as isize;
unsafe {
convert_slice_2d(
src8.as_mut_ptr(),
width + 7,
src.go_left(3).go_up(3).as_ptr(),
src.plane.cfg.stride,
width + 7,
height + 7
);
select_prep_fn_avx2(mode_x, mode_y)(
tmp.as_mut_ptr(),
src8[(width + 7) * 3 + 3..].as_ptr(),
(width + 7) as isize,
src.as_ptr() as *const _,
src_stride,
width as i32,
height as i32,
col_frac,
row_frac
);
}
} else {
super::native::prep_8tap(
tmp, src, width, height, col_frac, row_frac, mode_x, mode_y,
bit_depth,
);
return;
}
super::native::prep_8tap(
tmp, src, width, height, col_frac, row_frac, mode_x, mode_y, bit_depth
);
}

pub fn mc_avg<T: Pixel>(
dst: &mut PlaneMutSlice<'_, T>, tmp1: &[i16], tmp2: &[i16], width: usize,
height: usize, bit_depth: usize
) {
if is_x86_feature_detected!("avx2") && bit_depth == 8 {
let mut dst8: AlignedArray<[u8; 128 * 128]> =
UninitializedAlignedArray();
if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
debug_assert!(bit_depth == 8);
let dst_stride = dst.plane.cfg.stride as isize;
unsafe {
rav1e_avg_avx2(
dst8.array.as_mut_ptr(),
width as isize,
dst.as_mut_ptr() as *mut _,
dst_stride,
tmp1.as_ptr(),
tmp2.as_ptr(),
width as i32,
height as i32
);
let dst_stride = dst.plane.cfg.stride;
convert_slice_2d(
dst.as_mut_ptr(),
dst_stride,
dst8.array.as_ptr(),
width,
width,
height
);
}
return;
} else {
super::native::mc_avg(dst, tmp1, tmp2, width, height, bit_depth);
}
super::native::mc_avg(dst, tmp1, tmp2, width, height, bit_depth);
}
}

Expand Down
40 changes: 9 additions & 31 deletions src/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1523,14 +1523,12 @@ mod nasm {
) where
T: Pixel,
{
let stride = output.plane.cfg.stride;
if std::mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
debug_assert!(bd == 8);

if is_x86_feature_detected!("avx2") && bd == 8 {
// 64x only uses 32 coeffs
let coeff_w = Self::W.min(32);
let coeff_h = Self::H.min(32);
let mut dst8: AlignedArray<[u8; 64 * 64]> =
UninitializedAlignedArray();
let mut coeff16: AlignedArray<[i16; 32 * 32]> =
UninitializedAlignedArray();

Expand All @@ -1542,41 +1540,21 @@ mod nasm {
}
}

let stride = output.plane.cfg.stride as isize;
unsafe {
// copy output to dst8 so that the results of the inverse transform
// can be added to it
convert_slice_2d(
dst8.array.as_mut_ptr(),
Self::W,
output.as_ptr(),
stride,
Self::W,
Self::H
);

// perform the inverse transform
Self::match_tx_type(tx_type)(
dst8.array.as_mut_ptr(),
Self::W as isize,
output.as_mut_ptr() as *mut _,
stride,
coeff16.array.as_ptr(),
(coeff_w * coeff_h) as i32
);

// copy back to output
convert_slice_2d(
output.as_mut_ptr(),
stride,
dst8.array.as_ptr(),
Self::W,
Self::W,
Self::H
);
}
} else {
<Self as super::native::InvTxfm2D>::inv_txfm2d_add(
input, output, tx_type, bd,
);
return;
}
<Self as super::native::InvTxfm2D>::inv_txfm2d_add(
input, output, tx_type, bd,
);
}
}

Expand Down

0 comments on commit 0a35174

Please sign in to comment.