Skip to content

Commit

Permalink
Reduce risk of uninit ac in luma_ac and encode_coeffs (#3271)
Browse files Browse the repository at this point in the history
* Reduce risk of uninit ac in luma_ac
* eob can't exceed u16
* Avoid uninitialized data in coeff_contexts
* Fix minor type conversion and clippy issues
  • Loading branch information
kornelski authored Oct 25, 2023
1 parent 257a3c5 commit 274d00e
Show file tree
Hide file tree
Showing 16 changed files with 146 additions and 85 deletions.
11 changes: 7 additions & 4 deletions src/asm/aarch64/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ macro_rules! decl_cfl_ac_fn {
extern {
$(
fn $f(
ac: *mut i16, src: *const u8, stride: libc::ptrdiff_t,
ac: *mut MaybeUninit<i16>, src: *const u8, stride: libc::ptrdiff_t,
w_pad: libc::c_int, h_pad: libc::c_int,
width: libc::c_int, height: libc::c_int,
);
Expand All @@ -50,7 +50,7 @@ macro_rules! decl_cfl_ac_hbd_fn {
extern {
$(
fn $f(
ac: *mut i16, src: *const u16, stride: libc::ptrdiff_t,
ac: *mut MaybeUninit<i16>, src: *const u16, stride: libc::ptrdiff_t,
w_pad: libc::c_int, h_pad: libc::c_int,
width: libc::c_int, height: libc::c_int,
);
Expand Down Expand Up @@ -659,11 +659,14 @@ pub fn dispatch_predict_intra<T: Pixel>(
}
}

/// It MUST initialize all `ac` elements.
#[inline(always)]
pub(crate) fn pred_cfl_ac<T: Pixel, const XDEC: usize, const YDEC: usize>(
ac: &mut [i16], luma: &PlaneRegion<'_, T>, bsize: BlockSize, w_pad: usize,
h_pad: usize, cpu: CpuFeatureLevel,
ac: &mut [MaybeUninit<i16>], luma: &PlaneRegion<'_, T>, bsize: BlockSize,
w_pad: usize, h_pad: usize, cpu: CpuFeatureLevel,
) {
debug_assert_eq!(ac.len(), bsize.area());

if cpu < CpuFeatureLevel::NEON {
return rust::pred_cfl_ac::<T, XDEC, YDEC>(
ac, luma, bsize, w_pad, h_pad, cpu,
Expand Down
2 changes: 1 addition & 1 deletion src/asm/aarch64/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: u16,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
if tx_type == TxType::WHT_WHT {
Expand Down
20 changes: 14 additions & 6 deletions src/asm/shared/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
mod test {
use interpolate_name::interpolate_test;
use rand::random;
use std::mem::MaybeUninit;

use crate::context::MAX_TX_SIZE;
use crate::cpu_features::CpuFeatureLevel;
Expand All @@ -23,7 +24,7 @@ mod test {
IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
};
use crate::transform::TxSize;
use crate::util::Aligned;
use crate::util::{slice_assume_init_mut, Aligned};
use crate::Pixel;

#[test]
Expand Down Expand Up @@ -188,27 +189,34 @@ mod test {
}
let luma = &plane.as_region();

let mut ac_ref = Aligned::new([0i16; 32 * 32]);
let mut ac_ref = Aligned::new([MaybeUninit::new(0x3333i16); 32 * 32]);
let ac_ref = &mut ac_ref.data[..plane_bsize.area()];

let cpu = CpuFeatureLevel::RUST;
(match (xdec, ydec) {
(0, 0) => rust::pred_cfl_ac::<T, 0, 0>,
(1, 0) => rust::pred_cfl_ac::<T, 1, 0>,
(_, _) => rust::pred_cfl_ac::<T, 1, 1>,
})(&mut ac_ref.data, luma, plane_bsize, w_pad, h_pad, cpu);
})(ac_ref, luma, plane_bsize, w_pad, h_pad, cpu);

for &cpu in
&CpuFeatureLevel::all()[..=CpuFeatureLevel::default().as_index()]
{
let mut ac = Aligned::new([0i16; 32 * 32]);
let mut ac = Aligned::new([MaybeUninit::new(0x7FFFi16); 32 * 32]);
let ac = &mut ac.data[..plane_bsize.area()];

(match (xdec, ydec) {
(0, 0) => pred_cfl_ac::<T, 0, 0>,
(1, 0) => pred_cfl_ac::<T, 1, 0>,
(_, _) => pred_cfl_ac::<T, 1, 1>,
})(&mut ac.data, luma, plane_bsize, w_pad, h_pad, cpu);
})(ac, luma, plane_bsize, w_pad, h_pad, cpu);

assert_eq!(&ac_ref.data[..], &ac.data[..])
unsafe {
let ac_ref = slice_assume_init_mut(ac_ref);
let ac = slice_assume_init_mut(ac);

assert_eq!(&ac_ref, &ac);
}
}
}
}
16 changes: 8 additions & 8 deletions src/asm/shared/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub type InvTxfmHBDFunc =

pub fn call_inverse_func<T: Pixel>(
func: InvTxfmFunc, input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>,
eob: usize, width: usize, height: usize, bd: usize,
eob: u16, width: usize, height: usize, bd: usize,
) {
debug_assert!(bd == 8);

Expand Down Expand Up @@ -51,7 +51,7 @@ pub fn call_inverse_func<T: Pixel>(

pub fn call_inverse_hbd_func<T: Pixel>(
func: InvTxfmHBDFunc, input: &[T::Coeff],
output: &mut PlaneRegionMut<'_, T>, eob: usize, width: usize, height: usize,
output: &mut PlaneRegionMut<'_, T>, eob: u16, width: usize, height: usize,
bd: usize,
) {
// Only use at most 32 columns and 32 rows of input coefficients.
Expand Down Expand Up @@ -94,7 +94,7 @@ pub mod test {

pub fn pick_eob<T: Coefficient>(
coeffs: &mut [T], tx_size: TxSize, tx_type: TxType, sub_h: usize,
) -> usize {
) -> u16 {
/* From dav1d
* copy the topleft coefficients such that the return value (being the
* coefficient scantable index for the eob token) guarantees that only
Expand All @@ -105,14 +105,14 @@ pub mod test {
let coeff_h = av1_get_coded_tx_size(tx_size).height();
let sub_high: usize = if sub_h > 0 { sub_h * 8 - 1 } else { 0 };
let sub_low: usize = if sub_h > 1 { sub_high - 8 } else { 0 };
let mut eob = 0;
let mut eob = 0u16;
let mut exit = 0;

// Wrap WHT_WHT (16) to DCT_DCT (0) scan table
let scan = av1_scan_orders[tx_size as usize][(tx_type as usize) & 15].scan;

for (i, &pos) in scan.iter().enumerate() {
exit = i;
exit = i as u16;

let rc = pos as usize;
let rcx = rc % coeff_h;
Expand All @@ -121,14 +121,14 @@ pub mod test {
if rcx > sub_high || rcy > sub_high {
break;
} else if eob == 0 && (rcx > sub_low || rcy > sub_low) {
eob = i;
eob = i as u16;
}
}

if eob != 0 {
eob += thread_rng().gen_range(0..(exit - eob).min(1));
}
for &pos in scan.iter().skip(eob) {
for &pos in scan.iter().skip(usize::from(eob)) {
coeffs[pos as usize] = T::cast_from(0);
}

Expand Down Expand Up @@ -181,7 +181,7 @@ pub mod test {
// SAFETY: forward_transform initialized freq
let freq = unsafe { slice_assume_init_mut(freq) };

let eob: usize = pick_eob(freq, tx_size, tx_type, sub_h);
let eob: u16 = pick_eob(freq, tx_size, tx_type, sub_h);
let mut rust_dst = dst.clone();

inverse_transform_add(
Expand Down
14 changes: 9 additions & 5 deletions src/asm/x86/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use crate::tiling::{PlaneRegion, PlaneRegionMut};
use crate::transform::TxSize;
use crate::util::Aligned;
use crate::Pixel;
use std::mem::MaybeUninit;
use v_frame::pixel::PixelType;

macro_rules! decl_angular_ipred_fn {
Expand Down Expand Up @@ -145,7 +146,7 @@ macro_rules! decl_cfl_ac_fn {
extern {
$(
fn $f(
ac: *mut i16, src: *const u8, stride: libc::ptrdiff_t,
ac: *mut MaybeUninit<i16>, src: *const u8, stride: libc::ptrdiff_t,
w_pad: libc::c_int, h_pad: libc::c_int,
width: libc::c_int, height: libc::c_int,
);
Expand All @@ -168,7 +169,7 @@ macro_rules! decl_cfl_ac_hbd_fn {
extern {
$(
fn $f(
ac: *mut i16, src: *const u16, stride: libc::ptrdiff_t,
ac: *mut MaybeUninit<i16>, src: *const u16, stride: libc::ptrdiff_t,
w_pad: libc::c_int, h_pad: libc::c_int,
width: libc::c_int, height: libc::c_int,
);
Expand Down Expand Up @@ -871,12 +872,15 @@ pub fn dispatch_predict_intra<T: Pixel>(
}
}

// The implementation MUST inititialize all `ac` elements
#[inline(always)]
pub(crate) fn pred_cfl_ac<T: Pixel, const XDEC: usize, const YDEC: usize>(
ac: &mut [i16], luma: &PlaneRegion<'_, T>, bsize: BlockSize, w_pad: usize,
h_pad: usize, cpu: CpuFeatureLevel,
ac: &mut [MaybeUninit<i16>], luma: &PlaneRegion<'_, T>, bsize: BlockSize,
w_pad: usize, h_pad: usize, cpu: CpuFeatureLevel,
) {
let call_rust = |ac: &mut [i16]| {
debug_assert_eq!(ac.len(), bsize.area());

let call_rust = |ac: &mut [MaybeUninit<i16>]| {
rust::pred_cfl_ac::<T, XDEC, YDEC>(ac, luma, bsize, w_pad, h_pad, cpu);
};

Expand Down
16 changes: 9 additions & 7 deletions src/asm/x86/quantize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use std::mem::MaybeUninit;
type DequantizeFn = unsafe fn(
qindex: u8,
coeffs_ptr: *const i16,
_eob: usize,
_eob: u16,
rcoeffs_ptr: *mut i16,
tx_size: TxSize,
bit_depth: usize,
Expand All @@ -38,7 +38,7 @@ cpu_function_lookup_table!(

#[inline(always)]
pub fn dequantize<T: Coefficient>(
qindex: u8, coeffs: &[T], eob: usize, rcoeffs: &mut [MaybeUninit<T>],
qindex: u8, coeffs: &[T], eob: u16, rcoeffs: &mut [MaybeUninit<T>],
tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
cpu: CpuFeatureLevel,
) {
Expand Down Expand Up @@ -91,7 +91,7 @@ pub fn dequantize<T: Coefficient>(

#[target_feature(enable = "avx2")]
unsafe fn dequantize_avx2(
qindex: u8, coeffs_ptr: *const i16, _eob: usize, rcoeffs_ptr: *mut i16,
qindex: u8, coeffs_ptr: *const i16, _eob: u16, rcoeffs_ptr: *mut i16,
tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
) {
let log_tx_scale = _mm256_set1_epi32(get_log_tx_scale(tx_size) as i32);
Expand Down Expand Up @@ -182,12 +182,12 @@ mod test {

// Test the min, max, and random eobs
let eobs = {
let mut out = [0usize; 16];
let mut out = [0u16; 16];
let area: usize = av1_get_coded_tx_size(tx_size).area();
out[0] = 0;
out[1] = area;
out[1] = area as u16;
for eob in out.iter_mut().skip(2) {
*eob = rng.gen_range(0..area);
*eob = rng.gen_range(0..area as u16);
}
out
};
Expand All @@ -198,7 +198,9 @@ mod test {

// Generate quantized coefficients up to the eob
let between = Uniform::from(-i16::MAX..=i16::MAX);
for (i, qcoeff) in qcoeffs.data.iter_mut().enumerate().take(eob) {
for (i, qcoeff) in
qcoeffs.data.iter_mut().enumerate().take(eob as usize)
{
*qcoeff = between.sample(&mut rng)
/ if i == 0 { dc_quant } else { ac_quant };
}
Expand Down
2 changes: 1 addition & 1 deletion src/asm/x86/transform/inverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::asm::shared::transform::inverse::*;
use crate::asm::shared::transform::*;

pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: u16,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
if tx_type == TxType::WHT_WHT {
Expand Down
33 changes: 20 additions & 13 deletions src/context/block_unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use std::mem::MaybeUninit;

use super::*;

use crate::predict::PredictionMode;
Expand Down Expand Up @@ -1781,7 +1783,7 @@ impl<'a> ContextWriter<'a> {

pub fn write_coeffs_lv_map<T: Coefficient, W: Writer>(
&mut self, w: &mut W, plane: usize, bo: TileBlockOffset, coeffs_in: &[T],
eob: usize, pred_mode: PredictionMode, tx_size: TxSize, tx_type: TxType,
eob: u16, pred_mode: PredictionMode, tx_size: TxSize, tx_type: TxType,
plane_bsize: BlockSize, xdec: usize, ydec: usize,
use_reduced_tx_set: bool, frame_clipped_txw: usize,
frame_clipped_txh: usize,
Expand All @@ -1792,8 +1794,8 @@ impl<'a> ContextWriter<'a> {
let is_inter = pred_mode >= PredictionMode::NEARESTMV;

// Note: Both intra and inter mode uses inter scan order. Surprised?
let scan: &[u16] =
&av1_scan_orders[tx_size as usize][tx_type as usize].scan[..eob];
let scan: &[u16] = &av1_scan_orders[tx_size as usize][tx_type as usize]
.scan[..usize::from(eob)];
let height = av1_get_coded_tx_size(tx_size).height();

// Create a slice with coeffs in scan order
Expand Down Expand Up @@ -1858,7 +1860,7 @@ impl<'a> ContextWriter<'a> {
}

fn encode_eob<W: Writer>(
&mut self, eob: usize, tx_size: TxSize, tx_class: TxClass, txs_ctx: usize,
&mut self, eob: u16, tx_size: TxSize, tx_class: TxClass, txs_ctx: usize,
plane_type: usize, w: &mut W,
) {
let (eob_pt, eob_extra) = Self::get_eob_pos_token(eob);
Expand Down Expand Up @@ -1913,43 +1915,48 @@ impl<'a> ContextWriter<'a> {
}

fn encode_coeffs<T: Coefficient, W: Writer>(
&mut self, coeffs: &[T], levels: &mut [u8], scan: &[u16], eob: usize,
&mut self, coeffs: &[T], levels: &mut [u8], scan: &[u16], eob: u16,
tx_size: TxSize, tx_class: TxClass, txs_ctx: usize, plane_type: usize,
w: &mut W,
) {
// SAFETY: We write to the array below before reading from it.
let mut coeff_contexts: Aligned<[i8; MAX_CODED_TX_SQUARE]> =
let mut coeff_contexts: Aligned<[MaybeUninit<i8>; MAX_CODED_TX_SQUARE]> =
unsafe { Aligned::uninitialized() };

self.get_nz_map_contexts(
// get_nz_map_contexts sets coeff_contexts contiguously as a parallel array for scan, not in scan order
let coeff_contexts = self.get_nz_map_contexts(
levels,
scan,
eob as u16,
eob,
tx_size,
tx_class,
&mut coeff_contexts.data,
);

let bhl = Self::get_txb_bhl(tx_size);

for (c, (&pos, &v)) in scan.iter().zip(coeffs.iter()).enumerate().rev() {
let scan_with_ctx =
scan.iter().copied().zip(coeff_contexts.iter().copied());
for (c, ((pos, coeff_ctx), v)) in
scan_with_ctx.zip(coeffs.iter().copied()).enumerate().rev()
{
let pos = pos as usize;
let coeff_ctx = coeff_contexts.data[pos];
let coeff_ctx = coeff_ctx as usize;
let level = v.abs();

if c == eob - 1 {
if c == usize::from(eob) - 1 {
symbol_with_update!(
self,
w,
cmp::min(u32::cast_from(level), 3) - 1,
&self.fc.coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx as usize]
&self.fc.coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx]
);
} else {
symbol_with_update!(
self,
w,
cmp::min(u32::cast_from(level), 3),
&self.fc.coeff_base_cdf[txs_ctx][plane_type][coeff_ctx as usize]
&self.fc.coeff_base_cdf[txs_ctx][plane_type][coeff_ctx]
);
}

Expand Down
Loading

0 comments on commit 274d00e

Please sign in to comment.