Reduce risk of uninit ac in luma_ac and encode_coeffs (#3271)

* Reduce risk of uninit ac in luma_ac * eob can't exceed u16 * Avoid uninitialized data in coeff_contexts * Fix minor type conversion and clippy issues
xiph · Oct 25, 2023 · 274d00e · 274d00e
1 parent 257a3c5
commit 274d00e
Show file tree

Hide file tree

Showing 16 changed files with 146 additions and 85 deletions.
diff --git a/src/asm/aarch64/predict.rs b/src/asm/aarch64/predict.rs
@@ -30,7 +30,7 @@ macro_rules! decl_cfl_ac_fn {
     extern {
       $(
         fn $f(
-          ac: *mut i16, src: *const u8, stride: libc::ptrdiff_t,
+          ac: *mut MaybeUninit<i16>, src: *const u8, stride: libc::ptrdiff_t,
           w_pad: libc::c_int, h_pad: libc::c_int,
           width: libc::c_int, height: libc::c_int,
         );
@@ -50,7 +50,7 @@ macro_rules! decl_cfl_ac_hbd_fn {
     extern {
       $(
         fn $f(
-          ac: *mut i16, src: *const u16, stride: libc::ptrdiff_t,
+          ac: *mut MaybeUninit<i16>, src: *const u16, stride: libc::ptrdiff_t,
           w_pad: libc::c_int, h_pad: libc::c_int,
           width: libc::c_int, height: libc::c_int,
         );
@@ -659,11 +659,14 @@ pub fn dispatch_predict_intra<T: Pixel>(
   }
 }
 
+/// It MUST initialize all `ac` elements.
 #[inline(always)]
 pub(crate) fn pred_cfl_ac<T: Pixel, const XDEC: usize, const YDEC: usize>(
-  ac: &mut [i16], luma: &PlaneRegion<'_, T>, bsize: BlockSize, w_pad: usize,
-  h_pad: usize, cpu: CpuFeatureLevel,
+  ac: &mut [MaybeUninit<i16>], luma: &PlaneRegion<'_, T>, bsize: BlockSize,
+  w_pad: usize, h_pad: usize, cpu: CpuFeatureLevel,
 ) {
+  debug_assert_eq!(ac.len(), bsize.area());
+
   if cpu < CpuFeatureLevel::NEON {
     return rust::pred_cfl_ac::<T, XDEC, YDEC>(
       ac, luma, bsize, w_pad, h_pad, cpu,

diff --git a/src/asm/aarch64/transform/inverse.rs b/src/asm/aarch64/transform/inverse.rs
@@ -17,7 +17,7 @@ use crate::asm::shared::transform::inverse::*;
 use crate::asm::shared::transform::*;
 
 pub fn inverse_transform_add<T: Pixel>(
-  input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
+  input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: u16,
   tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
 ) {
   if tx_type == TxType::WHT_WHT {

diff --git a/src/asm/shared/predict.rs b/src/asm/shared/predict.rs
@@ -11,6 +11,7 @@
 mod test {
   use interpolate_name::interpolate_test;
   use rand::random;
+  use std::mem::MaybeUninit;
 
   use crate::context::MAX_TX_SIZE;
   use crate::cpu_features::CpuFeatureLevel;
@@ -23,7 +24,7 @@ mod test {
     IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
   };
   use crate::transform::TxSize;
-  use crate::util::Aligned;
+  use crate::util::{slice_assume_init_mut, Aligned};
   use crate::Pixel;
 
   #[test]
@@ -188,27 +189,34 @@ mod test {
     }
     let luma = &plane.as_region();
 
-    let mut ac_ref = Aligned::new([0i16; 32 * 32]);
+    let mut ac_ref = Aligned::new([MaybeUninit::new(0x3333i16); 32 * 32]);
+    let ac_ref = &mut ac_ref.data[..plane_bsize.area()];
 
     let cpu = CpuFeatureLevel::RUST;
     (match (xdec, ydec) {
       (0, 0) => rust::pred_cfl_ac::<T, 0, 0>,
       (1, 0) => rust::pred_cfl_ac::<T, 1, 0>,
       (_, _) => rust::pred_cfl_ac::<T, 1, 1>,
-    })(&mut ac_ref.data, luma, plane_bsize, w_pad, h_pad, cpu);
+    })(ac_ref, luma, plane_bsize, w_pad, h_pad, cpu);
 
     for &cpu in
       &CpuFeatureLevel::all()[..=CpuFeatureLevel::default().as_index()]
     {
-      let mut ac = Aligned::new([0i16; 32 * 32]);
+      let mut ac = Aligned::new([MaybeUninit::new(0x7FFFi16); 32 * 32]);
+      let ac = &mut ac.data[..plane_bsize.area()];
 
       (match (xdec, ydec) {
         (0, 0) => pred_cfl_ac::<T, 0, 0>,
         (1, 0) => pred_cfl_ac::<T, 1, 0>,
         (_, _) => pred_cfl_ac::<T, 1, 1>,
-      })(&mut ac.data, luma, plane_bsize, w_pad, h_pad, cpu);
+      })(ac, luma, plane_bsize, w_pad, h_pad, cpu);
 
-      assert_eq!(&ac_ref.data[..], &ac.data[..])
+      unsafe {
+        let ac_ref = slice_assume_init_mut(ac_ref);
+        let ac = slice_assume_init_mut(ac);
+
+        assert_eq!(&ac_ref, &ac);
+      }
     }
   }
 }
diff --git a/src/asm/shared/transform/inverse.rs b/src/asm/shared/transform/inverse.rs
@@ -20,7 +20,7 @@ pub type InvTxfmHBDFunc =
 
 pub fn call_inverse_func<T: Pixel>(
   func: InvTxfmFunc, input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>,
-  eob: usize, width: usize, height: usize, bd: usize,
+  eob: u16, width: usize, height: usize, bd: usize,
 ) {
   debug_assert!(bd == 8);
 
@@ -51,7 +51,7 @@ pub fn call_inverse_func<T: Pixel>(
 
 pub fn call_inverse_hbd_func<T: Pixel>(
   func: InvTxfmHBDFunc, input: &[T::Coeff],
-  output: &mut PlaneRegionMut<'_, T>, eob: usize, width: usize, height: usize,
+  output: &mut PlaneRegionMut<'_, T>, eob: u16, width: usize, height: usize,
   bd: usize,
 ) {
   // Only use at most 32 columns and 32 rows of input coefficients.
@@ -94,7 +94,7 @@ pub mod test {
 
   pub fn pick_eob<T: Coefficient>(
     coeffs: &mut [T], tx_size: TxSize, tx_type: TxType, sub_h: usize,
-  ) -> usize {
+  ) -> u16 {
     /* From dav1d
      * copy the topleft coefficients such that the return value (being the
      * coefficient scantable index for the eob token) guarantees that only
@@ -105,14 +105,14 @@ pub mod test {
     let coeff_h = av1_get_coded_tx_size(tx_size).height();
     let sub_high: usize = if sub_h > 0 { sub_h * 8 - 1 } else { 0 };
     let sub_low: usize = if sub_h > 1 { sub_high - 8 } else { 0 };
-    let mut eob = 0;
+    let mut eob = 0u16;
     let mut exit = 0;
 
     // Wrap WHT_WHT (16) to DCT_DCT (0) scan table
     let scan = av1_scan_orders[tx_size as usize][(tx_type as usize) & 15].scan;
 
     for (i, &pos) in scan.iter().enumerate() {
-      exit = i;
+      exit = i as u16;
 
       let rc = pos as usize;
       let rcx = rc % coeff_h;
@@ -121,14 +121,14 @@ pub mod test {
       if rcx > sub_high || rcy > sub_high {
         break;
       } else if eob == 0 && (rcx > sub_low || rcy > sub_low) {
-        eob = i;
+        eob = i as u16;
       }
     }
 
     if eob != 0 {
       eob += thread_rng().gen_range(0..(exit - eob).min(1));
     }
-    for &pos in scan.iter().skip(eob) {
+    for &pos in scan.iter().skip(usize::from(eob)) {
       coeffs[pos as usize] = T::cast_from(0);
     }
 
@@ -181,7 +181,7 @@ pub mod test {
       // SAFETY: forward_transform initialized freq
       let freq = unsafe { slice_assume_init_mut(freq) };
 
-      let eob: usize = pick_eob(freq, tx_size, tx_type, sub_h);
+      let eob: u16 = pick_eob(freq, tx_size, tx_type, sub_h);
       let mut rust_dst = dst.clone();
 
       inverse_transform_add(

diff --git a/src/asm/x86/predict.rs b/src/asm/x86/predict.rs
@@ -17,6 +17,7 @@ use crate::tiling::{PlaneRegion, PlaneRegionMut};
 use crate::transform::TxSize;
 use crate::util::Aligned;
 use crate::Pixel;
+use std::mem::MaybeUninit;
 use v_frame::pixel::PixelType;
 
 macro_rules! decl_angular_ipred_fn {
@@ -145,7 +146,7 @@ macro_rules! decl_cfl_ac_fn {
     extern {
       $(
         fn $f(
-          ac: *mut i16, src: *const u8, stride: libc::ptrdiff_t,
+          ac: *mut MaybeUninit<i16>, src: *const u8, stride: libc::ptrdiff_t,
           w_pad: libc::c_int, h_pad: libc::c_int,
           width: libc::c_int, height: libc::c_int,
         );
@@ -168,7 +169,7 @@ macro_rules! decl_cfl_ac_hbd_fn {
     extern {
       $(
         fn $f(
-          ac: *mut i16, src: *const u16, stride: libc::ptrdiff_t,
+          ac: *mut MaybeUninit<i16>, src: *const u16, stride: libc::ptrdiff_t,
           w_pad: libc::c_int, h_pad: libc::c_int,
           width: libc::c_int, height: libc::c_int,
         );
@@ -871,12 +872,15 @@ pub fn dispatch_predict_intra<T: Pixel>(
   }
 }
 
+// The implementation MUST inititialize all `ac` elements
 #[inline(always)]
 pub(crate) fn pred_cfl_ac<T: Pixel, const XDEC: usize, const YDEC: usize>(
-  ac: &mut [i16], luma: &PlaneRegion<'_, T>, bsize: BlockSize, w_pad: usize,
-  h_pad: usize, cpu: CpuFeatureLevel,
+  ac: &mut [MaybeUninit<i16>], luma: &PlaneRegion<'_, T>, bsize: BlockSize,
+  w_pad: usize, h_pad: usize, cpu: CpuFeatureLevel,
 ) {
-  let call_rust = |ac: &mut [i16]| {
+  debug_assert_eq!(ac.len(), bsize.area());
+
+  let call_rust = |ac: &mut [MaybeUninit<i16>]| {
     rust::pred_cfl_ac::<T, XDEC, YDEC>(ac, luma, bsize, w_pad, h_pad, cpu);
   };
 

diff --git a/src/asm/x86/quantize.rs b/src/asm/x86/quantize.rs
@@ -22,7 +22,7 @@ use std::mem::MaybeUninit;
 type DequantizeFn = unsafe fn(
   qindex: u8,
   coeffs_ptr: *const i16,
-  _eob: usize,
+  _eob: u16,
   rcoeffs_ptr: *mut i16,
   tx_size: TxSize,
   bit_depth: usize,
@@ -38,7 +38,7 @@ cpu_function_lookup_table!(
 
 #[inline(always)]
 pub fn dequantize<T: Coefficient>(
-  qindex: u8, coeffs: &[T], eob: usize, rcoeffs: &mut [MaybeUninit<T>],
+  qindex: u8, coeffs: &[T], eob: u16, rcoeffs: &mut [MaybeUninit<T>],
   tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
   cpu: CpuFeatureLevel,
 ) {
@@ -91,7 +91,7 @@ pub fn dequantize<T: Coefficient>(
 
 #[target_feature(enable = "avx2")]
 unsafe fn dequantize_avx2(
-  qindex: u8, coeffs_ptr: *const i16, _eob: usize, rcoeffs_ptr: *mut i16,
+  qindex: u8, coeffs_ptr: *const i16, _eob: u16, rcoeffs_ptr: *mut i16,
   tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
 ) {
   let log_tx_scale = _mm256_set1_epi32(get_log_tx_scale(tx_size) as i32);
@@ -182,12 +182,12 @@ mod test {
 
       // Test the min, max, and random eobs
       let eobs = {
-        let mut out = [0usize; 16];
+        let mut out = [0u16; 16];
         let area: usize = av1_get_coded_tx_size(tx_size).area();
         out[0] = 0;
-        out[1] = area;
+        out[1] = area as u16;
         for eob in out.iter_mut().skip(2) {
-          *eob = rng.gen_range(0..area);
+          *eob = rng.gen_range(0..area as u16);
         }
         out
       };
@@ -198,7 +198,9 @@ mod test {
 
         // Generate quantized coefficients up to the eob
         let between = Uniform::from(-i16::MAX..=i16::MAX);
-        for (i, qcoeff) in qcoeffs.data.iter_mut().enumerate().take(eob) {
+        for (i, qcoeff) in
+          qcoeffs.data.iter_mut().enumerate().take(eob as usize)
+        {
           *qcoeff = between.sample(&mut rng)
             / if i == 0 { dc_quant } else { ac_quant };
         }

diff --git a/src/asm/x86/transform/inverse.rs b/src/asm/x86/transform/inverse.rs
@@ -17,7 +17,7 @@ use crate::asm::shared::transform::inverse::*;
 use crate::asm::shared::transform::*;
 
 pub fn inverse_transform_add<T: Pixel>(
-  input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
+  input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: u16,
   tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
 ) {
   if tx_type == TxType::WHT_WHT {

diff --git a/src/context/block_unit.rs b/src/context/block_unit.rs
@@ -7,6 +7,8 @@
 // Media Patent License 1.0 was not distributed with this source code in the
 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 
+use std::mem::MaybeUninit;
+
 use super::*;
 
 use crate::predict::PredictionMode;
@@ -1781,7 +1783,7 @@ impl<'a> ContextWriter<'a> {
 
   pub fn write_coeffs_lv_map<T: Coefficient, W: Writer>(
     &mut self, w: &mut W, plane: usize, bo: TileBlockOffset, coeffs_in: &[T],
-    eob: usize, pred_mode: PredictionMode, tx_size: TxSize, tx_type: TxType,
+    eob: u16, pred_mode: PredictionMode, tx_size: TxSize, tx_type: TxType,
     plane_bsize: BlockSize, xdec: usize, ydec: usize,
     use_reduced_tx_set: bool, frame_clipped_txw: usize,
     frame_clipped_txh: usize,
@@ -1792,8 +1794,8 @@ impl<'a> ContextWriter<'a> {
     let is_inter = pred_mode >= PredictionMode::NEARESTMV;
 
     // Note: Both intra and inter mode uses inter scan order. Surprised?
-    let scan: &[u16] =
-      &av1_scan_orders[tx_size as usize][tx_type as usize].scan[..eob];
+    let scan: &[u16] = &av1_scan_orders[tx_size as usize][tx_type as usize]
+      .scan[..usize::from(eob)];
     let height = av1_get_coded_tx_size(tx_size).height();
 
     // Create a slice with coeffs in scan order
@@ -1858,7 +1860,7 @@ impl<'a> ContextWriter<'a> {
   }
 
   fn encode_eob<W: Writer>(
-    &mut self, eob: usize, tx_size: TxSize, tx_class: TxClass, txs_ctx: usize,
+    &mut self, eob: u16, tx_size: TxSize, tx_class: TxClass, txs_ctx: usize,
     plane_type: usize, w: &mut W,
   ) {
     let (eob_pt, eob_extra) = Self::get_eob_pos_token(eob);
@@ -1913,43 +1915,48 @@ impl<'a> ContextWriter<'a> {
   }
 
   fn encode_coeffs<T: Coefficient, W: Writer>(
-    &mut self, coeffs: &[T], levels: &mut [u8], scan: &[u16], eob: usize,
+    &mut self, coeffs: &[T], levels: &mut [u8], scan: &[u16], eob: u16,
     tx_size: TxSize, tx_class: TxClass, txs_ctx: usize, plane_type: usize,
     w: &mut W,
   ) {
     // SAFETY: We write to the array below before reading from it.
-    let mut coeff_contexts: Aligned<[i8; MAX_CODED_TX_SQUARE]> =
+    let mut coeff_contexts: Aligned<[MaybeUninit<i8>; MAX_CODED_TX_SQUARE]> =
       unsafe { Aligned::uninitialized() };
 
-    self.get_nz_map_contexts(
+    // get_nz_map_contexts sets coeff_contexts contiguously as a parallel array for scan, not in scan order
+    let coeff_contexts = self.get_nz_map_contexts(
       levels,
       scan,
-      eob as u16,
+      eob,
       tx_size,
       tx_class,
       &mut coeff_contexts.data,
     );
 
     let bhl = Self::get_txb_bhl(tx_size);
 
-    for (c, (&pos, &v)) in scan.iter().zip(coeffs.iter()).enumerate().rev() {
+    let scan_with_ctx =
+      scan.iter().copied().zip(coeff_contexts.iter().copied());
+    for (c, ((pos, coeff_ctx), v)) in
+      scan_with_ctx.zip(coeffs.iter().copied()).enumerate().rev()
+    {
       let pos = pos as usize;
-      let coeff_ctx = coeff_contexts.data[pos];
+      let coeff_ctx = coeff_ctx as usize;
       let level = v.abs();
 
-      if c == eob - 1 {
+      if c == usize::from(eob) - 1 {
         symbol_with_update!(
           self,
           w,
           cmp::min(u32::cast_from(level), 3) - 1,
-          &self.fc.coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx as usize]
+          &self.fc.coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx]
         );
       } else {
         symbol_with_update!(
           self,
           w,
           cmp::min(u32::cast_from(level), 3),
-          &self.fc.coeff_base_cdf[txs_ctx][plane_type][coeff_ctx as usize]
+          &self.fc.coeff_base_cdf[txs_ctx][plane_type][coeff_ctx]
         );
       }