11use crate :: api:: FrameQueue ;
22use crate :: cpu_features:: CpuFeatureLevel ;
3+ use crate :: util:: { Aligned , AlignedBoxedSlice } ;
34use crate :: EncoderStatus ;
45use arrayvec:: ArrayVec ;
56use cfg_if:: cfg_if;
@@ -296,13 +297,13 @@ where
296297 ) -> & mut ( R2cFftHandler < f32 > , FftHandler < f32 > , FftHandler < f32 > ) ;
297298
298299 fn do_filtering ( & mut self , src : & [ [ Plane < T > ; 3 ] ] , dest : & mut Frame < T > ) {
299- let mut dftr = [ 0f32 ; BLOCK_VOLUME ] ;
300- let mut dftc = [ Complex :: < f32 > :: default ( ) ; COMPLEX_COUNT ] ;
301- let mut means = [ Complex :: < f32 > :: default ( ) ; COMPLEX_COUNT ] ;
300+ let mut dftr = Aligned :: new ( [ 0f32 ; BLOCK_VOLUME ] ) ;
301+ let mut dftc = Aligned :: new ( [ Complex :: < f32 > :: default ( ) ; COMPLEX_COUNT ] ) ;
302+ let mut means = Aligned :: new ( [ Complex :: < f32 > :: default ( ) ; COMPLEX_COUNT ] ) ;
302303
303304 for p in 0 ..3 {
304305 let ( pad_width, pad_height) = self . pad_dimensions ( p) ;
305- let mut ebuff = vec ! [ 0f32 ; pad_width * pad_height] ;
306+ let mut ebuff = AlignedBoxedSlice :: new ( pad_width * pad_height, 0f32 ) ;
306307 let effective_height = self . effective_height ( p) ;
307308 let src_stride = src[ 0 ] [ p] . cfg . stride ;
308309 let ebuff_stride = pad_width;
@@ -324,23 +325,23 @@ where
324325 self . proc0 (
325326 src_planes[ z] . get_unchecked ( x..) ,
326327 self . hw ( ) . get_unchecked ( ( BLOCK_AREA * z) ..) ,
327- dftr. get_unchecked_mut ( ( BLOCK_AREA * z) ..) ,
328+ dftr. data . get_unchecked_mut ( ( BLOCK_AREA * z) ..) ,
328329 src_stride,
329330 SB_SIZE ,
330331 self . src_scale ( ) ,
331332 ) ;
332333 }
333334
334- self . real_to_complex_3d ( & dftr, & mut dftc) ;
335- self . remove_mean ( & mut dftc, self . dftgc ( ) , & mut means) ;
335+ self . real_to_complex_3d ( & dftr. data , & mut dftc. data ) ;
336+ self . remove_mean ( & mut dftc. data , self . dftgc ( ) , & mut means. data ) ;
336337
337- self . filter_coeffs ( & mut dftc) ;
338+ self . filter_coeffs ( & mut dftc. data ) ;
338339
339- self . add_mean ( & mut dftc, & means) ;
340- self . complex_to_real_3d ( & dftc, & mut dftr) ;
340+ self . add_mean ( & mut dftc. data , & means. data ) ;
341+ self . complex_to_real_3d ( & dftc. data , & mut dftr. data ) ;
341342
342343 self . proc1 (
343- dftr. get_unchecked ( ( TB_MIDPOINT * BLOCK_AREA ) ..) ,
344+ dftr. data . get_unchecked ( ( TB_MIDPOINT * BLOCK_AREA ) ..) ,
344345 self . hw ( ) . get_unchecked ( ( TB_MIDPOINT * BLOCK_AREA ) ..) ,
345346 ebuff. get_unchecked_mut ( ( y * ebuff_stride + x) ..) ,
346347 SB_SIZE ,
@@ -405,7 +406,7 @@ where
405406 let s0 = s0. add ( u * p0 + v) ;
406407 let s1 = s1. add ( u * p0 + v) ;
407408 let dest = dest. add ( u * p1 + v) ;
408- dest. write ( dest . read ( ) + s0 . read ( ) * s1 . read ( ) ) ;
409+ dest. write ( s0 . read ( ) . mul_add ( s1 . read ( ) , dest . read ( ) ) ) ;
409410 }
410411 }
411412 }
@@ -693,10 +694,10 @@ where
693694 pad_dimensions : ArrayVec < ( usize , usize ) , 3 > ,
694695 effective_heights : ArrayVec < usize , 3 > ,
695696
696- hw : [ f32 ; BLOCK_VOLUME ] ,
697- dftgc : [ Complex < f32 > ; COMPLEX_COUNT ] ,
697+ hw : Aligned < [ f32 ; BLOCK_VOLUME ] > ,
698+ dftgc : Aligned < [ Complex < f32 > ; COMPLEX_COUNT ] > ,
698699 fft : ( R2cFftHandler < f32 > , FftHandler < f32 > , FftHandler < f32 > ) ,
699- sigmas : [ f32 ; CCNT2 ] ,
700+ sigmas : Aligned < [ f32 ; CCNT2 ] > ,
700701}
701702
702703impl < T > DftDenoiserRust < T >
@@ -708,8 +709,8 @@ where
708709 pad_dimensions : ArrayVec < ( usize , usize ) , 3 > ,
709710 effective_heights : ArrayVec < usize , 3 > ,
710711 ) -> Self {
711- let hw = create_window ( ) ;
712- let mut dftgr = [ 0f32 ; BLOCK_VOLUME ] ;
712+ let hw = Aligned :: new ( create_window ( ) ) ;
713+ let mut dftgr = Aligned :: new ( [ 0f32 ; BLOCK_VOLUME ] ) ;
713714
714715 let fft = (
715716 R2cFftHandler :: new ( SB_SIZE ) ,
@@ -719,15 +720,15 @@ where
719720
720721 let mut wscale = 0.0f32 ;
721722 for k in 0 ..BLOCK_VOLUME {
722- dftgr[ k] = 255.0 * hw[ k] ;
723- wscale += hw[ k] . powi ( 2 ) ;
723+ dftgr. data [ k] = 255.0 * hw. data [ k] ;
724+ wscale += hw. data [ k] . powi ( 2 ) ;
724725 }
725726 let wscale = 1.0 / wscale;
726727
727- let mut sigmas = [ 0f32 ; CCNT2 ] ;
728- sigmas. fill ( sigma / wscale) ;
728+ let mut sigmas = Aligned :: new ( [ 0f32 ; CCNT2 ] ) ;
729+ sigmas. data . fill ( sigma / wscale) ;
729730
730- let mut denoiser = DftDenoiserRust {
731+ let mut denoiser = Self {
731732 dest_scale,
732733 src_scale,
733734 peak,
@@ -736,11 +737,11 @@ where
736737 hw,
737738 fft,
738739 sigmas,
739- dftgc : [ Complex :: default ( ) ; COMPLEX_COUNT ] ,
740+ dftgc : Aligned :: new ( [ Complex :: default ( ) ; COMPLEX_COUNT ] ) ,
740741 } ;
741742
742- let mut dftgc = [ Complex :: default ( ) ; COMPLEX_COUNT ] ;
743- denoiser. real_to_complex_3d ( & dftgr, & mut dftgc) ;
743+ let mut dftgc = Aligned :: new ( [ Complex :: default ( ) ; COMPLEX_COUNT ] ) ;
744+ denoiser. real_to_complex_3d ( & dftgr. data , & mut dftgc. data ) ;
744745 denoiser. dftgc = dftgc;
745746
746747 denoiser
@@ -778,17 +779,17 @@ where
778779
779780 #[ inline( always) ]
780781 fn hw ( & self ) -> & [ f32 ; BLOCK_VOLUME ] {
781- & self . hw
782+ & self . hw . data
782783 }
783784
784785 #[ inline( always) ]
785786 fn dftgc ( & self ) -> & [ Complex < f32 > ; COMPLEX_COUNT ] {
786- & self . dftgc
787+ & self . dftgc . data
787788 }
788789
789790 #[ inline( always) ]
790791 fn sigmas ( & self ) -> & [ f32 ; CCNT2 ] {
791- & self . sigmas
792+ & self . sigmas . data
792793 }
793794
794795 #[ inline( always) ]
0 commit comments