@@ -556,62 +556,64 @@ class ThreeBoxApproxPass final : public Pass {
556556 skvx::Vec<4 , uint32_t >* buffer0Cursor = fBuffer0Cursor ;
557557 skvx::Vec<4 , uint32_t >* buffer1Cursor = fBuffer1Cursor ;
558558 skvx::Vec<4 , uint32_t >* buffer2Cursor = fBuffer2Cursor ;
559- v4u32 sum0 = __lsx_vld (fSum0 , 0 ); // same as skvx::Vec<4, uint32_t>::Load(fSum0);
560- v4u32 sum1 = __lsx_vld (fSum1 , 0 );
561- v4u32 sum2 = __lsx_vld (fSum2 , 0 );
559+ v4u32 sum0 = (v4u32) __lsx_vld (fSum0 , 0 ); // same as skvx::Vec<4, uint32_t>::Load(fSum0);
560+ v4u32 sum1 = (v4u32) __lsx_vld (fSum1 , 0 );
561+ v4u32 sum2 = (v4u32) __lsx_vld (fSum2 , 0 );
562562
563563 auto processValue = [&](v4u32& vLeadingEdge){ // One step: fold vLeadingEdge into the three running sums, rotate the three buffers, return the byte-shuffled scaled sum2.
564564 sum0 += vLeadingEdge; // first running sum takes the incoming edge
565565 sum1 += sum0; // second running sum accumulates the first
566566 sum2 += sum1; // third running sum accumulates the second
567567
568- v4u32 divisorFactor = __lsx_vreplgr2vr_w (fDivider .divisorFactor ());
569- v4u32 blurred = __lsx_vmuh_w (divisorFactor, sum2);
568+ v4u32 divisorFactor = (v4u32) __lsx_vreplgr2vr_w (fDivider .divisorFactor ()); // replicate the scalar factor into every 32-bit lane
569+ v4u32 blurred = (v4u32) __lsx_vmuh_w ((__m128i) divisorFactor, (__m128i) sum2); // high 32 bits of each lane product. NOTE(review): vmuh_w is the signed variant while operands are held as v4u32 - confirm value ranges make this safe.
570570
571- v4u32 buffer2Value = __lsx_vld (buffer2Cursor, 0 ); // Not fBuffer0Cursor, out of bounds.
571+ v4u32 buffer2Value = (v4u32) __lsx_vld (buffer2Cursor, 0 ); // Not fBuffer0Cursor, out of bounds.
572572 sum2 -= buffer2Value; // remove the value previously stored at buffer2Cursor
573- __lsx_vst (sum1, (void *)buffer2Cursor, 0 );
573+ __lsx_vst ((__m128i) sum1, (void *)buffer2Cursor, 0 ); // overwrite that slot with the current sum1
574574 buffer2Cursor = (buffer2Cursor + 1 ) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2 ; // advance, wrapping back to fBuffer2 at fBuffersEnd
575- v4u32 buffer1Value = __lsx_vld (buffer1Cursor, 0 );
575+ v4u32 buffer1Value = (v4u32) __lsx_vld (buffer1Cursor, 0 ); // value previously stored at buffer1Cursor
576576 sum1 -= buffer1Value; // remove it from sum1
577- __lsx_vst (sum0, (void *)buffer1Cursor, 0 );
577+ __lsx_vst ((__m128i) sum0, (void *)buffer1Cursor, 0 ); // overwrite that slot with the current sum0
578578 buffer1Cursor = (buffer1Cursor + 1 ) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1 ; // advance, wrapping back to fBuffer1 at fBuffer2
579- v4u32 buffer0Value = __lsx_vld (buffer0Cursor, 0 );
579+ v4u32 buffer0Value = (v4u32) __lsx_vld (buffer0Cursor, 0 ); // value previously stored at buffer0Cursor
580580 sum0 -= buffer0Value; // remove it from sum0
581- __lsx_vst (vLeadingEdge, (void *)buffer0Cursor, 0 );
581+ __lsx_vst ((__m128i) vLeadingEdge, (void *)buffer0Cursor, 0 ); // overwrite that slot with the incoming edge
582582 buffer0Cursor = (buffer0Cursor + 1 ) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0 ; // advance, wrapping back to fBuffer0 at fBuffer1
583583
584584 v16u8 shuf = {0x0 ,0x4 ,0x8 ,0xc ,0x0 }; // byte indices 0, 4, 8, 12; the unlisted elements are zero-initialized
585- v16u8 ret = __lsx_vshuf_b (blurred, blurred, shuf);
585+ v16u8 ret = (v16u8) __lsx_vshuf_b ((__m128i) blurred, (__m128i) blurred, (__m128i) shuf); // both sources are blurred, so the first four result bytes are bytes 0, 4, 8, 12 of blurred
586586 return ret; // returned as v16u8; callers reinterpret it before storing a 32-bit element
587587 };
588588
589- v4u32 zero = __lsx_vldi (0x0 );
589+ v4u32 zero = (v4u32) __lsx_vldi (0x0 );
590590 if (!src32 && !dst32) {
591591 while (n --> 0 ) {
592592 (void )processValue (zero);
593593 }
594594 } else if (src32 && !dst32) {
595595 while (n --> 0 ) {
596- v4u32 edge = __lsx_vinsgr2vr_w (zero, *src32, 0 );
597- edge = __lsx_vilvl_b (zero, edge);
598- edge = __lsx_vilvl_h (zero, edge);
596+ v4u32 edge = (v4u32) __lsx_vinsgr2vr_w ((__m128i) zero, *src32, 0 );
597+ edge = (v4u32) __lsx_vilvl_b ((__m128i) zero, (__m128i) edge);
598+ edge = (v4u32) __lsx_vilvl_h ((__m128i) zero, (__m128i) edge);
599599 (void )processValue (edge);
600600 src32 += srcStride;
601601 }
602602 } else if (!src32 && dst32) {
603603 while (n --> 0 ) {
604- v4u32 ret = processValue (zero);
605- __lsx_vstelm_w (ret, dst32, 0 , 0 ); // 3rd is offset, 4th is idx.
604+ v16u8 ret_vec = processValue (zero);
605+ v4u32 ret = (v4u32)ret_vec;
606+ __lsx_vstelm_w ((__m128i)ret, dst32, 0 , 0 ); // 3rd is offset, 4th is idx.
606607 dst32 += dstStride;
607608 }
608609 } else if (src32 && dst32) {
609610 while (n --> 0 ) {
610- v4u32 edge = __lsx_vinsgr2vr_w (zero, *src32, 0 );
611- edge = __lsx_vilvl_b (zero, edge);
612- edge = __lsx_vilvl_h (zero, edge);
613- v4u32 ret = processValue (edge);
614- __lsx_vstelm_w (ret, dst32, 0 , 0 );
611+ v4u32 edge = (v4u32)__lsx_vinsgr2vr_w ((__m128i)zero, *src32, 0 );
612+ edge = (v4u32)__lsx_vilvl_b ((__m128i)zero, (__m128i)edge);
613+ edge = (v4u32)__lsx_vilvl_h ((__m128i)zero, (__m128i)edge);
614+ v16u8 ret_vec = processValue (edge);
615+ v4u32 ret = (v4u32)ret_vec;
616+ __lsx_vstelm_w ((__m128i)ret, dst32, 0 , 0 );
615617 src32 += srcStride;
616618 dst32 += dstStride;
617619 }
@@ -622,9 +624,9 @@ class ThreeBoxApproxPass final : public Pass {
622624 fBuffer1Cursor = buffer1Cursor;
623625 fBuffer2Cursor = buffer2Cursor;
624626
625- __lsx_vst (sum0, fSum0 , 0 );
626- __lsx_vst (sum1, fSum1 , 0 );
627- __lsx_vst (sum2, fSum2 , 0 );
627+ __lsx_vst ((__m128i) sum0, fSum0 , 0 );
628+ __lsx_vst ((__m128i) sum1, fSum1 , 0 );
629+ __lsx_vst ((__m128i) sum2, fSum2 , 0 );
628630#else
629631 skvx::Vec<4 , uint32_t >* buffer0Cursor = fBuffer0Cursor ;
630632 skvx::Vec<4 , uint32_t >* buffer1Cursor = fBuffer1Cursor ;
0 commit comments