@@ -117,14 +117,6 @@ default_texture(BatchedRendererServices* bsr, ustring filename,
117117 has_derivs ? (float *)&dresultds_simd : NULL ,
118118 has_derivs ? (float *)&dresultdt_simd : NULL );
119119
120- OIIO::simd::vfloat4 dresultdx_simd;
121- OIIO::simd::vfloat4 dresultdy_simd;
122- if (has_derivs) {
123- // Correct our st texture space gradients into xy-space gradients
124- dresultdx_simd = dresultds_simd * dsdx + dresultdt_simd * dtdx;
125- dresultdy_simd = dresultds_simd * dsdy + dresultdt_simd * dtdy;
126- }
127-
128120 // NOTE: regardless of the value of "retVal" we will always copy over the texture system's results.
129121 // We are relying on the texture system properly filling in missing or fill colors
130122
@@ -142,10 +134,10 @@ default_texture(BatchedRendererServices* bsr, ustring filename,
142134 MaskedDx<Color3> resultDx (resultRef);
143135 MaskedDy<Color3> resultDy (resultRef);
144136
145- resultDx[lane] = Color3 (dresultdx_simd [0 ], dresultdx_simd [1 ],
146- dresultdx_simd [2 ]);
147- resultDy[lane] = Color3 (dresultdy_simd [0 ], dresultdy_simd [1 ],
148- dresultdy_simd [2 ]);
137+ resultDx[lane] = Color3 (dresultds_simd [0 ], dresultds_simd [1 ],
138+ dresultds_simd [2 ]);
139+ resultDy[lane] = Color3 (dresultdt_simd [0 ], dresultdt_simd [1 ],
140+ dresultdt_simd [2 ]);
149141 }
150142 } else if (Masked<float >::is (resultRef)) {
151143 alphaChannelIndex = 1 ;
@@ -154,8 +146,8 @@ default_texture(BatchedRendererServices* bsr, ustring filename,
154146 MaskedDy<float > resultDy (resultRef);
155147 result[lane] = result_simd[0 ];
156148 if (resultRef.has_derivs ()) {
157- resultDx[lane] = dresultdx_simd [0 ];
158- resultDy[lane] = dresultdy_simd [0 ];
149+ resultDx[lane] = dresultds_simd [0 ];
150+ resultDy[lane] = dresultdt_simd [0 ];
159151 }
160152 }
161153
@@ -165,8 +157,8 @@ default_texture(BatchedRendererServices* bsr, ustring filename,
165157 if (alphaRef.has_derivs ()) {
166158 MaskedDx<float > alphaDx (alphaRef);
167159 MaskedDy<float > alphaDy (alphaRef);
168- alphaDx[lane] = dresultdx_simd [alphaChannelIndex];
169- alphaDy[lane] = dresultdy_simd [alphaChannelIndex];
160+ alphaDx[lane] = dresultds_simd [alphaChannelIndex];
161+ alphaDy[lane] = dresultdt_simd [alphaChannelIndex];
170162 }
171163 }
172164 // std::cout << "s: " << s.get(i) << " t: " << t.get(i) << " color: " << resultColor << " " << wideResult.get(i) << std::endl;
@@ -311,16 +303,6 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename,
311303 has_derivs ? (float *)&dresultdt_simd : nullptr ,
312304 has_derivs ? (float *)&dresultdr_simd : nullptr );
313305
314- OIIO::simd::vfloat4 dresultdx_simd;
315- OIIO::simd::vfloat4 dresultdy_simd;
316- if (has_derivs) {
317- // Correct our str texture space gradients into xyz-space gradients
318- dresultdx_simd = dresultds_simd * dPdx.x + dresultdt_simd * dPdx.y
319- + dresultdr_simd * dPdx.z ;
320- dresultdy_simd = dresultds_simd * dPdy.x + dresultdt_simd * dPdy.y
321- + dresultdr_simd * dPdy.z ;
322- }
323-
324306 // NOTE: regardless of the value of "retVal" we will always copy over the texture system's results.
325307 // We are relying on the texture system properly filling in missing or fill colors
326308
@@ -337,10 +319,10 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename,
337319 if (resultRef.has_derivs ()) {
338320 MaskedDx<Color3> resultDx (resultRef);
339321 MaskedDy<Color3> resultDy (resultRef);
340- resultDx[lane] = Color3 (dresultdx_simd [0 ], dresultdx_simd [1 ],
341- dresultdx_simd [2 ]);
342- resultDy[lane] = Color3 (dresultdy_simd [0 ], dresultdy_simd [1 ],
343- dresultdy_simd [2 ]);
322+ resultDx[lane] = Color3 (dresultds_simd [0 ], dresultds_simd [1 ],
323+ dresultds_simd [2 ]);
324+ resultDy[lane] = Color3 (dresultdt_simd [0 ], dresultdt_simd [1 ],
325+ dresultdt_simd [2 ]);
344326 }
345327 } else if (Masked<float >::is (resultRef)) {
346328 alphaChannelIndex = 1 ;
@@ -349,8 +331,8 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename,
349331 if (resultRef.has_derivs ()) {
350332 MaskedDx<float > resultDx (resultRef);
351333 MaskedDy<float > resultDy (resultRef);
352- resultDx[lane] = dresultdx_simd [0 ];
353- resultDy[lane] = dresultdy_simd [0 ];
334+ resultDx[lane] = dresultds_simd [0 ];
335+ resultDy[lane] = dresultdt_simd [0 ];
354336 }
355337 }
356338
@@ -361,8 +343,8 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename,
361343 if (alphaRef.has_derivs ()) {
362344 MaskedDx<float > alphaDx (alphaRef);
363345 MaskedDy<float > alphaDy (alphaRef);
364- alphaDx[lane] = dresultdx_simd [alphaChannelIndex];
365- alphaDy[lane] = dresultdy_simd [alphaChannelIndex];
346+ alphaDx[lane] = dresultds_simd [alphaChannelIndex];
347+ alphaDy[lane] = dresultdt_simd [alphaChannelIndex];
366348 }
367349 }
368350
@@ -561,6 +543,164 @@ dispatch_environment(BatchedRendererServices* bsr, ustring filename,
561543} // namespace
562544
563545
546+ static OSL_NOINLINE void
547+ transformWideTextureGradients (BatchedTextureOutputs& outputs,
548+ Wide<const float > dsdx, Wide<const float > dtdx,
549+ Wide<const float > dsdy, Wide<const float > dtdy)
550+ {
551+ MaskedData resultRef = outputs.result ();
552+ if (resultRef.valid () && resultRef.has_derivs ()) {
553+ if (Masked<float >::is (resultRef)) {
554+ OSL_FORCEINLINE_BLOCK
555+ {
556+ MaskedDx<float > drds (resultRef);
557+ MaskedDy<float > drdt (resultRef);
558+
559+ OSL_OMP_PRAGMA (omp simd simdlen (__OSL_WIDTH))
560+ for (int i = 0 ; i < __OSL_WIDTH; ++i) {
561+ float drdsVal = drds[i];
562+ float drdtVal = drdt[i];
563+ float drdx = drdsVal * dsdx[i] + drdtVal * dtdx[i];
564+ float drdy = drdsVal * dsdy[i] + drdtVal * dtdy[i];
565+ drds[i] = drdx;
566+ drdt[i] = drdy;
567+ }
568+ }
569+ } else {
570+ // keep assert out of inlined code
571+ OSL_DASSERT (Masked<Color3>::is (resultRef));
572+ OSL_FORCEINLINE_BLOCK
573+ {
574+ // printf("doint color\n");
575+ MaskedDx<Color3> widedrds (resultRef);
576+ MaskedDy<Color3> widedrdt (resultRef);
577+ OSL_OMP_PRAGMA (omp simd simdlen (__OSL_WIDTH))
578+ for (int i = 0 ; i < __OSL_WIDTH; ++i) {
579+ Color3 drdsColor = widedrds[i];
580+ Color3 drdtColor = widedrdt[i];
581+
582+ widedrds[i] = drdsColor * dsdx[i] + drdtColor * dtdx[i];
583+ widedrdt[i] = drdsColor * dsdy[i] + drdtColor * dtdy[i];
584+ }
585+ }
586+ }
587+ }
588+
589+ MaskedData alphaRef = outputs.alpha ();
590+ OSL_FORCEINLINE_BLOCK
591+ if (alphaRef.valid () && alphaRef.has_derivs ()) {
592+ MaskedDx<float > dads (alphaRef);
593+ MaskedDy<float > dadt (alphaRef);
594+ OSL_OMP_PRAGMA (omp simd simdlen (__OSL_WIDTH))
595+ for (int i = 0 ; i < __OSL_WIDTH; ++i) {
596+ float dadsVal = dads[i];
597+ float dadtVal = dadt[i];
598+ float dadx = dadsVal * dsdx[i] + dadtVal * dtdx[i];
599+ float dady = dadsVal * dsdy[i] + dadtVal * dtdy[i];
600+ dads[i] = dadx;
601+ dadt[i] = dady;
602+ }
603+ }
604+ }
605+
606+ static OSL_NOINLINE void
607+ transformWideTextureGradientsTexture3d (BatchedTextureOutputs& outputs,
608+ Wide<const Vec3> Pdx,
609+ Wide<const Vec3> Pdy,
610+ Wide<const Vec3> Pdz)
611+ {
612+ MaskedData resultRef = outputs.result ();
613+ if (resultRef.valid () && resultRef.has_derivs ()) {
614+ if (Masked<float >::is (resultRef)) {
615+ OSL_FORCEINLINE_BLOCK
616+ {
617+ MaskedDx<float > drds (resultRef);
618+ MaskedDy<float > drdt (resultRef);
619+ // MaskedDz<float> drdr(resultRef); // our duals don't actually have space for this
620+
621+ OSL_OMP_PRAGMA (omp simd simdlen (__OSL_WIDTH))
622+ for (int i = 0 ; i < __OSL_WIDTH; ++i) {
623+ float dres_xVal = drds[i];
624+ float dres_yVal = drdt[i];
625+ // float dres_zVal = drdr[i];
626+
627+ Vec3 v3pdx = Pdx[i];
628+ Vec3 v3pdy = Pdy[i];
629+ // Vec3 v3pdz = Pdz[i];
630+
631+ float dres_x = dres_xVal * v3pdx.x
632+ + dres_yVal
633+ * v3pdx.y ; // + dres_zVal * v3pdx.z;
634+ float dres_y = dres_xVal * v3pdy.x
635+ + dres_yVal
636+ * v3pdy.y ; // + dres_zVal * v3pdy.z;
637+ // float dres_z = dres_xVal * v3pdz.x + dres_yVal * v3pdz.y + dres_zVal * v3pdz.z;
638+
639+ drds[i] = dres_x;
640+ drdt[i] = dres_y;
641+ // drdr[i] = dres_z;
642+ }
643+ }
644+ } else {
645+ // keep assert out of inlined code
646+ OSL_DASSERT (Masked<Color3>::is (resultRef));
647+ OSL_FORCEINLINE_BLOCK
648+ {
649+ MaskedDx<Color3> widedrp1 (resultRef);
650+ MaskedDy<Color3> widedrp2 (resultRef);
651+ // MaskedDz<Color3> widedrp3(resultRef);
652+
653+ OSL_OMP_PRAGMA (omp simd simdlen (__OSL_WIDTH))
654+ for (int i = 0 ; i < __OSL_WIDTH; ++i) {
655+ Color3 drdp1Color = widedrp1[i];
656+ Color3 drdp2Color = widedrp2[i];
657+ // Color3 drdp3Color = widedrp3[i];
658+
659+ Vec3 v3pdx = Pdx[i];
660+ Vec3 v3pdy = Pdy[i];
661+ // Vec3 v3pdz = Pdz[i];
662+
663+ widedrp1[i] = drdp1Color * v3pdx.x
664+ + drdp2Color
665+ * v3pdx.y ; // + drdp3Color * v3pdx.z;
666+ widedrp2[i] = drdp1Color * v3pdy.x
667+ + drdp2Color
668+ * v3pdy.y ; // + drdp3Color * v3pdy.z;
669+ // widedrp3[i] = drdp1Color * v3pdz.x + drdp2Color * v3pdz.y + drdp3Color * v3pdz.z;
670+ }
671+ }
672+ }
673+ }
674+
675+ MaskedData alphaRef = outputs.alpha ();
676+ OSL_FORCEINLINE_BLOCK
677+ if (alphaRef.valid () && alphaRef.has_derivs ()) {
678+ MaskedDx<float > dap1 (alphaRef);
679+ MaskedDy<float > dap2 (alphaRef);
680+ // MaskedDz<float> dap3(alphaRef);
681+
682+ OSL_OMP_PRAGMA (omp simd simdlen (__OSL_WIDTH))
683+ for (int i = 0 ; i < __OSL_WIDTH; ++i) {
684+ float dadp1Val = dap1[i];
685+ float dadp2Val = dap2[i];
686+ // float dadp3Val = dap3[i];
687+
688+ Vec3 v3pdx = Pdx[i];
689+ Vec3 v3pdy = Pdy[i];
690+ // Vec3 v3pdz = Pdz[i];
691+
692+ float dadpx = dadp1Val * v3pdx.x
693+ + dadp2Val * v3pdx.y ; // + dadp3Val * v3pdx.z;
694+ float dadpy = dadp1Val * v3pdy.x
695+ + dadp2Val * v3pdy.y ; // + dadp3Val * v3pdy.z;
696+ // float dadpz = dadp1Val * v3pdz.x + dadp2Val * v3pdz.y + dadp3Val * v3pdz.z;
697+
698+ dap1[i] = dadpx;
699+ dap2[i] = dadpy;
700+ // dap3[i] = dadpz;
701+ }
702+ }
703+ }
564704
565705OSL_BATCHOP int
566706__OSL_MASKED_OP (texture)(void * bsg_, ustring_pod name_, void * handle,
@@ -590,6 +730,14 @@ __OSL_MASKED_OP(texture)(void* bsg_, ustring_pod name_, void* handle,
590730 Wide<const float >(dsdy), Wide<const float >(dtdy),
591731 outputs);
592732
733+ // Correct our st texture space gradients into xy-space gradients
734+ if (resultHasDerivs || alphaHasDerivs) {
735+ transformWideTextureGradients (outputs, Wide<const float >(dsdx),
736+ Wide<const float >(dtdx),
737+ Wide<const float >(dsdy),
738+ Wide<const float >(dtdy));
739+ }
740+
593741 OSL_FORCEINLINE_BLOCK
594742 if (outputs.errormessage ().valid ()) {
595743 Masked<ustring> err (outputs.errormessage ());
@@ -637,6 +785,13 @@ __OSL_MASKED_OP(texture3d)(void* bsg_, ustring_pod name_, void* handle,
637785 Wide<const Vec3>(wPdy), Wide<const Vec3>(wPdz),
638786 outputs);
639787
788+ // Correct our P (Vec3) space gradients into xyz-space gradients
789+ if (resultHasDerivs || alphaHasDerivs) {
790+ transformWideTextureGradientsTexture3d (outputs, Wide<const Vec3>(wPdx),
791+ Wide<const Vec3>(wPdy),
792+ Wide<const Vec3>(wPdz));
793+ }
794+
640795 OSL_FORCEINLINE_BLOCK
641796 if (outputs.errormessage ().valid ()) {
642797 Masked<ustring> err (outputs.errormessage ());
0 commit comments