Skip to content

Commit 28b71e7

Browse files
fix(batched): Assume BatchedRendererServices texture derivatives are in st space. (#1828)
The convention in the single-point RendererServices is that the texture call returns derivatives in st space, and they are transformed to xy space before returning from the wrapper to RendererServices. This change makes BatchedRendererServices follow the same convention. --------- Signed-off-by: Stephen Friedman <[email protected]>
1 parent f1aa009 commit 28b71e7

File tree

1 file changed

+189
-34
lines changed

1 file changed

+189
-34
lines changed

src/liboslexec/wide/wide_optexture.cpp

Lines changed: 189 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,6 @@ default_texture(BatchedRendererServices* bsr, ustring filename,
117117
has_derivs ? (float*)&dresultds_simd : NULL,
118118
has_derivs ? (float*)&dresultdt_simd : NULL);
119119

120-
OIIO::simd::vfloat4 dresultdx_simd;
121-
OIIO::simd::vfloat4 dresultdy_simd;
122-
if (has_derivs) {
123-
// Correct our st texture space gradients into xy-space gradients
124-
dresultdx_simd = dresultds_simd * dsdx + dresultdt_simd * dtdx;
125-
dresultdy_simd = dresultds_simd * dsdy + dresultdt_simd * dtdy;
126-
}
127-
128120
// NOTE: regardless of the value of "retVal" we will always copy over the texture system's results.
129121
// We are relying on the texture system properly filling in missing or fill colors
130122

@@ -142,10 +134,10 @@ default_texture(BatchedRendererServices* bsr, ustring filename,
142134
MaskedDx<Color3> resultDx(resultRef);
143135
MaskedDy<Color3> resultDy(resultRef);
144136

145-
resultDx[lane] = Color3(dresultdx_simd[0], dresultdx_simd[1],
146-
dresultdx_simd[2]);
147-
resultDy[lane] = Color3(dresultdy_simd[0], dresultdy_simd[1],
148-
dresultdy_simd[2]);
137+
resultDx[lane] = Color3(dresultds_simd[0], dresultds_simd[1],
138+
dresultds_simd[2]);
139+
resultDy[lane] = Color3(dresultdt_simd[0], dresultdt_simd[1],
140+
dresultdt_simd[2]);
149141
}
150142
} else if (Masked<float>::is(resultRef)) {
151143
alphaChannelIndex = 1;
@@ -154,8 +146,8 @@ default_texture(BatchedRendererServices* bsr, ustring filename,
154146
MaskedDy<float> resultDy(resultRef);
155147
result[lane] = result_simd[0];
156148
if (resultRef.has_derivs()) {
157-
resultDx[lane] = dresultdx_simd[0];
158-
resultDy[lane] = dresultdy_simd[0];
149+
resultDx[lane] = dresultds_simd[0];
150+
resultDy[lane] = dresultdt_simd[0];
159151
}
160152
}
161153

@@ -165,8 +157,8 @@ default_texture(BatchedRendererServices* bsr, ustring filename,
165157
if (alphaRef.has_derivs()) {
166158
MaskedDx<float> alphaDx(alphaRef);
167159
MaskedDy<float> alphaDy(alphaRef);
168-
alphaDx[lane] = dresultdx_simd[alphaChannelIndex];
169-
alphaDy[lane] = dresultdy_simd[alphaChannelIndex];
160+
alphaDx[lane] = dresultds_simd[alphaChannelIndex];
161+
alphaDy[lane] = dresultdt_simd[alphaChannelIndex];
170162
}
171163
}
172164
//std::cout << "s: " << s.get(i) << " t: " << t.get(i) << " color: " << resultColor << " " << wideResult.get(i) << std::endl;
@@ -311,16 +303,6 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename,
311303
has_derivs ? (float*)&dresultdt_simd : nullptr,
312304
has_derivs ? (float*)&dresultdr_simd : nullptr);
313305

314-
OIIO::simd::vfloat4 dresultdx_simd;
315-
OIIO::simd::vfloat4 dresultdy_simd;
316-
if (has_derivs) {
317-
// Correct our str texture space gradients into xyz-space gradients
318-
dresultdx_simd = dresultds_simd * dPdx.x + dresultdt_simd * dPdx.y
319-
+ dresultdr_simd * dPdx.z;
320-
dresultdy_simd = dresultds_simd * dPdy.x + dresultdt_simd * dPdy.y
321-
+ dresultdr_simd * dPdy.z;
322-
}
323-
324306
// NOTE: regardless of the value of "retVal" we will always copy over the texture system's results.
325307
// We are relying on the texture system properly filling in missing or fill colors
326308

@@ -337,10 +319,10 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename,
337319
if (resultRef.has_derivs()) {
338320
MaskedDx<Color3> resultDx(resultRef);
339321
MaskedDy<Color3> resultDy(resultRef);
340-
resultDx[lane] = Color3(dresultdx_simd[0], dresultdx_simd[1],
341-
dresultdx_simd[2]);
342-
resultDy[lane] = Color3(dresultdy_simd[0], dresultdy_simd[1],
343-
dresultdy_simd[2]);
322+
resultDx[lane] = Color3(dresultds_simd[0], dresultds_simd[1],
323+
dresultds_simd[2]);
324+
resultDy[lane] = Color3(dresultdt_simd[0], dresultdt_simd[1],
325+
dresultdt_simd[2]);
344326
}
345327
} else if (Masked<float>::is(resultRef)) {
346328
alphaChannelIndex = 1;
@@ -349,8 +331,8 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename,
349331
if (resultRef.has_derivs()) {
350332
MaskedDx<float> resultDx(resultRef);
351333
MaskedDy<float> resultDy(resultRef);
352-
resultDx[lane] = dresultdx_simd[0];
353-
resultDy[lane] = dresultdy_simd[0];
334+
resultDx[lane] = dresultds_simd[0];
335+
resultDy[lane] = dresultdt_simd[0];
354336
}
355337
}
356338

@@ -361,8 +343,8 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename,
361343
if (alphaRef.has_derivs()) {
362344
MaskedDx<float> alphaDx(alphaRef);
363345
MaskedDy<float> alphaDy(alphaRef);
364-
alphaDx[lane] = dresultdx_simd[alphaChannelIndex];
365-
alphaDy[lane] = dresultdy_simd[alphaChannelIndex];
346+
alphaDx[lane] = dresultds_simd[alphaChannelIndex];
347+
alphaDy[lane] = dresultdt_simd[alphaChannelIndex];
366348
}
367349
}
368350

@@ -561,6 +543,164 @@ dispatch_environment(BatchedRendererServices* bsr, ustring filename,
561543
} // namespace
562544

563545

546+
// Converts the derivatives stored in a batched texture lookup's outputs from
// st (texture) space into xy space, in place, one lane at a time.
//
// On entry the Dx/Dy slots of outputs.result() and outputs.alpha() are read
// as d/ds and d/dt respectively. On exit they hold
//     d/dx = d/ds * dsdx + d/dt * dtdx
//     d/dy = d/ds * dsdy + d/dt * dtdy
//
// outputs        - texture outputs whose result/alpha derivatives are rewritten
// dsdx, dtdx     - per-lane derivatives of s and t with respect to x
// dsdy, dtdy     - per-lane derivatives of s and t with respect to y
//
// Outputs that are not valid or carry no derivatives are left untouched.
static OSL_NOINLINE void
547+
transformWideTextureGradients(BatchedTextureOutputs& outputs,
548+
Wide<const float> dsdx, Wide<const float> dtdx,
549+
Wide<const float> dsdy, Wide<const float> dtdy)
550+
{
551+
MaskedData resultRef = outputs.result();
552+
// Only touch the result when it exists and has derivative slots.
if (resultRef.valid() && resultRef.has_derivs()) {
553+
// Scalar (float) result.
if (Masked<float>::is(resultRef)) {
554+
OSL_FORCEINLINE_BLOCK
555+
{
556+
// Named for what the slots hold on entry (d/ds, d/dt); the same
// slots are overwritten with d/dx, d/dy below.
MaskedDx<float> drds(resultRef);
557+
MaskedDy<float> drdt(resultRef);
558+
559+
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
560+
for (int i = 0; i < __OSL_WIDTH; ++i) {
561+
// Read both inputs into locals before writing either slot,
// since each output depends on both inputs.
float drdsVal = drds[i];
562+
float drdtVal = drdt[i];
563+
float drdx = drdsVal * dsdx[i] + drdtVal * dtdx[i];
564+
float drdy = drdsVal * dsdy[i] + drdtVal * dtdy[i];
565+
drds[i] = drdx;
566+
drdt[i] = drdy;
567+
}
568+
}
569+
} else {
570+
// keep assert out of inlined code
571+
// Anything that is not a float result is expected to be a Color3.
OSL_DASSERT(Masked<Color3>::is(resultRef));
572+
OSL_FORCEINLINE_BLOCK
573+
{
574+
//printf("doint color\n");
575+
MaskedDx<Color3> widedrds(resultRef);
576+
MaskedDy<Color3> widedrdt(resultRef);
577+
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
578+
for (int i = 0; i < __OSL_WIDTH; ++i) {
579+
// Copy both colors out first so the second assignment below
// still sees the original d/ds value.
Color3 drdsColor = widedrds[i];
580+
Color3 drdtColor = widedrdt[i];
581+
582+
widedrds[i] = drdsColor * dsdx[i] + drdtColor * dtdx[i];
583+
widedrdt[i] = drdsColor * dsdy[i] + drdtColor * dtdy[i];
584+
}
585+
}
586+
}
587+
}
588+
589+
// Alpha is handled as a float and gets the same transform as a float result.
MaskedData alphaRef = outputs.alpha();
590+
OSL_FORCEINLINE_BLOCK
591+
if (alphaRef.valid() && alphaRef.has_derivs()) {
592+
MaskedDx<float> dads(alphaRef);
593+
MaskedDy<float> dadt(alphaRef);
594+
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
595+
for (int i = 0; i < __OSL_WIDTH; ++i) {
596+
float dadsVal = dads[i];
597+
float dadtVal = dadt[i];
598+
float dadx = dadsVal * dsdx[i] + dadtVal * dtdx[i];
599+
float dady = dadsVal * dsdy[i] + dadtVal * dtdy[i];
600+
dads[i] = dadx;
601+
dadt[i] = dady;
602+
}
603+
}
604+
}
605+
606+
// texture3d counterpart of the 2D gradient transform: rewrites, in place and
// per lane, the two derivative slots (Dx, Dy) of outputs.result() and
// outputs.alpha() using the position derivatives Pdx and Pdy.
//
// With (d1, d2) being the values read from the Dx and Dy slots, it stores
//     Dx = d1 * Pdx.x + d2 * Pdx.y
//     Dy = d1 * Pdy.x + d2 * Pdy.y
//
// outputs  - texture outputs whose result/alpha derivatives are rewritten
// Pdx, Pdy - per-lane position derivatives; only their .x and .y are read
// Pdz      - accepted but not read by any live code in this function
//
// NOTE: the third-component terms (a third derivative slot, the .z
// components, and Pdz) appear only in commented-out code. As the comment in
// the float branch says, the duals have no space for a third derivative.
// Outputs that are not valid or carry no derivatives are left untouched.
static OSL_NOINLINE void
607+
transformWideTextureGradientsTexture3d(BatchedTextureOutputs& outputs,
608+
Wide<const Vec3> Pdx,
609+
Wide<const Vec3> Pdy,
610+
Wide<const Vec3> Pdz)
611+
{
612+
MaskedData resultRef = outputs.result();
613+
// Only touch the result when it exists and has derivative slots.
if (resultRef.valid() && resultRef.has_derivs()) {
614+
// Scalar (float) result.
if (Masked<float>::is(resultRef)) {
615+
OSL_FORCEINLINE_BLOCK
616+
{
617+
MaskedDx<float> drds(resultRef);
618+
MaskedDy<float> drdt(resultRef);
619+
//MaskedDz<float> drdr(resultRef); // our duals don't actually have space for this
620+
621+
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
622+
for (int i = 0; i < __OSL_WIDTH; ++i) {
623+
// Read both slots into locals before either is overwritten.
float dres_xVal = drds[i];
624+
float dres_yVal = drdt[i];
625+
//float dres_zVal = drdr[i];
626+
627+
Vec3 v3pdx = Pdx[i];
628+
Vec3 v3pdy = Pdy[i];
629+
//Vec3 v3pdz = Pdz[i];
630+
631+
// Two-term sums only; the third (.z) term is disabled.
float dres_x = dres_xVal * v3pdx.x
632+
+ dres_yVal
633+
* v3pdx.y; // + dres_zVal * v3pdx.z;
634+
float dres_y = dres_xVal * v3pdy.x
635+
+ dres_yVal
636+
* v3pdy.y; // + dres_zVal * v3pdy.z;
637+
//float dres_z = dres_xVal * v3pdz.x + dres_yVal * v3pdz.y + dres_zVal * v3pdz.z;
638+
639+
drds[i] = dres_x;
640+
drdt[i] = dres_y;
641+
//drdr[i] = dres_z;
642+
}
643+
}
644+
} else {
645+
// keep assert out of inlined code
646+
// Anything that is not a float result is expected to be a Color3.
OSL_DASSERT(Masked<Color3>::is(resultRef));
647+
OSL_FORCEINLINE_BLOCK
648+
{
649+
MaskedDx<Color3> widedrp1(resultRef);
650+
MaskedDy<Color3> widedrp2(resultRef);
651+
//MaskedDz<Color3> widedrp3(resultRef);
652+
653+
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
654+
for (int i = 0; i < __OSL_WIDTH; ++i) {
655+
// Copy both colors out first so the second assignment below
// still sees the original first-slot value.
Color3 drdp1Color = widedrp1[i];
656+
Color3 drdp2Color = widedrp2[i];
657+
//Color3 drdp3Color = widedrp3[i];
658+
659+
Vec3 v3pdx = Pdx[i];
660+
Vec3 v3pdy = Pdy[i];
661+
//Vec3 v3pdz = Pdz[i];
662+
663+
widedrp1[i] = drdp1Color * v3pdx.x
664+
+ drdp2Color
665+
* v3pdx.y; // + drdp3Color * v3pdx.z;
666+
widedrp2[i] = drdp1Color * v3pdy.x
667+
+ drdp2Color
668+
* v3pdy.y; // + drdp3Color * v3pdy.z;
669+
//widedrp3[i] = drdp1Color * v3pdz.x + drdp2Color * v3pdz.y + drdp3Color * v3pdz.z;
670+
}
671+
}
672+
}
673+
}
674+
675+
// Alpha is handled as a float and gets the same two-term transform.
MaskedData alphaRef = outputs.alpha();
676+
OSL_FORCEINLINE_BLOCK
677+
if (alphaRef.valid() && alphaRef.has_derivs()) {
678+
MaskedDx<float> dap1(alphaRef);
679+
MaskedDy<float> dap2(alphaRef);
680+
// MaskedDz<float> dap3(alphaRef);
681+
682+
OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
683+
for (int i = 0; i < __OSL_WIDTH; ++i) {
684+
float dadp1Val = dap1[i];
685+
float dadp2Val = dap2[i];
686+
//float dadp3Val = dap3[i];
687+
688+
Vec3 v3pdx = Pdx[i];
689+
Vec3 v3pdy = Pdy[i];
690+
//Vec3 v3pdz = Pdz[i];
691+
692+
float dadpx = dadp1Val * v3pdx.x
693+
+ dadp2Val * v3pdx.y; // + dadp3Val * v3pdx.z;
694+
float dadpy = dadp1Val * v3pdy.x
695+
+ dadp2Val * v3pdy.y; // + dadp3Val * v3pdy.z;
696+
//float dadpz = dadp1Val * v3pdz.x + dadp2Val * v3pdz.y + dadp3Val * v3pdz.z;
697+
698+
dap1[i] = dadpx;
699+
dap2[i] = dadpy;
700+
//dap3[i] = dadpz;
701+
}
702+
}
703+
}
564704

565705
OSL_BATCHOP int
566706
__OSL_MASKED_OP(texture)(void* bsg_, ustring_pod name_, void* handle,
@@ -590,6 +730,14 @@ __OSL_MASKED_OP(texture)(void* bsg_, ustring_pod name_, void* handle,
590730
Wide<const float>(dsdy), Wide<const float>(dtdy),
591731
outputs);
592732

733+
// Correct our st texture space gradients into xy-space gradients
734+
if (resultHasDerivs || alphaHasDerivs) {
735+
transformWideTextureGradients(outputs, Wide<const float>(dsdx),
736+
Wide<const float>(dtdx),
737+
Wide<const float>(dsdy),
738+
Wide<const float>(dtdy));
739+
}
740+
593741
OSL_FORCEINLINE_BLOCK
594742
if (outputs.errormessage().valid()) {
595743
Masked<ustring> err(outputs.errormessage());
@@ -637,6 +785,13 @@ __OSL_MASKED_OP(texture3d)(void* bsg_, ustring_pod name_, void* handle,
637785
Wide<const Vec3>(wPdy), Wide<const Vec3>(wPdz),
638786
outputs);
639787

788+
// Correct our P (Vec3) space gradients into xyz-space gradients
789+
if (resultHasDerivs || alphaHasDerivs) {
790+
transformWideTextureGradientsTexture3d(outputs, Wide<const Vec3>(wPdx),
791+
Wide<const Vec3>(wPdy),
792+
Wide<const Vec3>(wPdz));
793+
}
794+
640795
OSL_FORCEINLINE_BLOCK
641796
if (outputs.errormessage().valid()) {
642797
Masked<ustring> err(outputs.errormessage());

0 commit comments

Comments
 (0)