3
3
#include " log.h"
4
4
#include " var.h"
5
5
#include " op.h"
6
+ #include " eval.h"
6
7
#include < string.h>
7
8
#include < memory>
8
9
#include < atomic>
@@ -463,143 +464,106 @@ void jitc_cuda_tex_memcpy_t2d(size_t ndim, const size_t *shape,
463
464
src_texture);
464
465
}
465
466
466
- void jitc_cuda_tex_lookup (size_t ndim, const void *texture_handle,
467
- const uint32_t *pos, uint32_t *out) {
468
- if (ndim < 1 || ndim > 3 )
469
- jitc_raise (" jit_cuda_tex_lookup(): invalid texture dimension!" );
470
-
467
+ Variable jitc_cuda_tex_check (size_t ndim, const uint32_t *pos) {
471
468
// Validate input types, determine size of the operation
472
469
uint32_t size = 0 ;
470
+ bool dirty = false , placeholder = false ;
471
+ JitBackend backend = JitBackend::Invalid;
472
+
473
+ if (ndim < 1 || ndim > 3 )
474
+ jitc_raise (" jit_cuda_tex_check(): invalid texture dimension!" );
475
+
473
476
for (size_t i = 0 ; i < ndim; ++i) {
474
477
const Variable *v = jitc_var (pos[i]);
475
478
if ((VarType) v->type != VarType::Float32 )
476
- jitc_raise (" jit_cuda_tex_lookup (): type mismatch for arg. %zu (got "
479
+ jitc_raise (" jit_cuda_tex_check (): type mismatch for arg. %zu (got "
477
480
" %s, expected %s)" , i, type_name[v->type ],
478
481
type_name[(int ) VarType::Float32 ]);
479
482
size = std::max (size, v->size );
483
+ dirty |= v->is_dirty ();
484
+ placeholder |= (bool ) v->placeholder ;
485
+ backend = (JitBackend) v->backend ;
480
486
}
481
487
482
- DrJitCudaTexture &texture = *((DrJitCudaTexture *) texture_handle);
483
-
484
- for (size_t tex = 0 ; tex < texture.n_textures ; ++tex) {
485
- uint32_t dep[2 ] = {
486
- texture.indices [tex],
487
- pos[0 ]
488
- };
489
-
490
- if (ndim >= 2 ) {
491
- const char *stmt_1[2 ] = {
492
- " .reg.v2.f32 $r0$n"
493
- " mov.v2.f32 $r0, { $r1, $r2 }" ,
494
- " .reg.v4.f32 $r0$n"
495
- " mov.v4.f32 $r0, { $r1, $r2, $r3, $r3 }"
496
- };
497
- dep[1 ] = jitc_var_stmt (JitBackend::CUDA, VarType::Void,
498
- stmt_1[ndim - 2 ], 1 , (unsigned int ) ndim,
499
- pos);
500
- } else {
501
- jitc_var_inc_ref (dep[1 ]);
488
+ if (dirty) {
489
+ jitc_eval (thread_state (backend));
490
+ for (size_t i = 0 ; i < ndim; ++i) {
491
+ if (jitc_var (pos[i])->is_dirty ())
492
+ jitc_fail (" jit_cuda_tex_check(): operand r%u remains dirty "
493
+ " following evaluation!" , pos[i]);
502
494
}
495
+ }
503
496
504
- const char *stmt_2[3 ] = {
505
- " .reg.v4.f32 $r0$n"
506
- " tex.1d.v4.f32.f32 $r0, [$r1, {$r2}]" ,
507
-
508
- " .reg.v4.f32 $r0$n"
509
- " tex.2d.v4.f32.f32 $r0, [$r1, $r2]" ,
510
-
511
- " .reg.v4.f32 $r0$n"
512
- " tex.3d.v4.f32.f32 $r0, [$r1, $r2]"
513
- };
514
-
515
- uint32_t lookup = jitc_var_stmt (JitBackend::CUDA, VarType::Void,
516
- stmt_2[ndim - 1 ], 1 , 2 , dep);
517
- jitc_var_dec_ref (dep[1 ]);
518
-
519
- const char *stmt_3[4 ] = {
520
- " mov.f32 $r0, $r1.r" ,
521
- " mov.f32 $r0, $r1.g" ,
522
- " mov.f32 $r0, $r1.b" ,
523
- " mov.f32 $r0, $r1.a"
524
- };
497
+ Variable v;
498
+ v.size = size;
499
+ v.backend = (uint32_t ) backend;
500
+ v.placeholder = placeholder;
501
+ v.type = (uint32_t ) VarType::Float32 ;
502
+ return v;
503
+ }
525
504
526
- for (size_t ch = 0 ; ch < texture.channels (tex); ++ch) {
527
- uint32_t lookup_result_index = jitc_var_stmt (
528
- JitBackend::CUDA, VarType::Float32 , stmt_3[ch], 1 , 1 , &lookup);
529
- out[tex * 4 + ch] = lookup_result_index;
505
+ void jitc_cuda_tex_lookup (size_t ndim, const void *texture_handle,
506
+ const uint32_t *pos, uint32_t *out) {
507
+ DrJitCudaTexture &tex = *((DrJitCudaTexture *) texture_handle);
508
+ Variable v = jitc_cuda_tex_check (ndim, pos);
509
+
510
+ for (size_t ti = 0 ; ti < tex.n_textures ; ++ti) {
511
+ // Perform a fetch per texture ..
512
+ v.kind = VarKind::TexLookup;
513
+ v.literal = 0 ;
514
+ memset (v.dep , 0 , sizeof (v.dep ));
515
+ v.dep [0 ] = tex.indices [ti];
516
+ jitc_var_inc_ref (tex.indices [ti]);
517
+ for (size_t j = 0 ; j < ndim; ++j) {
518
+ v.dep [j + 1 ] = pos[j];
519
+ jitc_var_inc_ref (pos[j]);
520
+ }
521
+ Ref tex_load = steal (jitc_var_new (v));
522
+
523
+ // .. and then extract components
524
+ v.kind = VarKind::TexExtract;
525
+ memset (v.dep , 0 , sizeof (v.dep ));
526
+ for (size_t ch = 0 ; ch < tex.channels (ti); ++ch) {
527
+ v.literal = (uint64_t ) ch;
528
+ v.dep [0 ] = tex_load;
529
+ jitc_var_inc_ref (tex_load);
530
+ *out++ = jitc_var_new (v);
530
531
}
531
-
532
- jitc_var_dec_ref (lookup);
533
532
}
534
533
}
535
534
536
535
void jitc_cuda_tex_bilerp_fetch (size_t ndim, const void *texture_handle,
537
536
const uint32_t *pos, uint32_t *out) {
538
537
if (ndim != 2 )
539
- jitc_raise (" jitc_cuda_tex_bilerp_fetch(): invalid texture dimension, "
540
- " only 2D textures are supported!" );
541
-
542
- // Validate input types, determine size of the operation
543
- uint32_t size = 0 ;
544
- for (size_t i = 0 ; i < ndim; ++i) {
545
- const Variable *v = jitc_var (pos[i]);
546
- if ((VarType) v->type != VarType::Float32 )
547
- jitc_raise (" jitc_cuda_tex_bilerp_fetch(): type mismatch for arg. "
548
- " %zu (got %s, expected %s)" ,
549
- i, type_name[v->type ],
550
- type_name[(int ) VarType::Float32 ]);
551
- size = std::max (size, v->size );
552
- }
553
-
554
- DrJitCudaTexture &texture = *((DrJitCudaTexture *) texture_handle);
555
-
556
- for (size_t tex = 0 ; tex < texture.n_textures ; ++tex) {
557
- uint32_t dep[2 ] = {
558
- texture.indices [tex],
559
- pos[0 ]
560
- };
561
-
562
- const char *stmt_1 = " .reg.v2.f32 $r0$n"
563
- " mov.v2.f32 $r0, { $r1, $r2 }" ;
564
- dep[1 ] = jitc_var_stmt (JitBackend::CUDA, VarType::Void, stmt_1, 1 ,
565
- (unsigned int ) ndim, pos);
566
-
567
- const char *stmt_2[4 ] = {
568
- " .reg.v4.f32 $r0$n"
569
- " tld4.r.2d.v4.f32.f32 $r0, [$r1, $r2]" ,
570
-
571
- " .reg.v4.f32 $r0$n"
572
- " tld4.g.2d.v4.f32.f32 $r0, [$r1, $r2]" ,
573
-
574
- " .reg.v4.f32 $r0$n"
575
- " tld4.b.2d.v4.f32.f32 $r0, [$r1, $r2]" ,
576
-
577
- " .reg.v4.f32 $r0$n"
578
- " tld4.a.2d.v4.f32.f32 $r0, [$r1, $r2]"
579
- };
580
-
581
- const char *stmt_3[4 ] = {
582
- " mov.f32 $r0, $r1.x" ,
583
- " mov.f32 $r0, $r1.y" ,
584
- " mov.f32 $r0, $r1.z" ,
585
- " mov.f32 $r0, $r1.w"
586
- };
587
-
588
- for (size_t ch = 0 ; ch < texture.channels (tex); ++ch) {
589
- uint32_t fetch_channel = jitc_var_stmt (
590
- JitBackend::CUDA, VarType::Void, stmt_2[ch], 1 , 2 , dep);
591
-
592
- for (size_t i = 0 ; i < 4 ; ++i) {
593
- uint32_t result_index =
594
- jitc_var_stmt (JitBackend::CUDA, VarType::Float32 ,
595
- stmt_3[i], 1 , 1 , &fetch_channel);
596
- out[(i * texture.n_channels ) + (tex * 4 + ch)] = result_index;
538
+ jitc_raise (" jitc_cuda_tex_bilerp_fetch(): only 2D textures are supported!" );
539
+
540
+ DrJitCudaTexture &tex = *((DrJitCudaTexture *) texture_handle);
541
+ Variable v = jitc_cuda_tex_check (ndim, pos);
542
+
543
+ for (size_t ti = 0 ; ti < tex.n_textures ; ++ti) {
544
+ for (size_t ch = 0 ; ch < tex.channels (ti); ++ch) {
545
+ // Perform a fetch per texture and channel..
546
+ v.kind = VarKind::TexFetchBilerp;
547
+ v.literal = ch;
548
+ memset (v.dep , 0 , sizeof (v.dep ));
549
+ v.dep [0 ] = tex.indices [ti];
550
+ jitc_var_inc_ref (tex.indices [ti]);
551
+ for (size_t j = 0 ; j < ndim; ++j) {
552
+ v.dep [j + 1 ] = pos[j];
553
+ jitc_var_inc_ref (pos[j]);
554
+ }
555
+ Ref tex_load = steal (jitc_var_new (v));
556
+
557
+ memset (v.dep , 0 , sizeof (v.dep ));
558
+ v.kind = VarKind::TexExtract;
559
+ for (uint32_t j = 0 ; j < 4 ; ++j) {
560
+ // .. and then extract components
561
+ v.literal = (uint64_t ) j;
562
+ v.dep [0 ] = tex_load;
563
+ jitc_var_inc_ref (tex_load);
564
+ *out++ = jitc_var_new (v);
597
565
}
598
-
599
- jitc_var_dec_ref (fetch_channel);
600
566
}
601
-
602
- jitc_var_dec_ref (dep[1 ]);
603
567
}
604
568
}
605
569
@@ -612,11 +576,10 @@ void jitc_cuda_tex_destroy(void *texture_handle) {
612
576
613
577
DrJitCudaTexture *texture = (DrJitCudaTexture *) texture_handle;
614
578
615
- // The `texture` struct can potentially be deleted when decreasing the
616
- // reference count of the individual textures. We must hoist the number of
617
- // textures out of the loop condition.
579
+ /* The `texture` struct can potentially be deleted when decreasing the
580
+ reference count of the individual textures. We must hoist the number
581
+ of textures out of the loop condition. */
618
582
const size_t n_textures = texture->n_textures ;
619
- for (size_t tex = 0 ; tex < n_textures; ++tex) {
583
+ for (size_t tex = 0 ; tex < n_textures; ++tex)
620
584
jitc_var_dec_ref (texture->indices [tex]);
621
- }
622
585
}
0 commit comments