Skip to content

Commit 56f0265

Browse files
authored
Add rtx gpu viewshed and improve cpu viewshed (#588)
* Add rtx gpu viewshed and improve cpu viewshed * Improved xr.DataArray creation
1 parent 12aa294 commit 56f0265

File tree

8 files changed

+606
-78
lines changed

8 files changed

+606
-78
lines changed

setup.py

+3
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
'noise >=1.2.2',
4040
],
4141
'examples': examples,
42+
'optional': [
43+
'rtxpy',
44+
],
4245
}
4346

4447
# additional doc dependencies may be needed

xrspatial/gpu_rtx/__init__.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from ..utils import has_cuda, has_cupy
2+
3+
4+
try:
5+
from rtxpy import RTX
6+
except ImportError:
7+
RTX = None
8+
9+
10+
def has_rtx():
11+
return has_cupy() and has_cuda() and RTX is not None

xrspatial/gpu_rtx/cuda_utils.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import numba as nb
2+
import numpy as np
3+
4+
5+
@nb.cuda.jit(device=True)
6+
def add(a, b):
7+
return float3(a[0]+b[0], a[1]+b[1], a[2]+b[2])
8+
9+
10+
@nb.cuda.jit(device=True)
11+
def diff(a, b):
12+
return float3(a[0]-b[0], a[1]-b[1], a[2]-b[2])
13+
14+
15+
@nb.cuda.jit(device=True)
16+
def dot(a, b):
17+
return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]
18+
19+
20+
@nb.cuda.jit(device=True)
21+
def float3(a, b, c):
22+
return (np.float32(a), np.float32(b), np.float32(c))
23+
24+
25+
@nb.cuda.jit(device=True)
26+
def invert(a):
27+
return float3(-a[0], -a[1], -a[2])
28+
29+
30+
@nb.cuda.jit(device=True)
31+
def mix(a, b, k):
32+
return add(mul(a, k), mul(b, 1-k))
33+
34+
35+
@nb.cuda.jit(device=True)
36+
def make_float3(a, offset):
37+
return float3(a[offset], a[offset+1], a[offset+2])
38+
39+
40+
@nb.cuda.jit(device=True)
41+
def mul(a, b):
42+
return float3(a[0]*b, a[1]*b, a[2]*b)
43+
44+
45+
@nb.cuda.jit(device=True)
46+
def mult_color(a, b):
47+
return float3(a[0]*b[0], a[1]*b[1], a[2]*b[2])

xrspatial/gpu_rtx/mesh_utils.py

+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import numba as nb
2+
import numpy as np
3+
import cupy
4+
5+
6+
@nb.cuda.jit
7+
def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):
8+
global_id = stride + nb.cuda.grid(1)
9+
if global_id < W*H:
10+
h = global_id // W
11+
w = global_id % W
12+
mesh_map_index = h * W + w
13+
14+
val = data[h, -w]
15+
16+
offset = 3*mesh_map_index
17+
verts[offset] = w
18+
verts[offset+1] = h
19+
verts[offset+2] = val * scale
20+
21+
if w != W - 1 and h != H - 1:
22+
offset = 6*(h * (W-1) + w)
23+
triangles[offset+0] = np.int32(mesh_map_index + W)
24+
triangles[offset+1] = np.int32(mesh_map_index + W + 1)
25+
triangles[offset+2] = np.int32(mesh_map_index)
26+
triangles[offset+3] = np.int32(mesh_map_index + W + 1)
27+
triangles[offset+4] = np.int32(mesh_map_index + 1)
28+
triangles[offset+5] = np.int32(mesh_map_index)
29+
30+
31+
@nb.njit(parallel=True)
32+
def triangulate_cpu(verts, triangles, data, H, W, scale):
33+
for h in nb.prange(H):
34+
for w in range(W):
35+
mesh_map_index = h * W + w
36+
37+
val = data[h, w]
38+
39+
offset = 3*mesh_map_index
40+
verts[offset] = w
41+
verts[offset+1] = h
42+
verts[offset+2] = val * scale
43+
44+
if w != W - 1 and h != H - 1:
45+
offset = 6*(h*(W-1) + w)
46+
triangles[offset+0] = np.int32(mesh_map_index + W)
47+
triangles[offset+1] = np.int32(mesh_map_index + W+1)
48+
triangles[offset+2] = np.int32(mesh_map_index)
49+
triangles[offset+3] = np.int32(mesh_map_index + W+1)
50+
triangles[offset+4] = np.int32(mesh_map_index + 1)
51+
triangles[offset+5] = np.int32(mesh_map_index)
52+
53+
54+
def triangulate_terrain(verts, triangles, terrain, scale=1):
55+
H, W = terrain.shape
56+
if isinstance(terrain.data, np.ndarray):
57+
triangulate_cpu(verts, triangles, terrain.data, H, W, scale)
58+
if isinstance(terrain.data, cupy.ndarray):
59+
job_size = H*W
60+
blockdim = 1024
61+
griddim = (job_size + blockdim - 1) // 1024
62+
d = 100
63+
offset = 0
64+
while job_size > 0:
65+
batch = min(d, griddim)
66+
_triangulate_terrain_kernel[batch, blockdim](
67+
verts, triangles, terrain.data, H, W, scale, offset)
68+
offset += batch*blockdim
69+
job_size -= batch*blockdim
70+
return 0
71+
72+
73+
@nb.jit(nopython=True)
74+
def _fill_contents(content, verts, triangles, num_tris):
75+
v = np.empty(12, np.float32)
76+
pad = np.zeros(2, np.int8)
77+
offset = 0
78+
for i in range(num_tris):
79+
t0 = triangles[3*i+0]
80+
t1 = triangles[3*i+1]
81+
t2 = triangles[3*i+2]
82+
v[3*0+0] = 0
83+
v[3*0+1] = 0
84+
v[3*0+2] = 0
85+
v[3*1+0] = verts[3*t0+0]
86+
v[3*1+1] = verts[3*t0+1]
87+
v[3*1+2] = verts[3*t0+2]
88+
v[3*2+0] = verts[3*t1+0]
89+
v[3*2+1] = verts[3*t1+1]
90+
v[3*2+2] = verts[3*t1+2]
91+
v[3*3+0] = verts[3*t2+0]
92+
v[3*3+1] = verts[3*t2+1]
93+
v[3*3+2] = verts[3*t2+2]
94+
95+
offset = 50*i
96+
content[offset:offset+48] = v.view(np.uint8)
97+
content[offset+48:offset+50] = pad
98+
99+
100+
def write(name, verts, triangles):
101+
"""
102+
Save a triangulated raster to a standard STL file.
103+
Windows has a default STL viewer and probably all 3D viewers have native
104+
support for it because of its simplicity. Can be used to verify the
105+
correctness of the algorithm or to visualize the mesh to get a notion of
106+
the size/complexity etc.
107+
@param name - The name of the mesh file we're going to save.
108+
Should end in .stl
109+
@param verts - A numpy array containing all the vertices of the mesh.
110+
Format is 3 float32 per vertex (vertex buffer)
111+
@param triangles - A numpy array containing all the triangles of the mesh.
112+
Format is 3 int32 per triangle (index buffer)
113+
"""
114+
ib = triangles
115+
vb = verts
116+
if isinstance(ib, cupy.ndarray):
117+
ib = cupy.asnumpy(ib)
118+
if isinstance(vb, cupy.ndarray):
119+
vb = cupy.asnumpy(vb)
120+
121+
header = np.zeros(80, np.uint8)
122+
nf = np.empty(1, np.uint32)
123+
num_tris = triangles.shape[0] // 3
124+
nf[0] = num_tris
125+
f = open(name, 'wb')
126+
f.write(header)
127+
f.write(nf)
128+
129+
# size of 1 triangle in STL is 50 bytes
130+
# 12 floats (each 4 bytes) for a total of 48
131+
# And additional 2 bytes for padding
132+
content = np.empty(num_tris*(50), np.uint8)
133+
_fill_contents(content, vb, ib, num_tris)
134+
f.write(content)
135+
f.close()

0 commit comments

Comments
 (0)