Skip to content
This repository was archived by the owner on Jun 10, 2024. It is now read-only.

Commit 82b51e7

Browse files
authored
feat: support cupy gpu pointer (#514)
* feat: add cupy encoder * feat: support cupy pointer * feat: support add cupy example
1 parent a3362e3 commit 82b51e7

File tree

1 file changed

+197
-0
lines changed

1 file changed

+197
-0
lines changed

samples/SampleCupy.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
#
2+
# Copyright 2023 @royinx
3+
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Starting from Python 3.8 DLL search policy has changed.
19+
# We need to add path to CUDA DLLs explicitly.
20+
import sys
21+
import os
22+
from typing import Any
23+
import PyNvCodec as nvc
24+
import numpy as np
25+
import cupy as cp
26+
27+
class cconverter:
    """
    Ordered chain of surface converters sharing one frame size and GPU.

    Converters are appended with :meth:`add` and applied in insertion
    order by :meth:`run`.
    """

    def __init__(self, width: int, height: int, gpu_id: int):
        self.gpu_id = gpu_id
        self.w, self.h = width, height
        # nvc.PySurfaceConverter instances, applied first to last.
        self.chain = []

    def add(self, src_fmt: nvc.PixelFormat, dst_fmt: nvc.PixelFormat) -> None:
        """Append a converter from ``src_fmt`` to ``dst_fmt`` to the chain."""
        converter = nvc.PySurfaceConverter(
            self.w, self.h, src_fmt, dst_fmt, self.gpu_id
        )
        self.chain.append(converter)

    def run(self, src_surface: nvc.Surface) -> nvc.Surface:
        """
        Push ``src_surface`` through every converter in the chain.

        - src_surface: nvc.Surface in the source format of the first converter
        - return: clone (made on ``self.gpu_id``) of the last converter's output
        - raises: RuntimeError as soon as any converter yields an empty surface
        """
        # Every step uses the same BT.601 / MPEG-range conversion context.
        context = nvc.ColorspaceConversionContext(
            nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG
        )

        current = src_surface
        for step in self.chain:
            current = step.Execute(current, context)
            if current.Empty():
                raise RuntimeError("Failed to perform color conversion")

        # NOTE(review): the result is cloned before returning, presumably so the
        # caller gets a surface it owns rather than one the converter may
        # reuse - confirm against the PyNvCodec documentation.
        return current.Clone(self.gpu_id)
53+
54+
class CupyNVC:
    """
    Helpers for moving image data between ``nvc.Surface`` objects and cupy
    arrays while staying on the GPU.
    """

    def get_memptr(self, surface: nvc.Surface) -> int:
        """Return the device address reported by the surface's plane."""
        return surface.PlanePtr().GpuMem()

    def SurfaceToArray(self, surface: nvc.Surface) -> cp.ndarray:
        """
        Wrap a packed RGB surface as a cupy uint8 array without copying.

        - surface: nvc.Surface of format nvc.PixelFormat.RGB
        - return: cp.ndarray (height, width, 3) viewing the surface memory
        - raises: RuntimeError if the surface is not packed RGB
        """
        if surface.Format() != nvc.PixelFormat.RGB:
            raise RuntimeError(
                "Surface shall be of packed RGB format, got {}".format(surface.Format())
            )
        plane = surface.PlanePtr()
        height, width, pitch = (plane.Height(), plane.Width(), plane.Pitch())

        # Non-owning, zero-copy view. Consecutive rows are `pitch` bytes apart
        # (see the strides below), so the wrapped region spans height * pitch
        # bytes, not height * width. `surface` is passed as the owner so it is
        # kept referenced for as long as the cupy memory object exists.
        cupy_mem = cp.cuda.UnownedMemory(
            self.get_memptr(surface), height * pitch, surface
        )
        cupy_memptr = cp.cuda.MemoryPointer(cupy_mem, 0)

        # The plane width is divided by 3 to obtain pixels, i.e. it is treated
        # as counting interleaved R, G, B bytes per row.
        cupy_frame = cp.ndarray(
            (height, width // 3, 3), cp.uint8, cupy_memptr, strides=(pitch, 3, 1)
        )

        return cupy_frame

    def _memcpy(self, surface: nvc.Surface, img_array: cp.ndarray) -> None:
        """
        Copy a planar (CHW) uint8 array into a three-plane surface.

        The source is read with a row pitch of ``surface.Width()`` bytes, so
        ``img_array`` must be C-contiguous uint8 with rows exactly that wide.

        NOTE(review): the copy is enqueued asynchronously on the null stream
        and this method returns without synchronising. Confirm that whatever
        consumes ``surface`` next is ordered after work on the null stream.
        """
        cp.cuda.runtime.memcpy2DAsync(
            self.get_memptr(surface),  # destination address
            surface.Pitch(),  # destination row pitch
            img_array.data.ptr,  # source address
            surface.Width(),  # source row pitch (tightly packed)
            surface.Width(),  # bytes copied per row
            surface.Height() * 3,  # rows: three planes stacked vertically
            cp.cuda.runtime.memcpyDeviceToDevice,
            0,  # null stream
        )

    def ArrayToSurface(self, img_array: cp.ndarray, gpu_id: int) -> nvc.Surface:
        """
        Convert a packed (HWC) cupy image into a new RGB_PLANAR surface.

        Values are cast to uint8 before the copy.

        - img_array: cp.ndarray (height, width, 3)
        - gpu_id: GPU on which the surface is allocated
        - return: nvc.Surface of format nvc.PixelFormat.RGB_PLANAR
        - raises: ValueError if img_array is not (height, width, 3)
        """
        # _memcpy always copies three planes; any other channel count would
        # either read past the source buffer or silently drop data.
        if img_array.ndim != 3 or img_array.shape[2] != 3:
            raise ValueError(
                "Expected an array of shape (height, width, 3), got {}".format(
                    img_array.shape
                )
            )

        planar = img_array.astype(cp.uint8)
        planar = cp.transpose(planar, (2, 0, 1))  # HWC to CHW
        planar = cp.ascontiguousarray(planar)
        _, tensor_h, tensor_w = planar.shape
        surface = nvc.Surface.Make(nvc.PixelFormat.RGB_PLANAR, tensor_w, tensor_h, gpu_id)
        self._memcpy(surface, planar)
        return surface
100+
101+
def grayscale(img_array: cp.array) -> cp.array:
    """
    Collapse a packed image to luma and replicate it into three channels.

    The last axis of ``img_array`` is weighted by (0.299, 0.587, 0.114) and
    summed; the result is then repeated three times along a new last axis so
    the output keeps a 3-channel HWC layout.
    """
    luma_weights = cp.array([0.299, 0.587, 0.114])
    luma = img_array @ luma_weights
    luma = luma[..., cp.newaxis]
    # view as 3 channel image (packed RGB: HWC)
    return cp.tile(luma, (1, 1, 3))
106+
107+
def contrast_boost(img_array: cp.ndarray) -> cp.ndarray:
    """
    Stretch each channel between its 5th and 95th percentile.

    For every channel (last axis) the 5% and 95% quantiles are computed over
    the two leading axes. Values are clipped to that range and rescaled
    linearly so the lower quantile maps to 0 and the upper one to 255.
    A channel whose two quantiles coincide is mapped to 0 everywhere.

    - img_array: cp.ndarray (height, width, channels)
    - return: floating-point cp.ndarray of the same shape, values in [0, 255]
    """
    channel_min = cp.quantile(img_array, 0.05, axis=(0, 1))
    channel_max = cp.quantile(img_array, 0.95, axis=(0, 1))

    # Guard the division: a flat channel has zero span and would yield NaN.
    channel_span = channel_max - channel_min
    channel_span = cp.where(channel_span > 0, channel_span, cp.ones_like(channel_span))

    stretched = img_array.astype(cp.float32)
    # The per-channel bounds broadcast along the last (channel) axis, so every
    # pixel is clipped against its own channel's quantiles.
    stretched = cp.clip(stretched, channel_min, channel_max)
    stretched = (stretched - channel_min) / channel_span
    return cp.multiply(stretched, 255.0)
120+
121+
def main(gpu_id: int, encFilePath: str, dstFilePath: str):
    """
    Decode a video, process every frame with cupy and re-encode it as H.264.

    Each decoded surface is converted to packed RGB, wrapped as a cupy array,
    passed through ``contrast_boost`` and ``grayscale``, converted back to
    NV12 and encoded. Encoded packets are written to ``dstFilePath``.

    - gpu_id: GPU used for decoding, processing and encoding
    - encFilePath: path to the input video
    - dstFilePath: path of the output file (written in binary mode)
    """
    nvDec = nvc.PyNvDecoder(encFilePath, gpu_id)
    cpnvc = CupyNVC()

    w = nvDec.Width()
    h = nvDec.Height()
    res = str(w) + "x" + str(h)
    nvEnc = nvc.PyNvEncoder(
        {"preset": "P4", "codec": "h264", "s": res, "bitrate": "10M"}, gpu_id
    )

    # Surface converters
    to_rgb = cconverter(w, h, gpu_id)
    to_rgb.add(nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420)
    to_rgb.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB)

    to_nv12 = cconverter(w, h, gpu_id)
    to_nv12.add(nvc.PixelFormat.RGB_PLANAR, nvc.PixelFormat.RGB)
    to_nv12.add(nvc.PixelFormat.RGB, nvc.PixelFormat.YUV420)
    to_nv12.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.NV12)

    # Encoded video frame
    encFrame = np.ndarray(shape=(0), dtype=np.uint8)

    # The output is opened only after decoder/encoder setup succeeded, and the
    # context manager closes it even if processing raises.
    with open(dstFilePath, "wb") as dstFile:
        while True:
            # Decode NV12 surface
            src_surface = nvDec.DecodeSingleSurface()
            if src_surface.Empty():
                break

            # Convert to packed RGB: HWC
            rgb_sur = to_rgb.run(src_surface)
            if rgb_sur.Empty():
                break

            # PROCESS YOUR ARRAY HERE.
            # This sample processing stretches the contrast and then
            # converts the frame to grayscale.
            src_array = cpnvc.SurfaceToArray(rgb_sur)
            dst_array = contrast_boost(src_array)
            dst_array = grayscale(dst_array)
            surface_rgb = cpnvc.ArrayToSurface(dst_array, gpu_id)

            # Convert back to NV12
            dst_surface = to_nv12.run(surface_rgb)
            if dst_surface.Empty():
                break

            # Encode
            success = nvEnc.EncodeSingleSurface(dst_surface, encFrame)
            if success:
                dstFile.write(bytearray(encFrame))

        # Encoder is asynchronous, so we need to flush it
        while nvEnc.FlushSinglePacket(encFrame):
            dstFile.write(bytearray(encFrame))
182+
183+
184+
if __name__ == "__main__":
    # Command-line entry point: SampleCupy.py $gpu_id $input_file $output_file

    if len(sys.argv) < 4:
        print("This sample transcodes an input video on a given GPU and processes it with cupy.")
        print("Provide gpu ID, path to input and output files")
        print("Usage: SampleCupy.py $gpu_id $input_file $output_file.")
        print("Example: \npython3 samples/SampleCupy.py 0 tests/test.mp4 tests/dec_test.mp4")
        # sys.exit rather than the site-provided `exit`, which is not
        # guaranteed to exist (e.g. when running with -S).
        sys.exit(1)

    gpu_id = int(sys.argv[1])
    encFilePath = sys.argv[2]
    decFilePath = sys.argv[3]  # output path; passed to main() as dstFilePath
    main(gpu_id, encFilePath, decFilePath)

0 commit comments

Comments
 (0)