|
| 1 | +# |
| 2 | +# Copyright 2023 @royinx |
| 3 | + |
| 4 | +# |
| 5 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | +# you may not use this file except in compliance with the License. |
| 7 | +# You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | +# |
| 17 | + |
| 18 | +# Starting from Python 3.8 DLL search policy has changed. |
| 19 | +# We need to add path to CUDA DLLs explicitly. |
| 20 | +import sys |
| 21 | +import os |
| 22 | +from typing import Any |
| 23 | +import PyNvCodec as nvc |
| 24 | +import numpy as np |
| 25 | +import cupy as cp |
| 26 | + |
class cconverter:
    """
    Ordered chain of surface converters.

    Stages are appended with add() and applied one after another by run().
    Every stage is created for the frame size and GPU given to the constructor.
    """

    def __init__(self, width: int, height: int, gpu_id: int):
        # Frame geometry and target GPU shared by every stage of the chain.
        self.w = width
        self.h = height
        self.gpu_id = gpu_id
        # Converters in the order they will be executed.
        self.chain = []

    def add(self, src_fmt: nvc.PixelFormat, dst_fmt: nvc.PixelFormat) -> None:
        """Append a src_fmt -> dst_fmt conversion stage to the end of the chain."""
        stage = nvc.PySurfaceConverter(self.w, self.h, src_fmt, dst_fmt, self.gpu_id)
        self.chain.append(stage)

    def run(self, src_surface: nvc.Surface) -> nvc.Surface:
        """
        Push src_surface through every stage and return a clone of the result.

        Raises RuntimeError as soon as a stage yields an empty surface.
        """
        context = nvc.ColorspaceConversionContext(
            nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG
        )

        current = src_surface
        for stage in self.chain:
            current = stage.Execute(current, context)
            if current.Empty():
                raise RuntimeError("Failed to perform color conversion")

        # Hand back a clone of the final surface rather than the stage output itself.
        return current.Clone(self.gpu_id)
| 53 | + |
class CupyNVC:
    """
    Helpers that move frames between nvc.Surface objects and cupy arrays.
    """

    def get_memptr(self, surface: nvc.Surface) -> int:
        """Return the GPU memory address of the surface's plane."""
        return surface.PlanePtr().GpuMem()

    def SurfaceToArray(self, surface: nvc.Surface) -> cp.ndarray:
        """
        Wraps a packed RGB surface as a cupy uint8 array without copying.

        - surface: nvc.Surface in nvc.PixelFormat.RGB format
        - return: cp.ndarray (height, width, 3) that views the surface memory
        - raises: RuntimeError if the surface is not in RGB format
        """
        if surface.Format() != nvc.PixelFormat.RGB:
            # The check is for packed RGB, so the message must say so.
            raise RuntimeError(
                "Surface shall be of packed RGB format, got {}".format(surface.Format())
            )
        plane = surface.PlanePtr()
        # The plane width is divided by 3 below to get pixels, i.e. it is
        # treated as the number of bytes per row of packed RGB.
        height, width, pitch = (plane.Height(), plane.Width(), plane.Pitch())
        # Zero-copy, non-owned view. Rows are `pitch` bytes apart, so the region
        # the array can touch is height * pitch bytes, not height * width.
        # NOTE(review): assumes the plane allocation covers height * pitch bytes
        # (pitched allocation) - confirm against VPF.
        # `surface` is passed as owner so it stays referenced while the view lives.
        cupy_mem = cp.cuda.UnownedMemory(self.get_memptr(surface), height * pitch, surface)
        cupy_memptr = cp.cuda.MemoryPointer(cupy_mem, 0)
        cupy_frame = cp.ndarray(
            (height, width // 3, 3), cp.uint8, cupy_memptr, strides=(pitch, 3, 1)
        )  # RGB

        return cupy_frame

    def _memcpy(self, surface: nvc.Surface, img_array: cp.ndarray) -> None:
        """
        Copy a contiguous CHW uint8 array into the surface (device to device).

        The copy is enqueued asynchronously on stream 0.
        """
        cp.cuda.runtime.memcpy2DAsync(
            self.get_memptr(surface),              # dst
            surface.Pitch(),                       # dst pitch in bytes
            img_array.data.ptr,                    # src
            surface.Width(),                       # src pitch: rows are tightly packed
            surface.Width(),                       # bytes copied per row
            surface.Height() * 3,                  # three planes stacked vertically
            cp.cuda.runtime.memcpyDeviceToDevice,
            0,                                     # null_stream.ptr: 0
        )

    def ArrayToSurface(self, img_array: cp.ndarray, gpu_id: int) -> nvc.Surface:
        """
        Converts cupy ndarray to planar rgb surface.

        - img_array: cp.ndarray (height, width, 3), packed RGB
        - gpu_id: GPU on which the surface is created
        - return: nvc.Surface in nvc.PixelFormat.RGB_PLANAR format
        - raises: ValueError if img_array is not (height, width, 3)
        """
        # _memcpy copies exactly 3 planes with raw pointers; any other layout
        # would read outside the source buffer.
        if img_array.ndim != 3 or img_array.shape[2] != 3:
            raise ValueError(
                "Expected array of shape (height, width, 3), got {}".format(img_array.shape)
            )
        img_array = img_array.astype(cp.uint8)
        img_array = cp.transpose(img_array, (2, 0, 1))  # HWC to CHW
        img_array = cp.ascontiguousarray(img_array)
        _, tensor_h, tensor_w = img_array.shape
        surface = nvc.Surface.Make(nvc.PixelFormat.RGB_PLANAR, tensor_w, tensor_h, gpu_id)
        self._memcpy(surface, img_array)
        return surface
| 100 | + |
def grayscale(img_array: cp.array) -> cp.array:
    """
    Turn an HWC RGB image into a 3-channel grayscale image.

    The last axis is reduced with the weights (0.299, 0.587, 0.114) and the
    resulting single channel is repeated three times, so the output keeps
    the packed HWC layout of the input.
    """
    luma_weights = cp.array([0.299, 0.587, 0.114])
    luma = cp.matmul(img_array, luma_weights)
    # (H, W) -> (H, W, 1) -> (H, W, 3): view as 3 channel image (packed RGB: HWC)
    return cp.tile(luma[..., None], (1, 1, 3))
| 106 | + |
def contrast_boost(img_array: cp.ndarray) -> cp.ndarray:
    """
    Per-channel percentile contrast stretch of an HWC image.

    For every channel the 5th and 95th percentiles are computed over the two
    leading axes; values are clipped to that interval and then rescaled
    linearly so the interval maps onto [0, 255].

    - img_array: array of shape (height, width, channels)
    - return: floating point array of the same shape with values in [0, 255]
    """
    channel_min = cp.quantile(img_array, 0.05, axis=(0, 1))
    channel_max = cp.quantile(img_array, 0.95, axis=(0, 1))
    img_array = img_array.astype(cp.float32)
    # Clip along the channel (last) axis via broadcasting. Indexing with
    # img_array[c] would select image rows, leaving most pixels unclipped
    # and pushing the result outside [0, 255].
    img_array = cp.clip(img_array, channel_min, channel_max)
    channel_range = channel_max - channel_min
    # A flat channel has zero range; divide by 1 instead so it maps to 0
    # rather than producing NaN.
    channel_range = cp.where(channel_range > 0, channel_range, 1.0)
    img_array = (img_array - channel_min) / channel_range
    return cp.multiply(img_array, 255.0)
| 120 | + |
def main(gpu_id: int, encFilePath: str, dstFilePath: str):
    """
    Decode a video, process every frame with cupy and re-encode it as H.264.

    Each decoded NV12 surface is converted to packed RGB, wrapped as a cupy
    array, passed through contrast_boost() and grayscale(), converted back
    to NV12 and encoded. Encoded packets are written to dstFilePath.

    - gpu_id: GPU used for decoding, processing and encoding
    - encFilePath: path to the input (encoded) video
    - dstFilePath: path of the output file, opened in binary write mode
    """
    # The context manager guarantees the output file is flushed and closed,
    # including when a conversion step raises.
    with open(dstFilePath, "wb") as dstFile:
        nvDec = nvc.PyNvDecoder(encFilePath, gpu_id)
        cpnvc = CupyNVC()

        w = nvDec.Width()
        h = nvDec.Height()
        res = "{}x{}".format(w, h)
        nvEnc = nvc.PyNvEncoder(
            {"preset": "P4", "codec": "h264", "s": res, "bitrate": "10M"}, gpu_id
        )

        # Surface converters
        to_rgb = cconverter(w, h, gpu_id)
        to_rgb.add(nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420)
        to_rgb.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB)

        to_nv12 = cconverter(w, h, gpu_id)
        to_nv12.add(nvc.PixelFormat.RGB_PLANAR, nvc.PixelFormat.RGB)
        to_nv12.add(nvc.PixelFormat.RGB, nvc.PixelFormat.YUV420)
        to_nv12.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.NV12)

        # Encoded video frame; filled by the encoder on every successful call.
        encFrame = np.ndarray(shape=(0), dtype=np.uint8)
        while True:
            # Decode NV12 surface
            src_surface = nvDec.DecodeSingleSurface()
            if src_surface.Empty():
                break

            # Convert to packed RGB (HWC)
            rgb_sur = to_rgb.run(src_surface)
            if rgb_sur.Empty():
                break

            # PROCESS YOUR TENSOR HERE.
            # This sample stretches the contrast and converts to grayscale.
            src_array = cpnvc.SurfaceToArray(rgb_sur)
            dst_array = contrast_boost(src_array)
            dst_array = grayscale(dst_array)
            # ArrayToSurface produces a planar RGB (CHW) surface.
            surface_rgb = cpnvc.ArrayToSurface(dst_array, gpu_id)

            # Convert back to NV12
            dst_surface = to_nv12.run(surface_rgb)
            # Check the surface that was just produced, not the decoded input.
            if dst_surface.Empty():
                break

            # Encode
            success = nvEnc.EncodeSingleSurface(dst_surface, encFrame)
            if success:
                dstFile.write(bytearray(encFrame))

        # Encoder is asynchronous, so we need to flush it
        while nvEnc.FlushSinglePacket(encFrame):
            dstFile.write(bytearray(encFrame))
| 182 | + |
| 183 | + |
if __name__ == "__main__":
    # Command line entry point: SampleCupy.py $gpu_id $input_file $output_file
    if len(sys.argv) < 4:
        print("This sample transcodes and processes with cupy an input video on given GPU.")
        print("Provide gpu ID, path to input and output files")
        print("Usage: SampleCupy.py $gpu_id $input_file $output_file.")
        print("Example: \npython3 samples/SampleCupy.py 0 tests/test.mp4 tests/dec_test.mp4")
        # sys.exit is always available; the bare exit() helper is installed
        # by the site module and may be missing.
        sys.exit(1)

    gpu_id = int(sys.argv[1])
    encFilePath = sys.argv[2]  # input video
    decFilePath = sys.argv[3]  # output file
    main(gpu_id, encFilePath, decFilePath)
0 commit comments