1
#include <stdio.h>

#include <iostream>
#include <string>
#include <vector>

#include <cuda_runtime.h>
#include <opencv2/opencv.hpp>
5
+
6
+
7
/*
 * Per-pixel normalisation of an interleaved 3-channel 8-bit image:
 *
 *     out = (in / 255 - mean) / std
 *
 * with mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].
 * The constants are applied in reverse order because the channels are
 * stored B, G, R: channel 0 uses 0.406 / 0.225, channel 2 uses 0.485 / 0.229.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; x covers the
 * width and y covers the height. Threads outside the w x h image do nothing,
 * so the grid may be rounded up to a multiple of the block size.
 *
 * srcData  device pointer, h * w * 3 bytes, pixel-interleaved
 * tgtData  device pointer, h * w * 3 floats, same layout as srcData
 * h, w     image height and width in pixels
 */
__global__ void norm(const uchar* srcData, float* tgtData, const int h, const int w)
{
    const int ix = threadIdx.x + blockIdx.x * blockDim.x;
    const int iy = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard the grid tail before any index is formed.
    if (ix >= w || iy >= h)
    {
        return;
    }

    // Widen before multiplying so that h * w * 3 cannot overflow a 32-bit int.
    const size_t idx3 = (static_cast<size_t>(iy) * w + ix) * 3;

    // All literals carry the 'f' suffix: a bare 255.0 is a double and would
    // promote the whole expression to double-precision arithmetic.
    tgtData[idx3]     = (static_cast<float>(srcData[idx3])     / 255.0f - 0.406f) / 0.225f;  // B pixel
    tgtData[idx3 + 1] = (static_cast<float>(srcData[idx3 + 1]) / 255.0f - 0.456f) / 0.224f;  // G pixel
    tgtData[idx3 + 2] = (static_cast<float>(srcData[idx3 + 2]) / 255.0f - 0.485f) / 0.229f;  // R pixel
}
26
+
27
+
28
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Reads the image at imagePath as a 3-channel colour image, runs the `norm`
 * kernel over it on the GPU, copies the float result back to the host and
 * prints the top-left block of pixels (at most 8 x 8) as "source,target"
 * pairs per channel so the result can be compared by eye.
 *
 * Failures (unreadable image, any CUDA error) are reported on stderr and the
 * function returns without printing pixel values. Device buffers are released
 * on every path.
 */
void normalize(const std::string& imagePath)
{
    cv::Mat img = cv::imread(imagePath, cv::IMREAD_COLOR);
    if (img.empty())
    {
        // imread signals failure with an empty Mat; img.data would be null.
        fprintf(stderr, "Failed to read image: %s\n", imagePath.c_str());
        return;
    }
    if (!img.isContinuous())
    {
        // The flat copy below requires rows to be stored back to back.
        img = img.clone();
    }

    const int w = img.cols;
    const int h = img.rows;
    printf(" Image width is %d, height is %d\n ", w, h);

    // Sizes are computed in size_t so that w * h * 3 cannot overflow an int.
    const size_t elements = static_cast<size_t>(w) * static_cast<size_t>(h) * 3;
    const size_t srcBytes = sizeof(uchar) * elements;
    const size_t tgtBytes = sizeof(float) * elements;

    // Host-side result buffer. Heap allocated: a stack array of this size
    // overflows the stack for ordinary image resolutions.
    std::vector<float> outputData(elements);

    float* tgtDevData = NULL;  // target on device
    uchar* srcDevData = NULL;  // source on device

    bool ok = cudaSucceeded(cudaMalloc((void**)&tgtDevData, tgtBytes), "cudaMalloc(target)")
           && cudaSucceeded(cudaMalloc((void**)&srcDevData, srcBytes), "cudaMalloc(source)")
           && cudaSucceeded(cudaMemcpy(srcDevData, img.data, srcBytes, cudaMemcpyHostToDevice),
                            "cudaMemcpy(host to device)");

    if (ok)
    {
        const dim3 blockSize(32, 32);
        const dim3 gridSize((w + blockSize.x - 1) / blockSize.x,
                            (h + blockSize.y - 1) / blockSize.y);
        // dim3 members are unsigned, hence %u.
        printf(" Block(%u, %u),Grid(%u, %u).\n ", blockSize.x, blockSize.y, gridSize.x, gridSize.y);

        norm<<<gridSize, blockSize>>>(srcDevData, tgtDevData, h, w);

        // cudaGetLastError catches a bad launch configuration. The blocking
        // cudaMemcpy that follows waits for the kernel, so no explicit
        // cudaDeviceSynchronize is needed, and it also reports any error
        // raised while the kernel was running.
        ok = cudaSucceeded(cudaGetLastError(), "norm kernel launch")
          && cudaSucceeded(cudaMemcpy(&outputData[0], tgtDevData, tgtBytes, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(device to host)");
    }

    if (ok)
    {
        // print part of pixel for comparing; clamped so that images smaller
        // than 8 x 8 are not read out of bounds
        const int rows = h < 8 ? h : 8;
        const int cols = w < 8 ? w : 8;
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                const size_t base = (static_cast<size_t>(i) * w + j) * 3;
                std::cout << (int)img.data[base] << " ,";       // B src
                std::cout << outputData[base] << " ";           // B tgt
                std::cout << (int)img.data[base + 1] << " ,";   // G src
                std::cout << outputData[base + 1] << " ";       // G tgt
                std::cout << (int)img.data[base + 2] << " ,";   // R src
                std::cout << outputData[base + 2] << std::endl; // R tgt
            }
        }
    }

    // cudaFree accepts a null pointer, so this is safe after a failed cudaMalloc.
    cudaFree(tgtDevData);
    cudaFree(srcDevData);
}
73
+
74
+
75
/*
 * Program entry point. Expects exactly one command-line argument, the path
 * of the image to normalise. With any other argument count it prints a usage
 * message and exits with status 1; otherwise it hands the path to
 * normalize() and exits with status 0.
 */
int main(int argc, char* argv[])
{
    const bool hasSingleArgument = (argc == 2);

    if (hasSingleArgument)
    {
        const std::string imagePath(argv[1]);
        normalize(imagePath);
        return 0;
    }

    // Wrong number of arguments: explain how the program is invoked.
    printf(" This program need 1 argument\n ");
    printf(" Usage: ./normal [image path]\n ");
    printf(" Example: ./normal lena.jpg\n ");
    return 1;
}
0 commit comments