Skip to content

Commit 44bccf7

Browse files
authored
Add files via upload
1 parent 1b7e3d4 commit 44bccf7

File tree

1 file changed

+88
-0
lines changed

1 file changed

+88
-0
lines changed

normalize/normal.cu

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#include <stdio.h>

#include <iostream>
#include <string>
#include <vector>

#include <cuda_runtime.h>
#include <opencv2/opencv.hpp>
5+
6+
7+
/*
 * Per-pixel normalization of an interleaved 3-channel 8-bit image:
 *
 *     out = (in / 255 - mean) / std
 *
 * with mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].
 * The constants are applied in reverse order to the three channel slots:
 * slot 0 (B) uses 0.406 / 0.225, slot 1 (G) uses 0.456 / 0.224 and
 * slot 2 (R) uses 0.485 / 0.229.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. The x dimension
 * covers columns [0, w) and the y dimension covers rows [0, h); threads that
 * fall outside the image do nothing. No shared memory is used.
 *
 * srcData: device buffer of h * w * 3 bytes, rows tightly packed (no pitch).
 * tgtData: device buffer of h * w * 3 floats, same layout as srcData.
 * The two buffers must not overlap (they are declared __restrict__).
 */
__global__ void norm(const uchar* __restrict__ srcData, float* __restrict__ tgtData, const int h, const int w)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard first: the grid is rounded up and rarely matches the image exactly.
    if (col >= w || row >= h)
    {
        return;
    }

    // Widen before multiplying so that very large images cannot overflow int.
    const size_t base = (static_cast<size_t>(row) * w + col) * 3;

    // Float literals keep the arithmetic in single precision; bare 255.0 etc.
    // would silently promote every operation to double.
    tgtData[base]     = (static_cast<float>(srcData[base])     / 255.0f - 0.406f) / 0.225f;  // B pixel
    tgtData[base + 1] = (static_cast<float>(srcData[base + 1]) / 255.0f - 0.456f) / 0.224f;  // G pixel
    tgtData[base + 2] = (static_cast<float>(srcData[base + 2]) / 255.0f - 0.485f) / 0.229f;  // R pixel
}
26+
27+
28+
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Loads the image at `imagePath` as 3-channel 8-bit color, normalizes it on
 * the GPU with the `norm` kernel and prints the top-left pixels (at most 8x8)
 * as "source,normalized" pairs for the B, G and R channels so the result can
 * be compared by eye.
 *
 * If the image cannot be loaded or any CUDA call fails, a diagnostic is
 * written to stderr and the function returns without printing pixel values.
 * Device buffers are released on every path.
 */
void normalize(const std::string& imagePath)
{
    cv::Mat img = cv::imread(imagePath, cv::IMREAD_COLOR);
    if (img.empty())
    {
        fprintf(stderr, "Failed to load image: %s\n", imagePath.c_str());
        return;
    }
    // The copies and the kernel treat img.data as one tightly packed buffer.
    if (!img.isContinuous())
    {
        img = img.clone();
    }

    const int w = img.cols;
    const int h = img.rows;
    printf("Image width is %d, height is %d\n", w, h);

    // size_t arithmetic: w * h * 3 can exceed int for very large images.
    const size_t elements = static_cast<size_t>(w) * h * 3;
    const size_t srcBytes = sizeof(uchar) * elements;
    const size_t tgtBytes = sizeof(float) * elements;

    // target (heap-allocated: a stack array of this size overflows the stack
    // for ordinary images and variable-length arrays are not standard C++)
    std::vector<float> outputData(elements);

    // target / source on device; initialised to nullptr so that the
    // unconditional cudaFree calls at the end are safe on every path
    float* tgtDevData = nullptr;
    uchar* srcDevData = nullptr;

    bool ok = cudaCallSucceeded(cudaMalloc((void**)&tgtDevData, tgtBytes), "cudaMalloc(target)")
           && cudaCallSucceeded(cudaMalloc((void**)&srcDevData, srcBytes), "cudaMalloc(source)")
           && cudaCallSucceeded(cudaMemcpy(srcDevData, img.data, srcBytes, cudaMemcpyHostToDevice),
                                "cudaMemcpy(host to device)");

    if (ok)
    {
        dim3 blockSize(32, 32);
        dim3 gridSize((w + blockSize.x - 1) / blockSize.x, (h + blockSize.y - 1) / blockSize.y);
        // dim3 members are unsigned; cast to match the %d conversions.
        printf("Block(%d, %d),Grid(%d, %d).\n",
               (int)blockSize.x, (int)blockSize.y, (int)gridSize.x, (int)gridSize.y);

        norm<<<gridSize, blockSize>>>(srcDevData, tgtDevData, h, w);

        // A launch does not return a status: bad configurations show up in
        // cudaGetLastError(). The blocking cudaMemcpy that follows waits for
        // the kernel and reports any fault raised while it was running.
        ok = cudaCallSucceeded(cudaGetLastError(), "norm kernel launch")
          && cudaCallSucceeded(cudaMemcpy(outputData.data(), tgtDevData, tgtBytes, cudaMemcpyDeviceToHost),
                               "cudaMemcpy(device to host)");
    }

    if (ok)
    {
        // print part of pixel for comparing; clamp so that images smaller
        // than 8x8 are not read out of bounds
        const int rows = h < 8 ? h : 8;
        const int cols = w < 8 ? w : 8;
        for (int i = 0; i < rows; i++)
        {
            for (int j = 0; j < cols; j++)
            {
                const size_t p = (static_cast<size_t>(i) * w + j) * 3;
                std::cout << (int)img.data[p] << ",";                // B src
                std::cout << outputData[p] << " ";                   // B tgt
                std::cout << (int)img.data[p + 1] << ",";            // G src
                std::cout << outputData[p + 1] << " ";               // G tgt
                std::cout << (int)img.data[p + 2] << ",";            // R src
                std::cout << outputData[p + 2] << std::endl;         // R tgt
            }
        }
    }

    // cudaFree(nullptr) is a no-op, so this is correct after partial failure.
    cudaFree(tgtDevData);
    cudaFree(srcDevData);
}
73+
74+
75+
// Entry point: expects exactly one command-line argument, the path of the
// image to normalize. Prints usage and exits with status 1 otherwise.
int main(int argc, char *argv[])
{
    const bool hasImagePath = (argc == 2);

    if (hasImagePath)
    {
        const std::string imagePath(argv[1]);
        normalize(imagePath);
        return 0;
    }

    // Wrong argument count: explain how to invoke the program.
    printf("This program need 1 argument\n");
    printf("Usage: ./normal [image path]\n");
    printf("Example: ./normal lena.jpg\n");
    return 1;
}

0 commit comments

Comments
 (0)