Skip to content
This repository was archived by the owner on Jun 27, 2022. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions Problem Sets/Problem Set 1/student_func.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
// Color to Greyscale Conversion

//A common way to represent color images is known as RGBA - the color
//is specified by how much Red, Green, and Blue is in it.
//The 'A' stands for Alpha and is used for transparency; it will be
//ignored in this homework.

//Each channel Red, Green, Blue, and Alpha is represented by one byte.
//Since we are using one byte for each color there are 256 different
//possible values for each color. This means we use 4 bytes per pixel.

Expand All @@ -32,6 +32,7 @@
//so that the entire image is processed.

#include "utils.h"
#include <stdio.h>

__global__
void rgba_to_greyscale(const uchar4* const rgbaImage,
Expand All @@ -48,19 +49,32 @@ void rgba_to_greyscale(const uchar4* const rgbaImage,
//Note: We will be ignoring the alpha channel for this conversion

//First create a mapping from the 2D block and grid locations
//to an absolute 2D location in the image, then use that to
//calculate a 1D offset
int y = threadIdx.y+ blockIdx.y* blockDim.y;
int x = threadIdx.x+ blockIdx.x* blockDim.x;
if (y < numCols && x < numRows) {
int index = numRows*y +x;
uchar4 color = rgbaImage[index];
unsigned char grey = (unsigned char)(0.299f*color.x+ 0.587f*color.y + 0.114f*color.z);
greyImage[index] = grey;
}
}

// Host-side launcher for the rgba_to_greyscale kernel.
//
// Parameters:
//   h_rgbaImage - host-side image pointer; not used by this function.
//   d_rgbaImage - input image pointer handed to the kernel.
//   d_greyImage - output buffer pointer handed to the kernel.
//   numRows     - forwarded to the kernel unchanged.
//   numCols     - forwarded to the kernel unchanged.
//
// Launch layout: square blocks of 32x32 threads. The kernel in this file
// bounds its x thread index by numRows and its y thread index by numCols
// (index = numRows * y + x), so the grid's x extent is derived from numRows
// and its y extent from numCols. Keep the two in sync if either is changed.
//
// Errors from the launch itself and from kernel execution are both routed
// through checkCudaErrors.
void your_rgba_to_greyscale(const uchar4 * const h_rgbaImage, uchar4 * const d_rgbaImage,
                            unsigned char* const d_greyImage, size_t numRows, size_t numCols)
{
  // A zero-sized grid is an invalid launch configuration; an empty image
  // simply has nothing to convert.
  if (numRows == 0 || numCols == 0) {
    return;
  }

  const unsigned int blockWidth = 32;
  const dim3 blockSize(blockWidth, blockWidth, 1);

  // Ceiling division: just enough blocks to cover every element, without the
  // extra all-idle row/column of blocks that "n / blockWidth + 1" produces
  // when n is an exact multiple of blockWidth.
  const unsigned int blocksX =
      static_cast<unsigned int>((numRows + blockWidth - 1) / blockWidth);
  const unsigned int blocksY =
      static_cast<unsigned int>((numCols + blockWidth - 1) / blockWidth);
  const dim3 gridSize(blocksX, blocksY, 1);

  rgba_to_greyscale<<<gridSize, blockSize>>>(d_rgbaImage, d_greyImage, numRows, numCols);

  // cudaGetLastError reports launch-configuration failures; the synchronize
  // call reports faults raised while the kernel was running.
  checkCudaErrors(cudaGetLastError());
  checkCudaErrors(cudaDeviceSynchronize());
}

95 changes: 77 additions & 18 deletions Problem Sets/Problem Set 2/student_func.cu
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <stdio.h>
// Homework 2
// Image Blurring
//
Expand Down Expand Up @@ -102,6 +103,7 @@

#include "utils.h"

#include <stdio.h>
__global__
void gaussian_blur(const unsigned char* const inputChannel,
unsigned char* const outputChannel,
Expand All @@ -117,11 +119,33 @@ void gaussian_blur(const unsigned char* const inputChannel,
// the image. You'll want code that performs the following check before accessing
// GPU memory:
//
// if ( absolute_image_position_x >= numCols ||
// absolute_image_position_y >= numRows )
// {
// return;
// }


const int2 p = make_int2( blockIdx.x * blockDim.x + threadIdx.x,
blockIdx.y * blockDim.y + threadIdx.y);
const int m = p.y * numCols + p.x;

if(p.x >= numCols || p.y >= numRows)
return;

float color = 0.0f;

for(int f_y = 0; f_y < filterWidth; f_y++) {
for(int f_x = 0; f_x < filterWidth; f_x++) {

int c_x = p.x + f_x - filterWidth/2;
int c_y = p.y + f_y - filterWidth/2;
c_x = min(max(c_x, 0), numCols - 1);
c_y = min(max(c_y, 0), numRows - 1);
float filter_value = filter[f_y*filterWidth + f_x];
color += filter_value*static_cast<float>(inputChannel[c_y*numCols + c_x]);

}
}

outputChannel[m] = color;



// NOTE: If a thread's absolute position 2D position is within the image, but some of
// its neighbors are outside the image, then you will need to be extra careful. Instead
Expand All @@ -147,11 +171,16 @@ void separateChannels(const uchar4* const inputImageRGBA,
// the image. You'll want code that performs the following check before accessing
// GPU memory:
//
// if ( absolute_image_position_x >= numCols ||
// absolute_image_position_y >= numRows )
// {
// return;
// }

const int2 p = make_int2( blockIdx.x * blockDim.x + threadIdx.x,
blockIdx.y * blockDim.y + threadIdx.y);
const int m = p.y * numCols + p.x;

if(p.x >= numCols || p.y >= numRows)
return;
redChannel[m] = inputImageRGBA[m].x;
greenChannel[m] = inputImageRGBA[m].y;
blueChannel[m] = inputImageRGBA[m].z;
}

//This kernel takes in three color channels and recombines them
Expand Down Expand Up @@ -205,11 +234,12 @@ void allocateMemoryAndCopyToGPU(const size_t numRowsImage, const size_t numColsI
//be sure to use checkCudaErrors like the above examples to
//be able to tell if anything goes wrong
//IMPORTANT: Notice that we pass a pointer to a pointer to cudaMalloc

checkCudaErrors(cudaMalloc(&d_filter, sizeof( float) * filterWidth * filterWidth));
//TODO:
//Copy the filter on the host (h_filter) to the memory you just allocated
//on the GPU. cudaMemcpy(dst, src, numBytes, cudaMemcpyHostToDevice);
//Remember to use checkCudaErrors!
checkCudaErrors(cudaMemcpy(d_filter, h_filter, sizeof(float) * filterWidth * filterWidth, cudaMemcpyHostToDevice));

}

Expand All @@ -221,21 +251,50 @@ void your_gaussian_blur(const uchar4 * const h_inputImageRGBA, uchar4 * const d_
const int filterWidth)
{
//TODO: Set reasonable block size (i.e., number of threads per block)
const dim3 blockSize;
const dim3 blockSize(32, 32);

//TODO:
//Compute correct grid size (i.e., number of blocks per kernel launch)
//from the image size and block size.
const dim3 gridSize;
const dim3 gridSize(numCols/blockSize.x + 1, numRows/blockSize.y + 1);


//TODO: Launch a kernel for separating the RGBA image into different color channels

// Call cudaDeviceSynchronize(), then call checkCudaErrors() immediately after
// launching your kernel to make sure that you didn't make any mistakes.
separateChannels<<<gridSize, blockSize>>>(d_inputImageRGBA,
numRows,
numCols,
d_red,
d_green,
d_blue);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());


//TODO: Call your convolution kernel here 3 times, once for each color channel.

gaussian_blur<<<gridSize, blockSize>>>(
d_red,
d_redBlurred,
numRows,
numCols,
d_filter,
filterWidth);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());

gaussian_blur<<<gridSize, blockSize>>>(
d_blue,
d_blueBlurred,
numRows,
numCols,
d_filter,
filterWidth);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());

gaussian_blur<<<gridSize, blockSize>>>(
d_green,
d_greenBlurred,
numRows,
numCols,
d_filter,
filterWidth);
// Again, call cudaDeviceSynchronize(), then call checkCudaErrors() immediately after
// launching your kernel to make sure that you didn't make any mistakes.
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
Expand All @@ -251,7 +310,6 @@ void your_gaussian_blur(const uchar4 * const h_inputImageRGBA, uchar4 * const d_
numRows,
numCols);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());

}


Expand All @@ -262,3 +320,4 @@ void cleanup() {
checkCudaErrors(cudaFree(d_green));
checkCudaErrors(cudaFree(d_blue));
}

Loading