diff --git a/PROJ3_WIN/565Rasterizer.sdf b/PROJ3_WIN/565Rasterizer.sdf new file mode 100644 index 0000000..b693313 Binary files /dev/null and b/PROJ3_WIN/565Rasterizer.sdf differ diff --git a/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj index 1077f39..a072658 100755 --- a/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj +++ b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj @@ -86,6 +86,7 @@ $(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include;C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.0/C/common/inc;../shared/glew/includes;../shared/freeglut/includes + compute_20,sm_20 @@ -111,6 +112,7 @@ $(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include;C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.0/C/common/inc;../shared/glew/includes;../shared/freeglut/includes + compute_20,sm_20 diff --git a/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj.gundeep.nvuser b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj.gundeep.nvuser new file mode 100644 index 0000000..2fa0bb0 --- /dev/null +++ b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj.gundeep.nvuser @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/PROJ3_WIN/Readme Rasterizer.pdf b/PROJ3_WIN/Readme Rasterizer.pdf new file mode 100644 index 0000000..b53db11 Binary files /dev/null and b/PROJ3_WIN/Readme Rasterizer.pdf differ diff --git a/objs/cube.obj b/objs/cube.obj new file mode 100644 index 0000000..f696427 --- /dev/null +++ b/objs/cube.obj @@ -0,0 +1,34 @@ +# cube.obj +# + +g cube + +v 0.0 0.0 0.0 +v 0.0 0.0 1.0 +v 0.0 1.0 0.0 +v 0.0 1.0 1.0 +v 1.0 0.0 0.0 +v 1.0 0.0 1.0 +v 1.0 1.0 0.0 +v 1.0 1.0 1.0 + +vn 0.0 0.0 1.0 +vn 0.0 0.0 -1.0 +vn 0.0 1.0 0.0 +vn 0.0 -1.0 0.0 +vn 1.0 0.0 0.0 +vn -1.0 0.0 0.0 + +f 1//2 7//2 5//2 +f 1//2 3//2 7//2 +f 1//6 4//6 3//6 +f 1//6 2//6 4//6 +f 3//3 8//3 7//3 +f 3//3 4//3 8//3 +f 5//5 7//5 8//5 +f 5//5 8//5 6//5 +f 1//4 5//4 6//4 +f 1//4 6//4 2//4 +f 2//1 6//1 8//1 +f 2//1 8//1 4//1 + diff --git a/objs/tri.obj b/objs/tri.obj new file mode 100644 index 0000000..5085e34 --- /dev/null +++ b/objs/tri.obj @@ -0,0 +1,23 @@ +# cube.obj +# + +g cube + +v 0.0 0.0 0.0 +v 0.0 0.0 1.0 +v 0.0 1.0 0.0 +v 0.0 1.0 1.0 +v 1.0 0.0 0.0 +v 1.0 0.0 1.0 +v 1.0 1.0 0.0 +v 1.0 1.0 1.0 + +vn 0.0 0.0 1.0 +vn 0.0 0.0 -1.0 +vn 0.0 1.0 0.0 +vn 0.0 -1.0 0.0 +vn 1.0 0.0 0.0 +vn -1.0 0.0 0.0 + +f 2//1 8//1 4//1 + diff --git a/src/ObjCore/obj.cpp b/src/ObjCore/obj.cpp index e748574..a7f6e6a 100755 --- a/src/ObjCore/obj.cpp +++ b/src/ObjCore/obj.cpp @@ -9,7 +9,7 @@ using namespace std; -obj::obj(){ +obj::obj(){ // constructor vbosize = 0; nbosize = 0; cbosize = 0; @@ -22,7 +22,7 @@ obj::obj(){ } -obj::~obj(){ +obj::~obj(){ //destructor /*delete vbo; delete nbo; delete cbo; @@ -37,7 +37,7 @@ obj::~obj(){ void obj::buildVBOs(){ recenter(); - vector VBOvec; + vector VBOvec; // vertex buffer object vector NBOvec; vector IBOvec; int index = 0; diff --git a/src/main.cpp b/src/main.cpp index dfb689a..47e77bf 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,354 +1,372 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#include "main.h" - -//------------------------------- -//-------------MAIN-------------- -//------------------------------- - -int main(int argc, char** argv){ - - bool loadedScene = false; - for(int i=1; ibuildVBOs(); - delete loader; - loadedScene = true; - } - } - - if(!loadedScene){ - cout << "Usage: mesh=[obj file]" << endl; - return 0; - } - - frame = 0; - seconds = time (NULL); - fpstracker = 0; - - // Launch CUDA/GL - #ifdef __APPLE__ - // Needed in OSX to force use of OpenGL3.2 - glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3); - glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 2); - glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); - glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); - init(); - #else - init(argc, argv); - #endif - - initCuda(); - - initVAO(); - initTextures(); - - GLuint passthroughProgram; - passthroughProgram = initShader("shaders/passthroughVS.glsl", "shaders/passthroughFS.glsl"); - - glUseProgram(passthroughProgram); - glActiveTexture(GL_TEXTURE0); - - #ifdef __APPLE__ - // send into GLFW main loop - while(1){ - display(); - if (glfwGetKey(GLFW_KEY_ESC) == GLFW_PRESS || !glfwGetWindowParam( GLFW_OPENED )){ - kernelCleanup(); - cudaDeviceReset(); - exit(0); - } - } - - glfwTerminate(); - #else - glutDisplayFunc(display); - glutKeyboardFunc(keyboard); - - glutMainLoop(); - #endif - kernelCleanup(); - return 0; -} - -//------------------------------- -//---------RUNTIME STUFF--------- -//------------------------------- - -void runCuda(){ - // Map OpenGL buffer object for writing from CUDA on a single GPU - // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer - dptr=NULL; - - vbo = mesh->getVBO(); - vbosize = mesh->getVBOsize(); - - float newcbo[] = {0.0, 1.0, 0.0, - 0.0, 0.0, 1.0, - 1.0, 0.0, 0.0}; - cbo = newcbo; - cbosize = 9; - - ibo = mesh->getIBO(); - ibosize = mesh->getIBOsize(); - - cudaGLMapBufferObject((void**)&dptr, pbo); - cudaRasterizeCore(dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize); - cudaGLUnmapBufferObject(pbo); - - vbo = NULL; - cbo = NULL; - ibo = NULL; - - frame++; - fpstracker++; - -} - -#ifdef __APPLE__ - - void display(){ - runCuda(); - time_t seconds2 = time (NULL); - - if(seconds2-seconds >= 1){ - - fps = fpstracker/(seconds2-seconds); - fpstracker = 0; - seconds = seconds2; - - } - - string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS"; - - glfwSetWindowTitle(title.c_str()); - - - glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); - glBindTexture(GL_TEXTURE_2D, displayImage); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, - GL_RGBA, GL_UNSIGNED_BYTE, NULL); - - - glClear(GL_COLOR_BUFFER_BIT); - - // VAO, shader program, and texture already bound - glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); - - glfwSwapBuffers(); - } - -#else - - void display(){ - runCuda(); - time_t seconds2 = time (NULL); - - if(seconds2-seconds >= 1){ - - fps = fpstracker/(seconds2-seconds); - fpstracker = 0; - seconds = seconds2; - - } - - string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS"; - glutSetWindowTitle(title.c_str()); - - glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); - glBindTexture(GL_TEXTURE_2D, displayImage); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, - GL_RGBA, GL_UNSIGNED_BYTE, NULL); - - glClear(GL_COLOR_BUFFER_BIT); - - // VAO, shader program, and texture already bound - glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); - - glutPostRedisplay(); - glutSwapBuffers(); - } - - void keyboard(unsigned char key, int x, int y) - { - switch (key) - { - case(27): - shut_down(1); - break; - } - } - -#endif - -//------------------------------- -//----------SETUP STUFF---------- -//------------------------------- - -#ifdef __APPLE__ - void init(){ - - if (glfwInit() != GL_TRUE){ - shut_down(1); - } - - // 16 bit color, no depth, alpha or stencil buffers, windowed - if (glfwOpenWindow(width, height, 5, 6, 5, 0, 0, 0, GLFW_WINDOW) != GL_TRUE){ - shut_down(1); - } - - // Set up vertex array object, texture stuff - initVAO(); - initTextures(); - } -#else - void init(int argc, char* argv[]){ - glutInit(&argc, argv); - glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); - glutInitWindowSize(width, height); - glutCreateWindow("CIS565 Rasterizer"); - - // Init GLEW - glewInit(); - GLenum err = glewInit(); - if (GLEW_OK != err) - { - /* Problem: glewInit failed, something is seriously wrong. */ - std::cout << "glewInit failed, aborting." << std::endl; - exit (1); - } - - initVAO(); - initTextures(); - } -#endif - -void initPBO(GLuint* pbo){ - if (pbo) { - // set up vertex data parameter - int num_texels = width*height; - int num_values = num_texels * 4; - int size_tex_data = sizeof(GLubyte) * num_values; - - // Generate a buffer ID called a PBO (Pixel Buffer Object) - glGenBuffers(1,pbo); - // Make this the current UNPACK buffer (OpenGL is state-based) - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo); - // Allocate data for the buffer. 4-channel 8-bit image - glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY); - cudaGLRegisterBufferObject( *pbo ); - } -} - -void initCuda(){ - // Use device with highest Gflops/s - cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); - - initPBO(&pbo); - - // Clean up on program exit - atexit(cleanupCuda); - - runCuda(); -} - -void initTextures(){ - glGenTextures(1,&displayImage); - glBindTexture(GL_TEXTURE_2D, displayImage); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, - GL_UNSIGNED_BYTE, NULL); -} - -void initVAO(void){ - GLfloat vertices[] = - { - -1.0f, -1.0f, - 1.0f, -1.0f, - 1.0f, 1.0f, - -1.0f, 1.0f, - }; - - GLfloat texcoords[] = - { - 1.0f, 1.0f, - 0.0f, 1.0f, - 0.0f, 0.0f, - 1.0f, 0.0f - }; - - GLushort indices[] = { 0, 1, 3, 3, 1, 2 }; - - GLuint vertexBufferObjID[3]; - glGenBuffers(3, vertexBufferObjID); - - glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[0]); - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); - glVertexAttribPointer((GLuint)positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(positionLocation); - - glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[1]); - glBufferData(GL_ARRAY_BUFFER, sizeof(texcoords), texcoords, GL_STATIC_DRAW); - glVertexAttribPointer((GLuint)texcoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(texcoordsLocation); - - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBufferObjID[2]); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); -} - -GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath){ - GLuint program = glslUtility::createProgram(vertexShaderPath, fragmentShaderPath, attributeLocations, 2); - GLint location; - - glUseProgram(program); - - if ((location = glGetUniformLocation(program, "u_image")) != -1) - { - glUniform1i(location, 0); - } - - return program; -} - -//------------------------------- -//---------CLEANUP STUFF--------- -//------------------------------- - -void cleanupCuda(){ - if(pbo) deletePBO(&pbo); - if(displayImage) deleteTexture(&displayImage); -} - -void deletePBO(GLuint* pbo){ - if (pbo) { - // unregister this buffer object with CUDA - cudaGLUnregisterBufferObject(*pbo); - - glBindBuffer(GL_ARRAY_BUFFER, *pbo); - glDeleteBuffers(1, pbo); - - *pbo = (GLuint)NULL; - } -} - -void deleteTexture(GLuint* tex){ - glDeleteTextures(1, tex); - *tex = (GLuint)NULL; -} - -void shut_down(int return_code){ - kernelCleanup(); - cudaDeviceReset(); - #ifdef __APPLE__ - glfwTerminate(); - #endif - exit(return_code); -} +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#include "main.h" + +//------------------------------- +//-------------MAIN-------------- +//------------------------------- + +int main(int argc, char** argv){ + + bool loadedScene = false; + for(int i=1; ibuildVBOs(); + + delete loader; + loadedScene = true; + } + } + + if(!loadedScene){ + cout << "Usage: mesh=[obj file]" << endl; + return 0; + } + + frame = 0; + seconds = time (NULL); + fpstracker = 0; + + // Launch CUDA/GL + #ifdef __APPLE__ + // Needed in OSX to force use of OpenGL3.2 + glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3); + glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 2); + glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); + glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); + init(); + #else + init(argc, argv); + #endif + + initCuda(); + + initVAO(); + initTextures(); + + GLuint passthroughProgram; + passthroughProgram = initShader("shaders/passthroughVS.glsl", "shaders/passthroughFS.glsl"); + + glUseProgram(passthroughProgram); + glActiveTexture(GL_TEXTURE0); + + #ifdef __APPLE__ + // send into GLFW main loop + while(1){ + display(); + if (glfwGetKey(GLFW_KEY_ESC) == GLFW_PRESS || !glfwGetWindowParam( GLFW_OPENED )){ + kernelCleanup(); + cudaDeviceReset(); + exit(0); + } + } + + glfwTerminate(); + #else + glutDisplayFunc(display); + glutKeyboardFunc(keyboard); + + glutMainLoop(); + #endif + kernelCleanup(); + return 0; +} + +//------------------------------- +//---------RUNTIME STUFF--------- +//------------------------------- + +void runCuda(){ + // Map OpenGL buffer object for writing from CUDA on a single GPU + // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer + dptr=NULL; + + vbo = mesh->getVBO(); + vbosize = mesh->getVBOsize(); + + nbo = mesh->getNBO(); + nbosize= mesh->getNBOsize(); + + /*for( int i=0; i<52238;i++) + { + printf("vbo %i= %f \n", i,vbo[i]); + } + printf("nbosize %i",nbosize); + int y; + cin>>y;*/ + + float newcbo[] = {0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + 1.0, 0.0, 0.0}; + cbo = newcbo; + cbosize = 9; + + ibo = mesh->getIBO(); + ibosize = mesh->getIBOsize(); + + + glm::vec3 lightcol=glm::vec3(1.0,1.0,1.0); + glm::vec3 lightpos=glm::vec3(0,10.0,0.0); + + cudaGLMapBufferObject((void**)&dptr, pbo); + cudaRasterizeCore(dptr, glm::vec2(width, height),frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbosize,nbo,lightpos,lightcol); + cudaGLUnmapBufferObject(pbo); + + vbo = NULL; + cbo = NULL; + ibo = NULL; + nbo = NULL; + + frame++; + fpstracker++; + int x; + // cin>>x; +} + +#ifdef __APPLE__ + + void display(){ + runCuda(); + time_t seconds2 = time (NULL); + + if(seconds2-seconds >= 1){ + + fps = fpstracker/(seconds2-seconds); + fpstracker = 0; + seconds = seconds2; + + } + + string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS"; + + glfwSetWindowTitle(title.c_str()); + + + glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); + glBindTexture(GL_TEXTURE_2D, displayImage); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, + GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + + glClear(GL_COLOR_BUFFER_BIT); + + // VAO, shader program, and texture already bound + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); + + glfwSwapBuffers(); + } + +#else + + void display(){ + runCuda(); + time_t seconds2 = time (NULL); + + if(seconds2-seconds >= 1){ + + fps = fpstracker/(seconds2-seconds); + fpstracker = 0; + seconds = seconds2; + + } + + string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS"; + glutSetWindowTitle(title.c_str()); + + glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo); + glBindTexture(GL_TEXTURE_2D, displayImage); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, + GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + glClear(GL_COLOR_BUFFER_BIT); + + // VAO, shader program, and texture already bound + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); + + glutPostRedisplay(); + glutSwapBuffers(); + } + + void keyboard(unsigned char key, int x, int y) + { + switch (key) + { + case(27): + shut_down(1); + break; + } + } + +#endif + +//------------------------------- +//----------SETUP STUFF---------- +//------------------------------- + +#ifdef __APPLE__ + void init(){ + + if (glfwInit() != GL_TRUE){ + shut_down(1); + } + + // 16 bit color, no depth, alpha or stencil buffers, windowed + if (glfwOpenWindow(width, height, 5, 6, 5, 0, 0, 0, GLFW_WINDOW) != GL_TRUE){ + shut_down(1); + } + + // Set up vertex array object, texture stuff + initVAO(); + initTextures(); + } +#else + void init(int argc, char* argv[]){ + glutInit(&argc, argv); + glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); + glutInitWindowSize(width, height); + glutCreateWindow("CIS565 Rasterizer"); + + // Init GLEW + glewInit(); + GLenum err = glewInit(); + if (GLEW_OK != err) + { + /* Problem: glewInit failed, something is seriously wrong. */ + std::cout << "glewInit failed, aborting." << std::endl; + exit (1); + } + + initVAO(); + initTextures(); + } +#endif + +void initPBO(GLuint* pbo){ + if (pbo) { + // set up vertex data parameter + int num_texels = width*height; + int num_values = num_texels * 4; + int size_tex_data = sizeof(GLubyte) * num_values; + + // Generate a buffer ID called a PBO (Pixel Buffer Object) + glGenBuffers(1,pbo); + // Make this the current UNPACK buffer (OpenGL is state-based) + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo); + // Allocate data for the buffer. 4-channel 8-bit image + glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY); + cudaGLRegisterBufferObject( *pbo ); + } +} + +void initCuda(){ + // Use device with highest Gflops/s + cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); + + initPBO(&pbo); + + // Clean up on program exit + atexit(cleanupCuda); + + runCuda(); +} + +void initTextures(){ + glGenTextures(1,&displayImage); + glBindTexture(GL_TEXTURE_2D, displayImage); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, + GL_UNSIGNED_BYTE, NULL); +} + +void initVAO(void){ + GLfloat vertices[] = + { + -1.0f, -1.0f, + 1.0f, -1.0f, + 1.0f, 1.0f, + -1.0f, 1.0f, + }; + + GLfloat texcoords[] = + { + 1.0f, 1.0f, + 0.0f, 1.0f, + 0.0f, 0.0f, + 1.0f, 0.0f + }; + + GLushort indices[] = { 0, 1, 3, 3, 1, 2 }; + + GLuint vertexBufferObjID[3]; + glGenBuffers(3, vertexBufferObjID); + + glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[0]); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); + glVertexAttribPointer((GLuint)positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); + glEnableVertexAttribArray(positionLocation); + + glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[1]); + glBufferData(GL_ARRAY_BUFFER, sizeof(texcoords), texcoords, GL_STATIC_DRAW); + glVertexAttribPointer((GLuint)texcoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0); + glEnableVertexAttribArray(texcoordsLocation); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBufferObjID[2]); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); +} + +GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath){ + GLuint program = glslUtility::createProgram(vertexShaderPath, fragmentShaderPath, attributeLocations, 2); + GLint location; + + glUseProgram(program); + + if ((location = glGetUniformLocation(program, "u_image")) != -1) + { + glUniform1i(location, 0); + } + + return program; +} + +//------------------------------- +//---------CLEANUP STUFF--------- +//------------------------------- + +void cleanupCuda(){ + if(pbo) deletePBO(&pbo); + if(displayImage) deleteTexture(&displayImage); +} + +void deletePBO(GLuint* pbo){ + if (pbo) { + // unregister this buffer object with CUDA + cudaGLUnregisterBufferObject(*pbo); + + glBindBuffer(GL_ARRAY_BUFFER, *pbo); + glDeleteBuffers(1, pbo); + + *pbo = (GLuint)NULL; + } +} + +void deleteTexture(GLuint* tex){ + glDeleteTextures(1, tex); + *tex = (GLuint)NULL; +} + +void shut_down(int return_code){ + kernelCleanup(); + cudaDeviceReset(); + #ifdef __APPLE__ + glfwTerminate(); + #endif + exit(return_code); +} diff --git a/src/main.h b/src/main.h index 63bf0fa..127d938 100755 --- a/src/main.h +++ b/src/main.h @@ -1,105 +1,107 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#ifndef MAIN_H -#define MAIN_H - -#ifdef __APPLE__ - #include -#else - #include - #include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "glslUtility.h" -#include "glm/glm.hpp" -#include "rasterizeKernels.h" -#include "utilities.h" -#include "ObjCore/objloader.h" - -using namespace std; - -//------------------------------- -//------------GL STUFF----------- -//------------------------------- -int frame; -int fpstracker; -double seconds; -int fps = 0; -GLuint positionLocation = 0; -GLuint texcoordsLocation = 1; -const char *attributeLocations[] = { "Position", "Tex" }; -GLuint pbo = (GLuint)NULL; -GLuint displayImage; -uchar4 *dptr; - -obj* mesh; - -float* vbo; -int vbosize; -float* cbo; -int cbosize; -int* ibo; -int ibosize; - -//------------------------------- -//----------CUDA STUFF----------- -//------------------------------- - -int width=800; int height=800; - -//------------------------------- -//-------------MAIN-------------- -//------------------------------- - -int main(int argc, char** argv); - -//------------------------------- -//---------RUNTIME STUFF--------- -//------------------------------- - -void runCuda(); - -#ifdef __APPLE__ - void display(); -#else - void display(); - void keyboard(unsigned char key, int x, int y); -#endif - -//------------------------------- -//----------SETUP STUFF---------- -//------------------------------- - -#ifdef __APPLE__ - void init(); -#else - void init(int argc, char* argv[]); -#endif - -void initPBO(GLuint* pbo); -void initCuda(); -void initTextures(); -void initVAO(); -GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath); - -//------------------------------- -//---------CLEANUP STUFF--------- -//------------------------------- - -void cleanupCuda(); -void deletePBO(GLuint* pbo); -void deleteTexture(GLuint* tex); -void shut_down(int return_code); - +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#ifndef MAIN_H +#define MAIN_H + +#ifdef __APPLE__ + #include +#else + #include + #include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "glslUtility.h" +#include "glm/glm.hpp" +#include "rasterizeKernels.h" +#include "utilities.h" +#include "ObjCore/objloader.h" + +using namespace std; + +//------------------------------- +//------------GL STUFF----------- +//------------------------------- +int frame; +int fpstracker; +double seconds; +int fps = 0; +GLuint positionLocation = 0; +GLuint texcoordsLocation = 1; +const char *attributeLocations[] = { "Position", "Tex" }; +GLuint pbo = (GLuint)NULL; +GLuint displayImage; +uchar4 *dptr; + +obj* mesh; + +float* vbo; +float* nbo; +int nbosize; +int vbosize; +float* cbo; +int cbosize; +int* ibo; +int ibosize; + +//------------------------------- +//----------CUDA STUFF----------- +//------------------------------- + +int width=800; int height=800; + +//------------------------------- +//-------------MAIN-------------- +//------------------------------- + +int main(int argc, char** argv); + +//------------------------------- +//---------RUNTIME STUFF--------- +//------------------------------- + +void runCuda(); + +#ifdef __APPLE__ + void display(); +#else + void display(); + void keyboard(unsigned char key, int x, int y); +#endif + +//------------------------------- +//----------SETUP STUFF---------- +//------------------------------- + +#ifdef __APPLE__ + void init(); +#else + void init(int argc, char* argv[]); +#endif + +void initPBO(GLuint* pbo); +void initCuda(); +void initTextures(); +void initVAO(); +GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath); + +//------------------------------- +//---------CLEANUP STUFF--------- +//------------------------------- + +void cleanupCuda(); +void deletePBO(GLuint* pbo); +void deleteTexture(GLuint* tex); +void shut_down(int return_code); + #endif \ No newline at end of file diff --git a/src/rasterizeKernels.cu b/src/rasterizeKernels.cu index 826ec80..0b4c988 100755 --- a/src/rasterizeKernels.cu +++ b/src/rasterizeKernels.cu @@ -1,267 +1,627 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#include -#include -#include -#include -#include -#include "rasterizeKernels.h" -#include "rasterizeTools.h" - -glm::vec3* framebuffer; -fragment* depthbuffer; -float* device_vbo; -float* device_cbo; -int* device_ibo; -triangle* primitives; - -void checkCUDAError(const char *msg) { - cudaError_t err = cudaGetLastError(); - if( cudaSuccess != err) { - fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); - exit(EXIT_FAILURE); - } -} - -//Handy dandy little hashing function that provides seeds for random number generation -__host__ __device__ unsigned int hash(unsigned int a){ - a = (a+0x7ed55d16) + (a<<12); - a = (a^0xc761c23c) ^ (a>>19); - a = (a+0x165667b1) + (a<<5); - a = (a+0xd3a2646c) ^ (a<<9); - a = (a+0xfd7046c5) + (a<<3); - a = (a^0xb55a4f09) ^ (a>>16); - return a; -} - -//Writes a given fragment to a fragment buffer at a given location -__host__ __device__ void writeToDepthbuffer(int x, int y, fragment frag, fragment* depthbuffer, glm::vec2 resolution){ - if(x255){ - color.x = 255; - } - - if(color.y>255){ - color.y = 255; - } - - if(color.z>255){ - color.z = 255; - } - - // Each thread writes one pixel location in the texture (textel) - PBOpos[index].w = 0; - PBOpos[index].x = color.x; - PBOpos[index].y = color.y; - PBOpos[index].z = color.z; - } -} - -//TODO: Implement a vertex shader -__global__ void vertexShadeKernel(float* vbo, int vbosize){ - int index = (blockIdx.x * blockDim.x) + threadIdx.x; - if(index>>(resolution, framebuffer, glm::vec3(0,0,0)); - - fragment frag; - frag.color = glm::vec3(0,0,0); - frag.normal = glm::vec3(0,0,0); - frag.position = glm::vec3(0,0,-10000); - clearDepthBuffer<<>>(resolution, depthbuffer,frag); - - //------------------------------ - //memory stuff - //------------------------------ - primitives = NULL; - cudaMalloc((void**)&primitives, (ibosize/3)*sizeof(triangle)); - - device_ibo = NULL; - cudaMalloc((void**)&device_ibo, ibosize*sizeof(int)); - cudaMemcpy( device_ibo, ibo, ibosize*sizeof(int), cudaMemcpyHostToDevice); - - device_vbo = NULL; - cudaMalloc((void**)&device_vbo, vbosize*sizeof(float)); - cudaMemcpy( device_vbo, vbo, vbosize*sizeof(float), cudaMemcpyHostToDevice); - - device_cbo = NULL; - cudaMalloc((void**)&device_cbo, cbosize*sizeof(float)); - cudaMemcpy( device_cbo, cbo, cbosize*sizeof(float), cudaMemcpyHostToDevice); - - tileSize = 32; - int primitiveBlocks = ceil(((float)vbosize/3)/((float)tileSize)); - - //------------------------------ - //vertex shader - //------------------------------ - vertexShadeKernel<<>>(device_vbo, vbosize); - - cudaDeviceSynchronize(); - //------------------------------ - //primitive assembly - //------------------------------ - primitiveBlocks = ceil(((float)ibosize/3)/((float)tileSize)); - primitiveAssemblyKernel<<>>(device_vbo, vbosize, device_cbo, cbosize, device_ibo, ibosize, primitives); - - cudaDeviceSynchronize(); - //------------------------------ - //rasterization - //------------------------------ - rasterizationKernel<<>>(primitives, ibosize/3, depthbuffer, resolution); - - cudaDeviceSynchronize(); - //------------------------------ - //fragment shader - //------------------------------ - fragmentShadeKernel<<>>(depthbuffer, resolution); - - cudaDeviceSynchronize(); - //------------------------------ - //write fragments to framebuffer - //------------------------------ - render<<>>(resolution, depthbuffer, framebuffer); - sendImageToPBO<<>>(PBOpos, resolution, framebuffer); - - cudaDeviceSynchronize(); - - kernelCleanup(); - - checkCUDAError("Kernel failed!"); -} - -void kernelCleanup(){ - cudaFree( primitives ); - cudaFree( device_vbo ); - cudaFree( device_cbo ); - cudaFree( device_ibo ); - cudaFree( framebuffer ); - cudaFree( depthbuffer ); -} - +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania +#include +#include +#include +#include +#include +#include "rasterizeKernels.h" +#include "rasterizeTools.h" +#include "glm\gtc\/matrix_transform.hpp" + +glm::vec3* framebuffer; +fragment* depthbuffer; +int* device_stencil; +float* device_vbo; +float* device_cbo; +int* device_ibo; +float* device_nbo; +triangle* primitives; + +void checkCUDAError(const char *msg) { + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); + exit(EXIT_FAILURE); + } +} + +//Handy dandy little hashing function that provides seeds for random number generation +__host__ __device__ unsigned int hash(unsigned int a){ + a = (a+0x7ed55d16) + (a<<12); + a = (a^0xc761c23c) ^ (a>>19); + a = (a+0x165667b1) + (a<<5); + a = (a+0xd3a2646c) ^ (a<<9); + a = (a+0xfd7046c5) + (a<<3); + a = (a^0xb55a4f09) ^ (a>>16); + return a; +} + +//Writes a given fragment to a fragment buffer at a given location +__host__ __device__ void writeToDepthbuffer(int x, int y, fragment frag, fragment* depthbuffer, glm::vec2 resolution){ + if(x255){ + color.x = 255; + } + + if(color.y>255){ + color.y = 255; + } + + if(color.z>255){ + color.z = 255; + } + + // Each thread writes one pixel location in the texture (textel) + y=resolution.y-y; + x=resolution.x-x; + index=x+(y*resolution.x); + + + PBOpos[index].w = 0; + PBOpos[index].x = color.x; + PBOpos[index].y = color.y; + PBOpos[index].z = color.z; + } +} + +//TODO: Implement a vertex shader +__global__ void vertexShadeKernel(float* vbo, int vbosize , cudaMat4 project){//, float *nbo){ + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if(index=resolution.y) + { + top=resolution.y-1; + } + else if (bottom <= 0 ) + { + bottom=0; + } + + int currentpoint=0; + currentpoint=top; + int xmin, xmax; + float xvaluetest1,xvaluetest2,xvaluetest3; + + bool tri=false; + + while(currentpoint!=bottom) + { + xmax=-1000000; + xmin=10000000; + //////// CASE2 + + if( (primitives[index].p1.x - primitives[index].p0.x)!=0) + { + if (primitives[index].p1.y+0.0001 <= primitives[index].p0.y && primitives[index].p1.y-0.0001 >= primitives[index].p0.y) + { + /*if (index == 1797) printf("Here\n");*/ + xmin=min(primitives[index].p1.x,primitives[index].p0.x); + xmax=max(primitives[index].p1.x,primitives[index].p0.x); + } + else + { + if (index == 2181) printf("in else\n"); + slopep0p1= (primitives[index].p1.y - primitives[index].p0.y) / (primitives[index].p1.x - primitives[index].p0.x); + + xvaluetest1=(currentpoint-primitives[index].p1.y)/slopep0p1 + primitives[index].p1.x; + /*if (xvaluetest1>=0 && xvaluetest1= primitives[index].p1.x) + ||(xvaluetest1 >= primitives[index].p0.x && xvaluetest1 <= primitives[index].p1.x)) + { + + /*if (index == 1797) printf( "checking xvaluetest1 \n");*/ + if(xvaluetest1xmax) + { + xmax=xvaluetest1; + } + } + + if (index == 2181) + printf("Xmin,Xmax After P0P1: (%i, %i)\n", xmin, xmax); + //} + } + } + else + { + xmin=min((int)primitives[index].p1.x,xmin); + xmax=max((int)primitives[index].p1.x,xmax); + } + + /////////// CASE2 + + if ((primitives[index].p2.x - primitives[index].p1.x)!=0) + { + if (primitives[index].p2.y == primitives[index].p1.y) + { + xmin=min((int)min(primitives[index].p2.x, primitives[index].p1.x),xmin); + xmax=max((int)max(primitives[index].p2.x, primitives[index].p1.x),xmax); + } + else + { + if (index == 2181) + printf("Xmin,Xmax Before P1P2: (%i, %i)\n", xmin, xmax); + slopep1p2= (primitives[index].p2.y - primitives[index].p1.y) / (primitives[index].p2.x - primitives[index].p1.x); + xvaluetest2=(currentpoint-primitives[index].p2.y)/slopep1p2 + primitives[index].p2.x; + + if (index == 2181) printf("slopep2p1=%f xvaluetest2= %f ,currentpoint= %i \n",slopep1p2,xvaluetest2, currentpoint); + + if ((xvaluetest2 <= primitives[index].p1.x && xvaluetest2 >= primitives[index].p2.x) || + (xvaluetest2 >= primitives[index].p1.x && xvaluetest2 <= primitives[index].p2.x)) + { + if(xvaluetest2>=0 && xvaluetest2<=xmin) + xmin=xvaluetest2; + if(xvaluetest2xmax) + { + xmax=xvaluetest2; + } + } + + if (index == 2181) + printf("Xmin,Xmax After P1P2: (%i, %i)\n", xmin, xmax); + } + } + else + { + xmin=min((int)primitives[index].p1.x,xmin); + xmax=max((int)primitives[index].p1.x,xmax); + } + + + ////////////////////// CASE3 + + if (primitives[index].p0.x - primitives[index].p2.x!=0) + { + if (primitives[index].p0.y == primitives[index].p2.y) + { + xmin=min((int)min(primitives[index].p0.x, primitives[index].p2.x),xmin); + xmax=max((int)max(primitives[index].p0.x, primitives[index].p2.x),xmax); + } + else + { + if (index == 2181) + printf("Xmin,Xmax Before P0P2: (%i, %i) - CurrentPoint- %i\n", xmin, xmax, currentpoint); + + slopep2p0= (primitives[index].p0.y - primitives[index].p2.y) / (primitives[index].p0.x - primitives[index].p2.x); + xvaluetest3=(currentpoint-primitives[index].p0.y)/slopep2p0 + primitives[index].p0.x ; + + if (index == 2181) printf("slopep2p0=%f xvaluetest3= %d \n",slopep2p0,xvaluetest3); + + if (xvaluetest3>=0 && xvaluetest3= primitives[index].p0.x) || + (xvaluetest3 >= primitives[index].p2.x && xvaluetest3 <= primitives[index].p0.x)) + { + if(xvaluetest3>=0 && xvaluetest3xmax) + xmax=xvaluetest3; + } + + } + if (index == 2181) + printf("Xmin,Xmax After P1P2: (%i, %i)\n", xmin, xmax); + } + + } + else + { + xmin=min((int)primitives[index].p2.x,xmin); + xmax=max((int)primitives[index].p2.x,xmax); + } + + glm::vec3 barry = calculateBarycentricCoordinate(primitives[index], glm::vec2(xmin, currentpoint)); + // using (y-y1)/m + x1=xB + // here y is currentpoint + + while(xmin<=xmax) + { + int pixel_index= xmin+currentpoint*resolution.x; + + fragment newfrag; + newfrag.color=barry.x*primitives[index].c0 + barry.y*primitives[index].c1 + barry.z*primitives[index].c2; + newfrag.normal= glm::normalize(barry.x*primitives[index].n0 + barry.y*primitives[index].n1 + barry.z*primitives[index].n2); + newfrag.lock=1; + newfrag.position.x= xmin; + newfrag.position.y= currentpoint; + + //atomic comapre and swap + bool loop=true; + while(loop) + { + if( xmin < resolution.x && xmin>=0 && currentpoint0 ) + { + /*if(xmin==0) + { + printf("index %d \n", index); + } + tri=true;*/ + if( depthbuffer[index].position.z < newfrag.position.z) + { + //if (atomicExch(&(depthbuffer[pixel_index].lock), 1) == 0) + { + depthbuffer[pixel_index]= newfrag; + loop=false; + // atomicExch(&(depthbuffer[pixel_index].lock),0); + } + } + // printf("some %f",depthbuffer[pixel_index].normal.y); + else + { + loop=false; + } + + } + } + xmin++; + } + currentpoint--; + } + + /*if (tri==false) + { + printf("index %d \n", index); + }*/ + } +} + +//TODO: Implement a fragment shader +__global__ void fragmentShadeKernel(fragment* depthbuffer, glm::vec2 resolution, glm::vec3 lightpos, glm::vec3 lightcol, int* device_stencil) + { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * resolution.x); + + //setting up the stencil + + if(x<=resolution.x && y<=resolution.y) + { + if ( device_stencil[index]==1) + { + + glm::vec3 normal= glm::normalize(depthbuffer[index].normal); + glm::vec3 L=lightpos-depthbuffer[index].position; + float diffuse=glm::clamp((glm::dot(normal,glm::normalize(L)),0.0),0.0,1.0); + + glm::vec3 final_col= diffuse*lightcol * depthbuffer[index].color; + + //depthbuffer[index].color = final_col; + } + } + +} + +//Writes fragment colors to the framebuffer +__global__ void render(glm::vec2 resolution, fragment* depthbuffer, glm::vec3* framebuffer){ + + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + int index = x + (y * resolution.x); + + if(x<=resolution.x && y<=resolution.y){ + framebuffer[index] = depthbuffer[index].color; + } +} + +// Wrapper for the __global__ call that sets up the kernel calls and does a ton of memory management +void cudaRasterizeCore(uchar4* PBOpos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* cbo, int cbosize, int* ibo, int ibosize, int nbosize, float* nbo, glm::vec3 lightpos, glm::vec3 lightcol){ + + //set uf the stencil buffer + device_stencil =NULL; + cudaMalloc((void**)&device_stencil, (int) resolution.x*(int)resolution.y*sizeof(int)); + + int totalpixels= resolution.x*resolution.y; + + int* stencil=new int[totalpixels]; + + + cudaMalloc((void**)&device_stencil, (int) resolution.x*(int)resolution.y*sizeof(int)); + cudaMemcpy( device_stencil, stencil, totalpixels*sizeof(bool), cudaMemcpyHostToDevice); + + + // set up crucial magic + int tileSize = 8; + + dim3 threadsPerBlock(tileSize, tileSize); + dim3 fullBlocksPerGrid((int)ceil(float(resolution.x)/float(tileSize)), (int)ceil(float(resolution.y)/float(tileSize))); + + //set up framebuffer + framebuffer = NULL; + cudaMalloc((void**)&framebuffer, (int)resolution.x*(int)resolution.y*sizeof(glm::vec3)); + + //set up depthbuffer + depthbuffer = NULL; + cudaMalloc((void**)&depthbuffer, (int)resolution.x*(int)resolution.y*sizeof(fragment)); + + + + + + + //kernel launches to black out accumulated/unaccumlated pixel buffers and clear our scattering states + clearImage<<>>(resolution, framebuffer, glm::vec3(0,0,0)); + + fragment frag; + frag.color = glm::vec3(0,0,0); + frag.normal = glm::vec3(0,0,0); + frag.position = glm::vec3(0,0,-10000); + clearDepthBuffer<<>>(resolution, depthbuffer,frag); + + //------------------------------ + //memory stuff + //------------------------------ + primitives = NULL; + cudaMalloc((void**)&primitives, (ibosize/3)*sizeof(triangle)); + + device_ibo = NULL; + cudaMalloc((void**)&device_ibo, ibosize*sizeof(int)); + cudaMemcpy( device_ibo, ibo, ibosize*sizeof(int), cudaMemcpyHostToDevice); + + device_nbo =NULL; + cudaMalloc ((void**)&device_nbo, nbosize*sizeof(float)); + cudaMemcpy(device_nbo, nbo, nbosize*sizeof(float),cudaMemcpyHostToDevice); + + device_vbo = NULL; + cudaMalloc((void**)&device_vbo, vbosize*sizeof(float)); + cudaMemcpy( device_vbo, vbo, vbosize*sizeof(float), cudaMemcpyHostToDevice); + + device_cbo = NULL; + cudaMalloc((void**)&device_cbo, cbosize*sizeof(float)); + cudaMemcpy( device_cbo, cbo, cbosize*sizeof(float), cudaMemcpyHostToDevice); + + tileSize = 32; + int primitiveBlocks = ceil(((float)vbosize/3)/((float)tileSize)); + + //------------------------------ + //vertex shader + //------------------------------ + + + + //setting up camera first + + glm::vec3 CameraPosition = glm::vec3(0.0f, 0.5f, 7.0f); + int width = resolution.x; + int height = resolution.y; + + glm::mat4 projection = glm::perspective(60.0f, static_cast(width) / static_cast(height), 0.1f, 50.0f); + + glm::mat4 camera = glm::lookAt(CameraPosition, glm::vec3(0.0, 0.5, 0), glm::vec3(0, 1, 0)); + + projection = projection * camera; + + cudaMat4 cudaProjection = utilityCore::glmMat4ToCudaMat4(projection); + + glm::mat4 invProjection = glm::inverse(projection); + + cudaMat4 cudaInvProjection = utilityCore::glmMat4ToCudaMat4(invProjection); + + + /* + glm::mat4 projection=glm::perspective(60.0f, static_cast(resolution.x)/ static_cast(resolution.y),0.1f, 30.0f); + glm::vec3 cameraposition= glm::vec3(0,2,10); + glm::mat4 camera= glm::lookAt(cameraposition,glm::vec3(0,0,0),glm::vec3(0,5,0)); + + //projection=projection*camera; + + cudaMat4 project= utilityCore::glmMat4ToCudaMat4(projection); + */ + vertexShadeKernel<<>>(device_vbo, vbosize, cudaProjection); + + + cudaDeviceSynchronize(); + //------------------------------ + //primitive assembly + //------------------------------ + primitiveBlocks = ceil(((float)ibosize/3)/((float)tileSize)); + primitiveAssemblyKernel<<>>(device_vbo, vbosize, device_cbo, cbosize, device_ibo, ibosize, primitives,device_nbo,nbosize); + + cudaDeviceSynchronize(); + //------------------------------ + //rasterization + //------------------------------ + rasterizationKernel<<>>(primitives, ibosize/3, depthbuffer, resolution); + + cudaDeviceSynchronize(); + //------------------------------ + //fragment shader + //------------------------------ + fragmentShadeKernel<<>>(depthbuffer, resolution, lightpos, lightcol, device_stencil); + + cudaDeviceSynchronize(); + //------------------------------ + //write fragments to framebuffer + //------------------------------ + render<<>>(resolution, depthbuffer, framebuffer); + sendImageToPBO<<>>(PBOpos, resolution, framebuffer); + + cudaDeviceSynchronize(); + + kernelCleanup(); + + checkCUDAError("Kernel failed!"); +} + +void kernelCleanup(){ + cudaFree( primitives ); + cudaFree( device_vbo ); + cudaFree( device_cbo ); + cudaFree( device_ibo ); + cudaFree( device_nbo ); + cudaFree( framebuffer ); + cudaFree( depthbuffer ); + cudaFree(device_stencil); +} + diff --git a/src/rasterizeKernels.h b/src/rasterizeKernels.h index bef3653..355e2f1 100755 --- a/src/rasterizeKernels.h +++ b/src/rasterizeKernels.h @@ -1,17 +1,17 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#ifndef RASTERIZEKERNEL_H -#define RASTERIZEKERNEL_H - -#include -#include -#include -#include -#include -#include "glm/glm.hpp" - -void kernelCleanup(); -void cudaRasterizeCore(uchar4* pos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* cbo, int cbosize, int* ibo, int ibosize); - -#endif //RASTERIZEKERNEL_H +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#ifndef RASTERIZEKERNEL_H +#define RASTERIZEKERNEL_H + +#include +#include +#include +#include +#include +#include "glm/glm.hpp" + +void kernelCleanup(); +void cudaRasterizeCore(uchar4* pos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* cbo, int cbosize, int* ibo, int ibosize, int nbosize, float* nbo, glm::vec3 lightpos, glm::vec3 lightcol); + +#endif //RASTERIZEKERNEL_H diff --git a/src/rasterizeTools.h b/src/rasterizeTools.h index e9b5dcc..f2b864f 100755 --- a/src/rasterizeTools.h +++ b/src/rasterizeTools.h @@ -1,78 +1,83 @@ -// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania -// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania - -#ifndef RASTERIZETOOLS_H -#define RASTERIZETOOLS_H - -#include -#include "glm/glm.hpp" -#include "utilities.h" -#include "cudaMat4.h" - -struct triangle { - glm::vec3 p0; - glm::vec3 p1; - glm::vec3 p2; - glm::vec3 c0; - glm::vec3 c1; - glm::vec3 c2; -}; - -struct fragment{ - glm::vec3 color; - glm::vec3 normal; - glm::vec3 position; -}; - -//Multiplies a cudaMat4 matrix and a vec4 -__host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v){ - glm::vec3 r(1,1,1); - r.x = (m.x.x*v.x)+(m.x.y*v.y)+(m.x.z*v.z)+(m.x.w*v.w); - r.y = (m.y.x*v.x)+(m.y.y*v.y)+(m.y.z*v.z)+(m.y.w*v.w); - r.z = (m.z.x*v.x)+(m.z.y*v.y)+(m.z.z*v.z)+(m.z.w*v.w); - return r; -} - -//LOOK: finds the axis aligned bounding box for a given triangle -__host__ __device__ void getAABBForTriangle(triangle tri, glm::vec3& minpoint, glm::vec3& maxpoint){ - minpoint = glm::vec3(min(min(tri.p0.x, tri.p1.x),tri.p2.x), - min(min(tri.p0.y, tri.p1.y),tri.p2.y), - min(min(tri.p0.z, tri.p1.z),tri.p2.z)); - maxpoint = glm::vec3(max(max(tri.p0.x, tri.p1.x),tri.p2.x), - max(max(tri.p0.y, tri.p1.y),tri.p2.y), - max(max(tri.p0.z, tri.p1.z),tri.p2.z)); -} - -//LOOK: calculates the signed area of a given triangle -__host__ __device__ float calculateSignedArea(triangle tri){ - return 0.5*((tri.p2.x - tri.p0.x)*(tri.p1.y - tri.p0.y) - (tri.p1.x - tri.p0.x)*(tri.p2.y - tri.p0.y)); -} - -//LOOK: helper function for calculating barycentric coordinates -__host__ __device__ float calculateBarycentricCoordinateValue(glm::vec2 a, glm::vec2 b, glm::vec2 c, triangle tri){ - triangle baryTri; - baryTri.p0 = glm::vec3(a,0); baryTri.p1 = glm::vec3(b,0); baryTri.p2 = glm::vec3(c,0); - return calculateSignedArea(baryTri)/calculateSignedArea(tri); -} - -//LOOK: calculates barycentric coordinates -__host__ __device__ glm::vec3 calculateBarycentricCoordinate(triangle tri, glm::vec2 point){ - float beta = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), point, glm::vec2(tri.p2.x,tri.p2.y), tri); - float gamma = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), glm::vec2(tri.p1.x,tri.p1.y), point, tri); - float alpha = 1.0-beta-gamma; - return glm::vec3(alpha,beta,gamma); -} - -//LOOK: checks if a barycentric coordinate is within the boundaries of a triangle -__host__ __device__ bool isBarycentricCoordInBounds(glm::vec3 barycentricCoord){ - return barycentricCoord.x >= 0.0 && barycentricCoord.x <= 1.0 && - barycentricCoord.y >= 0.0 && barycentricCoord.y <= 1.0 && - barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0; -} - -//LOOK: for a given barycentric coordinate, return the corresponding z position on the triangle -__host__ __device__ float getZAtCoordinate(glm::vec3 barycentricCoord, triangle tri){ - return -(barycentricCoord.x*tri.p0.z + barycentricCoord.y*tri.p1.z + barycentricCoord.z*tri.p2.z); -} - +// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania +// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania + +#ifndef RASTERIZETOOLS_H +#define RASTERIZETOOLS_H + +#include +#include "glm/glm.hpp" +#include "utilities.h" +#include "cudaMat4.h" + +struct triangle { + glm::vec3 p0; + glm::vec3 p1; + glm::vec3 p2; + glm::vec3 c0; + glm::vec3 c1; + glm::vec3 c2; + glm::vec3 n1; + glm::vec3 n2; + glm::vec3 n0; +}; + +struct fragment{ + glm::vec3 color; + glm::vec3 normal; + glm::vec3 position; + int lock; +}; + +//Multiplies a cudaMat4 matrix and a vec4 +__host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v){ + glm::vec3 r(1,1,1); + r.x = (m.x.x*v.x)+(m.x.y*v.y)+(m.x.z*v.z)+(m.x.w*v.w); + r.y = (m.y.x*v.x)+(m.y.y*v.y)+(m.y.z*v.z)+(m.y.w*v.w); + r.z = (m.z.x*v.x)+(m.z.y*v.y)+(m.z.z*v.z)+(m.z.w*v.w); + return r; +} + + +//LOOK: finds the axis aligned bounding box for a given triangle +__host__ __device__ void getAABBForTriangle(triangle tri, glm::vec3& minpoint, glm::vec3& maxpoint){ + minpoint = glm::vec3(min(min(tri.p0.x, tri.p1.x),tri.p2.x), + min(min(tri.p0.y, tri.p1.y),tri.p2.y), + min(min(tri.p0.z, tri.p1.z),tri.p2.z)); + maxpoint = glm::vec3(max(max(tri.p0.x, tri.p1.x),tri.p2.x), + max(max(tri.p0.y, tri.p1.y),tri.p2.y), + max(max(tri.p0.z, tri.p1.z),tri.p2.z)); +} + +//LOOK: calculates the signed area of a given triangle +__host__ __device__ float calculateSignedArea(triangle tri){ + return 0.5*((tri.p2.x - tri.p0.x)*(tri.p1.y - tri.p0.y) - (tri.p1.x - tri.p0.x)*(tri.p2.y - tri.p0.y)); +} + +//LOOK: helper function for calculating barycentric coordinates +__host__ __device__ float calculateBarycentricCoordinateValue(glm::vec2 a, glm::vec2 b, glm::vec2 c, triangle tri){ + triangle baryTri; + baryTri.p0 = glm::vec3(a,0); baryTri.p1 = glm::vec3(b,0); baryTri.p2 = glm::vec3(c,0); + return calculateSignedArea(baryTri)/calculateSignedArea(tri); +} + +//LOOK: calculates barycentric coordinates +__host__ __device__ glm::vec3 calculateBarycentricCoordinate(triangle tri, glm::vec2 point){ + float beta = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), point, glm::vec2(tri.p2.x,tri.p2.y), tri); + float gamma = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), glm::vec2(tri.p1.x,tri.p1.y), point, tri); + float alpha = 1.0-beta-gamma; + return glm::vec3(alpha,beta,gamma); +} + +//LOOK: checks if a barycentric coordinate is within the boundaries of a triangle +__host__ __device__ bool isBarycentricCoordInBounds(glm::vec3 barycentricCoord){ + return barycentricCoord.x >= 0.0 && barycentricCoord.x <= 1.0 && + barycentricCoord.y >= 0.0 && barycentricCoord.y <= 1.0 && + barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0; +} + +//LOOK: for a given barycentric coordinate, return the corresponding z position on the triangle +__host__ __device__ float getZAtCoordinate(glm::vec3 barycentricCoord, triangle tri){ + return -(barycentricCoord.x*tri.p0.z + barycentricCoord.y*tri.p1.z + barycentricCoord.z*tri.p2.z); +} + #endif \ No newline at end of file diff --git a/src/utilities.h b/src/utilities.h index 3e6ef6e..2416a7f 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -1,44 +1,44 @@ -//UTILITYCORE- A Utility Library by Yining Karl Li -//This file is part of UTILITYCORE, Coyright (c) 2012 Yining Karl Li - -#ifndef Pathtracer_utilities_h -#define Pathtracer_utilities_h - -#include "glm/glm.hpp" -#include -#include -#include -#include -#include -#include -#include -#include "cudaMat4.h" - -const float PI =3.1415926535897932384626422832795028841971; -const float TWO_PI =6.2831853071795864769252867665590057683943; -const float SQRT_OF_ONE_THIRD =0.5773502691896257645091487805019574556476; -const float E =2.7182818284590452353602874713526624977572; -const float EPSILON =.000000001; -const float ZERO_ABSORPTION_EPSILON =0.00001; -const float RAY_BIAS_AMOUNT =0.0002; - -namespace utilityCore { - extern float clamp(float f, float min, float max); - extern bool replaceString(std::string& str, const std::string& from, const std::string& to); - extern glm::vec3 clampRGB(glm::vec3 color); - extern bool epsilonCheck(float a, float b); - extern std::vector tokenizeString(std::string str); - extern cudaMat4 glmMat4ToCudaMat4(glm::mat4 a); - extern glm::mat4 cudaMat4ToGlmMat4(cudaMat4 a); - extern glm::mat4 buildTransformationMatrix(glm::vec3 translation, glm::vec3 rotation, glm::vec3 scale); - extern void printCudaMat4(cudaMat4 m); - extern std::string convertIntToString(int number); - - //----------------------------- - //-------GLM Printers---------- - //----------------------------- - extern void printMat4(glm::mat4); - extern void printVec4(glm::vec4); - extern void printVec3(glm::vec3); -} -#endif +//UTILITYCORE- A Utility Library by Yining Karl Li +//This file is part of UTILITYCORE, Coyright (c) 2012 Yining Karl Li + +#ifndef Pathtracer_utilities_h +#define Pathtracer_utilities_h + +#include "glm/glm.hpp" +#include +#include +#include +#include +#include +#include +#include +#include "cudaMat4.h" + +const float PI =3.1415926535897932384626422832795028841971; +const float TWO_PI =6.2831853071795864769252867665590057683943; +const float SQRT_OF_ONE_THIRD =0.5773502691896257645091487805019574556476; +const float E =2.7182818284590452353602874713526624977572; +const float EPSILON =.0001; +const float ZERO_ABSORPTION_EPSILON =0.00001; +const float RAY_BIAS_AMOUNT =0.0002; + +namespace utilityCore { + extern float clamp(float f, float min, float max); + extern bool replaceString(std::string& str, const std::string& from, const std::string& to); + extern glm::vec3 clampRGB(glm::vec3 color); + extern bool epsilonCheck(float a, float b); + extern std::vector tokenizeString(std::string str); + extern cudaMat4 glmMat4ToCudaMat4(glm::mat4 a); + extern glm::mat4 cudaMat4ToGlmMat4(cudaMat4 a); + extern glm::mat4 buildTransformationMatrix(glm::vec3 translation, glm::vec3 rotation, glm::vec3 scale); + extern void printCudaMat4(cudaMat4 m); + extern std::string convertIntToString(int number); + + //----------------------------- + //-------GLM Printers---------- + //----------------------------- + extern void printMat4(glm::mat4); + extern void printVec4(glm::vec4); + extern void printVec3(glm::vec3); +} +#endif