diff --git a/PROJ3_WIN/565Rasterizer.sdf b/PROJ3_WIN/565Rasterizer.sdf
new file mode 100644
index 0000000..b693313
Binary files /dev/null and b/PROJ3_WIN/565Rasterizer.sdf differ
diff --git a/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj
index 1077f39..a072658 100755
--- a/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj
+++ b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj
@@ -86,6 +86,7 @@
$(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include;C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.0/C/common/inc;../shared/glew/includes;../shared/freeglut/includes
+ compute_20,sm_20
@@ -111,6 +112,7 @@
$(ProjectDir)$(Platform)/$(Configuration)/%(Filename)%(Extension).obj
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include;C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.0/C/common/inc;../shared/glew/includes;../shared/freeglut/includes
+ compute_20,sm_20
diff --git a/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj.gundeep.nvuser b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj.gundeep.nvuser
new file mode 100644
index 0000000..2fa0bb0
--- /dev/null
+++ b/PROJ3_WIN/565Rasterizer/565Rasterizer.vcxproj.gundeep.nvuser
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/PROJ3_WIN/Readme Rasterizer.pdf b/PROJ3_WIN/Readme Rasterizer.pdf
new file mode 100644
index 0000000..b53db11
Binary files /dev/null and b/PROJ3_WIN/Readme Rasterizer.pdf differ
diff --git a/objs/cube.obj b/objs/cube.obj
new file mode 100644
index 0000000..f696427
--- /dev/null
+++ b/objs/cube.obj
@@ -0,0 +1,34 @@
+# cube.obj
+#
+
+g cube
+
+v 0.0 0.0 0.0
+v 0.0 0.0 1.0
+v 0.0 1.0 0.0
+v 0.0 1.0 1.0
+v 1.0 0.0 0.0
+v 1.0 0.0 1.0
+v 1.0 1.0 0.0
+v 1.0 1.0 1.0
+
+vn 0.0 0.0 1.0
+vn 0.0 0.0 -1.0
+vn 0.0 1.0 0.0
+vn 0.0 -1.0 0.0
+vn 1.0 0.0 0.0
+vn -1.0 0.0 0.0
+
+f 1//2 7//2 5//2
+f 1//2 3//2 7//2
+f 1//6 4//6 3//6
+f 1//6 2//6 4//6
+f 3//3 8//3 7//3
+f 3//3 4//3 8//3
+f 5//5 7//5 8//5
+f 5//5 8//5 6//5
+f 1//4 5//4 6//4
+f 1//4 6//4 2//4
+f 2//1 6//1 8//1
+f 2//1 8//1 4//1
+
diff --git a/objs/tri.obj b/objs/tri.obj
new file mode 100644
index 0000000..5085e34
--- /dev/null
+++ b/objs/tri.obj
@@ -0,0 +1,23 @@
+# tri.obj
+#
+
+g cube
+
+v 0.0 0.0 0.0
+v 0.0 0.0 1.0
+v 0.0 1.0 0.0
+v 0.0 1.0 1.0
+v 1.0 0.0 0.0
+v 1.0 0.0 1.0
+v 1.0 1.0 0.0
+v 1.0 1.0 1.0
+
+vn 0.0 0.0 1.0
+vn 0.0 0.0 -1.0
+vn 0.0 1.0 0.0
+vn 0.0 -1.0 0.0
+vn 1.0 0.0 0.0
+vn -1.0 0.0 0.0
+
+f 2//1 8//1 4//1
+
diff --git a/src/ObjCore/obj.cpp b/src/ObjCore/obj.cpp
index e748574..a7f6e6a 100755
--- a/src/ObjCore/obj.cpp
+++ b/src/ObjCore/obj.cpp
@@ -9,7 +9,7 @@
using namespace std;
-obj::obj(){
+obj::obj(){ // constructor
vbosize = 0;
nbosize = 0;
cbosize = 0;
@@ -22,7 +22,7 @@ obj::obj(){
}
-obj::~obj(){
+obj::~obj(){ //destructor
/*delete vbo;
delete nbo;
delete cbo;
@@ -37,7 +37,7 @@ obj::~obj(){
void obj::buildVBOs(){
recenter();
- vector VBOvec;
+ vector VBOvec; // vertex buffer object
vector NBOvec;
vector IBOvec;
int index = 0;
diff --git a/src/main.cpp b/src/main.cpp
index dfb689a..47e77bf 100755
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,354 +1,372 @@
-// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
-// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
-
-#include "main.h"
-
-//-------------------------------
-//-------------MAIN--------------
-//-------------------------------
-
-int main(int argc, char** argv){
-
- bool loadedScene = false;
- for(int i=1; ibuildVBOs();
- delete loader;
- loadedScene = true;
- }
- }
-
- if(!loadedScene){
- cout << "Usage: mesh=[obj file]" << endl;
- return 0;
- }
-
- frame = 0;
- seconds = time (NULL);
- fpstracker = 0;
-
- // Launch CUDA/GL
- #ifdef __APPLE__
- // Needed in OSX to force use of OpenGL3.2
- glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3);
- glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 2);
- glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
- glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
- init();
- #else
- init(argc, argv);
- #endif
-
- initCuda();
-
- initVAO();
- initTextures();
-
- GLuint passthroughProgram;
- passthroughProgram = initShader("shaders/passthroughVS.glsl", "shaders/passthroughFS.glsl");
-
- glUseProgram(passthroughProgram);
- glActiveTexture(GL_TEXTURE0);
-
- #ifdef __APPLE__
- // send into GLFW main loop
- while(1){
- display();
- if (glfwGetKey(GLFW_KEY_ESC) == GLFW_PRESS || !glfwGetWindowParam( GLFW_OPENED )){
- kernelCleanup();
- cudaDeviceReset();
- exit(0);
- }
- }
-
- glfwTerminate();
- #else
- glutDisplayFunc(display);
- glutKeyboardFunc(keyboard);
-
- glutMainLoop();
- #endif
- kernelCleanup();
- return 0;
-}
-
-//-------------------------------
-//---------RUNTIME STUFF---------
-//-------------------------------
-
-void runCuda(){
- // Map OpenGL buffer object for writing from CUDA on a single GPU
- // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
- dptr=NULL;
-
- vbo = mesh->getVBO();
- vbosize = mesh->getVBOsize();
-
- float newcbo[] = {0.0, 1.0, 0.0,
- 0.0, 0.0, 1.0,
- 1.0, 0.0, 0.0};
- cbo = newcbo;
- cbosize = 9;
-
- ibo = mesh->getIBO();
- ibosize = mesh->getIBOsize();
-
- cudaGLMapBufferObject((void**)&dptr, pbo);
- cudaRasterizeCore(dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize);
- cudaGLUnmapBufferObject(pbo);
-
- vbo = NULL;
- cbo = NULL;
- ibo = NULL;
-
- frame++;
- fpstracker++;
-
-}
-
-#ifdef __APPLE__
-
- void display(){
- runCuda();
- time_t seconds2 = time (NULL);
-
- if(seconds2-seconds >= 1){
-
- fps = fpstracker/(seconds2-seconds);
- fpstracker = 0;
- seconds = seconds2;
-
- }
-
- string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS";
-
- glfwSetWindowTitle(title.c_str());
-
-
- glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
- glBindTexture(GL_TEXTURE_2D, displayImage);
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height,
- GL_RGBA, GL_UNSIGNED_BYTE, NULL);
-
-
- glClear(GL_COLOR_BUFFER_BIT);
-
- // VAO, shader program, and texture already bound
- glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
-
- glfwSwapBuffers();
- }
-
-#else
-
- void display(){
- runCuda();
- time_t seconds2 = time (NULL);
-
- if(seconds2-seconds >= 1){
-
- fps = fpstracker/(seconds2-seconds);
- fpstracker = 0;
- seconds = seconds2;
-
- }
-
- string title = "CIS565 Rasterizer | "+ utilityCore::convertIntToString((int)fps) + "FPS";
- glutSetWindowTitle(title.c_str());
-
- glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
- glBindTexture(GL_TEXTURE_2D, displayImage);
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height,
- GL_RGBA, GL_UNSIGNED_BYTE, NULL);
-
- glClear(GL_COLOR_BUFFER_BIT);
-
- // VAO, shader program, and texture already bound
- glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
-
- glutPostRedisplay();
- glutSwapBuffers();
- }
-
- void keyboard(unsigned char key, int x, int y)
- {
- switch (key)
- {
- case(27):
- shut_down(1);
- break;
- }
- }
-
-#endif
-
-//-------------------------------
-//----------SETUP STUFF----------
-//-------------------------------
-
-#ifdef __APPLE__
- void init(){
-
- if (glfwInit() != GL_TRUE){
- shut_down(1);
- }
-
- // 16 bit color, no depth, alpha or stencil buffers, windowed
- if (glfwOpenWindow(width, height, 5, 6, 5, 0, 0, 0, GLFW_WINDOW) != GL_TRUE){
- shut_down(1);
- }
-
- // Set up vertex array object, texture stuff
- initVAO();
- initTextures();
- }
-#else
- void init(int argc, char* argv[]){
- glutInit(&argc, argv);
- glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
- glutInitWindowSize(width, height);
- glutCreateWindow("CIS565 Rasterizer");
-
- // Init GLEW
- glewInit();
- GLenum err = glewInit();
- if (GLEW_OK != err)
- {
- /* Problem: glewInit failed, something is seriously wrong. */
- std::cout << "glewInit failed, aborting." << std::endl;
- exit (1);
- }
-
- initVAO();
- initTextures();
- }
-#endif
-
-void initPBO(GLuint* pbo){
- if (pbo) {
- // set up vertex data parameter
- int num_texels = width*height;
- int num_values = num_texels * 4;
- int size_tex_data = sizeof(GLubyte) * num_values;
-
- // Generate a buffer ID called a PBO (Pixel Buffer Object)
- glGenBuffers(1,pbo);
- // Make this the current UNPACK buffer (OpenGL is state-based)
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo);
- // Allocate data for the buffer. 4-channel 8-bit image
- glBufferData(GL_PIXEL_UNPACK_BUFFER, size_tex_data, NULL, GL_DYNAMIC_COPY);
- cudaGLRegisterBufferObject( *pbo );
- }
-}
-
-void initCuda(){
- // Use device with highest Gflops/s
- cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
-
- initPBO(&pbo);
-
- // Clean up on program exit
- atexit(cleanupCuda);
-
- runCuda();
-}
-
-void initTextures(){
- glGenTextures(1,&displayImage);
- glBindTexture(GL_TEXTURE_2D, displayImage);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA,
- GL_UNSIGNED_BYTE, NULL);
-}
-
-void initVAO(void){
- GLfloat vertices[] =
- {
- -1.0f, -1.0f,
- 1.0f, -1.0f,
- 1.0f, 1.0f,
- -1.0f, 1.0f,
- };
-
- GLfloat texcoords[] =
- {
- 1.0f, 1.0f,
- 0.0f, 1.0f,
- 0.0f, 0.0f,
- 1.0f, 0.0f
- };
-
- GLushort indices[] = { 0, 1, 3, 3, 1, 2 };
-
- GLuint vertexBufferObjID[3];
- glGenBuffers(3, vertexBufferObjID);
-
- glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[0]);
- glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
- glVertexAttribPointer((GLuint)positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0);
- glEnableVertexAttribArray(positionLocation);
-
- glBindBuffer(GL_ARRAY_BUFFER, vertexBufferObjID[1]);
- glBufferData(GL_ARRAY_BUFFER, sizeof(texcoords), texcoords, GL_STATIC_DRAW);
- glVertexAttribPointer((GLuint)texcoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0);
- glEnableVertexAttribArray(texcoordsLocation);
-
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBufferObjID[2]);
- glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
-}
-
-GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath){
- GLuint program = glslUtility::createProgram(vertexShaderPath, fragmentShaderPath, attributeLocations, 2);
- GLint location;
-
- glUseProgram(program);
-
- if ((location = glGetUniformLocation(program, "u_image")) != -1)
- {
- glUniform1i(location, 0);
- }
-
- return program;
-}
-
-//-------------------------------
-//---------CLEANUP STUFF---------
-//-------------------------------
-
-void cleanupCuda(){
- if(pbo) deletePBO(&pbo);
- if(displayImage) deleteTexture(&displayImage);
-}
-
-void deletePBO(GLuint* pbo){
- if (pbo) {
- // unregister this buffer object with CUDA
- cudaGLUnregisterBufferObject(*pbo);
-
- glBindBuffer(GL_ARRAY_BUFFER, *pbo);
- glDeleteBuffers(1, pbo);
-
- *pbo = (GLuint)NULL;
- }
-}
-
-void deleteTexture(GLuint* tex){
- glDeleteTextures(1, tex);
- *tex = (GLuint)NULL;
-}
-
-void shut_down(int return_code){
- kernelCleanup();
- cudaDeviceReset();
- #ifdef __APPLE__
- glfwTerminate();
- #endif
- exit(return_code);
-}
+// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
+// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
+
+#include "main.h"
+
+//-------------------------------
+//-------------MAIN--------------
+//-------------------------------
+
+int main(int argc, char** argv){
+
+ bool loadedScene = false;
+ for(int i=1; ibuildVBOs();
+
+ delete loader;
+ loadedScene = true;
+ }
+ }
+
+ if(!loadedScene){
+ cout << "Usage: mesh=[obj file]" << endl;
+ return 0;
+ }
+
+ frame = 0;
+ seconds = time (NULL);
+ fpstracker = 0;
+
+ // Launch CUDA/GL
+ #ifdef __APPLE__
+ // Needed in OSX to force use of OpenGL3.2
+ glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3);
+ glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 2);
+ glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
+ glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+ init();
+ #else
+ init(argc, argv);
+ #endif
+
+ initCuda();
+
+ initVAO();
+ initTextures();
+
+ GLuint passthroughProgram;
+ passthroughProgram = initShader("shaders/passthroughVS.glsl", "shaders/passthroughFS.glsl");
+
+ glUseProgram(passthroughProgram);
+ glActiveTexture(GL_TEXTURE0);
+
+ #ifdef __APPLE__
+ // send into GLFW main loop
+ while(1){
+ display();
+ if (glfwGetKey(GLFW_KEY_ESC) == GLFW_PRESS || !glfwGetWindowParam( GLFW_OPENED )){
+ kernelCleanup();
+ cudaDeviceReset();
+ exit(0);
+ }
+ }
+
+ glfwTerminate();
+ #else
+ glutDisplayFunc(display);
+ glutKeyboardFunc(keyboard);
+
+ glutMainLoop();
+ #endif
+ kernelCleanup();
+ return 0;
+}
+
+//-------------------------------
+//---------RUNTIME STUFF---------
+//-------------------------------
+
+// Renders one frame: hands the mesh buffers to the CUDA rasterizer and lets it
+// draw into the OpenGL pixel buffer object `pbo`.
+//
+// Reads the vertex, normal and index buffers from the global `mesh`, supplies a
+// fixed 9-float colour table plus a fixed light position and colour, and
+// advances the `frame` and `fpstracker` counters once per call.
+void runCuda(){
+    // Map OpenGL buffer object for writing from CUDA on a single GPU
+    // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
+    dptr = NULL;
+
+    vbo = mesh->getVBO();
+    vbosize = mesh->getVBOsize();
+
+    nbo = mesh->getNBO();
+    nbosize = mesh->getNBOsize();
+
+    // The colour table lives on this function's stack; `cbo` is reset to NULL
+    // below so the global pointer never outlives the array.
+    float newcbo[] = {0.0f, 1.0f, 0.0f,
+                      0.0f, 0.0f, 1.0f,
+                      1.0f, 0.0f, 0.0f};
+    cbo = newcbo;
+    cbosize = 9;
+
+    ibo = mesh->getIBO();
+    ibosize = mesh->getIBOsize();
+
+    glm::vec3 lightcol = glm::vec3(1.0f, 1.0f, 1.0f);
+    glm::vec3 lightpos = glm::vec3(0.0f, 10.0f, 0.0f);
+
+    cudaGLMapBufferObject((void**)&dptr, pbo);
+    cudaRasterizeCore(dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbosize, nbo, lightpos, lightcol);
+    cudaGLUnmapBufferObject(pbo);
+
+    // Clear the per-frame pointers now that the rasterizer call has returned.
+    vbo = NULL;
+    cbo = NULL;
+    ibo = NULL;
+    nbo = NULL;
+
+    frame++;
+    fpstracker++;
+}
+
+#ifdef __APPLE__
+
+ // GLFW build: renders one frame with CUDA, refreshes the FPS figure shown in
+ // the window title, then draws the image through the already-bound quad.
+ void display(){
+     runCuda();
+
+     // Recompute the FPS figure once at least one second has gone by.
+     const time_t now = time (NULL);
+     if(now-seconds >= 1){
+         fps = fpstracker/(now-seconds);
+         fpstracker = 0;
+         seconds = now;
+     }
+
+     string title("CIS565 Rasterizer | ");
+     title += utilityCore::convertIntToString((int)fps);
+     title += "FPS";
+     glfwSetWindowTitle(title.c_str());
+
+     // With the PBO bound as the unpack buffer, the NULL data argument makes
+     // the texture upload read from the start of that buffer.
+     glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
+     glBindTexture(GL_TEXTURE_2D, displayImage);
+     glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height,
+                     GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+
+     glClear(GL_COLOR_BUFFER_BIT);
+
+     // VAO, shader program, and texture already bound
+     glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
+
+     glfwSwapBuffers();
+ }
+
+#else
+
+ // GLUT build: renders one frame with CUDA, refreshes the FPS figure shown in
+ // the window title, draws the image through the already-bound quad, and
+ // requests another redraw.
+ void display(){
+     runCuda();
+
+     // Recompute the FPS figure once at least one second has gone by.
+     const time_t now = time (NULL);
+     if(now-seconds >= 1){
+         fps = fpstracker/(now-seconds);
+         fpstracker = 0;
+         seconds = now;
+     }
+
+     string title("CIS565 Rasterizer | ");
+     title += utilityCore::convertIntToString((int)fps);
+     title += "FPS";
+     glutSetWindowTitle(title.c_str());
+
+     // With the PBO bound as the unpack buffer, the NULL data argument makes
+     // the texture upload read from the start of that buffer.
+     glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
+     glBindTexture(GL_TEXTURE_2D, displayImage);
+     glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height,
+                     GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+
+     glClear(GL_COLOR_BUFFER_BIT);
+
+     // VAO, shader program, and texture already bound
+     glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
+
+     glutPostRedisplay();
+     glutSwapBuffers();
+ }
+
+ // GLUT keyboard callback: pressing Escape (key code 27) shuts the program
+ // down through shut_down(1). The x and y arguments are not used.
+ void keyboard(unsigned char key, int x, int y)
+ {
+     const unsigned char ESCAPE_KEY = 27;
+     if (key == ESCAPE_KEY)
+     {
+         shut_down(1);
+     }
+ }
+
+#endif
+
+//-------------------------------
+//----------SETUP STUFF----------
+//-------------------------------
+
+#ifdef __APPLE__
+ // GLFW build: starts GLFW, opens the window, then creates the quad buffers
+ // and the display texture. Either failure ends the program via shut_down(1).
+ void init(){
+     const bool glfwReady = (glfwInit() == GL_TRUE);
+     if (!glfwReady){
+         shut_down(1);
+     }
+
+     // 16 bit color, no depth, alpha or stencil buffers, windowed
+     const bool windowOpen =
+         (glfwOpenWindow(width, height, 5, 6, 5, 0, 0, 0, GLFW_WINDOW) == GL_TRUE);
+     if (!windowOpen){
+         shut_down(1);
+     }
+
+     // Set up vertex array object, texture stuff
+     initVAO();
+     initTextures();
+ }
+#else
+ // GLUT build: creates the double-buffered RGBA window, initialises GLEW, then
+ // creates the quad buffers and the display texture.
+ //
+ // argc/argv are forwarded to glutInit. If GLEW fails to initialise, a message
+ // is printed and the process exits with status 1.
+ void init(int argc, char* argv[]){
+     glutInit(&argc, argv);
+     glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
+     glutInitWindowSize(width, height);
+     glutCreateWindow("CIS565 Rasterizer");
+
+     // Init GLEW exactly once and check the result; the earlier unchecked
+     // duplicate call was redundant.
+     GLenum err = glewInit();
+     if (GLEW_OK != err)
+     {
+         /* Problem: glewInit failed, something is seriously wrong. */
+         std::cout << "glewInit failed, aborting." << std::endl;
+         exit (1);
+     }
+
+     // NOTE(review): main() calls initVAO() and initTextures() again after
+     // init() returns, so these objects appear to be created twice - confirm
+     // which call site should remain.
+     initVAO();
+     initTextures();
+ }
+#endif
+
+// Creates the pixel buffer object that CUDA renders into and registers it with
+// CUDA. The buffer holds width*height texels of four 8-bit channels each.
+// Does nothing when `pbo` is a null pointer.
+void initPBO(GLuint* pbo){
+    if (!pbo) {
+        return;
+    }
+
+    // Size of the image in bytes: four GLubyte values per texel
+    const int texelCount = width*height;
+    const int valueCount = texelCount * 4;
+    const int byteCount = sizeof(GLubyte) * valueCount;
+
+    // Generate a buffer ID called a PBO (Pixel Buffer Object)
+    glGenBuffers(1,pbo);
+    // Make this the current UNPACK buffer (OpenGL is state-based)
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, *pbo);
+    // Allocate data for the buffer. 4-channel 8-bit image
+    glBufferData(GL_PIXEL_UNPACK_BUFFER, byteCount, NULL, GL_DYNAMIC_COPY);
+    cudaGLRegisterBufferObject( *pbo );
+}
+
+// Selects the CUDA device for GL interop, creates the shared pixel buffer,
+// registers cleanupCuda to run at process exit, and renders a first frame.
+void initCuda(){
+    // Use device with highest Gflops/s
+    const int fastestDevice = cutGetMaxGflopsDeviceId();
+    cudaGLSetGLDevice( fastestDevice );
+
+    initPBO(&pbo);
+
+    // Clean up on program exit
+    atexit(cleanupCuda);
+
+    runCuda();
+}
+
+// Creates the texture `displayImage` that the rendered frame is copied into:
+// width x height, RGBA8 storage, nearest-neighbour filtering, no initial data.
+void initTextures(){
+    const GLenum target = GL_TEXTURE_2D;
+
+    glGenTextures(1, &displayImage);
+    glBindTexture(target, displayImage);
+
+    // Nearest filtering for both minification and magnification
+    glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+    // Allocate storage only; the pixel pointer is NULL
+    glTexImage2D(target, 0, GL_RGBA8, width, height, 0, GL_BGRA,
+                 GL_UNSIGNED_BYTE, NULL);
+}
+
+// Uploads the quad used to show the rendered image: four corners spanning
+// -1..1 in x and y, their texture coordinates, and six indices forming two
+// triangles. Positions and texture coordinates are bound to the attribute
+// slots `positionLocation` and `texcoordsLocation`.
+void initVAO(void){
+    // Slots inside the generated buffer-name array
+    enum { POSITIONS = 0, TEXCOORDS = 1, INDICES = 2, BUFFER_COUNT = 3 };
+
+    GLfloat quadPositions[] =
+    {
+        -1.0f, -1.0f,
+         1.0f, -1.0f,
+         1.0f,  1.0f,
+        -1.0f,  1.0f,
+    };
+
+    GLfloat quadTexcoords[] =
+    {
+        1.0f, 1.0f,
+        0.0f, 1.0f,
+        0.0f, 0.0f,
+        1.0f, 0.0f
+    };
+
+    GLushort quadIndices[] = { 0, 1, 3, 3, 1, 2 };
+
+    GLuint buffers[BUFFER_COUNT];
+    glGenBuffers(BUFFER_COUNT, buffers);
+
+    // Positions: two floats per vertex, tightly packed
+    glBindBuffer(GL_ARRAY_BUFFER, buffers[POSITIONS]);
+    glBufferData(GL_ARRAY_BUFFER, sizeof(quadPositions), quadPositions, GL_STATIC_DRAW);
+    glVertexAttribPointer((GLuint)positionLocation, 2, GL_FLOAT, GL_FALSE, 0, 0);
+    glEnableVertexAttribArray(positionLocation);
+
+    // Texture coordinates: two floats per vertex, tightly packed
+    glBindBuffer(GL_ARRAY_BUFFER, buffers[TEXCOORDS]);
+    glBufferData(GL_ARRAY_BUFFER, sizeof(quadTexcoords), quadTexcoords, GL_STATIC_DRAW);
+    glVertexAttribPointer((GLuint)texcoordsLocation, 2, GL_FLOAT, GL_FALSE, 0, 0);
+    glEnableVertexAttribArray(texcoordsLocation);
+
+    // Element indices for the two triangles
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffers[INDICES]);
+    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(quadIndices), quadIndices, GL_STATIC_DRAW);
+}
+
+// Builds the shader program from the two given source paths, makes it current,
+// and points its "u_image" sampler (when the program has one) at texture
+// unit 0. Returns the program handle.
+GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath){
+    const GLuint program = glslUtility::createProgram(vertexShaderPath, fragmentShaderPath, attributeLocations, 2);
+
+    glUseProgram(program);
+
+    // A location of -1 means no such uniform was found in the program
+    const GLint imageUniform = glGetUniformLocation(program, "u_image");
+    if (imageUniform != -1)
+    {
+        glUniform1i(imageUniform, 0);
+    }
+
+    return program;
+}
+
+//-------------------------------
+//---------CLEANUP STUFF---------
+//-------------------------------
+
+// Exit-time cleanup: releases the pixel buffer object and the display texture
+// if their handles are non-zero.
+void cleanupCuda(){
+    if (pbo != 0) {
+        deletePBO(&pbo);
+    }
+    if (displayImage != 0) {
+        deleteTexture(&displayImage);
+    }
+}
+
+// Unregisters the buffer object from CUDA, deletes it from OpenGL, and zeroes
+// the caller's handle. Does nothing when `pbo` is a null pointer.
+void deletePBO(GLuint* pbo){
+    if (!pbo) {
+        return;
+    }
+
+    // unregister this buffer object with CUDA
+    cudaGLUnregisterBufferObject(*pbo);
+
+    glBindBuffer(GL_ARRAY_BUFFER, *pbo);
+    glDeleteBuffers(1, pbo);
+
+    // Leave a zero handle behind so the buffer is not released twice
+    *pbo = 0;
+}
+
+// Deletes the OpenGL texture named by *tex and zeroes the caller's handle.
+void deleteTexture(GLuint* tex){
+    glDeleteTextures(1, tex);
+    *tex = 0;
+}
+
+// Tears the application down and terminates the process.
+// Calls kernelCleanup(), resets the CUDA device, terminates GLFW on Apple
+// builds, then exits with `return_code`. This function does not return.
+void shut_down(int return_code){
+ kernelCleanup();
+ cudaDeviceReset();
+ #ifdef __APPLE__
+ glfwTerminate();
+ #endif
+ exit(return_code);
+}
diff --git a/src/main.h b/src/main.h
index 63bf0fa..127d938 100755
--- a/src/main.h
+++ b/src/main.h
@@ -1,105 +1,107 @@
-// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
-// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
-
-#ifndef MAIN_H
-#define MAIN_H
-
-#ifdef __APPLE__
- #include
-#else
- #include
- #include
-#endif
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "glslUtility.h"
-#include "glm/glm.hpp"
-#include "rasterizeKernels.h"
-#include "utilities.h"
-#include "ObjCore/objloader.h"
-
-using namespace std;
-
-//-------------------------------
-//------------GL STUFF-----------
-//-------------------------------
-int frame;
-int fpstracker;
-double seconds;
-int fps = 0;
-GLuint positionLocation = 0;
-GLuint texcoordsLocation = 1;
-const char *attributeLocations[] = { "Position", "Tex" };
-GLuint pbo = (GLuint)NULL;
-GLuint displayImage;
-uchar4 *dptr;
-
-obj* mesh;
-
-float* vbo;
-int vbosize;
-float* cbo;
-int cbosize;
-int* ibo;
-int ibosize;
-
-//-------------------------------
-//----------CUDA STUFF-----------
-//-------------------------------
-
-int width=800; int height=800;
-
-//-------------------------------
-//-------------MAIN--------------
-//-------------------------------
-
-int main(int argc, char** argv);
-
-//-------------------------------
-//---------RUNTIME STUFF---------
-//-------------------------------
-
-void runCuda();
-
-#ifdef __APPLE__
- void display();
-#else
- void display();
- void keyboard(unsigned char key, int x, int y);
-#endif
-
-//-------------------------------
-//----------SETUP STUFF----------
-//-------------------------------
-
-#ifdef __APPLE__
- void init();
-#else
- void init(int argc, char* argv[]);
-#endif
-
-void initPBO(GLuint* pbo);
-void initCuda();
-void initTextures();
-void initVAO();
-GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath);
-
-//-------------------------------
-//---------CLEANUP STUFF---------
-//-------------------------------
-
-void cleanupCuda();
-void deletePBO(GLuint* pbo);
-void deleteTexture(GLuint* tex);
-void shut_down(int return_code);
-
+// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
+// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
+
+#ifndef MAIN_H
+#define MAIN_H
+
+#ifdef __APPLE__
+ #include
+#else
+ #include
+ #include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "glslUtility.h"
+#include "glm/glm.hpp"
+#include "rasterizeKernels.h"
+#include "utilities.h"
+#include "ObjCore/objloader.h"
+
+using namespace std;
+
+//-------------------------------
+//------------GL STUFF-----------
+//-------------------------------
+// NOTE(review): these are definitions, not declarations, so this header can
+// only be included from one translation unit - confirm that is intended.
+// Frame counter; incremented once per runCuda() call.
+int frame;
+// Frames rendered since the FPS figure was last recomputed in display().
+int fpstracker;
+// Timestamp taken from time(NULL) when the FPS figure was last recomputed.
+double seconds;
+// Most recent frames-per-second figure shown in the window title.
+int fps = 0;
+// Vertex attribute slots used by initVAO().
+GLuint positionLocation = 0;
+GLuint texcoordsLocation = 1;
+// Attribute names handed to glslUtility::createProgram in initShader().
+const char *attributeLocations[] = { "Position", "Tex" };
+// Pixel buffer object shared between OpenGL and CUDA; 0 until initPBO() runs.
+GLuint pbo = (GLuint)NULL;
+// Texture the rendered frame is copied into for display.
+GLuint displayImage;
+// Pointer to the mapped pixel buffer, set by cudaGLMapBufferObject in runCuda().
+uchar4 *dptr;
+
+// The loaded mesh; runCuda() reads its vertex, normal and index buffers.
+obj* mesh;
+
+// Per-frame buffer pointers and element counts; runCuda() fills them in and
+// resets the pointers to NULL afterwards.
+float* vbo;
+float* nbo;
+int nbosize;
+int vbosize;
+float* cbo;
+int cbosize;
+int* ibo;
+int ibosize;
+
+//-------------------------------
+//----------CUDA STUFF-----------
+//-------------------------------
+
+int width=800; int height=800;
+
+//-------------------------------
+//-------------MAIN--------------
+//-------------------------------
+
+int main(int argc, char** argv);
+
+//-------------------------------
+//---------RUNTIME STUFF---------
+//-------------------------------
+
+void runCuda();
+
+#ifdef __APPLE__
+ void display();
+#else
+ void display();
+ void keyboard(unsigned char key, int x, int y);
+#endif
+
+//-------------------------------
+//----------SETUP STUFF----------
+//-------------------------------
+
+#ifdef __APPLE__
+ void init();
+#else
+ void init(int argc, char* argv[]);
+#endif
+
+void initPBO(GLuint* pbo);
+void initCuda();
+void initTextures();
+void initVAO();
+GLuint initShader(const char *vertexShaderPath, const char *fragmentShaderPath);
+
+//-------------------------------
+//---------CLEANUP STUFF---------
+//-------------------------------
+
+void cleanupCuda();
+void deletePBO(GLuint* pbo);
+void deleteTexture(GLuint* tex);
+void shut_down(int return_code);
+
#endif
\ No newline at end of file
diff --git a/src/rasterizeKernels.cu b/src/rasterizeKernels.cu
index 826ec80..0b4c988 100755
--- a/src/rasterizeKernels.cu
+++ b/src/rasterizeKernels.cu
@@ -1,267 +1,627 @@
-// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
-// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
-
-#include
-#include
-#include
-#include
-#include
-#include "rasterizeKernels.h"
-#include "rasterizeTools.h"
-
-glm::vec3* framebuffer;
-fragment* depthbuffer;
-float* device_vbo;
-float* device_cbo;
-int* device_ibo;
-triangle* primitives;
-
-void checkCUDAError(const char *msg) {
- cudaError_t err = cudaGetLastError();
- if( cudaSuccess != err) {
- fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) );
- exit(EXIT_FAILURE);
- }
-}
-
-//Handy dandy little hashing function that provides seeds for random number generation
-__host__ __device__ unsigned int hash(unsigned int a){
- a = (a+0x7ed55d16) + (a<<12);
- a = (a^0xc761c23c) ^ (a>>19);
- a = (a+0x165667b1) + (a<<5);
- a = (a+0xd3a2646c) ^ (a<<9);
- a = (a+0xfd7046c5) + (a<<3);
- a = (a^0xb55a4f09) ^ (a>>16);
- return a;
-}
-
-//Writes a given fragment to a fragment buffer at a given location
-__host__ __device__ void writeToDepthbuffer(int x, int y, fragment frag, fragment* depthbuffer, glm::vec2 resolution){
- if(x255){
- color.x = 255;
- }
-
- if(color.y>255){
- color.y = 255;
- }
-
- if(color.z>255){
- color.z = 255;
- }
-
- // Each thread writes one pixel location in the texture (textel)
- PBOpos[index].w = 0;
- PBOpos[index].x = color.x;
- PBOpos[index].y = color.y;
- PBOpos[index].z = color.z;
- }
-}
-
-//TODO: Implement a vertex shader
-__global__ void vertexShadeKernel(float* vbo, int vbosize){
- int index = (blockIdx.x * blockDim.x) + threadIdx.x;
- if(index>>(resolution, framebuffer, glm::vec3(0,0,0));
-
- fragment frag;
- frag.color = glm::vec3(0,0,0);
- frag.normal = glm::vec3(0,0,0);
- frag.position = glm::vec3(0,0,-10000);
- clearDepthBuffer<<>>(resolution, depthbuffer,frag);
-
- //------------------------------
- //memory stuff
- //------------------------------
- primitives = NULL;
- cudaMalloc((void**)&primitives, (ibosize/3)*sizeof(triangle));
-
- device_ibo = NULL;
- cudaMalloc((void**)&device_ibo, ibosize*sizeof(int));
- cudaMemcpy( device_ibo, ibo, ibosize*sizeof(int), cudaMemcpyHostToDevice);
-
- device_vbo = NULL;
- cudaMalloc((void**)&device_vbo, vbosize*sizeof(float));
- cudaMemcpy( device_vbo, vbo, vbosize*sizeof(float), cudaMemcpyHostToDevice);
-
- device_cbo = NULL;
- cudaMalloc((void**)&device_cbo, cbosize*sizeof(float));
- cudaMemcpy( device_cbo, cbo, cbosize*sizeof(float), cudaMemcpyHostToDevice);
-
- tileSize = 32;
- int primitiveBlocks = ceil(((float)vbosize/3)/((float)tileSize));
-
- //------------------------------
- //vertex shader
- //------------------------------
- vertexShadeKernel<<>>(device_vbo, vbosize);
-
- cudaDeviceSynchronize();
- //------------------------------
- //primitive assembly
- //------------------------------
- primitiveBlocks = ceil(((float)ibosize/3)/((float)tileSize));
- primitiveAssemblyKernel<<>>(device_vbo, vbosize, device_cbo, cbosize, device_ibo, ibosize, primitives);
-
- cudaDeviceSynchronize();
- //------------------------------
- //rasterization
- //------------------------------
- rasterizationKernel<<>>(primitives, ibosize/3, depthbuffer, resolution);
-
- cudaDeviceSynchronize();
- //------------------------------
- //fragment shader
- //------------------------------
- fragmentShadeKernel<<>>(depthbuffer, resolution);
-
- cudaDeviceSynchronize();
- //------------------------------
- //write fragments to framebuffer
- //------------------------------
- render<<>>(resolution, depthbuffer, framebuffer);
- sendImageToPBO<<>>(PBOpos, resolution, framebuffer);
-
- cudaDeviceSynchronize();
-
- kernelCleanup();
-
- checkCUDAError("Kernel failed!");
-}
-
-void kernelCleanup(){
- cudaFree( primitives );
- cudaFree( device_vbo );
- cudaFree( device_cbo );
- cudaFree( device_ibo );
- cudaFree( framebuffer );
- cudaFree( depthbuffer );
-}
-
+// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
+// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
+#include
+#include
+#include
+#include
+#include
+#include "rasterizeKernels.h"
+#include "rasterizeTools.h"
+// Forward slashes keep this include portable and match "glm/glm.hpp" in main.h.
+#include "glm/gtc/matrix_transform.hpp"
+
+// File-scope buffers shared by the rasterizer stages.
+// NOTE(review): the code that allocates and frees these in this revision is
+// not visible here; the device_ prefix suggests GPU allocations - confirm.
+// Per-pixel colour output.
+glm::vec3* framebuffer;
+// Per-pixel fragment storage.
+fragment* depthbuffer;
+// NOTE(review): presumably a per-pixel stencil mask - confirm against its use.
+int* device_stencil;
+// Copies of the vertex, colour, index and normal buffers.
+float* device_vbo;
+float* device_cbo;
+int* device_ibo;
+float* device_nbo;
+// Assembled triangles.
+triangle* primitives;
+
+void checkCUDAError(const char *msg) {
+ cudaError_t err = cudaGetLastError();
+ if( cudaSuccess != err) {
+ fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) );
+ exit(EXIT_FAILURE);
+ }
+}
+
+// Integer mixing function used to derive seeds for random number generation.
+// Applies six add/xor/shift rounds to `a` and returns the mixed value; all
+// arithmetic is unsigned and wraps modulo 2^32.
+__host__ __device__ unsigned int hash(unsigned int a){
+    unsigned int h = a;
+    h = h + 0x7ed55d16 + (h << 12);
+    h = h ^ 0xc761c23c ^ (h >> 19);
+    h = h + 0x165667b1 + (h << 5);
+    h = (h + 0xd3a2646c) ^ (h << 9);
+    h = h + 0xfd7046c5 + (h << 3);
+    h = h ^ 0xb55a4f09 ^ (h >> 16);
+    return h;
+}
+
+//Writes a given fragment to a fragment buffer at a given location
+__host__ __device__ void writeToDepthbuffer(int x, int y, fragment frag, fragment* depthbuffer, glm::vec2 resolution){
+ if(x255){
+ color.x = 255;
+ }
+
+ if(color.y>255){
+ color.y = 255;
+ }
+
+ if(color.z>255){
+ color.z = 255;
+ }
+
+ // Each thread writes one pixel location in the texture (textel)
+ y=resolution.y-y;
+ x=resolution.x-x;
+ index=x+(y*resolution.x);
+
+
+ PBOpos[index].w = 0;
+ PBOpos[index].x = color.x;
+ PBOpos[index].y = color.y;
+ PBOpos[index].z = color.z;
+ }
+}
+
+//TODO: Implement a vertex shader
+__global__ void vertexShadeKernel(float* vbo, int vbosize , cudaMat4 project){//, float *nbo){
+ int index = (blockIdx.x * blockDim.x) + threadIdx.x;
+ if(index=resolution.y)
+ {
+ top=resolution.y-1;
+ }
+ else if (bottom <= 0 )
+ {
+ bottom=0;
+ }
+
+ int currentpoint=0;
+ currentpoint=top;
+ int xmin, xmax;
+ float xvaluetest1,xvaluetest2,xvaluetest3;
+
+ bool tri=false;
+
+ while(currentpoint!=bottom)
+ {
+ xmax=-1000000;
+ xmin=10000000;
+ //////// CASE2
+
+ if( (primitives[index].p1.x - primitives[index].p0.x)!=0)
+ {
+ if (primitives[index].p1.y+0.0001 <= primitives[index].p0.y && primitives[index].p1.y-0.0001 >= primitives[index].p0.y)
+ {
+ /*if (index == 1797) printf("Here\n");*/
+ xmin=min(primitives[index].p1.x,primitives[index].p0.x);
+ xmax=max(primitives[index].p1.x,primitives[index].p0.x);
+ }
+ else
+ {
+ if (index == 2181) printf("in else\n");
+ slopep0p1= (primitives[index].p1.y - primitives[index].p0.y) / (primitives[index].p1.x - primitives[index].p0.x);
+
+ xvaluetest1=(currentpoint-primitives[index].p1.y)/slopep0p1 + primitives[index].p1.x;
+ /*if (xvaluetest1>=0 && xvaluetest1= primitives[index].p1.x)
+ ||(xvaluetest1 >= primitives[index].p0.x && xvaluetest1 <= primitives[index].p1.x))
+ {
+
+ /*if (index == 1797) printf( "checking xvaluetest1 \n");*/
+ if(xvaluetest1xmax)
+ {
+ xmax=xvaluetest1;
+ }
+ }
+
+ if (index == 2181)
+ printf("Xmin,Xmax After P0P1: (%i, %i)\n", xmin, xmax);
+ //}
+ }
+ }
+ else
+ {
+ xmin=min((int)primitives[index].p1.x,xmin);
+ xmax=max((int)primitives[index].p1.x,xmax);
+ }
+
+ /////////// CASE2
+
+ if ((primitives[index].p2.x - primitives[index].p1.x)!=0)
+ {
+ if (primitives[index].p2.y == primitives[index].p1.y)
+ {
+ xmin=min((int)min(primitives[index].p2.x, primitives[index].p1.x),xmin);
+ xmax=max((int)max(primitives[index].p2.x, primitives[index].p1.x),xmax);
+ }
+ else
+ {
+ if (index == 2181)
+ printf("Xmin,Xmax Before P1P2: (%i, %i)\n", xmin, xmax);
+ slopep1p2= (primitives[index].p2.y - primitives[index].p1.y) / (primitives[index].p2.x - primitives[index].p1.x);
+ xvaluetest2=(currentpoint-primitives[index].p2.y)/slopep1p2 + primitives[index].p2.x;
+
+ if (index == 2181) printf("slopep2p1=%f xvaluetest2= %f ,currentpoint= %i \n",slopep1p2,xvaluetest2, currentpoint);
+
+ if ((xvaluetest2 <= primitives[index].p1.x && xvaluetest2 >= primitives[index].p2.x) ||
+ (xvaluetest2 >= primitives[index].p1.x && xvaluetest2 <= primitives[index].p2.x))
+ {
+ if(xvaluetest2>=0 && xvaluetest2<=xmin)
+ xmin=xvaluetest2;
+ if(xvaluetest2xmax)
+ {
+ xmax=xvaluetest2;
+ }
+ }
+
+ if (index == 2181)
+ printf("Xmin,Xmax After P1P2: (%i, %i)\n", xmin, xmax);
+ }
+ }
+ else
+ {
+ xmin=min((int)primitives[index].p1.x,xmin);
+ xmax=max((int)primitives[index].p1.x,xmax);
+ }
+
+
+ ////////////////////// CASE3
+
+ if (primitives[index].p0.x - primitives[index].p2.x!=0)
+ {
+ if (primitives[index].p0.y == primitives[index].p2.y)
+ {
+ xmin=min((int)min(primitives[index].p0.x, primitives[index].p2.x),xmin);
+ xmax=max((int)max(primitives[index].p0.x, primitives[index].p2.x),xmax);
+ }
+ else
+ {
+ if (index == 2181)
+ printf("Xmin,Xmax Before P0P2: (%i, %i) - CurrentPoint- %i\n", xmin, xmax, currentpoint);
+
+ slopep2p0= (primitives[index].p0.y - primitives[index].p2.y) / (primitives[index].p0.x - primitives[index].p2.x);
+ xvaluetest3=(currentpoint-primitives[index].p0.y)/slopep2p0 + primitives[index].p0.x ;
+
+ if (index == 2181) printf("slopep2p0=%f xvaluetest3= %d \n",slopep2p0,xvaluetest3);
+
+ if (xvaluetest3>=0 && xvaluetest3= primitives[index].p0.x) ||
+ (xvaluetest3 >= primitives[index].p2.x && xvaluetest3 <= primitives[index].p0.x))
+ {
+ if(xvaluetest3>=0 && xvaluetest3xmax)
+ xmax=xvaluetest3;
+ }
+
+ }
+ if (index == 2181)
+ printf("Xmin,Xmax After P1P2: (%i, %i)\n", xmin, xmax);
+ }
+
+ }
+ else
+ {
+ xmin=min((int)primitives[index].p2.x,xmin);
+ xmax=max((int)primitives[index].p2.x,xmax);
+ }
+
+ glm::vec3 barry = calculateBarycentricCoordinate(primitives[index], glm::vec2(xmin, currentpoint));
+ // using (y-y1)/m + x1=xB
+ // here y is currentpoint
+
+ while(xmin<=xmax)
+ {
+ int pixel_index= xmin+currentpoint*resolution.x;
+
+ fragment newfrag;
+ newfrag.color=barry.x*primitives[index].c0 + barry.y*primitives[index].c1 + barry.z*primitives[index].c2;
+ newfrag.normal= glm::normalize(barry.x*primitives[index].n0 + barry.y*primitives[index].n1 + barry.z*primitives[index].n2);
+ newfrag.lock=1;
+ newfrag.position.x= xmin;
+ newfrag.position.y= currentpoint;
+
+ //atomic comapre and swap
+ bool loop=true;
+ while(loop)
+ {
+ if( xmin < resolution.x && xmin>=0 && currentpoint0 )
+ {
+ /*if(xmin==0)
+ {
+ printf("index %d \n", index);
+ }
+ tri=true;*/
+ if( depthbuffer[index].position.z < newfrag.position.z)
+ {
+ //if (atomicExch(&(depthbuffer[pixel_index].lock), 1) == 0)
+ {
+ depthbuffer[pixel_index]= newfrag;
+ loop=false;
+ // atomicExch(&(depthbuffer[pixel_index].lock),0);
+ }
+ }
+ // printf("some %f",depthbuffer[pixel_index].normal.y);
+ else
+ {
+ loop=false;
+ }
+
+ }
+ }
+ xmin++;
+ }
+ currentpoint--;
+ }
+
+ /*if (tri==false)
+ {
+ printf("index %d \n", index);
+ }*/
+ }
+}
+
+// Fragment shader: evaluates a clamped diffuse (N dot L) term for every pixel whose
+// stencil entry equals 1.
+//
+// Launch layout: 2D grid of 2D blocks, one thread per pixel. Threads that land outside
+// the image return without touching memory.
+//   depthbuffer    - one fragment per pixel, indexed as x + y * resolution.x
+//   resolution     - image width (x) and height (y) in pixels
+//   lightpos       - light position; presumably in the same space as fragment.position - TODO confirm
+//   lightcol       - light color, multiplied into the fragment color
+//   device_stencil - one int per pixel; only pixels whose entry is 1 are processed
+__global__ void fragmentShadeKernel(fragment* depthbuffer, glm::vec2 resolution, glm::vec3 lightpos, glm::vec3 lightcol, int* device_stencil)
+{
+    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+    // Strict bounds: x == resolution.x or y == resolution.y is already outside the
+    // image and would index past the end of the stencil and depth buffers.
+    if (x >= resolution.x || y >= resolution.y)
+    {
+        return;
+    }
+
+    int index = x + (y * resolution.x);
+
+    if (device_stencil[index] != 1)
+    {
+        return;
+    }
+
+    glm::vec3 normal = glm::normalize(depthbuffer[index].normal);
+    glm::vec3 L = glm::normalize(lightpos - depthbuffer[index].position);
+
+    // Clamp N dot L to [0, 1]. The dot product must be the first argument of clamp;
+    // wrapping it in "(dot, 0.0)" invokes the comma operator and always yields 0.
+    float diffuse = glm::clamp(glm::dot(normal, L), 0.0f, 1.0f);
+
+    glm::vec3 final_col = diffuse * lightcol * depthbuffer[index].color;
+
+    // The write-back stays disabled, as before; the shaded color is computed but not stored.
+    //depthbuffer[index].color = final_col;
+    (void)final_col;
+}
+
+//Writes fragment colors to the framebuffer
+//
+// Launch layout: 2D grid of 2D blocks, one thread per pixel.
+//   resolution  - image width (x) and height (y) in pixels
+//   depthbuffer - one fragment per pixel, indexed as x + y * resolution.x
+//   framebuffer - one color per pixel, same indexing as depthbuffer
+__global__ void render(glm::vec2 resolution, fragment* depthbuffer, glm::vec3* framebuffer){
+
+    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+    // Strict '<': a thread with x == resolution.x would alias the first pixel of the
+    // next row, and one with y == resolution.y would write past the end of the buffers.
+    if(x < resolution.x && y < resolution.y){
+        int index = x + (y * resolution.x);
+        framebuffer[index] = depthbuffer[index].color;
+    }
+}
+
+// Wrapper for the __global__ calls: allocates the per-frame device buffers, uploads the
+// host geometry, runs the pipeline stages in order and releases everything again.
+//
+//   PBOpos             - pointer passed through to sendImageToPBO
+//   resolution         - image width (x) and height (y) in pixels
+//   frame              - not used by this function
+//   vbo, cbo, ibo, nbo - host arrays copied to the device; vbosize, cbosize, ibosize and
+//                        nbosize are their element counts (floats for vbo/cbo/nbo, ints for ibo)
+//   lightpos, lightcol - forwarded unchanged to fragmentShadeKernel
+void cudaRasterizeCore(uchar4* PBOpos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* cbo, int cbosize, int* ibo, int ibosize, int nbosize, float* nbo, glm::vec3 lightpos, glm::vec3 lightcol){
+
+    const int width = (int)resolution.x;
+    const int height = (int)resolution.y;
+    const int totalpixels = width*height;
+
+    // Set up the stencil buffer. It is allocated exactly once per call and cleared to 0
+    // on the device, so no host staging array is needed and every entry has a defined
+    // value before fragmentShadeKernel reads it.
+    device_stencil = NULL;
+    cudaMalloc((void**)&device_stencil, totalpixels*sizeof(int));
+    cudaMemset(device_stencil, 0, totalpixels*sizeof(int));
+
+    // set up crucial magic
+    // NOTE(review): the launch configurations below use the grid/block values computed
+    // in this function (per-pixel kernels: fullBlocksPerGrid x threadsPerBlock,
+    // per-vertex/per-primitive kernels: primitiveBlocks x tileSize) - confirm against the kernels.
+    int tileSize = 8;
+
+    dim3 threadsPerBlock(tileSize, tileSize);
+    dim3 fullBlocksPerGrid((int)ceil(float(resolution.x)/float(tileSize)), (int)ceil(float(resolution.y)/float(tileSize)));
+
+    //set up framebuffer
+    framebuffer = NULL;
+    cudaMalloc((void**)&framebuffer, totalpixels*sizeof(glm::vec3));
+
+    //set up depthbuffer
+    depthbuffer = NULL;
+    cudaMalloc((void**)&depthbuffer, totalpixels*sizeof(fragment));
+
+    //kernel launches to black out accumulated/unaccumulated pixel buffers and clear our scattering states
+    clearImage<<<fullBlocksPerGrid, threadsPerBlock>>>(resolution, framebuffer, glm::vec3(0,0,0));
+
+    // Fragment used to reset every depth buffer entry; all fields are given a value
+    // because the struct is copied by value into the kernel.
+    fragment frag;
+    frag.color = glm::vec3(0,0,0);
+    frag.normal = glm::vec3(0,0,0);
+    frag.position = glm::vec3(0,0,-10000);
+    frag.lock = 0;
+    clearDepthBuffer<<<fullBlocksPerGrid, threadsPerBlock>>>(resolution, depthbuffer, frag);
+
+    //------------------------------
+    //memory stuff
+    //------------------------------
+    primitives = NULL;
+    cudaMalloc((void**)&primitives, (ibosize/3)*sizeof(triangle));
+
+    device_ibo = NULL;
+    cudaMalloc((void**)&device_ibo, ibosize*sizeof(int));
+    cudaMemcpy(device_ibo, ibo, ibosize*sizeof(int), cudaMemcpyHostToDevice);
+
+    device_nbo = NULL;
+    cudaMalloc((void**)&device_nbo, nbosize*sizeof(float));
+    cudaMemcpy(device_nbo, nbo, nbosize*sizeof(float), cudaMemcpyHostToDevice);
+
+    device_vbo = NULL;
+    cudaMalloc((void**)&device_vbo, vbosize*sizeof(float));
+    cudaMemcpy(device_vbo, vbo, vbosize*sizeof(float), cudaMemcpyHostToDevice);
+
+    device_cbo = NULL;
+    cudaMalloc((void**)&device_cbo, cbosize*sizeof(float));
+    cudaMemcpy(device_cbo, cbo, cbosize*sizeof(float), cudaMemcpyHostToDevice);
+
+    // One thread per vertex (three floats each) for the vertex shader.
+    tileSize = 32;
+    int primitiveBlocks = ceil(((float)vbosize/3)/((float)tileSize));
+
+    //------------------------------
+    //vertex shader
+    //------------------------------
+
+    //setting up camera first
+    glm::vec3 CameraPosition = glm::vec3(0.0f, 0.5f, 7.0f);
+
+    glm::mat4 projection = glm::perspective(60.0f, static_cast<float>(width) / static_cast<float>(height), 0.1f, 50.0f);
+
+    glm::mat4 camera = glm::lookAt(CameraPosition, glm::vec3(0.0, 0.5, 0), glm::vec3(0, 1, 0));
+
+    // Combined view-projection matrix handed to the vertex shader.
+    projection = projection * camera;
+
+    cudaMat4 cudaProjection = utilityCore::glmMat4ToCudaMat4(projection);
+
+    vertexShadeKernel<<<primitiveBlocks, tileSize>>>(device_vbo, vbosize, cudaProjection);
+
+    cudaDeviceSynchronize();
+    //------------------------------
+    //primitive assembly
+    //------------------------------
+    // One thread per triangle (three indices each) from here on.
+    primitiveBlocks = ceil(((float)ibosize/3)/((float)tileSize));
+    primitiveAssemblyKernel<<<primitiveBlocks, tileSize>>>(device_vbo, vbosize, device_cbo, cbosize, device_ibo, ibosize, primitives, device_nbo, nbosize);
+
+    cudaDeviceSynchronize();
+    //------------------------------
+    //rasterization
+    //------------------------------
+    rasterizationKernel<<<primitiveBlocks, tileSize>>>(primitives, ibosize/3, depthbuffer, resolution);
+
+    cudaDeviceSynchronize();
+    //------------------------------
+    //fragment shader
+    //------------------------------
+    fragmentShadeKernel<<<fullBlocksPerGrid, threadsPerBlock>>>(depthbuffer, resolution, lightpos, lightcol, device_stencil);
+
+    cudaDeviceSynchronize();
+    //------------------------------
+    //write fragments to framebuffer
+    //------------------------------
+    render<<<fullBlocksPerGrid, threadsPerBlock>>>(resolution, depthbuffer, framebuffer);
+    sendImageToPBO<<<fullBlocksPerGrid, threadsPerBlock>>>(PBOpos, resolution, framebuffer);
+
+    cudaDeviceSynchronize();
+
+    kernelCleanup();
+
+    checkCUDAError("Kernel failed!");
+}
+
+// Releases every device buffer that cudaRasterizeCore allocates.
+void kernelCleanup(){
+    // Listed in the order in which the buffers are freed.
+    void* deviceBuffers[] = {
+        primitives,
+        device_vbo,
+        device_cbo,
+        device_ibo,
+        device_nbo,
+        framebuffer,
+        depthbuffer,
+        device_stencil
+    };
+    const int bufferCount = sizeof(deviceBuffers) / sizeof(deviceBuffers[0]);
+
+    for (int i = 0; i < bufferCount; ++i){
+        cudaFree( deviceBuffers[i] );
+    }
+}
+
diff --git a/src/rasterizeKernels.h b/src/rasterizeKernels.h
index bef3653..355e2f1 100755
--- a/src/rasterizeKernels.h
+++ b/src/rasterizeKernels.h
@@ -1,17 +1,17 @@
-// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
-// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
-
-#ifndef RASTERIZEKERNEL_H
-#define RASTERIZEKERNEL_H
-
-#include
-#include
-#include
-#include
-#include
-#include "glm/glm.hpp"
-
-void kernelCleanup();
-void cudaRasterizeCore(uchar4* pos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* cbo, int cbosize, int* ibo, int ibosize);
-
-#endif //RASTERIZEKERNEL_H
+// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
+// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
+
+#ifndef RASTERIZEKERNEL_H
+#define RASTERIZEKERNEL_H
+
+#include
+#include
+#include
+#include
+#include
+#include "glm/glm.hpp"
+
+// Frees the device buffers used by the rasterizer (called at the end of cudaRasterizeCore).
+void kernelCleanup();
+// Runs the rasterization pipeline once and writes the resulting image through pos.
+//   resolution         - image width (x) and height (y) in pixels
+//   vbo, cbo, ibo, nbo - host arrays with their element counts in vbosize, cbosize, ibosize, nbosize
+//                        (note that nbosize precedes nbo in the parameter list)
+//   lightpos, lightcol - light position and color handed to the fragment shader
+void cudaRasterizeCore(uchar4* pos, glm::vec2 resolution, float frame, float* vbo, int vbosize, float* cbo, int cbosize, int* ibo, int ibosize, int nbosize, float* nbo, glm::vec3 lightpos, glm::vec3 lightcol);
+
+#endif //RASTERIZEKERNEL_H
diff --git a/src/rasterizeTools.h b/src/rasterizeTools.h
index e9b5dcc..f2b864f 100755
--- a/src/rasterizeTools.h
+++ b/src/rasterizeTools.h
@@ -1,78 +1,83 @@
-// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
-// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
-
-#ifndef RASTERIZETOOLS_H
-#define RASTERIZETOOLS_H
-
-#include
-#include "glm/glm.hpp"
-#include "utilities.h"
-#include "cudaMat4.h"
-
-struct triangle {
- glm::vec3 p0;
- glm::vec3 p1;
- glm::vec3 p2;
- glm::vec3 c0;
- glm::vec3 c1;
- glm::vec3 c2;
-};
-
-struct fragment{
- glm::vec3 color;
- glm::vec3 normal;
- glm::vec3 position;
-};
-
-//Multiplies a cudaMat4 matrix and a vec4
-__host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v){
- glm::vec3 r(1,1,1);
- r.x = (m.x.x*v.x)+(m.x.y*v.y)+(m.x.z*v.z)+(m.x.w*v.w);
- r.y = (m.y.x*v.x)+(m.y.y*v.y)+(m.y.z*v.z)+(m.y.w*v.w);
- r.z = (m.z.x*v.x)+(m.z.y*v.y)+(m.z.z*v.z)+(m.z.w*v.w);
- return r;
-}
-
-//LOOK: finds the axis aligned bounding box for a given triangle
-__host__ __device__ void getAABBForTriangle(triangle tri, glm::vec3& minpoint, glm::vec3& maxpoint){
- minpoint = glm::vec3(min(min(tri.p0.x, tri.p1.x),tri.p2.x),
- min(min(tri.p0.y, tri.p1.y),tri.p2.y),
- min(min(tri.p0.z, tri.p1.z),tri.p2.z));
- maxpoint = glm::vec3(max(max(tri.p0.x, tri.p1.x),tri.p2.x),
- max(max(tri.p0.y, tri.p1.y),tri.p2.y),
- max(max(tri.p0.z, tri.p1.z),tri.p2.z));
-}
-
-//LOOK: calculates the signed area of a given triangle
-__host__ __device__ float calculateSignedArea(triangle tri){
- return 0.5*((tri.p2.x - tri.p0.x)*(tri.p1.y - tri.p0.y) - (tri.p1.x - tri.p0.x)*(tri.p2.y - tri.p0.y));
-}
-
-//LOOK: helper function for calculating barycentric coordinates
-__host__ __device__ float calculateBarycentricCoordinateValue(glm::vec2 a, glm::vec2 b, glm::vec2 c, triangle tri){
- triangle baryTri;
- baryTri.p0 = glm::vec3(a,0); baryTri.p1 = glm::vec3(b,0); baryTri.p2 = glm::vec3(c,0);
- return calculateSignedArea(baryTri)/calculateSignedArea(tri);
-}
-
-//LOOK: calculates barycentric coordinates
-__host__ __device__ glm::vec3 calculateBarycentricCoordinate(triangle tri, glm::vec2 point){
- float beta = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), point, glm::vec2(tri.p2.x,tri.p2.y), tri);
- float gamma = calculateBarycentricCoordinateValue(glm::vec2(tri.p0.x,tri.p0.y), glm::vec2(tri.p1.x,tri.p1.y), point, tri);
- float alpha = 1.0-beta-gamma;
- return glm::vec3(alpha,beta,gamma);
-}
-
-//LOOK: checks if a barycentric coordinate is within the boundaries of a triangle
-__host__ __device__ bool isBarycentricCoordInBounds(glm::vec3 barycentricCoord){
- return barycentricCoord.x >= 0.0 && barycentricCoord.x <= 1.0 &&
- barycentricCoord.y >= 0.0 && barycentricCoord.y <= 1.0 &&
- barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0;
-}
-
-//LOOK: for a given barycentric coordinate, return the corresponding z position on the triangle
-__host__ __device__ float getZAtCoordinate(glm::vec3 barycentricCoord, triangle tri){
- return -(barycentricCoord.x*tri.p0.z + barycentricCoord.y*tri.p1.z + barycentricCoord.z*tri.p2.z);
-}
-
+// CIS565 CUDA Rasterizer: A simple rasterization pipeline for Patrick Cozzi's CIS565: GPU Computing at the University of Pennsylvania
+// Written by Yining Karl Li, Copyright (c) 2012 University of Pennsylvania
+
+#ifndef RASTERIZETOOLS_H
+#define RASTERIZETOOLS_H
+
+#include
+#include "glm/glm.hpp"
+#include "utilities.h"
+#include "cudaMat4.h"
+
+// One assembled triangle: three vertices, each with a position (p), color (c) and normal (n).
+struct triangle {
+  glm::vec3 p0, p1, p2;  // vertex positions
+  glm::vec3 c0, c1, c2;  // per-vertex colors
+  glm::vec3 n1, n2, n0;  // per-vertex normals; declaration order (n1, n2, n0) kept so the layout is unchanged
+};
+
+// Per-pixel record held in the depth buffer.
+struct fragment{
+  glm::vec3 color, normal, position;
+  int lock;  // the rasterizer sets this to 1 on new fragments; presumably meant as a per-pixel lock - TODO confirm
+};
+
+//Multiplies a cudaMat4 matrix and a vec4
+// Each output component is the dot product of v with m.x, m.y or m.z respectively;
+// only those three members of m are read, so the result is a vec3.
+__host__ __device__ glm::vec3 multiplyMV(cudaMat4 m, glm::vec4 v){
+  const float outX = (m.x.x*v.x)+(m.x.y*v.y)+(m.x.z*v.z)+(m.x.w*v.w);
+  const float outY = (m.y.x*v.x)+(m.y.y*v.y)+(m.y.z*v.z)+(m.y.w*v.w);
+  const float outZ = (m.z.x*v.x)+(m.z.y*v.y)+(m.z.z*v.z)+(m.z.w*v.w);
+  return glm::vec3(outX, outY, outZ);
+}
+
+
+//LOOK: finds the axis aligned bounding box for a given triangle
+// minpoint receives the component-wise minimum of the three vertex positions,
+// maxpoint the component-wise maximum.
+__host__ __device__ void getAABBForTriangle(triangle tri, glm::vec3& minpoint, glm::vec3& maxpoint){
+  const glm::vec3& a = tri.p0;
+  const glm::vec3& b = tri.p1;
+  const glm::vec3& c = tri.p2;
+
+  const float lowX = min(min(a.x, b.x), c.x);
+  const float lowY = min(min(a.y, b.y), c.y);
+  const float lowZ = min(min(a.z, b.z), c.z);
+  minpoint = glm::vec3(lowX, lowY, lowZ);
+
+  const float highX = max(max(a.x, b.x), c.x);
+  const float highY = max(max(a.y, b.y), c.y);
+  const float highZ = max(max(a.z, b.z), c.z);
+  maxpoint = glm::vec3(highX, highY, highZ);
+}
+
+//LOOK: calculates the signed area of a given triangle
+// Only the x and y components of the vertices are used; z is ignored.
+__host__ __device__ float calculateSignedArea(triangle tri){
+  // Edge vectors from p0 to p1 and from p0 to p2.
+  const float edge1X = tri.p1.x - tri.p0.x;
+  const float edge1Y = tri.p1.y - tri.p0.y;
+  const float edge2X = tri.p2.x - tri.p0.x;
+  const float edge2Y = tri.p2.y - tri.p0.y;
+
+  return 0.5*(edge2X*edge1Y - edge1X*edge2Y);
+}
+
+//LOOK: helper function for calculating barycentric coordinates
+// Returns the signed area of the 2D triangle (a, b, c) divided by the signed area of tri.
+__host__ __device__ float calculateBarycentricCoordinateValue(glm::vec2 a, glm::vec2 b, glm::vec2 c, triangle tri){
+  // Lift the three 2D points into a triangle with z = 0 so the area helper can be reused.
+  triangle subTri;
+  subTri.p0 = glm::vec3(a,0);
+  subTri.p1 = glm::vec3(b,0);
+  subTri.p2 = glm::vec3(c,0);
+
+  const float subArea = calculateSignedArea(subTri);
+  const float fullArea = calculateSignedArea(tri);
+  return subArea/fullArea;
+}
+
+//LOOK: calculates barycentric coordinates
+// Returns (alpha, beta, gamma) for point relative to tri, using only the x/y
+// components of the vertices; alpha is derived as 1 - beta - gamma.
+__host__ __device__ glm::vec3 calculateBarycentricCoordinate(triangle tri, glm::vec2 point){
+  const glm::vec2 v0(tri.p0.x, tri.p0.y);
+  const glm::vec2 v1(tri.p1.x, tri.p1.y);
+  const glm::vec2 v2(tri.p2.x, tri.p2.y);
+
+  // beta: point takes the place of p1; gamma: point takes the place of p2.
+  const float beta = calculateBarycentricCoordinateValue(v0, point, v2, tri);
+  const float gamma = calculateBarycentricCoordinateValue(v0, v1, point, tri);
+  const float alpha = 1.0-beta-gamma;
+
+  return glm::vec3(alpha,beta,gamma);
+}
+
+//LOOK: checks if a barycentric coordinate is within the boundaries of a triangle
+// True only when each of the three components lies in the closed range [0, 1].
+__host__ __device__ bool isBarycentricCoordInBounds(glm::vec3 barycentricCoord){
+  const float components[3] = { barycentricCoord.x, barycentricCoord.y, barycentricCoord.z };
+
+  for (int i = 0; i < 3; ++i){
+    // Written as a negated range test so that a NaN component is rejected.
+    if (!(components[i] >= 0.0 && components[i] <= 1.0)){
+      return false;
+    }
+  }
+  return true;
+}
+
+//LOOK: for a given barycentric coordinate, return the corresponding z position on the triangle
+// The vertex z values are blended with the barycentric weights and the sum is negated.
+__host__ __device__ float getZAtCoordinate(glm::vec3 barycentricCoord, triangle tri){
+  const float blendedZ = barycentricCoord.x*tri.p0.z + barycentricCoord.y*tri.p1.z + barycentricCoord.z*tri.p2.z;
+  return -blendedZ;
+}
+
#endif
\ No newline at end of file
diff --git a/src/utilities.h b/src/utilities.h
index 3e6ef6e..2416a7f 100755
--- a/src/utilities.h
+++ b/src/utilities.h
@@ -1,44 +1,44 @@
-//UTILITYCORE- A Utility Library by Yining Karl Li
-//This file is part of UTILITYCORE, Coyright (c) 2012 Yining Karl Li
-
-#ifndef Pathtracer_utilities_h
-#define Pathtracer_utilities_h
-
-#include "glm/glm.hpp"
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "cudaMat4.h"
-
-const float PI =3.1415926535897932384626422832795028841971;
-const float TWO_PI =6.2831853071795864769252867665590057683943;
-const float SQRT_OF_ONE_THIRD =0.5773502691896257645091487805019574556476;
-const float E =2.7182818284590452353602874713526624977572;
-const float EPSILON =.000000001;
-const float ZERO_ABSORPTION_EPSILON =0.00001;
-const float RAY_BIAS_AMOUNT =0.0002;
-
-namespace utilityCore {
- extern float clamp(float f, float min, float max);
- extern bool replaceString(std::string& str, const std::string& from, const std::string& to);
- extern glm::vec3 clampRGB(glm::vec3 color);
- extern bool epsilonCheck(float a, float b);
- extern std::vector tokenizeString(std::string str);
- extern cudaMat4 glmMat4ToCudaMat4(glm::mat4 a);
- extern glm::mat4 cudaMat4ToGlmMat4(cudaMat4 a);
- extern glm::mat4 buildTransformationMatrix(glm::vec3 translation, glm::vec3 rotation, glm::vec3 scale);
- extern void printCudaMat4(cudaMat4 m);
- extern std::string convertIntToString(int number);
-
- //-----------------------------
- //-------GLM Printers----------
- //-----------------------------
- extern void printMat4(glm::mat4);
- extern void printVec4(glm::vec4);
- extern void printVec3(glm::vec3);
-}
-#endif
+//UTILITYCORE- A Utility Library by Yining Karl Li
+//This file is part of UTILITYCORE, Coyright (c) 2012 Yining Karl Li
+
+#ifndef Pathtracer_utilities_h
+#define Pathtracer_utilities_h
+
+#include "glm/glm.hpp"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cudaMat4.h"
+
+// Mathematical constants. The literals carry more digits than a float can hold and
+// are rounded to float precision on initialization.
+const float PI =3.1415926535897932384626422832795028841971;
+const float TWO_PI =6.2831853071795864769252867665590057683943;
+const float SQRT_OF_ONE_THIRD =0.5773502691896257645091487805019574556476;
+const float E =2.7182818284590452353602874713526624977572;
+// General-purpose tolerance.
+// NOTE(review): presumably the threshold used by utilityCore::epsilonCheck - confirm in its definition.
+const float EPSILON =.0001;
+// NOTE(review): the two constants below are not referenced in the visible rasterizer code;
+// their names suggest they come from a path tracer - confirm before relying on them.
+const float ZERO_ABSORPTION_EPSILON =0.00001;
+const float RAY_BIAS_AMOUNT =0.0002;
+
+// Declarations of the utilityCore helpers; the definitions are not part of this header.
+namespace utilityCore {
+    extern float clamp(float f, float min, float max);
+    extern bool replaceString(std::string& str, const std::string& from, const std::string& to);
+    extern glm::vec3 clampRGB(glm::vec3 color);
+    extern bool epsilonCheck(float a, float b);
+    // The element type must be spelled out: a bare std::vector is not a complete type.
+    extern std::vector<std::string> tokenizeString(std::string str);
+    // Conversions between glm::mat4 and the cudaMat4 type passed to kernels.
+    extern cudaMat4 glmMat4ToCudaMat4(glm::mat4 a);
+    extern glm::mat4 cudaMat4ToGlmMat4(cudaMat4 a);
+    extern glm::mat4 buildTransformationMatrix(glm::vec3 translation, glm::vec3 rotation, glm::vec3 scale);
+    extern void printCudaMat4(cudaMat4 m);
+    extern std::string convertIntToString(int number);
+
+    //-----------------------------
+    //-------GLM Printers----------
+    //-----------------------------
+    extern void printMat4(glm::mat4);
+    extern void printVec4(glm::vec4);
+    extern void printVec3(glm::vec3);
+}
+#endif