Skip to content

Commit 53e179a

Browse files
committed
Add SVM Project files for OpenMP
1 parent 84983ab commit 53e179a

15 files changed

+33571
-0
lines changed

Diff for: Project/SVM-OpenMP/.gitignore

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Build directory
2+
build/
3+
4+
# Prerequisites
5+
*.d
6+
7+
# Compiled Object files
8+
*.slo
9+
*.lo
10+
*.o
11+
*.obj
12+
13+
# Precompiled Headers
14+
*.gch
15+
*.pch
16+
17+
# Compiled Dynamic libraries
18+
*.so
19+
*.dylib
20+
*.dll
21+
22+
# Fortran module files
23+
*.mod
24+
*.smod
25+
26+
# Compiled Static libraries
27+
*.lai
28+
*.la
29+
*.a
30+
*.lib
31+
32+
# Executables
33+
*.exe
34+
*.out
35+
*.app

Diff for: Project/SVM-OpenMP/datasets/a1a

+1,605
Large diffs are not rendered by default.

Diff for: Project/SVM-OpenMP/datasets/a1a.t

+30,956
Large diffs are not rendered by default.

Diff for: Project/SVM-OpenMP/datasets/datasets.metadata

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
1. Dataset Name: a1a
2+
3+
Source: UCI/Adult
4+
5+
# of classes : 2
6+
# of data : 1605/30956 (testing)
7+
# of features : 123/123 (testing)

Diff for: Project/SVM-OpenMP/include/classifier.hpp

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#ifndef CLASSIFIER_HPP
2+
#define CLASSIFIER_HPP
3+
4+
#include <vector>
5+
using namespace std;
6+
7+
#include "dataset.hpp"
8+
9+
/* Classifier base class from which all classifiers derive */
10+
template <typename T>
11+
class Classifier
12+
{
13+
// <define the parameters of the derived classifiers here>
14+
15+
public:
16+
// Fit the model with the dataset to learn parameters
17+
virtual void fit(const Dataset<T>& dataset) = 0;
18+
19+
// Predict the class label for test data
20+
virtual int predict(const vector<T>& data) = 0;
21+
22+
// Predict the class labels for test dataset
23+
vector<int> predict(const Dataset<T>& dataset);
24+
};
25+
26+
/* Support Vector Machine Classification Model */
27+
template <typename T>
28+
class SVC: public Classifier<T>
29+
{
30+
// Parameters of the SVC classifier: C, tol, Kernel function
31+
double C, tol, (*kernelFunction)(vector<T>, vector<T>), eps;
32+
33+
// Dataset with which model was fitted (used in prediction)
34+
Dataset<T> dataset;
35+
36+
// Parameters to be learnt from dataset: alphas[n_data], b
37+
double *alphas, b;
38+
39+
// error between prediction f(x) and actual value for examples in training data
40+
double *errors;
41+
42+
// Helper functions to find alpha i (given alpha j) and update alpha i, alpha j pair
43+
int findUpdateAlphaPair(int alpha2_index);
44+
int updateAlphaPair(int alpha1_index, int alpha2_index);
45+
46+
// Syncronization variables to indicate that alpha i that can make positive progress was found
47+
int valid_alpha1_found;
48+
49+
public:
50+
51+
// Initialize the parameters for SVM Classifier
52+
SVC(double C, double tol, double eps, double (*kernelFunction)(vector<T>, vector<T>));
53+
54+
void fit(const Dataset<T>& dataset);
55+
int predict(const vector<T>& data);
56+
57+
using Classifier<T>::predict;
58+
};
59+
60+
#endif

Diff for: Project/SVM-OpenMP/include/dataset.hpp

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#ifndef DATASET_HPP
2+
#define DATASET_HPP
3+
4+
#include <vector>
5+
using namespace std;
6+
7+
/*
 * A classification dataset with exclusively numerical features.
 *
 * target[i] is the class label of sample i and data[i] is its feature vector.
 * n_data and n_features hold the number of samples and the number of features
 * per sample. Both start at 0 so a default-constructed (empty) dataset never
 * exposes indeterminate values.
 */
template <typename T>
class Dataset
{
    public:
        std::vector<int> target;
        std::vector<std::vector<T> > data;

        int n_data = 0;
        int n_features = 0;
};
18+
19+
/*
 * Read a classification dataset from a libsvm file format.
 *
 * filename   : path of the libsvm-format file to read
 * n_data     : maximum number of samples to read; -1 reads until end of file
 * n_features : length of each feature vector; feature indices in the file
 *              are 1-based and features not listed for a sample are 0
 *
 * Returns the samples read. The n_data field of the result is the number of
 * samples actually read, which may be less than the n_data requested.
 * Defined and explicitly instantiated for int, float and double in dataset.cpp.
 */
20+
template <typename T>
21+
Dataset<T> readLibsvmDataset(const char *filename, int n_data, int n_features);
22+
23+
#endif

Diff for: Project/SVM-OpenMP/include/kernels.hpp

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/* The file contains various default kernels for use in SVM classifier */
2+
#ifndef KERNELS_HPP
3+
#define KERNELS_HPP
4+
5+
#include <vector>
6+
using namespace std;
7+
8+
// Linear kernel: dot product of two feature vectors, returned as a double.
// Both vectors are assumed to have the same length. Defined and explicitly
// instantiated for int, float and double in kernels.cpp.
template <typename T>
9+
double dotProduct(vector<T>, vector<T>);
10+
11+
#endif

Diff for: Project/SVM-OpenMP/include/validation.hpp

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#ifndef VALIDATION_HPP
2+
#define VALIDATION_HPP
3+
4+
#include <vector>
5+
using namespace std;
6+
7+
// Computes the accuracy of the predictions by the classifier against true target values
8+
double computeAccuracy(const vector<int>& target, const vector<int>& predictions);
9+
10+
// Computes the precision of the predictions by the classifier against true target values
11+
double computePrecision(const vector<int>& target, const vector<int>& predictions);
12+
13+
// Computes the recall of the predictions by the classifier against true target values
14+
double computeRecall(const vector<int>& target, const vector<int>& predictions);
15+
16+
// Computes the F1-score of the predictions by the classifier against true target values
17+
double computeF1Score(const vector<int>& target, const vector<int>& predictions);
18+
19+
#endif

Diff for: Project/SVM-OpenMP/makefile

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
CXX = g++
2+
LD = g++
3+
4+
# path #
5+
SRC_PATH = src
6+
BUILD_PATH = build
7+
8+
TEST_PATH = test
9+
BIN_PATH = $(BUILD_PATH)/bin
10+
11+
# code lists #
12+
SOURCES = $(wildcard $(SRC_PATH)/*.cpp)
13+
OBJECTS = $(SOURCES:$(SRC_PATH)/%.cpp=$(BUILD_PATH)/%.o)
14+
DEPENDENCIES = $(OBJECTS:%.o=%.d)
15+
16+
TEST_SOURCES = $(wildcard $(TEST_PATH)/*.cpp)
17+
TEST_OBJECTS = $(TEST_SOURCES:$(TEST_PATH)/%.cpp=$(BUILD_PATH)/%.o)
18+
TEST_DEPENDENCIES = $(TEST_OBJECTS:%.o=%.d)
19+
20+
BINARIES = $(TEST_SOURCES:$(TEST_PATH)/%.cpp=$(BIN_PATH)/%)
21+
22+
# flags #
23+
INCLUDES = -I include
24+
25+
CXXFLAGS = -Wall -std=c++11 -fopenmp
26+
LDFLAGS = -fopenmp
27+
28+
29+
30+
# Default goal: create the output directories, then build every test binary.
# Both names are declared phony so a stray file called `all` or `directories`
# can never make them look up to date.
.PHONY: all directories
all: directories program
32+
33+
-include $(DEPENDENCIES) $(TEST_DEPENDENCIES)
34+
35+
# ----------------------------------------------------- #
36+
37+
directories: $(BUILD_PATH) $(BIN_PATH)
38+
39+
$(BUILD_PATH):
40+
mkdir -p $(BUILD_PATH)
41+
42+
$(BIN_PATH):
43+
mkdir -p $(BIN_PATH)
44+
45+
# ----------------------------------------------------- #
46+
47+
.PHONY: program
48+
program: $(BINARIES)
49+
50+
# Link one executable per test source: every library object plus that test's
# own object. $(BIN_PATH) is an order-only prerequisite (after the `|`): the
# directory must exist before linking, but its timestamp never forces a relink
# and it is not part of $^.
$(BIN_PATH)/%: $(OBJECTS) $(BUILD_PATH)/%.o | $(BIN_PATH)
	$(LD) $(LDFLAGS) $^ -o $@

# Compile library sources. -MMD writes a .d dependency file next to each
# object. The order-only $(BUILD_PATH) prerequisite makes the rule safe under
# `make -j` and when a single object or binary is requested directly.
$(BUILD_PATH)/%.o: $(SRC_PATH)/%.cpp | $(BUILD_PATH)
	$(CXX) $(CXXFLAGS) $(INCLUDES) -MMD -c $< -o $@

# Compile test sources into the same build directory.
$(BUILD_PATH)/%.o: $(TEST_PATH)/%.cpp | $(BUILD_PATH)
	$(CXX) $(CXXFLAGS) $(INCLUDES) -MMD -c $< -o $@
58+
59+
# ----------------------------------------------------- #
60+
61+
.PHONY: clean
62+
clean:
63+
rm -rf $(BIN_PATH)
64+
rm -rf $(BUILD_PATH)
65+
66+
# ----------------------------------------------------- #
67+
68+
.SECONDARY: $(OBJECTS) $(TEST_OBJECTS)
69+
70+
# ----------------------------------------------------- #

Diff for: Project/SVM-OpenMP/src/classifier.cpp

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#include "classifier.hpp"
2+
3+
template <typename T>
4+
vector<int> Classifier<T>::predict(const Dataset<T>& dataset)
5+
{
6+
int prediction;
7+
vector<int> predictions;
8+
9+
for(int i=0; i<(int)dataset.data.size(); ++i)
10+
{
11+
prediction = predict(dataset.data[i]);
12+
predictions.push_back(prediction);
13+
}
14+
15+
return predictions;
16+
}
17+
18+
// Explicit instantiation of templated class and function
19+
template class Classifier<int>;
20+
template class Classifier<float>;
21+
template class Classifier<double>;

Diff for: Project/SVM-OpenMP/src/dataset.cpp

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#include "dataset.hpp"
2+
3+
#include <fstream>
4+
#include <string>
5+
6+
/* Read a classification dataset from a libsvm file format */
7+
template <typename T>
8+
Dataset<T> readLibsvmDataset(const char *filename, int n_data, int n_features)
9+
{
10+
Dataset<T> dataset;
11+
12+
ifstream file(filename);
13+
if(file.is_open())
14+
{
15+
string line, feature;
16+
size_t current_pos, delimiter_pos, feature_delimiter_pos;
17+
18+
int i = 0, target, feature_index;
19+
T feature_value;
20+
vector<T> feature_vector(n_features);
21+
22+
while((i < n_data || n_data == -1) && getline(file, line))
23+
{
24+
fill(feature_vector.begin(), feature_vector.end(), 0);
25+
26+
delimiter_pos = line.find(' ');
27+
target = stoi(line.substr(0, delimiter_pos));
28+
29+
current_pos = delimiter_pos + 1;
30+
31+
// skip the spaces after target
32+
while((current_pos < line.length()) && (line[current_pos] == ' '))
33+
current_pos++;
34+
35+
while((delimiter_pos != string::npos) && (current_pos < line.length()))
36+
{
37+
delimiter_pos = line.find(' ', current_pos);
38+
feature = line.substr(current_pos, delimiter_pos - current_pos);
39+
40+
feature_delimiter_pos = feature.find(':');
41+
42+
feature_index = stoi(feature.substr(0, feature_delimiter_pos));
43+
feature_value = (T)stod(feature.substr(feature_delimiter_pos + 1));
44+
feature_vector[feature_index-1] = feature_value;
45+
46+
current_pos = delimiter_pos + 1;
47+
}
48+
49+
dataset.data.push_back(feature_vector);
50+
dataset.target.push_back(target);
51+
52+
i++;
53+
}
54+
55+
dataset.n_data = i;
56+
dataset.n_features = n_features;
57+
}
58+
59+
return dataset;
60+
}
61+
62+
// Explicit instantiation of the templated class and function for each supported element type
63+
template class Dataset<int>;
64+
template Dataset<int> readLibsvmDataset<int>(const char *, int, int);
65+
66+
template class Dataset<float>;
67+
template Dataset<float> readLibsvmDataset<float>(const char *, int, int);
68+
69+
template class Dataset<double>;
70+
template Dataset<double> readLibsvmDataset<double>(const char *, int, int);

Diff for: Project/SVM-OpenMP/src/kernels.cpp

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#include "kernels.hpp"
2+
3+
/* All Kernels assume that both vectors have same length */
4+
5+
6+
// Perform the dot product of two feature vectors (linear kernel).
//
// The arguments are taken by value because the signature has to match the
// kernel function pointer type declared in classifier.hpp.
//
// Returns the sum of element-wise products as a double. The vectors are
// expected to have the same length; if they do not, only the common prefix is
// used so the shorter vector is never read past its end. Two empty vectors
// give 0.
template <typename T>
double dotProduct(std::vector<T> feature_vector1, std::vector<T> feature_vector2)
{
    typedef typename std::vector<T>::size_type size_type;

    // Index with the container's own size type (no truncation to int) and
    // stop at the end of the shorter vector.
    const size_type length = feature_vector1.size() < feature_vector2.size()
                                 ? feature_vector1.size()
                                 : feature_vector2.size();

    double dot_product = 0;

    // #pragma omp parallel for reduction(+:dot_product)
    for(size_type i = 0; i < length; ++i)
        dot_product += feature_vector1[i] * feature_vector2[i];

    return dot_product;
}
20+
21+
template double dotProduct(vector<int>, vector<int>);
22+
template double dotProduct(vector<float>, vector<float>);
23+
template double dotProduct(vector<double>, vector<double>);

0 commit comments

Comments
 (0)