diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..fa78956
Binary files /dev/null and b/.DS_Store differ
diff --git a/.classpath b/.classpath
new file mode 100644
index 0000000..d13e9b0
--- /dev/null
+++ b/.classpath
@@ -0,0 +1,35 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..364472d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/target/
+data/dataset.csv
+data/movies.txt
+data/movies.txt.gz
diff --git a/.project b/.project
new file mode 100644
index 0000000..1d2c2d6
--- /dev/null
+++ b/.project
@@ -0,0 +1,23 @@
+
+
+ academy-exercises
+
+
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.m2e.core.maven2Builder
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+ org.eclipse.m2e.core.maven2Nature
+
+
diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..8dd9b1d
--- /dev/null
+++ b/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,3 @@
+eclipse.preferences.version=1
+encoding//src/test/java=UTF-8
+encoding/=UTF-8
diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..5723a0f
--- /dev/null
+++ b/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,8 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.5
diff --git a/lib/commons-math3-3.2.jar b/lib/commons-math3-3.2.jar
new file mode 100644
index 0000000..f8b7db2
Binary files /dev/null and b/lib/commons-math3-3.2.jar differ
diff --git a/lib/guava-15.0.jar b/lib/guava-15.0.jar
new file mode 100644
index 0000000..eb9ef8a
Binary files /dev/null and b/lib/guava-15.0.jar differ
diff --git a/lib/mahout-core-0.8.jar b/lib/mahout-core-0.8.jar
new file mode 100644
index 0000000..001ec4c
Binary files /dev/null and b/lib/mahout-core-0.8.jar differ
diff --git a/lib/mahout-integration-0.8.jar b/lib/mahout-integration-0.8.jar
new file mode 100644
index 0000000..9458dcf
Binary files /dev/null and b/lib/mahout-integration-0.8.jar differ
diff --git a/lib/mahout-math-0.8.jar b/lib/mahout-math-0.8.jar
new file mode 100644
index 0000000..395009a
Binary files /dev/null and b/lib/mahout-math-0.8.jar differ
diff --git a/lib/slf4j-api-1.7.5.jar b/lib/slf4j-api-1.7.5.jar
new file mode 100644
index 0000000..8f004d3
Binary files /dev/null and b/lib/slf4j-api-1.7.5.jar differ
diff --git a/lib/slf4j-nop-1.7.5.jar b/lib/slf4j-nop-1.7.5.jar
new file mode 100644
index 0000000..42f5c15
Binary files /dev/null and b/lib/slf4j-nop-1.7.5.jar differ
diff --git a/pom.xml b/pom.xml
index 8169ff7..95af77d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -27,4 +27,17 @@
test
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 2.12.4
+
+ false
+
+
+
+
diff --git a/readme.md b/readme.md
index ce4dc89..2d8d524 100644
--- a/readme.md
+++ b/readme.md
@@ -4,17 +4,18 @@ This repo contains several common big data exercises.
* **MovieRecommender** Uses Amazon movie reviews sample data [stanford.edu/data/web-Movies.html](http://snap.stanford.edu/data/web-Movies.html) for a simple movie recommender
-
-
-
## Setup
1. Install the JDK 7.0
2. [Download & Install Maven](http://maven.apache.org/download.cgi)
-
+   - Import the .jar files from the lib/ folder into the project
+3. The data file movies.txt.gz needs to be in the data/ folder
+4. Run MovieRecommenderTest to compile the project and generate dataset.csv from movies.txt.gz
+5. Wait for the run to finish
## How to run tests
#from the repository root
mvn test
+
diff --git a/src/.DS_Store b/src/.DS_Store
new file mode 100644
index 0000000..a58d1bd
Binary files /dev/null and b/src/.DS_Store differ
diff --git a/src/test/.DS_Store b/src/test/.DS_Store
new file mode 100644
index 0000000..8ade123
Binary files /dev/null and b/src/test/.DS_Store differ
diff --git a/src/test/java/.DS_Store b/src/test/java/.DS_Store
new file mode 100644
index 0000000..cc10eb8
Binary files /dev/null and b/src/test/java/.DS_Store differ
diff --git a/src/test/java/nearsoft/.DS_Store b/src/test/java/nearsoft/.DS_Store
new file mode 100644
index 0000000..fc93bd5
Binary files /dev/null and b/src/test/java/nearsoft/.DS_Store differ
diff --git a/src/test/java/nearsoft/academy/.DS_Store b/src/test/java/nearsoft/academy/.DS_Store
new file mode 100644
index 0000000..f565452
Binary files /dev/null and b/src/test/java/nearsoft/academy/.DS_Store differ
diff --git a/src/test/java/nearsoft/academy/bigdata/.DS_Store b/src/test/java/nearsoft/academy/bigdata/.DS_Store
new file mode 100644
index 0000000..2556cee
Binary files /dev/null and b/src/test/java/nearsoft/academy/bigdata/.DS_Store differ
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..ee781e1
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,131 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * Converts the gzipped Amazon movie-review dump into the numeric CSV read by
+ * Mahout's {@link FileDataModel} and serves user-based recommendations from it.
+ */
+public class MovieRecommender {
+    // Generated "user,product,score" CSV, resolved against the working directory.
+    private static final String DATASET_PATH = "data/dataset.csv";
+    private static final String PRODUCT_PREFIX = "product/productId:";
+    private static final String USER_PREFIX = "review/userId:";
+    private static final String SCORE_PREFIX = "review/score:";
+
+    // Recommender model, loaded from DATASET_PATH on first use.
+    private DataModel model;
+    private int totalReviews = 0, totalProducts = 0, totalUsers = 0;
+
+    // String ids from the dump <-> sequential numeric ids written to the CSV.
+    private final Map<String, Integer> products = new HashMap<String, Integer>();
+    private final Map<String, Integer> users = new HashMap<String, Integer>();
+    private final Map<Integer, String> productsInverted = new HashMap<Integer, String>();
+
+    /**
+     * Parses the review dump at {@code path} and writes the CSV dataset.
+     *
+     * @param path gzip-compressed reviews file
+     * @throws IOException if the dump cannot be read or the CSV cannot be written
+     */
+    public MovieRecommender(String path) throws IOException {
+        FileInputStream rawFile = new FileInputStream(path);
+        try {
+            BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(new GZIPInputStream(rawFile)));
+            FileWriter writer = new FileWriter(new File(DATASET_PATH));
+            try {
+                convert(reader, writer);
+            } finally {
+                try {
+                    writer.close();
+                } finally {
+                    reader.close();
+                }
+            }
+        } finally {
+            rawFile.close(); // also covers failures before the reader exists
+        }
+        System.out.println("ALL OK");
+    }
+
+    // Streams the dump once: assigns numeric ids, counts totals, writes one CSV row per review.
+    private void convert(BufferedReader reader, FileWriter writer) throws IOException {
+        String productId = null, userId = null, score = null; // null = not yet seen in this record
+        String row;
+        while ((row = reader.readLine()) != null) {
+            // startsWith rather than contains: free-text fields may mention a field name.
+            if (row.startsWith(PRODUCT_PREFIX)) {
+                productId = row.split(" ")[1];
+                if (!products.containsKey(productId)) {
+                    totalProducts++;
+                    products.put(productId, totalProducts);
+                    productsInverted.put(totalProducts, productId);
+                }
+            } else if (row.startsWith(USER_PREFIX)) {
+                userId = row.split(" ")[1];
+                if (!users.containsKey(userId)) {
+                    totalUsers++;
+                    users.put(userId, totalUsers);
+                }
+            } else if (row.startsWith(SCORE_PREFIX)) {
+                score = row.split(" ")[1];
+                totalReviews++;
+            }
+            if (productId != null && userId != null && score != null) {
+                writer.write(users.get(userId) + "," + products.get(productId) + "," + score + "\n");
+                productId = userId = score = null;
+            }
+        }
+    }
+
+    /**
+     * Returns up to three recommended product ids (as in the dump) for {@code userId}, in recommender order.
+     * @param userId user id as it appears in the review dump
+     * @throws IllegalArgumentException if the user was not seen while parsing the dump
+     */
+    public List<String> getRecommendationsForUser(String userId) throws IOException, TasteException {
+        Integer user = users.get(userId);
+        if (user == null) {
+            throw new IllegalArgumentException("Unknown user id: " + userId);
+        }
+        if (model == null) {
+            model = new FileDataModel(new File(DATASET_PATH)); // load once, reuse across calls
+        }
+        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+        UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
+        UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
+        List<String> recommendations = new ArrayList<String>();
+        for (RecommendedItem item : recommender.recommend(user, 3)) {
+            recommendations.add(productsInverted.get((int) item.getItemID()));
+        }
+        return recommendations;
+    }
+
+    // Totals gathered while parsing the dump.
+    public int getTotalReviews() { return totalReviews; }
+    public int getTotalProducts() { return totalProducts; }
+    public int getTotalUsers() { return totalUsers; }
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..60c6e10 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -15,7 +15,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
- MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
+ MovieRecommender recommender = new MovieRecommender("data/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());