diff --git a/pom.xml b/pom.xml
index 8169ff7..abb3399 100644
--- a/pom.xml
+++ b/pom.xml
@@ -12,19 +12,32 @@
UTF-8
+
org.apache.mahout
- mahout-core
- 0.9
+ mahout-mr
+ 0.10.0
+
junit
junit
4.7
test
+
+ org.junit.jupiter
+ junit-jupiter
+ RELEASE
+ test
+
+
+
+
+
+
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..1f91313
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,142 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import java.io.*;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
+
+public class MovieRecommender {
+
+ private static final String OUTPUT_FILEPATH = "movies.csv";
+ private static final String PRODUCT_KEY = "product/productId: ";
+ private static final String USER_KEY = "review/userId: ";
+ private static final String SCORE_KEY = "review/score: ";
+ private int totalReviews;
+ private HashMap HMusers;
+ private HashBiMap HMproducts;
+ private Recommender recommender;
+
+
+
+
+ public MovieRecommender (String filePath) throws IOException, TasteException {
+ this.HMusers = new HashMap();
+ this.HMproducts = HashBiMap.create();
+ File csvFile = generateCSV(filePath);
+ createRecommender(csvFile);
+ }
+
+ private File generateCSV(String filePath) throws IOException {
+
+ BufferedReader reader = getGzipReader(new File(filePath));
+ FileWriter writer = new FileWriter(OUTPUT_FILEPATH);
+ String currentLine;
+ String csvLine = "";
+ int currentProduct = 0;
+
+ while((currentLine = reader.readLine()) != null) {
+ if (currentLine.startsWith(PRODUCT_KEY)) {
+ String productId = currentLine.substring(19);
+ if (!this.HMproducts.containsKey(productId)) {
+ this.HMproducts.put(productId, this.HMproducts.size());
+ }
+ currentProduct = this.HMproducts.get(productId);
+ }
+
+ else if (currentLine.startsWith(USER_KEY)) {
+ String userId = currentLine.substring(15);
+ if (!this.HMusers.containsKey(userId)){
+ this.HMusers.put(userId, HMusers.size());
+ }
+ this.totalReviews++;
+ csvLine = this.HMusers.get(userId) + "," + currentProduct + ",";
+ }
+
+ else if (currentLine.startsWith(SCORE_KEY)) {
+ double score = Double.parseDouble(currentLine.substring(14));
+ csvLine += score + "\n";
+ writer.write(csvLine);
+ writer.flush();
+ }
+
+
+
+ }
+
+ reader.close();
+ writer.close();
+
+
+
+ return new File(OUTPUT_FILEPATH);
+ }
+
+
+
+ public void createRecommender(File csvFile) throws TasteException, IOException {
+ DataModel model = new FileDataModel(csvFile);
+ UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+ UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
+ this.recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
+ }
+
+ public List getRecommendationsForUser (String userId) throws TasteException {
+ int userIdInt = this.HMusers.get(userId);
+ List recommendations = this.recommender.recommend(userIdInt, 3);
+ return getRecommendationsIds(recommendations);
+ }
+
+private List getRecommendationsIds(List recommendations){
+ BiMap invertedProducts = this.HMproducts.inverse();
+ ArrayList productsIds = new ArrayList();
+ for (RecommendedItem recommendation : recommendations){
+ int recommendationId = (int) recommendation.getItemID();
+ productsIds.add(invertedProducts.get(recommendationId));
+ }
+ return productsIds;
+}
+
+
+
+
+ public int getTotalReviews(){
+ return totalReviews;
+ }
+
+ public int getTotalProducts(){
+ return HMproducts.size();
+ }
+
+ public int getTotalUsers(){
+ return HMusers.size();
+ }
+
+
+
+ private BufferedReader getGzipReader(File filePath) throws IOException {
+ InputStream fileStream = new FileInputStream(filePath);
+ InputStream gzipStream = new GZIPInputStream(fileStream);
+ Reader decoder = new InputStreamReader(gzipStream, "UTF-8");
+
+ return new BufferedReader(decoder);
+ }
+
+
+
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..fada92d 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -15,7 +15,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
- MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
+ MovieRecommender recommender = new MovieRecommender("/Users/hzamorano/big-data-exercises/src/test/java/nearsoft/academy/bigdata/recommendation/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());