From aa70cafe5560bf2e44697bee1291aeafa490d1b9 Mon Sep 17 00:00:00 2001 From: Alonso Gutierrez Date: Fri, 4 Oct 2019 15:12:41 -0600 Subject: [PATCH 1/4] Archivo CSV --- .../recommendation/MovieRecommender.java | 85 +++++++++++++++++++ .../recommendation/MovieRecommenderTest.java | 10 +-- 2 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..0b044c3 --- /dev/null +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,85 @@ +package nearsoft.academy.bigdata.recommendation; + +import java.io.*; +import java.util.*; +import java.util.zip.GZIPInputStream; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +public class MovieRecommender { + private String file; + private Hashtable HashProduct = new Hashtable(); + private Hashtable HashUser = new Hashtable(); + private int users =1, products =1, reviews = 0; + + public MovieRecommender(String file) throws IOException{ + this.file = file; + getData(); + } + + public String getData() throws IOException { + String thisProduct = null, thisUser = null; + File result = new File("Result.csv"); + InputStream fileReader = new GZIPInputStream(new FileInputStream(this.file)); + BufferedReader br = new BufferedReader(new InputStreamReader(fileReader)); + FileWriter fileWriter = new FileWriter(result); + BufferedWriter bw = new BufferedWriter(fileWriter); + String line; + String[] sp; + String key, value; + + while((line = br.readLine()) != null) { + if (line.length() >= 0) { + sp = line.split(" "); + key = sp[0]; + if (key.equals("product/productId:")) { + thisProduct = sp[1]; + if (!HashProduct.containsKey(thisProduct)){ + HashProduct.put(thisProduct,1); + products++; + } + }else if (key.equals("review/userId:")){ + thisUser = sp[1]; + if (!HashUser.containsKey(thisUser)){ + HashUser.put(thisUser,1); + users++; + } + }else if (key.equals("review/score:")){ + String score = sp[1]; + bw.write(thisUser + "," + thisProduct + "," + score + "\n"); + reviews ++; + } + } + } + br.close(); + bw.close(); + return result.getAbsolutePath(); + + + } + + public int getTotalReviews() { + return 0; + } + + public int getTotalProducts() { + return 0; + } + + public int getTotalUsers() { + return 0; + } + + public List getRecommendationsForUser(String a141HP4LYPWMSR) { + return null; + } +} diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..7c0ddec 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -13,18 +13,18 @@ public class MovieRecommenderTest { @Test public void testDataInfo() throws IOException, TasteException { - //download movies.txt.gz from + //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); - assertEquals(7911684, recommender.getTotalReviews()); + MovieRecommender recommender = new MovieRecommender("/Users/agutierrez/Documents/big-data/movies.txt.gz"); + /*assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); List recommendations = recommender.getRecommendationsForUser("A141HP4LYPWMSR"); assertThat(recommendations, hasItem("B0002O7Y8U")); assertThat(recommendations, hasItem("B00004CQTF")); - assertThat(recommendations, hasItem("B000063W82")); + assertThat(recommendations, hasItem("B000063W82"));*/ } -} +} \ No newline at end of file From f06a8ced468398c40a34927a59d3b28f31da1a45 Mon Sep 17 00:00:00 2001 From: Alonso Gutierrez Date: Fri, 4 Oct 2019 16:31:53 -0600 Subject: [PATCH 2/4] WIP Recommender --- .../recommendation/MovieRecommender.java | 59 +++++++++++++++---- .../recommendation/MovieRecommenderTest.java | 7 ++- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java index 0b044c3..a9597d2 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -1,6 +1,8 @@ package nearsoft.academy.bigdata.recommendation; import java.io.*; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.*; import java.util.zip.GZIPInputStream; @@ -19,7 +21,7 @@ public class MovieRecommender { private String file; private Hashtable HashProduct = new Hashtable(); private Hashtable HashUser = new Hashtable(); - private int users =1, products =1, reviews = 0; + private int users =0, products =0, reviews = 0; public MovieRecommender(String file) throws IOException{ this.file = file; @@ -27,7 +29,8 @@ public MovieRecommender(String file) throws IOException{ } public String getData() throws IOException { - String thisProduct = null, thisUser = null; + int thisProduct =0, thisUser =0; + Files.deleteIfExists(Paths.get("Result.csv")); File result = new File("Result.csv"); InputStream fileReader = new GZIPInputStream(new FileInputStream(this.file)); BufferedReader br = new BufferedReader(new InputStreamReader(fileReader)); @@ -42,16 +45,23 @@ public String getData() throws IOException { sp = line.split(" "); key = sp[0]; if (key.equals("product/productId:")) { - thisProduct = sp[1]; - if (!HashProduct.containsKey(thisProduct)){ - HashProduct.put(thisProduct,1); + value = sp[1]; + if (!HashProduct.containsKey(value)){ + HashProduct.put(value,products); + //HashProduct.put(thisProduct,1); + thisProduct = HashProduct.get(value); products++; + }else{ + thisProduct = HashProduct.get(value); } }else if (key.equals("review/userId:")){ - thisUser = sp[1]; - if (!HashUser.containsKey(thisUser)){ - HashUser.put(thisUser,1); + value = sp[1]; + if (!HashUser.containsKey(value)){ + HashUser.put(value, users); + thisUser = HashUser.get(value); users++; + }else{ + thisUser = HashUser.get(value); } }else if (key.equals("review/score:")){ String score = sp[1]; @@ -62,24 +72,47 @@ public String getData() throws IOException { } br.close(); bw.close(); - return result.getAbsolutePath(); + return null; } public int getTotalReviews() { - return 0; + return reviews; } public int getTotalProducts() { - return 0; + return products; } public int getTotalUsers() { - return 0; + return users; + } + + public List getRecommendationsForUser(String user) throws IOException, TasteException { + DataModel model = new FileDataModel(new File("Result.csv")); + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + + int userValue = HashUser.get(user); + + List RecommendedProducts = new ArrayList(); + List recommendations = recommender.recommend(userValue,3); + for (RecommendedItem recommendation : recommendations) { + RecommendedProducts.add(getKeyFromHT((int)recommendation.getItemID())); + } + return RecommendedProducts; } - public List getRecommendationsForUser(String a141HP4LYPWMSR) { + private String getKeyFromHT(int value) { + Enumeration e = HashProduct.keys(); + while (e.hasMoreElements()) { + String key = (String) e.nextElement(); + if (HashProduct.get(key)==value) { + return key; + } + } return null; } } diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 7c0ddec..2539e78 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,15 +15,16 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/Users/agutierrez/Documents/big-data/movies.txt.gz"); - /*assertEquals(7911684, recommender.getTotalReviews()); + MovieRecommender recommender = new MovieRecommender("/Users/agutierrez/Documents/big-data-exercises/movies.txt.gz"); + + assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); List recommendations = recommender.getRecommendationsForUser("A141HP4LYPWMSR"); assertThat(recommendations, hasItem("B0002O7Y8U")); assertThat(recommendations, hasItem("B00004CQTF")); - assertThat(recommendations, hasItem("B000063W82"));*/ + assertThat(recommendations, hasItem("B000063W82")); } From 90a25f9fdd28d37c22107c089d27a5fc52011dd1 Mon Sep 17 00:00:00 2001 From: Alonso Gutierrez Date: Sat, 5 Oct 2019 16:48:26 -0600 Subject: [PATCH 3/4] Solution... Finally --- pom.xml | 24 ++++++++++++++++++- .../recommendation/MovieRecommender.java | 16 +++---------- .../recommendation/MovieRecommenderTest.java | 2 +- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/pom.xml b/pom.xml index 8169ff7..3eb18aa 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,19 @@ nearsoft.academy big-data 1.0-SNAPSHOT - jar + + + + org.apache.maven.plugins + maven-compiler-plugin + + 7 + 7 + + + + + jar big-data http://maven.apache.org @@ -15,6 +27,16 @@ + + org.slf4j + slf4j-api + 1.7.5 + + + org.slf4j + slf4j-log4j12 + 1.7.5 + org.apache.mahout mahout-core diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java index a9597d2..10210c5 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -20,6 +20,7 @@ public class MovieRecommender { private String file; private Hashtable HashProduct = new Hashtable(); + private Hashtable InvertedHashProduct = new Hashtable(); private Hashtable HashUser = new Hashtable(); private int users =0, products =0, reviews = 0; @@ -48,7 +49,7 @@ public String getData() throws IOException { value = sp[1]; if (!HashProduct.containsKey(value)){ HashProduct.put(value,products); - //HashProduct.put(thisProduct,1); + InvertedHashProduct.put(products,value); thisProduct = HashProduct.get(value); products++; }else{ @@ -100,19 +101,8 @@ public List getRecommendationsForUser(String user) throws IOException, T List RecommendedProducts = new ArrayList(); List recommendations = recommender.recommend(userValue,3); for (RecommendedItem recommendation : recommendations) { - RecommendedProducts.add(getKeyFromHT((int)recommendation.getItemID())); + RecommendedProducts.add(InvertedHashProduct.get((int)recommendation.getItemID())); } return RecommendedProducts; } - - private String getKeyFromHT(int value) { - Enumeration e = HashProduct.keys(); - while (e.hasMoreElements()) { - String key = (String) e.nextElement(); - if (HashProduct.get(key)==value) { - return key; - } - } - return null; - } } diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 2539e78..aa4a2d6 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,7 +15,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/Users/agutierrez/Documents/big-data-exercises/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("/Users/alonso/Documents/big-data-exercises/movies.txt.gz"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); From 43d7dcb10f2769cfdc91b64f1f645cc8f87fbbd1 Mon Sep 17 00:00:00 2001 From: Alonso Gutierrez Date: Wed, 9 Oct 2019 12:34:23 -0600 Subject: [PATCH 4/4] Refactor based on feedback --- .../recommendation/MovieRecommender.java | 79 ++++++++++--------- .../recommendation/MovieRecommenderTest.java | 2 +- 2 files changed, 41 insertions(+), 40 deletions(-) diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java index 10210c5..1103432 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -19,63 +19,64 @@ public class MovieRecommender { private String file; - private Hashtable HashProduct = new Hashtable(); - private Hashtable InvertedHashProduct = new Hashtable(); - private Hashtable HashUser = new Hashtable(); - private int users =0, products =0, reviews = 0; + private HashMap HashProduct = new HashMap(); + private HashMap InvertedHashProduct = new HashMap(); + private HashMap HashUser = new HashMap(); + private int users = 0, products = 0, reviews = 0; public MovieRecommender(String file) throws IOException{ this.file = file; getData(); } - public String getData() throws IOException { + private void getData() throws IOException { int thisProduct =0, thisUser =0; Files.deleteIfExists(Paths.get("Result.csv")); File result = new File("Result.csv"); InputStream fileReader = new GZIPInputStream(new FileInputStream(this.file)); - BufferedReader br = new BufferedReader(new InputStreamReader(fileReader)); + //BufferedReader br = new BufferedReader(new InputStreamReader(fileReader)); FileWriter fileWriter = new FileWriter(result); - BufferedWriter bw = new BufferedWriter(fileWriter); + //BufferedWriter bw = new BufferedWriter(fileWriter); String line; String[] sp; String key, value; - while((line = br.readLine()) != null) { - if (line.length() >= 0) { - sp = line.split(" "); - key = sp[0]; - if (key.equals("product/productId:")) { - value = sp[1]; - if (!HashProduct.containsKey(value)){ - HashProduct.put(value,products); - InvertedHashProduct.put(products,value); - thisProduct = HashProduct.get(value); - products++; - }else{ - thisProduct = HashProduct.get(value); + try (BufferedReader br = new BufferedReader(new InputStreamReader(fileReader)); + BufferedWriter bw = new BufferedWriter(fileWriter)) { + System.out.println("Reading File: "+this.file); + while((line = br.readLine()) != null) { + if (line.length() >= 0) { + sp = line.split(":"); + key = sp[0]; + if ("product/productId".equals(key)) { + value = sp[1].trim(); + if (!HashProduct.containsKey(value)){ + HashProduct.put(value,products); + InvertedHashProduct.put(products,value); + thisProduct = HashProduct.get(value); + products++; + }else{ + thisProduct = HashProduct.get(value); + } + }else if ("review/userId".equals(key)){ + value = sp[1].trim(); + if (!HashUser.containsKey(value)){ + HashUser.put(value, users); + thisUser = HashUser.get(value); + users++; + }else{ + thisUser = HashUser.get(value); + } + }else if ("review/score".equals(key)){ + String score = sp[1].trim(); + bw.write(thisUser + "," + thisProduct + "," + score + "\n"); + reviews ++; } - }else if (key.equals("review/userId:")){ - value = sp[1]; - if (!HashUser.containsKey(value)){ - HashUser.put(value, users); - thisUser = HashUser.get(value); - users++; - }else{ - thisUser = HashUser.get(value); - } - }else if (key.equals("review/score:")){ - String score = sp[1]; - bw.write(thisUser + "," + thisProduct + "," + score + "\n"); - reviews ++; } } + } catch (FileNotFoundException e) { + e.printStackTrace(); } - br.close(); - bw.close(); - return null; - - } public int getTotalReviews() { @@ -98,7 +99,7 @@ public List getRecommendationsForUser(String user) throws IOException, T int userValue = HashUser.get(user); - List RecommendedProducts = new ArrayList(); + List RecommendedProducts = new ArrayList(3); List recommendations = recommender.recommend(userValue,3); for (RecommendedItem recommendation : recommendations) { RecommendedProducts.add(InvertedHashProduct.get((int)recommendation.getItemID())); diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index aa4a2d6..e1ea81b 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,7 +15,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/Users/alonso/Documents/big-data-exercises/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("movies.txt.gz"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts());