diff --git a/pom.xml b/pom.xml
index 8169ff7..abb3399 100644
--- a/pom.xml
+++ b/pom.xml
@@ -12,19 +12,32 @@
 UTF-8
+
 org.apache.mahout
-mahout-core
-0.9
+mahout-mr
+0.10.0
+
 junit
 junit
 4.7
 test
+
+org.junit.jupiter
+junit-jupiter
+RELEASE
+test
+
+
+
+
+
+
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..1f91313
--- /dev/null
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,184 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import java.io.*;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Builds a user-based recommender from a gzipped review dump.
+ *
+ * <p>The constructor reads the dump line by line, assigns consecutive integer ids to the
+ * user and product ids it encounters, writes one {@code user,product,score} row per
+ * review to the file named by {@code OUTPUT_FILEPATH} and loads that file into a Mahout
+ * {@link FileDataModel}.
+ */
+public class MovieRecommender {
+
+    private static final String OUTPUT_FILEPATH = "movies.csv";
+    private static final String PRODUCT_KEY = "product/productId: ";
+    private static final String USER_KEY = "review/userId: ";
+    private static final String SCORE_KEY = "review/score: ";
+    /** Similarity threshold passed to {@link ThresholdUserNeighborhood}. */
+    private static final double NEIGHBORHOOD_THRESHOLD = 0.1;
+    /** Number of items requested from the recommender per user. */
+    private static final int RECOMMENDATION_COUNT = 3;
+
+    private final Map<String, Integer> userIds = new HashMap<>();
+    private final HashBiMap<String, Integer> productIds = HashBiMap.create();
+    private int totalReviews;
+    private Recommender recommender;
+
+    /**
+     * Parses the review dump and builds the recommender.
+     *
+     * @param filePath path to the gzipped review dump
+     * @throws IOException if the dump cannot be read or the CSV cannot be written
+     * @throws TasteException if the recommender cannot be built
+     */
+    public MovieRecommender(String filePath) throws IOException, TasteException {
+        File csvFile = generateCSV(filePath);
+        createRecommender(csvFile);
+    }
+
+    /**
+     * Converts the review dump into a CSV file, collecting id mappings and the review count.
+     *
+     * @param filePath path to the gzipped review dump
+     * @return the CSV file that was written
+     * @throws IOException if reading or writing fails
+     */
+    private File generateCSV(String filePath) throws IOException {
+        File csvFile = new File(OUTPUT_FILEPATH);
+        // try-with-resources closes both streams even when parsing fails midway.
+        try (BufferedReader reader = getGzipReader(new File(filePath));
+             BufferedWriter writer = new BufferedWriter(new FileWriter(csvFile))) {
+            int currentProduct = 0;
+            int currentUser = 0;
+            String line;
+            while ((line = reader.readLine()) != null) {
+                if (line.startsWith(PRODUCT_KEY)) {
+                    currentProduct = idFor(productIds, line.substring(PRODUCT_KEY.length()));
+                } else if (line.startsWith(USER_KEY)) {
+                    currentUser = idFor(userIds, line.substring(USER_KEY.length()));
+                    totalReviews++;
+                } else if (line.startsWith(SCORE_KEY)) {
+                    // The score line completes a review, so the row is emitted here.
+                    double score = Double.parseDouble(line.substring(SCORE_KEY.length()));
+                    writer.write(currentUser + "," + currentProduct + "," + score + "\n");
+                }
+            }
+        }
+        return csvFile;
+    }
+
+    /**
+     * Returns the integer id mapped to {@code key}, assigning the next free id if absent.
+     */
+    private static int idFor(Map<String, Integer> ids, String key) {
+        Integer id = ids.get(key);
+        if (id == null) {
+            id = ids.size();
+            ids.put(key, id);
+        }
+        return id;
+    }
+
+    /**
+     * Builds the user-based recommender from the generated CSV file.
+     *
+     * @param csvFile file with {@code user,product,score} rows
+     * @throws TasteException if Mahout fails to build the similarity or neighborhood
+     * @throws IOException if the CSV file cannot be read
+     */
+    public void createRecommender(File csvFile) throws TasteException, IOException {
+        DataModel model = new FileDataModel(csvFile);
+        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+        UserNeighborhood neighborhood =
+                new ThresholdUserNeighborhood(NEIGHBORHOOD_THRESHOLD, similarity, model);
+        this.recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
+    }
+
+    /**
+     * Recommends products for a user.
+     *
+     * @param userId user id as it appears in the review dump
+     * @return recommended product ids; empty if the user never appeared in the dump
+     * @throws TasteException if the recommender fails
+     */
+    public List<String> getRecommendationsForUser(String userId) throws TasteException {
+        Integer internalId = userIds.get(userId);
+        if (internalId == null) {
+            // Unknown user: unboxing the missing id would throw a NullPointerException.
+            return new ArrayList<>();
+        }
+        List<RecommendedItem> recommendations =
+                recommender.recommend(internalId, RECOMMENDATION_COUNT);
+        return getRecommendationsIds(recommendations);
+    }
+
+    /** Maps recommended items back to the product ids used in the review dump. */
+    private List<String> getRecommendationsIds(List<RecommendedItem> recommendations) {
+        BiMap<Integer, String> productsById = productIds.inverse();
+        List<String> result = new ArrayList<>(recommendations.size());
+        for (RecommendedItem item : recommendations) {
+            result.add(productsById.get((int) item.getItemID()));
+        }
+        return result;
+    }
+
+    /** @return number of reviews read from the dump */
+    public int getTotalReviews() {
+        return totalReviews;
+    }
+
+    /** @return number of distinct products seen in the dump */
+    public int getTotalProducts() {
+        return productIds.size();
+    }
+
+    /** @return number of distinct users seen in the dump */
+    public int getTotalUsers() {
+        return userIds.size();
+    }
+
+    /**
+     * Opens a gzipped file as a UTF-8 text reader.
+     *
+     * @param filePath gzipped file to open
+     * @return buffered reader over the decompressed content
+     * @throws IOException if the file cannot be opened or is not valid gzip
+     */
+    private BufferedReader getGzipReader(File filePath) throws IOException {
+        InputStream fileStream = new FileInputStream(filePath);
+        try {
+            InputStream gzipStream = new GZIPInputStream(fileStream);
+            return new BufferedReader(new InputStreamReader(gzipStream, StandardCharsets.UTF_8));
+        } catch (IOException e) {
+            // Don't leak the file handle when the gzip header is invalid.
+            try {
+                fileStream.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+    }
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..fada92d 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -15,7 +15,7 @@ public class MovieRecommenderTest {
     public void testDataInfo() throws IOException, TasteException {
         //download movies.txt.gz from
         // http://snap.stanford.edu/data/web-Movies.html
-        MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
+        MovieRecommender recommender = new MovieRecommender("/Users/hzamorano/big-data-exercises/src/test/java/nearsoft/academy/bigdata/recommendation/movies.txt.gz");
         assertEquals(7911684, recommender.getTotalReviews());
         assertEquals(253059, recommender.getTotalProducts());
         assertEquals(889176, recommender.getTotalUsers());