package nearsoft.academy.bigdata.recommendation;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;
import java.util.zip.GZIPInputStream;

/**
 * Builds a user-based collaborative-filtering recommender from a review dump.
 *
 * <p>The constructor scans the source file line by line, looking for the three
 * fields {@code product/productId:}, {@code review/userId:} and
 * {@code review/score:}. Every distinct user id and product id is mapped to a
 * sequential integer (starting at 1), and one {@code user,product,score} CSV row
 * is written to a temporary file for each score line. That CSV file is what
 * Mahout's {@link FileDataModel} later loads.
 *
 * <p>Instances are not thread-safe.
 */
public class MovieRecommender {

    /** Line prefix announcing the product id of the review that follows. */
    private static final String PRODUCT_ID_PREFIX = "product/productId:";

    /** Line prefix announcing the id of the reviewing user. */
    private static final String USER_ID_PREFIX = "review/userId:";

    /** Line prefix announcing the score; it triggers emission of a CSV row. */
    private static final String SCORE_PREFIX = "review/score:";

    /** Threshold handed to {@link ThresholdUserNeighborhood}. */
    private static final double NEIGHBORHOOD_THRESHOLD = 0.1;

    /** Number of items requested from the recommender per call. */
    private static final int RECOMMENDATION_COUNT = 3;

    /** Intermediate {@code user,product,score} CSV consumed by Mahout. */
    private final File ratingsFile;

    /** External user id -> sequential numeric id (1-based). */
    private final Map<String, Integer> users = new HashMap<>();

    /** External product id -> sequential numeric id (1-based). */
    private final Map<String, Integer> products = new HashMap<>();

    /** Reverse lookup: element {@code i} is the product whose numeric id is {@code i + 1}. */
    private final List<String> productIds = new ArrayList<>();

    /** Number of score lines seen in the source. */
    private final int reviewsCount;

    /** Built on first use; loading the model is expensive, so it is kept. */
    private UserBasedRecommender recommender;

    /**
     * Parses the review dump and writes the intermediate CSV into a temporary
     * file that is removed when the JVM exits.
     *
     * @param sourcePath path of the review dump; a path ending in {@code .gz}
     *                   is decompressed on the fly
     * @throws IOException          if the source cannot be read or the temporary
     *                              file cannot be written
     * @throws NullPointerException if {@code sourcePath} is {@code null}
     */
    public MovieRecommender(String sourcePath) throws IOException {
        Objects.requireNonNull(sourcePath, "sourcePath");

        // A temp file keeps the class usable on any machine and lets several
        // instances coexist without overwriting each other's data.
        ratingsFile = File.createTempFile("movie-ratings-", ".csv");
        ratingsFile.deleteOnExit();

        int currentUser = 0;
        int currentProduct = 0;
        int reviews = 0;

        try (BufferedReader reader = openSource(sourcePath);
             BufferedWriter writer =
                     Files.newBufferedWriter(ratingsFile.toPath(), StandardCharsets.UTF_8)) {
            System.out.println("Buffers inicializados");

            String line;
            while ((line = reader.readLine()) != null) {
                // startsWith (not contains) so that free-text lines which merely
                // mention a field name are not mistaken for that field.
                if (line.startsWith(PRODUCT_ID_PREFIX)) {
                    currentProduct = indexOfProduct(valueAfter(line, PRODUCT_ID_PREFIX));
                } else if (line.startsWith(USER_ID_PREFIX)) {
                    currentUser = indexOfUser(valueAfter(line, USER_ID_PREFIX));
                } else if (line.startsWith(SCORE_PREFIX)) {
                    // The row uses the most recently seen product and user.
                    reviews++;
                    writer.write(currentUser + "," + currentProduct + ","
                            + valueAfter(line, SCORE_PREFIX) + "\n");
                }
            }
        }

        reviewsCount = reviews;
        System.out.println("Archivo creado");
    }

    /**
     * Returns the product ids recommended for the given user.
     *
     * @param UserId external user id as it appears in the source file
     * @return up to three product ids; an empty list when the user id was never
     *         seen in the source
     * @throws IOException    if the intermediate CSV cannot be loaded
     * @throws TasteException if Mahout fails to compute the recommendations
     */
    List<String> getRecommendationsForUser(String UserId) throws IOException, TasteException {
        Integer userIndex = users.get(UserId);
        if (userIndex == null) {
            // Unknown user: nothing to recommend (avoids an unboxing NPE).
            return new ArrayList<>();
        }

        List<RecommendedItem> items = recommender().recommend(userIndex, RECOMMENDATION_COUNT);
        List<String> recommendationsForUser = new ArrayList<>(items.size());
        for (RecommendedItem item : items) {
            // Numeric product ids are 1-based positions in productIds.
            recommendationsForUser.add(productIds.get((int) item.getItemID() - 1));
        }
        return recommendationsForUser;
    }

    /** @return number of distinct user ids found in the source */
    public int getTotalUsers() {
        return users.size();
    }

    /** @return number of distinct product ids found in the source */
    public int getTotalProducts() {
        return products.size();
    }

    /** @return number of score lines found in the source */
    public int getTotalReviews() {
        return reviewsCount;
    }

    /** Opens the source for reading, unwrapping gzip when the path ends in {@code .gz}. */
    private static BufferedReader openSource(String sourcePath) throws IOException {
        InputStream in = new FileInputStream(sourcePath);
        try {
            if (sourcePath.endsWith(".gz")) {
                in = new GZIPInputStream(in);
            }
        } catch (IOException e) {
            in.close(); // do not leak the file handle if the gzip header is invalid
            throw e;
        }
        // ISO-8859-1 maps every byte to a character, so decoding never depends
        // on the platform default charset.
        return new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1));
    }

    /** Returns the trimmed text that follows {@code prefix} on {@code line}. */
    private static String valueAfter(String line, String prefix) {
        return line.substring(prefix.length()).trim();
    }

    /** Returns the numeric id of {@code userId}, assigning the next free one if new. */
    private int indexOfUser(String userId) {
        Integer index = users.get(userId);
        if (index == null) {
            index = users.size() + 1;
            users.put(userId, index);
        }
        return index;
    }

    /** Returns the numeric id of {@code productId}, assigning the next free one if new. */
    private int indexOfProduct(String productId) {
        Integer index = products.get(productId);
        if (index == null) {
            productIds.add(productId);
            index = productIds.size();
            products.put(productId, index);
        }
        return index;
    }

    /** Lazily builds the Mahout recommender over the intermediate CSV. */
    private UserBasedRecommender recommender() throws IOException, TasteException {
        if (recommender == null) {
            DataModel model = new FileDataModel(ratingsFile);
            UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
            UserNeighborhood neighborhood =
                    new ThresholdUserNeighborhood(NEIGHBORHOOD_THRESHOLD, similarity, model);
            recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
        }
        return recommender;
    }
}
mahout-mr + 0.13.0 + junit junit @@ -27,4 +31,5 @@ test - + + \ No newline at end of file diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..04fe427 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -13,9 +13,9 @@ public class MovieRecommenderTest { @Test public void testDataInfo() throws IOException, TasteException { - //download movies.txt.gz from + //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("/Users/anarobles/Downloads/movies.txt"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); @@ -27,4 +27,4 @@ public void testDataInfo() throws IOException, TasteException { } -} +} \ No newline at end of file