diff --git a/pom.xml b/pom.xml index 8169ff7..3eb18aa 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,19 @@ nearsoft.academy big-data 1.0-SNAPSHOT - jar + + + + org.apache.maven.plugins + maven-compiler-plugin + + 7 + 7 + + + + + jar big-data http://maven.apache.org @@ -15,6 +27,16 @@ + + org.slf4j + slf4j-api + 1.7.5 + + + org.slf4j + slf4j-log4j12 + 1.7.5 + org.apache.mahout mahout-core diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..1103432 --- /dev/null +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,109 @@ +package nearsoft.academy.bigdata.recommendation; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; +import java.util.zip.GZIPInputStream; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +public class MovieRecommender { + private String file; + private HashMap HashProduct = new HashMap(); + private HashMap InvertedHashProduct = new HashMap(); + private HashMap HashUser = new HashMap(); + private int users = 0, products = 0, reviews = 0; + + public MovieRecommender(String file) throws IOException{ + this.file = file; + getData(); + } + + private void getData() throws IOException { + int thisProduct =0, thisUser =0; + Files.deleteIfExists(Paths.get("Result.csv")); + File result = new File("Result.csv"); + InputStream fileReader = new GZIPInputStream(new FileInputStream(this.file)); + //BufferedReader br = new BufferedReader(new InputStreamReader(fileReader)); + FileWriter fileWriter = new FileWriter(result); + //BufferedWriter bw = new BufferedWriter(fileWriter); + String line; + String[] sp; + String key, value; + + try (BufferedReader br = new BufferedReader(new InputStreamReader(fileReader)); + BufferedWriter bw = new BufferedWriter(fileWriter)) { + System.out.println("Reading File: "+this.file); + while((line = br.readLine()) != null) { + if (line.length() >= 0) { + sp = line.split(":"); + key = sp[0]; + if ("product/productId".equals(key)) { + value = sp[1].trim(); + if (!HashProduct.containsKey(value)){ + HashProduct.put(value,products); + InvertedHashProduct.put(products,value); + thisProduct = HashProduct.get(value); + products++; + }else{ + thisProduct = HashProduct.get(value); + } + }else if ("review/userId".equals(key)){ + value = sp[1].trim(); + if (!HashUser.containsKey(value)){ + HashUser.put(value, users); + thisUser = HashUser.get(value); + users++; + }else{ + thisUser = HashUser.get(value); + } + }else if ("review/score".equals(key)){ + String score = sp[1].trim(); + bw.write(thisUser + "," + thisProduct + "," + score + "\n"); + reviews ++; + } + } + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } + + public int getTotalReviews() { + return reviews; + } + + public int getTotalProducts() { + return products; + } + + public int getTotalUsers() { + return users; + } + + public List getRecommendationsForUser(String user) throws IOException, TasteException { + DataModel model = new FileDataModel(new File("Result.csv")); + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + + int userValue = HashUser.get(user); + + List RecommendedProducts = new ArrayList(3); + List recommendations = recommender.recommend(userValue,3); + for (RecommendedItem recommendation : recommendations) { + RecommendedProducts.add(InvertedHashProduct.get((int)recommendation.getItemID())); + } + return RecommendedProducts; + } +} diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..e1ea81b 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -13,9 +13,10 @@ public class MovieRecommenderTest { @Test public void testDataInfo() throws IOException, TasteException { - //download movies.txt.gz from + //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("movies.txt.gz"); + assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); @@ -27,4 +28,4 @@ public void testDataInfo() throws IOException, TasteException { } -} +} \ No newline at end of file