diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..9ba41a2 --- /dev/null +++ b/.classpath @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.project b/.project new file mode 100644 index 0000000..04f346d --- /dev/null +++ b/.project @@ -0,0 +1,34 @@ + + + big-data + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + + + 1635192007554 + + 30 + + org.eclipse.core.resources.regexFilterMatcher + node_modules|.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__ + + + + diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..f9fe345 --- /dev/null +++ b/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,4 @@ +eclipse.preferences.version=1 +encoding//src/main/java=UTF-8 +encoding//src/test/java=UTF-8 +encoding/=UTF-8 diff --git a/.settings/org.eclipse.jdt.apt.core.prefs b/.settings/org.eclipse.jdt.apt.core.prefs new file mode 100644 index 0000000..d4313d4 --- /dev/null +++ b/.settings/org.eclipse.jdt.apt.core.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.apt.aptEnabled=false diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..46235dc --- /dev/null +++ b/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,9 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=11 +org.eclipse.jdt.core.compiler.compliance=11 +org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore +org.eclipse.jdt.core.compiler.processAnnotations=disabled +org.eclipse.jdt.core.compiler.release=disabled +org.eclipse.jdt.core.compiler.source=11 diff --git 
a/.settings/org.eclipse.m2e.core.prefs b/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 0000000..f897a7f --- /dev/null +++ b/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/pom.xml b/pom.xml index 8169ff7..d948dee 100644 --- a/pom.xml +++ b/pom.xml @@ -12,6 +12,8 @@ UTF-8 + 11 + 11 @@ -26,5 +28,15 @@ 4.7 test + + org.slf4j + slf4j-api + 1.7.32 + + + org.slf4j + slf4j-simple + 1.7.5 + diff --git a/src/main/java/.DS_Store b/src/main/java/.DS_Store new file mode 100644 index 0000000..ed9cbda Binary files /dev/null and b/src/main/java/.DS_Store differ diff --git a/src/main/java/nearsoft/.DS_Store b/src/main/java/nearsoft/.DS_Store new file mode 100644 index 0000000..08734d9 Binary files /dev/null and b/src/main/java/nearsoft/.DS_Store differ diff --git a/src/main/java/nearsoft/academy/.DS_Store b/src/main/java/nearsoft/academy/.DS_Store new file mode 100644 index 0000000..9de8351 Binary files /dev/null and b/src/main/java/nearsoft/academy/.DS_Store differ diff --git a/src/main/java/nearsoft/academy/bigdata/.DS_Store b/src/main/java/nearsoft/academy/bigdata/.DS_Store new file mode 100644 index 0000000..75bf787 Binary files /dev/null and b/src/main/java/nearsoft/academy/bigdata/.DS_Store differ
diff --git a/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..c769268
--- /dev/null
+++ b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,261 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * User-based movie recommender built from a gzipped review dump.
+ *
+ * <p>Construction streams the dump once, rewrites it as {@code userIdx,productIdx,score}
+ * rows for Mahout's {@link FileDataModel}, and keeps the mapping between the string ids
+ * found in the dump and the numeric indices written to those rows.
+ */
+public class MovieRecommender {
+    private static final String DATA_PATH = System.getProperty("user.dir") + "/src/main/data/";
+    // DATA_PATH already ends with '/', so file names are appended without another one.
+    private static final String CSV_DATA_PATH = DATA_PATH + "data.csv";
+    private static final String USERS_INDEX_PATH = DATA_PATH + "users.csv";
+    private static final String PRODUCTS_INDEX_PATH = DATA_PATH + "products.csv";
+
+    /** Field keys kept from each review; the parser assumes they occur in this order. */
+    private static final List<String> REGISTRABLE_FIELDS =
+            Arrays.asList("product/productId:", "review/userId:", "review/score:");
+    private static final int NUM_OF_REGISTRABLE_ITEMS = REGISTRABLE_FIELDS.size();
+    /** Number of items requested from the recommender for one user. */
+    private static final int MAX_RECOMMENDATIONS = 3;
+
+    private final Map<String, Integer> products = new HashMap<>();
+    private final Map<Integer, String> productsReverse = new HashMap<>();
+    private final Map<String, Integer> users = new HashMap<>();
+    private final String dataFilePath;
+    private int totalUsers = 0;
+    private int totalProducts = 0;
+    private int totalReviews = 0;
+
+    private DataModel model;
+    private final UserSimilarity similarity;
+    private final UserNeighborhood neighborhood;
+    private final UserBasedRecommender recommender;
+
+    /**
+     * Parses the review dump and builds the recommender on top of the generated CSV.
+     *
+     * @param dataFilePath path to the gzip-compressed review dump
+     * @throws IOException if the dump cannot be read or the CSV files cannot be written
+     * @throws TasteException if Mahout fails to build the model or the similarity
+     */
+    public MovieRecommender(String dataFilePath) throws IOException, TasteException {
+        this.dataFilePath = dataFilePath;
+
+        // A failed conversion is propagated: loading a missing or truncated CSV afterwards
+        // would only hide the root cause.
+        readFile();
+
+        this.model = new FileDataModel(new File(CSV_DATA_PATH));
+        this.similarity = new PearsonCorrelationSimilarity(this.model);
+        this.neighborhood = new ThresholdUserNeighborhood(0.1, this.similarity, this.model);
+        this.recommender =
+                new GenericUserBasedRecommender(this.model, this.neighborhood, this.similarity);
+
+        this.totalProducts = this.model.getNumItems();
+        this.totalUsers = this.model.getNumUsers();
+    }
+
+    /**
+     * Streams the gzipped dump and writes the ratings CSV plus the user and product index files.
+     *
+     * <p>All three output files are truncated first, so repeated runs do not pile up
+     * duplicate index rows, and every stream is closed even when parsing fails.
+     *
+     * @throws IOException if reading the dump or writing any output file fails
+     */
+    private void readFile() throws IOException {
+        try (FileInputStream file = new FileInputStream(this.dataFilePath);
+                GZIPInputStream gzip = new GZIPInputStream(file);
+                BufferedReader reader =
+                        new BufferedReader(new InputStreamReader(gzip, StandardCharsets.UTF_8));
+                BufferedWriter ratingsWriter = openWriter(CSV_DATA_PATH);
+                BufferedWriter productsWriter = openWriter(PRODUCTS_INDEX_PATH);
+                BufferedWriter usersWriter = openWriter(USERS_INDEX_PATH)) {
+            List<String> dataParts = new ArrayList<>(NUM_OF_REGISTRABLE_ITEMS);
+            String line;
+
+            while ((line = reader.readLine()) != null) {
+                String value = registrableValue(line);
+                if (value == null) {
+                    continue;
+                }
+                dataParts.add(value);
+
+                if (dataParts.size() == NUM_OF_REGISTRABLE_ITEMS) {
+                    int productIdx = registerProduct(dataParts.get(0), productsWriter);
+                    int userIdx = registerUser(dataParts.get(1), usersWriter);
+                    String score = dataParts.get(2);
+
+                    ratingsWriter.write(userIdx + "," + productIdx + "," + score + "\n");
+                    this.totalReviews++;
+                    dataParts.clear();
+                }
+            }
+        }
+    }
+
+    /** Opens a buffered UTF-8 writer that truncates any existing file at {@code path}. */
+    private static BufferedWriter openWriter(String path) throws IOException {
+        return new BufferedWriter(new FileWriter(path, StandardCharsets.UTF_8));
+    }
+
+    /** Opens a buffered UTF-8 reader for the file at {@code path}. */
+    private static BufferedReader openReader(String path) throws IOException {
+        return new BufferedReader(new FileReader(path, StandardCharsets.UTF_8));
+    }
+
+    /**
+     * Returns the value of a registrable field line, or {@code null} for any other line.
+     *
+     * <p>The value is the token between the first and the second space, the same token
+     * {@code line.split(" ")[1]} yields, without splitting long review texts into words.
+     */
+    private static String registrableValue(String line) {
+        int keyEnd = line.indexOf(' ');
+        if (keyEnd < 0 || !REGISTRABLE_FIELDS.contains(line.substring(0, keyEnd))) {
+            return null;
+        }
+        int valueEnd = line.indexOf(' ', keyEnd + 1);
+        String value = (valueEnd < 0)
+                ? line.substring(keyEnd + 1)
+                : line.substring(keyEnd + 1, valueEnd);
+        // A key without a value used to crash the parser; such a line is skipped instead.
+        return value.isEmpty() ? null : value;
+    }
+
+    /**
+     * Returns the numeric index of {@code productId}; a product seen for the first time
+     * gets the next free index and a row in the product index file.
+     */
+    private int registerProduct(String productId, BufferedWriter indexWriter) throws IOException {
+        Integer known = this.products.get(productId);
+        if (known != null) {
+            return known;
+        }
+        int idx = this.totalProducts++;
+        this.products.put(productId, idx);
+        this.productsReverse.put(idx, productId);
+        indexWriter.write(productId + "," + idx + "\n");
+        return idx;
+    }
+
+    /**
+     * Returns the numeric index of {@code userId}; a user seen for the first time gets
+     * the next free index and a row in the user index file.
+     */
+    private int registerUser(String userId, BufferedWriter indexWriter) throws IOException {
+        Integer known = this.users.get(userId);
+        if (known != null) {
+            return known;
+        }
+        int idx = this.totalUsers++;
+        this.users.put(userId, idx);
+        indexWriter.write(userId + "," + idx + "\n");
+        return idx;
+    }
+
+    /**
+     * Reloads the model, both id indices and the review count from the files written by
+     * {@link #readFile()}.
+     *
+     * <p>Nothing in this class calls it; presumably it is meant for reusing an earlier
+     * conversion. The recommender created by the constructor is not rebuilt here.
+     *
+     * @throws IOException if any of the three files cannot be read
+     */
+    private void retrieveData() throws IOException {
+        this.model = new FileDataModel(new File(CSV_DATA_PATH));
+
+        try (BufferedReader productReader = openReader(PRODUCTS_INDEX_PATH)) {
+            String product;
+            while ((product = productReader.readLine()) != null) {
+                String[] productParts = product.split(",");
+                int idx = Integer.parseInt(productParts[1]);
+                this.products.put(productParts[0], idx);
+                this.productsReverse.put(idx, productParts[0]);
+            }
+        }
+
+        try (BufferedReader usersReader = openReader(USERS_INDEX_PATH)) {
+            String user;
+            while ((user = usersReader.readLine()) != null) {
+                String[] userParts = user.split(",");
+                this.users.put(userParts[0], Integer.parseInt(userParts[1]));
+            }
+        }
+
+        try (BufferedReader reviewsReader = openReader(CSV_DATA_PATH)) {
+            while (reviewsReader.readLine() != null) {
+                this.totalReviews++;
+            }
+        }
+    }
+
+    /** Returns the number of reviews read from the dump. */
+    public int getTotalReviews() {
+        return this.totalReviews;
+    }
+
+    /** Returns the number of distinct products reported by the data model. */
+    public int getTotalProducts() {
+        return this.totalProducts;
+    }
+
+    /** Returns the number of distinct users reported by the data model. */
+    public int getTotalUsers() {
+        return this.totalUsers;
+    }
+
+    /**
+     * Recommends up to three products for the given user.
+     *
+     * @param userID user id exactly as it appears in the review dump
+     * @return product ids of the recommended items, in the order the recommender returns them
+     * @throws IllegalArgumentException if {@code userID} never appeared in the dump
+     * @throws TasteException if Mahout fails to compute the recommendations
+     */
+    public List<String> getRecommendationsForUser(String userID) throws TasteException {
+        Integer userIdx = this.users.get(userID);
+        if (userIdx == null) {
+            // Unboxing the missing entry used to fail with a bare NullPointerException.
+            throw new IllegalArgumentException("Unknown user id: " + userID);
+        }
+
+        List<RecommendedItem> items = this.recommender.recommend(userIdx, MAX_RECOMMENDATIONS);
+        List<String> recommendations = new ArrayList<>(items.size());
+        for (RecommendedItem item : items) {
+            recommendations.add(this.productsReverse.get((int) item.getItemID()));
+        }
+        return recommendations;
+    }
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..270c0ec 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -13,9 +13,13 @@ public class MovieRecommenderTest { @Test public void testDataInfo() throws IOException, TasteException { - //download movies.txt.gz from + //download movies.txt.gz from: // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + + // Obtain the current directory + String ROOT_PATH = System.getProperty("user.dir"); + + MovieRecommender recommender = new MovieRecommender(ROOT_PATH + "/src/main/data/movies.txt.gz"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers());