Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions .classpath
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Eclipse classpath description for this project.
  Entries, in order:
    - main sources (src/main/java) compiled to target/classes
    - test sources (src/test/java) compiled to target/test-classes
    - the JavaSE-11 JRE container
    - the m2e Maven classpath container
    - annotation-processor output folders (main and test), flagged m2e-apt
    - the default output folder target/classes
  NOTE(review): this looks like an IDE-generated file; confirm it is meant to be committed.
-->
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-11">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" path="target/generated-sources/annotations">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="ignore_optional_problems" value="true"/>
<attribute name="m2e-apt" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="target/generated-test-sources/test-annotations">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="ignore_optional_problems" value="true"/>
<attribute name="m2e-apt" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
34 changes: 34 additions & 0 deletions .project
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Eclipse project description for the "big-data" project.
  Declares the Java (JDT) and Maven (m2e) builders and natures, plus one
  resource filter whose regex matches node_modules, .git and the
  __CREATED_BY_JAVA_LANGUAGE_SERVER__ marker.
  NOTE(review): this looks like an IDE-generated file; confirm it is meant to be committed.
-->
<projectDescription>
<name>big-data</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
<filteredResources>
<filter>
<id>1635192007554</id>
<name></name>
<type>30</type>
<matcher>
<id>org.eclipse.core.resources.regexFilterMatcher</id>
<arguments>node_modules|.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__</arguments>
</matcher>
</filter>
</filteredResources>
</projectDescription>
4 changes: 4 additions & 0 deletions .settings/org.eclipse.core.resources.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8
2 changes: 2 additions & 0 deletions .settings/org.eclipse.jdt.apt.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
eclipse.preferences.version=1
org.eclipse.jdt.apt.aptEnabled=false
9 changes: 9 additions & 0 deletions .settings/org.eclipse.jdt.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=11
org.eclipse.jdt.core.compiler.compliance=11
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.processAnnotations=disabled
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=11
4 changes: 4 additions & 0 deletions .settings/org.eclipse.m2e.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
12 changes: 12 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.target>11</maven.compiler.target>
<maven.compiler.source>11</maven.compiler.source>
</properties>

<dependencies>
Expand All @@ -26,5 +28,15 @@
<version>4.7</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.32</version>
</dependency>
<dependency>
<!-- SLF4J binding; kept on the same release as slf4j-api so the API and binding jars match. -->
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.32</version>
</dependency>
</dependencies>
</project>
Binary file added src/main/java/.DS_Store
Binary file not shown.
Binary file added src/main/java/nearsoft/.DS_Store
Binary file not shown.
Binary file added src/main/java/nearsoft/academy/.DS_Store
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
package nearsoft.academy.bigdata.recommendation;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;


/**
 * User-based movie recommender built on Apache Mahout's Taste API.
 *
 * <p>On construction the gzipped review dump is converted into a
 * {@code userIndex,productIndex,score} CSV file, because the Mahout file model
 * is fed numeric ids. The original string ids are mapped to sequential integers;
 * those mappings are kept in memory and also written to two index files next to
 * the CSV.
 */
public class MovieRecommender {
    private static final String DATA_PATH = System.getProperty("user.dir") + "/src/main/data/";
    private static final String CSV_DATA_PATH = DATA_PATH + "data.csv";
    private static final String USERS_INDEX_PATH = DATA_PATH + "users.csv";
    private static final String PRODUCTS_INDEX_PATH = DATA_PATH + "products.csv";

    // Keys of the only three fields of a review record that are needed.
    private static final String PRODUCT_ID_KEY = "product/productId:";
    private static final String USER_ID_KEY = "review/userId:";
    private static final String SCORE_KEY = "review/score:";

    private static final double NEIGHBORHOOD_THRESHOLD = 0.1;
    private static final int RECOMMENDATIONS_PER_USER = 3;

    private final Map<String, Integer> products = new HashMap<String, Integer>();
    private final Map<Integer, String> productsReverse = new HashMap<Integer, String>();
    private final Map<String, Integer> users = new HashMap<String, Integer>();
    private final String dataFilePath;
    private int totalUsers = 0;
    private int totalProducts = 0;
    private int totalReviews = 0;

    private DataModel model;
    private UserSimilarity similarity;
    private UserNeighborhood neighborhood;
    private UserBasedRecommender recommender;

    /**
     * Converts the review dump to CSV and builds the recommender on top of it.
     *
     * @param dataFilePath path to the gzipped review file (e.g. {@code movies.txt.gz})
     * @throws IOException if the dump cannot be read or the CSV/index files cannot be written
     * @throws TasteException if Mahout fails to build the model or recommender
     */
    public MovieRecommender(String dataFilePath) throws IOException, TasteException {
        this.dataFilePath = dataFilePath;

        // Let I/O failures propagate: building a model from a missing or
        // half-written CSV would only produce misleading results later.
        readFile();

        this.model = new FileDataModel(new File(CSV_DATA_PATH));
        this.similarity = new PearsonCorrelationSimilarity(this.model);
        this.neighborhood = new ThresholdUserNeighborhood(NEIGHBORHOOD_THRESHOLD, this.similarity, this.model);
        this.recommender = new GenericUserBasedRecommender(this.model, this.neighborhood, this.similarity);

        this.totalProducts = this.model.getNumItems();
        this.totalUsers = this.model.getNumUsers();
    }

    /**
     * Streams the gzipped dump (without unpacking it to disk) and writes one CSV
     * row per review, plus the product and user index files.
     *
     * <p>All three output files are truncated and rewritten on every call, so a
     * rerun does not accumulate duplicate index rows.
     */
    private void readFile() throws IOException {
        try (FileInputStream file = new FileInputStream(this.dataFilePath);
             GZIPInputStream gzip = new GZIPInputStream(file);
             BufferedReader reader = new BufferedReader(new InputStreamReader(gzip));
             FileWriter ratingsWriter = new FileWriter(CSV_DATA_PATH);
             FileWriter productsWriter = new FileWriter(PRODUCTS_INDEX_PATH);
             FileWriter usersWriter = new FileWriter(USERS_INDEX_PATH)) {

            String productId = null;
            String userId = null;
            String line;

            while ((line = reader.readLine()) != null) {
                String[] parts = line.split(" ");
                if (parts.length < 2) {
                    continue; // blank line, or a key without a value
                }

                switch (parts[0]) {
                    case PRODUCT_ID_KEY:
                        // A product line starts a new record; drop any stale user id
                        // so an incomplete record cannot leak into the next one.
                        productId = parts[1];
                        userId = null;
                        break;
                    case USER_ID_KEY:
                        userId = parts[1];
                        break;
                    case SCORE_KEY:
                        if (productId != null && userId != null) {
                            int userIdx = registerUser(userId, usersWriter);
                            int productIdx = registerProduct(productId, productsWriter);
                            ratingsWriter.write(userIdx + "," + productIdx + "," + parts[1] + "\n");
                            this.totalReviews++;
                        }
                        productId = null;
                        userId = null;
                        break;
                    default:
                        break; // field not needed
                }
            }
        }
    }

    /** Returns the numeric index of a product, assigning and persisting a new one on first sight. */
    private int registerProduct(String productId, FileWriter indexWriter) throws IOException {
        Integer known = this.products.get(productId);
        if (known != null) {
            return known;
        }
        int idx = this.totalProducts++;
        this.products.put(productId, idx);
        this.productsReverse.put(idx, productId);
        indexWriter.write(productId + "," + idx + "\n");
        return idx;
    }

    /** Returns the numeric index of a user, assigning and persisting a new one on first sight. */
    private int registerUser(String userId, FileWriter indexWriter) throws IOException {
        Integer known = this.users.get(userId);
        if (known != null) {
            return known;
        }
        int idx = this.totalUsers++;
        this.users.put(userId, idx);
        indexWriter.write(userId + "," + idx + "\n");
        return idx;
    }

    /**
     * Rebuilds the model and the in-memory id mappings from the CSV and index
     * files on disk, and adds the number of CSV rows to the review counter.
     *
     * <p>NOTE(review): nothing in this class calls this method; it appears to be
     * intended as an alternative to {@link #readFile()} when the files already
     * exist - confirm before wiring it in.
     */
    private void retrieveData() throws IOException {
        this.model = new FileDataModel(new File(CSV_DATA_PATH));

        try (BufferedReader productReader = new BufferedReader(new FileReader(PRODUCTS_INDEX_PATH))) {
            String product;
            while ((product = productReader.readLine()) != null) {
                String[] productParts = product.split(",");
                String productId = productParts[0];
                int idx = Integer.parseInt(productParts[1]);

                this.products.put(productId, idx);
                this.productsReverse.put(idx, productId);
            }
        }

        try (BufferedReader usersReader = new BufferedReader(new FileReader(USERS_INDEX_PATH))) {
            String user;
            while ((user = usersReader.readLine()) != null) {
                String[] userParts = user.split(",");
                this.users.put(userParts[0], Integer.parseInt(userParts[1]));
            }
        }

        try (BufferedReader reviewsReader = new BufferedReader(new FileReader(CSV_DATA_PATH))) {
            while (reviewsReader.readLine() != null) {
                this.totalReviews++;
            }
        }
    }

    /** @return number of review records written to the CSV */
    public int getTotalReviews() {
        return this.totalReviews;
    }

    /** @return number of distinct items reported by the data model */
    public int getTotalProducts() {
        return this.totalProducts;
    }

    /** @return number of distinct users reported by the data model */
    public int getTotalUsers() {
        return this.totalUsers;
    }

    /**
     * Recommends up to three products for a user.
     *
     * @param userID the user's original (string) id as it appears in the dump
     * @return the recommended products' original ids; empty when the user is unknown
     * @throws TasteException if the recommender fails
     */
    public List<String> getRecommendationsForUser(String userID) throws TasteException {
        List<String> recommendations = new ArrayList<String>();

        Integer userIdx = this.users.get(userID);
        if (userIdx == null) {
            return recommendations; // unknown user: nothing to recommend
        }

        for (RecommendedItem item : this.recommender.recommend(userIdx, RECOMMENDATIONS_PER_USER)) {
            // Translate Mahout's numeric item id back to the original product id.
            recommendations.add(this.productsReverse.get((int) item.getItemID()));
        }

        return recommendations;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@
public class MovieRecommenderTest {
@Test
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
//download movies.txt.gz from:
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");

// Obtain the current directory
String ROOT_PATH = System.getProperty("user.dir");

MovieRecommender recommender = new MovieRecommender(ROOT_PATH + "/src/main/data/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
Expand Down