Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
</properties>

<dependencies>
Expand All @@ -26,5 +28,66 @@
<version>4.7</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.13.3</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.13.3</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.13.3</version>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package nearsoft.academy.bigdata.recommendation;

import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

public class MovieRecommender {
private long totalReviews;
private long totalUsers;
private long totalMovies;
private UserBasedRecommender mahoutRecommender;
Hashtable<String, Long> users = new Hashtable<String, Long>();
Hashtable<String, Long> movies = new Hashtable<String, Long>();
Hashtable<Long, String> hashToMovie = new Hashtable<Long, String>();

public MovieRecommender(String pathToRecommendations) {
this.totalReviews = 0;
this.totalUsers = 0;
this.totalMovies = 0;
users = new Hashtable<String, Long>();
movies = new Hashtable<String, Long>();
hashToMovie = new Hashtable<Long, String>();

try {
ReviewParser parser = new ReviewParser(pathToRecommendations);
String pathToCvs = pathToRecommendations.split("\\.")[0] + ".cvs";

FileWriter cvsWriter = new FileWriter(pathToCvs);
Review review = parser.getReview();
while (review != null) {
totalReviews += 1;

if (users.putIfAbsent(review.userId, totalUsers) == null) {
totalUsers += 1;
}
if (movies.putIfAbsent(review.movieId, totalMovies) == null) {
hashToMovie.put(totalMovies, review.movieId);
totalMovies += 1;
}

long hashUser = users.get(review.userId);
long hashMovie = movies.get(review.movieId);
String cvsLine = String.format("%d,%d,%.1f", hashUser, hashMovie, review.rating);
cvsWriter.append(cvsLine + "\n");

review = parser.getReview();
}

cvsWriter.close();
// Now we create the recommender.
DataModel model = new FileDataModel(new File(pathToCvs));
UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
mahoutRecommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

} catch (Exception e) {
e.printStackTrace();
System.out.println(e.getMessage() + totalReviews);
}
}

public long getTotalReviews() {
return totalReviews;
}

public long getTotalProducts() {
return totalMovies;
}

public long getTotalUsers() {
return totalUsers;
}

public List<String> getRecommendationsForUser(String user) {
long userHash = users.get(user);
List<String> moviesId = new ArrayList<String>();

try {
List<RecommendedItem> recommendations = mahoutRecommender.recommend(userHash, 3);
for (RecommendedItem item : recommendations) {
Long movieHash = item.getItemID();
moviesId.add(hashToMovie.get(movieHash));
}

} catch (TasteException e) {
e.printStackTrace();
}

return moviesId;
}
}
13 changes: 13 additions & 0 deletions src/test/java/nearsoft/academy/bigdata/recommendation/Review.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package nearsoft.academy.bigdata.recommendation;

public class Review {
public String userId;
public String movieId;
public float rating;

public Review(String userId, String movieId, float rating) {
this.userId = userId ;
this.movieId = movieId ;
this.rating = rating ;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package nearsoft.academy.bigdata.recommendation;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;

public class ReviewParser {
private BufferedReader buffer;

public ReviewParser(String pathToRecommendations) throws Exception {
this.buffer = new BufferedReader(new FileReader(pathToRecommendations));
}

public Review getReview() throws Exception {
List<String> lines = new ArrayList<String>();
String line = "";
String userId = "";
String movieId = "";
String ratingString = "" ;
float rating = -1;

line = buffer.readLine();

if (line == null)
return null;

while (!line.isEmpty()) {
lines.add(line);
line = buffer.readLine();

if( line.isEmpty() && lines.size() < 8 )
line = buffer.readLine();
}

userId = lines.stream().filter(title -> title.contains("review/userId")).findFirst().get().split(" ")[1];
movieId = lines.stream().filter(title -> title.contains("product/productId")).findFirst().get()
.split(" ")[1];

ratingString = lines.stream().filter(title -> title.contains("review/score")).findFirst().get()
.split(" ")[1];

rating = Float.parseFloat(ratingString);

return new Review(userId, movieId, rating);
}
}

// public class ReviewParser {
// private GZIPInputStream stream;
// private int bufferLen = 4098;
// private String nextReviewStart = "";

// public ReviewParser(FileInputStream compressed) throws IOException {
// this.stream = new GZIPInputStream(compressed);
// }

// public List<Review> getReview() throws IOException {
// List<Review> reviews = new ArrayList<Review>();
// byte[] buffer = new byte[bufferLen];
// boolean reviewEnd = false;
// String reviewText = "";
// int bytesParsed ;

// while ((bytesParsed = stream.read(buffer)) > 0) {

// }

// while (!reviewEnd) {
// int bytesParsed = stream.read(buffer, 0, bufferLen);

// if (bytesParsed == -1)
// reviewEnd = true;

// String line = new String(Arrays.asList(buffer).stream().filter(predicate));
// reviewText = reviewText + buffer.stream().filter;
// String[] splitText = reviewText.split("\n\n");

// if (splitText.length > 1 || reviewEnd) {
// int i;
// for (i = 0; i < splitText.length - 1; ++i) {
// if (i == 0)
// reviewText = nextReviewStart + splitText[i];

// else
// reviewText = splitText[i];

// List<String> reviewElements = Arrays.asList(reviewText.split("\n"));
// String movieId = reviewElements.stream().filter(title ->
// title.contains("product/productId")).findFirst()
// .get().split(" ")[1];
// String userId = reviewElements.stream().filter(title ->
// title.contains("review/userId")).findFirst().get()
// .split(" ")[1];

// float rating;
// String ratingString = reviewElements.stream().filter(title ->
// title.contains("review/score")).findFirst()
// .get().split(" ")[1];

// try {
// rating = Float.parseFloat(ratingString);
// } catch (NumberFormatException e) {
// rating = getFloatFromMaybeCorrupted(ratingString);
// }

// reviews.add(new Review(userId, movieId, rating));

// }

// nextReviewStart = splitText[i];
// reviewEnd = true;
// }
// }

// return reviews;
// }

// private float getFloatFromMaybeCorrupted(String corruptedFloat) {
// String goodString = "";
// for (char c : corruptedFloat.toCharArray()) {
// if ((c >= '0' && c <= '9') || c == '.') {
// goodString += c;
// }
// }

// float goodFloat ;
// try {
// goodFloat = Float.parseFloat(goodString);
// } catch (Exception e) {
// // Probably text
// goodFloat = -1 ;
// }

// return goodFloat ;
// }
// }
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Important

This code takes a `.txt` file; that means that you may need to decompress before the `.gz` of the movies.

The Maven `pom.xml` file also changed, so it would be a good idea to run `mvn compile` before running the tests.

Also, change the path to the `.txt` in the test before running.