Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,19 @@
<groupId>nearsoft.academy</groupId>
<artifactId>big-data</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>7</source>
<target>7</target>
</configuration>
</plugin>
</plugins>
</build>
<packaging>jar</packaging>

<name>big-data</name>
<url>http://maven.apache.org</url>
Expand All @@ -15,6 +27,16 @@
</properties>

<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package nearsoft.academy.bigdata.recommendation;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.zip.GZIPInputStream;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

public class MovieRecommender {
private String file;
private Hashtable<String, Integer> HashProduct = new Hashtable<String, Integer>();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why did you choose Hashtable vs HashMap?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was trying different ways of handling large amounts of data to see which would give me faster results for the tests. Hashtable was the approach I found while working on the solution, and I thought it was the one that would perform better, but after doing some research I now understand that there is nothing Hashtable does that can't be done with HashMap...

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, I just wanted you to investigate the difference, which is synchronization. Please research that.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was actually reading about that. I also read that Hashtable used to be the choice for thread-safe applications, but now we can use Collections.synchronizedMap() or ConcurrentHashMap instead.

private Hashtable<Integer, String> InvertedHashProduct = new Hashtable<Integer, String>();
private Hashtable<String, Integer> HashUser = new Hashtable<String, Integer>();
private int users =0, products =0, reviews = 0;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Always format your code consistently: "=0" vs "= 0".


/**
 * Creates a recommender for the given gzip-compressed reviews file.
 *
 * <p>The path is stored and {@code getData()} is invoked immediately, so the
 * input is parsed (and {@code Result.csv} rewritten) during construction.
 *
 * @param file path to the gzip-compressed reviews file; must not be {@code null}
 * @throws IOException if {@code getData()} fails while reading the input or writing the CSV
 * @throws NullPointerException if {@code file} is {@code null}
 */
public MovieRecommender(String file) throws IOException{
    // Fail fast on a null path: getData() deletes any existing Result.csv
    // before it opens the input, so validating here avoids destroying a
    // previous result for a call that could never succeed.
    this.file = Objects.requireNonNull(file, "file");
    getData();
}

public String getData() throws IOException {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why public?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

int thisProduct =0, thisUser =0;
Files.deleteIfExists(Paths.get("Result.csv"));
File result = new File("Result.csv");
InputStream fileReader = new GZIPInputStream(new FileInputStream(this.file));
BufferedReader br = new BufferedReader(new InputStreamReader(fileReader));
FileWriter fileWriter = new FileWriter(result);
BufferedWriter bw = new BufferedWriter(fileWriter);
String line;
String[] sp;
String key, value;

while((line = br.readLine()) != null) {
if (line.length() >= 0) {
sp = line.split(" ");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if there are no blank spaces in the line being split?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not be able to get the correct values or keys... Fixed

key = sp[0];
if (key.equals("product/productId:")) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

key.equals("product/productId:") can throw a NullPointerException if key is null;
"product/productId:".equals(key) can't. Something to keep in mind.

value = sp[1];
if (!HashProduct.containsKey(value)){
HashProduct.put(value,products);
InvertedHashProduct.put(products,value);
thisProduct = HashProduct.get(value);
products++;
}else{
thisProduct = HashProduct.get(value);
}
}else if (key.equals("review/userId:")){
value = sp[1];
if (!HashUser.containsKey(value)){
HashUser.put(value, users);
thisUser = HashUser.get(value);
users++;
}else{
thisUser = HashUser.get(value);
}
}else if (key.equals("review/score:")){
String score = sp[1];
bw.write(thisUser + "," + thisProduct + "," + score + "\n");
reviews ++;
}
}
}
br.close();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like that this is not in a finally block. Have you heard about try-with-resources? Please look into it.

bw.close();
return null;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is always returning null helpful in some way?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed getData() to private void instead of public String



}

/**
 * Reports how many reviews were counted while parsing the input file.
 *
 * @return the number of {@code review/score:} lines seen by {@code getData()}
 */
public int getTotalReviews() {
    return this.reviews;
}

/**
 * Reports how many distinct products were found while parsing the input file.
 *
 * @return the number of different {@code product/productId:} values registered by {@code getData()}
 */
public int getTotalProducts() {
    return this.products;
}

/**
 * Reports how many distinct users were found while parsing the input file.
 *
 * @return the number of different {@code review/userId:} values registered by {@code getData()}
 */
public int getTotalUsers() {
    return this.users;
}

public List<String> getRecommendationsForUser(String user) throws IOException, TasteException {
DataModel model = new FileDataModel(new File("Result.csv"));
UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

int userValue = HashUser.get(user);

List RecommendedProducts = new ArrayList<String>();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ArrayList has a constructor that takes an initialCapacity; why not use it, sized from the recommendations list?

Can you explain what benefit implementing this suggestion would bring?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Giving the ArrayList an initial capacity sizes its backing array to exactly what we are going to use, which reduces memory use and avoids having to grow and copy the array as elements are added.

List<RecommendedItem> recommendations = recommender.recommend(userValue,3);
for (RecommendedItem recommendation : recommendations) {
RecommendedProducts.add(InvertedHashProduct.get((int)recommendation.getItemID()));
}
return RecommendedProducts;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
public class MovieRecommenderTest {
@Test
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
MovieRecommender recommender = new MovieRecommender("/Users/alonso/Documents/big-data-exercises/movies.txt.gz");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard-coded paths don't look good at all.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed...


assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
Expand All @@ -27,4 +28,4 @@ public void testDataInfo() throws IOException, TasteException {

}

}
}