diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..a6f69a2 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +big-data \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..81ad3ee --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml new file mode 100644 index 0000000..712ab9d --- /dev/null +++ b/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__asm_asm_3_1.xml b/.idea/libraries/Maven__asm_asm_3_1.xml new file mode 100644 index 0000000..3386f10 --- /dev/null +++ b/.idea/libraries/Maven__asm_asm_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_google_guava_guava_16_0.xml b/.idea/libraries/Maven__com_google_guava_guava_16_0.xml new file mode 100644 index 0000000..91cff2f --- /dev/null +++ b/.idea/libraries/Maven__com_google_guava_guava_16_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml new file mode 100644 index 0000000..bf486c9 --- /dev/null +++ b/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml new file mode 100644 index 0000000..ab01cdd --- /dev/null +++ b/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml new file mode 100644 index 0000000..e7e8ba0 --- /dev/null +++ b/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml b/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml new file mode 100644 index 0000000..1165a3b --- /dev/null +++ b/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml b/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml new file mode 100644 index 0000000..2c3025b --- /dev/null +++ b/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml new file mode 100644 index 0000000..1cb64ed --- /dev/null +++ b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml new file mode 100644 index 0000000..3fda2c0 --- /dev/null +++ b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml b/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml new file mode 100644 index 0000000..cec2493 --- /dev/null +++ b/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml b/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml new file mode 100644 index 0000000..2b149c4 --- /dev/null +++ b/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml b/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml new file mode 100644 index 0000000..3caee7e --- /dev/null +++ b/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml b/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml new file mode 100644 index 0000000..ae96781 --- /dev/null +++ b/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml b/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml new file mode 100644 index 0000000..49f1def --- /dev/null +++ b/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_el_commons_el_1_0.xml b/.idea/libraries/Maven__commons_el_commons_el_1_0.xml new file mode 100644 index 0000000..598d1b0 --- /dev/null +++ b/.idea/libraries/Maven__commons_el_commons_el_1_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml b/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml new file mode 100644 index 0000000..f040f16 --- /dev/null +++ b/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_io_commons_io_2_1.xml b/.idea/libraries/Maven__commons_io_commons_io_2_1.xml new file mode 100644 index 0000000..3b78142 --- /dev/null +++ b/.idea/libraries/Maven__commons_io_commons_io_2_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml b/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml new file mode 100644 index 0000000..5e8b8d3 --- /dev/null +++ b/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml b/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml new file mode 100644 index 0000000..e20ca3a --- /dev/null +++ b/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml b/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml new file mode 100644 index 0000000..7fb71b5 --- /dev/null +++ b/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_activation_activation_1_1.xml b/.idea/libraries/Maven__javax_activation_activation_1_1.xml new file mode 100644 index 0000000..180d587 --- /dev/null +++ b/.idea/libraries/Maven__javax_activation_activation_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml b/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml new file mode 100644 index 0000000..a3054d2 --- /dev/null +++ b/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml b/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml new file mode 100644 index 0000000..2a4dd7a --- /dev/null +++ b/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__junit_junit_4_7.xml b/.idea/libraries/Maven__junit_junit_4_7.xml new file mode 100644 index 0000000..4402995 --- /dev/null +++ b/.idea/libraries/Maven__junit_junit_4_7.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml new file mode 100644 index 0000000..32bfe3b --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml new file mode 100644 index 0000000..dde81ab --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml new file mode 100644 index 0000000..eaa8a65 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml b/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml new file mode 100644 index 0000000..cd8023f --- /dev/null +++ b/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml b/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml new file mode 100644 index 0000000..2717cd6 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml b/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml new file mode 100644 index 0000000..66fc4a9 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml b/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml new file mode 100644 index 0000000..c672cac --- /dev/null +++ b/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml b/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml new file mode 100644 index 0000000..79debcc --- /dev/null +++ b/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml b/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml new file mode 100644 index 0000000..98bd640 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml b/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml new file mode 100644 index 0000000..36db039 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml new file mode 100644 index 0000000..a02acc0 --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml new file mode 100644 index 0000000..9039684 --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml new file mode 100644 index 0000000..2e3cd1a --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml new file mode 100644 index 0000000..dc10a10 --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml b/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml new file mode 100644 index 0000000..d62802e --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml new file mode 100644 index 0000000..6f808f1 --- /dev/null +++ b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__stax_stax_api_1_0_1.xml b/.idea/libraries/Maven__stax_stax_api_1_0_1.xml new file mode 100644 index 0000000..0b13335 --- /dev/null +++ b/.idea/libraries/Maven__stax_stax_api_1_0_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml b/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml new file mode 100644 index 0000000..0f2d9ef --- /dev/null +++ b/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml b/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml new file mode 100644 index 0000000..6726a2d --- /dev/null +++ b/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..2795f7d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,11 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..6237568 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/sonarlint/issuestore/index.pb b/.idea/sonarlint/issuestore/index.pb new file mode 100644 index 0000000..e69de29 diff --git a/big-data.iml b/big-data.iml new file mode 100644 index 0000000..38c1e5e --- /dev/null +++ b/big-data.iml @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 8169ff7..960c511 100644 --- a/pom.xml +++ b/pom.xml @@ -1,30 +1,32 @@ - 4.0.0 + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + 4.0.0 - nearsoft.academy - big-data - 1.0-SNAPSHOT - jar + nearsoft.academy + big-data + 1.0-SNAPSHOT + jar - big-data - http://maven.apache.org + big-data + http://maven.apache.org - - UTF-8 - + + UTF-8 + 7 + 7 + - - - org.apache.mahout - mahout-core - 0.9 - - - junit - junit - 4.7 - test - - + + + org.apache.mahout + mahout-core + 0.9 + + + junit + junit + 4.7 + test + + diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..28ffe7e --- /dev/null +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,140 @@ +package nearsoft.academy.bigdata.recommendation; +import java.util.List; +import java.util.ArrayList; +import java.io.*; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.eval.RecommenderBuilder; +import org.apache.mahout.cf.taste.eval.RecommenderEvaluator; +import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.Recommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; +import java.util.HashMap; +import java.util.HashSet; +import java.util.zip.GZIPInputStream; +import java.io.File; +import java.io.IOException; + +public class MovieRecommender { + + + private final GenericUserBasedRecommender recommender; + private HashBiMap pr; + private HashBiMap us; + private int totalReviews=0; + + public MovieRecommender(String files) throws IOException, TasteException { + System.out.println("Static method"); + DataModel model = new FileDataModel(new File(transCSV(files))); + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + } + + private String transCSV(String files) { + try { + + File original = new File(files); + pr= HashBiMap.create(); + us= HashBiMap.create (); + File como = new File(original.getParentFile().getAbsolutePath() + "/dataset.csv"); + if (como.exists()) { + como.delete(); + } + else{ + como.createNewFile(); + } + + try + (InputStream fileStream = new FileInputStream(files); + InputStream gzipStream = new GZIPInputStream(fileStream); + Reader read = new InputStreamReader(gzipStream, "UTF8"); + BufferedReader br = new BufferedReader(read); + Writer writer = new BufferedWriter(new FileWriter(como));) + { + String score = ""; + String line; + Integer userId = null; + Integer productId = null; + boolean status = false; + while ((line = br.readLine()) != null) { + if (status) { + if (line.contains("review/userId")) { + String user =getValuePortionOfString(line); + if(!us.containsKey(user)){ + us.put(user,us.size()+1); + } + userId = us.get(user); + + } else if (line.contains("review/score")) { + score = getValuePortionOfString(line); + } else if (line.contains("review/summary")) { + writer.append(String.valueOf(userId)); + writer.append(","); + writer.append(String.valueOf(productId)); + writer.append(","); + writer.append(score); + writer.append("\n"); + score = ""; + productId = null; + status = false; + userId = null; + } + } else if (line.contains("product/productId")) { + String productName = getValuePortionOfString(line); + if(!pr.containsKey(productName)){ + pr.put(productName,pr.size()+1); + } + productId = pr.get(productName); + status = true; + totalReviews++; + } + + } + } + return como.getAbsolutePath(); + } catch (IOException e) + { + e.printStackTrace(); + throw new RuntimeException(files, e); + } + } + + private String getValuePortionOfString(String line) { + return line.substring(line.indexOf(":") + 2, line.length()); + } + public long getTotalReviews() { + return totalReviews; + } + public long getTotalProducts(){ + return pr.size(); + } + public long getTotalUsers(){ + return us.size(); + } + public void setTotalReviews(int totalReviews) { + this.totalReviews = totalReviews; + } + + public List getRecommendationsForUser(String user) throws TasteException { + List recommendations = recommender.recommend(us.get(user), 3); + List recommendMovie = new ArrayList<>(); + + BiMap products = pr.inverse(); + for (RecommendedItem recommendation : recommendations) { + System.out.println(recommendation); + String movie = products.get((int)recommendation.getItemID()); + recommendMovie.add(movie); + } + return recommendMovie; + } +} \ No newline at end of file diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..1b2e98b 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,7 +15,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("C:\\Users\\pauva\\OneDrive\\Documentos\\Academy Nearsoft\\movierecommender\\movies.txt.gz"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); diff --git a/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst new file mode 100644 index 0000000..578712f --- /dev/null +++ b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst @@ -0,0 +1,2 @@ +nearsoft\academy\bigdata\recommendation\MovieRecommenderTest.class +nearsoft\academy\bigdata\recommendation\MovieRecommender.class diff --git a/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst new file mode 100644 index 0000000..3a0dc2c --- /dev/null +++ b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst @@ -0,0 +1,2 @@ +C:\Users\pauva\Desktop\big-data-exercises-master\src\test\java\nearsoft\academy\bigdata\recommendation\MovieRecommenderTest.java +C:\Users\pauva\Desktop\big-data-exercises-master\src\test\java\nearsoft\academy\bigdata\recommendation\MovieRecommender.java diff --git a/target/surefire-reports/TEST-nearsoft.academy.bigdata.recommendation.MovieRecommenderTest.xml b/target/surefire-reports/TEST-nearsoft.academy.bigdata.recommendation.MovieRecommenderTest.xml new file mode 100644 index 0000000..d8021f7 --- /dev/null +++ b/target/surefire-reports/TEST-nearsoft.academy.bigdata.recommendation.MovieRecommenderTest.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/target/surefire-reports/nearsoft.academy.bigdata.recommendation.MovieRecommenderTest.txt b/target/surefire-reports/nearsoft.academy.bigdata.recommendation.MovieRecommenderTest.txt new file mode 100644 index 0000000..65f05de --- /dev/null +++ b/target/surefire-reports/nearsoft.academy.bigdata.recommendation.MovieRecommenderTest.txt @@ -0,0 +1,4 @@ +------------------------------------------------------------------------------- +Test set: nearsoft.academy.bigdata.recommendation.MovieRecommenderTest +------------------------------------------------------------------------------- +Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 371.493 sec diff --git a/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class new file mode 100644 index 0000000..b9796cb Binary files /dev/null and b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class differ diff --git a/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class new file mode 100644 index 0000000..416710c Binary files /dev/null and b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class differ