From c4cf7fc4b1840442b21325da3a18e8c2ed259454 Mon Sep 17 00:00:00 2001 From: jperezg-st <62769942+jperezg-st@users.noreply.github.com> Date: Tue, 27 Apr 2021 00:02:48 -0500 Subject: [PATCH 1/3] movie recommender solution --- .../recommendation/MovieRecommender.java | 131 ++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..52e293d --- /dev/null +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,131 @@ +package nearsoft.academy.bigdata.recommendation; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + + + +public class MovieRecommender{ + private String pathF=""; + private int dataProducts = 0; + private int dataUsers = 0; + private int dataReview = 0; + HashMap usersMap = new HashMap(); + HashMap productsMap = new HashMap(); + private String recoverdProduct = ""; + private String recoverdUser = ""; + private String recoverdScore = ""; + + public MovieRecommender(String pathFile) throws IOException, NullPointerException { + this.pathF = pathFile; + this.dataReview = 0; + this.dataProducts = 0; + this.dataUsers = 0; + this.dataProcess(); + } + + private void dataProcess() throws IOException { + + GZIPInputStream inGZIP = new GZIPInputStream(new FileInputStream(this.pathF)); + BufferedReader bufferReader = new BufferedReader(new InputStreamReader(inGZIP)); + String readingLine; + + FileWriter fileWriter = new FileWriter(new File("movies_data.csv")); + BufferedWriter bufferedWriter = new BufferedWriter(fileWriter); + + Pattern productsPattern = Pattern.compile("product\\/productId: ([A-Z0-9]+)"); + Pattern userPattern = Pattern.compile("review\\/userId: ([\\D\\d]+)"); + Pattern scorePattern = Pattern.compile("review\\/score: ([0-9]+).([0-9]+)"); + + Matcher matcherProduct, matcherUser, matcherScore; + + while((readingLine = bufferReader.readLine())!=null) { + matcherProduct = productsPattern.matcher(readingLine); + matcherUser = userPattern.matcher(readingLine); + matcherScore = scorePattern.matcher(readingLine); + if(matcherProduct.matches()) { recoverdProduct = matcherProduct.group(1); + if (!productsMap.containsKey(recoverdProduct)) { + dataProducts++; + productsMap.put(recoverdProduct, dataProducts); + } + }else if (matcherUser.matches()) { + dataReview++; + recoverdUser = matcherUser.group(1); + if(!usersMap.containsKey(recoverdUser)) { + dataUsers++; + usersMap.put(recoverdUser, dataUsers); + } + }else if (matcherScore.matches()) { recoverdScore = matcherScore.group(1); } + if (!recoverdUser.equals("") && (!recoverdProduct.equals("") && (!recoverdScore.equals(""))) ) { + bufferedWriter.write(usersMap.get(recoverdUser) + "," + productsMap.get(recoverdProduct) + "," + recoverdScore + "\n"); + recoverdUser = ""; + recoverdProduct = ""; + recoverdScore = ""; + } + } + bufferedWriter.close(); + fileWriter.close(); + bufferReader.close(); + } + + public int getTotalReviews() { + return this.dataReview; + } + public int getTotalProducts() { + this.dataProducts = productsMap.size(); + return this.dataProducts; + } + public int getTotalUsers() { + return this.dataUsers; + } + + private String getProductID(int value) + { + for (String key : this.productsMap.keySet()) { + if (productsMap.get(key)==value) { + return key; + } + } + return null; + } + + + public List getRecommendationsForUser(String userID) throws IOException, TasteException, NullPointerException{ + List resultsList = new ArrayList(); + Integer userId = this.usersMap.get(userID); + DataModel model = new FileDataModel(new File("movies_data.csv")); + + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + List recommendations = recommender.recommend(userId, 3); + for (RecommendedItem recommendation : recommendations){ + resultsList.add(getProductID(((int) recommendation.getItemID()))); + } + return resultsList; + } +} From 49854be5143513db4b8ec56061ede963c6bcc8e7 Mon Sep 17 00:00:00 2001 From: jperezg-st <62769942+jperezg-st@users.noreply.github.com> Date: Tue, 27 Apr 2021 00:24:16 -0500 Subject: [PATCH 2/3] Delete MovieRecommender.java --- .../recommendation/MovieRecommender.java | 131 ------------------ 1 file changed, 131 deletions(-) delete mode 100644 src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java deleted file mode 100644 index 52e293d..0000000 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java +++ /dev/null @@ -1,131 +0,0 @@ -package nearsoft.academy.bigdata.recommendation; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.zip.GZIPInputStream; - -import org.apache.mahout.cf.taste.common.TasteException; -import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; -import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; -import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; -import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; -import org.apache.mahout.cf.taste.model.DataModel; -import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; -import org.apache.mahout.cf.taste.recommender.RecommendedItem; -import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; -import org.apache.mahout.cf.taste.similarity.UserSimilarity; - - - -public class MovieRecommender{ - private String pathF=""; - private int dataProducts = 0; - private int dataUsers = 0; - private int dataReview = 0; - HashMap usersMap = new HashMap(); - HashMap productsMap = new HashMap(); - private String recoverdProduct = ""; - private String recoverdUser = ""; - private String recoverdScore = ""; - - public MovieRecommender(String pathFile) throws IOException, NullPointerException { - this.pathF = pathFile; - this.dataReview = 0; - this.dataProducts = 0; - this.dataUsers = 0; - this.dataProcess(); - } - - private void dataProcess() throws IOException { - - GZIPInputStream inGZIP = new GZIPInputStream(new FileInputStream(this.pathF)); - BufferedReader bufferReader = new BufferedReader(new InputStreamReader(inGZIP)); - String readingLine; - - FileWriter fileWriter = new FileWriter(new File("movies_data.csv")); - BufferedWriter bufferedWriter = new BufferedWriter(fileWriter); - - Pattern productsPattern = Pattern.compile("product\\/productId: ([A-Z0-9]+)"); - Pattern userPattern = Pattern.compile("review\\/userId: ([\\D\\d]+)"); - Pattern scorePattern = Pattern.compile("review\\/score: ([0-9]+).([0-9]+)"); - - Matcher matcherProduct, matcherUser, matcherScore; - - while((readingLine = bufferReader.readLine())!=null) { - matcherProduct = productsPattern.matcher(readingLine); - matcherUser = userPattern.matcher(readingLine); - matcherScore = scorePattern.matcher(readingLine); - if(matcherProduct.matches()) { recoverdProduct = matcherProduct.group(1); - if (!productsMap.containsKey(recoverdProduct)) { - dataProducts++; - productsMap.put(recoverdProduct, dataProducts); - } - }else if (matcherUser.matches()) { - dataReview++; - recoverdUser = matcherUser.group(1); - if(!usersMap.containsKey(recoverdUser)) { - dataUsers++; - usersMap.put(recoverdUser, dataUsers); - } - }else if (matcherScore.matches()) { recoverdScore = matcherScore.group(1); } - if (!recoverdUser.equals("") && (!recoverdProduct.equals("") && (!recoverdScore.equals(""))) ) { - bufferedWriter.write(usersMap.get(recoverdUser) + "," + productsMap.get(recoverdProduct) + "," + recoverdScore + "\n"); - recoverdUser = ""; - recoverdProduct = ""; - recoverdScore = ""; - } - } - bufferedWriter.close(); - fileWriter.close(); - bufferReader.close(); - } - - public int getTotalReviews() { - return this.dataReview; - } - public int getTotalProducts() { - this.dataProducts = productsMap.size(); - return this.dataProducts; - } - public int getTotalUsers() { - return this.dataUsers; - } - - private String getProductID(int value) - { - for (String key : this.productsMap.keySet()) { - if (productsMap.get(key)==value) { - return key; - } - } - return null; - } - - - public List getRecommendationsForUser(String userID) throws IOException, TasteException, NullPointerException{ - List resultsList = new ArrayList(); - Integer userId = this.usersMap.get(userID); - DataModel model = new FileDataModel(new File("movies_data.csv")); - - UserSimilarity similarity = new PearsonCorrelationSimilarity(model); - UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); - UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); - List recommendations = recommender.recommend(userId, 3); - for (RecommendedItem recommendation : recommendations){ - resultsList.add(getProductID(((int) recommendation.getItemID()))); - } - return resultsList; - } -} From e4248cdd3a95ace32ef59bd97b6648dd0ca17259 Mon Sep 17 00:00:00 2001 From: jperezg-st <62769942+jperezg-st@users.noreply.github.com> Date: Tue, 27 Apr 2021 00:26:29 -0500 Subject: [PATCH 3/3] Movie Recommender solution --- .../recommendation/MovieRecommender.java | 131 ++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..7969dc3 --- /dev/null +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,131 @@ +package nearsoft.academy.bigdata.recommendation; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + + + +public class MovieRecommender{ + private String file_s=""; + private int dataProducts = 0; + private int dataUsers = 0; + private int dataReview = 0; + HashMap usersMap = new HashMap(); + HashMap productsMap = new HashMap(); + private String Products = ""; + private String Users = ""; + private String Scores = ""; + + public MovieRecommender(String file_sile) throws IOException, NullPointerException { + this.file_s = file_sile; + this.dataReview = 0; + this.dataProducts = 0; + this.dataUsers = 0; + this.dataProcess(); + } + + private void dataProcess() throws IOException { + + GZIPInputStream inGZIP = new GZIPInputStream(new FileInputStream(this.file_s)); + BufferedReader bufferReader = new BufferedReader(new InputStreamReader(inGZIP)); + String readingLine; + + FileWriter fileWriter = new FileWriter(new File("movies_data.csv")); + BufferedWriter bufferedWriter = new BufferedWriter(fileWriter); + + Pattern productsPattern = Pattern.compile("product\\/productId: ([A-Z0-9]+)"); + Pattern userPattern = Pattern.compile("review\\/userId: ([\\D\\d]+)"); + Pattern scorePattern = Pattern.compile("review\\/score: ([0-9]+).([0-9]+)"); + + Matcher matcherProduct, matcherUser, matcherScore; + + while((readingLine = bufferReader.readLine())!=null) { + matcherProduct = productsPattern.matcher(readingLine); + matcherUser = userPattern.matcher(readingLine); + matcherScore = scorePattern.matcher(readingLine); + if(matcherProduct.matches()) { Products = matcherProduct.group(1); + if (!productsMap.containsKey(Products)) { + dataProducts++; + productsMap.put(Products, dataProducts); + } + }else if (matcherUser.matches()) { + dataReview++; + Users = matcherUser.group(1); + if(!usersMap.containsKey(Users)) { + dataUsers++; + usersMap.put(Users, dataUsers); + } + }else if (matcherScore.matches()) { Scores = matcherScore.group(1); } + if (!Users.equals("") && (!Products.equals("") && (!Scores.equals(""))) ) { + bufferedWriter.write(usersMap.get(Users) + "," + productsMap.get(Products) + "," + Scores + "\n"); + Users = ""; + Products = ""; + Scores = ""; + } + } + bufferedWriter.close(); + fileWriter.close(); + bufferReader.close(); + } + + public int getTotalReviews() { + return this.dataReview; + } + public int getTotalProducts() { + this.dataProducts = productsMap.size(); + return this.dataProducts; + } + public int getTotalUsers() { + return this.dataUsers; + } + + private String getProductID(int value) + { + for (String key : this.productsMap.keySet()) { + if (productsMap.get(key)==value) { + return key; + } + } + return null; + } + + + public List getRecommendationsForUser(String userID) throws IOException, TasteException, NullPointerException{ + List resultsList = new ArrayList(); + Integer userId = this.usersMap.get(userID); + DataModel model = new FileDataModel(new File("movies_data.csv")); + + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + List recommendations = recommender.recommend(userId, 3); + for (RecommendedItem recommendation : recommendations){ + resultsList.add(getProductID(((int) recommendation.getItemID()))); + } + return resultsList; + } +}