Skip to content

Commit cbd230f

Browse files
authored
Code
1 parent 748e0bc commit cbd230f

6 files changed

+180
-0
lines changed

ListeningCount.java

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import org.apache.spark.SparkConf;
2+
import org.apache.spark.api.java.JavaPairRDD;
3+
import org.apache.spark.api.java.JavaRDD;
4+
import org.apache.spark.api.java.JavaSparkContext;
5+
import scala.Tuple2;
6+
7+
public class ListeningCount {
8+
public static void main(String[] args) {
9+
//SparkConf conf = new SparkConf().setAppName("ListeningCount").setMaster("local[*]");
10+
SparkConf conf = new SparkConf().setAppName("ListeningCount").setMaster("yarn");
11+
JavaSparkContext sc = new JavaSparkContext(conf);
12+
JavaRDD<String> file = sc.textFile("hdfs://master:9000/user/root/input/user_artists.dat").repartition(1);
13+
//JavaRDD<String> file = sc.textFile("input/user_artists.dat").repartition(1);
14+
String head = file.first();
15+
file = file.filter(row -> !row.equals(head));
16+
// JavaRDD< String > file = sc.textFile("input/user_artists.dat").
17+
// mapPartitionsWithIndex((index, iter) -> {
18+
// if (index == 0 && iter.hasNext()) {
19+
// iter.next();
20+
// if (iter.hasNext()) {JavaRDD<String> file = sc.textFile("hdfs://master:9000/user/root/input/user_artists.dat").repartition(1)
21+
// iter.next();
22+
// }
23+
// }
24+
// return iter;
25+
// }, true);;
26+
JavaPairRDD<String,Integer> pair = file.
27+
mapToPair(s -> new Tuple2(s.split("\t")[1],Integer.parseInt(s.split("\t")[2])));
28+
JavaPairRDD<String,Integer> count = pair.reduceByKey((int1,int2) ->(int1+int2));
29+
JavaPairRDD<Integer,String> listencount = count.mapToPair(ls -> new Tuple2<>(ls._2,ls._1));
30+
JavaPairRDD<Integer,String> listencountsort = listencount.sortByKey(true);
31+
JavaPairRDD<Integer,String> listencountsort1 = listencount.sortByKey(false);
32+
JavaPairRDD<String,Integer> list = listencountsort1.mapToPair(listen -> new Tuple2<>(listen._2,listen._1));
33+
list.foreach(num->System.out.println("Listening counts of Artists \""+num._1+"\" is "+ num._2));
34+
sc.close();
35+
36+
}
37+
38+
}
39+

QuestionFour.java

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import org.apache.spark.SparkConf;
2+
import org.apache.spark.api.java.JavaPairRDD;
3+
import org.apache.spark.api.java.JavaRDD;
4+
import org.apache.spark.api.java.JavaSparkContext;
5+
import scala.Tuple2;
6+
7+
import java.util.Arrays;
8+
import java.util.Map;
9+
10+
public class QuestionFour {
11+
12+
public static void main(String[] args){
13+
SparkConf conf = new SparkConf().setAppName("QuestionFour").setMaster("yarn");
14+
JavaSparkContext sc = new JavaSparkContext(conf);
15+
JavaRDD<String> file = sc.textFile("hdfs://master:9000/user/root/input/access_log").repartition(3);
16+
JavaPairRDD<String,Integer> pair = file.mapToPair(s -> new Tuple2(s.split(" ")[0],1));
17+
JavaPairRDD<String,Integer> count = pair.reduceByKey((int1,int2) ->(int1+int2));
18+
JavaPairRDD<Integer,String> webcount = count.mapToPair(listen -> new Tuple2<>(listen._2,listen._1));
19+
JavaPairRDD<Integer,String> webcountsort = webcount.sortByKey(true);
20+
JavaPairRDD<Integer,String> webcountsort1 = webcount.sortByKey(false);
21+
JavaPairRDD<String,Integer> list =webcountsort1.mapToPair(listen -> new Tuple2<>(listen._2,listen._1));
22+
System.out.println("the IP " + list.first()._1 + " made access for "+ list.first()._2+"times");
23+
}
24+
}

QuestionOne.java

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import org.apache.spark.SparkConf;
2+
import org.apache.spark.api.java.JavaPairRDD;
3+
import org.apache.spark.api.java.JavaRDD;
4+
import org.apache.spark.api.java.JavaSparkContext;
5+
import scala.Tuple2;
6+
7+
import java.util.Arrays;
8+
import java.util.HashMap;
9+
import java.util.Map;
10+
import java.util.List;
11+
12+
public class QuestionOne {
13+
14+
public static void main(String[] args){
15+
long start = System.currentTimeMillis();
16+
SparkConf conf = new SparkConf().setAppName("QuestionOne").setMaster("yarn");
17+
JavaSparkContext sc = new JavaSparkContext(conf);
18+
JavaRDD<String> file = sc.textFile("hdfs://master:9000/user/root/input/access_log").repartition(3);
19+
JavaRDD<String> url = file.flatMap(line -> Arrays.asList(line.split(" ")[6]).iterator());
20+
Map<String, Long> numurl = url.countByValue();
21+
//Map<String, Long> url1 = new HashMap<>();
22+
//url1.put("/assets/img/loading.gif was hit",url1.get("/assets/img/loading.gif"));
23+
for(Map.Entry<String, Long> e: numurl.entrySet()) {
24+
if (e.getKey().equals("/assets/img/loading.gif")) {
25+
System.out.println("The times of hit to website \"/assets/img/loading.gif\" was " + e.getValue());
26+
}
27+
}
28+
long end = System.currentTimeMillis();
29+
System.out.println("the running time of the program is "+(end-start));
30+
//System.out.println("The times of hit to website \"/assets/img/loading.gif\" was " + numurl.get("/assets/img/loading.gif"));
31+
32+
33+
}
34+
}

QuestionThree.java

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import org.apache.spark.SparkConf;
2+
import org.apache.spark.api.java.JavaPairRDD;
3+
import org.apache.spark.api.java.JavaRDD;
4+
import org.apache.spark.api.java.JavaSparkContext;
5+
import scala.Tuple2;
6+
7+
import java.util.Arrays;
8+
import java.util.Map;
9+
10+
public class QuestionThree {
11+
12+
public static void main(String[] args){
13+
SparkConf conf = new SparkConf().setAppName("QuestionThree").setMaster("yarn");
14+
JavaSparkContext sc = new JavaSparkContext(conf);
15+
JavaRDD<String> file = sc.textFile("hdfs://master:9000/user/root/input/access_log").repartition(3);
16+
JavaPairRDD<String,Integer> pair = file.mapToPair(s -> new Tuple2(s.split(" ")[6],1));
17+
JavaPairRDD<String,Integer> count = pair.reduceByKey((int1,int2) ->(int1+int2));
18+
JavaPairRDD<Integer,String> webcount = count.mapToPair(listen -> new Tuple2<>(listen._2,listen._1));
19+
JavaPairRDD<Integer,String> webcountsort = webcount.sortByKey(true);
20+
JavaPairRDD<Integer,String> webcountsort1 = webcount.sortByKey(false);
21+
JavaPairRDD<String,Integer> list =webcountsort1.mapToPair(listen -> new Tuple2<>(listen._2,listen._1));
22+
//System.out.println("The website \"/assets/img/loading.gif\" was hit " + numurl.get("/assets/img/loading.gif") + " times");
23+
System.out.println("the most hit path " + list.first()._1 + " was hit "+ list.first()._2+"times");
24+
}
25+
}

QuestionTwo.java

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import org.apache.spark.SparkConf;
2+
import org.apache.spark.api.java.JavaRDD;
3+
import org.apache.spark.api.java.JavaSparkContext;
4+
5+
import java.util.Arrays;
6+
import java.util.Map;
7+
8+
public class QuestionTwo {
9+
10+
public static void main(String[] args){
11+
SparkConf conf = new SparkConf().setAppName("QuestionTwo").setMaster("yarn");
12+
JavaSparkContext sc = new JavaSparkContext(conf);
13+
JavaRDD<String> file = sc.textFile("hdfs://master:9000/user/root/input/access_log").repartition(3);
14+
JavaRDD<String> url = file.flatMap(line -> Arrays.asList(line.split(" ")[6]).iterator());
15+
Map<String, Long> numurl = url.countByValue();
16+
System.out.println("The time of hit to website was " + numurl.get("/assets/js/lightbox.js"));
17+
for(Map.Entry<String, Long> e: numurl.entrySet()) {
18+
if (e.getKey().equals("/assets/js/lightbox.js")) {
19+
System.out.println("The times of hit to website \" / assets / js / lightbox.js\"\" was " + e.getValue());
20+
}
21+
}
22+
sc.close();
23+
}
24+
}
25+
26+

newOneTwo.java

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import org.apache.spark.SparkConf;
2+
import org.apache.spark.api.java.JavaPairRDD;
3+
import org.apache.spark.api.java.JavaRDD;
4+
import org.apache.spark.api.java.JavaSparkContext;
5+
import scala.Tuple2;
6+
7+
import java.util.Arrays;
8+
import java.util.HashMap;
9+
import java.util.Map;
10+
import java.util.List;
11+
12+
public class newOneTwo {
13+
14+
public static void main(String[] args){
15+
long start = System.currentTimeMillis();
16+
SparkConf conf = new SparkConf().setAppName("NewOneTwo").setMaster("yarn");
17+
//SparkConf conf = new SparkConf().setAppName("NewOneTwo").setMaster("local[*]");
18+
JavaSparkContext sc = new JavaSparkContext(conf);
19+
JavaRDD<String> file = sc.textFile("hdfs://master:9000/user/root/input/access_log").repartition(3);
20+
//JavaRDD<String> file = sc.textFile("input/access_log").repartition(3);
21+
JavaRDD<String> url = file.flatMap(line -> Arrays.asList(line.split(" ")[6]).iterator());
22+
JavaRDD<String> url1 = url.filter(s -> s.contains("/assets/js/lightbox.js")) ;
23+
long a = url1.count();
24+
long intermid = System.currentTimeMillis();
25+
JavaRDD<String> url2 = url.filter(s -> s.contains("/assets/js/lightbox.js")) ;
26+
long b = url2.count();
27+
long end = System.currentTimeMillis();
28+
System.out.println("the first time of od rw is "+(intermid - start));
29+
System.out.println("the second time of od rw is "+(end - intermid));
30+
System.out.println("the first is nearly the "+(intermid - start)/(end - intermid)+" times of the second one excution");
31+
}
32+
}

0 commit comments

Comments
 (0)