|
1 | 1 | package com.highperformancespark.examples.dataframe;
|
2 | 2 |
|
3 |
| -import com.highperformancespark.examples.objects.JavaRawPanda; |
4 | 3 | import org.apache.spark.api.java.JavaRDD;
|
5 | 4 | import org.apache.spark.api.java.JavaSparkContext;
|
6 | 5 | import org.apache.spark.sql.Column;
|
7 | 6 | import org.apache.spark.sql.DataFrame;
|
8 |
| -import org.apache.spark.sql.Row; |
9 | 7 | import org.apache.spark.sql.SQLContext;
|
10 | 8 | import org.apache.spark.sql.expressions.Window;
|
11 | 9 | import org.apache.spark.sql.expressions.WindowSpec;
|
12 | 10 | import org.apache.spark.sql.hive.HiveContext;
|
13 | 11 | import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2;
|
14 |
| -import scala.collection.JavaConversions; |
15 |
| -import scala.collection.mutable.Buffer; |
16 |
| -import scala.reflect.api.TypeTags; |
17 | 12 |
|
18 |
| -import java.util.Arrays; |
19 | 13 | import java.util.HashMap;
|
20 |
| -import java.util.List; |
21 | 14 | import java.util.Map;
|
22 |
| -import java.util.stream.Collectors; |
23 | 15 |
|
24 | 16 | import static org.apache.spark.sql.functions.*;
|
25 | 17 |
|
@@ -97,37 +89,6 @@ public DataFrame minHappyPandas(DataFrame pandaInfo, int minHappyPandas) {
|
97 | 89 | return pandaInfo.filter(pandaInfo.col("happyPandas").geq(minHappyPandas));
|
98 | 90 | }
|
99 | 91 |
|
100 |
| - /** |
101 |
| - * Extra the panda info from panda places and compute the squisheness of the panda |
102 |
| - */ |
103 |
| - public DataFrame squishPandaFromPace(DataFrame pandaPlace) { |
104 |
| - Buffer<Column> inputCols = JavaConversions.asScalaBuffer(Arrays.asList(pandaPlace.col("pandas"))); |
105 |
| - |
106 |
| - TypeTags.TypeTag tag = null; // TODO don't know how to create Type Tag in java ?? |
107 |
| - |
108 |
| - DataFrame pandaInfo = pandaPlace.explode(inputCols.toList(), r -> { |
109 |
| - List<Row> pandas = r.getList(0); |
110 |
| - List<JavaRawPanda> rawPandasList = pandas |
111 |
| - .stream() |
112 |
| - .map(a -> { |
113 |
| - long id = a.getLong(0); |
114 |
| - String zip = a.getString(1); |
115 |
| - String pt = a.getString(2); |
116 |
| - boolean happy = a.getBoolean(3); |
117 |
| - List<Double> attrs = a.getList(4); |
118 |
| - return new JavaRawPanda(id, zip, pt, happy, attrs); |
119 |
| - }).collect(Collectors.toList()); |
120 |
| - |
121 |
| - return JavaConversions.asScalaBuffer(rawPandasList); |
122 |
| - }, tag); |
123 |
| - |
124 |
| - DataFrame squishyness = |
125 |
| - pandaInfo.select((pandaInfo.col("attributes").apply(0).divide(pandaInfo.col("attributes")).apply(1)) |
126 |
| - .as("squishyness")); |
127 |
| - |
128 |
| - return squishyness; |
129 |
| - } |
130 |
| - |
131 | 92 | /**
|
132 | 93 | * Find pandas that are sad.
|
133 | 94 | */
|
@@ -201,8 +162,8 @@ public void startJDBCServer(HiveContext sqlContext) {
|
201 | 162 |
|
202 | 163 | /**
|
203 | 164 | * Orders pandas by size ascending and by age descending.
|
204 |
| - * Pandas will be sorted by "size" first and if two pandas have the same "size" |
205 |
| - * will be sorted by "age". |
| 165 | + * Pandas will be sorted by "size" first and if two pandas |
| 166 | + * have the same "size" will be sorted by "age". |
206 | 167 | */
|
207 | 168 | public DataFrame orderPandas(DataFrame pandas) {
|
208 | 169 | return pandas.orderBy(pandas.col("pandaSize").asc(), pandas.col("age").desc());
|
|
0 commit comments