Skip to content

Commit

Permalink
Add simple test cases for JavaHappyPandas
Browse files Browse the repository at this point in the history
  • Loading branch information
Mahmoud Hanafy committed May 21, 2016
1 parent a252514 commit b887283
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public static DataFrame jsonLoadFromRDD(SQLContext sqlContext, JavaRDD<String> i
*/
public static DataFrame happyPandasPercentage(DataFrame pandaInfo) {
DataFrame happyPercentage = pandaInfo.select(pandaInfo.col("place"),
pandaInfo.col("happyPandas").divide(pandaInfo.col("totalPandas")).as("percentHappy"));
(pandaInfo.col("happyPandas").divide(pandaInfo.col("totalPandas"))).as("percentHappy"));
return happyPercentage;
}

Expand Down Expand Up @@ -204,7 +204,7 @@ public static void joins(DataFrame df1, DataFrame df2) {
}

/**
 * Joins a DataFrame with itself, keeping only the row pairs whose
 * {@code name} values are equal.
 *
 * The two sides are aliased as {@code a} and {@code b} so the predicate can
 * address each side explicitly. A predicate written as
 * {@code df.col("name").equalTo(df.col("name"))} compares one column with
 * itself and therefore cannot tell the two sides of the join apart.
 *
 * @param df the DataFrame to join with itself; must contain a {@code name} column
 * @return the joined DataFrame, with columns addressable as {@code a.*} and {@code b.*}
 */
public static DataFrame selfJoin(DataFrame df) {
    DataFrame left = df.as("a");
    DataFrame right = df.as("b");
    return left.join(right).where("a.name = b.name");
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package com.highperformancespark.examples.dataframe;

import com.highperformancespark.examples.objects.JavaPandaInfo;
import com.highperformancespark.examples.objects.JavaPandas;
import com.highperformancespark.examples.objects.JavaRawPanda;
import com.holdenkarau.spark.testing.JavaDataFrameSuiteBase;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.*;
import org.junit.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.*;

/**
 * Tests for the DataFrame examples in {@code JavaHappyPandas}.
 *
 * Each test builds an input DataFrame from the in-memory fixtures below,
 * runs one example method, and compares the outcome with a hand-built
 * expected DataFrame.
 */
public class JavaHappyPandasTest extends JavaDataFrameSuiteBase {
    /** Tolerance used when comparing DataFrames that contain doubles. */
    private static final double TOLERANCE = 1E-5;

    // Place names shared between the fixtures and the expected rows.
    String toronto = "toronto";
    String sandiego = "san diego";
    String virginia = "virginia";

    // The last two constructor arguments are the happy and total panda counts:
    // the expected percentages in verifyhappyPandasPercentage are 1/2, 2/3, 1/10.
    List<JavaPandaInfo> pandaInfoList = Arrays.asList(
        new JavaPandaInfo(toronto, "giant", 1, 2),
        new JavaPandaInfo(sandiego, "red", 2, 3),
        new JavaPandaInfo(virginia, "black", 1, 10)
    );

    List<JavaRawPanda> rawPandaList = Arrays.asList(
        new JavaRawPanda(10L, "94110", "giant", true, Arrays.asList(1.0, 0.9)),
        new JavaRawPanda(11L, "94110", "red", true, Arrays.asList(1.0, 0.9)));

    // Names are unique; zips repeat so the per-zip aggregations have work to do.
    // The third argument is the value returned by getPandaSize().
    List<JavaPandas> pandasList = Arrays.asList(
        new JavaPandas("bata", "10010", 10, 2),
        new JavaPandas("wiza", "10010", 20, 4),
        new JavaPandas("dabdob", "11000", 8, 2),
        new JavaPandas("hanafy", "11000", 15, 7),
        new JavaPandas("hamdi", "11111", 20, 10)
    );

    /** A self join on name must pair every row with a row of the same name. */
    @Test
    public void simpleSelfJoinTest() {
        DataFrame inputDF = sqlContext().createDataFrame(pandasList, JavaPandas.class);
        DataFrame result = JavaHappyPandas.selfJoin(inputDF).select("a.name", "b.name");
        List<Row> resultList = result.collectAsList();

        // Every fixture name is unique, so each input row matches exactly one
        // row. Without this check an empty join result would pass the loop
        // below without ever asserting anything.
        assertEquals(pandasList.size(), resultList.size());

        for (Row row : resultList) {
            assertEquals(row.getString(0), row.getString(1));
        }
    }

    /** The percentage column must equal happyPandas / totalPandas per place. */
    @Test
    public void verifyhappyPandasPercentage() {
        List<Row> expectedList = Arrays.asList(
            RowFactory.create(toronto, 0.5),
            RowFactory.create(sandiego, 2 / 3.0),
            RowFactory.create(virginia, 1 / 10.0));
        StructType expectedSchema = new StructType(
            new StructField[]{
                new StructField("place", DataTypes.StringType, true, Metadata.empty()),
                new StructField("percentHappy", DataTypes.DoubleType, true, Metadata.empty())
            });
        DataFrame expectedDF = sqlContext().createDataFrame(expectedList, expectedSchema);

        DataFrame inputDF = sqlContext().createDataFrame(pandaInfoList, JavaPandaInfo.class);
        DataFrame resultDF = JavaHappyPandas.happyPandasPercentage(inputDF);

        assertDataFrameApproximateEquals(expectedDF, resultDF, TOLERANCE);
    }

    /** Checks the (id, encodedType) pairs produced for the raw panda fixtures. */
    @Test
    public void encodePandaType() {
        DataFrame inputDF = sqlContext().createDataFrame(rawPandaList, JavaRawPanda.class);
        DataFrame resultDF = JavaHappyPandas.encodePandaType(inputDF);

        List<Row> expectedRows = Arrays.asList(
            RowFactory.create(10L, 0),
            RowFactory.create(11L, 1));
        StructType expectedSchema = new StructType(
            new StructField[]{
                new StructField("id", DataTypes.LongType, false, Metadata.empty()),
                new StructField("encodedType", DataTypes.IntegerType, false, Metadata.empty())
            });
        DataFrame expectedDF = sqlContext().createDataFrame(expectedRows, expectedSchema);

        assertDataFrameEquals(expectedDF, resultDF);
    }

    /** Only the toronto and san diego fixtures are expected to be returned. */
    @Test
    public void happyPandasPlaces() {
        DataFrame inputDF = sqlContext().createDataFrame(pandaInfoList, JavaPandaInfo.class);
        DataFrame resultDF = JavaHappyPandas.happyPandasPlaces(inputDF);

        List<JavaPandaInfo> expectedRows = Arrays.asList(
            new JavaPandaInfo(toronto, "giant", 1, 2),
            new JavaPandaInfo(sandiego, "red", 2, 3));
        DataFrame expectedDF = sqlContext().createDataFrame(expectedRows, JavaPandaInfo.class);

        assertDataFrameEquals(expectedDF, resultDF);
    }

    /** The largest panda size per zip comes from fixtures 1, 3 and 4. */
    @Test
    public void maxPandaSizePerZip() {
        DataFrame inputDF = sqlContext().createDataFrame(pandasList, JavaPandas.class);
        DataFrame resultDF = JavaHappyPandas.maxPandaSizePerZip(inputDF);

        List<Row> expectedRows = Arrays.asList(
            RowFactory.create(pandasList.get(1).getZip(), pandasList.get(1).getPandaSize()),
            RowFactory.create(pandasList.get(3).getZip(), pandasList.get(3).getPandaSize()),
            RowFactory.create(pandasList.get(4).getZip(), pandasList.get(4).getPandaSize())
        );
        StructType expectedSchema = new StructType(
            new StructField[]{
                new StructField("zip", DataTypes.StringType, true, Metadata.empty()),
                new StructField("max(pandaSize)", DataTypes.IntegerType, true, Metadata.empty())
            });
        DataFrame expectedDF = sqlContext().createDataFrame(expectedRows, expectedSchema);

        // Both sides are ordered by zip before comparing.
        assertDataFrameEquals(expectedDF.orderBy("zip"), resultDF.orderBy("zip"));
    }

    /** Checks the minimum and the mean panda size for each zip. */
    @Test
    public void complexAggPerZip() {
        DataFrame inputDF = sqlContext().createDataFrame(pandasList, JavaPandas.class);
        DataFrame resultDF = JavaHappyPandas.minMeanSizePerZip(inputDF);

        // Minimum sizes come from fixtures 0, 2 and 4; the means are
        // (10 + 20) / 2, (8 + 15) / 2 and 20.
        List<Row> expectedRows = Arrays.asList(
            RowFactory.create(pandasList.get(1).getZip(), pandasList.get(0).getPandaSize(), 15.0),
            RowFactory.create(pandasList.get(3).getZip(), pandasList.get(2).getPandaSize(), 11.5),
            RowFactory.create(pandasList.get(4).getZip(), pandasList.get(4).getPandaSize(), 20.0));
        StructType expectedSchema = new StructType(
            new StructField[]{
                new StructField("zip", DataTypes.StringType, true, Metadata.empty()),
                new StructField("min(pandaSize)", DataTypes.IntegerType, true, Metadata.empty()),
                new StructField("avg(pandaSize)", DataTypes.DoubleType, true, Metadata.empty())
            });
        DataFrame expectedDF = sqlContext().createDataFrame(expectedRows, expectedSchema);

        assertDataFrameApproximateEquals(
            expectedDF.orderBy("zip"), resultDF.orderBy("zip"), TOLERANCE);
    }

    /** The SQL example is expected to return fixtures 0 and 2 only. */
    @Test
    public void simpleSQLExample() {
        DataFrame inputDF = sqlContext().createDataFrame(pandasList, JavaPandas.class);
        DataFrame resultDF = JavaHappyPandas.simpleSqlExample(inputDF);

        List<JavaPandas> expectedList = Arrays.asList(
            pandasList.get(0), pandasList.get(2)
        );
        DataFrame expectedDF = sqlContext().createDataFrame(expectedList, JavaPandas.class);

        assertDataFrameEquals(expectedDF, resultDF);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ class HappyPandasTest extends DataFrameSuiteBase {
//tag::approxEqualDataFrames[]

test("verify simple happy pandas Percentage") {
  // Expected ratio of happy pandas to total pandas for each place.
  val expectedList = List(Row(toronto, 0.5), Row(sandiego, 2/3.0), Row(virginia, 1/10.0))
  val expectedDf = createDF(expectedList, ("place", StringType),
    ("percentHappy", DoubleType))

  val inputDF = sqlContext.createDataFrame(pandaInfoList)
  val resultDF = HappyPandas.happyPandasPercentage(inputDF)

  // Doubles are compared with a tolerance rather than exact equality.
  assertDataFrameApproximateEquals(expectedDf, resultDF, 1E-5)
}
//end::approxEqualDataFrames[]

Expand Down

0 comments on commit b887283

Please sign in to comment.