# -*- coding: utf-8 -*-
"""
author SparkByExamples.com

Demonstrates combining two PySpark DataFrames with the same schema using
union() / unionAll(), and removing duplicate rows by chaining distinct().
"""

import pyspark
from pyspark.sql import SparkSession

# Single Spark session for the whole example script.
spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

# Rows are (employee_name, department, state, salary, age, bonus).
simpleData = [
    ("James", "Sales", "NY", 90000, 34, 10000),
    ("Michael", "Sales", "NY", 86000, 56, 20000),
    ("Robert", "Sales", "CA", 81000, 30, 23000),
    ("Maria", "Finance", "CA", 90000, 24, 23000),
]

columns = ["employee_name", "department", "state", "salary", "age", "bonus"]
df = spark.createDataFrame(data=simpleData, schema=columns)
df.printSchema()
df.show(truncate=False)

# Second DataFrame with an identical schema. The "James" and "Maria" rows
# also appear in df, so the distinct() example below has duplicates to drop.
simpleData2 = [
    ("James", "Sales", "NY", 90000, 34, 10000),
    ("Maria", "Finance", "CA", 90000, 24, 23000),
    ("Jen", "Finance", "NY", 79000, 53, 15000),
    ("Jeff", "Marketing", "CA", 80000, 25, 18000),
    ("Kumar", "Marketing", "NY", 91000, 50, 21000),
]
columns2 = ["employee_name", "department", "state", "salary", "age", "bonus"]

df2 = spark.createDataFrame(data=simpleData2, schema=columns2)

df2.printSchema()
df2.show(truncate=False)

# union() keeps duplicate rows (SQL "UNION ALL" semantics).
unionDF = df.union(df2)
unionDF.show(truncate=False)

# Chain distinct() after union() to get SQL "UNION" semantics (dedup).
disDF = df.union(df2).distinct()
disDF.show(truncate=False)

# NOTE: unionAll() has been a deprecated alias of union() since Spark 2.0;
# kept here only to show it produces the same result as union() above.
unionAllDF = df.unionAll(df2)
unionAllDF.show(truncate=False)