# -*- coding: utf-8 -*-
"""
Created on Sat Jun 20 07:45:04 2020

@author: NNK
"""

import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, asc, desc

spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

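# Sample employee data used throughout the sort()/orderBy() examples below.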
simpleData = [("James", "Sales", "NY", 90000, 34, 10000),
              ("Michael", "Sales", "NY", 86000, 56, 20000),
              ("Robert", "Sales", "CA", 81000, 30, 23000),
              ("Maria", "Finance", "CA", 90000, 24, 23000),
              ("Raman", "Finance", "CA", 99000, 40, 24000),
              ("Scott", "Finance", "NY", 83000, 36, 19000),
              ("Jen", "Finance", "NY", 79000, 53, 15000),
              ("Jeff", "Marketing", "CA", 80000, 25, 18000),
              ("Kumar", "Marketing", "NY", 91000, 50, 21000)]
columns = ["employee_name", "department", "state", "salary", "age", "bonus"]

df = spark.createDataFrame(data=simpleData, schema=columns)

df.printSchema()
df.show(truncate=False)

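# sort() orders the DataFrame by the given columns; ascending order is the default.
# Columns can be referenced by name or with col().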
df.sort("department", "state").show(truncate=False)
df.sort(col("department"), col("state")).show(truncate=False)

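# orderBy() is an alias for sort() and produces the same result.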
df.orderBy("department", "state").show(truncate=False)
df.orderBy(col("department"), col("state")).show(truncate=False)

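# Sort in ascending order explicitly with the asc() method on a Column.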
df.sort(df.department.asc(), df.state.asc()).show(truncate=False)
df.sort(col("department").asc(), col("state").asc()).show(truncate=False)
df.orderBy(col("department").asc(), col("state").asc()).show(truncate=False)

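# Mix sort directions: department ascending, state descending.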
df.sort(df.department.asc(), df.state.desc()).show(truncate=False)
df.sort(col("department").asc(), col("state").desc()).show(truncate=False)
df.orderBy(col("department").asc(), col("state").desc()).show(truncate=False)

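# The asc()/desc() functions from pyspark.sql.functions work the same way, and
# the ordering can also be expressed in raw SQL against a temporary view.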
df.createOrReplaceTempView("EMP")
# asc() and desc() build sort expressions, not projections, so they belong in
# sort()/orderBy() rather than select().
df.select("employee_name", "department", "state", "salary", "age", "bonus") \
  .sort(asc("department"), desc("state")).show(truncate=False)

spark.sql("select employee_name, department, state, salary, age, bonus from EMP ORDER BY department asc").show(truncate=False)
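
# Not in the original example: a minimal, optional sketch of controlling where
# nulls land in a sort with asc_nulls_last()/desc_nulls_first() (Spark 2.4+).
# The "Paul" row with a null state is made up purely for this illustration.
dfWithNull = df.union(spark.createDataFrame(
    [("Paul", "Marketing", None, 85000, 45, 17000)], schema=df.schema))
dfWithNull.sort(col("state").asc_nulls_last()).show(truncate=False)
dfWithNull.sort(col("state").desc_nulls_first()).show(truncate=False)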