Skip to content

Commit 94c6da8

Browse files
pyspark withColumn
1 parent d6a8e66 commit 94c6da8

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed

pyspark-withcolumn.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sun Jun 14 10:20:19 2020
4+
"""
5+
6+
import pyspark
7+
from pyspark.sql import SparkSession
8+
from pyspark.sql.functions import col, lit
9+
from pyspark.sql.types import StructType, StructField, StringType
10+
11+
spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()
12+
13+
data = [(("James","","Smith"),"36636","M","3000"), \
14+
(("Michael","Rose",""),"40288","M","4000"), \
15+
(("Robert","","Williams"),"42114","M","4000"), \
16+
(("Maria","Anne","Jones"),"39192","F","4000"), \
17+
(("Jen","Mary","Brown"),"","F","-1") \
18+
]
19+
20+
schema = StructType([
21+
StructField('name', StructType([
22+
StructField('firstname', StringType(), True),
23+
StructField('middlename', StringType(), True),
24+
StructField('lastname', StringType(), True)
25+
])),
26+
StructField('dob', StringType(), True),
27+
StructField('gender', StringType(), True),
28+
StructField('salary', StringType(), True)
29+
])
30+
31+
32+
df = spark.createDataFrame(data=data, schema = schema)
33+
df.printSchema()
34+
df.show(truncate=False)
35+
36+
df2 = df.withColumn("salary",col("salary").cast("Integer"))
37+
df2.printSchema()
38+
df2.show(truncate=False)
39+
40+
df3 = df.withColumn("salary",col("salary")*100)
41+
df3.printSchema()
42+
df3.show(truncate=False)
43+
44+
df4 = df.withColumn("CopiedColumn",col("salary")* -1)
45+
df4.printSchema()
46+
47+
df5 = df.withColumn("Country", lit("USA"))
48+
df5.printSchema()
49+
50+
df6 = df.withColumn("Country", lit("USA")) \
51+
.withColumn("anotherColumn",lit("anotherValue"))
52+
df6.printSchema()
53+
54+
55+
df.withColumnRenamed("gender","sex") \
56+
.show(truncate=False)
57+
58+
df4.drop("CopiedColumn") \
59+
.show(truncate=False)
60+
61+
"""
62+
columns = ["name","address"]
63+
data = [("Robert, Smith", "1 Main st, Newark, NJ, 92537"), \
64+
("Maria, Garcia","3456 Walnut st, Newark, NJ, 94732")]
65+
66+
dfFromData = spark.createDataFrame(data=data, schema = schema)
67+
68+
newDF = dfFromData.map(f=>{
69+
nameSplit = f.getAs[String](0).split(",")
70+
addSplit = f.getAs[String](1).split(",")
71+
(nameSplit(0),nameSplit(1),addSplit(0),addSplit(1),addSplit(2),addSplit(3))
72+
})
73+
finalDF = newDF.toDF("First Name","Last Name",
74+
"Address Line1","City","State","zipCode")
75+
finalDF.printSchema()
76+
finalDF.show(false)
77+
"""

0 commit comments

Comments
 (0)