# -*- coding: utf-8 -*-
"""
author SparkByExamples.com
"""

from pyspark.sql import SparkSession

# Create SparkSession
spark = SparkSession.builder \
    .appName('SparkByExamples.com') \
    .getOrCreate()

# Sample data: id and a from_timestamp string
dates = [("1","2019-07-01 12:01:19.111"),
         ("2","2019-06-24 12:01:19.222"),
         ("3","2019-11-16 16:44:55.406"),
         ("4","2019-11-16 16:50:59.406")
        ]

df = spark.createDataFrame(data=dates, schema=["id","from_timestamp"])

from pyspark.sql.functions import *
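# Difference in seconds: cast both timestamps to long (epoch seconds) and subtract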
df2 = df.withColumn('from_timestamp', to_timestamp(col('from_timestamp'))) \
  .withColumn('end_timestamp', current_timestamp()) \
  .withColumn('DiffInSeconds', col("end_timestamp").cast("long") - col('from_timestamp').cast("long"))
df2.show(truncate=False)
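# Alternative: unix_timestamp() also returns epoch seconds, so the difference can be computed without casting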
df.withColumn('from_timestamp', to_timestamp(col('from_timestamp'))) \
  .withColumn('end_timestamp', current_timestamp()) \
  .withColumn('DiffInSeconds', unix_timestamp("end_timestamp") - unix_timestamp('from_timestamp')) \
  .show(truncate=False)
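# Convert the seconds difference to minutes and hours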
df2.withColumn('DiffInMinutes', round(col('DiffInSeconds')/60)) \
  .show(truncate=False)

df2.withColumn('DiffInHours', round(col('DiffInSeconds')/3600)) \
  .show(truncate=False)

# Difference between two timestamps when the input contains only the time portion
data = [("12:01:19.000","13:01:19.000"),
        ("12:01:19.000","12:02:19.000"),
        ("16:44:55.406","17:44:55.406"),
        ("16:50:59.406","16:44:59.406")]
df3 = spark.createDataFrame(data=data, schema=["from_timestamp","to_timestamp"])
df3.withColumn("from_timestamp", to_timestamp(col("from_timestamp"), "HH:mm:ss.SSS")) \
  .withColumn("to_timestamp", to_timestamp(col("to_timestamp"), "HH:mm:ss.SSS")) \
  .withColumn("DiffInSeconds", col("to_timestamp").cast("long") - col("from_timestamp").cast("long")) \
  .withColumn("DiffInMinutes", round(col("DiffInSeconds")/60)) \
  .withColumn("DiffInHours", round(col("DiffInSeconds")/3600)) \
  .show(truncate=False)

# Difference when the input timestamp is in a custom format (MM-dd-yyyy HH:mm:ss.SSS)

df3 = spark.createDataFrame(
        data=[("1","07-01-2019 12:01:19.406")],
        schema=["id","input_timestamp"]
      )
df3.withColumn("input_timestamp", to_timestamp(col("input_timestamp"), "MM-dd-yyyy HH:mm:ss.SSS")) \
  .withColumn("current_timestamp", current_timestamp()) \
  .withColumn("DiffInSeconds", col("current_timestamp").cast("long") - col("input_timestamp").cast("long")) \
  .withColumn("DiffInMinutes", round(col("DiffInSeconds")/60)) \
  .withColumn("DiffInHours", round(col("DiffInSeconds")/3600)) \
  .withColumn("DiffInDays", round(col("DiffInSeconds")/(24*3600))) \
  .show(truncate=False)

# The same differences can be computed with Spark SQL

spark.sql("select unix_timestamp('2019-07-02 12:01:19') - unix_timestamp('2019-07-01 12:01:19') DiffInSeconds").show()
spark.sql("select (unix_timestamp('2019-07-02 12:01:19') - unix_timestamp('2019-07-01 12:01:19'))/60 DiffInMinutes").show()
spark.sql("select (unix_timestamp('2019-07-02 12:01:19') - unix_timestamp('2019-07-01 12:01:19'))/3600 DiffInHours").show()
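# A days difference follows the same pattern (added here as a sketch; not part of the original example)
spark.sql("select (unix_timestamp('2019-07-02 12:01:19') - unix_timestamp('2019-07-01 12:01:19'))/(24*3600) DiffInDays").show()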