Spark_load_MySQL_demo.py
# python 3
##########################################################
# REF
# https://stackoverflow.com/questions/48054270/load-data-from-the-mysql-db-using-pyspark-in-python-3
##########################################################
# Adapted from the answer in the referenced post
import pandas as pd
# spark
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
conf = SparkConf().setAppName("LOAD MYSQL DATABASE")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)
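# NOTE (sketch, not in the original post): on Spark 2.x+ the same entry point can be
# built via SparkSession, which exposes the same .read.jdbc() interface:
# from pyspark.sql import SparkSession
# spark = SparkSession.builder.appName("LOAD MYSQL DATABASE").getOrCreate()
# spark_df = spark.read.jdbc(...)   # same arguments as sqlContext.read.jdbc below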
# ------------------ METHOD 1) ------------------
#jdbc_url = "jdbc:mysql://{0}:{1}/{2}".format(hostname, jdbcPort, dbname)
url = "jdbc:mysql://localhost/local_deV"
# For SQLServer, pass in the "driver" option
# driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
# Add "driver" : driverClass
connectionProperties = {
    "user": "root",
    "password": ""
}
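# Sketch (assumption, not in the original script): an explicit "driver" entry can be
# added to the same dict when the default driver lookup fails, e.g.:
# connectionProperties = {
#     "user": "root",
#     "password": "",
#     "driver": "com.mysql.jdbc.Driver"  # or "com.microsoft.sqlserver.jdbc.SQLServerDriver"
# }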
table = "movie_metadata"
# read the MySQL table into a Spark DataFrame, then convert it to pandas
spark_df = sqlContext.read.jdbc(url=url, table=table, properties=connectionProperties)
pandas_df = spark_df.toPandas()
# Alternative: push a query down to the database instead of reading the whole table;
# pushdown_query would be a parenthesised sub-query with an alias, e.g. "(SELECT ...) AS tmp":
# spark_df = sqlContext.read.jdbc(url=url, table=pushdown_query, properties=connectionProperties)
print('=' * 70)
print('spark_df : ', spark_df.take(40))
print(type(spark_df))
print('pandas_df : ', pandas_df.head(40))
print(type(pandas_df))
print('=' * 70)
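# ------------------ METHOD 2) (hedged sketch) ------------------
# The same load can be expressed with the generic DataFrameReader options API;
# this reuses the url / table / credentials assumed above and is not from the original post:
# spark_df2 = (sqlContext.read
#              .format("jdbc")
#              .option("url", url)
#              .option("dbtable", table)
#              .option("user", "root")
#              .option("password", "")
#              .load())
# print(spark_df2.take(40))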
##### run via command line #####
# spark-submit --packages mysql:mysql-connector-java:5.1.38 Spark_load_MySQL_demo.py
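# If the connector jar is already available locally, an equivalent (assumed) invocation
# passes it with --jars instead of --packages:
# spark-submit --jars /path/to/mysql-connector-java-5.1.38.jar Spark_load_MySQL_demo.py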