-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean_db.py
38 lines (27 loc) · 1.02 KB
/
clean_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from pymongo import MongoClient
import datetime
import json
# Copy every record of test.torino from 2016-11-10 up to now into
# MobilityDataLakeTrial.torino, adding a "provider" field derived from the
# Python type of each record's "state" field. The target collection is
# dropped first, so every run rebuilds it from scratch.

MONGO_HOME = 'mongodb://localhost:27017/'

# ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3 so the
# text check below does not raise NameError there.
try:
    _TEXT_TYPE = unicode
except NameError:
    _TEXT_TYPE = str


def _provider_for_state(state):
    """Return the provider name implied by the type of *state*.

    The mapping is: dict -> "car2go", list -> "enjoy", text -> "tobike".
    Any other value (including None for a missing field) yields None,
    meaning the record should not be labelled.

    NOTE(review): earlier debug code in this script suggests the car2go
    state carries a "placemarks" list, the enjoy state is a list of cars
    with a "car_plate" key, and the tobike state is a JSON-encoded string.
    Confirm against the scraper that writes these records.
    """
    if isinstance(state, dict):
        return "car2go"
    if isinstance(state, list):
        return "enjoy"
    if isinstance(state, _TEXT_TYPE):
        return "tobike"
    return None


# One client is enough: both databases live on the same server.
client = MongoClient(MONGO_HOME)
input_db = client['test']
collection = input_db['torino']

output_db = client['MobilityDataLakeTrial']
# Start from an empty target so re-running the script never duplicates
# records or collides on the copied _id values.
output_db.torino.drop()
output_collection = output_db['torino']

start = datetime.datetime(2016, 11, 10)
# NOTE(review): now() is naive local time. If the stored timestamps are UTC,
# the upper bound is off by the local UTC offset - confirm.
end = datetime.datetime.now()

cursor = collection.find({"timestamp": {'$gte': start, '$lt': end}})
for document in cursor:
    # .get() keeps a record without "state" from aborting the copy half-way;
    # it is copied unlabelled, like any record with an unrecognised state type.
    provider = _provider_for_state(document.get("state"))
    if provider is not None:
        document["provider"] = provider
    output_collection.insert_one(document)