forked from habitlab/habitlab
-
Notifications
You must be signed in to change notification settings - Fork 2
/
main_study_get_data.py
25 lines (19 loc) · 1.3 KB
/
main_study_get_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from pymongo import MongoClient
import pandas as pd
# Run configuration: ISO-8601 UTC timestamps kept as strings. download_data()
# passes them to MongoDB as-is, so the stored 'date' field is presumably a
# string in the same format -- TODO confirm.

# Only consider profiles created after this date.
# NOTE(review): not referenced by download_data() below.
install_start_date = "2022-05-01T00:00:00.000Z"

# Only consider interventions done after this date; used as the inclusive
# lower bound ($gte) of the query in download_data().
# NOTE(review): the year is 2021 while every other date here is 2022 --
# confirm this is intended and not a typo.
study_start_date = "2021-05-10T00:00:00.000Z"

# CHANGE before each run - get interventions after this date.
# NOTE(review): not referenced by download_data() below, which uses
# study_start_date as its lower bound.
last_update_date = "2022-05-17T00:00:00.000Z"

# CHANGE before each run - get interventions BEFORE this date; used as the
# inclusive upper bound ($lte) of the query and embedded in the output CSV
# file name.
current_update_date = "2022-05-20T00:00:00.000Z"
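# Get all intervention feedback that is in the right format (originally noted as "after 11/01/2021"; the window actually queried is study_start_date..current_update_date). No limit on the number of entries is currently supported.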
def download_data():
    """Download intervention feedback for the listed users and save it as CSV.

    Reads user ids from the first column of
    ``main_study_bandit_online/user_ids.csv`` (no header row), fetches every
    document of the ``intervention_feedback`` collection in the ``hso-dev``
    database whose ``userid`` is in that list and whose ``date`` lies between
    ``study_start_date`` and ``current_update_date`` (both inclusive),
    converts the ``stress_change`` column to a numeric dtype and writes the
    result to ``data_<current_update_date>.csv`` in the working directory.

    Returns:
        pandas.DataFrame: the downloaded rows. Empty (and without a
        ``stress_change`` column) when no document matched.
    """
    # Non-control users, one id per row in the first CSV column.
    user_ids = pd.read_csv(
        'main_study_bandit_online/user_ids.csv', delimiter=',', header=None
    )[0].tolist()

    # The lower bound is the study start, so each run re-downloads the whole
    # study window. Presumably it can be switched to last_update_date to fetch
    # only the increment since the previous run -- confirm before changing.
    query = {
        'date': {'$gte': study_start_date, '$lte': current_update_date},
        'userid': {'$in': user_ids},
    }

    # NOTE(review): credentials are embedded in the connection string; they
    # should come from the environment or a config file kept out of version
    # control. The URI is left exactly as found.
    # The context manager closes the client (and its connection pool) even if
    # the query raises; the cursor is fully consumed before leaving the block.
    with MongoClient('mongodb+srv://Nina:[email protected]/test?retryWrites=true&w=majority') as client:
        records = list(client["hso-dev"].intervention_feedback.find(query))

    data = pd.DataFrame(records)
    # An empty result yields a frame with no columns; skip the conversion
    # instead of failing with KeyError.
    if "stress_change" in data.columns:
        data["stress_change"] = pd.to_numeric(data["stress_change"])

    data.to_csv("data_" + current_update_date + ".csv", header=True)
    return data
# Runs the download immediately at module level, i.e. whenever this file is
# executed or imported; the returned DataFrame is discarded here.
download_data()