# main_study_get_data.py

from pymongo import MongoClient
import pandas as pd

# Date boundaries, written as ISO-8601 UTC timestamp strings.

# Only consider profiles created after this date.
# NOTE(review): not referenced by the code visible in this file — confirm it is still needed.
install_start_date = "2022-05-01T00:00:00.000Z"
# Only consider interventions done after this date.
# NOTE(review): the year is 2021 while every other date here is 2022 — confirm this is intended.
study_start_date = "2021-05-10T00:00:00.000Z"
# CHANGE before each run: get interventions after this date.
# NOTE(review): download_data() currently filters from study_start_date, not from this value.
last_update_date = "2022-05-17T00:00:00.000Z"
# CHANGE before each run: get interventions BEFORE this date.
# Also used to build the name of the CSV file written by download_data().
current_update_date = "2022-05-20T00:00:00.000Z"

def download_data():
    """Download intervention feedback for the study users and save it as CSV.

    Steps:

    1. Read the user ids from the first column of
       ``main_study_bandit_online/user_ids.csv`` (the file has no header row).
    2. Query the ``intervention_feedback`` collection of the ``hso-dev``
       database for documents belonging to those users whose ``date`` lies
       between ``study_start_date`` and ``current_update_date`` (both bounds
       inclusive).
    3. Convert the ``stress_change`` column to a numeric dtype, when present.
    4. Write the result to ``data_<current_update_date>.csv`` in the current
       working directory.

    The connection string is read from the ``MONGODB_URI`` environment
    variable when it is set and non-empty; otherwise the historical built-in
    connection string is used.

    Returns:
        pandas.DataFrame: One row per matching feedback document. The frame
        is empty (and has no columns) when nothing matches the query.

    Raises:
        ValueError: If a ``stress_change`` value cannot be parsed as a number
            (raised by ``pandas.to_numeric``).
    """
    import os

    # SECURITY: the fallback below embeds a username and password in source
    # control. Rotate that credential and supply the connection string through
    # MONGODB_URI instead; the literal is kept only so existing runs still work.
    mongo_uri = os.environ.get("MONGODB_URI") or (
        'mongodb+srv://Nina:rAOVbVxRiZqmSOZo@cluster0.kiigt.mongodb.net/test?retryWrites=true&w=majority'
    )

    # Non-control users. tolist() yields native Python scalars, which avoids
    # handing numpy scalar types to the MongoDB driver if the ids are numeric.
    user_ids = pd.read_csv(
        'main_study_bandit_online/user_ids.csv', delimiter=',', header=None
    )[0].tolist()

    # NOTE: the lower bound is study_start_date, so every run re-downloads the
    # whole study window; last_update_date is not used by this function.
    # NOTE(review): the bounds are strings, so this presumably relies on `date`
    # being stored as an ISO-8601 string in the collection — confirm.
    query = {
        'date': {'$gte': study_start_date, '$lte': current_update_date},
        'userid': {'$in': user_ids},
    }

    # The context manager closes the client (and its connections) even if the
    # query fails; the cursor is fully materialised before the client closes.
    with MongoClient(mongo_uri) as client:
        records = list(client["hso-dev"].intervention_feedback.find(query))

    data = pd.DataFrame(records)

    # With no matching documents the frame has no columns at all; skip the
    # conversion instead of failing with KeyError('stress_change').
    if "stress_change" in data.columns:
        data["stress_change"] = pd.to_numeric(data["stress_change"])

    data.to_csv("data_" + current_update_date + ".csv", header=True)
    return data


# Script entry point: runs the download whenever this module is executed.
# Note this is unguarded, so importing the module triggers the download too.
download_data()