diff --git a/.gitignore b/.gitignore
index 8b55d7e1..657b5eef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,9 +21,16 @@ work-record-archive
 export
 internal
 data
+figures
+isf-basal-figures
+fonts
+wip
+projects/parsers/output/
 
 # Test
 htmlcov
 .pytest_cache
+
+projects/get-donors-pump-settings/temp-plot\.html
diff --git a/environment.yml b/environment.yml
index 7061885d..97b1f82d 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,12 +11,13 @@ dependencies:
 - xlsxwriter
 - matplotlib
 - scikit-learn
+- pip
 - plotly
-- r
-- r-essentials
+- plotly::plotly-orca
+- poppler
+- psutil
 - pytest
 - pytest-cov
 - pip:
-  - python-dotenv
-  - -e git+https://github.com/tidepool-org/data-analytics#egg=tidals\&subdirectory=tidepool-analysis-tools
+  - python-dotenv
\ No newline at end of file
diff --git a/projects/get-donors-pump-settings/gather-data.py b/projects/get-donors-pump-settings/gather-data.py
new file mode 100644
index 00000000..d2093b22
--- /dev/null
+++ b/projects/get-donors-pump-settings/gather-data.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+description: gather the output from get users settings and events
+version: 0.0.1
+created: 2019-01-30
+author: Ed Nykaza
+dependencies:
+    *
+license: BSD-2-Clause
+"""
+
+# %% REQUIRED LIBRARIES
+import pandas as pd
+import datetime as dt
+import os
+import argparse
+import glob
+
+
+# %% USER INPUTS (ADD THIS IN LATER)
+codeDescription = "Get user's settings and events"
+parser = argparse.ArgumentParser(description=codeDescription)
+
+parser.add_argument("-d",
+                    "--dataPulledDate",
+                    dest="dataPulledDate",
+                    default="2018-09-28",
+                    help="date in '%Y-%m-%d' format of unique donor list " +
+                         "(e.g., PHI-2018-03-02-uniqueDonorList)")
+
+parser.add_argument("-p",
+                    "--dataProcessedDate",
+                    dest="dataProcessedDate",
+                    default="2019-01-21",
+                    help="date in '%Y-%m-%d' format")
+
+args = parser.parse_args()
+
+
+# %% START OF CODE
+dataPulledDate = args.dataPulledDate
+dataProcessedDate = pd.to_datetime(args.dataProcessedDate)
+
+phiDate = "PHI-" + dataPulledDate
+donorPath = os.path.join(
+    "..", "bigdata-processing-pipeline",
+    "data", phiDate + "-donor-data")
+
+outputPath = os.path.join(donorPath, "settings-and-events")
+
+for name in ["allMetadata", "allAgeANDylwSummaries",
+             "allAgeSummaries", "allYlwSummaries",
+             "dayData", "basalEvents", "bolusEvents"]:
+    allDF = pd.DataFrame()
+    if name.startswith("all"):
+        files = glob.glob(os.path.join(outputPath, name + '*'))
+    else:
+        files = glob.glob(
+            os.path.join(outputPath, "data", "**", "*-" + name + ".csv"))
+    for f in files:
+        dateModified = \
+            pd.to_datetime(dt.datetime.fromtimestamp(os.path.getmtime(f)))
+        if dateModified > dataProcessedDate:
+            tempDF = pd.read_csv(f, low_memory=False)
+            tempDF.rename(
+                columns={'Unnamed: 0': 'originalIndex'}, inplace=True)
+            tempDF["from"] = f
+            allDF = pd.concat([allDF, tempDF], ignore_index=True, sort=False)
+    allDF.to_csv(os.path.join(outputPath, "combined-" + name + ".csv"))
+    print("completed " + name)
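gather-data.py above only appends per-user CSVs whose filesystem modification time is newer than --dataProcessedDate, so already-combined output is skipped. A minimal sketch of that pattern (hypothetical file names, not part of this patch):

    import datetime as dt
    import glob
    import os

    import pandas as pd

    cutoff = pd.to_datetime("2019-01-21")  # plays the role of --dataProcessedDate
    combined = pd.DataFrame()
    for f in glob.glob("settings-and-events/*-dayData.csv"):  # hypothetical path
        modified = pd.to_datetime(dt.datetime.fromtimestamp(os.path.getmtime(f)))
        if modified > cutoff:  # only pick up files written since the last run
            combined = pd.concat([combined, pd.read_csv(f)], ignore_index=True)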
diff --git a/projects/get-donors-pump-settings/get-users-settings-and-events.py b/projects/get-donors-pump-settings/get-users-settings-and-events.py
new file mode 100644
index 00000000..10b2b7c6
--- /dev/null
+++ b/projects/get-donors-pump-settings/get-users-settings-and-events.py
@@ -0,0 +1,1844 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+description: get users settings and events
+version: 0.0.1
+created: 2019-01-11
+author: Ed Nykaza
+dependencies:
+    *
+license: BSD-2-Clause
+"""
+
+
+# %% REQUIRED LIBRARIES
+import pandas as pd
+import numpy as np
+from pytz import timezone
+from datetime import timedelta
+import datetime as dt
+import os
+import argparse
+import pdb
+pd.options.mode.chained_assignment = None  # default='warn'
+
+# %% USER INPUTS (ADD THIS IN LATER)
+codeDescription = "Get user's settings and events"
+parser = argparse.ArgumentParser(description=codeDescription)
+
+parser.add_argument("-d",
+                    "--date-stamp",
+                    dest="dateStamp",
+                    default="2018-09-28",
+                    help="date in '%Y-%m-%d' format of unique donor list " +
+                         "(e.g., PHI-2018-03-02-uniqueDonorList)")
+
+parser.add_argument("-s",
+                    "--start-index",
+                    dest="startIndex",
+                    default=0,
+                    help="donor index (integer) to start at")
+
+parser.add_argument("-e",
+                    "--end-index",
+                    dest="endIndex",
+                    default=-1,
+                    help="donor index (integer) to end at, " +
+                         "-1 will result in 1 file if startIndex != 0, " +
+                         "and will default to number of unique donors " +
+                         "if startIndex = 0, or endIndex = -2")
+
+
+args = parser.parse_args()
+
+
+# %% FUNCTIONS
+def defineStartAndEndIndex(args, nDonors):
+    startIndex = int(args.startIndex)
+    endIndex = int(args.endIndex)
+    if endIndex == -1:
+        if startIndex == 0:
+            endIndex = nDonors
+        else:
+            endIndex = startIndex + 1
+    if endIndex == -2:
+        endIndex = nDonors
+    return startIndex, endIndex
+
+
+# CLEAN DATA FUNCTIONS
+def removeNegativeDurations(df):
+    if "duration" in list(df):
+        nNegativeDurations = sum(df.duration < 0)
+        if nNegativeDurations > 0:
+            df = df[~(df.duration < 0)]
+    else:
+        nNegativeDurations = np.nan
+
+    return df, nNegativeDurations
+
+
+def removeInvalidCgmValues(df):
+
+    nBefore = len(df)
+    # remove values < 38 and > 402 mg/dL
+    df = df.drop(df[((df.type == "cbg") &
+                     (df.value < 2.109284236597303))].index)
+    df = df.drop(df[((df.type == "cbg") &
+                     (df.value > 22.314006924003046))].index)
+    nRemoved = nBefore - len(df)
+
+    return df, nRemoved
+
+
+def tslimCalibrationFix(df):
+
+    if "payload.calibration_reading" in list(df):
+
+        searchfor = ['tan']
+        tandemDataIndex = ((df.deviceId.str.contains('|'.join(searchfor))) &
+                           (df.type == "deviceEvent"))
+
+        payloadCalReadingIndex = df["payload.calibration_reading"].notnull()
+
+        nTandemAndPayloadCalReadings = sum(tandemDataIndex &
+                                           payloadCalReadingIndex)
+
+        if nTandemAndPayloadCalReadings > 0:
+            # if reading is > 30 then it is in the wrong units
+            if df["payload.calibration_reading"].min() > 30:
+                df.loc[payloadCalReadingIndex, "value"] = \
+                    df[tandemDataIndex & payloadCalReadingIndex][
+                        "payload.calibration_reading"] / 18.01559
+            else:
+                df.loc[payloadCalReadingIndex, "value"] = \
+                    df[tandemDataIndex &
+                       payloadCalReadingIndex]["payload.calibration_reading"]
+    else:
+        nTandemAndPayloadCalReadings = 0
+    return df, nTandemAndPayloadCalReadings
+
+
+# OTHER
+def tempRemoveFields(df, removeFields):
+
+    tempRemoveFields = list(set(df) & set(removeFields))
+    tempDf = df[tempRemoveFields]
+    df = df.drop(columns=tempRemoveFields)
+
+    return df, tempDf
+
+
+def flattenJson(df, doNotFlattenList):
+    # remove fields that we don't want to flatten
+    df, holdData = tempRemoveFields(df, doNotFlattenList)
+
+    # get a list of data types of column headings
+    columnHeadings = list(df)
+
+    # loop through each columnHeading
+    newDataFrame = pd.DataFrame()
+
+    for colHead in columnHeadings:
+        if any(isinstance(item, list) for item in df[colHead]):
+            listBlob = df[colHead][df[colHead].astype(str).str[0] == "["]
+            df.loc[listBlob.index, colHead] = df.loc[listBlob.index, colHead].str[0]
+
+        # if the df field has embedded json
+        if any(isinstance(item, dict) for 
item in df[colHead]): + # grab the data that is in brackets + jsonBlob = df[colHead][df[colHead].astype(str).str[0] == "{"] + + # replace those values with nan + df.loc[jsonBlob.index, colHead] = np.nan + + # turn jsonBlob to dataframe + newDataFrame = pd.concat([newDataFrame, pd.DataFrame(jsonBlob.tolist(), + index=jsonBlob.index).add_prefix(colHead + '.')], axis=1) + + df = pd.concat([df, newDataFrame, holdData], axis=1) + + df.sort_index(axis=1, inplace=True) + + return df + + +def mergeWizardWithBolus(df): + + if "wizard" in df["type"].unique(): + bolusData = df[df.type == "bolus"].copy().dropna(axis=1, how="all") + wizardData = df[df.type == "wizard"].copy().dropna(axis=1, how="all") + + # merge the wizard data with the bolus data + wizardData["calculatorId"] = wizardData["id"] + wizardDataFields = [ + "bgInput", + "bgTarget.high", + "bgTarget.low", + "bgTarget.range", + "bgTarget.target", + "bolus", + "carbInput", + "calculatorId", + "insulinCarbRatio", + "insulinOnBoard", + "insulinSensitivity", + "recommended.carb", + "recommended.correction", + "recommended.net", + "units", + ] + keepTheseWizardFields = \ + set(wizardDataFields).intersection(list(wizardData)) + bolusData = pd.merge(bolusData, + wizardData[list(keepTheseWizardFields)], + how="left", + left_on="id", + right_on="bolus") + + mergedBolusData = bolusData.drop("bolus", axis=1) + else: + mergedBolusData = pd.DataFrame() + + return mergedBolusData + + +def addUploadDate(df): + uploadTimes = pd.DataFrame(df[df.type == "upload"].groupby("uploadId").time.describe()["top"]) + uploadTimes.reset_index(inplace=True) + uploadTimes.rename(columns={"top": "uploadTime"}, inplace=True) + df = pd.merge(df, uploadTimes, how='left', on='uploadId') + df["uploadTime"] = pd.to_datetime(df["uploadTime"]) + + return df + + +def mmolL_to_mgdL(mmolL): + return mmolL * 18.01559 + + +def mgdL_to_mmolL(mgdL): + return mgdL / 18.01559 + + +def round_time(df, timeIntervalMinutes=5, timeField="time", + roundedTimeFieldName="roundedTime", startWithFirstRecord=True, + verbose=False): + ''' + A general purpose round time function that rounds the "time" + field to nearest minutes + INPUTS: + * a dataframe (df) that contains a time field that you want to round + * timeIntervalMinutes (defaults to 5 minutes given that most cgms output every 5 minutes) + * timeField to round (defaults to the UTC time "time" field) + * roundedTimeFieldName is a user specified column name (defaults to roundedTime) + * startWithFirstRecord starts the rounding with the first record if True, and the last record if False (defaults to True) + * verbose specifies whether the extra columns used to make calculations are returned + ''' + + df.sort_values(by=timeField, ascending=startWithFirstRecord, inplace=True) + df.reset_index(drop=True, inplace=True) + + # make sure the time field is in the right form + t = pd.to_datetime(df[timeField]) + + # calculate the time between consecutive records + t_shift = pd.to_datetime(df[timeField].shift(1)) + df["timeBetweenRecords"] = \ + round((t - t_shift).dt.days*(86400/(60 * timeIntervalMinutes)) + + (t - t_shift).dt.seconds/(60 * timeIntervalMinutes)) * timeIntervalMinutes + + # separate the data into chunks if timeBetweenRecords is greater than + # 2 times the minutes so the rounding process starts over + largeGaps = list(df.query("abs(timeBetweenRecords) > " + str(timeIntervalMinutes * 2)).index) + largeGaps.insert(0, 0) + largeGaps.append(len(df)) + + for gIndex in range(0, len(largeGaps) - 1): + chunk = 
t[largeGaps[gIndex]:largeGaps[gIndex+1]]
+        firstRecordChunk = t[largeGaps[gIndex]]
+
+        # calculate the time difference between each time record and the first record
+        df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], "minutesFromFirstRecord"] = \
+            (chunk - firstRecordChunk).dt.days*(86400/(60)) + \
+            (chunk - firstRecordChunk).dt.seconds/(60)
+
+        # then round to the nearest X Minutes
+        # NOTE: the ".000001" ensures that multiples of 2:30 always round up.
+        df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], "roundedMinutesFromFirstRecord"] = \
+            round((df.loc[largeGaps[gIndex]:largeGaps[gIndex+1],
+                          "minutesFromFirstRecord"] / timeIntervalMinutes) + 0.000001) * (timeIntervalMinutes)
+
+        roundedFirstRecord = (firstRecordChunk + pd.Timedelta("1microseconds")).round(str(timeIntervalMinutes) + "min")
+        df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], roundedTimeFieldName] = \
+            roundedFirstRecord + \
+            pd.to_timedelta(df.loc[largeGaps[gIndex]:largeGaps[gIndex+1],
+                                   "roundedMinutesFromFirstRecord"], unit="m")
+
+    # sort by time and drop the temporary calculation fields
+    df.sort_values(by=timeField, ascending=startWithFirstRecord, inplace=True)
+    df.reset_index(drop=True, inplace=True)
+    if verbose is False:
+        df.drop(columns=["timeBetweenRecords",
+                         "minutesFromFirstRecord",
+                         "roundedMinutesFromFirstRecord"], inplace=True)
+
+    return df
+
+
+def get_descriptive_stats(df, newName, dataSubType):
+
+    newDf = df[dataSubType].describe().add_suffix(newName)
+
+    newDf[("rangeOf" + newName)] = \
+        newDf[("max" + newName)] - \
+        newDf[("min" + newName)]
+
+    return newDf
+
+
+def get_bolusDaySummary(bolusData):
+
+    if "extended" not in bolusData:
+        bolusData["extended"] = 0
+
+    bolusByDay = bolusData.groupby(bolusData["day"])
+
+    # total bolus insulin for each day
+    bolusDaySummary = pd.DataFrame(bolusByDay.normal.sum())
+    bolusDaySummary = bolusDaySummary.rename(
+        columns={"normal": "totalAmountOfNormalBolusInsulin"})
+
+    bolusDaySummary["totalAmountOfExtendedBolusInsulin"] = bolusByDay.extended.sum().fillna(0.0)
+    bolusDaySummary["totalAmountOfBolusInsulin"] = bolusDaySummary["totalAmountOfNormalBolusInsulin"].fillna(0.0) + \
+        bolusDaySummary["totalAmountOfExtendedBolusInsulin"].fillna(0.0)
+
+    # bolus range for normal boluses
+    normalBasalDF = get_descriptive_stats(bolusByDay, "NormalBolusAmountPerBolus", "normal")
+    bolusDaySummary = pd.concat([bolusDaySummary, normalBasalDF], axis=1)
+
+    # total number of bolus types per day
+    bolusTypePerDay = bolusData.groupby(["day",
+                                         "subType"]).size().unstack()
+
+    bolusDaySummary["numberOfNormalBoluses"] = bolusTypePerDay["normal"].fillna(0)
+
+    if "square" not in list(bolusTypePerDay):
+        bolusDaySummary["numberOfSquareBoluses"] = 0
+    else:
+        bolusDaySummary["numberOfSquareBoluses"] = bolusTypePerDay["square"].fillna(0)
+
+    if "dual/square" not in list(bolusTypePerDay):
+        bolusDaySummary["numberOfDualBoluses"] = 0
+    else:
+        bolusDaySummary["numberOfDualBoluses"] = bolusTypePerDay["dual/square"].fillna(0)
+
+    bolusDaySummary["numberOfAllBolusTypes"] = bolusDaySummary["numberOfNormalBoluses"] + \
+        bolusDaySummary["numberOfSquareBoluses"] + \
+        bolusDaySummary["numberOfDualBoluses"]
+
+    return bolusDaySummary
+
+
+def get_basalDaySummary(df):
+    # group data by day
+    basalByDay = df.groupby(df["day"])
+
+    # total basal insulin per day
+    basalDaySummary = pd.DataFrame(basalByDay.totalAmountOfBasalInsulin.sum())
+
+    # total duration per each day (this should add up to 24 hours)
+    basalDaySummary["totalBasalDuration"] = basalByDay.durationHours.sum()
+
+    # total number of basal types per day
+    
basalTypePerDay = df.groupby(["day", "deliveryType"]).size().unstack() + + basalDaySummary["numberOfScheduledBasals"] = basalTypePerDay["scheduled"].fillna(0) + if "suspend" not in list(basalTypePerDay): + basalDaySummary["numberOfSuspendedBasals"] = 0 + else: + basalDaySummary["numberOfSuspendedBasals"] = basalTypePerDay["suspend"].fillna(0) + if "temp" not in list(basalTypePerDay): + basalDaySummary["numberOfTempBasals"] = 0 + else: + basalDaySummary["numberOfTempBasals"] = basalTypePerDay["temp"].fillna(0) + + basalDaySummary["totalNumberOfBasals"] = basalDaySummary["numberOfScheduledBasals"] + \ + basalDaySummary["numberOfTempBasals"] + + return basalDaySummary + + +def filterAndSort(groupedDF, filterByField, sortByField): + filterDF = groupedDF.get_group(filterByField).dropna(axis=1, how="all") + filterDF = filterDF.sort_values(sortByField) + return filterDF + + +def getClosedLoopDays(groupedData, nTempBasalsPerDayIsClosedLoop, metadata): + # filter by basal data and sort by time + if "basal" in groupedData.type.unique(): + basalData = filterAndSort(groupedData, "basal", "time") + + # get closed loop days + nTB = nTempBasalsPerDayIsClosedLoop + + tbDataFrame = basalData.loc[basalData.deliveryType == "temp", ["time"]] + tbDataFrame.index = pd.to_datetime(tbDataFrame["time"]) + tbDataFrame = tbDataFrame.drop(["time"], axis=1) + tbDataFrame["basal.temp.count"] = 1 + nTempBasalsPerDay = tbDataFrame.resample("D").sum() + closedLoopDF = pd.DataFrame(nTempBasalsPerDay, + index=nTempBasalsPerDay.index.date) + closedLoopDF["day"] = nTempBasalsPerDay.index.date + closedLoopDF["basal.closedLoopDays"] = \ + closedLoopDF["basal.temp.count"] >= nTB + nClosedLoopDays = closedLoopDF["basal.closedLoopDays"].sum() + + if nClosedLoopDays == 0: + closedLoopDF = pd.DataFrame(columns=["basal.closedLoopDays", "day"]) + + # get the number of days with 670g + basalData["day"] = pd.to_datetime(basalData.time).dt.date + bdGroup = basalData.groupby("day") + topPump = bdGroup.deviceId.describe()["top"] + med670g = pd.DataFrame(topPump.str.contains("1780")).rename(columns={"top":"670g"}) + med670g.reset_index(inplace=True) + n670gDays = med670g["670g"].sum() + if n670gDays == 0: + med670g = pd.DataFrame(columns=["670g", "day"]) + + + else: + closedLoopDF = pd.DataFrame(columns=["basal.closedLoopDays", "day"]) + med670g = pd.DataFrame(columns=["670g", "day"]) + nClosedLoopDays = 0 + n670gDays = 0 + + metadata["basal.closedLoopDays.count"] = nClosedLoopDays + metadata["med670gDays.count"] = n670gDays + + return closedLoopDF, med670g, metadata + + +def removeDuplicates(df, criteriaDF): + nBefore = len(df) + df = df.loc[~(df[criteriaDF].duplicated())] + df = df.reset_index(drop=True) + nDuplicatesRemoved = nBefore - len(df) + + return df, nDuplicatesRemoved + + +def removeCgmDuplicates(df, timeCriterion): + if timeCriterion in df: + df.sort_values(by=[timeCriterion, "uploadTime"], + ascending=[False, False], + inplace=True) + dfIsNull = df[df[timeCriterion].isnull()] + dfNotNull = df[df[timeCriterion].notnull()] + dfNotNull, nDuplicatesRemoved = removeDuplicates(dfNotNull, [timeCriterion, "value"]) + df = pd.concat([dfIsNull, dfNotNull]) + df.sort_values(by=[timeCriterion, "uploadTime"], + ascending=[False, False], + inplace=True) + else: + nDuplicatesRemoved = 0 + + return df, nDuplicatesRemoved + + +def getStartAndEndTimes(df, dateTimeField): + dfBeginDate = df[dateTimeField].min() + dfEndDate = df[dateTimeField].max() + + return dfBeginDate, dfEndDate + + +def getListOfDexcomCGMDays(df): + # search for dexcom cgms 
+ searchfor = ["Dex", "tan", "IR", "unk"] + # create dexcom boolean field + if "deviceId" in df.columns.values: + totalCgms = len(df.deviceId.notnull()) + df["dexcomCGM"] = df.deviceId.str.contains("|".join(searchfor)) + percentDexcomCGM = df.dexcomCGM.sum() / totalCgms * 100 + else: + percentDexcomCGM = np.nan + return df, percentDexcomCGM + + +def load_csv(dataPathAndName): + df = pd.read_csv(dataPathAndName, low_memory=False) + return df + + +def load_json(dataPathAndName): + df = pd.read_json(dataPathAndName, orient="records") + return df + + +def getTzoForDateTime(utcTime, currentTimezone): + + tz = timezone(currentTimezone) + tzoNum = int(tz.localize(utcTime).strftime("%z")) + tzoNum = int(tz.localize(utcTime).strftime("%z")) + tzoHours = np.floor(tzoNum / 100) + tzoMinutes = round((tzoNum / 100 - tzoHours) * 100, 0) + tzoSign = np.sign(tzoHours) + tzo = int((tzoHours * 60) + (tzoMinutes * tzoSign)) + localTime = utcTime + pd.to_timedelta(tzo, unit="m") + + return localTime + + +def getTimezoneOffset(currentDate, currentTimezone): + + # edge case for 'US/Pacific-New' + if currentTimezone == 'US/Pacific-New': + currentTimezone = 'US/Pacific' + + tz = timezone(currentTimezone) + # here we add 1 day to the current date to account for changes to/from DST + tzoNum = int(tz.localize(currentDate + timedelta(days=1)).strftime("%z")) + tzoHours = np.floor(tzoNum / 100) + tzoMinutes = round((tzoNum / 100 - tzoHours) * 100, 0) + tzoSign = np.sign(tzoHours) + tzo = int((tzoHours * 60) + (tzoMinutes * tzoSign)) + + return tzo + + +def isDSTChangeDay(currentDate, currentTimezone): + if currentTimezone == 'US/Pacific-New': + currentTimezone = 'US/Pacific' + tzoCurrentDay = getTimezoneOffset(pd.to_datetime(currentDate), + currentTimezone) + tzoPreviousDay = getTimezoneOffset(pd.to_datetime(currentDate) + + timedelta(days=-1), currentTimezone) + + return (tzoCurrentDay != tzoPreviousDay) + + +def get_setting_durations(df, col, dataPulledDF): + df = pd.concat([df, dataPulledDF], sort=False) + df.sort_values(col + ".localTime", inplace=True) + df.reset_index(inplace=True, drop=True) + df.fillna(method='ffill', inplace=True) + durationHours = (df[col + ".localTime"].shift(-1) - + df[col + ".localTime"]).dt.total_seconds() / 3600 + durationHours.fillna(0, inplace=True) + durationHours[durationHours > 24] = 24 + df[col + ".durationHours"] = durationHours + + return df + + +def get_settingStats(df, col, pumpCol): + df[col] = df[pumpCol] + df[col + ".min"] = df[col].min() + df[col + ".weightedMean"] = np.sum(df[col] * df[col + ".durationHours"]) / df[col + ".durationHours"].sum() + df[col + ".max"] = df[col].max() + + return df + + +def getPumpSettingsStats(df, col, pumpCol): + pumpColHeadings = [col + ".localTime", col, col + ".min", + col + ".weightedMean", col + ".max"] + df[col] = df[pumpCol + ".amount"] + df[col + ".localTime"] = pd.to_datetime(df["day"]) + \ + pd.to_timedelta(df[pumpCol + ".start"], unit="ms") + df[col + ".min"] = df[col] + df[col + ".weightedMean"] = df[col] + df[col + ".max"] = df[col] + + df2 = df.loc[df[pumpCol + ".amount"].notnull(), pumpColHeadings] + + return df, df2 + + +def isf_likely_units(df, columnHeading): + isfNotNull = df[df[columnHeading].notnull()][columnHeading] + minVal = np.min(isfNotNull) + maxVal = np.max(isfNotNull) + minDiff = np.abs(minVal - np.round(minVal)) + maxDiff = np.abs(maxVal - np.round(maxVal)) + if ((maxDiff == 0) & (maxDiff == 0) & (maxVal > 22.1)): + likelyUnits = "mg/dL" + else: + likelyUnits = "mmol/L" + return likelyUnits + + +def 
correct_basal_extends_past_midnight(df, timeCol, dayCol): + # deal with case when basal extends past midnight due to utcTime and localTime difference + df.sort_values(timeCol, inplace=True) + uniqueDays = pd.DatetimeIndex(df[dayCol].unique()) + midnightsNotInBasalData = uniqueDays[~uniqueDays.isin(df[timeCol])] + for midnight in midnightsNotInBasalData: + # find the last basal prior to midnight + dayBefore = midnight - pd.Timedelta(24, unit="h") + dataDayBefore = df[(df[timeCol] < midnight) & (df[timeCol] > dayBefore)] + + if len(dataDayBefore) > 0: + + basalPriorToMidnight = dataDayBefore[dataDayBefore[timeCol] == dataDayBefore[timeCol].max()] + indexToDrop = basalPriorToMidnight.index.values[0] + oldDuration = basalPriorToMidnight.loc[indexToDrop, "duration"] + newDuration = (midnight - basalPriorToMidnight.loc[indexToDrop, timeCol]).seconds * 1000.0 + newMidnightDuration = oldDuration - newDuration + + newBasalPriorToMidnight = df.copy().drop(index=df.index) + newBasalPriorToMidnight.loc[0,:] = basalPriorToMidnight.loc[indexToDrop,:] + newBasalPriorToMidnight["duration"] = newDuration + + # new basal at midnight + newBasalAtMidnight = df.copy().drop(index=df.index) + newBasalAtMidnight.loc[1,:] = basalPriorToMidnight.loc[indexToDrop,:] + newBasalAtMidnight["duration"] = newMidnightDuration + newBasalAtMidnight[timeCol] = midnight.to_pydatetime() + newBasalAtMidnight[dayCol] = newBasalAtMidnight[timeCol].dt.date + + # add data back to the basal data frame + newRowsToAdd = pd.concat([newBasalPriorToMidnight, newBasalAtMidnight], ignore_index = True) + newRowsToAdd = newRowsToAdd.astype({"rate": "float64", + "duration": "float64"}) + df = df.drop(indexToDrop) + df = pd.concat([df, newRowsToAdd], ignore_index=True) + + return df + + +def get_basalEvent_summary(df, categories): + catDF = df[df["type"] == "basal"].groupby(categories) + summaryDF = pd.DataFrame(catDF["rate"].count()).add_suffix(".count") + summaryDF["basalRate.min"] = catDF["rate"].min() + summaryDF["basalRate.weightedMean"] = catDF["totalAmountOfBasalInsulin"].sum() / catDF["durationHours"].sum() + summaryDF["basalRate.max"] = catDF["rate"].max() + + # max basal rate including extended boluses + catDF = df.groupby(categories) + summaryDF["basalRateIncludingExtendedBoluses.count"] = catDF["rate"].count() + summaryDF["basalRateIncludingExtendedBoluses.max"] = catDF["rate"].max() + + return summaryDF + + +def get_bolusEvent_summary(df, categories): + + catDF = df.groupby(categories) + summaryDF = pd.DataFrame(catDF["unitsInsulin"].describe().add_prefix("insulin.")) + + # carbs entered in bolus calculator + carbEvents = catDF["carbInput"].describe().add_prefix("carbsPerMeal.") + summaryDF = pd.concat([summaryDF, carbEvents], axis=1) + + return summaryDF + + +def get_dayData_summary(df, categories): + + catDF = df[df["validPumpData"]].groupby(categories) + summaryDF = pd.DataFrame(catDF["totalAmountOfInsulin"].describe().add_prefix("totalDailyDose.")) + totalDailyCarbs = catDF["totalDailyCarbs"].describe().add_prefix("totalDailyCarbs.") + percentBasal = catDF["percentBasal"].describe().add_prefix("percentBasal.") + percentBolus = catDF["percentBolus"].describe().add_prefix("percentBolus.") + summaryDF = pd.concat([summaryDF, totalDailyCarbs, percentBasal, percentBolus], axis=1) + + return summaryDF + + +def get_pumpSummary(basalEventsDF, bolusEventsDF, dayDataDF, categories): + basalEventSummary = get_basalEvent_summary(basalEventsDF, categories) + bolusEventSummary = get_bolusEvent_summary(bolusEventsDF, categories) + 
dailySummary = get_dayData_summary(dayDataDF, categories) + pumpSummaryDF = pd.concat([basalEventSummary, bolusEventSummary, dailySummary], axis=1) + + return pumpSummaryDF + + +def get_episodes(df): + df = df.copy().sort_values("localTime").reset_index(drop=True) + allEpisodes = pd.DataFrame() + cgmFrequency = 5.0 + episodeCriteria = pd.DataFrame({"threshold": [54, 70, 180, 250], + "duration": [15, 60, 120, 120], + "percentReadings": [75, 75, 75, 75], + "episodeName": ["extreme-hypo", "hypo", + "hyper", "extreme-hyper"]}) + episodes = pd.DataFrame() + for episodeType in range(0,len(episodeCriteria)): + + # first find all of the cross points + episodeThreshold = episodeCriteria.loc[episodeType, "threshold"] + episodeDurationRequirement = episodeCriteria.loc[episodeType, "duration"] + episodePercentOfReadings = episodeCriteria.loc[episodeType, "percentReadings"] + episodeName = episodeCriteria.loc[episodeType, "episodeName"] + + if episodeThreshold > 110: + + df["startCrossPoint"] = ((df.mg_dL.shift(1) <= episodeThreshold) & + (df.mg_dL > episodeThreshold)) + + df["endCrossPoint"] = ((df.mg_dL.shift(1) > episodeThreshold) & + (df.mg_dL <= episodeThreshold)) + + else: + df["startCrossPoint"] = ((df.mg_dL.shift(1) >= episodeThreshold) & + (df.mg_dL < episodeThreshold)) + + df["endCrossPoint"] = ((df.mg_dL.shift(1) < episodeThreshold) & + (df.mg_dL >= episodeThreshold)) + + + startList = pd.DataFrame(df[df.startCrossPoint].roundedLocalTime) + endList = pd.DataFrame(df[df.endCrossPoint].roundedLocalTime) + if len(startList) > len(endList): + endList = endList.append( + df.loc[df.roundedLocalTime == df.roundedLocalTime.max(), + ["roundedLocalTime"]] + ) + elif len(startList) < len(endList): + startList = startList.append( + df.loc[df.roundedLocalTime == df.roundedLocalTime.min(), + ["roundedLocalTime"]] + ).sort_index() + + if len(startList) == len(endList): + + episodes = pd.concat([startList.reset_index().add_prefix("start."), + endList.reset_index().add_prefix("end.")], axis=1) + + episodes["durationMinutes"] = \ + (episodes["end.roundedLocalTime"] - episodes["start.roundedLocalTime"]).dt.seconds / 60 + + episodes["totalPoints"] = episodes["end.index"] - episodes["start.index"] + episodes["totalPossiblePoints"] = episodes["durationMinutes"] / cgmFrequency + episodes["percentOfReadings"] = episodes["totalPoints"] / episodes["totalPossiblePoints"] * 100 + + else: + "figure out how to resolve this case if it exists" + pdb.set_trace() + + episodes = episodes[(episodes.durationMinutes >= episodeDurationRequirement) & + (episodes.percentOfReadings >= episodePercentOfReadings)].reset_index(drop=True) + episodes["criterion.name"] = episodeName + episodes["criterion.threshold"] = episodeThreshold + episodes["criterion.duration"] = episodeDurationRequirement + episodes["criterion.percentOfReadings"] = episodePercentOfReadings + + allEpisodes = pd.concat([allEpisodes, episodes]).reset_index(drop=True) + + return allEpisodes + + +def get_cgmStats(df): + + statDF = pd.Series(df.mg_dL.describe()) + statDF.rename(index={"count":"totalNumberCBGValues"}, inplace=True) + + statDF["mean_mgdL"] = df.mg_dL.mean() + statDF["std_mgdL"] = df.mg_dL.std() + statDF["cov_mgdL"] = statDF["std_mgdL"] / statDF["mean_mgdL"] + + statDF["totalBelow54"] = sum(df.mg_dL < 54) + statDF["totalBelow70"] = sum(df.mg_dL < 70) + statDF["total54to70"] = sum((df.mg_dL >= 54) & (df.mg_dL < 70)) + statDF["total70to140"] = sum((df.mg_dL >= 70) & (df.mg_dL <= 140)) + statDF["total70to180"] = sum((df.mg_dL >= 70) & (df.mg_dL <= 180)) + 
statDF["total180to250"] = sum((df.mg_dL > 180) & (df.mg_dL <= 250)) + statDF["totalAbove180"] = sum(df.mg_dL > 180) + statDF["totalAbove250"] = sum(df.mg_dL > 250) + + statDF["percentBelow54"] = statDF["totalBelow54"] / statDF["totalNumberCBGValues"] + statDF["percentBelow70"] = statDF["totalBelow70"] / statDF["totalNumberCBGValues"] + statDF["percent70to140"] = statDF["total70to140"] / statDF["totalNumberCBGValues"] + statDF["percent70to180"] = statDF["total70to180"] / statDF["totalNumberCBGValues"] + statDF["percentAbove180"] = statDF["totalAbove180"] / statDF["totalNumberCBGValues"] + statDF["percentAbove250"] = statDF["totalAbove250"] / statDF["totalNumberCBGValues"] + + statDF["min_mgdL"] = df.mg_dL.min() + statDF["median_mgdL"] = df.mg_dL.describe()["50%"] + statDF["max_mgdL"] = df.mg_dL.max() + + # calculate the start and end time of the cbg data + startTime = df["roundedLocalTime"].min() + statDF["startTime"] = startTime + endTime = df["roundedLocalTime"].max() + statDF["endTime"] = endTime + cgmFrequency = np.round((endTime - startTime).seconds / statDF["totalNumberCBGValues"]) + + # sense whether cgm data comes in 5 minute or 15 minute intervals + cgmFrequency = \ + np.nanmedian((df["roundedLocalTime"] - df["roundedLocalTime"].shift(1)).dt.seconds / 60) + + statDF["cgmFrequency"] = cgmFrequency + statDF["totalNumberPossibleCBGvalues"] = len(pd.date_range(startTime, endTime, freq=str(int(cgmFrequency)) + "min")) + statDF["percentCgmValues"] = statDF["totalNumberCBGValues"] / statDF["totalNumberPossibleCBGvalues"] + + return statDF + + +# %% START OF CODE +dataPulledDate = args.dateStamp +dataPulledDF = pd.DataFrame(pd.to_datetime(dataPulledDate), columns=["day"], index=[0]) +dataPulledDF["day"] = dataPulledDF["day"].dt.date +phiDate = "PHI-" + dataPulledDate +donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") + +phiOutputPath = os.path.join(donorPath, "PHI-settings-and-events") +outputPath = os.path.join(donorPath, "settings-and-events") + +# create anonExportDataPath folders +if not os.path.exists(phiOutputPath): + os.makedirs(phiOutputPath) + os.makedirs(outputPath) + +donorList = phiDate + "-uniqueDonorList.csv" +donors = load_csv(os.path.join(donorPath, donorList)) + +allMetadata = pd.DataFrame() +allAgeSummaries = pd.DataFrame() +allYlwSummaries = pd.DataFrame() +allAgeANDylwSummaries = pd.DataFrame() + +# %% MAKE THIS A FUNCTION SO THAT IT CAN BE RUN PER EACH INDIVIDUAL +nUniqueDonors = len(donors) + +# define start and end index +startIndex, endIndex = defineStartAndEndIndex(args, nUniqueDonors) + +for dIndex in range(startIndex, endIndex): + # % ID, HASHID, AGE, & YLW + userID = donors.userID[dIndex] + hashID = donors.hashID[dIndex] + metadata = pd.DataFrame(index=[dIndex]) + metadata["hashID"] = hashID + + try: + # make folder to save data + processedDataPath = os.path.join(phiOutputPath, "PHI-" + userID) + if not os.path.exists(processedDataPath): + os.makedirs(processedDataPath) + + # round all birthdays and diagnosis dates to the first day of the month (to protect identities) + if (pd.isnull(donors.bDay[dIndex]) + pd.isnull(donors.dDay[dIndex])) == 0: + + bDate = pd.to_datetime(donors.bDay[dIndex][0:7]) + dDate = pd.to_datetime(donors.dDay[dIndex][0:7]) + + + # %% LOAD IN DONOR JSON DATA + jsonDataPath = os.path.join(donorPath, phiDate + "-donorJsonData") + jsonFileName = os.path.join(jsonDataPath, "PHI-" + userID + ".json") + + if os.path.exists(jsonFileName): + fileSize = os.stat(jsonFileName).st_size + 
metadata["fileSizeKB"] = fileSize / 1000 + if fileSize > 1000: + data = load_json(jsonFileName) + + # sort the data by time + data.sort_values("time", inplace=True) + + # flatten the embedded json + doNotFlattenList = ["suppressed", "recommended", "payload"] + data = flattenJson(data, doNotFlattenList) + + + # %% CLEAN DATA + # remove negative durations + data, nNegativeDurations = removeNegativeDurations(data) + metadata["nNegativeDurations"] = nNegativeDurations + + # get rid of cgm values too low/high (< 38 & > 402 mg/dL) + data, nInvalidCgmValues = removeInvalidCgmValues(data) + metadata["nInvalidCgmValues"] = nInvalidCgmValues + + # Tslim calibration bug fix + data, nTandemAndPayloadCalReadings = tslimCalibrationFix(data) + metadata["nTandemAndPayloadCalReadings"] = nTandemAndPayloadCalReadings + + + # %% ADD UPLOAD DATE + # attach upload time to each record, for resolving duplicates + if (("upload" in data.type.unique()) & + ("basal" in data.type.unique()) & + ("bolus" in data.type.unique()) & + ("cbg" in data.type.unique()) & + ("pumpSettings" in data.type.unique())): + data = addUploadDate(data) + + + # %% TIME (UTC, TIMEZONE, DAY AND EVENTUALLY LOCAL TIME) + data["utcTime"] = pd.to_datetime(data["time"]) + data["timezone"].fillna(method='ffill', inplace=True) + data["timezone"].fillna(method='bfill', inplace=True) + + # estimate local time (simple method) + data["tzo"] = data[['utcTime', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) + data["localTime"] = data["utcTime"] + pd.to_timedelta(data["tzo"], unit="m") + data["day"] = pd.DatetimeIndex(data["localTime"]).date + + # round to the nearest 5 minutes + # TODO: once roundTime is pushed to tidals repository then this line can be replaced + # with td.clean.round_time + data = round_time(data, timeIntervalMinutes=5, timeField="time", + roundedTimeFieldName="roundedTime", startWithFirstRecord=True, + verbose=False) + + data["roundedLocalTime"] = data["roundedTime"] + pd.to_timedelta(data["tzo"], unit="m") + data.sort_values("uploadTime", ascending=False, inplace=True) + + + # %% ID, HASHID, AGE, & YLW + data["userID"] = userID + data["hashID"] = hashID + data["age"] = np.floor((data["localTime"] - bDate).dt.days/365.25).astype(int) + data["ylw"] = np.floor((data["localTime"] - dDate).dt.days/365.25).astype(int) + + + # %% BOLUS EVENTS (CORRECTION, AND MEAL INCLUING: CARBS, EXTENDED, DUAL) + bolus = mergeWizardWithBolus(data) + if len(bolus) > 0: + # get rid of duplicates that have the same ["time", "normal"] + bolus.sort_values("uploadTime", ascending=False, inplace=True) + bolus, nBolusDuplicatesRemoved = \ + removeDuplicates(bolus, ["deviceTime", "normal"]) + metadata["nBolusDuplicatesRemoved"] = nBolusDuplicatesRemoved + + # get a summary of boluses per day + bolusDaySummary = get_bolusDaySummary(bolus) + + # figure out likely isf units + isfUnits = isf_likely_units(bolus, "insulinSensitivity") + metadata["bolus.isfLikelyUnits"] = isfUnits + + if isfUnits in "mmol/L": + + bolus["isf_mmolL_U"] = bolus["insulinSensitivity"] + bolus["isf"] = mmolL_to_mgdL(bolus["isf_mmolL_U"]) + + else: + # I am pretty sure this case does NOT exist +# pdb.set_trace() + bolus["isf"] = bolus["insulinSensitivity"] + bolus["isf_mmolL_U"] = mgdL_to_mmolL(bolus["isf"]) + + + bolusCH = ["hashID", "age", "ylw", "day", + "utcTime", "localTime", "timezone", "tzo", + "roundedTime", "roundedLocalTime", + "normal", "carbInput", "subType", + "insulinOnBoard", "bgInput", + "isf", "isf_mmolL_U", "insulinCarbRatio"] + bolusEvents = 
bolus.loc[bolus["normal"].notnull(), bolusCH] + bolusEvents.loc[bolusEvents["bgInput"] == 0, "bgInput"] = np.nan + bolusEvents = bolusEvents.rename(columns={"normal": "unitsInsulin", + "bgInput": "bg_mmolL"}) + bolusEvents["bg_mgdL"] = mmolL_to_mgdL(bolusEvents["bg_mmolL"]) + bolusEvents["eventType"] = "correction" + bolusEvents.loc[bolusEvents["carbInput"] > 0, "eventType"] = "meal" + + if "duration" in list(bolus): + bolus["duration"].replace(0, np.nan, inplace=True) + bolus["durationHours"] = bolus["duration"] / 1000.0 / 3600.0 + bolus["rate"] = bolus["extended"] / bolus["durationHours"] + + bolusExtendedCH = ["hashID", "age", "ylw", "day", + "utcTime", "localTime", "timezone", "tzo", + "roundedTime", "roundedLocalTime", + "durationHours", "rate", "type"] + bolusExtendedEvents = bolus.loc[ + ((bolus["extended"].notnull()) & + (bolus["duration"] > 0)), bolusExtendedCH] + + if "extended" not in bolus: + bolus["extended"] = np.nan + bolus["duration"] = np.nan + + + # get start and end times + bolusBeginDate, bolusEndDate = getStartAndEndTimes(bolus, "day") + metadata["bolus.beginDate"] = bolusBeginDate + metadata["bolus.endDate"] = bolusEndDate + + + # %% PUMP SETTINGS + + pumpSettings = data[data.type == "pumpSettings"].copy().dropna(axis=1, how="all") + pumpSettings.sort_values("uploadTime", ascending=False, inplace=True) + + pumpSettings, nPumpSettingsDuplicatesRemoved = \ + removeDuplicates(pumpSettings, "deviceTime") + metadata["nPumpSettingsDuplicatesRemoved"] = nPumpSettingsDuplicatesRemoved + + pumpSettings.sort_values("utcTime", ascending=True, inplace=True) + pumpSettings.reset_index(drop=True, inplace=True) + + # ISF + isfColHeadings = ["isf.localTime", "isf", "isf_mmolL_U"] + + if "insulinSensitivity.amount" in list(pumpSettings): + isfColHead = "insulinSensitivity" + + # figure out likely isf units + isfUnits = isf_likely_units(pumpSettings, "insulinSensitivity.amount") + metadata["pumpSettings.isfLikelyUnits"] = isfUnits + + if isfUnits in "mmol/L": + + pumpSettings["isf_mmolL_U"] = pumpSettings[isfColHead + ".amount"] + pumpSettings["isf"] = mmolL_to_mgdL(pumpSettings["isf_mmolL_U"]) + + else: + + pumpSettings["isf"] = pumpSettings[isfColHead + ".amount"] + pumpSettings["isf_mmolL_U"] = mgdL_to_mmolL(pumpSettings["isf"]) + + pumpSettings["isf.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[isfColHead + ".start"], unit="ms") + + isf = pumpSettings.loc[pumpSettings["isf"].notnull(), isfColHeadings] + + # add a day summary + isfDaySummary = pd.DataFrame() + isfDaySummary["day"] = isf["isf.localTime"].dt.date + isfDaySummary["isf.min"] = isf["isf"] + isfDaySummary["isf.weightedMean"] = isf["isf"] + isfDaySummary["isf.max"] = isf["isf"] + + else: + isfColHead = "insulinSensitivities" + isf = pd.DataFrame(columns=isfColHeadings) + isfDayColHeadings = ['day', 'isf.min', 'isf.weightedMean', 'isf.max'] + isfDaySummary = pd.DataFrame(columns=isfDayColHeadings) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) + + tempDF = pd.DataFrame(pumpSettings.loc[p, isfColHead + "." 
+ actSched])
+                                tempDF["day"] = pumpSettings.loc[p, "day"]
+                                tempDF["isf.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms")
+
+                                # figure out likely isf units
+                                isfUnits = isf_likely_units(tempDF, "amount")
+                                metadata["tempDF.isfLikelyUnits"] = isfUnits
+
+                                if isfUnits in "mmol/L":
+
+                                    tempDF["isf_mmolL_U"] = tempDF["amount"]
+                                    tempDF["isf"] = mmolL_to_mgdL(tempDF["isf_mmolL_U"])
+
+                                else:
+
+                                    tempDF["isf"] = tempDF["amount"]
+                                    tempDF["isf_mmolL_U"] = mgdL_to_mmolL(tempDF["isf"])
+
+                                endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["isf.localTime"], index=[0])
+                                tempDF = get_setting_durations(tempDF, "isf", endOfDay)
+                                tempDF = tempDF[:-1]
+
+                                tempDaySummary = pd.DataFrame(index=[0])
+                                tempDaySummary["day"] = tempDF["isf.localTime"].dt.date
+                                tempDaySummary["isf.min"] = tempDF["isf"].min()
+                                tempDaySummary["isf.weightedMean"] = \
+                                    np.sum(tempDF["isf"] * tempDF["isf.durationHours"]) / tempDF["isf.durationHours"].sum()
+                                tempDaySummary["isf.max"] = tempDF["isf"].max()
+
+                                isf = pd.concat([isf, tempDF[isfColHeadings]], ignore_index=True)
+                                isfDaySummary = pd.concat([isfDaySummary, tempDaySummary], ignore_index=True)
+
+                        isfDaySummary = pd.concat([isfDaySummary, dataPulledDF], sort=False)
+                        isfDaySummary.reset_index(inplace=True, drop=True)
+                        isfDaySummary.fillna(method='ffill', inplace=True)
+                        # it is possible for someone to change their schedule in
+                        # the middle of the day; take the latest change as the
+                        # schedule for that day
+                        isfDaySummary.drop_duplicates(subset="day", keep="last", inplace=True)
+                        isfDaySummary.reset_index(inplace=True, drop=True)
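Each day's "isf.weightedMean" above weights every scheduled value by the hours it was active (the durations come from get_setting_durations). A toy check of that arithmetic, with made-up numbers:

    import numpy as np

    # hypothetical schedule: 50 mg/dL/U for 6 hours, then 40 mg/dL/U for 18 hours
    isf = np.array([50.0, 40.0])
    durationHours = np.array([6.0, 18.0])
    weightedMean = np.sum(isf * durationHours) / durationHours.sum()
    print(weightedMean)  # (50*6 + 40*18) / 24 = 42.5, vs. an unweighted mean of 45.0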
+                        # CIR
+                        cirColHeadings = ["cir.localTime", "cir"]
+
+                        if "carbRatio.amount" in list(pumpSettings):
+                            cirColHead = "carbRatio"
+                            pumpSettings["cir"] = pumpSettings[cirColHead + ".amount"]
+                            pumpSettings["cir.localTime"] = pd.to_datetime(pumpSettings["day"]) + \
+                                pd.to_timedelta(pumpSettings[cirColHead + ".start"], unit="ms")
+
+                            cir = pumpSettings.loc[pumpSettings["carbRatio.amount"].notnull(), cirColHeadings]
+
+                            # add a day summary
+                            cirDaySummary = pd.DataFrame()
+                            cirDaySummary["day"] = cir["cir.localTime"].dt.date
+                            cirDaySummary["cir.min"] = cir["cir"]
+                            cirDaySummary["cir.weightedMean"] = cir["cir"]
+                            cirDaySummary["cir.max"] = cir["cir"]
+
+                        else:
+
+                            cirColHead = "carbRatios"
+                            cir = pd.DataFrame(columns=cirColHeadings)
+                            cirDayColHeadings = ['day', 'cir.min', 'cir.weightedMean', 'cir.max']
+                            cirDaySummary = pd.DataFrame(columns=cirDayColHeadings)
+                            for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]):
+                                # edge case where actSchedule is float
+                                if isinstance(actSched, float):
+                                    actSched = str(int(actSched))
+
+                                tempDF = pd.DataFrame(pumpSettings.loc[p, cirColHead + "." + actSched])
+                                tempDF["day"] = pumpSettings.loc[p, "day"]
+                                tempDF["cir.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms")
+                                tempDF["cir"] = tempDF["amount"]
+                                endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["cir.localTime"], index=[0])
+                                tempDF = get_setting_durations(tempDF, "cir", endOfDay)
+                                tempDF = tempDF[:-1]
+
+                                tempDaySummary = pd.DataFrame(index=[0])
+                                tempDaySummary["day"] = tempDF["cir.localTime"].dt.date
+                                tempDaySummary["cir.min"] = tempDF["cir"].min()
+                                tempDaySummary["cir.weightedMean"] = \
+                                    np.sum(tempDF["cir"] * tempDF["cir.durationHours"]) / tempDF["cir.durationHours"].sum()
+                                tempDaySummary["cir.max"] = tempDF["cir"].max()
+
+                                cir = pd.concat([cir, tempDF[cirColHeadings]], ignore_index=True)
+                                cirDaySummary = pd.concat([cirDaySummary, tempDaySummary], ignore_index=True)
+
+                        cirDaySummary = pd.concat([cirDaySummary, dataPulledDF], sort=False)
+                        cirDaySummary.fillna(method='ffill', inplace=True)
+                        # it is possible for someone to change their schedule in
+                        # the middle of the day; take the latest change as the
+                        # schedule for that day
+                        cirDaySummary.drop_duplicates(subset="day", keep="last", inplace=True)
+                        cirDaySummary.reset_index(inplace=True, drop=True)
+
+
+                        # CORRECTION TARGET
+                        ctColHeadings = ['deviceId', "ct.localTime", "ct.low", "ct.high", "ct.target", "ct.range"]
+                        ctDayColHeadings = ['day', 'deviceId', "ct.low", "ct.high", "ct.target", "ct.range",
+                                            "ct.target.min", "ct.target.weightedMean", "ct.target.max"]
+
+                        if "bgTarget.start" in list(pumpSettings):
+                            ctColHead = "bgTarget."
+
+                            for targetType in ["low", "high", "target", "range"]:
+                                if ctColHead + targetType in list(pumpSettings):
+                                    pumpSettings["ct." + targetType + "_mmolL"] = \
+                                        pumpSettings[ctColHead + targetType]
+
+                                    pumpSettings["ct." + targetType] = \
+                                        mmolL_to_mgdL(pumpSettings["ct." + targetType + "_mmolL"])
+                                else:
+                                    pumpSettings["ct." + targetType + "_mmolL"] = np.nan
+                                    pumpSettings["ct." 
+ targetType] = np.nan + + pumpSettings["ct.localTime"] = pd.to_datetime(pumpSettings["day"]) + \ + pd.to_timedelta(pumpSettings[ctColHead + "start"], unit="ms") + + correctionTarget = pumpSettings.loc[pumpSettings["bgTarget.start"].notnull(), ctColHeadings] + + # add a day summary + ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) + ctDaySummary["day"] = correctionTarget["ct.localTime"].dt.date + ctDaySummary["deviceId"] = correctionTarget["deviceId"] + + # medtronic pumps use the target high as the correction target + if sum(correctionTarget.deviceId.str.contains("ed")) > 0: + correctionTarget.loc[correctionTarget.deviceId.str.contains("ed"), "ct.target"] = \ + correctionTarget.loc[correctionTarget.deviceId.str.contains("ed"), 'ct.high'] + + if sum(correctionTarget.deviceId.str.contains("MMT")) > 0: + correctionTarget.loc[correctionTarget.deviceId.str.contains("MMT"), "ct.target"] = \ + correctionTarget.loc[correctionTarget.deviceId.str.contains("MMT"), 'ct.high'] + + for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]: + ctDaySummary[targetType] = correctionTarget[targetType] + + ctDaySummary["ct.target.min"] = correctionTarget["ct.target"] + ctDaySummary["ct.target.weightedMean"] = correctionTarget["ct.target"] + ctDaySummary["ct.target.max"] = correctionTarget["ct.target"] + + + else: + + ctColHead = "bgTargets" + correctionTarget = pd.DataFrame(columns=ctColHeadings) + + ctDaySummary = pd.DataFrame(columns=ctDayColHeadings) + for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]): + # edge case where actSchedule is float + if isinstance(actSched, float): + actSched = str(int(actSched)) + + tempDF = pd.DataFrame(pumpSettings.loc[p, ctColHead + "." + actSched]) + targetTypes = list(set(list(tempDF)) - set(["start"])) + tempDF["day"] = pumpSettings.loc[p, "day"] + tempDF["deviceId"] = pumpSettings.loc[p, "deviceId"] + + for targetType in ["low", "high", "target", "range"]: + if targetType in list(tempDF): + tempDF["ct." + targetType + "_mmolL"] = \ + tempDF[targetType] + + tempDF["ct." + targetType] = \ + mmolL_to_mgdL(tempDF["ct." + targetType + "_mmolL"]) + else: + tempDF["ct." + targetType + "_mmolL"] = np.nan + tempDF["ct." 
+ targetType] = np.nan
+
+                                tempDF["ct.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms")
+                                endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["ct.localTime"], index=[0])
+                                tempDF = get_setting_durations(tempDF, "ct", endOfDay)
+                                tempDF = tempDF[:-1]
+
+                                # medtronic pumps use the target high as the correction target
+                                if sum(tempDF.deviceId.str.contains("ed")) > 0:
+                                    tempDF.loc[tempDF.deviceId.str.contains("ed"), "ct.target"] = \
+                                        tempDF.loc[tempDF.deviceId.str.contains("ed"), 'ct.high']
+
+                                if sum(tempDF.deviceId.str.contains("MMT")) > 0:
+                                    tempDF.loc[tempDF.deviceId.str.contains("MMT"), "ct.target"] = \
+                                        tempDF.loc[tempDF.deviceId.str.contains("MMT"), 'ct.high']
+
+                                tempDaySummary = pd.DataFrame(index=[0], columns=ctDayColHeadings)
+                                tempDaySummary["day"] = tempDF["ct.localTime"].dt.date
+                                tempDaySummary["deviceId"] = tempDF["deviceId"]
+                                tempDaySummary["ct.target.min"] = tempDF["ct.target"].min()
+                                tempDaySummary["ct.target.weightedMean"] = \
+                                    np.sum(tempDF["ct.target"] * tempDF["ct.durationHours"]) / tempDF["ct.durationHours"].sum()
+                                tempDaySummary["ct.target.max"] = tempDF["ct.target"].max()
+
+                                for targetType in ["ct.low", "ct.high", "ct.target", "ct.range"]:
+                                    tempDaySummary[targetType] = tempDF[targetType]
+
+                                correctionTarget = pd.concat([correctionTarget, tempDF[ctColHeadings]], ignore_index=True)
+                                ctDaySummary = pd.concat([ctDaySummary, tempDaySummary[ctDayColHeadings]], ignore_index=True)
+
+                        ctDaySummary = pd.concat([ctDaySummary, dataPulledDF], sort=False)
+                        ctDaySummary.fillna(method='ffill', inplace=True)
+                        # it is possible for someone to change their schedule in
+                        # the middle of the day; take the latest change as the
+                        # schedule for that day
+                        ctDaySummary.drop_duplicates(subset="day", keep="last", inplace=True)
+                        ctDaySummary.reset_index(inplace=True, drop=True)
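Correction targets are stored in mmol/L and converted to mg/dL with the same 18.01559 factor used by mmolL_to_mgdL throughout this file; a worked example (values are illustrative):

    MGDL_PER_MMOLL = 18.01559

    def mmolL_to_mgdL(mmolL):
        return mmolL * MGDL_PER_MMOLL

    print(round(mmolL_to_mgdL(5.5), 1))  # a 5.5 mmol/L target is about 99.1 mg/dL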
+                        # SCHEDULED BASAL RATES
+                        sbrColHeadings = ["sbr.localTime", "rate", "sbr.type"]
+                        sbr = pd.DataFrame(columns=sbrColHeadings)
+                        sbrDayColHeadings = ['day', 'sbr.min', 'sbr.weightedMean', 'sbr.max', 'sbr.type']
+                        sbrDaySummary = pd.DataFrame(columns=sbrDayColHeadings)
+                        for p, actSched in zip(pumpSettings.index, pumpSettings["activeSchedule"]):
+                            # edge case where actSchedule is float
+                            if isinstance(actSched, float):
+                                actSched = str(int(actSched))
+                            if 'Auto Mode' not in actSched:
+                                # edge case where an active schedule is nan
+                                try:
+                                    tempDF = pd.DataFrame(pumpSettings.loc[p, "basalSchedules." + actSched])
+                                except Exception:
+                                    tempDF = pd.DataFrame()
+                                    metadata["issueWithBasalSchedule"] = True
+                                if len(tempDF) > 0:
+                                    tempDF["day"] = pumpSettings.loc[p, "day"]
+                                    tempDF["sbr.type"] = "regular"
+                                    tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"]) + pd.to_timedelta(tempDF["start"], unit="ms")
+                                    endOfDay = pd.DataFrame(pd.to_datetime(pumpSettings.loc[p, "day"] + pd.Timedelta(1, "D")), columns=["sbr.localTime"], index=[0])
+                                    tempDF = get_setting_durations(tempDF, "sbr", endOfDay)
+                                    tempDF = tempDF[:-1]
+
+                                    tempDaySummary = pd.DataFrame(index=[0])
+                                    tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date
+                                    tempDaySummary["sbr.min"] = tempDF["rate"].min()
+                                    tempDaySummary["sbr.weightedMean"] = \
+                                        np.sum(tempDF["rate"] * tempDF["sbr.durationHours"]) / tempDF["sbr.durationHours"].sum()
+                                    tempDaySummary["sbr.max"] = tempDF["rate"].max()
+                                    tempDaySummary["sbr.type"] = "regular"
+                                else:
+                                    tempDF = pd.DataFrame(index=[0])
+                                    tempDF["day"] = pumpSettings.loc[p, "day"]
+                                    tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"])
+                                    tempDF["rate"] = np.nan
+                                    tempDF["sbr.type"] = "AutoMode"
+
+                                    tempDaySummary = pd.DataFrame(index=[0])
+                                    tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date
+                                    tempDaySummary["sbr.min"] = np.nan
+                                    tempDaySummary["sbr.weightedMean"] = np.nan
+                                    tempDaySummary["sbr.max"] = np.nan
+                                    tempDaySummary["sbr.type"] = "missingNullOrIssue"
+                            else:
+                                tempDF = pd.DataFrame(index=[0])
+                                tempDF["day"] = pumpSettings.loc[p, "day"]
+                                tempDF["sbr.localTime"] = pd.to_datetime(tempDF["day"])
+                                tempDF["rate"] = np.nan
+                                tempDF["sbr.type"] = "AutoMode"
+
+                                tempDaySummary = pd.DataFrame(index=[0])
+                                tempDaySummary["day"] = tempDF["sbr.localTime"].dt.date
+                                tempDaySummary["sbr.min"] = np.nan
+                                tempDaySummary["sbr.weightedMean"] = np.nan
+                                tempDaySummary["sbr.max"] = np.nan
+                                tempDaySummary["sbr.type"] = "AutoMode"
+
+                            sbr = pd.concat([sbr, tempDF[sbrColHeadings]], ignore_index=True)
+                            sbrDaySummary = pd.concat([sbrDaySummary, tempDaySummary], ignore_index=True)
+
+                        sbrDaySummary = pd.concat([sbrDaySummary, dataPulledDF], sort=False)
+                        sbrDaySummary.fillna(method='ffill', inplace=True)
+                        # it is possible for someone to change their schedule in
+                        # the middle of the day; take the latest change as the
+                        # schedule for that day
+                        sbrDaySummary.drop_duplicates(subset="day", keep="last", inplace=True)
+                        sbrDaySummary.reset_index(inplace=True, drop=True)
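In the basal section that follows, durations arrive in Tidepool's milliseconds, so delivered insulin is the rate (U/h) times the duration converted to hours; a quick sanity check with made-up values:

    # a hypothetical 2-hour temp basal at 0.75 U/h
    duration = 7200000  # milliseconds
    rate = 0.75  # U/h
    durationHours = duration / 1000.0 / 3600.0  # 2.0 hours
    totalAmountOfBasalInsulin = durationHours * rate  # 1.5 U delivered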
+                        # %% test this later
+                        # # max basal rate, max bolus amount, and insulin duration
+                        # if "rateMaximum" in list(data):
+                        #     pdb.set_trace()
+                        # if "amountMaximum" in list(data):
+                        #     pdb.set_trace()
+                        # if "bolus.calculator" in list(data):
+                        #     pdb.set_trace()
+
+
+                        # %% ACTUAL BASAL RATES (TIME, VALUE, DURATION, TYPE (SCHEDULED, TEMP, SUSPEND))
+                        basal = data[data.type == "basal"].copy().dropna(axis=1, how="all")
+                        basal.sort_values("uploadTime", ascending=False, inplace=True)
+
+                        metadata["pump.top"] = basal.deviceId.describe()["top"]
+
+                        basalBeginDate, basalEndDate = getStartAndEndTimes(basal, "day")
+                        metadata["basal.beginDate"] = basalBeginDate
+                        metadata["basal.endDate"] = basalEndDate
+
+                        basal, nBasalDuplicatesRemoved = \
+                            removeDuplicates(basal, ["deliveryType", "deviceTime", "duration", "rate"])
+                        metadata["basal.nBasalDuplicatesRemoved"] = nBasalDuplicatesRemoved
+
+                        # deal with case when basal extends past midnight due to utcTime and localTime difference
+                        basal = correct_basal_extends_past_midnight(basal, "localTime", "day")
+
+                        # fill NaNs with 0, as it indicates a suspend (temp basal of 0)
+                        basal.rate.fillna(0, inplace=True)
+
+                        # get rid of basals that have durations of 0
+                        nBasalDuration0 = sum(basal.duration == 0)
+                        basal = basal[basal.duration > 0]
+                        metadata["basal.nBasalDuration0"] = nBasalDuration0
+
+                        # get rid of basal durations that are unrealistic
+                        nUnrealisticBasalDuration = ((basal.duration < 0) | (basal.duration > 86400000))
+                        metadata["nUnrealisticBasalDuration"] = sum(nUnrealisticBasalDuration)
+                        basal.loc[nUnrealisticBasalDuration, "duration"] = np.nan
+
+                        # calculate the total amount of insulin delivered (duration * rate)
+                        basal["durationHours"] = basal["duration"] / 1000.0 / 3600.0
+                        basal["totalAmountOfBasalInsulin"] = basal["durationHours"] * basal["rate"]
+
+                        # actual basal delivered
+                        basalEventsColHeadings = ["hashID", "age", "ylw", "day",
+                                                  "utcTime", "localTime", "timezone", "tzo",
+                                                  "roundedTime", "roundedLocalTime",
+                                                  "durationHours", "rate", "type"]
+                        basalEvents = basal[basalEventsColHeadings]
+                        if "duration" in list(bolus):
+                            basalEvents = pd.concat([basalEvents, bolusExtendedEvents], ignore_index=True)
+                            basalEvents.sort_values("localTime", inplace=True)
+
+                        basalEvents["timezone"].fillna(method='ffill', inplace=True)
+                        basalEvents["timezone"].fillna(method='bfill', inplace=True)
+                        basalEvents["totalAmountOfBasalInsulin"] = basalEvents["rate"] * basalEvents["durationHours"]
+
+                        # get a summary of basals per day
+                        basalDaySummary = get_basalDaySummary(basal)
+
+
+                        # %% GET CLOSED LOOP DAYS WITH TEMP BASAL DATA
+                        # group data by type
+                        groupedData = data.groupby(by="type")
+
+                        isClosedLoopDay, is670g, metadata = \
+                            getClosedLoopDays(groupedData, 30, metadata)
+
+
+                        # %% CGM DATA
+                        # filter by cgm and sort by uploadTime
+                        cgmData = groupedData.get_group("cbg").dropna(axis=1, how="all")
+
+                        # get rid of duplicates that have the same ["deviceTime", "value"]
+                        cgmData, nCgmDuplicatesRemovedDeviceTime = removeCgmDuplicates(cgmData, "deviceTime")
+                        metadata["nCgmDuplicatesRemovedDeviceTime"] = nCgmDuplicatesRemovedDeviceTime
+
+                        # get rid of duplicates that have the same ["time", "value"]
+                        cgmData, nCgmDuplicatesRemovedUtcTime = removeCgmDuplicates(cgmData, "time")
+                        metadata["nCgmDuplicatesRemovedUtcTime"] = nCgmDuplicatesRemovedUtcTime
+
+                        # get rid of duplicates that have the same "roundedTime"
+                        cgmData, 
nCgmDuplicatesRemovedRoundedTime = removeDuplicates(cgmData, "roundedTime") + metadata["nCgmDuplicatesRemovedRoundedTime"] = nCgmDuplicatesRemovedRoundedTime + + # get start and end times + cgmBeginDate, cgmEndDate = getStartAndEndTimes(cgmData, "day") + metadata["cgm.beginDate"] = cgmBeginDate + metadata["cgm.endDate"] = cgmEndDate + + # get a list of dexcom cgms + cgmData, percentDexcom = getListOfDexcomCGMDays(cgmData) + metadata["cgm.percentDexcomCGM"] = percentDexcom + + # see if cgm is freestyle + cgmData["isFreeStyle"] = cgmData["deviceId"].str.contains("Free") + metadata["cgm.top"] = cgmData.deviceId.describe()["top"] + + # group by date (day) and get stats + catDF = cgmData.groupby(cgmData["day"]) + cgmRecordsPerDay = \ + pd.DataFrame(catDF.value.count()). \ + rename(columns={"value": "cgm.count"}) + dayDate = catDF.day.describe()["top"] + dexcomCGM = catDF.dexcomCGM.describe()["top"] + freeStyleCGM = catDF.isFreeStyle.describe()["top"] +# nTypesCGM = catDF.dexcomCGM.describe()["unique"] + cgmRecordsPerDay["cgm.dexcomOnly"] = \ + (dexcomCGM & (catDF.dexcomCGM.describe()["unique"] == 1)) + cgmRecordsPerDay["cgm.freeStyleOnly"] = \ + (freeStyleCGM & (catDF.isFreeStyle.describe()["unique"] == 1)) + cgmRecordsPerDay["date"] = cgmRecordsPerDay.index + + # filter the cgm data + cgmColHeadings = ["hashID", "age", "ylw", "day", + "utcTime", "localTime", + "timezone", "tzo", + "roundedTime", "roundedLocalTime", "value"] + + # get data in mg/dL units + cgm = cgmData[cgmColHeadings] + cgm = cgm.rename(columns={'value': 'mmol_L'}) + cgm["mg_dL"] = mmolL_to_mgdL(cgm["mmol_L"]).astype(int) + + + # %% NUMBER OF DAYS OF PUMP AND CGM DATA, OVERALL AND PER EACH AGE & YLW + + # COMBINE DAY SUMMARIES + # group by date (day) and get stats + catDF = data.groupby(data["day"]) + dataPerDay = \ + pd.DataFrame(catDF.hashID.describe()["top"]). 
\
+                            rename(columns={"top": "hashID"})
+                        dataPerDay["age"] = catDF.age.mean()
+                        dataPerDay["ylw"] = catDF.ylw.mean()
+                        dataPerDay["timezone"] = catDF.timezone.describe()["top"]
+
+                        # calculate all of the data start and end range
+                        # this can be used for looking at settings
+                        dayBeginDate = min(cgmBeginDate, bolusBeginDate, basalBeginDate)
+                        dayEndDate = max(cgmEndDate, bolusEndDate, basalEndDate)
+                        metadata["day.beginDate"] = dayBeginDate
+                        metadata["day.endDate"] = dayEndDate
+                        rng = pd.date_range(dayBeginDate, dayEndDate).date
+                        dayData = pd.DataFrame(rng, columns=["day"])
+
+                        for dfType in [dataPerDay, basalDaySummary, bolusDaySummary, cgmRecordsPerDay]:
+                            dayData = pd.merge(dayData, dfType.reset_index(), on="day", how="left")
+
+                        for dfType in [isClosedLoopDay, is670g]:
+                            dayData = pd.merge(dayData, dfType, on="day", how="left")
+
+                        # calculate the total amount of daily insulin
+                        dayData["totalAmountOfInsulin"] = (
+                            dayData["totalAmountOfBasalInsulin"] +
+                            dayData["totalAmountOfBolusInsulin"]
+                        )
+
+                        # calculate the percent bolus and percent basal
+                        dayData["percentBasal"] = (
+                            dayData["totalAmountOfBasalInsulin"] /
+                            dayData["totalAmountOfInsulin"]
+                        )
+
+                        dayData["percentBolus"] = (
+                            dayData["totalAmountOfBolusInsulin"] /
+                            dayData["totalAmountOfInsulin"]
+                        )
+
+                        # total daily carbs
+                        totalDailyCarbs = pd.DataFrame(bolusEvents.groupby("day").carbInput.sum())
+                        totalDailyCarbs.reset_index(inplace=True)
+                        totalDailyCarbs.rename(columns={"carbInput": "totalDailyCarbs"}, inplace=True)
+                        dayData = pd.merge(dayData, totalDailyCarbs, how="left", on="day")
+
+                        # get daily cgm stats
+                        cgm.sort_values("localTime", inplace=True)
+                        cgmCountsPerDay = cgm.groupby("day")["mg_dL"].count().reset_index()
+                        cgmCountsPerDay.rename(columns={"mg_dL": "cgmCountsPerDay"}, inplace=True)
+                        cgm = pd.merge(cgm, cgmCountsPerDay, how="left", on="day")
+
+                        cgmStats = cgm[cgm["cgmCountsPerDay"] > 1].groupby("day").apply(get_cgmStats)
+                        # fix start and end times (not sure why they get transformed to ints)
+                        cgmStats["startTime"] = pd.to_datetime(cgmStats["startTime"])
+                        cgmStats["endTime"] = pd.to_datetime(cgmStats["endTime"])
+
+                        cgmStats = cgmStats.add_prefix("cgm.")
+                        cgmStats.reset_index(inplace=True)
+                        dayData = pd.merge(dayData, cgmStats, how="left", on="day")
+
+                        # %% get all episodes
+                        allEpisodes = get_episodes(cgm)
+                        allEpisodes["day"] = allEpisodes["start.roundedLocalTime"].dt.date
+                        allEpisodes = pd.merge(allEpisodes, dayData[["age", "ylw", "day"]], how="left", on="day")
+
+                        for episodeType in allEpisodes["criterion.name"].unique():
+                            episodeGroup = allEpisodes[allEpisodes["criterion.name"] == episodeType].groupby(["day"])
+                            episodeDaySummary = episodeGroup["durationMinutes"].describe().add_prefix(episodeType + "-durationMinutes.")
+                            episodeDaySummary.rename(columns={episodeType + "-durationMinutes.count": episodeType + ".count"}, inplace=True)
+                            episodeDaySummary.reset_index(inplace=True)
+                            dayData = pd.merge(dayData, episodeDaySummary, how="left", on="day")
+
+                        # %% a valid pump day should have exactly 24 hours of basal rate
+                        dayData["validPumpData"] = dayData["totalBasalDuration"] == 24
+                        dayData["atLeast3Boluses"] = dayData["numberOfNormalBoluses"] >= 3
+
+                        dayData["validCGMData"] = \
+                            ((dayData["cgm.count"] > (288*.75)) |
+                             (dayData["cgm.count"] > (96*.75)) & (dayData["cgm.freeStyleOnly"]))
+
+                        dayData["timezone"].fillna(method='ffill', inplace=True)
+                        dayData["timezone"].fillna(method='bfill', inplace=True)
+
+                        dayData["isDSTChangeDay"] = dayData[['day', 'timezone']].apply(lambda x: 
isDSTChangeDay(*x), axis=1) + dayData["date"] = pd.to_datetime(dayData["day"]) + dayData["tzo"] = dayData[['date', 'timezone']].apply(lambda x: getTimezoneOffset(*x), axis=1) + + # add settings to the dayData + dayData = pd.merge(dayData, isfDaySummary, on="day", how="left") + dayData = pd.merge(dayData, cirDaySummary, on="day", how="left") + dayData = pd.merge(dayData, ctDaySummary, on="day", how="left") + dayData = pd.merge(dayData, sbrDaySummary, on="day", how="left") + + # fill data forward + fillList = ['isf.min', + 'isf.weightedMean', + 'isf.max', + 'cir.min', + 'cir.weightedMean', + 'cir.max', + 'ct.low', + 'ct.high', + 'ct.target', + 'ct.range', + 'ct.target.min', + 'ct.target.weightedMean', + 'ct.target.max', + 'sbr.min', + 'sbr.weightedMean', + 'sbr.max', + 'sbr.type'] + for fl in fillList: + dayData[fl].fillna(method='ffill', inplace=True) + + # calculate the start and end of contiguous data + # these dates can be used when simulating and predicting, where + # you need both pump and cgm data + contiguousBeginDate = max(cgmBeginDate, bolusBeginDate, basalBeginDate) + contiguousEndDate = min(cgmEndDate, bolusEndDate, basalEndDate) + metadata["contiguous.beginDate"] = contiguousBeginDate + metadata["contiguous.endDate"] = contiguousEndDate + + # get a summary by age, and ylw + catDF = dayData.groupby("age") + ageSummary = pd.DataFrame(catDF.validPumpData.sum()) + ageSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ageSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ageSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ageSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + + # add in isf stats + ageSummary["isf.nDays"] = catDF["isf.min"].count() + ageSummary["isf.min"] = catDF["isf.min"].min() + ageSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() + ageSummary["isf.max"] = catDF["isf.max"].max() + + # add cir stats + ageSummary["cir.nDays"] = catDF["cir.min"].count() + ageSummary["cir.min"] = catDF["cir.min"].min() + ageSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() + ageSummary["cir.max"] = catDF["cir.max"].max() + + # add sbr stats + ageSummary["sbr.nDays"] = catDF["sbr.min"].count() + ageSummary["sbr.min"] = catDF["sbr.min"].min() + ageSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() + ageSummary["sbr.max"] = catDF["sbr.max"].max() + ageSummary["sbr.typeTop"] = catDF["sbr.type"].describe()["top"] + ageSummary["sbr.typeCount"] = catDF["sbr.type"].count() + + # correctionTarget stats + ageSummary["ct.nDays"] = catDF["ct.target.min"].count() + ageSummary["ct.target.min"] = catDF["ct.target.min"].min() + ageSummary["ct.target.weightedMean"] = catDF["ct.target.weightedMean"].sum() / catDF["ct.target.weightedMean"].count() + ageSummary["ct.target.max"] = catDF["ct.target.max"].max() + + ageSummary.reset_index(inplace=True) + + analysisCriterion = ageSummary[((ageSummary["nDaysValidPump"]> 0) & + (ageSummary["nDaysValidCgm"]> 0))] + minAge = analysisCriterion["age"].min() + maxAge = analysisCriterion["age"].max() + nDaysClosedLoop = analysisCriterion["nDaysClosedLoop"].sum() + n670gDays = analysisCriterion["n670gDays"].sum() + metadata["minAge"] = minAge + metadata["maxAge"] = maxAge + metadata["nDaysClosedLoop"] = nDaysClosedLoop + metadata["n670gDays"] = n670gDays + + catDF = dayData.groupby("ylw") + ylwSummary = pd.DataFrame(catDF.validPumpData.sum()) + 
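+ # NOTE: this ylw summary repeats the aggregation pattern of the age summary
+ # above; in both, the "weightedMean" columns are aggregated as
+ # sum()/count(), which is just the NaN-aware group mean, so each such line
+ # could equivalently be written as (sketch only, not a behavior change):
+ #     ylwSummary["isf.weightedMean"] = catDF["isf.weightedMean"].mean()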
ylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ylwSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + + ylwSummary["isf.nDays"] = catDF["isf.min"].count() + ylwSummary["isf.min"] = catDF["isf.min"].min() + ylwSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() + ylwSummary["isf.max"] = catDF["isf.max"].max() + + # add cir stats + ylwSummary["cir.nDays"] = catDF["cir.min"].count() + ylwSummary["cir.min"] = catDF["cir.min"].min() + ylwSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() + ylwSummary["cir.max"] = catDF["cir.max"].max() + + # add sbr stats + ylwSummary["sbr.nDays"] = catDF["sbr.min"].count() + ylwSummary["sbr.min"] = catDF["sbr.min"].min() + ylwSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() + ylwSummary["sbr.max"] = catDF["sbr.max"].max() + ylwSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + + # correctionTarget stats + ylwSummary["ct.nDays"] = catDF["ct.target.min"].count() + ylwSummary["ct.target.min"] = catDF["ct.target.min"].min() + ylwSummary["ct.target.weightedMean"] = catDF["ct.target.weightedMean"].sum() / catDF["ct.target.weightedMean"].count() + ylwSummary["ct.target.max"] = catDF["ct.target.max"].max() + + ylwSummary.reset_index(inplace=True) + + analysisCriterion = ylwSummary[((ylwSummary["nDaysValidPump"]> 0) & + (ylwSummary["nDaysValidCgm"]> 0))] + minYLW = analysisCriterion["ylw"].min() + maxYLW = analysisCriterion["ylw"].max() + metadata["minYLW"] = minYLW + metadata["maxYLW"] = maxYLW + + # age and ylw + catDF = dayData.groupby(["age", "ylw"]) + ageANDylwSummary = pd.DataFrame(catDF.validPumpData.sum()) + ageANDylwSummary.rename(columns={"validPumpData": "nDaysValidPump"}, inplace=True) + ageANDylwSummary["nDaysValidCgm"] = pd.DataFrame(catDF.validCGMData.sum()) + ageANDylwSummary["nDaysClosedLoop"] = pd.DataFrame(catDF["basal.closedLoopDays"].sum()) + ageANDylwSummary["n670gDays"] = pd.DataFrame(catDF["670g"].sum()) + + ageANDylwSummary["isf.nDays"] = catDF["isf.min"].count() + ageANDylwSummary["isf.min"] = catDF["isf.min"].min() + ageANDylwSummary["isf.weightedMean"] = catDF["isf.weightedMean"].sum() / catDF["isf.weightedMean"].count() + ageANDylwSummary["isf.max"] = catDF["isf.max"].max() + + # add cir stats + ageANDylwSummary["cir.nDays"] = catDF["cir.min"].count() + ageANDylwSummary["cir.min"] = catDF["cir.min"].min() + ageANDylwSummary["cir.weightedMean"] = catDF["cir.weightedMean"].sum() / catDF["cir.weightedMean"].count() + ageANDylwSummary["cir.max"] = catDF["cir.max"].max() + + # add sbr stats + ageANDylwSummary["sbr.nDays"] = catDF["sbr.min"].count() + ageANDylwSummary["sbr.min"] = catDF["sbr.min"].min() + ageANDylwSummary["sbr.weightedMean"] = catDF["sbr.weightedMean"].sum() / catDF["sbr.weightedMean"].count() + ageANDylwSummary["sbr.max"] = catDF["sbr.max"].max() + ageANDylwSummary["sbr.nAutoMode"] = catDF["sbr.type"].count() + + # correctionTarget stats + ageANDylwSummary["ct.nDays"] = catDF["ct.target.min"].count() + ageANDylwSummary["ct.target.min"] = catDF["ct.target.min"].min() + ageANDylwSummary["ct.target.weightedMean"] = catDF["ct.target.weightedMean"].sum() / catDF["ct.target.weightedMean"].count() + ageANDylwSummary["ct.target.max"] = catDF["ct.target.max"].max() + + + # %% calculate local time + 
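+ # tag each event stream (basal, cgm, bolus) with its calendar day, then
+ # left-merge the day-level isDSTChangeDay flag so that event times on
+ # daylight-savings transition days can be treated with caution downstream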
basalEvents["day"] = basalEvents["localTime"].dt.date + basalEvents = pd.merge(basalEvents, dayData[["day", "isDSTChangeDay"]], how="left", on="day") + + cgm["day"] = cgm["localTime"].dt.date + cgm = pd.merge(cgm, dayData[["day", "isDSTChangeDay"]], how="left", on="day") + + bolusEvents["day"] = bolusEvents["localTime"].dt.date + bolusEvents = pd.merge(bolusEvents, dayData[["day", "isDSTChangeDay"]], how="left", on="day") + + + # %% STATS PER EACH TYPE, OVERALL AND PER EACH AGE & YLW (MIN, PERCENTILES, MAX, MEAN, SD, IQR, COV) + # all settings + + allSettings = pd.merge(isf.rename(columns={"isf.localTime": "localTime"}), + cir.rename(columns={"cir.localTime": "localTime"}), + how="outer", on="localTime") + allSettings = pd.merge(allSettings, + sbr.rename(columns={"rate": "sbr", + "type": "sbr.type", + "sbr.localTime": "localTime"}), + how="outer", on="localTime") + allSettings = pd.merge(allSettings, + correctionTarget.rename(columns={"ct.localTime": "localTime"}), + how="outer", on="localTime") + allSettings["hashID"] = hashID + allSettings["age"] = np.floor((allSettings["localTime"] - bDate).dt.days/365.25).astype(int) + allSettings["ylw"] = np.floor((allSettings["localTime"] - dDate).dt.days/365.25).astype(int) + allSettings = round_time(allSettings, timeIntervalMinutes=5, + timeField="localTime", + roundedTimeFieldName="localRoundedTime", + startWithFirstRecord=True, verbose=False) + + allSettings["day"] = allSettings["localTime"].dt.date + allSettings = pd.merge(allSettings, dayData[["day", "isDSTChangeDay"]], how="left", on="day") + + + colOrder = ["hashID", "age", "ylw", "day", "isDSTChangeDay", + "localTime", "localRoundedTime", + "isf", "cir", "sbr", "deviceId", + "ct.low", "ct.high", "ct.target", "ct.range", + "sbr.type", "isf_mmolL_U"] + allSettings = allSettings[colOrder] + + + # %% GET AND SAVE RESULTS BY AGE AND YLW + for category in ["age", "ylw", ["age", "ylw"]]: + pumpSummary = get_pumpSummary(basalEvents, bolusEvents, dayData, category) + + # cgm stats per category + catDF = cgm[cgm["cgmCountsPerDay"] > 1].groupby(category) + cgmStats = catDF.apply(get_cgmStats) + # fix start and end times (not sure why the get transformed to ints) + cgmStats["startTime"] = pd.to_datetime(cgmStats["startTime"]) + cgmStats["endTime"] = pd.to_datetime(cgmStats["endTime"]) + + cgmStats = cgmStats.add_prefix("cgm.") + pumpCgmSummary = pd.concat([pumpSummary, cgmStats], axis=1) + + # get all episodes + for episodeType in allEpisodes["criterion.name"].unique(): + episodeGroup = allEpisodes[allEpisodes["criterion.name"] == episodeType].groupby(category) + episodeDaySummary = episodeGroup["durationMinutes"].describe().add_prefix(episodeType + "-durationMinutes.") + episodeDaySummary.rename(columns={episodeType + "-durationMinutes.count": episodeType + ".count"}, inplace=True) + pumpCgmSummary = pd.concat([pumpCgmSummary, episodeDaySummary], axis=1) + + if category == "age": + pumpCgmSummary.reset_index(inplace=True) + ageSummary = pd.merge(ageSummary, pumpCgmSummary, on=category, how="left") + ageSummary["hashID"] = hashID + allAgeSummaries = pd.concat([allAgeSummaries, ageSummary], ignore_index=True, sort=False) + allAgeSummaries.to_csv(os.path.join(outputPath, + "allAgeSummaries-dIndex-" + str(startIndex) + ".csv")) + elif category == "ylw": + pumpCgmSummary.reset_index(inplace=True) + ylwSummary = pd.merge(ylwSummary, pumpCgmSummary, on=category, how="left") + ylwSummary["hashID"] = hashID + allYlwSummaries = pd.concat([allYlwSummaries, ylwSummary], ignore_index=True, sort=False) + 
+ allYlwSummaries.to_csv(os.path.join(outputPath,
+ "allYlwSummaries-dIndex-" + str(startIndex) + ".csv"))
+ else:
+
+ ageANDylwSummary = ageANDylwSummary.join(pumpCgmSummary, how="left")
+ pumpCgmSummary.reset_index(inplace=True)
+ pumpCgmSummary["hashID"] = hashID
+ allAgeANDylwSummaries = pd.concat([allAgeANDylwSummaries, pumpCgmSummary], ignore_index=True, sort=False)
+
+ allAgeANDylwSummaries.to_csv(os.path.join(outputPath,
+ "allAgeANDylwSummaries-dIndex-" + str(startIndex) + ".csv"))
+
+
+ # %% save data for this person
+ if ((pd.notna(minAge)) & (pd.notna(minYLW))):
+ outputString = "age-%s-%s-ylw-%s-%s-lp-%s-670g-%s-id-%s"
+ outputFormat = (f"{int(minAge):02d}",
+ f"{int(maxAge):02d}",
+ f"{int(minYLW):02d}",
+ f"{int(maxYLW):02d}",
+ f"{int(nDaysClosedLoop):03d}",
+ f"{int(n670gDays):03d}",
+ hashID[0:4])
+ outputFolderName = outputString % outputFormat
+ else:
+ outputFolderName = "dIndex-" + str(dIndex) + "-investigate-" + str(hashID[0:4])
+
+ outputFolderName_Path = os.path.join(outputPath, "data", outputFolderName)
+ if not os.path.exists(outputFolderName_Path):
+ os.makedirs(outputFolderName_Path)
+
+ fName = outputFolderName + "-allSettings.csv"
+ allSettingsMinusPumpSerial = allSettings.copy().drop(columns=["deviceId"])
+ allSettingsMinusPumpSerial.to_csv(os.path.join(outputFolderName_Path, fName))
+ fName = outputFolderName + "-dayData.csv"
+ dayDataMinusPumpSerial = dayData.copy().drop(columns=["deviceId"])
+ dayDataMinusPumpSerial.to_csv(os.path.join(outputFolderName_Path, fName))
+ fName = outputFolderName + "-basalEvents.csv"
+ basalEvents.to_csv(os.path.join(outputFolderName_Path, fName))
+ fName = outputFolderName + "-bolusEvents.csv"
+ bolusEvents.to_csv(os.path.join(outputFolderName_Path, fName))
+ fName = outputFolderName + "-cgm.csv"
+ cgm.to_csv(os.path.join(outputFolderName_Path, fName))
+ fName = outputFolderName + "-allEpisodes.csv"
+ allEpisodes.to_csv(os.path.join(outputFolderName_Path, fName))
+
+
+ # %% save the processed data (saving this data will take up a lot of space and time)
+ data.to_csv(os.path.join(processedDataPath, "allDataCleaned-PHI-" + userID + ".csv"))
+ basal.to_csv(os.path.join(processedDataPath, "basal-PHI-" + userID + ".csv"))
+ bolus.to_csv(os.path.join(processedDataPath, "bolus-PHI-" + userID + ".csv"))
+ cgmData.to_csv(os.path.join(processedDataPath, "cgm-PHI-" + userID + ".csv"))
+ pumpSettings.to_csv(os.path.join(processedDataPath, "pumpSettings-PHI-" + userID + ".csv"))
+ allSettings.to_csv(os.path.join(processedDataPath, "allSettings-PHI-" + userID + ".csv"))
+ dayData.to_csv(os.path.join(processedDataPath, "dayData-PHI-" + userID + ".csv"))
+
+ else:
+ metadata["flags"] = "no bolus wizard data"
+ else:
+ metadata["flags"] = "missing either pump or cgm data"
+ else:
+ metadata["flags"] = "file contains no data"
+ else:
+ metadata["flags"] = "file does not exist"
+ else:
+ metadata["flags"] = "missing bDay/dDay"
+
+ except Exception:
+ print("something broke, dIndex=", dIndex)
+ metadata["flags"] = "something broke"
+
+
+ # write metaData to allMetadata
+ allMetadata = pd.concat([allMetadata, metadata], axis=0, sort=True)
+ allMetadata.to_csv(os.path.join(outputPath,
+ "allMetadata-dIndex-" + str(startIndex) + ".csv"))
+
+ print("done with", dIndex)
+
+
+# %% V2 DATA TO GRAB
+# INVESTIGATE SETTINGS OUTLIERS (Paradigm Veo pumps have unrealistically high ISF, omnipod with likely mg/dL units has the wrong correction target)
+# ADD ROUNDEDLOCAL TIME TO THE END RESULTS
+# CALCULATE MMOL SUMMARIES
+# DEFINE A
DAY BETWEEN 6AM AND 6AM +# FIX DAYLIGHT SAVINGS TIME TIMES +# FIGURE OUT WHY TEMP BASAL COUNTS ARE DIFFERENT BETWEEN THE TWO DIFFERENT METHODS +# MAX BASAL RATE, MAX BOLUS AMOUNT, AND INSULIN DURATION SET ON SELECT PUMPS +# ALERT SETTINGS +# ESTIMATED LOCAL TIME +# GLYCEMIC OUTCOMES +# DO NOT ROUND DATA +# INFUSION SITE CHANGES +# CGM CALIBRATIONS diff --git a/projects/get-donors-pump-settings/visualize-users-settings-and-events-jaeb-ages.py b/projects/get-donors-pump-settings/visualize-users-settings-and-events-jaeb-ages.py new file mode 100644 index 00000000..62eb6969 --- /dev/null +++ b/projects/get-donors-pump-settings/visualize-users-settings-and-events-jaeb-ages.py @@ -0,0 +1,1396 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Jan 22 06:46:33 2019 + +@author: ed +""" + +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +description: visualize users settings and events +version: 0.0.1 +created: 2019-01-11 +author: Ed Nykaza +dependencies: + * +license: BSD-2-Clause +""" + + +# %% REQUIRED LIBRARIES +import pandas as pd +import numpy as np +from pytz import timezone +from datetime import timedelta +import datetime as dt +import os +import argparse +import pdb +import matplotlib.pyplot as plt +import plotly +import plotly.plotly as py +import plotly.graph_objs as go +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot +import plotly.io as pio + + + +# %% FUNCTIONS +def make_bold(val_list): + bold_list = [] + for val in val_list: + bold_list.append('' + str(val) + '') + return bold_list + +def make_bold_and_round(val_list, nDecimalPlaces): + bold_list = [] + for val in val_list: + if nDecimalPlaces == 0: + bold_list.append('' + str(int(np.round(val, nDecimalPlaces))) + '') + + else: + bold_list.append('' + str(np.round(val, nDecimalPlaces)) + '') + return bold_list + + +def save_fig(fig, plot_name, width, height, scale): + pio.write_image( + fig, + os.path.join( + figure_path, + plot_name + ".png" + ), + width=width, + height=height, + scale=scale) + + return + + +def make_static_plot(field, yLabel, figName, df, yMin, yMax): + + df.sort_values("categories", inplace=True) + + traces = [] + for yd in df.categories.unique(): + traces.append(go.Box( + y=df.loc[df["categories"] == yd, field].values, + x=df.loc[df["categories"] == yd, "categories"].values, + name=yd, + boxpoints="all", + notched=True, + hoverlabel=dict(font=dict(size=22)), + marker=dict( + color=df.loc[df["categories"] == yd, "allColors"].describe()["top"], + opacity=0, + ), + )) + + layout = go.Layout( + font=dict( + size=22 + ), + xaxis=dict( + tickangle=52.5 + ), + yaxis=dict( + title=yLabel, + range=[yMin, yMax], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ), + margin=dict( + l=100, + r=200, + b=250, + t=50, + ), + +# boxmode='group', + showlegend=False, + legend=dict(font=dict(size=14)) + ) + + fig = go.Figure(data=traces, layout=layout) + + save_fig(fig, figName + "-boxplot-lowRes", 1800, 1200, 1) + save_fig(fig, figName + "-boxplot-highRes", 1800, 1200, 4) + +def make_static_table(field, figName, filteredDF, nDecimals, return_summaryTable=False): + + # first make an overall table + allCounts = filteredDF.groupby(["hashID"])[field].describe() + allAgeTable = pd.DataFrame(index=[field]) + allAgeTable["min"] = allCounts["min"].min() + allAgeTable["max"] = allCounts["max"].max() + allAgeTable["U"] = len(allCounts) + allAgeTable["N"] = allCounts["count"].sum() + + # then make summary per 
categories + uniqueCounts = filteredDF.groupby(["categories"])["hashID"].describe() + uniqueCounts.reset_index(inplace=True) + summaryTable = filteredDF.groupby("categories")[field].describe() + summaryTable.reset_index(inplace=True) + summaryTable = pd.merge(summaryTable, uniqueCounts[["categories", "unique"]], how="left", on="categories") + summaryTable = pd.merge(summaryTable, catColorDF, how="left", on="categories") + summaryTable["unique"] = summaryTable["unique"].astype(float) + + # add in interquartile range + summaryTable["IQR"] = summaryTable["75%"] - summaryTable["25%"] + + col_headings = make_bold(["Group", "N", "U", "Average", "Stdev", "Min", "Q1", "Median", "Q3", "Max"]) + + trace = go.Table( + header=dict(values=col_headings, + fill = dict(color='white'), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=12)), + columnwidth=[1.5, 1, 1, 1, 1, 1, 1, 1, 1, 1], + cells=dict(values=[make_bold(summaryTable["categories"]), + make_bold_and_round(summaryTable["count"], 0), + make_bold_and_round(summaryTable["unique"], 0), + make_bold_and_round(summaryTable["mean"], nDecimals), + make_bold_and_round(summaryTable["std"], nDecimals), + make_bold_and_round(summaryTable["min"], nDecimals), + make_bold_and_round(summaryTable["25%"], nDecimals), + make_bold_and_round(summaryTable["50%"], nDecimals), + make_bold_and_round(summaryTable["75%"], nDecimals), + make_bold_and_round(summaryTable["max"], nDecimals)], + fill = dict(color = [summaryTable["allColors"]]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=10), + height = 20) + ) + + fig = go.Figure() + fig.add_trace(trace) + + pio.write_image( + fig, + os.path.join( + figure_path, + figName + "-table-highRes.png" + ), + width=1200, + height=1200, + scale=4) + + pio.write_image( + fig, + os.path.join( + figure_path, + figName + "-table-lowRes.png" + ), + width=1200, + height=1200, + scale=1) + + summaryTable.to_csv( + os.path.join( + figure_path, + figName + "-table.csv" + ) + ) + allAgeTable.to_csv( + os.path.join( + figure_path, + figName + "-all-age-table.csv" + ) + ) + + if return_summaryTable: + return summaryTable, allAgeTable + else: + return + + +def make_lite_interactive_boxplot(field, yLabel, df, yMin, yMax): + df.sort_values("categories", inplace=True) + + traces = [] + for yd in df.categories.unique(): + yValues = df.loc[df["categories"] == yd, field] + yStats = yValues.describe() + yMinimum = yStats["min"] + yQ1 = yStats["25%"] + yQ2 = yStats["50%"] + yQ3 = yStats["75%"] + yMaximum = yStats["max"] + yIQR = yQ3 - yQ1 + maxWhisker = yIQR * 1.5 + lowWhiskerBound = yQ1 - maxWhisker + highWhiskerBound = yQ3 + maxWhisker + yLowerFence = yValues[yValues >= lowWhiskerBound].min() + yUpperFence = yValues[yValues <= highWhiskerBound].max() + yBoxData = [yMinimum, yLowerFence, yQ1, yQ1, yQ1, yQ1, yQ1, + yQ2, yQ3, yQ3, yQ3, yQ3, yQ3, + yUpperFence, yMaximum] + + # get N and U + nDays = df.loc[df["categories"] == yd, "count"].median().astype(int) + uniqueDonors = df.loc[df["categories"] == yd, "unique"].median().astype(int) + + traces.append(go.Box( + y=yBoxData, + jitter=0, + pointpos=0, + text=list(np.repeat("N=%s, U=%s" % (nDays, uniqueDonors), len(yBoxData))), + hoverinfo="y+text", + name=yd, + boxpoints="all", + notched=False, + marker=dict( + color=df.loc[df["categories"] == yd, "allColors"].describe()["top"], + opacity=0, + ), + )) + + layout = go.Layout( + yaxis=dict( + title=yLabel, + range=[yMin, yMax], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, 
+ zerolinecolor='#f1f3f4',
+ zerolinewidth=2,
+ ),
+ showlegend=True
+ )
+
+ fig = go.Figure(data=traces, layout=layout)
+ plot_url = py.plot(fig, filename="Distribution of " + figName, auto_open=False)
+ print(figName, plot_url)
+
+ return
+
+
+def filter_data(df, min_days_criteria=7):
+
+ # keep all type1 and null diagnosis data (not specified)
+ df = df[((df.diagnosisType.isnull()) | (df.diagnosisType == "type1"))]
+
+ # filter out invalid ages and ylw
+ df = df[((df.age.astype(float) >= 0) & (df.age.astype(float) <= 90))]
+ df = df[((df.ylw.astype(float) >= 0) & (df.ylw.astype(float) <= 80))]
+
+ # filter out invalid pump and cgm days
+ df = df[((df["validPumpData"]) & (df["validCGMData"]))]
+
+ # filter out Paradigm Veo Pumps
+ df = df[~df["pump.top"].str.contains("Paradigm Veo")]
+
+ # filter out omnipod with mg/dL likely settings
+ df = df[~((df["pump.top"].str.contains("InsOmn-130")) &
+ (df['pumpSettings.isfLikelyUnits'] == "mg/dL"))]
+
+ # require a minimum number of days of data
+ dayGroups = pd.DataFrame(df.groupby(["hashID", "age", "ylw"]).day.count()).reset_index()
+ dayGroups.rename(columns={"day": "nDays"}, inplace=True)
+ df = pd.merge(df, dayGroups, how="left", on=["hashID", "age", "ylw"])
+
+ df = df[df["nDays"] >= min_days_criteria]
+
+ return df
+
+
+def merge_dayData(df, dayDF):
+
+ df = pd.merge(
+ df,
+ dayDF[[
+ "hashID",
+ "day",
+ "validPumpData",
+ "atLeast3Boluses",
+ "validCGMData",
+ "diagnosisType",
+ "pump.top",
+ "pumpSettings.isfLikelyUnits"
+ ]],
+ how="left",
+ on=["hashID", "day"]
+ )
+
+ return df
+
+
+def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min_unique_donors=10):
+
+ # bin data (defined above)
+ df["ageBins"] = pd.cut(df["age"], ageBins, labels=ageGroupNames)
+ df["ylwBins"] = pd.cut(df["ylw"], ylwBins, labels=ylwGroupNames)
+ df["ageCategories"] = df["ageBins"].astype(str)
+ df["ylwCategories"] = df["ylwBins"].astype(str)
+ df["categories"] = "age " + df["ageBins"].astype(str) + " ylw " + df["ylwBins"].astype(str)
+
+ # attach bin colors (defined above); note that Series.astype has no
+ # inplace argument, so the result has to be assigned back
+ df = pd.merge(df, catColorDF, how="left", on="categories")
+ df["categories"] = df["categories"].astype("category")
+
+ # attach counts per group
+ dGroups = df.groupby("categories")
+ groupDF = dGroups["hashID"].describe()
+ groupDF["ageCategories"] = dGroups["ageCategories"].describe()["top"]
+ groupDF["ylwCategories"] = dGroups["ylwCategories"].describe()["top"]
+ #groupDF["ylwAlpha"] = dGroups["ylwAlpha"].mean()
+ groupDF["allColors"] = dGroups["allColors"].describe()["top"]
+ groupDF.reset_index(inplace=True)
+
+ # attach group counts to the main dataframe
+ df = pd.merge(df, groupDF[["categories", "count", "unique"]], how="left", on="categories")
+
+ # remove all categories that do NOT have more than min_unique_donors unique people
+ df = df[df["unique"] > min_unique_donors]
+ groupDF = groupDF[groupDF["unique"] > min_unique_donors]
+
+ # attach N and U to the categories
+ df["categoriesFull"] = (
+ df["categories"].astype(str) +
+ " (N=" + df["count"].astype(str) +
+ ", U=" + df["unique"].astype(str) + ")"
+ )
+
+ return df, groupDF
+
+
+# %% define age and years living with bins
+group_title = "-jos-groups"
+figure_path = os.path.join(".", "figures")
+
+# next bin the data by age-ylw groups
+dataGroupName = "age-ylw-groups"
+
+# original age and ylw bins
+#ageBins = np.array([0,5,8,12,17,24,85])
+#ylwBins = np.array([-1,0,1,2,5,10,25,75])
+
+#catColors = [
+# '#f0d8e5','#f4bdd8','#f7a0cc','#f781bf',
+# '#ebc3c1','#f1a095','#f17d6c','#ec5644','#e41a1c',
+# 
'#f2d8c3','#fbc299','#ffac6f','#ff9746','#ff7f00', +# '#d0e1cc','#b8d8b2','#9fcd97','#86c37e','#6cb964','#4daf4a', +# '#c9d6e3','#afc4da','#95b1d2','#7aa0c9','#5b8fc1','#377eb8', +# '#dacbde','#d0b6d4','#c5a1ca','#ba8dc0','#af78b7','#a464ad','#984ea3' +#] +# +#finalCategories = [ +# 'age 01-05 ylw 00', 'age 01-05 ylw 01', 'age 01-05 ylw 02', +# 'age 01-05 ylw 03-05', 'age 06-08 ylw 00', 'age 06-08 ylw 01', +# 'age 06-08 ylw 02', 'age 06-08 ylw 03-05', 'age 06-08 ylw 06-10', +# 'age 09-12 ylw 00', 'age 09-12 ylw 01', 'age 09-12 ylw 02', +# 'age 09-12 ylw 03-05', 'age 09-12 ylw 06-10', 'age 13-17 ylw 00', +# 'age 13-17 ylw 01', 'age 13-17 ylw 02', 'age 13-17 ylw 03-05', +# 'age 13-17 ylw 06-10', 'age 13-17 ylw 11-25', 'age 18-24 ylw 00', +# 'age 18-24 ylw 01', 'age 18-24 ylw 02', 'age 18-24 ylw 03-05', +# 'age 18-24 ylw 06-10', 'age 18-24 ylw 11-25', 'age 25-85 ylw 00', +# 'age 25-85 ylw 01', 'age 25-85 ylw 02', 'age 25-85 ylw 03-05', +# 'age 25-85 ylw 06-10', 'age 25-85 ylw 11-25', +# 'age 25-85 ylw 26-75' +#] + +# jaeb obs study bins +ageBins = np.array([-1,6,13,25,85]) +ylwBins = np.array([-1,1,5,75]) + +# bin by age +ageGroupNames = [] +for x, y in zip(ageBins[:-1]+1, ageBins[1:]): + ageGroupNames.append("%s-%s"%(f"{x:02d}", f"{y:02d}")) + +ylwGroupNames = [] +for x, y in zip(ylwBins[:-1]+1, ylwBins[1:]): + if x == y: + ylwGroupNames.append("%s"%(f"{x:02d}")) + else: + ylwGroupNames.append("%s-%s"%(f"{x:02d}", f"{y:02d}")) + +## 7 colors in each +#oranges = ['#fdd0a2','#fdae6b','#fd8d3c','#f16913','#d94801','#a63603','#7f2704'] +#reds = ['#fcbba1','#fc9272','#fb6a4a','#ef3b2c','#cb181d','#a50f15','#67000d'] +#greens = ['#c7e9c0','#a1d99b','#74c476','#41ab5d','#238b45','#006d2c','#00441b'] +#blues = ['#c6dbef','#9ecae1','#6baed6','#4292c6','#2171b5','#08519c','#08306b'] +#purples = ['#dadaeb','#bcbddc','#9e9ac8','#807dba','#6a51a3','#54278f','#3f007d'] +#greys = ['#d9d9d9','#bdbdbd','#969696','#737373','#525252','#252525','#000000'] + +# 3 colors in each +#reds = ['#fcae91','#fb6a4a','#cb181d'] +oranges = ['#fdbe85','#fd8d3c','#d94701'] +greens = ['#bae4b3','#74c476','#238b45'] +blues = ['#bdd7e7','#6baed6','#2171b5'] +purples = ['#cbc9e2','#9e9ac8','#6a51a3'] +#greys = ['#cccccc','#969696','#525252'] + + +color_matrix = pd.DataFrame([oranges, greens, blues, purples]) + +all_colors = np.reshape(color_matrix.values, -1) + +i = 0 +catColorDF = pd.DataFrame() +for ai in range(0, len(ageGroupNames)): + for yi in range(0, len(ylwGroupNames)): + catColorDF.loc[i, "categories"] = "age %s ylw %s" %(ageGroupNames[ai], ylwGroupNames[yi]) + catColorDF.loc[i, "allColors"] = all_colors[i] + i = i + 1 + + + +# %% load in summary donor data +dataPulledDate = "2019-01-10" +dataProcessedDate = "2019-01-22" + +phiDate = "PHI-" + dataPulledDate +donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data") +donorList = phiDate + "-uniqueDonorList" +donors = pd.read_csv(os.path.join(donorPath, donorList + ".csv"), low_memory=False) + + +# %% all-donors summary +allAgeSummary = pd.DataFrame() +dataPath = os.path.join(donorPath, "settings-and-events") +d = pd.read_csv(os.path.join(dataPath, "combined-allMetadata.csv"), low_memory=False) + +# attach the donor level data to the +allMetadata = pd.merge( + d, + donors[[ + "hashID", + "userID", + "diagnosisType", + "targetDevices", + "targetTimezone", + "termsAccepted" + ]], + how="left", + on="hashID" +) +allMetadata.to_csv(os.path.join(donorPath, donorList + "-w-metaData.csv")) + + +# %% load data +dayData = 
pd.read_csv(os.path.join(dataPath, "combined-dayData.csv"), low_memory=False)
+bolusData = pd.read_csv(os.path.join(dataPath, "combined-bolusEvents.csv"), low_memory=False)
+basalData = pd.read_csv(os.path.join(dataPath, "combined-basalEvents.csv"), low_memory=False)
+
+# %% attach the diagnosis type to the day data
+dayDF = pd.merge(
+ dayData,
+ allMetadata[[
+ "hashID",
+ "diagnosisType",
+ "pump.top",
+ "pumpSettings.isfLikelyUnits"
+ ]],
+ how="left",
+ on="hashID"
+)
+
+dayDF = filter_data(dayDF, min_days_criteria=7)
+dayDF, dayDFGroupSummary = (
+ bin_data(
+ dayDF,
+ ageBins,
+ ageGroupNames,
+ ylwBins,
+ ylwGroupNames,
+ catColorDF,
+ min_unique_donors=10
+ )
+)
+
+
+# %% all-event level summary (max basal and max bolus)
+# attach the day to bolus data and filter data by analysis criteria
+# NOTE: see the filter_data function for details
+bolus = merge_dayData(bolusData, dayDF)
+bolus = filter_data(bolus, min_days_criteria=7)
+bolus, bolusGroupSummary = (
+ bin_data(
+ bolus,
+ ageBins,
+ ageGroupNames,
+ ylwBins,
+ ylwGroupNames,
+ catColorDF,
+ min_unique_donors=10
+ )
+)
+
+
+# %% overview of bolus data table
+figName = "overviewTable-bolus-events"
+figName = figName + group_title
+trace = go.Table(
+ header=dict(
+ values=make_bold(["AGE-YLW Group",
+ "Age",
+ "Years Living with T1D",
+ "N (Bolus Events)",
+ "U (Unique Donors)"]),
+ align = ['center', 'center', 'center'],
+ font = dict(color = 'black', size=14)
+ ),
+ cells=dict(
+ values=[make_bold(bolusGroupSummary['categories']),
+ make_bold(bolusGroupSummary['ageCategories']),
+ make_bold(bolusGroupSummary['ylwCategories']),
+ make_bold(bolusGroupSummary['count']),
+ make_bold(bolusGroupSummary['unique'])],
+ fill = dict(color = [bolusGroupSummary["allColors"]]),
+ align = ['center', 'center', 'center'],
+ font = dict(color = 'black', size=11),
+ height = 22
+ ),
+)
+
+fig = go.Figure()
+fig.add_trace(trace)
+
+pio.write_image(
+ fig,
+ os.path.join(
+ figure_path,
+ figName + "-highRes.png"
+ ),
+ width=1200,
+ height=1200,
+ scale=4)
+
+pio.write_image(
+ fig,
+ os.path.join(
+ figure_path,
+ figName + "-lowRes.png"
+ ),
+ width=1200,
+ height=1200,
+ scale=1)
+
+
+# %% max bolus amount (U)
+maxBolus = pd.DataFrame(bolus.groupby(["hashID", "day"])["unitsInsulin"].max()).reset_index()
+maxBolus.rename(columns={"unitsInsulin":"maxBolusPerDay"}, inplace=True)
+
+maxBolus = pd.merge(
+ maxBolus,
+ dayDF[[
+ "hashID",
+ "day",
+ "categories",
+ "allColors"
+ ]],
+ how="left",
+ on=["hashID", "day"]
+)
+
+# remove nans in category as they represent data from days that did not meet the
+# acceptable day standard
+maxBolus = maxBolus[maxBolus["categories"].notnull()]
+
+field = 'maxBolusPerDay'
+yLabel = "Max Bolus Per Day (U)"
+figName = "Max Bolus"
+yMin = 0
+yMax = 21
+filteredDF = maxBolus[maxBolus[field] > 0].copy()
+
+## make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+ field,
+ figName,
+ filteredDF,
+ nDecimalPlaces,
+ return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+# add N events and n unique donors
+filteredDF = pd.merge(
+ filteredDF,
+ summaryTable[[
+ "categories",
+ "count",
+ "unique"
+ ]],
+ how="left",
+ on="categories"
+)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% basal data
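+# NOTE: each metric section in this script repeats the same steps: filter
+# the day (or event) frame, write a static summary table, write a static
+# boxplot, and post a lite interactive boxplot. A minimal helper sketch that
+# bundles the first three steps is shown here; it only reuses the functions
+# defined above, and the name summarize_metric is an assumption, not part of
+# the original pipeline. The interactive boxplot is left to the caller
+# because make_lite_interactive_boxplot reads the module-level figName, and
+# most sections filter with field > 0 while a few use field >= 0.
+def summarize_metric(df, field, yLabel, figName, yMin, yMax, nDecimals):
+    # keep only rows with a usable (positive) value for this metric
+    filtered = df[df[field] > 0].copy()
+    # per-category summary table (also writes the table PNGs and CSVs)
+    summaryTable, allAgeTable = make_static_table(
+        field, figName, filtered, nDecimals, return_summaryTable=True)
+    # static boxplot PNGs at low and high resolution
+    make_static_plot(field, yLabel, figName, filtered, yMin, yMax)
+    return summaryTable, allAgeTable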
+basal = merge_dayData(basalData, dayDF) +basal = filter_data(basal, min_days_criteria=7) +basal, basalGroupSummary = ( + bin_data( + basal, + ageBins, + ageGroupNames, + ylwBins, + ylwGroupNames, + catColorDF, + min_unique_donors=10 + ) +) + + +# %% overview of basal data table +figName = "overviewTable-basal-events" +figName = figName + group_title + +trace = go.Table( + header=dict( + values=make_bold(["AGE-YLW Group", + "Age", + "Years Living with T1D", + "N (Basal Events)", + "U (Unique Donors)"]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=14) + ), + cells=dict( + values=[make_bold(basalGroupSummary['categories']), + make_bold(basalGroupSummary['ageCategories']), + make_bold(basalGroupSummary['ylwCategories']), + make_bold(basalGroupSummary['count']), + make_bold(basalGroupSummary['unique'])], + fill = dict(color = [basalGroupSummary["allColors"]]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=11), + height = 22 + ), +) + +fig = go.Figure() +fig.add_trace(trace) + +pio.write_image( + fig, + os.path.join( + figure_path, + figName + "-highRes.png" + ), + width=1200, + height=1200, + scale=4) + +pio.write_image( + fig, + os.path.join( + figure_path, + figName + "-lowRes.png" + ), + width=1200, + height=1200, + scale=1) + + +# %% max basal rate +maxBasal = pd.DataFrame(basal[basal["type"]=="basal"].groupby(["hashID", "day"])["rate"].max()).reset_index() + +maxBasal.rename(columns={"rate":"maxBasalRatePerDay"}, inplace=True) + +maxBasal = pd.merge( + maxBasal, + dayDF[[ + "hashID", + "day", + "categories", + "allColors" + ]], + how="left", + on=["hashID", "day"] +) + +# remove nans in category as they represent data from days that did not meat the +# acceptable day standard +maxBasal = maxBasal[maxBasal["categories"].notnull()] + +field = 'maxBasalRatePerDay' +yLabel = "Max Basal Per Day (U/hr)" +figName = "Max Basal" +yMin = 0 +yMax = 3.25 +filteredDF = maxBasal[maxBasal[field] > 0].copy() + +## make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +# add N events and n unique donors +filteredDF = pd.merge( + filteredDF, + summaryTable[[ + "categories", + "count", + "unique" + ]], + how="left", + on="categories" +) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% overview of day level data table +figName = "overviewTable-day-data" +figName = figName + group_title + +trace = go.Table( + header=dict( + values=make_bold(["AGE-YLW Group", + "Age", + "Years Living with T1D", + "N (Days)", + "U (Unique Donors)"]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=14) + ), + cells=dict( + values=[make_bold(dayDFGroupSummary['categories']), + make_bold(dayDFGroupSummary['ageCategories']), + make_bold(dayDFGroupSummary['ylwCategories']), + make_bold(dayDFGroupSummary['count']), + make_bold(dayDFGroupSummary['unique'])], + fill = dict(color = [dayDFGroupSummary["allColors"]]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=11), + height = 22 + ), +) + +fig = go.Figure() +fig.add_trace(trace) + +pio.write_image( + fig, + os.path.join( + figure_path, + 
figName + "-highRes.png" + ), + width=1200, + height=1200, + scale=4) + +pio.write_image( + fig, + os.path.join( + figure_path, + figName + "-lowRes.png" + ), + width=1200, + height=1200, + scale=1) + + +# %% Average ISF per day +dayDF["isfRounded"] = dayDF['isf.weightedMean'].round(1) +field = 'isfRounded' +yLabel = "Insulin Sensitivity Factor (mg/dL/U)" +figName = "Insulin Sensitivity Factor" +yMin = 0 +yMax = 400 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average CIR per day +field = 'cir.weightedMean' +yLabel = "Carb to Insulin Ratio (g/U)" +figName = "Carb to Insulin Ratio" +yMin = 0 +yMax = 70 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average Correction Target per day +field = 'ct.target.weightedMean' +yLabel = "Correction Target (mg/dL)" +figName = "Correction Target" +yMin = 70 +yMax = 180 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average Basal Rate per day +field = 'sbr.weightedMean' +yLabel = "Scheduled Basal Rate (U/hr)" +figName = "Scheduled Basal Rate" +yMin = 0 +yMax = 2.5 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 3 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Total Daily Dose +field = "totalAmountOfInsulin" +yLabel = "Total Daily Dose (U)" +figName = "Total Daily Dose" +yMin = 0 +yMax = 125 +filteredDF = dayDF[dayDF[field] > 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + 
nDecimalPlaces,
+ return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Percent Basal
+dayDF["percentBasalInPercent"] = dayDF["percentBasal"] * 100
+field = "percentBasalInPercent"
+yLabel = "Basal Proportion of Total Daily Dose (%)"
+figName = "Basal Proportion of Total Daily Dose"
+yMin = 0
+yMax = 100
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+ field,
+ figName,
+ filteredDF,
+ nDecimalPlaces,
+ return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Total Daily Carbs
+field = "totalDailyCarbs"
+yLabel = "Total Daily Carbs (g)"
+figName = "Total Daily Carbs"
+yMin = 0
+yMax = 600
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 0
+summaryTable, allAgeTable = make_static_table(
+ field,
+ figName,
+ filteredDF,
+ nDecimalPlaces,
+ return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Daily Time in Range (70-180 mg/dL)
+dayDF["percentInRange"] = dayDF["cgm.percent70to180"] * 100
+field = "percentInRange"
+yLabel = "Percent of Day in Target Range (70-180 mg/dL, %)"
+figName = "Percent of Day in Target Range 70-180"
+yMin = 0
+yMax = 100
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+ field,
+ figName,
+ filteredDF,
+ nDecimalPlaces,
+ return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Mean CGM (mg/dL)
+field = "cgm.mean_mgdL"
+yLabel = "Daily Average CGM Level (mg/dL)"
+figName = "Daily Average CGM Level"
+yMin = 50
+yMax = 300
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 0
+summaryTable, allAgeTable = make_static_table(
+ field,
+ figName,
+ filteredDF,
+ nDecimalPlaces,
+ return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Cov CGM (mg/dL)
+dayDF["covPercent"] = dayDF["cgm.cov_mgdL"] * 100
+field = "covPercent"
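+# (coefficient of variation = within-day standard deviation divided by the
+# within-day mean of the CGM values; cgm.cov_mgdL appears to be stored as a
+# fraction upstream, hence the *100 above)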
+yLabel = "Coeffient of Variation (%)" +figName = "Coeffient of Variation" +yMin = 6 +yMax = 62 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Daily Time Below 54 (Percentage) +dayDF["perecentBelow54mgdL"] = dayDF["cgm.percentBelow54"] * 100 +field = "perecentBelow54mgdL" +yLabel = "Percent of Day Below 54 mg/dL (%)" +figName = "Percent of Day in Extreme Hypo Below 54 mgdL" +yMin = 0 +yMax = 5 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 2 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Number of Below 54 mg/dL Episodes per Day +field = "extreme-hypo.count" +dayDF[field].fillna(0, inplace=True) +yLabel = "Number of Extreme Hypo Episodes (Below 54 mg/dL) per Day" +figName = "Number of Extreme Hypo Episodes per Day" +yMin = 0 +yMax = 2 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average Duration of each Episode Below 54 mg/dL +field = "extreme-hypo-durationMinutes.mean" +yLabel = "Average Duration of each Extreme Hypo Episode (minutes)" +figName = "Average Duration of each Extreme Hypo Episode" +yMin = 15 +yMax = 120 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Daily Time Above 250 (Percentage) +dayDF["perecentAbove250mgdL"] = dayDF["cgm.percentAbove250"] * 100 +field = "perecentAbove250mgdL" +yLabel = "Percent of Day Above 250 mg/dL (%)" +figName = "Percent of Day in Extreme Hyper Above 250 mgdL" +yMin = 0 +yMax = 75 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title 
+nDecimalPlaces = 0 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Number of Above 250 mg/dL Episodes per Day +field = "extreme-hyper.count" +dayDF[field].fillna(0, inplace=True) +yLabel = "Number of Extreme Hyper Episodes (Above 250 mg/dL) per Day" +figName = "Number of Extreme Hyper Episodes per Day" +yMin = 0 +yMax = 2 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% Average Duration of each Episode Above 250 mg/dL +dayDF["avgExtremeHyperHours"] = dayDF["extreme-hyper-durationMinutes.mean"] / 60 +field = "avgExtremeHyperHours" +yLabel = "Average Duration of each Extreme Hyper Episode (hours)" +figName = "Average Duration of each Extreme Hyper Episode" +yMin = 2 +yMax = 10 +filteredDF = dayDF[dayDF[field] >= 0].copy() + +# make/save static summary table +figName = figName + group_title +nDecimalPlaces = 1 +summaryTable, allAgeTable = make_static_table( + field, + figName, + filteredDF, + nDecimalPlaces, + return_summaryTable=True +) + +# add the ageTable to the allAgeSummary +#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0) + +## make/save static boxplot +make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax) + +# make lite interactive plot +make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax) + + +# %% save the all age summaries +figName = "allAgeSettingSummary" + group_title +allAgeSummary.to_csv( + os.path.join( + figure_path, + figName + "-all-age-table.csv" + ) +) + + +# %% make a plot of TDD by ISF +# Average ISF per day +dayDF["isfRounded"] = dayDF['isf.weightedMean'].round(1) + +filteredDF = dayDF[((dayDF['isfRounded'] > 0) & + (dayDF['totalAmountOfInsulin'] > 0))].copy() + +x = np.arange(1, 500) +c = pd.DataFrame(columns=["ISF", "TDD"]) +for xi in x: + if sum(filteredDF['isfRounded'] == xi) > 3: + c.loc[xi, "ISF"] = xi + c.loc[xi, "TDD"] = filteredDF.loc[ + filteredDF['isfRounded'] == xi, + "totalAmountOfInsulin"].median() + +trend_by_isf = c.rolling(25, center=True).mean() + +x = np.arange(1, 300) +d = pd.DataFrame(columns=["TDD", "ISF"]) +for xi in x: + if sum(filteredDF['totalAmountOfInsulin'].round() == xi) > 3: + d.loc[xi, "TDD"] = xi + d.loc[xi, "ISF"] = filteredDF.loc[ + filteredDF['totalAmountOfInsulin'].round() == xi, + "isfRounded"].median() + +# then smooth out the medians +trend_by_tdd = d.rolling(10, center=True).mean() + +traces = [] + +for yd in catColorDF.categories.unique(): + traces.append(go.Scattergl( + y=filteredDF.loc[filteredDF["categories"] == yd, 'isfRounded'], + x=filteredDF.loc[filteredDF["categories"] == yd, 'totalAmountOfInsulin'].round(), + name=yd, + mode='markers', + marker=dict( + 
color=filteredDF.loc[filteredDF["categories"] == yd, 'allColors'], + opacity=0.5, + ), + )) + +traces.append(go.Scattergl( + y=trend_by_tdd["ISF"], + x=trend_by_tdd["TDD"], + mode='lines', + name="Trend by TDD", + line=dict( + color="black", + dash="dot", + ), +)) + +traces.append(go.Scattergl( + y=trend_by_isf["ISF"], + x=trend_by_isf["TDD"], + mode='lines', + name="Trend by ISF", + line=dict( + color="black", + dash="dash", + ), +)) + +layout = go.Layout( + font=dict( + size=18 + ), + xaxis=dict( + title="TDD", + dtick=20, + range=[0, 300], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ), + yaxis=dict( + title="ISF", + dtick=20, + range=[0, 500], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ) +) +figName = "ISFbyTDD" +fig = go.Figure(data=traces, layout=layout) +plot_url = py.plot(fig, filename=figName, auto_open=False) +print(figName, plot_url) diff --git a/projects/get-donors-pump-settings/visualize-users-settings-and-events.py b/projects/get-donors-pump-settings/visualize-users-settings-and-events.py new file mode 100644 index 00000000..cd392754 --- /dev/null +++ b/projects/get-donors-pump-settings/visualize-users-settings-and-events.py @@ -0,0 +1,1529 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Jan 22 06:46:33 2019 + +@author: ed +""" + +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +description: visualize users settings and events +version: 0.0.1 +created: 2019-01-11 +author: Ed Nykaza +dependencies: + * +license: BSD-2-Clause +""" + + +# %% REQUIRED LIBRARIES +import pandas as pd +import numpy as np +from pytz import timezone +from datetime import timedelta +import datetime as dt +import os +import argparse +import pdb +import matplotlib.pyplot as plt +import plotly +import plotly.plotly as py +import plotly.graph_objs as go +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot +import plotly.io as pio + + +# %% FUNCTIONS +def make_bold(val_list): + bold_list = [] + for val in val_list: + bold_list.append('' + str(val) + '') + return bold_list + +def make_bold_and_round(val_list, nDecimalPlaces): + bold_list = [] + for val in val_list: + if nDecimalPlaces == 0: + bold_list.append('' + str(int(np.round(val, nDecimalPlaces))) + '') + + else: + bold_list.append('' + str(np.round(val, nDecimalPlaces)) + '') + return bold_list + + +def save_fig(fig, plot_name, width, height, scale): + pio.write_image( + fig, + os.path.join( + figure_path, + plot_name + ".png" + ), + width=width, + height=height, + scale=scale) + + return + + +def make_static_plot(field, yLabel, figName, df, yMin, yMax): + + df.sort_values("categories", inplace=True) + + traces = [] + for yd in df.categories.unique(): + traces.append(go.Box( + y=df.loc[df["categories"] == yd, field].values, + x=df.loc[df["categories"] == yd, "categories"].values, + name=yd, + boxpoints="all", + notched=True, + hoverlabel=dict(font=dict(size=22)), + marker=dict( + color=df.loc[df["categories"] == yd, "allColors"].describe()["top"], + opacity=0, + ), + )) + + layout = go.Layout( + font=dict( + size=22 + ), + xaxis=dict( + tickangle=52.5 + ), + yaxis=dict( + title=yLabel, + range=[yMin, yMax], + showgrid=True, + gridcolor='#f1f3f4', + gridwidth=2, + zeroline=True, + zerolinecolor='#f1f3f4', + zerolinewidth=2, + ), + margin=dict( + l=100, + r=200, + b=250, + t=50, + ), + + boxmode='group', + showlegend=False, + 
legend=dict(font=dict(size=14)) + ) + + fig = go.Figure(data=traces, layout=layout) + + save_fig(fig, figName + "-boxplot-lowRes", 1800, 1200, 1) + save_fig(fig, figName + "-boxplot-highRes", 1800, 1200, 4) + +def make_static_table(field, figName, filteredDF, nDecimals, return_summaryTable=False): + + # first make an overall table + allCounts = filteredDF.groupby(["hashID"])[field].describe() + allAgeTable = pd.DataFrame(index=[field]) + allAgeTable["min"] = allCounts["min"].min() + allAgeTable["max"] = allCounts["max"].max() + allAgeTable["U"] = len(allCounts) + allAgeTable["N"] = allCounts["count"].sum() + + # then make summary per categories + uniqueCounts = filteredDF.groupby(["categories"])["hashID"].describe() + uniqueCounts.reset_index(inplace=True) + summaryTable = filteredDF.groupby("categories")[field].describe() + summaryTable.reset_index(inplace=True) + summaryTable = pd.merge(summaryTable, uniqueCounts[["categories", "unique"]], how="left", on="categories") + summaryTable = pd.merge(summaryTable, catColorDF, how="left", on="categories") + summaryTable["unique"] = summaryTable["unique"].astype(float) + + # add in interquartile range + summaryTable["IQR"] = summaryTable["75%"] - summaryTable["25%"] + + col_headings = make_bold(["Group", "N", "U", "Average", "Stdev", "Min", "Q1", "Median", "Q3", "Max"]) + + trace = go.Table( + header=dict(values=col_headings, + fill = dict(color='white'), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=12)), + columnwidth=[1.5, 1, 1, 1, 1, 1, 1, 1, 1, 1], + cells=dict(values=[make_bold(summaryTable["categories"]), + make_bold_and_round(summaryTable["count"], 0), + make_bold_and_round(summaryTable["unique"], 0), + make_bold_and_round(summaryTable["mean"], nDecimals), + make_bold_and_round(summaryTable["std"], nDecimals), + make_bold_and_round(summaryTable["min"], nDecimals), + make_bold_and_round(summaryTable["25%"], nDecimals), + make_bold_and_round(summaryTable["50%"], nDecimals), + make_bold_and_round(summaryTable["75%"], nDecimals), + make_bold_and_round(summaryTable["max"], nDecimals)], + fill = dict(color = [summaryTable["allColors"]]), + align = ['center', 'center', 'center'], + font = dict(color = 'black', size=10), + height = 20) + ) + + fig = go.Figure() + fig.add_trace(trace) + + pio.write_image( + fig, + os.path.join( + figure_path, + figName + "-table-highRes.png" + ), + width=1200, + height=1200, + scale=4) + + pio.write_image( + fig, + os.path.join( + figure_path, + figName + "-table-lowRes.png" + ), + width=1200, + height=1200, + scale=1) + + summaryTable.to_csv( + os.path.join( + figure_path, + figName + "-table.csv" + ) + ) + allAgeTable.to_csv( + os.path.join( + figure_path, + figName + "-all-age-table.csv" + ) + ) + + if return_summaryTable: + return summaryTable, allAgeTable + else: + return + + +def make_lite_interactive_boxplot(field, yLabel, df, yMin, yMax): + df.sort_values("categories", inplace=True) + + traces = [] + for yd in df.categories.unique(): + yValues = df.loc[df["categories"] == yd, field] + yStats = yValues.describe() + yMinimum = yStats["min"] + yQ1 = yStats["25%"] + yQ2 = yStats["50%"] + yQ3 = yStats["75%"] + yMaximum = yStats["max"] + yIQR = yQ3 - yQ1 + maxWhisker = yIQR * 1.5 + lowWhiskerBound = yQ1 - maxWhisker + highWhiskerBound = yQ3 + maxWhisker + yLowerFence = yValues[yValues >= lowWhiskerBound].min() + yUpperFence = yValues[yValues <= highWhiskerBound].max() + yBoxData = [yMinimum, yLowerFence, yQ1, yQ1, yQ1, yQ1, yQ1, + yQ2, yQ3, yQ3, yQ3, yQ3, yQ3, + yUpperFence, 
yMaximum]
+        # NOTE: the repeated quartile values above force plotly's go.Box,
+        # which re-computes quartiles from the sample points, to display
+        # the pre-computed fences, quartiles, and median exactly
+
+        # get N and U
+        nDays = df.loc[df["categories"] == yd, "count"].median().astype(int)
+        uniqueDonors = df.loc[df["categories"] == yd, "unique"].median().astype(int)
+
+        traces.append(go.Box(
+            y=yBoxData,
+            jitter=0,
+            pointpos=0,
+            text=list(np.repeat("N=%s, U=%s" % (nDays, uniqueDonors), len(yBoxData))),
+            hoverinfo="y+text",
+            name=yd,
+            boxpoints="all",
+            notched=False,
+            marker=dict(
+                color=df.loc[df["categories"] == yd, "allColors"].describe()["top"],
+                opacity=0,
+            ),
+        ))
+
+    layout = go.Layout(
+        yaxis=dict(
+            title=yLabel,
+            range=[yMin, yMax],
+            showgrid=True,
+            gridcolor='#f1f3f4',
+            gridwidth=2,
+            zeroline=True,
+            zerolinecolor='#f1f3f4',
+            zerolinewidth=2,
+        ),
+        showlegend=True
+    )
+
+    fig = go.Figure(data=traces, layout=layout)
+    # NOTE: figName is read from module scope at call time
+    plot_url = py.plot(fig, filename="Distribution of " + figName, auto_open=False)
+    print(figName, plot_url)
+
+    return
+
+
+def filter_data(df, min_days_criteria=7):
+
+    # keep all type1 and null-diagnosis data (diagnosis not specified)
+    df = df[((df.diagnosisType.isnull()) | (df.diagnosisType == "type1"))]
+
+    # filter out invalid ages and ylw
+    df = df[((df.age.astype(float) >= 0) & (df.age.astype(float) <= 90))]
+    df = df[((df.ylw.astype(float) >= 0) & (df.ylw.astype(float) <= 80))]
+
+    # filter out invalid pump and cgm days
+    df = df[((df["validPumpData"]) & (df["validCGMData"]))]
+
+    # filter out Paradigm Veo pumps
+    df = df[~df["pump.top"].str.contains("Paradigm Veo")]
+
+    # filter out omnipod with mg/dL likely settings
+    df = df[~((df["pump.top"].str.contains("InsOmn-130")) &
+              (df['pumpSettings.isfLikelyUnits'] == "mg/dL"))]
+
+    # require a minimum number of days of data
+    dayGroups = pd.DataFrame(df.groupby(["hashID", "age", "ylw"]).day.count()).reset_index()
+    dayGroups.rename(columns={"day": "nDays"}, inplace=True)
+    df = pd.merge(df, dayGroups, how="left", on=["hashID", "age", "ylw"])
+
+    df = df[df["nDays"] >= min_days_criteria]
+
+    return df
+
+
+def merge_dayData(df, dayDF):
+
+    df = pd.merge(
+        df,
+        dayDF[[
+            "hashID",
+            "day",
+            "validPumpData",
+            "atLeast3Boluses",
+            "validCGMData",
+            "diagnosisType",
+            "pump.top",
+            "pumpSettings.isfLikelyUnits"
+        ]],
+        how="left",
+        on=["hashID", "day"]
+    )
+
+    return df
+
+
+def bin_data(df, ageBins, ageGroupNames, ylwBins, ylwGroupNames, catColorDF, min_unique_donors=10):
+
+    # bin data (the bins are defined below, before this function is called)
+    df["ageBins"] = pd.cut(df["age"], ageBins, labels=ageGroupNames)
+    df["ylwBins"] = pd.cut(df["ylw"], ylwBins, labels=ylwGroupNames)
+    df["ageCategories"] = df["ageBins"].astype(str)
+    df["ylwCategories"] = df["ylwBins"].astype(str)
+    df["categories"] = "age " + df["ageBins"].astype(str) + " ylw " + df["ylwBins"].astype(str)
+
+    # attach bin colors (defined below)
+    df = pd.merge(df, catColorDF, how="left", on="categories")
+    # NOTE: Series.astype has no inplace option; assign the result back
+    df["categories"] = df["categories"].astype("category")
+
+    # attach counts per group
+    dGroups = df.groupby("categories")
+    groupDF = dGroups["hashID"].describe()
+    groupDF["ageCategories"] = dGroups["ageCategories"].describe()["top"]
+    groupDF["ylwCategories"] = dGroups["ylwCategories"].describe()["top"]
+    #groupDF["ylwAlpha"] = dGroups["ylwAlpha"].mean()
+    groupDF["allColors"] = dGroups["allColors"].describe()["top"]
+    groupDF.reset_index(inplace=True)
+
+    # attach group counts to the main dataframe
+    df = pd.merge(df, groupDF[["categories", "count", "unique"]], how="left", on="categories")
+
+    # remove all categories that do NOT have more than min_unique_donors
+    # (default 10) unique people
+    df = df[df["unique"] > min_unique_donors]
+    groupDF = groupDF[groupDF["unique"] > min_unique_donors]
+
+    # attach N and U to the categories
+    df["categoriesFull"] = (
+        df["categories"].astype(str) +
+        " (N=" + df["count"].astype(str) +
+        ", U=" + df["unique"].astype(str) + ")"
+    )
+
+    return df, groupDF
+
+
+# %% define age and years-living-with-T1D (ylw) bins
+group_title = "-withYlw0"
+figure_path = os.path.join(".", "figures")
+
+# next bin the data by age-ylw groups
+dataGroupName = "age-ylw-groups"
+ageBins = np.array([0, 5, 8, 12, 17, 24, 85])
+ylwBins = np.array([-1, 0, 1, 2, 5, 10, 25, 75])
+
+# bin by age
+ageGroupNames = []
+for x, y in zip(ageBins[:-1]+1, ageBins[1:]):
+    ageGroupNames.append(f"{x:02d}-{y:02d}")
+
+# bin by years living with T1D
+ylwGroupNames = []
+for x, y in zip(ylwBins[:-1]+1, ylwBins[1:]):
+    if x == y:
+        ylwGroupNames.append(f"{x:02d}")
+    else:
+        ylwGroupNames.append(f"{x:02d}-{y:02d}")
+
+catColors = [
+    '#f0d8e5','#f4bdd8','#f7a0cc','#f781bf',
+    '#ebc3c1','#f1a095','#f17d6c','#ec5644','#e41a1c',
+    '#f2d8c3','#fbc299','#ffac6f','#ff9746','#ff7f00',
+    '#d0e1cc','#b8d8b2','#9fcd97','#86c37e','#6cb964','#4daf4a',
+    '#c9d6e3','#afc4da','#95b1d2','#7aa0c9','#5b8fc1','#377eb8',
+    '#dacbde','#d0b6d4','#c5a1ca','#ba8dc0','#af78b7','#a464ad','#984ea3'
+]
+
+finalCategories = [
+    'age 01-05 ylw 00', 'age 01-05 ylw 01', 'age 01-05 ylw 02',
+    'age 01-05 ylw 03-05', 'age 06-08 ylw 00', 'age 06-08 ylw 01',
+    'age 06-08 ylw 02', 'age 06-08 ylw 03-05', 'age 06-08 ylw 06-10',
+    'age 09-12 ylw 00', 'age 09-12 ylw 01', 'age 09-12 ylw 02',
+    'age 09-12 ylw 03-05', 'age 09-12 ylw 06-10', 'age 13-17 ylw 00',
+    'age 13-17 ylw 01', 'age 13-17 ylw 02', 'age 13-17 ylw 03-05',
+    'age 13-17 ylw 06-10', 'age 13-17 ylw 11-25', 'age 18-24 ylw 00',
+    'age 18-24 ylw 01', 'age 18-24 ylw 02', 'age 18-24 ylw 03-05',
+    'age 18-24 ylw 06-10', 'age 18-24 ylw 11-25', 'age 25-85 ylw 00',
+    'age 25-85 ylw 01', 'age 25-85 ylw 02', 'age 25-85 ylw 03-05',
+    'age 25-85 ylw 06-10', 'age 25-85 ylw 11-25',
+    'age 25-85 ylw 26-75'
+]
+
+catColorDF = pd.DataFrame(data=[finalCategories, catColors], index=["categories", "allColors"]).T
+
+
+# %% load in summary donor data
+dataPulledDate = "2019-01-10"
+dataProcessedDate = "2019-01-22"
+
+phiDate = "PHI-" + dataPulledDate
+donorPath = os.path.join("..", "bigdata-processing-pipeline", "data", phiDate + "-donor-data")
+donorList = phiDate + "-uniqueDonorList"
+donors = pd.read_csv(os.path.join(donorPath, donorList + ".csv"), low_memory=False)
+
+
+# %% all-donors summary
+allAgeSummary = pd.DataFrame()
+dataPath = os.path.join(donorPath, "settings-and-events")
+d = pd.read_csv(os.path.join(dataPath, "combined-allMetadata.csv"), low_memory=False)
+
+# attach the donor-level data to the combined metadata
+allMetadata = pd.merge(
+    d,
+    donors[[
+        "hashID",
+        "userID",
+        "diagnosisType",
+        "targetDevices",
+        "targetTimezone",
+        "termsAccepted"
+    ]],
+    how="left",
+    on="hashID"
+)
+allMetadata.to_csv(os.path.join(donorPath, donorList + "-w-metaData.csv"))
+
+
+# %% load data
+dayData = pd.read_csv(os.path.join(dataPath, "combined-dayData.csv"), low_memory=False)
+bolusData = pd.read_csv(os.path.join(dataPath, "combined-bolusEvents.csv"), low_memory=False)
+basalData = pd.read_csv(os.path.join(dataPath, "combined-basalEvents.csv"), low_memory=False)
+
+# %% attach the diagnosis type and pump metadata to the day data
+dayDF = pd.merge(
+    dayData,
+    allMetadata[[
+        "hashID",
+        "diagnosisType",
+        "pump.top",
+        "pumpSettings.isfLikelyUnits"
+    ]],
+    how="left",
+    on="hashID"
+)
+
+dayDF = filter_data(dayDF, min_days_criteria=7)
+dayDF, dayDFGroupSummary = (
+    bin_data(
+        dayDF,
+        ageBins,
+        ageGroupNames,
+        ylwBins,
+        ylwGroupNames,
+        catColorDF,
+        min_unique_donors=10
+    )
+)
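+
+# Minimal sanity-check sketch (an editorial addition, not in the original
+# script): pd.cut is right-inclusive by default, so with the bins above an
+# age of exactly 5 falls in "01-05" and an age of 6 falls in "06-08"
+print(pd.cut(pd.Series([4, 5, 6, 30]), ageBins, labels=ageGroupNames).tolist())
+# expected: ['01-05', '01-05', '06-08', '25-85']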
+
+
+# %% all-event level summary (max basal and max bolus)
+# attach the day data to the bolus data and filter by the analysis criteria
+# NOTE: see the filter_data function for details
+bolus = merge_dayData(bolusData, dayDF)
+bolus = filter_data(bolus, min_days_criteria=7)
+bolus, bolusGroupSummary = (
+    bin_data(
+        bolus,
+        ageBins,
+        ageGroupNames,
+        ylwBins,
+        ylwGroupNames,
+        catColorDF,
+        min_unique_donors=10
+    )
+)
+
+
+# %% overview of bolus data table
+figName = "overviewTable-bolus-events"
+figName = figName + group_title
+trace = go.Table(
+    header=dict(
+        values=make_bold(["AGE-YLW Group",
+                          "Age",
+                          "Years Living with T1D",
+                          "N (Bolus Events)",
+                          "U (Unique Donors)"]),
+        align = ['center', 'center', 'center'],
+        font = dict(color = 'black', size=14)
+    ),
+    cells=dict(
+        values=[make_bold(bolusGroupSummary['categories']),
+                make_bold(bolusGroupSummary['ageCategories']),
+                make_bold(bolusGroupSummary['ylwCategories']),
+                make_bold(bolusGroupSummary['count']),
+                make_bold(bolusGroupSummary['unique'])],
+        fill = dict(color = [bolusGroupSummary["allColors"]]),
+        align = ['center', 'center', 'center'],
+        font = dict(color = 'black', size=11),
+        height = 22
+    ),
+)
+
+fig = go.Figure()
+fig.add_trace(trace)
+
+pio.write_image(
+    fig,
+    os.path.join(
+        figure_path,
+        figName + "-highRes.png"
+    ),
+    width=1200,
+    height=1200,
+    scale=4)
+
+pio.write_image(
+    fig,
+    os.path.join(
+        figure_path,
+        figName + "-lowRes.png"
+    ),
+    width=1200,
+    height=1200,
+    scale=1)
+
+
+# %% max bolus amount (U)
+maxBolus = pd.DataFrame(bolus.groupby(["hashID", "day"])["unitsInsulin"].max()).reset_index()
+maxBolus.rename(columns={"unitsInsulin":"maxBolusPerDay"}, inplace=True)
+
+maxBolus = pd.merge(
+    maxBolus,
+    dayDF[[
+        "hashID",
+        "day",
+        "categories",
+        "allColors"
+    ]],
+    how="left",
+    on=["hashID", "day"]
+)
+
+# remove NaN categories, as they represent data from days that did not meet
+# the acceptable-day standard
+maxBolus = maxBolus[maxBolus["categories"].notnull()]
+
+field = 'maxBolusPerDay'
+yLabel = "Max Bolus Per Day (U)"
+figName = "Max Bolus"
+yMin = 0
+yMax = 21
+filteredDF = maxBolus[maxBolus[field] > 0].copy()
+
+## make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+# add N (events) and U (unique donors)
+filteredDF = pd.merge(
+    filteredDF,
+    summaryTable[[
+        "categories",
+        "count",
+        "unique"
+    ]],
+    how="left",
+    on="categories"
+)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% basal data
+basal = merge_dayData(basalData, dayDF)
+basal = filter_data(basal, min_days_criteria=7)
+basal, basalGroupSummary = (
+    bin_data(
+        basal,
+        ageBins,
+        ageGroupNames,
+        ylwBins,
+        ylwGroupNames,
+        catColorDF,
+        min_unique_donors=10
+    )
+)
+
+
+# %% overview of basal data table
+figName = "overviewTable-basal-events"
+figName = figName + group_title
+
+trace = go.Table(
+    header=dict(
+        values=make_bold(["AGE-YLW Group",
+                          "Age",
+                          "Years Living with T1D",
+                          "N (Basal Events)",
+                          "U (Unique Donors)"]),
+        align = ['center', 'center', 'center'],
+        font = dict(color = 'black', size=14)
+    ),
+    cells=dict(
+        values=[make_bold(basalGroupSummary['categories']),
+                make_bold(basalGroupSummary['ageCategories']),
+                make_bold(basalGroupSummary['ylwCategories']),
+                make_bold(basalGroupSummary['count']),
+                make_bold(basalGroupSummary['unique'])],
+        fill = dict(color = [basalGroupSummary["allColors"]]),
+        align = ['center', 'center', 'center'],
+        font = dict(color = 'black', size=11),
+        height = 22
+    ),
+)
+
+fig = go.Figure()
+fig.add_trace(trace)
+
+pio.write_image(
+    fig,
+    os.path.join(
+        figure_path,
+        figName + "-highRes.png"
+    ),
+    width=1200,
+    height=1200,
+    scale=4)
+
+pio.write_image(
+    fig,
+    os.path.join(
+        figure_path,
+        figName + "-lowRes.png"
+    ),
+    width=1200,
+    height=1200,
+    scale=1)
+
+
+# %% max basal rate
+maxBasal = pd.DataFrame(basal[basal["type"]=="basal"].groupby(["hashID", "day"])["rate"].max()).reset_index()
+
+maxBasal.rename(columns={"rate":"maxBasalRatePerDay"}, inplace=True)
+
+maxBasal = pd.merge(
+    maxBasal,
+    dayDF[[
+        "hashID",
+        "day",
+        "categories",
+        "allColors"
+    ]],
+    how="left",
+    on=["hashID", "day"]
+)
+
+# remove NaN categories, as they represent data from days that did not meet
+# the acceptable-day standard
+maxBasal = maxBasal[maxBasal["categories"].notnull()]
+
+field = 'maxBasalRatePerDay'
+yLabel = "Max Basal Per Day (U/hr)"
+figName = "Max Basal"
+yMin = 0
+yMax = 3.25
+filteredDF = maxBasal[maxBasal[field] > 0].copy()
+
+## make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+# add N (events) and U (unique donors)
+filteredDF = pd.merge(
+    filteredDF,
+    summaryTable[[
+        "categories",
+        "count",
+        "unique"
+    ]],
+    how="left",
+    on="categories"
+)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% overview of day level data table
+figName = "overviewTable-day-data"
+figName = figName + group_title
+
+trace = go.Table(
+    header=dict(
+        values=make_bold(["AGE-YLW Group",
+                          "Age",
+                          "Years Living with T1D",
+                          "N (Days)",
+                          "U (Unique Donors)"]),
+        align = ['center', 'center', 'center'],
+        font = dict(color = 'black', size=14)
+    ),
+    cells=dict(
+        values=[make_bold(dayDFGroupSummary['categories']),
+                make_bold(dayDFGroupSummary['ageCategories']),
+                make_bold(dayDFGroupSummary['ylwCategories']),
+                make_bold(dayDFGroupSummary['count']),
+                make_bold(dayDFGroupSummary['unique'])],
+        fill = dict(color = [dayDFGroupSummary["allColors"]]),
+        align = ['center', 'center', 'center'],
+        font = dict(color = 'black', size=11),
+        height = 22
+    ),
+)
+
+fig = go.Figure()
+fig.add_trace(trace)
+
+pio.write_image(
+    fig,
+    os.path.join(
+        figure_path,
+        figName + "-highRes.png"
+    ),
+    width=1200,
+    height=1200,
+    scale=4)
+
+pio.write_image(
+    fig,
+    os.path.join(
+        figure_path,
+        figName + "-lowRes.png"
+    ),
+    width=1200,
+    height=1200,
+    scale=1)
+
+
+# %% Average ISF per day
+dayDF["isfRounded"] = dayDF['isf.weightedMean'].round(1)
+field = 'isfRounded'
+yLabel = "Insulin Sensitivity Factor (mg/dL/U)"
+figName = "Insulin Sensitivity Factor"
+yMin = 0
+yMax = 400
+filteredDF = dayDF[dayDF[field] > 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 0
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
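+
+# Illustrative note (an editorial addition, not in the original script): ISF
+# here is assumed to be in mg/dL per unit (see the isfLikelyUnits filter in
+# filter_data); an ISF recorded in mmol/L/U would convert with the standard
+# glucose factor below (a hypothetical helper constant, for illustration only)
+MGDL_PER_MMOLL = 18.01559
+print(round(5.0 * MGDL_PER_MMOLL))  # an ISF of 5 mmol/L/U is ~90 mg/dL/U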
+
+# add the ageTable to the allAgeSummary
+allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Average CIR per day
+field = 'cir.weightedMean'
+yLabel = "Carb to Insulin Ratio (g/U)"
+figName = "Carb to Insulin Ratio"
+yMin = 0
+yMax = 70
+filteredDF = dayDF[dayDF[field] > 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Average Correction Target per day
+field = 'ct.target.weightedMean'
+yLabel = "Correction Target (mg/dL)"
+figName = "Correction Target"
+yMin = 70
+yMax = 180
+filteredDF = dayDF[dayDF[field] > 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 0
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Average Scheduled Basal Rate per day
+field = 'sbr.weightedMean'
+yLabel = "Scheduled Basal Rate (U/hr)"
+figName = "Scheduled Basal Rate"
+yMin = 0
+yMax = 2.5
+filteredDF = dayDF[dayDF[field] > 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 3
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Total Daily Dose
+field = "totalAmountOfInsulin"
+yLabel = "Total Daily Dose (U)"
+figName = "Total Daily Dose"
+yMin = 0
+yMax = 125
+filteredDF = dayDF[dayDF[field] > 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Percent Basal
+dayDF["percentBasalInPercent"] = dayDF["percentBasal"] * 100
+field = "percentBasalInPercent"
+yLabel = "Basal Proportion of Total Daily Dose (%)"
+figName = "Basal Proportion of Total Daily Dose"
+yMin = 0
+yMax = 100
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Total Daily Carbs
+field = "totalDailyCarbs"
+yLabel = "Total Daily Carbs (g)"
+figName = "Total Daily Carbs"
+yMin = 0
+yMax = 600
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 0
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Daily Time in Range (70-180 mg/dL)
+dayDF["percentInRange"] = dayDF["cgm.percent70to180"] * 100
+field = "percentInRange"
+yLabel = "Percent of Day in Target Range (70-180 mg/dL) (%)"
+figName = "Percent of Day in Target Range 70-180"
+yMin = 0
+yMax = 100
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Mean CGM (mg/dL)
+field = "cgm.mean_mgdL"
+yLabel = "Daily Average CGM Level (mg/dL)"
+figName = "Daily Average CGM Level"
+yMin = 50
+yMax = 300
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 0
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% CGM Coefficient of Variation (%)
+dayDF["covPercent"] = dayDF["cgm.cov_mgdL"] * 100
+field = "covPercent"
+yLabel = "Coefficient of Variation (%)"
+figName = "Coefficient of Variation"
+yMin = 6
+yMax = 62
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Daily Time Below 54 (Percentage)
+dayDF["percentBelow54mgdL"] = dayDF["cgm.percentBelow54"] * 100
+field = "percentBelow54mgdL"
+yLabel = "Percent of Day Below 54 mg/dL (%)"
+figName = "Percent of Day in Extreme Hypo Below 54 mgdL"
+yMin = 0
+yMax = 5
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 2
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Number of Below 54 mg/dL Episodes per Day
+field = "extreme-hypo.count"
+dayDF[field].fillna(0, inplace=True)
+yLabel = "Number of Extreme Hypo Episodes (Below 54 mg/dL) per Day"
+figName = "Number of Extreme Hypo Episodes per Day"
+yMin = 0
+yMax = 2
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Average Duration of each Episode Below 54 mg/dL
+field = "extreme-hypo-durationMinutes.mean"
+yLabel = "Average Duration of each Extreme Hypo Episode (minutes)"
+figName = "Average Duration of each Extreme Hypo Episode"
+yMin = 15
+yMax = 120
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 0
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Daily Time Above 250 (Percentage)
+dayDF["percentAbove250mgdL"] = dayDF["cgm.percentAbove250"] * 100
+field = "percentAbove250mgdL"
+yLabel = "Percent of Day Above 250 mg/dL (%)"
+figName = "Percent of Day in Extreme Hyper Above 250 mgdL"
+yMin = 0
+yMax = 75
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 0
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
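+
+# Illustrative sketch (an editorial addition, not in the original script): the
+# "extreme-hyper.count" field used in the next cell is assumed to come from
+# run-length logic like this, where an episode is a maximal run of consecutive
+# CGM readings past the threshold; the series below is made-up 5-minute data
+cgmTrace = pd.Series([180, 260, 270, 240, 255, 265, 300, 190])
+aboveThreshold = cgmTrace > 250
+episodeIds = (aboveThreshold != aboveThreshold.shift()).cumsum()[aboveThreshold]
+print(episodeIds.nunique())  # -> 2 episodes: [260, 270] and [255, 265, 300]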
+
+
+# %% Number of Above 250 mg/dL Episodes per Day
+field = "extreme-hyper.count"
+dayDF[field].fillna(0, inplace=True)
+yLabel = "Number of Extreme Hyper Episodes (Above 250 mg/dL) per Day"
+figName = "Number of Extreme Hyper Episodes per Day"
+yMin = 0
+yMax = 2
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% Average Duration of each Episode Above 250 mg/dL
+dayDF["avgExtremeHyperHours"] = dayDF["extreme-hyper-durationMinutes.mean"] / 60
+field = "avgExtremeHyperHours"
+yLabel = "Average Duration of each Extreme Hyper Episode (hours)"
+figName = "Average Duration of each Extreme Hyper Episode"
+yMin = 2
+yMax = 10
+filteredDF = dayDF[dayDF[field] >= 0].copy()
+
+# make/save static summary table
+figName = figName + group_title
+nDecimalPlaces = 1
+summaryTable, allAgeTable = make_static_table(
+    field,
+    figName,
+    filteredDF,
+    nDecimalPlaces,
+    return_summaryTable=True
+)
+
+# add the ageTable to the allAgeSummary
+#allAgeSummary = pd.concat([allAgeSummary, allAgeTable], axis=0)
+
+## make/save static boxplot
+make_static_plot(field, yLabel, figName, filteredDF, yMin, yMax)
+
+# make lite interactive plot
+make_lite_interactive_boxplot(field, yLabel, filteredDF, yMin, yMax)
+
+
+# %% save the all age summaries
+figName = "allAgeSettingSummary" + group_title
+allAgeSummary.to_csv(
+    os.path.join(
+        figure_path,
+        figName + "-all-age-table.csv"
+    )
+)
+
+# %% make a plot of TDD by ISF
+from scipy.optimize import curve_fit
+import statsmodels.api as sm
+
+# Average ISF per day
+dayDF["isfRounded"] = dayDF['isf.weightedMean'].round(1)
+#field = 'isfRounded'
+#yLabel = "Insulin Sensitivity Factor (mg/dL/U)"
+#figName = "Insulin Sensitivity Factor"
+#yMin = 0
+#yMax = 400
+
+## Total Daily Dose
+#field = "totalAmountOfInsulin"
+#yLabel = "Total Daily Dose (U)"
+#figName = "Total Daily Dose"
+#yMin = 0
+#yMax = 125
+#filteredDF = dayDF[dayDF[field] > 0].copy()
+
+filteredDF = dayDF[((dayDF['isfRounded'] > 0) &
+                    (dayDF['totalAmountOfInsulin'] > 0))].copy()
+
+ylwColors = ["#ffffb2", '#fecc5c', '#fd8d3c', '#f03b20', '#bd0026']
+for f in filteredDF["ylwCategories"].unique():
+    # NOTE: this must be an if/elif chain; with independent ifs the final
+    # else would overwrite every category except '03-05' with colorCode 4
+    if f == '00':
+        colorCode = 0
+    elif f == '01':
+        colorCode = 1
+    elif f == '02':
+        colorCode = 2
+    elif f == '03-05':
+        colorCode = 3
+    else:
+        colorCode = 4
+
+    filteredDF.loc[filteredDF["ylwCategories"] == f, "ylwColor"] = ylwColors[colorCode]
+
+
+# fit a curve to the TDD-ISF relationship
+# NOTE: parameter c was unused in the original fit function, so it is dropped
+# here; also note the function has a pole at x = 10
+def func(x, a, b):
+    return (a * x + b) / (x - 10)
+
+lowess = sm.nonparametric.lowess
+#a * np.exp(-b*x) + c * np.exp(-d * x)
+#y = a * np.exp(-b * x) + c
+#y = a * np.exp(b*x) + c * np.exp(d * x)
+
+xdata = filteredDF['totalAmountOfInsulin'].round()
+ydata = filteredDF['isfRounded']
+popt, pcov = curve_fit(func, xdata, ydata)
+
+# get the median TDD for each ISF value
+x = np.arange(1, 500)
+medianTDDbyISF = pd.DataFrame(columns=["ISF", "TDD"])
+for xi in x:
+    if sum(filteredDF['isfRounded'] == xi) > 3:
+        medianTDDbyISF.loc[xi, "ISF"] = xi
+        medianTDDbyISF.loc[xi, "TDD"] = filteredDF.loc[
+            filteredDF['isfRounded'] == xi,
+            "totalAmountOfInsulin"].median()
+
+smoothedByISF = medianTDDbyISF.rolling(25, center=True).mean()
+plt.plot(smoothedByISF["TDD"], smoothedByISF["ISF"])
+
+# get the median ISF for each TDD value
+x = np.arange(1, 300)
+medianISFbyTDD = pd.DataFrame(columns=["TDD", "ISF"])
+for xi in x:
+    if sum(filteredDF['totalAmountOfInsulin'].round() == xi) > 3:
+        medianISFbyTDD.loc[xi, "TDD"] = xi
+        medianISFbyTDD.loc[xi, "ISF"] = filteredDF.loc[
+            filteredDF['totalAmountOfInsulin'].round() == xi,
+            "isfRounded"].median()
+
+# then smooth out the medians
+smoothedByTDD = medianISFbyTDD.rolling(10, center=True).mean()
+plt.plot(smoothedByTDD["TDD"], smoothedByTDD["ISF"])
+
+
+# try a different approach where we just do a smoothed (lowess) line
+z = lowess(ydata, xdata)
+#>>> w = lowess(y, x, frac=1./3)
+plt.plot(z[:,0], z[:,1])
+
+plt.plot(
+    x,
+    func(x, *popt),
+    'r-',
+#    label='fit: a=%5.3f, b=%5.3f' % tuple(popt)
+)
+
+#df.sort_values("categories", inplace=True)
+
+traces = []
+traces.append(go.Scatter(
+    y=ydata,
+    x=xdata,
+    name="Scatter",
+    mode='markers',
+    marker=dict(
+        color=filteredDF["allColors"],
+        opacity=0.125,
+    ),
+))
+
+#traces.append(go.Scatter(
+#    y=z2[:,0],
+#    x=z2[:,1],
+#    mode='lines',
+#))
+#
+#traces.append(go.Scatter(
+#    y=z[:,1],
+#    x=z[:,0],
+#    mode='lines',
+#    line=dict(
+#        color="black",
+#    ),
+#))
+
+traces.append(go.Scatter(
+    y=smoothedByTDD["ISF"],
+    x=smoothedByTDD["TDD"],
+    mode='lines',
+    name="Trend by TDD",
+    line=dict(
+        color="black",
+        dash="dot",
+    ),
+))
+
+traces.append(go.Scatter(
+    y=smoothedByISF["ISF"],
+    x=smoothedByISF["TDD"],
+    mode='lines',
+    name="Trend by ISF",
+    line=dict(
+        color="black",
+        dash="dash",
+    ),
+))
+
+layout = go.Layout(
+    font=dict(
+        size=18
+    ),
+    xaxis=dict(
+        title="TDD",
+        dtick=20,
+        range=[0, 300],
+        showgrid=True,
+        gridcolor='#f1f3f4',
+        gridwidth=2,
+        zeroline=True,
+        zerolinecolor='#f1f3f4',
+        zerolinewidth=2,
+    ),
+    yaxis=dict(
+        title="ISF",
+        dtick=20,
+        range=[0, 500],
+        showgrid=True,
+        gridcolor='#f1f3f4',
+        gridwidth=2,
+        zeroline=True,
+        zerolinecolor='#f1f3f4',
+        zerolinewidth=2,
+    )
+)
+
+fig = go.Figure(data=traces, layout=layout)
+plot(fig)
+
+# NOTE: leftover work-in-progress block; it reuses the grouped-boxplot pattern
+# from make_static_plot with whatever field/yLabel/yMin/yMax are still in
+# scope (here, the extreme-hyper duration settings from above)
+traces = []  # start a fresh trace list so the scatter traces above are not reused
+for yd in filteredDF.categories.unique():
+    traces.append(go.Box(
+        y=filteredDF.loc[filteredDF["categories"] == yd, field].values,
+        x=filteredDF.loc[filteredDF["categories"] == yd, "categories"].values,
+        name=yd,
+        boxpoints="all",
+        notched=True,
+        hoverlabel=dict(font=dict(size=22)),
+        marker=dict(
+            color=filteredDF.loc[filteredDF["categories"] == yd, "allColors"].describe()["top"],
+            opacity=0,
+        ),
+    ))
+
+layout = go.Layout(
+    font=dict(
+        size=22
+    ),
+    xaxis=dict(
+        tickangle=52.5
+    ),
+    yaxis=dict(
+        title=yLabel,
+        range=[yMin, yMax],
+        showgrid=True,
+        gridcolor='#f1f3f4',
+        gridwidth=2,
+        zeroline=True,
+        zerolinecolor='#f1f3f4',
+        zerolinewidth=2,
+    ),
+    margin=dict(
+        l=100,
+        r=200,
+        b=250,
+        t=50,
+    ),
+    boxmode='group',
+    showlegend=False,
+    legend=dict(font=dict(size=14))
+)
+
+fig = go.Figure(data=traces, layout=layout)
+
+save_fig(fig, figName + "-boxplot-lowRes", 1800, 1200, 1)
+save_fig(fig, figName + "-boxplot-highRes", 1800, 1200, 4)
+
+
+#filteredDF.plot.scatter(y="isfRounded", x="totalAmountOfInsulin", alpha=0.025)
+
+# %% make a plot of TDD by max temp basal rate
+maxBasal = pd.DataFrame(basal[basal["type"]=="basal"].groupby(["hashID", "day"])["rate"].max()).reset_index()
+
+maxBasal.rename(columns={"rate":"maxBasalRatePerDay"}, inplace=True)
+
+maxBasal = pd.merge(
+    maxBasal,
+    dayDF[[
+        "hashID",
+        "day",
+        "categories",
+        "allColors",
+        "totalAmountOfInsulin",
+        'basal.closedLoopDays'
+    ]],
+    how="left",
+    on=["hashID", "day"]
+)
+
+# remove NaN categories, as they represent data from days that did not meet
+# the acceptable-day standard
+#maxBasal = maxBasal[maxBasal["categories"].notnull()]
+
+filteredDF = maxBasal[((maxBasal['totalAmountOfInsulin'] > 0) &
+                       (maxBasal['maxBasalRatePerDay'] > 0))].copy()
+
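+# Minimal sketch (an editorial addition, not in the original script):
+# summarize the max-basal-rate trend by TDD with the same median-then-
+# rolling-mean approach used for the ISF trend above; the window size of 10
+# is a guess carried over from that section
+tddRounded = filteredDF["totalAmountOfInsulin"].round()
+basalTrend = pd.DataFrame(columns=["TDD", "maxBasalRate"])
+for xi in np.arange(1, 300):
+    if sum(tddRounded == xi) > 3:
+        basalTrend.loc[xi, "TDD"] = xi
+        basalTrend.loc[xi, "maxBasalRate"] = filteredDF.loc[
+            tddRounded == xi, "maxBasalRatePerDay"].median()
+# cast to float so rolling() aggregates numerically
+smoothedBasalTrend = basalTrend.astype(float).rolling(10, center=True).mean()
+plt.plot(smoothedBasalTrend["TDD"], smoothedBasalTrend["maxBasalRate"])
+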
+filteredDF.plot.scatter(y="maxBasalRatePerDay", x="totalAmountOfInsulin", alpha=0.125)
+
diff --git a/tidepool-analysis-tools/tidals/clean/clean.py b/tidepool-analysis-tools/tidals/clean/clean.py
index 9a4f1836..ca61844f 100644
--- a/tidepool-analysis-tools/tidals/clean/clean.py
+++ b/tidepool-analysis-tools/tidals/clean/clean.py
@@ -17,48 +17,66 @@ def remove_duplicates(df, criteriaDF):
 
 
 def round_time(df, timeIntervalMinutes=5, timeField="time",
-               roundedTimeFieldName="roundedTime", verbose=False):
+               roundedTimeFieldName="roundedTime", startWithFirstRecord=True,
+               verbose=False):
+    '''
+    A general-purpose round-time function that rounds the time
+    field to the nearest timeIntervalMinutes
+    INPUTS:
+        * a dataframe (df) that contains the time field you want to round
+        * timeIntervalMinutes (defaults to 5 minutes, since most CGMs record a value every 5 minutes)
+        * timeField to round (defaults to the UTC "time" field)
+        * roundedTimeFieldName is a user-specified column name (defaults to roundedTime)
+        * startWithFirstRecord starts the rounding with the first record if True, and with the last record if False (defaults to True)
+        * verbose specifies whether the extra columns used to make the calculations are returned
+    '''
+
     import pandas as pd
-    # A general purpose round time function that rounds the
-    # "time" field to nearest minutes
-    # INPUTS:
-    # * a dataframe (df) that contains a time field
-    # * timeIntervalMinutes defaults to 5 minutes given that most cgms output every 5 minutes
-    # * timeField defaults to UTC time "time"
-    # * verbose specifies whether the "TIB" and "TIB_cumsum" columns are returned
-
-    df.sort_values(by=timeField, ascending=True, inplace=True)
+    df.sort_values(by=timeField, ascending=startWithFirstRecord, inplace=True)
     df.reset_index(drop=True, inplace=True)
 
-    # calculate the time-in-between (TIB) consecutive records
-    t = pd.to_datetime(df.time)
-    t_shift = pd.to_datetime(df.time.shift(1))
-    df["TIB"] = round((t - t_shift).dt.days*(86400/(60 * timeIntervalMinutes)) +
-                      (t - t_shift).dt.seconds/(60 * timeIntervalMinutes)) * timeIntervalMinutes
+    # make sure the time field is in the right form
+    t = pd.to_datetime(df[timeField])
+
+    # calculate the time between consecutive records
+    t_shift = pd.to_datetime(df[timeField].shift(1))
+    df["timeBetweenRecords"] = \
+        round((t - t_shift).dt.days*(86400/(60 * timeIntervalMinutes)) +
+              (t - t_shift).dt.seconds/(60 * timeIntervalMinutes)) * timeIntervalMinutes
 
-    # separate the data into chunks if TIB is greater than minutes
-    # so that rounding process can start over
-    largeGaps = list(df.query("TIB > " + str(timeIntervalMinutes)).index)
+    # separate the data into chunks if timeBetweenRecords is greater than
+    # 2 times the minutes so the rounding process starts over
+    largeGaps = list(df.query("abs(timeBetweenRecords) > " + str(timeIntervalMinutes * 2)).index)
     largeGaps.insert(0, 0)
    largeGaps.append(len(df))
 
-    # loop through each chunk to get the cumulative sum and the rounded time
     for gIndex in range(0, len(largeGaps) - 1):
-
-        df.loc[largeGaps[gIndex], "TIB"] = 0
-
-        df.loc[largeGaps[gIndex]:(largeGaps[gIndex + 1] - 1), "TIB_cumsum"] = \
-            df.loc[largeGaps[gIndex]:(largeGaps[gIndex + 1] - 1), "TIB"].cumsum()
-
-        df.loc[largeGaps[gIndex]:(largeGaps[gIndex + 1] - 1), roundedTimeFieldName] = \
-            pd.to_datetime(df.loc[largeGaps[gIndex], timeField]).round(str(timeIntervalMinutes) + "min") + \
-            pd.to_timedelta(df.loc[largeGaps[gIndex]:(largeGaps[gIndex + 1] - 1), "TIB_cumsum"], unit="m")
-
-        # sort descendingly by time and drop fieldsfields
-
-    df.sort_values(by=timeField, ascending=False, inplace=True)
+        chunk = t[largeGaps[gIndex]:largeGaps[gIndex+1]]
+        firstRecordChunk = t[largeGaps[gIndex]]
+
+        # NOTE: .loc slicing is inclusive of the end label, so each chunk also
+        # touches the first row of the next chunk; that row is overwritten on
+        # the next pass, and the final bound equals len(df), which .loc allows
+
+        # calculate the time difference between each record and the chunk's first record
+        df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], "minutesFromFirstRecord"] = \
+            (chunk - firstRecordChunk).dt.days*(86400/(60)) + (chunk - firstRecordChunk).dt.seconds/(60)
+
+        # then round to the nearest X minutes
+        # NOTE: the ".000001" ensures that multiples of 2:30 (half the interval) always round up
+        df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], "roundedMinutesFromFirstRecord"] = \
+            round((df.loc[largeGaps[gIndex]:largeGaps[gIndex+1],
+                          "minutesFromFirstRecord"] / timeIntervalMinutes) + 0.000001) * (timeIntervalMinutes)
+
+        roundedFirstRecord = (firstRecordChunk + pd.Timedelta("1microseconds")).round(str(timeIntervalMinutes) + "min")
+        df.loc[largeGaps[gIndex]:largeGaps[gIndex+1], roundedTimeFieldName] = \
+            roundedFirstRecord + \
+            pd.to_timedelta(df.loc[largeGaps[gIndex]:largeGaps[gIndex+1],
+                                   "roundedMinutesFromFirstRecord"], unit="m")
+
+    # sort by time and drop the helper fields
+    df.sort_values(by=timeField, ascending=startWithFirstRecord, inplace=True)
     df.reset_index(drop=True, inplace=True)
 
     if verbose is False:
-        df.drop(columns=["TIB", "TIB_cumsum"], inplace=True)
+        df.drop(columns=["timeBetweenRecords",
+                         "minutesFromFirstRecord",
+                         "roundedMinutesFromFirstRecord"], inplace=True)
 
     return df
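+
+
+# Hedged usage sketch (an editorial addition, not part of this commit): round
+# a small synthetic trace to 5-minute marks; the timestamps are made up
+if __name__ == "__main__":
+    import pandas as pd
+    example = pd.DataFrame({"time": pd.to_datetime([
+        "2019-01-30 12:01:10", "2019-01-30 12:06:55", "2019-01-30 12:59:02"])})
+    example = round_time(example, timeIntervalMinutes=5, verbose=True)
+    print(example[["time", "roundedTime", "timeBetweenRecords"]])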