From 67f5cc8598320e036335d0528b9de457fa4f84e0 Mon Sep 17 00:00:00 2001 From: Camellia Magness Date: Thu, 6 Feb 2020 16:25:19 -0500 Subject: [PATCH 1/3] draft of the nuv dark monitor. it doesn't break. not sure if it does anything else useful --- .gitignore | 4 ++ cosmo/monitors/data_models.py | 55 +++++++++++++++++++++ cosmo/monitors/nuv_dark_monitor.py | 77 ++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+) create mode 100644 cosmo/monitors/nuv_dark_monitor.py diff --git a/.gitignore b/.gitignore index 68af242b..c86d478c 100644 --- a/.gitignore +++ b/.gitignore @@ -71,3 +71,7 @@ target/ # Finder .DS_Store + +# Misc +testoutputs/ +*.ipynb diff --git a/cosmo/monitors/data_models.py b/cosmo/monitors/data_models.py index 503251ea..7611ca7c 100644 --- a/cosmo/monitors/data_models.py +++ b/cosmo/monitors/data_models.py @@ -1,7 +1,9 @@ import pandas as pd import numpy as np +import os from typing import List +from glob import glob from monitorframe.datamodel import BaseDataModel from peewee import OperationalError @@ -192,3 +194,56 @@ def get_new_data(self): data_results = data_results.replace([np.inf, -np.inf], np.nan).dropna().reset_index(drop=True) return data_results[~data_results.EXPTYPE.str.contains('ACQ|DARK|FLAT')] + + +class NUVDarkDataModel(BaseDataModel): + """Datamodel for NUV Dark files.""" + files_source = FILES_SOURCE + subdir_pattern = '?????' + + def get_new_data(self): + header_request = { + 0: ['ROOTNAME'], + 1: [ + 'EXPSTART', + 'EXPTIME' + ] + } + + table_request = { + 1: [ + 'TIME', + 'XCORR', + 'YCORR' + ], + 3: [ + 'LATITUDE', + 'LONGITUDE' + ] + + } + # TODO: add gross counts, don't need PHA, lat or long right now + + # any special data requests + # TODO: add spt support for temp, sun_lat, sun_long + # TODO: is this a good place to add solar data scraping in the future? + + # this is temporary to find the files from the dark programs until + # we can add the dark files to the monitor_data database + files = [] + program_ids = ['15776/'] + for program in program_ids: + new_files_source = os.path.join(FILES_SOURCE, program) + subfiles = glob(os.path.join(new_files_source, "*corrtag*")) + # TODO: figure out why this wasn't working with find_files() + files += subfiles + + if not files: # No new files + return pd.DataFrame() + + # need to add any other keywords that need to be set + data_results = data_from_exposures(files, + header_request=header_request, + table_request=table_request) + + return data_results diff --git a/cosmo/monitors/nuv_dark_monitor.py b/cosmo/monitors/nuv_dark_monitor.py new file mode 100644 index 00000000..aeb6ff8e --- /dev/null +++ b/cosmo/monitors/nuv_dark_monitor.py @@ -0,0 +1,77 @@ +import numpy as np +import plotly.graph_objs as go +import plotly.express as px +import datetime +import pandas as pd + +from peewee import Model +from monitorframe.monitor import BaseMonitor +from astropy.time import Time +from typing import List, Union + +from .data_models import NUVDarkDataModel +from ..monitor_helpers import fit_line, convert_day_of_year, create_visibility, v2v3 +from .. import SETTINGS + +COS_MONITORING = SETTINGS['output'] + + +class NUVDarkMonitor(BaseMonitor): + data_model = NUVDarkDataModel + # TODO: update docs + # docs = "https://spacetelescope.github.io/cosmo/monitors.html#" + output = COS_MONITORING + + run = 'monthly' + + def get_data(self): + # access data, perform any filtering required for analysis + data = self.model.new_data + dark_rate_column = [] + + xlim = [0, 1024] + ylim = [0, 1024] + + # parallelize, this is going to get bad when looking at a lot of data + for index, row in data.iterrows(): + subdf = pd.DataFrame({ + "TIME": row["TIME"], "XCORR": row["XCORR"], + "YCORR": row["YCORR"] + }) + filtered = subdf.where( + (subdf["XCORR"] > xlim[0]) & (subdf["XCORR"] < xlim[1]) & ( + subdf["YCORR"] > ylim[0]) & ( + subdf["YCORR"] < ylim[1])) + dark_rate = self.calculate_dark_rate(filtered, xlim, ylim) + dark_rate_column.append(dark_rate) + + data["DARK_RATE"] = dark_rate_column + + return data + + def calculate_dark_rate(self, dataframe, xlim, ylim): + # calculate dark rate for one exposure, with a dataframe with TIME, + # XCORR, and YCORR values + + # need to set this somewhere + timestep = 25 + time_bins = dataframe["TIME"][::timestep] + + counts = np.histogram(dataframe["TIME"], bins=time_bins)[0] + npix = float((xlim[1] - xlim[0]) * (ylim[1] - ylim[0])) + dark_rate = np.median(counts / npix / timestep) + # what is going on with the whole histogram, ok to use median? + + return dark_rate + + def track(self): + # track something. perhaps current dark rate? + pass + + def plot(self): + # do some plots + pass + + def store_results(self): + # need to store results if not going in the database + pass From 8b215b096d1665a053318171d4a4ef5be01c6ce1 Mon Sep 17 00:00:00 2001 From: Camellia Magness Date: Tue, 10 Mar 2020 11:15:35 -0400 Subject: [PATCH 2/3] this is NOT FUNCTIONAL but i am pushing it so @jwhite can review. thx. --- cosmo/monitors/data_models.py | 64 ++++++++++++++++++++++++++++-- cosmo/monitors/nuv_dark_monitor.py | 37 ++++++++++++----- 2 files changed, 89 insertions(+), 12 deletions(-) diff --git a/cosmo/monitors/data_models.py b/cosmo/monitors/data_models.py index 7611ca7c..34cd38d0 100644 --- a/cosmo/monitors/data_models.py +++ b/cosmo/monitors/data_models.py @@ -217,12 +217,13 @@ def get_new_data(self): 'YCORR' ], 3: [ + 'TIME', 'LATITUDE', - 'LONGITUDE' + 'LONGITUDE', + 'DARKRATE' ] } - # TODO: add gross counts, don't need PHA, lat or long right now # any special data requests # TODO: add spt support for temp, sun_lat, sun_long @@ -235,7 +236,6 @@ def get_new_data(self): for program in program_ids: new_files_source = os.path.join(FILES_SOURCE, program) subfiles = glob(os.path.join(new_files_source, "*corrtag*")) - # TODO: figure out why this wasn't working with find_files() files += subfiles if not files: # No new files @@ -247,3 +247,61 @@ def get_new_data(self): table_request=table_request) return data_results + + +class CorrtagDataModel(BaseDataModel): + """Datamodel for all NUV Dark files.""" + files_source = FILES_SOURCE + subdir_pattern = '?????' + + def get_new_data(self): + header_request = { + 0: ['ROOTNAME'], + 1: [ + 'EXPSTART', + 'EXPTIME', + 'EXPTYPE', + 'DETECTOR' + ] + } + + table_request = { + 1: [ + 'TIME', + 'XCORR', + 'YCORR' + ], + 3: [ + 'LATITUDE', + 'LONGITUDE' + ] + + } + # TODO: add gross counts, don't need PHA, lat or long right now + + # any special data requests + # TODO: add spt support for temp, sun_lat, sun_long + # TODO: is this a good place to add solar data scraping in the future? + + # this is temporary to find the files from the dark programs until + # we can add the dark files to the monitor_data database + # files = [] + # program_ids = ['15776/'] + # for program in program_ids: + # new_files_source = os.path.join(FILES_SOURCE, program) + # subfiles = glob(os.path.join(new_files_source, "*corrtag*")) + # # TODO: figure out why this wasn't working with find_files() + # files += subfiles + + files = find_files('*corrtag*', data_dir=self.files_source, + subdir_pattern=self.subdir_pattern) + + if not files: # No new files + return pd.DataFrame() + + # need to add any other keywords that need to be set + data_results = data_from_exposures(files, + header_request=header_request, + table_request=table_request) + + return data_results diff --git a/cosmo/monitors/nuv_dark_monitor.py b/cosmo/monitors/nuv_dark_monitor.py index aeb6ff8e..41397952 100644 --- a/cosmo/monitors/nuv_dark_monitor.py +++ b/cosmo/monitors/nuv_dark_monitor.py @@ -28,6 +28,7 @@ def get_data(self): # access data, perform any filtering required for analysis data = self.model.new_data dark_rate_column = [] + dec_year_column = [] xlim = [0, 1024] ylim = [0, 1024] @@ -35,17 +36,21 @@ def get_data(self): # parallelize, this is going to get bad when looking at a lot of data for index, row in data.iterrows(): subdf = pd.DataFrame({ - "TIME": row["TIME"], "XCORR": row["XCORR"], - "YCORR": row["YCORR"] + "EXPSTART": row["EXPSTART"], "TIME": row["TIME"], + "XCORR": row["XCORR"], "YCORR": row["YCORR"], + "TIME_3": row["TIME_3"] }) filtered = subdf.where( (subdf["XCORR"] > xlim[0]) & (subdf["XCORR"] < xlim[1]) & ( subdf["YCORR"] > ylim[0]) & ( subdf["YCORR"] < ylim[1])) - dark_rate = self.calculate_dark_rate(filtered, xlim, ylim) - dark_rate_column.append(dark_rate) + dark_rate_array, dec_year_array = self.calculate_dark_rate( + filtered, xlim, ylim) + dark_rate_column.append(dark_rate_array) + dec_year_column.append(dec_year_array) data["DARK_RATE"] = dark_rate_column + data["DECIMAL_YEAR"] = dec_year_column return data @@ -59,10 +64,23 @@ def calculate_dark_rate(self, dataframe, xlim, ylim): counts = np.histogram(dataframe["TIME"], bins=time_bins)[0] npix = float((xlim[1] - xlim[0]) * (ylim[1] - ylim[0])) - dark_rate = np.median(counts / npix / timestep) - # what is going on with the whole histogram, ok to use median? - - return dark_rate + dark_rate_array = counts / npix / timestep + # save the whole histogram in time bins, and then plot each of them + + # make a decimal year array corresponding to the time bins of the + # dark rates + # do this with the expstart (mjd) and time array from the timeline + # extension + mjd_conversion_factor = 1.15741e-5 + # taking the expstart, binning the time array by the timestep, + # removing the last element in the array (bin by front edge), + # and then multiplying by the conversion factor + mjd_array = dataframe["EXPSTART"] + dataframe["TIME_3"][::timestep][ + :-1] * mjd_conversion_factor + mjd_array = Time(mjd_array, format='mjd') + dec_year_array = mjd_array.decimalyear + + return dark_rate_array, dec_year_array def track(self): # track something. perhaps current dark rate? @@ -70,8 +88,9 @@ def track(self): def plot(self): # do some plots + pass def store_results(self): # need to store results if not going in the database - pass + pass \ No newline at end of file From 57ef9d20fdf8c994e2b99fb5da0f075ab7fd404f Mon Sep 17 00:00:00 2001 From: Camellia Magness Date: Mon, 16 Mar 2020 11:12:53 -0400 Subject: [PATCH 3/3] this is a disaster and i need help to figure out why the rates are being calculated wrong --- cosmo/monitors/data_models.py | 24 ++-- cosmo/monitors/nuv_dark_monitor.py | 174 +++++++++++++++++++++++++---- 2 files changed, 161 insertions(+), 37 deletions(-) diff --git a/cosmo/monitors/data_models.py b/cosmo/monitors/data_models.py index 34cd38d0..259a747b 100644 --- a/cosmo/monitors/data_models.py +++ b/cosmo/monitors/data_models.py @@ -204,25 +204,12 @@ class NUVDarkDataModel(BaseDataModel): def get_new_data(self): header_request = { 0: ['ROOTNAME'], - 1: [ - 'EXPSTART', - 'EXPTIME' - ] + 1: ['EXPSTART','EXPTIME'] } table_request = { - 1: [ - 'TIME', - 'XCORR', - 'YCORR' - ], - 3: [ - 'TIME', - 'LATITUDE', - 'LONGITUDE', - 'DARKRATE' - ] - + 1: ['TIME','XCORR','YCORR'], + 3: ['TIME','LATITUDE','LONGITUDE','DARKRATE'] } # any special data requests @@ -232,7 +219,10 @@ def get_new_data(self): # this is temporary to find the files from the dark programs until # we can add the dark files to the monitor_data database files = [] - program_ids = ['15776/'] + # 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17 + program_ids = ['15776/', '15538/', '14942/', '14521/', '14442/', '13974/', + '13528/', '13126/', '12720/', '12420/', '11894/'] + # program_ids = ['15776/'] for program in program_ids: new_files_source = os.path.join(FILES_SOURCE, program) subfiles = glob(os.path.join(new_files_source, "*corrtag*")) diff --git a/cosmo/monitors/nuv_dark_monitor.py b/cosmo/monitors/nuv_dark_monitor.py index 41397952..8634c87d 100644 --- a/cosmo/monitors/nuv_dark_monitor.py +++ b/cosmo/monitors/nuv_dark_monitor.py @@ -13,6 +13,23 @@ from ..monitor_helpers import fit_line, convert_day_of_year, create_visibility, v2v3 from .. import SETTINGS +import os +import yaml +import cosmo +import numpy +import itertools +from glob import glob +from astropy.io import fits +import matplotlib.pyplot as plt +from matplotlib.ticker import FormatStrFormatter +from cosmo.monitors.data_models import NUVDarkDataModel +from cosmo.filesystem import find_files, data_from_exposures, data_from_jitters +from cosmo.monitor_helpers import absolute_time, explode_df +from monitorframe.datamodel import BaseDataModel +from monitorframe.monitor import BaseMonitor + +# these imports are so messy i will fix this later + COS_MONITORING = SETTINGS['output'] @@ -36,22 +53,34 @@ def get_data(self): # parallelize, this is going to get bad when looking at a lot of data for index, row in data.iterrows(): subdf = pd.DataFrame({ - "EXPSTART": row["EXPSTART"], "TIME": row["TIME"], - "XCORR": row["XCORR"], "YCORR": row["YCORR"], - "TIME_3": row["TIME_3"] + "EXPSTART": row["EXPSTART"], "TIME": [row["TIME"]], + "XCORR": [row["XCORR"]], "YCORR": [row["YCORR"]], + "TIME_3": [row["TIME_3"]] }) - filtered = subdf.where( - (subdf["XCORR"] > xlim[0]) & (subdf["XCORR"] < xlim[1]) & ( - subdf["YCORR"] > ylim[0]) & ( - subdf["YCORR"] < ylim[1])) - dark_rate_array, dec_year_array = self.calculate_dark_rate( - filtered, xlim, ylim) + + # this is temporary until i understand james' suggestion to use + # df.apply and lambda functions + xcorr = subdf["XCORR"][0] + ycorr = subdf["YCORR"][0] + filtered_xcorr = xcorr[ + np.where((xcorr > xlim[0]) & (xcorr < xlim[1]))] + filtered_ycorr = ycorr[ + np.where((ycorr > ylim[0]) & (ycorr < ylim[1]))] + subdf["XCORR"] = [filtered_xcorr] + subdf["YCORR"] = [filtered_ycorr] + + dark_rate_array, dec_year_array = self.calculate_dark_rate(subdf, + xlim, + ylim) dark_rate_column.append(dark_rate_array) dec_year_column.append(dec_year_array) data["DARK_RATE"] = dark_rate_column data["DECIMAL_YEAR"] = dec_year_column + # when the monitor method of the monitor is called, it will + # initialize the self.data attribute + # with this method and then can be used by the other methods return data def calculate_dark_rate(self, dataframe, xlim, ylim): @@ -60,9 +89,9 @@ def calculate_dark_rate(self, dataframe, xlim, ylim): # need to set this somewhere timestep = 25 - time_bins = dataframe["TIME"][::timestep] + time_bins = dataframe["TIME_3"][0][::timestep] - counts = np.histogram(dataframe["TIME"], bins=time_bins)[0] + counts = np.histogram(dataframe["TIME_3"][0], bins=time_bins)[0] npix = float((xlim[1] - xlim[0]) * (ylim[1] - ylim[0])) dark_rate_array = counts / npix / timestep # save the whole histogram in time bins, and then plot each of them @@ -71,26 +100,131 @@ def calculate_dark_rate(self, dataframe, xlim, ylim): # dark rates # do this with the expstart (mjd) and time array from the timeline # extension - mjd_conversion_factor = 1.15741e-5 # taking the expstart, binning the time array by the timestep, # removing the last element in the array (bin by front edge), # and then multiplying by the conversion factor - mjd_array = dataframe["EXPSTART"] + dataframe["TIME_3"][::timestep][ - :-1] * mjd_conversion_factor - mjd_array = Time(mjd_array, format='mjd') + # this is done by the absolute_time helper function + mjd_array = absolute_time(expstart=dataframe['EXPSTART'][0], + time=dataframe['TIME_3'][0][::timestep][:-1]) dec_year_array = mjd_array.decimalyear return dark_rate_array, dec_year_array def track(self): # track something. perhaps current dark rate? - pass + if self.data is None: + self.data = self.get_data() + + plotdf = pd.DataFrame({ + "DECIMAL_YEAR": self.data["DECIMAL_YEAR"], + "DARK_RATE": self.data["DARK_RATE"] + }) + + # i can't get explode_df to work so this is for now + # i think i know why it doesn't work and i fixed it i just haven't + # switched to using it + all_dec_year = [] + all_dark_rates = [] + for index, row in plotdf.iterrows(): + all_dec_year = list( + itertools.chain(all_dec_year, row["DECIMAL_YEAR"])) + all_dark_rates = list( + itertools.chain(all_dark_rates, row["DARK_RATE"])) + + dark_counts = np.asarray(all_dark_rates) + fig = plt.figure(figsize=(12, 9)) + bin_size = 1e-8 + n_bins = int((dark_counts.max() - dark_counts.min()) / bin_size) + ax = fig.add_subplot(2, 1, 1) + ax.hist(dark_counts, bins=n_bins, align='mid', histtype='stepfilled') + counts, bins = np.histogram(dark_counts, bins=100) + cuml_dist = np.cumsum(counts) + count_99 = abs(cuml_dist / float(cuml_dist.max()) - .99).argmin() + count_95 = abs(cuml_dist / float(cuml_dist.max()) - .95).argmin() + + mean = dark_counts.mean() + med = np.median(dark_counts) + std = dark_counts.std() + mean_obj = ax.axvline(x=mean, lw=2, ls='--', color='r', label='Mean ') + med_obj = ax.axvline(x=med, lw=2, ls='-', color='r', label='Median') + two_sig = ax.axvline(x=med + (2 * std), lw=2, ls='-', color='gold') + three_sig = ax.axvline(x=med + (3 * std), lw=2, ls='-', + color='DarkOrange') + dist_95 = ax.axvline(x=bins[count_95], lw=2, ls='-', + color='LightGreen') + dist_99 = ax.axvline(x=bins[count_99], lw=2, ls='-', color='DarkGreen') + + ax.grid(True, which='both') + ax.set_title('Histogram of Dark Rates', fontsize=15, fontweight='bold') + ax.set_ylabel('Frequency', fontsize=15, fontweight='bold') + ax.set_xlabel('Counts/pix/sec', fontsize=15, fontweight='bold') + ax.set_xlim(dark_counts.min(), dark_counts.max()) + ax.xaxis.set_major_formatter(FormatStrFormatter('%3.2e')) + + ax = fig.add_subplot(2, 1, 2) + # log_bins = np.logspace(np.log10(dark.min()), np.log10(dark.max()), + # 100) + ax.hist(dark_counts, bins=n_bins, align='mid', log=True, + histtype='stepfilled') + + ax.axvline(x=mean, lw=2, ls='--', color='r', label='Mean') + ax.axvline(x=med, lw=2, ls='-', color='r', label='Median') + ax.axvline(x=med + (2 * std), lw=2, ls='-', color='gold') + ax.axvline(x=med + (3 * std), lw=2, ls='-', color='DarkOrange') + ax.axvline(x=bins[count_95], lw=2, ls='-', color='LightGreen') + ax.axvline(x=bins[count_99], lw=2, ls='-', color='DarkGreen') + + # ax.set_xscale('log') + ax.grid(True, which='both') + ax.set_ylabel('Log Frequency', fontsize=15, fontweight='bold') + ax.set_xlabel('Counts/pix/sec', fontsize=15, fontweight='bold') + ax.set_xlim(dark_counts.min(), dark_counts.max()) + ax.xaxis.set_major_formatter(FormatStrFormatter('%3.2e')) + + fig.legend([med_obj, mean_obj, two_sig, three_sig, dist_95, dist_99], + ['Median: {0:.2e}'.format(med), + 'Mean: {0:.2e}'.format(mean), + r'2$\sigma$: {0:.2e}'.format(med + (2 * std)), + r'3$\sigma$: {0:.2e}'.format(med + (3 * std)), + r'95$\%$: {0:.2e}'.format(bins[count_95]), + r'99$\%$: {0:.2e}'.format(bins[count_99])], shadow=True, + numpoints=1, bbox_to_anchor=[0.8, 0.8]) def plot(self): - # do some plots - - pass + # select the important columns from the dataframe + if self.data is None: + self.data = self.get_data() + + plotdf = pd.DataFrame({ + "DECIMAL_YEAR": self.data["DECIMAL_YEAR"], + "DARK_RATE": self.data["DARK_RATE"] + }) + + # i can't get explode_df to work so this is for now + all_dec_year = [] + all_dark_rates = [] + for index, row in plotdf.iterrows(): + all_dec_year = list( + itertools.chain(all_dec_year, row["DECIMAL_YEAR"])) + all_dark_rates = list( + itertools.chain(all_dark_rates, row["DARK_RATE"])) + + # not sure what is happening here tbh, still figuring it out + # self.x = all_dec_year + # self.y = all_dark_rates + + # self.basic_scatter() + + fig = plt.figure(figsize=(12, 9)) + plt.scatter(all_dec_year, all_dark_rates) + # plt.xlim(min(all_dec_year), max(all_dec_year)) + plt.ylim(0, max(all_dark_rates) + 0.5e-7) + plt.ticklabel_format(axis='y', style='sci', scilimits=(-2, 2)) + plt.ticklabel_format(axis='x', style='plain') + plt.xlabel("Decimal Year") + plt.ylabel("Dark Rate (c/p/s)") + plt.grid(True) def store_results(self): # need to store results if not going in the database - pass \ No newline at end of file + pass