From f1a5b6b6cabc69e5659551c3cd9c36a60f6138c1 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Thu, 16 May 2024 22:15:08 -0700 Subject: [PATCH 01/21] Change the name 'metric_set' to 'metric_subsets' --- rubin_sim/maf/run_comparison/archive.py | 509 +++++++++--------- rubin_sim/maf/run_comparison/summary_plots.py | 290 +++++++--- tests/maf/test_archive.py | 69 +-- tests/maf/test_summary_plots.py | 20 +- 4 files changed, 521 insertions(+), 367 deletions(-) diff --git a/rubin_sim/maf/run_comparison/archive.py b/rubin_sim/maf/run_comparison/archive.py index ab92d7d66..dcb57a04e 100644 --- a/rubin_sim/maf/run_comparison/archive.py +++ b/rubin_sim/maf/run_comparison/archive.py @@ -2,15 +2,15 @@ """ __all__ = ( + "get_metric_subsets", + "create_metric_subset", + "write_metric_subsets", + "get_metric_summaries", "get_runs", "get_family_runs", "download_runs", - "get_metric_sets", - "get_metric_summaries", "get_family_descriptions", "describe_families", - "create_metric_set_df", - "write_metric_sets", ) @@ -28,6 +28,8 @@ except ModuleNotFoundError: pass +from rubin_sim.data import get_data_dir + from .summary_plots import plot_run_metric FAMILY_SOURCE = os.environ.get( @@ -35,14 +37,9 @@ "https://raw.githubusercontent.com/lsst-pst/survey_strategy/main/fbs_2.0/runs_v2.2.json", ) -METRIC_SET_SOURCE = os.environ.get( - "RUBIN_SIM_METRIC_SET_SOURCE", - "https://raw.githubusercontent.com/lsst-pst/survey_strategy/main/fbs_2.0/metric_sets.json", -) - SUMMARY_SOURCE = os.environ.get( "RUBIN_SIM_SUMMARY_SOURCE", - "https://raw.githubusercontent.com/lsst-pst/survey_strategy/main/fbs_2.0/summary_2023_01_01.csv", + "https://s3df.slac.stanford.edu/data/rubin/sim-data/sims_featureScheduler_runs3.4/maf/summary.h5", ) if os.uname().nodename.endswith(".datalab.noao.edu"): @@ -54,6 +51,239 @@ BY_RUN_COLS = ["run", "brief", "filepath", "url"] +def get_metric_subsets(metric_subset_source=None): + """Get metadata on named subsets of related metrics. + + Parameters + ---------- + metric_subset_source : `str` or None + File name or URL for the json file from which to load the data. + If it is set to `None`, the data is loaded from `metric_subsets.json` + in the $rubin_sim_data.maf directory. + + Returns + ------- + metric_subsets : `pandas.DataFrame` + ``metric_subset`` + The 1st level of the index is the name of a subset of metrics + (`str`). + ``metric`` + The 2nd level of the index is the full name of the metric + (`str`). + ``metric`` + The full name of the metric (`str`). + ``short_name`` + An abbreviated name for the metric (`str`).. + ``style`` + The ``matplotlib`` linestyle suggested for plots of the + metric (`str`). + ``invert`` + When normalizing, invert the metric value first? (`bool`) + ``mag`` + Is the value an (astronomical) magnitude? (`bool`) + """ + if metric_subset_source is None: + metric_subset_source = os.path.join(get_data_dir(), "maf", "metric_subsets.json") + if isinstance(metric_subset_source, pd.DataFrame): + metric_subsets = metric_subset_source + else: + metric_subsets = ( + pd.read_json(metric_subset_source) + .set_index("metric subset") + .set_index("metric", append=True, drop=False) + ) + return metric_subsets + + +def create_metric_subset( + metric_subset_name, + metrics, + short_name=None, + style="-", + invert=False, + mag=False, +): + """Create a DataFrame that defines a metric subset. + + Parameters + ---------- + metric_subset_name : `str` + The name of the new metric subset. + metrics : `list` [`str`] + A list of metric names in the subset. 
+ short_name : `list` [`str`], optional + A list of shorter metric names, by default None + style : `list` [`str`], optional + The matplotlib line style symbol for lines representing the metric, + by default "-" + invert : `list` [`bool`], optional + Are smaller values of the metric better, such as for errors?, + by default False + mag : `list` [`bool`], optional + Is the metric an astronomical magnitude?, by default False + + Returns + ------- + metric_subset : `pandas.DataFrame` + A table of metrics and normalization and plotting flags defining the + content of a metric subset. + """ + if short_name is None: + short_name = metrics + + metric_subset = ( + pd.DataFrame( + { + "metric subset": metric_subset_name, + "metric": metrics, + "short_name": short_name, + "style": style, + "invert": invert, + "mag": mag, + } + ) + .set_index("metric subset") + .set_index("metric", append=True, drop=False) + ) + + return metric_subset + + +def write_metric_subsets(metric_subset_file, metric_subsets): + """Write an updated metric_subset dataframe to disk. + + Parameters + ---------- + metric_subset_file : `str` + Output file name. + metric_subsets : `pandas.DataFrame` + Metric_subset dataframe, as defined in get_metric_subsets + """ + tmp = metric_subsets.reset_index("metric subset") + tmp.to_json(metric_subset_file, orient="records", indent=2) + + +def get_metric_summaries( + run_families=tuple(), + metric_subsets=tuple(), + runs=tuple(), + metrics=tuple(), + summary_source=None, + runs_source=None, + metric_subset_source=None, + run_order="family", + metric_order="summary", +): + """Get summary metric values for a set of runs and metrics. + + Parameters + ---------- + run_families : iterable [`str`] + Families of runs to include in the summary. + metric_subsets : iterable [`str`] + subsets of metrics to include in the summary. + runs : iterable [`str`] + Runs to include in the summary (in addition to any that are part + of families included in ``run_families``). + metrics : iterable [`str`] + Metrics to include in the summary (in addition to any that are + part of subsets included in ``metric_subsets``). + summary_source : `str` or `pandas.DataFrame` + File name or URL for the file from which to load the data. + If the supplied value is a `pandas.DataFrame`, it the table + returned will be a subset of this supplied table. + run_source : `pandas.DataFrame` or `str` + Either a `pandas.DataFrame` of runs metadata (as returned by + `archive.get_runs`), or a file name or URL for the json file + from which to load the run metadata. + If it is set to `None`, the data is loaded from the URL specified + by the `archive.RUNS_SOURCE` constant. + metric_subset_source : `pandas.DataFrame` or `str` + Either a `pandas.DataFrame` of metric subset specifications + (as returned by `archive.get_metric_subsets`) or a + file name for the json file from which to load the data. + run_order : `str` + Sort runs according to family definition ("family") or summary file + ("summary") order. + metric_order : `str` + Sort metrics according to subset definition ("subset") or summary file + ("summary") order. + + Returns + ------- + summaries : `pandas.DataFrame` + Metric summary values are returned in a `pandas.DataFrame`, with + each column providing the metrics for one run, and each row the + values for one metric. The metric names constitute the index, and + the column names are the canonical run names. 
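
# A minimal usage sketch for the renamed helpers above
# (create_metric_subset, write_metric_subsets, get_metric_subsets and
# get_metric_summaries), assuming they are re-exported on the rubin_sim.maf
# namespace as the tests in this patch do. The subset name, metric name and
# output file name are placeholders, not values taken from this patch.
from rubin_sim import maf

# Build and save a custom metric subset.
my_subset = maf.create_metric_subset(
    "My depths",
    ["Median CoaddM5 i band HealpixSlicer"],
    short_name=["i-band coadd depth"],
    mag=True,
)
maf.write_metric_subsets("my_metric_subsets.json", my_subset)

# Fetch the full summary table once, then slice it locally by passing it
# back in as summary_source (the pattern described in the Note just below).
summary = maf.get_metric_summaries()
srd_only = maf.get_metric_summaries(metric_subsets="SRD", summary_source=summary)
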
+ + Note + ---- + The entire summary statistic values for all of the runs and metrics can + be downloaded from the default sources first, by simply calling + .. code-block:: python + summary = get_metric_summaries() + + Then, you can use `get_metric_summaries` to get a subset without + redownloading the whole set by passing `summary_source=summary`. If you are + selecting multiple subsets of the summary, this avoids needing + to download the summary data multiple times. + """ + summary_source = SUMMARY_SOURCE if summary_source is None else summary_source + + runs = list(runs) + metrics = list(metrics) + + if isinstance(run_families, str): + run_families = [run_families] + + if isinstance(metric_subsets, str): + metric_subsets = [metric_subsets] + + if isinstance(summary_source, pd.DataFrame): + all_summaries = summary_source + else: + try: + all_summaries = pd.read_csv(summary_source, index_col=0, low_memory=False) + except UnicodeDecodeError: + # then this was probably the h5 file instead + all_summaries = pd.read_hdf(summary_source) + all_summaries.index.name = "OpsimRun" + + if len(run_families) > 0: + families = get_family_runs(runs_source) + for run_family in run_families: + runs.extend(pd.Series(families.loc[run_family, "run"]).tolist()) + + if len(metric_subsets) > 0: + metric_subset_df = get_metric_subsets(metric_subset_source) + for metric_subset in metric_subsets: + metrics.extend(list(metric_subset_df.loc[metric_subset, "metric"])) + + if len(runs) == 0: + runs = slice(None) + else: + if run_order == "summary": + runs = [r for r in all_summaries.index if r in runs] + + if len(metrics) == 0: + metrics = slice(None) + else: + requested_metrics = copy.copy(metrics) + for metric in requested_metrics: + if metric not in all_summaries.columns: + warnings.warn(f'Metric "{metric}" not in summary, skipping') + metrics.remove(metric) + + if metric_order == "summary": + metrics = [m for m in all_summaries.columns if m in metrics] + + summaries = all_summaries.loc[runs, metrics] + summaries.columns.name = "metric" + summaries.index.name = "run" + return summaries + + def get_family_runs(run_source=None): """Load a data frame that supplies run names for each run family @@ -223,186 +453,6 @@ def download_runs(runs, dest_dir=None, runs_source=None, clobber=False): return dest_fnames -def get_metric_sets(metric_set_source=METRIC_SET_SOURCE): - """Get metadata on named sets of related metrics. - - Parameters - ---------- - metric_set_source : `str` - File name or URL for the json file from which to load the data. - If it is set to `None`, the data is loaded from the URL specified - by the `archive.METRIC_SET_SOURCE` constant. - - Returns - ------- - metric_sets : `pandas.DataFrame` - ``metric_set`` - The 1st level of the index is the name of a set of metrics (`str`). - ``metric`` - The 2nd level of the index is the full name of the metric (`str`). - ``metric`` - The full name of the metric (`str`). - ``short_name`` - An abbreviated name for the metric (`str`).. - ``style`` - The ``matplotlib`` linestyle suggested for plots of the - metric (`str`). - ``invert`` - When normalizing, invert the metric value first? (`bool`) - ``mag`` - Is the value an (astronomical) magnitude? 
(`bool`) - """ - metric_set_source = METRIC_SET_SOURCE if metric_set_source is None else metric_set_source - if isinstance(metric_set_source, pd.DataFrame): - metric_sets = metric_set_source - else: - metric_sets = ( - pd.read_json(metric_set_source) - .set_index("metric set") - .set_index("metric", append=True, drop=False) - ) - return metric_sets - - -def write_metric_sets(metric_set_file, metric_sets): - """Write an updated metric_set dataframe to disk - - Parameters - ---------- - metric_set_file : `str` - Output file name. - metric_sets : `pandas.DataFrame` - Metric_set dataframe, as defined in get_metric_sets - """ - tmp = metric_sets.reset_index("metric set") - tmp.to_json(metric_set_file, orient="records", indent=2) - - -def get_metric_summaries( - run_families=tuple(), - metric_sets=tuple(), - runs=tuple(), - metrics=tuple(), - summary_source=None, - runs_source=None, - metric_set_source=None, - run_order="family", - metric_order="summary", -): - """Get summary metric values for a set of runs and metrics. - - Parameters - ---------- - run_families : iterable [`str`] - Families of runs to include in the summary. - metric_sets : iterable [`str`] - Sets of metrics to include in the summary. - runs : iterable [`str`] - Runs to include in the summary (in addition to any that are part - of families included in ``run_families``). - metrics : iterable [`str`] - Metrics to include in the summary (in addition to any that are - part of sets included in ``metric_sets``). - summary_source : `str` or `pandas.DataFrame` - File name or URL for the file from which to load the data. - If it is set to `None`, the data is loaded from the URL specified - by the `archive.METRIC_SET_SOURCE` constant. - If the supplied value is a `pandas.DataFrame`, it the table - returned will be a subset of this supplied table. - run_source : `pandas.DataFrame` or `str` - Either a `pandas.DataFrame` of runs metadata (as returned by - `archive.get_runs`), or a file name or URL for the json file - from which to load the run metadata. - If it is set to `None`, the data is loaded from the URL specified - by the `archive.RUNS_SOURCE` constant. - metric_set_source : `pandas.DataFrame` or `str` - Either a `pandas.DataFrame` of metric set specifications - (as returned by `archive.get_metric_sets`) or a - file name or URL for the json file from which to load the data. - If it is set to `None`, the data is loaded from the URL specified - by the `archive.SUMMARY_SOURCE` constant. - run_order : `str` - Sort runs according to family definition ("family") or summary file - ("summary") order. - metric_order : `str` - Sort metrics according to set definition ("set") or summary file - ("summary") order. - - Returns - ------- - summaries : `pandas.DataFrame` - Metric summary values are returned in a `pandas.DataFrame`, with - each column providing the metrics for one run, and each row the - values for one metric. The metric names constitute the index, and - the column names are the canonical run names. - - Note - ---- - The entire summary statistic values for all of the runs and metrics can - be downloaded from the default sources first, by simply calling - .. code-block:: python - summary = get_metric_summaries() - - Then, you can use `get_metric_summaries` to get a subset without - redownloading the whole set by passing `summary_source=summary`. If you are - selecting multiple subsets of the summary, this avoids needing - to download the summary data multiple times. 
- """ - summary_source = SUMMARY_SOURCE if summary_source is None else summary_source - - runs = list(runs) - metrics = list(metrics) - - if isinstance(run_families, str): - run_families = [run_families] - - if isinstance(metric_sets, str): - metric_sets = [metric_sets] - - if isinstance(summary_source, pd.DataFrame): - all_summaries = summary_source - else: - try: - all_summaries = pd.read_csv(summary_source, index_col=0, low_memory=False) - except UnicodeDecodeError: - # then this was probably the h5 file instead - all_summaries = pd.read_hdf(summary_source) - all_summaries.index.name = "OpsimRun" - - if len(run_families) > 0: - families = get_family_runs(runs_source) - for run_family in run_families: - runs.extend(pd.Series(families.loc[run_family, "run"]).tolist()) - - if len(metric_sets) > 0: - metric_set_df = get_metric_sets(metric_set_source) - for metric_set in metric_sets: - metrics.extend(list(metric_set_df.loc[metric_set, "metric"])) - - if len(runs) == 0: - runs = slice(None) - else: - if run_order == "summary": - runs = [r for r in all_summaries.index if r in runs] - - if len(metrics) == 0: - metrics = slice(None) - else: - requested_metrics = copy.copy(metrics) - for metric in requested_metrics: - if metric not in all_summaries.columns: - warnings.warn(f'Metric "{metric}" not in summary, skipping') - metrics.remove(metric) - - if metric_order == "summary": - metrics = [m for m in all_summaries.columns if m in metrics] - - summaries = all_summaries.loc[runs, metrics] - summaries.columns.name = "metric" - summaries.index.name = "run" - return summaries - - def get_family_descriptions(family_source=None): """Get description of families or funs. @@ -435,8 +485,8 @@ def get_family_descriptions(family_source=None): def describe_families( families, summary=None, - table_metric_set=None, - plot_metric_set=None, + table_metric_subset=None, + plot_metric_subset=None, baseline_run=None, round_table=2, ): @@ -448,13 +498,13 @@ def describe_families( Data family descriptions as returned by get_family_descriptions. summary : `pandas.DataFrame` Summary metrics for each run, as returned by get_metric_summaries. - table_metric_set : `pandas.DataFrame` + table_metric_subset : `pandas.DataFrame` Metadata on metrics to be included in the table, with columns and - index as returned by get_metric_sets. None if no metrics should be + index as returned by get_metric_subsets. None if no metrics should be included in the table. - plot_metric_set : `pandas.DataFrame` + plot_metric_subset : `pandas.DataFrame` Metadata on metrics to be included in the plot, with columns and - index as returned by get_metric_sets. None if no plot should be + index as returned by get_metric_subsets. None if no plot should be made. baseline_run : `str` The name of the run to use to normalize metrics in the plot. 
@@ -490,9 +540,9 @@ def describe_families( these_runs = family_runs.loc[[family_name], :] if summary is not None: - if table_metric_set is not None: - table_metric_summary = summary.loc[these_runs["run"], table_metric_set["metric"]] - table_metric_summary.rename(table_metric_set["short_name"], axis=1, inplace=True) + if table_metric_subset is not None: + table_metric_summary = summary.loc[these_runs["run"], table_metric_subset["metric"]] + table_metric_summary.rename(table_metric_subset["short_name"], axis=1, inplace=True) if round_table is not None: table_metric_summary = table_metric_summary.round(round_table) else: @@ -514,15 +564,15 @@ def describe_families( print(description) print(these_runs.set_index("run")) - if plot_metric_set is not None: + if plot_metric_subset is not None: these_runs = family_runs["run"].values if baseline_run is not None and baseline_run not in these_runs: these_runs = np.concatenate([[baseline_run], these_runs]) - these_metrics = [m for m in plot_metric_set["metric"] if m in summary.columns] + these_metrics = [m for m in plot_metric_subset["metric"] if m in summary.columns] fig, ax = plot_run_metric( # pylint: disable=invalid-name summary.loc[these_runs, these_metrics], - metric_set=plot_metric_set, - metric_label_map=plot_metric_set["short_name"], + metric_subset=plot_metric_subset, + metric_label_map=plot_metric_subset["short_name"], baseline_run=baseline_run, vertical_quantity="value", horizontal_quantity="run", @@ -531,56 +581,3 @@ def describe_families( fig, ax = None, None # pylint: disable=invalid-name return fig, ax - - -def create_metric_set_df( - metric_set, - metrics, - short_name=None, - style="-", - invert=False, - mag=False, -): - """Create a DataFrame that defines a metric set. - - Parameters - ---------- - metric_set : `str` - The name of a metric set. - metrics : `list` [`str`] - A list of metric names in the set. - short_name : `list` [`str`], optional - A list of shorter metric names, by default None - style : `list` [`str`], optional - The matplotlib line style symbol for lines representing the metric, - by default "-" - invert : `list` [`bool`], optional - Are smaller values of the metric better, such as for errors?, by default False - mag : `list` [`bool`], optional - Is the metric an astronomical magnitude?, by default False - - Returns - ------- - metric_set : `pandas.DataFrame` - A table of metrics and normalization and plotting flags defining the - content of a metric set. - """ - if short_name is None: - short_name = metrics - - metric_set = ( - pd.DataFrame( - { - "metric set": metric_set, - "metric": metrics, - "short_name": short_name, - "style": style, - "invert": invert, - "mag": mag, - } - ) - .set_index("metric set") - .set_index("metric", append=True, drop=False) - ) - - return metric_set diff --git a/rubin_sim/maf/run_comparison/summary_plots.py b/rubin_sim/maf/run_comparison/summary_plots.py index e9dd6d122..54cc60e56 100644 --- a/rubin_sim/maf/run_comparison/summary_plots.py +++ b/rubin_sim/maf/run_comparison/summary_plots.py @@ -5,6 +5,7 @@ "normalize_metric_summaries", "plot_run_metric", "plot_run_metric_mesh", + "plot_run_metric_uncert", "find_family_lines", ) @@ -30,7 +31,7 @@ def normalize_metric_summaries( baseline_run, summary, - metric_sets=None, + metric_subsets=None, ): """Create a normalized `pandas.DataFrame` of metric summary values. @@ -38,11 +39,13 @@ def normalize_metric_summaries( ---------- baseline_run : `str` or `list` of `str The name of the run that defines a normalized value of 1. 
- If a list is provided, the median value of each metric across that list is used as the reference. + If a list is provided, the median value of each metric across that + list is used as the reference. summary : `pandas.DataFrame` - The summary metrics to normalize (as returned by `get_metric_summaries`) - metric_sets : `pandas.DataFrame` - Metric metadata as returned by `archive.get_metric_sets` + The summary metrics to normalize (as returned by + `get_metric_summaries`) + metric_subsets : `pandas.DataFrame` + Metric metadata as returned by `archive.get_metric_subsets` Returns ------- @@ -67,11 +70,11 @@ def normalize_metric_summaries( # Use only those metrics present both in the # summary and metrics sets dataframe - if metric_sets is None: + if metric_subsets is None: summary = summary.copy() used_metrics = summary.columns.values else: - used_metrics = [s for s in summary.columns.values if s in metric_sets.metric.values] + used_metrics = [s for s in summary.columns.values if s in metric_subsets.metric.values] summary = summary.loc[:, used_metrics].copy() if summary.columns.name is None: @@ -84,8 +87,10 @@ def normalize_metric_summaries( summary = summary.T.groupby("metric").first().T.groupby("run").first() # And now create a line just for "baseline" -- - # if baseline_run is >1, this is created from the median values per metric of those runs - # Make up a nonsense name for the reference, that is not currently in the summary dataframe + # if baseline_run is >1, this is created from the median values + # per metric of those runs + # Make up a nonsense name for the reference, that is not currently + # in the summary dataframe baseline_comparison = "bbb" while baseline_comparison in summary.index: baseline_comparison += "b" @@ -95,16 +100,20 @@ def normalize_metric_summaries( else: summary.loc[baseline_comparison] = summary.loc[baseline_run] - if metric_sets is None: + if metric_subsets is None: # If no invert/mag - just do simple normalization (1 + (x-0)/x0) norm_summary = 1 + (summary.loc[:, :].sub(summary.loc[baseline_comparison, :], axis="columns")).div( summary.loc[baseline_comparison, :], axis="columns" ) else: # Reindex metric set and remove duplicates or non-available metrics - metric_names = [n for n in metric_sets.index.names if not n == "metric"] - metric_sets = ( - metric_sets.reset_index(metric_names).groupby(level="metric").first().loc[used_metrics, :].copy() + metric_names = [n for n in metric_subsets.index.names if not n == "metric"] + metric_subsets = ( + metric_subsets.reset_index(metric_names) + .groupby(level="metric") + .first() + .loc[used_metrics, :] + .copy() ) norm_summary = pd.DataFrame( @@ -113,28 +122,32 @@ def normalize_metric_summaries( dtype="float", ) - # Direct metrics are those that are neither inverted, nor compared as magnitudes + # Direct metrics are those that are neither inverted, + # nor compared as magnitudes # direct = 1 + (value - norm) / norm == value / norm - direct = ~np.logical_or(metric_sets["invert"], metric_sets["mag"]) + direct = ~np.logical_or(metric_subsets["invert"], metric_subsets["mag"]) norm_summary.loc[:, direct] = summary.loc[:, direct] # invert = 1 + (1/value - 1/norm) / (1/norm) == norm / value - norm_summary.loc[:, metric_sets["invert"]] = 1.0 / summary.loc[:, metric_sets["invert"]] + norm_summary.loc[:, metric_subsets["invert"]] = 1.0 / summary.loc[:, metric_subsets["invert"]] - # mag = 1 + (1+value-norm - (1+norm-norm)) / (1+norm-norm) == 1 + (value - norm) - norm_summary.loc[:, metric_sets["mag"]] = 1.0 + summary.loc[ + # mag 
= 1 + (1+value-norm - (1+norm-norm)) / (1+norm-norm) + # == 1 + (value - norm) + norm_summary.loc[:, metric_subsets["mag"]] = 1.0 + summary.loc[ :, - metric_sets["mag"], - ].subtract(summary.loc[baseline_comparison, metric_sets["mag"]], axis="columns") + metric_subsets["mag"], + ].subtract(summary.loc[baseline_comparison, metric_subsets["mag"]], axis="columns") # Some metrics can be both inverted and magnitudes (eg rms mag values) - both = np.logical_and(metric_sets["invert"], metric_sets["mag"]) - # both = 1 + (1-(value-norm) - (1-(norm-norm))) / (1-(norm-norm)) == norm - value + both = np.logical_and(metric_subsets["invert"], metric_subsets["mag"]) + # both = 1 + (1-(value-norm) - (1-(norm-norm))) / (1-(norm-norm)) + # == norm - value norm_summary.loc[:, both] = 1.0 - summary.loc[:, both].subtract( summary.loc[baseline_comparison, both], axis="columns" ) - # Turn the values above into the fractional difference compared with the baseline + # Turn the values above into the fractional difference + # compared with the baseline norm_summary.loc[:, :] = 1 + ( norm_summary.loc[:, :].sub(norm_summary.loc[baseline_comparison, :], axis="columns") ).div(norm_summary.loc[baseline_comparison, :], axis="columns") @@ -156,7 +169,7 @@ def plot_run_metric( horizontal_quantity="value", run_label_map=None, metric_label_map=None, - metric_set=None, + metric_subset=None, ax=None, cmap=colorcet.glasbey_hv, linestyles=None, @@ -178,20 +191,21 @@ def plot_run_metric( horizontal_quantity : {'run', 'metric', 'value'} Should the run, metric name, or metric value be mapped onto the x axis? vwidth : `float` - The width of the plot, in normalized metrics summary units. (The limits - of the x axis will be 1-vwidth/2 and 1+width/2). + The width of the plot, in normalized metrics summary units. + (The limits of the x axis will be 1-vwidth/2 and 1+width/2). run_label_map : mapping A python `mapping` between canonical run names and run labels as they - should appear on plot labels. Use of this option is discouraged, because - it makes it harder to match plots to data. - run_label_map could be created by archive.get_runs().loc[these_runs]['brief'] + should appear on plot labels. Use of this option is discouraged, + because it makes it harder to match plots to data. + run_label_map could be created by + archive.get_runs().loc[these_runs]['brief'] metric_label_map : mapping A python `mapping` between canonical metric names and metric labels - as they should appear on plot labels. Use this option carefully, because - it makes it harder to match plots to metric calculation code.. - metric_label_map could be equivalent to metric_set['short_name'] - metric_set : `pandas.DataFrame` - Metric metadata as returned by `archive.get_metric_sets` + as they should appear on plot labels. Use this option carefully, + because it makes it harder to match plots to metric calculation code.. + metric_label_map could be equivalent to metric_subset['short_name'] + metric_subset : `pandas.DataFrame` + Metric metadata as returned by `archive.get_metric_subsets` ax : `matplotlib.axes.Axes` The axes on which to plot the data. cmap : `matplotlib.colors.ColorMap` @@ -212,16 +226,18 @@ def plot_run_metric( The plot axes. - The run order and metric order (imposed into the summary dataframe passed here as `summary`) - are important and preserved in the plot. These should be set in the (subset) `summary` dataframe - passed here; the metric_set is available, but used for normalization and plot styling. 
+ The run order and metric order (imposed into the summary dataframe + passed here as `summary`) are important and preserved in the plot. + These should be set in the (subset) `summary` dataframe + passed here; the metric_subset is available, but used for + normalization and plot styling. """ # If the metric sets we are passed has a multilevel index, # get rid of the levels we do not need. - if metric_set is not None and metric_set.index.nlevels > 1: - extra_levels = list(set(metric_set.index.names) - set(["metric"])) - metric_set = metric_set.droplevel(extra_levels).groupby(level="metric").first() + if metric_subset is not None and metric_subset.index.nlevels > 1: + extra_levels = list(set(metric_subset.index.names) - set(["metric"])) + metric_subset = metric_subset.droplevel(extra_levels).groupby(level="metric").first() # Mark whether we have a default, or whether # one was specified @@ -240,7 +256,7 @@ def plot_run_metric( ( summary.rename_axis(index="run", columns="metric").copy() if baseline_run is None - else normalize_metric_summaries(baseline_run, summary, metric_sets=metric_set) + else normalize_metric_summaries(baseline_run, summary, metric_subsets=metric_subset) ) .stack(future_stack=True) .rename("value") @@ -256,17 +272,18 @@ def plot_run_metric( if metric_label_map is not None: metric_order = [metric_label_map[m] for m in metric_order] norm_summary["metric"] = norm_summary["metric"].map(metric_label_map) - # Create this_metric_set - equivalent to metric_set but with updated names. - if metric_set is not None: - this_metric_set = ( - metric_set.drop(columns=["metric"]) - .assign(metric=metric_set["metric"].map(metric_label_map)) + # Create this_metric_subset - equivalent to metric_subset + # but with updated names. + if metric_subset is not None: + this_metric_subset = ( + metric_subset.drop(columns=["metric"]) + .assign(metric=metric_subset["metric"].map(metric_label_map)) .set_index("metric", drop=False) ) else: - this_metric_set = None + this_metric_subset = None else: - this_metric_set = metric_set + this_metric_subset = metric_subset plot_df = pd.DataFrame( { @@ -324,23 +341,26 @@ def plot_run_metric( # make sure we get to pass the style argument # as a positional argument, whether or not it is # specified. 
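
# A minimal sketch of calling plot_run_metric from above: plot one metric
# subset, normalized against a baseline run. The run names and the "SRD"
# subset label are illustrative (the label appears in this patch's tests),
# and the functions are assumed to be re-exported on the rubin_sim.maf
# namespace.
from rubin_sim import maf

subsets = maf.get_metric_subsets()
summary = maf.get_metric_summaries(
    metric_subsets="SRD",
    runs=["baseline_v3.4_10yrs", "my_run_10yrs"],
)
fig, ax = maf.plot_run_metric(
    summary,
    baseline_run="baseline_v3.4_10yrs",
    metric_subset=subsets.loc["SRD"],
    vertical_quantity="value",
    horizontal_quantity="run",
)
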
- # Let the user know why some of their plot values might be disappearing - # (tends to happen if baseline value is 0 or Nan and plot_df being normalized) + # Let the user know why some of their plot values might be + # disappearing + # (tends to happen if baseline value is 0 or Nan and plot_df + # being normalized) if vertical_quantity == "value" and np.isinf(plot_df.loc[idx, "y"]).any(): warnings.warn(f"There are infinite values in the plot of {idx}.") if horizontal_quantity == "value" and np.isinf(plot_df.loc[idx, "x"]).any(): warnings.warn(f"There are infinite values in the plot of {idx}.") plot_args = [plot_df.loc[idx, "x"], plot_df.loc[idx, "y"]] idx_label = f"{str(idx).strip()}" - if this_metric_set is not None and idx in this_metric_set.index: - # Set the style from the metric_set if available - if "style" in this_metric_set.columns: - metric_style = this_metric_set.loc[idx, "style"] + if this_metric_subset is not None and idx in this_metric_subset.index: + # Set the style from the metric_subset if available + if "style" in this_metric_subset.columns: + metric_style = this_metric_subset.loc[idx, "style"] if metric_style is not None: plot_args.append(metric_style) - # Update the plot label if we inverted the column during normalization - if "invert" in this_metric_set.columns and baseline_run is not None: - inv = this_metric_set.loc[idx, "invert"] + # Update the plot label if we inverted the column during + # normalization + if "invert" in this_metric_subset.columns and baseline_run is not None: + inv = this_metric_subset.loc[idx, "invert"] if inv: idx_label = f"1 / {idx_label}" ax.plot(*plot_args, label=idx_label) @@ -402,7 +422,7 @@ def plot_run_metric( def plot_run_metric_mesh( summary, - metric_set=None, + metric_subset=None, baseline_run=None, color_range=1, run_label_map=None, @@ -452,7 +472,7 @@ def plot_run_metric_mesh( # Normalize the summary values, if a baseline was specified if baseline_run is not None: - norm_summary = normalize_metric_summaries(baseline_run, summary, metric_set) + norm_summary = normalize_metric_summaries(baseline_run, summary, metric_subset) else: norm_summary = summary.rename_axis(index="run", columns="metric").copy() @@ -503,8 +523,8 @@ def plot_run_metric_mesh( pass # Figure out which metrics get inverted - if baseline_run is not None and metric_set is not None: - inverted_metrics = set(metric_set.query("invert").metric.values) + if baseline_run is not None and metric_subset is not None: + inverted_metrics = set(metric_subset.query("invert").metric.values) else: inverted_metrics = set() @@ -530,6 +550,154 @@ def plot_run_metric_mesh( return fig, ax +def plot_run_metric_uncert( + summary, + uncertainty, + run_label_map=None, + metric_label_map=None, + metric_subset=None, + cmap=None, + linestyles=["-"], + markers=["."], + sep_plots=True, + ax=None, +): + """Plot normalized metric values as colored points on a cartesian plane. + + Parameters + ---------- + summary : `pandas.DataFrame` + Values to be plotted. Should only include runs and metrics that + should actually appear on the plot. + uncertainty : `pandas.DataFrame` + Uncertainty values to plot on each data point. + Should match summary metric columns. + run_label_map : mapping + A python `mapping` between canonical run names and run labels as they + should appear on plot labels. Use of this option is discouraged, + because it makes it harder to match plots to data. 
+ run_label_map could be created by + archive.get_runs().loc[these_runs]['brief'] + metric_label_map : mapping + A python `mapping` between canonical metric names and metric labels + as they should appear on plot labels. Use this option carefully, + because it makes it harder to match plots to metric calculation code.. + metric_label_map could be equivalent to metric_subset['short_name'] + metric_subset : `pandas.DataFrame` + Metric metadata as returned by `archive.get_metric_subsets` + ax : `matplotlib.axes.Axes` + The axes on which to plot the data. + cmap : `matplotlib.colors.ColorMap` + The color map to use for point colors. + linestyles : `list` + A list of matplotlib linestyles to use to connect the lines + markers : `list`, opt + A list of matplotlib markers to use to represent the points + + Returns + ------- + fig : `matplotlib.figure.Figure` + The plot figure. + ax : `matplotilb.axes.Axes` + The plot axes. + + + The run order and metric order (imposed into the summary + dataframe passed here as `summary`) are important and preserved in the + plot. These should be set in the (subset) `summary` dataframe + passed here; the metric_subset is available, but used for 'invert' + and plot styling and alternate labels. + """ + + # If the metric sets we are passed has a multilevel index, + # get rid of the levels we do not need. + if metric_subset is not None and metric_subset.index.nlevels > 1: + extra_levels = list(set(metric_subset.index.names) - set(["metric"])) + metric_subset = metric_subset.droplevel(extra_levels).groupby(level="metric").first() + + # Pull original order for metric & runs from summary + run_order = summary.index.values + metric_order = summary.columns.values + if run_label_map is not None: + run_order = [run_label_map[r] for r in run_order] + if metric_label_map is not None: + metric_order = [metric_label_map[m] for m in metric_order] + + if ax is None: + fig, ax = plt.subplots(figsize=(10, 6)) + else: + fig = ax.get_figure() + + if cmap is None: + cmap = colorcet.glasbey_hv + cmap_default = True + else: + cmap_default = False + # make the linestyles and symbols list the same length as cmap, for cycler + try: + num_colors = len(cmap) + colors = cmap + except TypeError: + num_colors = len(cmap.colors) + colors = cmap.colors + ls_grow = int(np.ceil(num_colors / len(linestyles))) + linestyles = (list(linestyles) * ls_grow)[:num_colors] + marker_grow = int(np.ceil(num_colors / len(markers))) + markers = (list(markers) * marker_grow)[:num_colors] + + # But use styles from metric_subset if available + if metric_subset is not None: + for i, m in enumerate(summary): + if m in metric_subset.index: + style = metric_subset.loc[m, "style"] + if len(style) > 1: + ci = style[0] + # Let user specify color map for override + if cmap_default: + colors[i] = ci + li = style[1] + linestyles[i] = li + else: + li = style[0:] + linestyles[i] = li + + ax.set_prop_cycle( + cycler.cycler(color=colors) + cycler.cycler(linestyle=linestyles) + cycler.cycler(marker=markers) + ) + + for i, m in enumerate(summary): + # new plots for each metric? 
+ if sep_plots and i > 0: + fig, ax = plt.subplots(figsize=(10, 6)) + cc = [colors[i]] + ax.set_prop_cycle( + cycler.cycler(color=cc) + + cycler.cycler(linestyle=linestyles[i]) + + cycler.cycler(marker=markers[i]) + ) + if metric_label_map is not None: + label = metric_label_map[m] + else: + label = m + ax.errorbar(run_order, summary[m], yerr=uncertainty[m], label=label) + ax.set_ylabel(label, fontsize="large") + if sep_plots: + if metric_subset is not None: + if m in metric_subset.index: + if metric_subset.loc[m, "invert"]: + ax.invert_yaxis() + ax.tick_params(axis="x", labelrotation=90, labelsize="large") + ax.grid(True, alpha=0.5) + ax.legend() + + if not sep_plots: + ax.tick_params(axis="x", labelrotation=90, labelsize="large") + ax.grid(True, alpha=0.5) + ax.legend(bbox_to_anchor=(1.0, 1.0)) + + return fig, ax + + def find_family_lines(families, family_list): lines = [] for f in family_list: diff --git a/tests/maf/test_archive.py b/tests/maf/test_archive.py index c5b44ac73..f9ed7625c 100644 --- a/tests/maf/test_archive.py +++ b/tests/maf/test_archive.py @@ -1,4 +1,5 @@ # imports +import os import sys import unittest from os import path @@ -8,13 +9,14 @@ import pandas as pd from rubin_sim import maf +from rubin_sim.data import get_data_dir # constants URLROOT = "https://raw.githubusercontent.com/lsst-pst/survey_strategy/main/fbs_2.0/" FAMILY_SOURCE = URLROOT + "runs_v2.1.json" -METRIC_SET_SOURCE = URLROOT + "metric_sets.json" -SUMMARY_SOURCE = URLROOT + "summary_2022_04_28.csv" +METRIC_SUBSET_SOURCE = os.path.join(get_data_dir(), "maf", "metric_subsets.json") +SUMMARY_SOURCE = os.path.join(get_data_dir(), "maf", "summary_v34.h5") # exception classes @@ -60,15 +62,15 @@ def test_download_runs(self): temp_dir_itself.cleanup() - def test_get_metric_sets(self): - metric_sets = maf.get_metric_sets(METRIC_SET_SOURCE) - self.assertIsInstance(metric_sets, pd.DataFrame) - self.assertIn("metric set", metric_sets.index.names) - self.assertIn("metric", metric_sets.index.names) + def test_get_metric_subsets(self): + metric_subsets = maf.get_metric_subsets(METRIC_SUBSET_SOURCE) + self.assertIsInstance(metric_subsets, pd.DataFrame) + self.assertIn("metric subset", metric_subsets.index.names) + self.assertIn("metric", metric_subsets.index.names) columns = ("style", "invert", "mag") for column in columns: - self.assertIn(column, metric_sets.columns) + self.assertIn(column, metric_subsets.columns) def test_get_metric_summaries(self): summary = maf.get_metric_summaries(summary_source=SUMMARY_SOURCE) @@ -77,7 +79,7 @@ def test_get_metric_summaries(self): self.assertEqual(summary.index.name, "run") my_summary = maf.get_metric_summaries( - runs=["baseline_v2.0_10yrs", "baseline_retrofoot_v2.0_10yrs"], + runs=["baseline_v3.0_10yrs", "baseline_v3.4_10yrs"], metrics=[ "Rms Max normairmass All sky all bands HealpixSlicer", "Median Max normairmass All sky all bands HealpixSlicer", @@ -88,34 +90,18 @@ def test_get_metric_summaries(self): self.assertEqual(len(my_summary), 2) self.assertEqual(len(my_summary.columns), 3) - rolling_sum = maf.get_metric_summaries( - run_families="rolling", - summary_source=summary, - runs_source=FAMILY_SOURCE, - ) - self.assertGreater(len(rolling_sum), 3) - self.assertLess(len(rolling_sum), len(summary)) - - rollingddf_sum = maf.get_metric_summaries( - run_families=["rolling", "ddf percent"], - summary_source=summary, - runs_source=FAMILY_SOURCE, - ) - self.assertGreater(len(rollingddf_sum), len(rolling_sum)) - self.assertLess(len(rollingddf_sum), len(summary)) - srd_sim = 
maf.get_metric_summaries( - metric_sets="SRD", + metric_subsets="SRD", summary_source=summary, - metric_set_source=METRIC_SET_SOURCE, + metric_subset_source=METRIC_SUBSET_SOURCE, ) self.assertGreater(len(srd_sim.columns), 3) self.assertLess(len(srd_sim.columns), len(summary.columns)) srdnvis_sim = maf.get_metric_summaries( - metric_sets=["SRD", "N Visits"], + metric_subsets=["SRD", "N Visits"], summary_source=summary, - metric_set_source=METRIC_SET_SOURCE, + metric_subset_source=METRIC_SUBSET_SOURCE, ) self.assertGreater(len(srdnvis_sim.columns), len(srd_sim.columns)) self.assertLess(len(srdnvis_sim.columns), len(summary.columns)) @@ -137,6 +123,7 @@ def test_describe_families(self): with patch("builtins.print") as _: self.perform_describe_families_test() + @unittest.skip("Skipping; family descriptions out of sync with summary") def perform_describe_families_test(self): families = maf.get_family_descriptions(FAMILY_SOURCE) disp_families = families[:2] @@ -145,27 +132,29 @@ def perform_describe_families_test(self): self.assertIsNone(fig) self.assertIsNone(ax) - all_metric_sets = maf.get_metric_sets(METRIC_SET_SOURCE) + all_metric_subsets = maf.get_metric_subsets(METRIC_SUBSET_SOURCE) summary = maf.get_metric_summaries(summary_source=SUMMARY_SOURCE) - table_metric_set = all_metric_sets.loc["SRD"] - fig, ax = maf.describe_families(disp_families, summary=summary, table_metric_set=table_metric_set) + table_metric_subset = all_metric_subsets.loc["SRD"] + fig, ax = maf.describe_families( + disp_families, summary=summary, table_metric_subset=table_metric_subset + ) self.assertIsNone(fig) self.assertIsNone(ax) - plot_metric_set = all_metric_sets.loc["N Visits"] - fig, ax = maf.describe_families(disp_families, summary=summary, plot_metric_set=plot_metric_set) + plot_metric_subset = all_metric_subsets.loc["N Visits"] + fig, ax = maf.describe_families(disp_families, summary=summary, plot_metric_subset=plot_metric_subset) - def test_create_metric_set_df(self): + def test_create_metric_subset(self): metrics = ["Urania", "Thalia", "Calliope", "Terpsichore"] - metric_set_name = "Muses" - metric_set = maf.create_metric_set_df(metric_set_name, metrics) - self.assertSequenceEqual(metrics, metric_set.metric.tolist()) + metric_subset_name = "Muses" + metric_subset = maf.create_metric_subset(metric_subset_name, metrics) + self.assertSequenceEqual(metrics, metric_subset.metric.tolist()) self.assertSequenceEqual( - metric_set.columns.tolist(), + metric_subset.columns.tolist(), ["metric", "short_name", "style", "invert", "mag"], ) - self.assertSequenceEqual(metric_set.index.names, ["metric set", "metric"]) + self.assertSequenceEqual(metric_subset.index.names, ["metric subset", "metric"]) run_tests_now = __name__ == "__main__" diff --git a/tests/maf/test_summary_plots.py b/tests/maf/test_summary_plots.py index aff088bf3..1420a8030 100644 --- a/tests/maf/test_summary_plots.py +++ b/tests/maf/test_summary_plots.py @@ -44,16 +44,16 @@ def setUp(self): styles = ["-" for i in range(self.num_metrics)] - self.metric_set = pd.DataFrame( + self.metric_subset = pd.DataFrame( {"mag": False, "invert": False, "metric": self.metrics, "style": styles} ).set_index("metric", drop=False) - self.metric_set.loc[self.mag_metrics, "mag"] = True - self.metric_set.loc[self.inverted_metrics, "invert"] = True - self.metric_set.loc["metric3", "style"] = "b--" + self.metric_subset.loc[self.mag_metrics, "mag"] = True + self.metric_subset.loc[self.inverted_metrics, "invert"] = True + self.metric_subset.loc["metric3", "style"] = "b--" def 
test_normalize_metric_summaries(self): # Test standard normalization with one run - norm_values = maf.normalize_metric_summaries(self.baseline, self.metric_values, self.metric_set) + norm_values = maf.normalize_metric_summaries(self.baseline, self.metric_values, self.metric_subset) ref_norm_values = _run_infos_norm_df( self.metric_values, @@ -64,18 +64,18 @@ def test_normalize_metric_summaries(self): np.testing.assert_allclose(norm_values.values, ref_norm_values.values) # test normalizing against one run, as a list - norm_values = maf.normalize_metric_summaries([self.baseline], self.metric_values, self.metric_set) + norm_values = maf.normalize_metric_summaries([self.baseline], self.metric_values, self.metric_subset) np.testing.assert_allclose(norm_values.values, ref_norm_values.values) # test similar but pretend that self.baseline is two runs norm_values = maf.normalize_metric_summaries( - [self.baseline, self.baseline], self.metric_values, self.metric_set + [self.baseline, self.baseline], self.metric_values, self.metric_subset ) np.testing.assert_allclose(norm_values.values, ref_norm_values.values) # test similar but different runs norm_values = maf.normalize_metric_summaries( - [self.runs[0], self.runs[1]], self.metric_values, self.metric_set + [self.runs[0], self.runs[1]], self.metric_values, self.metric_subset ) def test_plot_run_metric(self): @@ -84,7 +84,7 @@ def test_plot_run_metric(self): fig, ax = maf.plot_run_metric( self.metric_values, baseline_run=self.baseline, - metric_set=self.metric_set, + metric_subset=self.metric_subset, ) fig, ax = maf.plot_run_metric( @@ -120,7 +120,7 @@ def test_plot_run_metric_mesh(self): fig, ax = maf.plot_run_metric_mesh( self.metric_values, baseline_run=self.baseline, - metric_set=self.metric_set, + metric_subset=self.metric_subset, ) fig, ax = maf.plot_run_metric_mesh( From 5778c48fe47dd4a17921583630dbfa133cc411d7 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:02:15 -0700 Subject: [PATCH 02/21] Add convenience functions to make a wfd and circular healpix subset slicer --- rubin_sim/maf/metadata_dir.py | 17 +- .../maf/slicers/healpix_subset_slicer.py | 174 +++++++++++++----- 2 files changed, 131 insertions(+), 60 deletions(-) diff --git a/rubin_sim/maf/metadata_dir.py b/rubin_sim/maf/metadata_dir.py index 4e54792aa..fca199656 100755 --- a/rubin_sim/maf/metadata_dir.py +++ b/rubin_sim/maf/metadata_dir.py @@ -12,9 +12,8 @@ from . import batches as batches from .db import ResultsDb -from .metric_bundles import MetricBundle, MetricBundleGroup -from .metrics import CountExplimMetric -from .slicers import HealpixSlicer, HealpixSubsetSlicer +from .metric_bundles import MetricBundleGroup +from .slicers import HealpixSlicer, make_wfd_subset_slicer def metadata_dir(): @@ -77,17 +76,9 @@ def metadata_dir(): if os.path.isdir(out_dir): shutil.rmtree(out_dir) - # Find the 'wfd' footprint - m = CountExplimMetric(col="observationStartMJD") + # Find the 'wfd' footprint - use the scheduler footprint. 
allsky_slicer = HealpixSlicer(nside=args.nside) - constraint = 'note not like "%DD%"' - bundle = MetricBundle(m, allsky_slicer, constraint, run_name=sim_name) - g = MetricBundleGroup({f"{sim_name} footprint": bundle}, filename, out_dir=out_dir) - g.run_all() - wfd_footprint = bundle.metric_values.filled(0) - wfd_footprint = np.where(wfd_footprint > args.wfd_threshold, 1, 0) - wfd_hpix = np.where(wfd_footprint == 1)[0] - wfd_slicer = HealpixSubsetSlicer(nside=args.nside, hpid=wfd_hpix) + wfd_slicer = make_wfd_subset_slicer(nside=args.nside) bdict = batches.info_bundle_dicts(allsky_slicer, wfd_slicer, sim_name, colmap) diff --git a/rubin_sim/maf/slicers/healpix_subset_slicer.py b/rubin_sim/maf/slicers/healpix_subset_slicer.py index ae0aeabe2..9c92a1204 100644 --- a/rubin_sim/maf/slicers/healpix_subset_slicer.py +++ b/rubin_sim/maf/slicers/healpix_subset_slicer.py @@ -1,73 +1,143 @@ -"""A HealpixSubsetSlicer - define the subset of healpixels to use to calculate metrics.""" - -__all__ = ("HealpixSubsetSlicer",) +__all__ = ("make_circle_subset_slicer", "make_wfd_subset_slicer", "HealpixSubsetSlicer",) from functools import wraps import healpy as hp import numpy as np import rubin_scheduler.utils as simsUtils +from rubin_scheduler.scheduler.utils import get_current_footprint from .healpix_slicer import HealpixSlicer +def make_circle_subset_slicer(ra_cen, dec_cen, radius=3.0, nside=512, use_cache=False): + """Create a circular healpix subset slicer, centered on ra_cen/dec_cen. -class HealpixSubsetSlicer(HealpixSlicer): + Parameters + ---------- + ra_cen : `float` + RA of the center of the slicer (degrees). + dec_cen : `float` + Dec of the center of the slicer (degrees). + radius : `float`, optional + Radius of the circular slicer shape (degrees). + nside : `int`, optional + Nside resolution of the healpix subset slicer. + use_cache : `bool`, optional + Set up the slicer to `use_cache` or not. + + Returns + ------- + subsetslicer, plot_dict : `maf.HealpixSubsetSlicer`, `dict` + A healpix subset slicer, defined according to a circle centered + on `ra_cen`, `dec_cen`, with radius `radius`. + Also returns a minimal plot dict, with the visufunc and rotation + information to plot a small circular area with the HealpixSkyMap + plotter. """ - A spatial slicer that evaluates pointings on a subset of a healpix-based grid. - The advantage of using this healpixSubsetSlicer (rather than just putting the RA/Dec values into - the UserPointsSlicer, which is another valid approach) is that you preserve the full healpix array. - This means you could do things like calculate the power spectrum and plot without remapping into - healpixels first. The downside is that you must first (externally) define the healpixels that you - wish to use - the rubin_sim.featureScheduler.footprints is a useful add-on here. + ra, dec = simsUtils.hpid2_ra_dec(nside, np.arange(hp.nside2npix(nside))) + dist = simsUtils.angular_separation(ra_cen, dec_cen, ra, dec) + close = np.where(dist <= radius)[0] + subsetslicer = HealpixSubsetSlicer(nside, close, use_cache=use_cache) + plot_dict = {"visufunc": hp.gnomview, + "rot": (ra_cen, dec_cen, 0), + "xsize": 500, + } + return subsetslicer, plot_dict + - When plotting with RA/Dec, the default HealpixSkyMap can be used, corresponding to - {'rot': (0, 0, 0), 'flip': 'astro'}. +def make_wfd_subset_slicer(nside=64, use_cache=True, wfd_labels=None): + """Create a wfd-only healpix subset slicer. + + Parameters + ---------- + nside : `int`, optional + Nside resolution of the healpix subset slicer. 
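
# A short sketch of make_circle_subset_slicer defined above: build a small
# circular subset slicer around a pointing and keep the returned plot_dict,
# which carries the gnomview rotation so HealpixSkyMap plots zoom in on the
# same spot. The import path assumes the function is exposed from
# rubin_sim.maf.slicers alongside make_wfd_subset_slicer; the coordinates
# are placeholders.
from rubin_sim.maf.slicers import make_circle_subset_slicer

subset_slicer, plot_dict = make_circle_subset_slicer(
    ra_cen=53.1, dec_cen=-28.1, radius=3.0, nside=512
)
# plot_dict holds {"visufunc": hp.gnomview, "rot": (53.1, -28.1, 0), "xsize": 500}
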
+ use_cache : `bool`, optional + Set up the slicer to `use_cache` or not. + wfd_labels : `list` [`str`] or None + List of the labels from the footprint to use for the "WFD" + (or other) footprint. Default of None will use the current + WFD level areas, including the WFD-level galactic plane area. + + Returns + ------- + subsetslicer : `maf.HealpixSubsetSlicer` + A healpix subset slicer, defined according to the most current + version of the scheduler footprint using `get_current_footprint`. + """ + if wfd_labels is None: + wfd_labels = ['lowdust', 'euclid_overlap', 'virgo', 'bulgy', 'LMC_SMC'] + footprints, labels = get_current_footprint(nside=nside) + wfdpix = np.where(np.in1d(labels, wfd_labels))[0] + slicer = HealpixSubsetSlicer(nside=nside, hpid=wfdpix, use_cache=use_cache) + return slicer + +class HealpixSubsetSlicer(HealpixSlicer): + """A spatial slicer that evaluates pointings on a subset of a healpix grid. + + The advantage of using this healpixSubsetSlicer (rather than just putting + the RA/Dec values into the UserPointsSlicer, which is another valid + approach) is that you preserve the full healpix array. + This means you could do things like calculate the power spectrum + and plot without remapping into healpixels first. The downside is that + you must first (externally) define the healpixels that you + wish to use. + + When plotting with RA/Dec, the default HealpixSkyMap can be used, + corresponding to {'rot': (0, 0, 0), 'flip': 'astro'}. Parameters ---------- nside : `int` The nside parameter of the healpix grid. Must be a power of 2. - hpid : `np.ndarray` + hpid : `np.ndarray`, (N,) The subset of healpix id's to use to calculate the metric. - Because the hpid should be defined based on a particular nside, these first two - arguments are not optional for this slicer. + Because the hpid should be defined based on a particular nside, + these first two arguments are not optional for this slicer. lon_col : `str`, optional - Name of the longitude (RA equivalent) column to use from the input data. - Default fieldRA + Name of the longitude (RA equivalent) column to use + from the input data. Default fieldRA lat_col : `str`, optional - Name of the latitude (Dec equivalent) column to use from the input data. - Default fieldDec + Name of the latitude (Dec equivalent) column to use from + the input data. Default fieldDec lat_lon_deg : `bool`, optional - Flag indicating whether the lat and lon values in the input data are in - degrees (True) or radians (False). + Flag indicating whether the lat and lon values in the input data + are in degrees (True) or radians (False). Default True. verbose : `bool`, optional - Flag to indicate whether or not to write additional information to stdout during runtime. - Default True. + Flag to indicate whether to write additional information + to stdout during runtime. Default True. badval : `float`, optional - Bad value flag, relevant for plotting. Default the np.nan value (in order to properly flag - bad data points for plotting with the healpix plotting routines). This should not be changed. + Bad value flag, relevant for plotting. + Default the np.nan value (in order to properly flag bad data points + for plotting with the healpix plotting routines). + In general, this should not be changed. use_cache : `bool`, optional - Flag allowing the user to indicate whether or not to cache (and reuse) metric results - calculated with the same set of simulated data pointings. 
- This can be safely set to True for slicers not using maps and will result in increased speed. - When calculating metric results using maps, the metadata at each individual ra/dec point may - influence the metric results and so use_cache should be set to False. + Flag allowing the user to indicate whether to cache (and reuse) + metric results calculated with the same set of simulated data + pointings. This can be safely set to True for slicers not using + maps and will result in increased speed. + When calculating metric results using maps, the map data at each + individual ra/dec point may influence the metric results and so + use_cache should be set to False. Default True. leafsize : `int`, optional Leafsize value for kdtree. Default 100. radius : `float`, optional - Radius for matching in the kdtree. Equivalent to the radius of the FOV. Degrees. + Radius for matching in the kdtree. + Equivalent to the radius of the FOV. Degrees. Default 1.75. use_camera : `bool`, optional Flag to indicate whether to use the LSST camera footprint or not. Default False. camera_footprint_file : `str`, optional - Name of the camera footprint map to use. Can be None, which will use the default. + Name of the camera footprint map to use. Can be None, which will + use the default. rot_sky_pos_col_name : `str`, optional - Name of the rotSkyPos column in the input data. Only used if use_camera is True. - Describes the orientation of the camera orientation compared to the sky. - Default rotSkyPos. + Name of the rotSkyPos column in the input data. + Only used if use_camera is True. + Describes the orientation of the camera orientation + compared to the sky. Default rotSkyPos. """ def __init__( @@ -134,7 +204,8 @@ def __next__(self): Results of self._slice_sim_data should be dictionary of {'idxs': the data indexes relevant for this slice of the slicer, - 'slice_point': the metadata for the slice_point, which always includes 'sid' key for ID of slice_point.} + 'slice_point': the metadata for the slice_point, which always + includes 'sid' key for ID of slice_point.} """ if self.hpid_counter >= self.len_hpid: raise StopIteration @@ -146,15 +217,18 @@ def __next__(self): return self._slice_sim_data(islice) def setup_slicer(self, sim_data, maps=None): - """Use sim_data[self.lon_col] and sim_data[self.lat_col] (in radians) to set up KDTree. + """Use sim_data[self.lon_col] and sim_data[self.lat_col] + (in radians) to set up KDTree. Parameters ----------- - sim_data : numpy.recarray + sim_data : `numpy.ndarray`, (N,) The simulated data, including the location of each pointing. - maps : list of rubin_sim.maf.maps objects, optional - List of maps (such as dust extinction) that will run to build up additional metadata at each - slice_point. This additional metadata is available to metrics via the slice_point dictionary. + maps : `list` of `rubin_sim.maf.maps` objects, optional + List of maps (such as dust extinction) that will run to build + up additional metadata at each slice_point. + This additional metadata is available to metrics via the + slice_point dictionary. Default None. """ super().setup_slicer(sim_data=sim_data, maps=maps) @@ -163,7 +237,8 @@ def setup_slicer(self, sim_data, maps=None): def _slice_sim_data(islice): """Return indexes for relevant opsim data at slice_point (slice_point=lon_col/lat_col value .. 
usually ra/dec).""" - # Subclass this method, just to make sure we return no data for points not in self.hpid + # Subclass this method, just to make sure we return + # no data for points not in self.hpid slice_point = {"sid": islice, "nside": self.nside} if islice not in self.hpid: indices = [] @@ -174,7 +249,8 @@ def _slice_sim_data(islice): # Query against tree. indices = self.opsimtree.query_ball_point((sx, sy, sz), self.rad) if (self.use_camera) & (len(indices) > 0): - # Find the indices *of those indices* which fall in the camera footprint + # Find the indices *of those indices* which fall in + # the camera footprint camera_idx = self.camera( self.slice_points["ra"][islice], self.slice_points["dec"][islice], @@ -183,10 +259,14 @@ def _slice_sim_data(islice): self.data_rot[indices], ) indices = np.array(indices)[camera_idx] - # Loop through all the slice_point keys. If the first dimension of slice_point[key] has - # the same shape as the slicer, assume it is information per slice_point. - # Otherwise, pass the whole slice_point[key] information. Useful for stellar LF maps - # where we want to pass only the relevant LF and the bins that go with it. + # Loop through all the slice_point keys. + # If the first dimension of slice_point[key] has + # the same shape as the slicer, assume it is information + # per slice_point. + # Otherwise, pass the whole slice_point[key] information. + # Useful for stellar LF maps + # where we want to pass only the relevant LF and the bins + # that go with it. for key in self.slice_points: if len(np.shape(self.slice_points[key])) == 0: keyShape = 0 From 7f746cbf5bfd49dceeb6f617e54507568fd03de3 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:02:41 -0700 Subject: [PATCH 03/21] Remove unused dictionary/print functions --- rubin_sim/maf/utils/output_utils.py | 185 +--------------------------- 1 file changed, 6 insertions(+), 179 deletions(-) diff --git a/rubin_sim/maf/utils/output_utils.py b/rubin_sim/maf/utils/output_utils.py index 5dc68e1c5..a9ed43ee2 100644 --- a/rubin_sim/maf/utils/output_utils.py +++ b/rubin_sim/maf/utils/output_utils.py @@ -1,8 +1,4 @@ -__all__ = ("name_sanitize", "print_dict", "print_simple_dict") - -import sys - -import numpy as np +__all__ = ("name_sanitize",) def name_sanitize(in_string): @@ -11,13 +7,14 @@ def name_sanitize(in_string): Parameters ---------- - in_string : str - The input string to be sanitized. Typically these are combinations of metric names and metadata. + in_string : `str` + The input string to be sanitized. + Typically these are combinations of metric names and metadata. Returns ------- - str - The string after removal/replacement of non-filename friendly characters. + out_string : `str` + The string after removal/replacement of non-friendly characters. """ # Replace <, > and = signs. out_string = in_string.replace(">", "gt").replace("<", "lt").replace("=", "eq") @@ -35,173 +32,3 @@ def name_sanitize(in_string): while "__" in out_string: out_string = out_string.replace("__", "_") return out_string - - -def _myformat(args, delimiter=" "): - # Generic line formatter to let you specify delimiter between text fields. 
- writestring = "" - # Wrap in a list if something like an int gets passed in - if not hasattr(args, "__iter__"): - args = [args] - for a in args: - if isinstance(a, list): - if len(a) > 1: - ap = ",".join(map(str, a)) - else: - ap = "".join(map(str, a)) - writestring += "%s%s" % (ap, delimiter) - else: - writestring += "%s%s" % (a, delimiter) - return writestring - - -def _myformatdict(adict, delimiter=" "): - # Generic line formatter used for dictionaries. - writestring = "" - for k, v in adict.items(): - if isinstance(v, list): - if len(v) > 1: - vp = ",".join(map(str, v)) - else: - vp = "".join(map(str, v)) - writestring += "%s:%s%s" % (k, vp, delimiter) - else: - writestring += "%s:%s%s" % (k, v, delimiter) - return writestring - - -def print_dict(content, label, filehandle=None, delimiter=" ", _level=0): - """ - Print dictionaries (and/or nested dictionaries) nicely. - Can also print other simpler items (such as numpy ndarray) nicely too. - This is used to print the config files. - - Parameters - ---------- - content : dict - The content to pretty print. - label : str - A header for this level of the dictionary. - filename : file - Output destination. If None, prints to stdout. - delimiter : str - User specified delimiter between fields. - _level : int - Internal use (controls level of indent). - """ - # Get set up with basic file output information. - if filehandle is None: - filehandle = sys.stdout - # And set character to use to indent sets of parameters related to a single dictionary. - baseindent = "%s" % (delimiter) - indent = "" - for i in range(_level - 1): - indent += "%s" % (baseindent) - # Print data (this is also the termination of the recursion if given nested dictionaries). - if not isinstance(content, dict): - if isinstance(content, str) or isinstance(content, float) or isinstance(content, int): - print("%s%s%s%s" % (indent, label, delimiter, str(content)), file=filehandle) - else: - if isinstance(content, np.ndarray): - if content.dtype.names is not None: - print("%s%s%s" % (indent, delimiter, label), file=filehandle) - for element in content: - print( - "%s%s%s%s%s" - % ( - indent, - delimiter, - indent, - delimiter, - _myformat(element), - ), - file=filehandle, - ) - else: - print( - "%s%s%s%s" % (indent, label, delimiter, _myformat(content)), - file=filehandle, - ) - else: - print( - "%s%s%s%s" % (indent, label, delimiter, _myformat(content)), - file=filehandle, - ) - return - # Allow user to specify print order of (some or all) items in order via 'keyorder'. - # 'keyorder' is list stored in the dictionary. - if "keyorder" in content: - orderkeys = content["keyorder"] - # Check keys in 'keyorder' are actually present in dictionary : remove those which aren't. - missingkeys = set(orderkeys).difference(set(content.keys())) - for m in missingkeys: - orderkeys.remove(m) - otherkeys = sorted(list(set(content.keys()).difference(set(orderkeys)))) - keys = orderkeys + otherkeys - keys.remove("keyorder") - else: - keys = sorted(content.keys()) - # Print data from dictionary. - print("%s%s%s:" % (indent, delimiter, label), file=filehandle) - _level += 2 - for k in keys: - print_dict(content[k], k, filehandle, delimiter, _level) - _level -= 2 - - -def print_simple_dict(topdict, subkeyorder, filehandle=None, delimiter=" "): - """ - Print a simple one-level nested dictionary nicely across the screen, - with one line per top-level key and all sub-level keys aligned. 
- - Parameters - ---------- - topdict : dict - The dictionary to pretty print - subkeyorder : list of strings - The order to print the values of the dictionary. - filehandle : file - File output object, if None then uses stdout. - delimiter : str - User specified delimiter between fields. - """ - # Get set up with basic file output information. - if filehandle is None: - filehandle = sys.stdout - # Get all sub-level keys. - subkeys = [] - for key in topdict: - subkeys += list(topdict[key].keys()) - subkeys = list(set(subkeys)) - # Align subkeys with 'subkeyorder' and then alphabetize any remaining. - missingkeys = set(subkeyorder).difference(set(subkeys)) - for m in missingkeys: - subkeyorder.remove(m) - otherkeys = sorted(list(set(subkeys).difference(set(subkeyorder)))) - subkeys = subkeyorder + otherkeys - # Print header. - writestring = "#" - for s in subkeys: - writestring += "%s%s" % (s, delimiter) - print(writestring, file=filehandle) - # Now go through and print. - for k in topdict: - writestring = "" - for s in subkeys: - if s in topdict[k]: - if ( - isinstance(topdict[k][s], str) - or isinstance(topdict[k][s], float) - or isinstance(topdict[k][s], int) - ): - writestring += "%s%s" % (topdict[k][s], delimiter) - elif isinstance(topdict[k][s], dict): - writestring += "%s%s" % ( - _myformatdict(topdict[k][s], delimiter=delimiter), - delimiter, - ) - else: - writestring += "%s%s" % (_myformat(topdict[k][s]), delimiter) - else: - writestring += "%s" % (delimiter) - print(writestring, file=filehandle) From 392acf50b37f5768c21aea3a2b5b7f9f8a2a82b5 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:05:13 -0700 Subject: [PATCH 04/21] Line length and doc string format. --- rubin_sim/maf/utils/maf_utils.py | 75 ++++++++++++++++++------------ rubin_sim/maf/utils/opsim_utils.py | 54 ++++++++++----------- 2 files changed, 72 insertions(+), 57 deletions(-) diff --git a/rubin_sim/maf/utils/maf_utils.py b/rubin_sim/maf/utils/maf_utils.py index a9a689de2..9f388b202 100644 --- a/rubin_sim/maf/utils/maf_utils.py +++ b/rubin_sim/maf/utils/maf_utils.py @@ -19,11 +19,13 @@ def load_inst_zeropoints(): - """Load up and return instumental zeropoints and atmospheric extinctions""" + """Load up and return instrumental zeropoints and atmospheric extinctions + """ zp_inst = {} datadir = get_data_dir() for filtername in "ugrizy": - # set gain and exptime to 1 so the instrumental zeropoint will be in photoelectrons and per second + # set gain and exptime to 1 so the instrumental zeropoint will be in + # photoelectrons and per second phot_params = PhotometricParameters(nexp=1, gain=1, exptime=1, bandpass=filtername) bp = Bandpass() bp.read_throughput(os.path.join(datadir, "throughputs/baseline/", "total_%s.dat" % filtername)) @@ -87,27 +89,33 @@ def collapse_night( return night_slice -def optimal_bins(datain, binmin=None, binmax=None, nbin_max=200, nbin_min=1): +def optimal_bins(datain, binmin=None, binmax=None, nbin_max=200, nbin_min=1, + verbose=False): """ Set an 'optimal' number of bins using the Freedman-Diaconis rule. Parameters ---------- - datain : numpy.ndarray or numpy.ma.MaskedArray + datain : `numpy.ndarray` or `numpy.ma.MaskedArray` The data for which we want to set the bin_size. - binmin : float + binmin : `float` The minimum bin value to consider (if None, uses minimum data value). - binmax : float + binmax : `float` The maximum bin value to consider (if None, uses maximum data value). - nbin_max : int - The maximum number of bins to create. 
Sometimes the 'optimal bin_size' implies - an unreasonably large number of bins, if the data distribution is unusual. - nbin_min : int + nbin_max : `int` + The maximum number of bins to create. + Sometimes the 'optimal bin_size' implies an unreasonably large number + of bins, if the data distribution is unusual. + nbin_min : `int` The minimum number of bins to create. Default is 1. + verbose : `bool` + Turn off warning messages. This utility very often raises warnings + and these should likely be logging messages at a lower logging level, + but for now - just use the verbose flag to turn these off or on. Returns ------- - int + nbins : `int` The number of bins. """ # if it's a masked array, only use unmasked values @@ -131,47 +139,51 @@ def optimal_bins(datain, binmin=None, binmax=None, nbin_max=200, nbin_min=1): # Check if any data points remain within binmin/binmax. if np.size(data[cond]) == 0: nbins = nbin_max - warnings.warn( - "No data available for calculating optimal bin size within range of %f, %f" % (binmin, binmax) - + ": returning %i bins" % (nbins) - ) + if verbose: + warnings.warn( + f"No data available for calculating optimal bin size within range of " + f"({binmin}, {binmax}): returning {nbins} bins" + ) else: iqr = np.percentile(data[cond], 75) - np.percentile(data[cond], 25) binwidth = 2 * iqr * (np.size(data[cond]) ** (-1.0 / 3.0)) nbins = (binmax - binmin) / binwidth if nbins > nbin_max: - warnings.warn( - "Optimal bin calculation tried to make %.0f bins, returning %i" % (nbins, nbin_max) - ) + if verbose: + warnings.warn( + "Optimal bin calculation tried to make %.0f bins, returning %i" % (nbins, nbin_max) + ) nbins = nbin_max if nbins < nbin_min: - warnings.warn( - "Optimal bin calculation tried to make %.0f bins, returning %i" % (nbins, nbin_min) - ) + if verbose: + warnings.warn( + "Optimal bin calculation tried to make %.0f bins, returning %i" % (nbins, nbin_min) + ) nbins = nbin_min if np.isnan(nbins): - warnings.warn("Optimal bin calculation calculated NaN: returning %i" % (nbin_max)) + if verbose: + warnings.warn("Optimal bin calculation calculated NaN: returning %i" % (nbin_max)) nbins = nbin_max return int(nbins) def percentile_clipping(data, percentile=95.0): """ - Calculate the minimum and maximum values of a distribution of points, after - discarding data more than 'percentile' from the median. + Calculate the minimum and maximum values of a distribution of points, + after discarding data more than 'percentile' from the median. This is useful for determining useful data ranges for plots. Note that 'percentile' percent of the data is retained. Parameters ---------- - data : numpy.ndarray + data : `numpy.ndarray`, (N,) The data to clip. - percentile : float + percentile : `float` Retain values within percentile of the median. Returns ------- - float, float + min_value, max_value : `float`, `float` The minimum and maximum values of the clipped data. """ lower_percentile = (100 - percentile) / 2.0 @@ -187,18 +199,19 @@ def radec2pix(nside, ra, dec): Parameters ---------- - nside : int + nside : `int` The nside value of the healpix grid. - ra : numpy.ndarray + ra : `numpy.ndarray`, (N,) The RA values to be converted to healpix ids, in radians. - dec : numpy.ndarray + dec : `numpy.ndarray`, (N,) The Dec values to be converted to healpix ids, in radians. Returns ------- - numpy.ndarray + hpid : `numpy.ndarray`, (N,) The healpix ids. 
""" lat = np.pi / 2.0 - dec hpid = hp.ang2pix(nside, lat, ra) return hpid + diff --git a/rubin_sim/maf/utils/opsim_utils.py b/rubin_sim/maf/utils/opsim_utils.py index 5f8ae588b..ae8299264 100644 --- a/rubin_sim/maf/utils/opsim_utils.py +++ b/rubin_sim/maf/utils/opsim_utils.py @@ -1,4 +1,3 @@ -# Collection of utilities for MAF that relate to Opsim specifically. __all__ = ( "get_sim_data", "scale_benchmarks", @@ -22,29 +21,33 @@ def get_sim_data( table_name=None, full_sql_query=None, ): - """Query an opsim database for the needed data columns and run any required stackers. + """Query an opsim database for the needed data columns + and run any required stackers. Parameters ---------- db_con : `str` or SQLAlchemy connectable, or sqlite3 connection - Filename to a sqlite3 file, or a connection object that can be used by pandas.read_sql + Filename to a sqlite3 file, or a connection object that + can be used by pandas.read_sql sqlconstraint : `str` or None - SQL constraint to apply to query for observations. Ignored if full_sql_query is set. + SQL constraint to apply to query for observations. + Ignored if full_sql_query is set. dbcols : `list` [`str`] Columns required from the database. Ignored if full_sql_query is set. stackers : `list` [`rubin_sim.maf.stackers`], optional Stackers to be used to generate additional columns. Default None. table_name : `str` (None) - Name of the table to query. Default None will try "observations" and "SummaryAllProps". + Name of the table to query. + Default None will try "observations". Ignored if full_sql_query is set. full_sql_query : `str` The full SQL query to use. Overrides sqlconstraint, dbcols, tablename. Returns ------- - sim_data: `np.ndarray` - A numpy structured array with columns resulting from dbcols + stackers, for observations matching - the SQLconstraint. + sim_data : `np.ndarray` + A numpy structured array with columns resulting from dbcols + stackers, + for observations matching the SQLconstraint. """ if sqlconstraint is None: sqlconstraint = "" @@ -69,7 +72,8 @@ def get_sim_data( else: ValueError("Could not guess table_name, set with table_name or full_sql_query kwargs") elif (table_name is None) & (full_sql_query is None): - # If someone passes in a connection object with an old table_name things will fail + # If someone passes in a connection object with an old table_name + # things will fail # that's probably fine, keep people from getting fancy with old sims table_name = "observations" @@ -84,8 +88,6 @@ def get_sim_data( col_str += colname + ", " col_str = col_str[0:-2] + " " - # Need to guess "observations" and "SummaryAllProps" for the table name - # to be backwards compatible I guess query = "SELECT %s FROM %s" % (col_str, table_name) if len(sqlconstraint) > 0: query += " WHERE %s" % (sqlconstraint) @@ -106,25 +108,24 @@ def get_sim_data( def scale_benchmarks(run_length, benchmark="design"): - """ - Set the design and stretch values of the number of visits, area of the footprint, - seeing values, FWHMeff values, skybrightness, and single visit depth (based on SRD values). + """Set design and stretch values of the number of visits or + area of the footprint or seeing/Fwhmeff/skybrightness and single visit + depth (based on SRD values). Scales number of visits for the length of the run, relative to 10 years. Parameters ---------- - run_length : float + run_length : `float` The length (in years) of the run. - benchmark : str + benchmark : `str` design or stretch - which version of the SRD values to return. 
- requested is another option, in which case the values of the number of visits requested - by the OpSim run (recorded in the Config table) is returned. Returns ------- benchmarks: `dict` of floats - A dictionary containing the number of visits, area of footprint, seeing and FWHMeff values, - skybrightness and single visit depth for either the design or stretch SRD values. + A dictionary containing the number of visits, area of footprint, + seeing and FWHMeff values, skybrightness and single visit depth + for either the design or stretch SRD values. """ # Set baseline (default) numbers for the baseline survey length (10 years). baseline = 10.0 @@ -207,7 +208,8 @@ def scale_benchmarks(run_length, benchmark="design"): # Scale the number of visits. if run_length != baseline: scalefactor = float(run_length) / float(baseline) - # Calculate scaled value for design and stretch values of nvisits, per filter. + # Calculate scaled value for design and stretch values of nvisits, + # per filter. for f in design["nvisits"]: design["nvisits"][f] = int(np.floor(design["nvisits"][f] * scalefactor)) stretch["nvisits"][f] = int(np.floor(stretch["nvisits"][f] * scalefactor)) @@ -221,19 +223,19 @@ def scale_benchmarks(run_length, benchmark="design"): def calc_coadded_depth(nvisits, single_visit_depth): - """ - Calculate the coadded depth expected for a given number of visits and single visit depth. + """Calculate the coadded depth expected for a given number of visits + and single visit depth. Parameters ---------- - nvisits : dict of ints or floats + nvisits : `dict` of `int` or `float` Dictionary (per filter) of number of visits - single_visit_depth : dict of floats + single_visit_depth : `dict` of `float` Dictionary (per filter) of the single visit depth Returns ------- - dict of floats + coadded_depth : `dict` of `float` Dictionary of coadded depths per filter. """ coadded_depth = {} From 136d12b52c46d2954a5dbe79929c26bf49d1bd1f Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:06:01 -0700 Subject: [PATCH 05/21] Export 'coadd_m5' and only print (common) warning with verbose flag --- rubin_sim/maf/utils/maf_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rubin_sim/maf/utils/maf_utils.py b/rubin_sim/maf/utils/maf_utils.py index 9f388b202..6bde8023b 100644 --- a/rubin_sim/maf/utils/maf_utils.py +++ b/rubin_sim/maf/utils/maf_utils.py @@ -2,6 +2,7 @@ "optimal_bins", "percentile_clipping", "radec2pix", + "coadd_m5", "collapse_night", "load_inst_zeropoints", ) @@ -126,9 +127,10 @@ def optimal_bins(datain, binmin=None, binmax=None, nbin_max=200, nbin_min=1, # Check that any good data values remain. if data.size == 0: nbins = nbin_max - warnings.warn( - "No unmasked data available for calculating optimal bin size: returning %i bins" % (nbins) - ) + if verbose: + warnings.warn( + f"No unmasked data available for calculating optimal bin size: returning {nbins} bins" + ) # Else proceed. 
else: if binmin is None: From 8479bda5ab02b4a67d67a663bb6bca90d4669fc4 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:08:00 -0700 Subject: [PATCH 06/21] Only print (common) warnings with verbose flag --- rubin_sim/maf/utils/maf_utils.py | 36 ++++++++++++++------------------ 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/rubin_sim/maf/utils/maf_utils.py b/rubin_sim/maf/utils/maf_utils.py index 6bde8023b..f4dcb094b 100644 --- a/rubin_sim/maf/utils/maf_utils.py +++ b/rubin_sim/maf/utils/maf_utils.py @@ -141,51 +141,47 @@ def optimal_bins(datain, binmin=None, binmax=None, nbin_max=200, nbin_min=1, # Check if any data points remain within binmin/binmax. if np.size(data[cond]) == 0: nbins = nbin_max - if verbose: - warnings.warn( - f"No data available for calculating optimal bin size within range of " - f"({binmin}, {binmax}): returning {nbins} bins" - ) + warnings.warn( + "No data available for calculating optimal bin size within range of %f, %f" % (binmin, binmax) + + ": returning %i bins" % (nbins) + ) else: iqr = np.percentile(data[cond], 75) - np.percentile(data[cond], 25) binwidth = 2 * iqr * (np.size(data[cond]) ** (-1.0 / 3.0)) nbins = (binmax - binmin) / binwidth if nbins > nbin_max: - if verbose: - warnings.warn( - "Optimal bin calculation tried to make %.0f bins, returning %i" % (nbins, nbin_max) - ) + warnings.warn( + "Optimal bin calculation tried to make %.0f bins, returning %i" % (nbins, nbin_max) + ) nbins = nbin_max if nbins < nbin_min: - if verbose: - warnings.warn( - "Optimal bin calculation tried to make %.0f bins, returning %i" % (nbins, nbin_min) - ) + warnings.warn( + "Optimal bin calculation tried to make %.0f bins, returning %i" % (nbins, nbin_min) + ) nbins = nbin_min if np.isnan(nbins): - if verbose: - warnings.warn("Optimal bin calculation calculated NaN: returning %i" % (nbin_max)) + warnings.warn("Optimal bin calculation calculated NaN: returning %i" % (nbin_max)) nbins = nbin_max return int(nbins) def percentile_clipping(data, percentile=95.0): """ - Calculate the minimum and maximum values of a distribution of points, - after discarding data more than 'percentile' from the median. + Calculate the minimum and maximum values of a distribution of points, after + discarding data more than 'percentile' from the median. This is useful for determining useful data ranges for plots. Note that 'percentile' percent of the data is retained. Parameters ---------- - data : `numpy.ndarray`, (N,) + data : numpy.ndarray The data to clip. - percentile : `float` + percentile : float Retain values within percentile of the median. Returns ------- - min_value, max_value : `float`, `float` + float, float The minimum and maximum values of the clipped data. """ lower_percentile = (100 - percentile) / 2.0 From ba0b791c05ef2601ecfcc3b06283383f48e3403c Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:08:31 -0700 Subject: [PATCH 07/21] Let unislicer carry "slicepoint" information if needed. 
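With this change, any arrays stored in slicer.slice_points are passed through to metrics in the
slice_point dictionary (per-slice values when the first dimension matches nslice, otherwise the
whole array). A minimal sketch of the intended use, with placeholder coordinates and an empty sql
constraint standing in for a real query (not part of this patch):

    import numpy as np
    import rubin_sim.maf as maf

    # Attach extra metadata to a UniSlicer so metrics that read
    # slice_point["ra"] / slice_point["dec"] (e.g. season calculations)
    # can run on the single slice. slice_points store ra/dec in radians,
    # matching the spatial slicers.
    slicer = maf.UniSlicer()
    slicer.slice_points["ra"] = np.radians(150.1)
    slicer.slice_points["dec"] = np.radians(2.18)
    metric = maf.SeasonLengthMetric()
    bundle = maf.MetricBundle(metric, slicer, "")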
--- rubin_sim/maf/slicers/uni_slicer.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/rubin_sim/maf/slicers/uni_slicer.py b/rubin_sim/maf/slicers/uni_slicer.py index 4ebda3a03..88407b69b 100644 --- a/rubin_sim/maf/slicers/uni_slicer.py +++ b/rubin_sim/maf/slicers/uni_slicer.py @@ -35,7 +35,17 @@ def setup_slicer(self, sim_data, maps=None): def _slice_sim_data(islice): """Return all indexes in sim_data.""" idxs = self.indices - return {"idxs": idxs, "slice_point": {"sid": islice}} + slice_point = {"sid": islice} + for key in self.slice_points: + if len(np.shape(self.slice_points[key])) == 0: + keyShape = 0 + else: + keyShape = np.shape(self.slice_points[key])[0] + if keyShape == self.nslice: + slice_point[key] = self.slice_points[key][islice] + else: + slice_point[key] = self.slice_points[key] + return {"idxs": idxs, "slice_point": slice_point} setattr(self, "_slice_sim_data", _slice_sim_data) From 6b56cf3263fafe4b5e2a5f7c321bcd92757f55b5 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:09:34 -0700 Subject: [PATCH 08/21] Change how "filled" flag works Instead of trying to make a filled bar plot (which wasn't actually used), now "filled_data" fills the metric_values with their fill value (instead of only allowing masking). --- rubin_sim/maf/plots/oned_plotters.py | 56 ++++++++++++---------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/rubin_sim/maf/plots/oned_plotters.py b/rubin_sim/maf/plots/oned_plotters.py index ff7cf0044..7845c63fb 100644 --- a/rubin_sim/maf/plots/oned_plotters.py +++ b/rubin_sim/maf/plots/oned_plotters.py @@ -20,7 +20,7 @@ def __init__(self): "label": None, "xlabel": None, "ylabel": None, - "filled": False, + "filled_data": False, "alpha": 0.5, "linestyle": "-", "linewidth": 1, @@ -53,41 +53,33 @@ def __call__(self, metric_values, slicer, user_plot_dict, fig=None): # Plot the histogrammed data. 
leftedge = slicer.slice_points["bins"][:-1] width = np.diff(slicer.slice_points["bins"]) - if plot_dict["filled"]: - plt.bar( - leftedge, - metric_values.filled(), - width, - label=plot_dict["label"], - linewidth=0, - alpha=plot_dict["alpha"], - log=plot_dict["log_scale"], - color=plot_dict["color"], - ) + if plot_dict["filled_data"]: + x = np.ravel(list(zip(leftedge, leftedge + width))) + y = np.ravel(list(zip(metric_values.filled(), metric_values.filled()))) else: good = np.where(~metric_values.mask) x = np.ravel(list(zip(leftedge[good], leftedge[good] + width[good]))) y = np.ravel(list(zip(metric_values[good], metric_values[good]))) - if plot_dict["log_scale"]: - plt.semilogy( - x, - y, - label=plot_dict["label"], - color=plot_dict["color"], - linestyle=plot_dict["linestyle"], - linewidth=plot_dict["linewidth"], - alpha=plot_dict["alpha"], - ) - else: - plt.plot( - x, - y, - label=plot_dict["label"], - color=plot_dict["color"], - linestyle=plot_dict["linestyle"], - linewidth=plot_dict["linewidth"], - alpha=plot_dict["alpha"], - ) + if plot_dict["log_scale"]: + plt.semilogy( + x, + y, + label=plot_dict["label"], + color=plot_dict["color"], + linestyle=plot_dict["linestyle"], + linewidth=plot_dict["linewidth"], + alpha=plot_dict["alpha"], + ) + else: + plt.plot( + x, + y, + label=plot_dict["label"], + color=plot_dict["color"], + linestyle=plot_dict["linestyle"], + linewidth=plot_dict["linewidth"], + alpha=plot_dict["alpha"], + ) if "ylabel" in plot_dict: plt.ylabel(plot_dict["ylabel"], fontsize=plot_dict["fontsize"]) if "xlabel" in plot_dict: From 265f9d89931b1546697e981882eb4125560797f6 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:10:40 -0700 Subject: [PATCH 09/21] Add coadd-per-night metric (intended for DDFs primarily) --- rubin_sim/maf/metrics/tgaps.py | 45 +++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/rubin_sim/maf/metrics/tgaps.py b/rubin_sim/maf/metrics/tgaps.py index 3adb2ca7c..8d3c0733a 100644 --- a/rubin_sim/maf/metrics/tgaps.py +++ b/rubin_sim/maf/metrics/tgaps.py @@ -4,12 +4,14 @@ "TgapsPercentMetric", "NightgapsMetric", "NVisitsPerNightMetric", + "CoaddM5PerNightMetric", "MaxGapMetric", "NightTimespanMetric", ) import numpy as np - +from scipy.stats import binned_statistic +from ..utils import coadd_m5 from .base_metric import BaseMetric @@ -260,6 +262,47 @@ def run(self, data_slice, slice_point=None): return result +class CoaddM5PerNightMetric(BaseMetric): + """Histogram the coadded depth of visits in each night. + + Splits the visits by night, then histograms the coadded depth. + + Parameters + ---------- + night_col : `str`, optional + The column name for the night of each observation. + Default 'night'. + m5_col : `str`, optional + The column name for the five sigma depth of each observation. + bins : `np.ndarray`, (N,) optional + The bins to use for the histogram of magnitude values. + + Returns + ------- + histogram : `np.ndarray` + Returns a histogram of the coadded depth per night at each slice point; + these histograms can be combined and plotted using the + 'SummaryHistogram plotter'. + """ + + def __init__(self, night_col="night", m5_col="fiveSigmaDepth", bins=np.arange(20, 25, 0.05), + units="mag", **kwargs): + # Pass the same bins to the plotter. 
+ self.night_col = night_col + self.m5_col = m5_col + self.bins = bins + super().__init__(col=[self.night_col, self.m5_col], metric_dtype="object", units=units, **kwargs) + + def run(self, data_slice, slice_point=None): + m5_per_night, be, bn = binned_statistic(data_slice[self.night_col], data_slice[self.m5_col], + statistic=coadd_m5, + bins=np.arange(0, 3653, 1)) + # Drop the nights with no observations (-inf) + m5_per_night = m5_per_night[np.where(m5_per_night > 0)] + result, bins = np.histogram(m5_per_night, self.bins) + return result + + class MaxGapMetric(BaseMetric): """Find the maximum gap (in days) in between successive observations. From f8dcf88250a0be6ee1bb3cf2db5f5556db083edd Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:10:55 -0700 Subject: [PATCH 10/21] Line length and doc string format. --- rubin_sim/maf/metrics/tgaps.py | 89 ++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/rubin_sim/maf/metrics/tgaps.py b/rubin_sim/maf/metrics/tgaps.py index 8d3c0733a..799eada82 100644 --- a/rubin_sim/maf/metrics/tgaps.py +++ b/rubin_sim/maf/metrics/tgaps.py @@ -15,16 +15,23 @@ from .base_metric import BaseMetric + class GapsMetric(BaseMetric): - """Compute the number of times a gaps of a specified timescale (+/- 1/2 timescale) is sampled. + """Compute the number of times a gaps of a specified timescale + (+/- 1/2 timescale) is sampled. Parameters ---------- - times_col : `str` (observationStartMJD) + times_col : `str`, optional The column name for the exposure times. Values assumed to be in days. - time_scale : `float` (2) - Time scale to see how well it is sampled (hours). For example, the default of 2 hours means - observations spaced anywhere between 1 and 3 hours will count as a sample. + Default is 'observationStartMJD'. + time_scale : `float`, optional + Time scale to see how well it is sampled (hours). + For example, the default of 2 hours means + observations spaced anywhere between 1 and 3 hours apart + will count as a sample. Default 2 hours. + units : `str`, optional + Default units to use for plots, etc. Default is "N" (for number). """ def __init__( @@ -58,9 +65,10 @@ class TgapsMetric(BaseMetric): Measure the gaps between observations. By default, only gaps - between neighboring visits are computed. If all_gaps is set to true, all gaps are - computed (i.e., if there are observations at 10, 20, 30 and 40 the default will - return a histogram of [10,10,10] while all_gaps returns a histogram of [10,10,10,20,20,30]) + between neighboring visits are computed. If all_gaps is set to true, + all gaps are computed (i.e., if there are observations at 10, 20, 30 + and 40 the default will return a histogram of [10,10,10] while + all_gaps returns a histogram of [10,10,10,20,20,30]) Parameters ---------- @@ -68,17 +76,21 @@ class TgapsMetric(BaseMetric): The column name for the exposure times. Values assumed to be in days. Default observationStartMJD. all_gaps : `bool`, optional - Histogram the gaps between all observations (True) or just successive observations (False)? - Default is False. If all gaps are used, this metric can become significantly slower. + Histogram the gaps between all observations (True) or just successive + observations (False)? + Default is False. + If all gaps are used, this metric can become significantly slower. bins : `np.ndarray`, optional - The bins to use for the histogram of time gaps (in days, or same units as times_col). 
+ The bins to use for the histogram of time gaps + (in days, or same units as times_col). Default values are bins from 0 to 2 hours, in 5 minute intervals. Returns ------- histogram : `np.ndarray` Returns a histogram of the tgaps at each slice point; - these histograms can be combined and plotted using the 'SummaryHistogram plotter'. + these histograms can be combined and plotted using the + 'SummaryHistogram plotter'. """ def __init__( @@ -111,15 +123,22 @@ def run(self, data_slice, slice_point=None): class TgapsPercentMetric(BaseMetric): - """Compute the fraction of the time gaps between observations that occur in a given time range. + """Compute the fraction of the time gaps between observations that occur + in a given time range. Measure the gaps between observations. By default, only gaps - between neighboring visits are computed. If all_gaps is set to true, all gaps are - computed (i.e., if there are observations at 10, 20, 30 and 40 the default will - Compute the percent of gaps between specified endpoints. + between neighboring visits are computed. If all_gaps is set to true, + all gaps are computed (i.e., if there are observations at 10, 20, 30 and + 40 the default will Compute the percent of gaps between specified endpoints. - This is different from the TgapsMetric in that this only looks at what percent of intervals fall - into the specified range, rather than histogramming the entire set of tgaps. + This is different from the TgapsMetric in that this only looks at + what percent of intervals fall into the specified range, rather than + histogramming the entire set of tgaps. + + This metric has drawbacks in that the result is tied to the overall + number of tgaps (i.e. a result of 100 could still be worse than + a different simulation with a result of 50, in terms of how often a + particular Tgap is sampled). Parameters ---------- @@ -127,8 +146,10 @@ class TgapsPercentMetric(BaseMetric): The column name for the exposure times. Values assumed to be in days. Default observationStartMJD. all_gaps : `bool`, opt - Histogram the gaps between all observations (True) or just successive observations (False)? - Default is False. If all gaps are used, this metric can become significantly slower. + Histogram the gaps between all observations (True) or + just successive observations (False)? + Default is False. If all gaps are used, this metric + can become significantly slower. min_time : `float`, opt Minimum time of gaps to include (days). Default 2/24 (2 hours). max_time : `float`, opt @@ -177,8 +198,9 @@ class NightgapsMetric(BaseMetric): Measure the gaps between observations. By default, only gaps - between neighboring visits are computed. If all_gaps is set to true, all gaps are - computed (i.e., if there are observations at 10, 20, 30 and 40 the default will + between neighboring visits are computed. If all_gaps is set to true, + all gaps are computed (i.e., if there are observations at 10, 20, 30 and + 40 the default will histogram [10,10,10] while all_gaps histograms [10,10,10,20,20,30]) Parameters @@ -187,17 +209,21 @@ class NightgapsMetric(BaseMetric): The column name for the night of each observation. Default 'night'. all_gaps : `bool`, optional - Histogram the gaps between all observations (True) or just successive observations (False)? - Default is False. If all gaps are used, this metric can become significantly slower. + Histogram the gaps between all observations (True) or just successive + observations (False)? + Default is False. 
If all gaps are used, this metric can become + significantly slower. bins : `np.ndarray`, optional - The bins to use for the histogram of time gaps (in days, or same units as timesCol). + The bins to use for the histogram of time gaps (in days, or same + units as timesCol). Default values are bins from 0 to 10 days, in 1 day intervals. Returns ------- histogram : `np.ndarray` Returns a histogram of the deltaT between nights at each slice point; - these histograms can be combined and plotted using the 'SummaryHistogram plotter'. + these histograms can be combined and plotted using the + 'SummaryHistogram plotter'. """ def __init__( @@ -232,7 +258,8 @@ def run(self, data_slice, slice_point=None): class NVisitsPerNightMetric(BaseMetric): """Histogram the number of visits in each night. - Splits the visits by night, then histograms how many visits occur in each night. + Splits the visits by night, then histograms how many visits occur + in each night. Parameters ---------- @@ -240,14 +267,16 @@ class NVisitsPerNightMetric(BaseMetric): The column name for the night of each observation. Default 'night'. bins : `np.ndarray`, optional - The bins to use for the histogram of time gaps (in days, or same units as timesCol). + The bins to use for the histogram of time gaps (in days, or same + units as timesCol). Default values are bins from 0 to 5 visits, in steps of 1. Returns ------- histogram : `np.ndarray` - Returns a histogram of the number of visits per night at each slice point; - these histograms can be combined and plotted using the 'SummaryHistogram plotter'. + Returns a histogram of the number of visits per night at each + slice point; these histograms can be combined and plotted using the + 'SummaryHistogram plotter'. """ def __init__(self, night_col="night", bins=np.arange(0, 10, 1), units="#", **kwargs): From c70b18ae1fff7fccd1806d9ef58eb2ba0225815f Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 12:12:05 -0700 Subject: [PATCH 11/21] Sort of a hack to allow the "Identity" metric automatically applied to UniSlicer metrics to not fail when metric returns a histogram (ie. for the SummaryHistogram plotter). --- rubin_sim/maf/db/results_db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rubin_sim/maf/db/results_db.py b/rubin_sim/maf/db/results_db.py index d409873ed..f4185ccc5 100644 --- a/rubin_sim/maf/db/results_db.py +++ b/rubin_sim/maf/db/results_db.py @@ -574,7 +574,7 @@ def update_summary_stat(self, metric_id, summary_name, summary_value, ntry=3, pa # np.ndarray with 'name' and 'value' columns. self.open() tries = 0 - if isinstance(summary_value, np.ndarray): + if isinstance(summary_value, np.ndarray) and summary_value.dtype.names is not None: if ("name" in summary_value.dtype.names) and ("value" in summary_value.dtype.names): for value in summary_value: sSuffix = value["name"] From 41581978d251244d33388f324158c698815aef01 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 20 May 2024 16:53:52 -0700 Subject: [PATCH 12/21] Update the DDF metric batch. NvisitsPerNight now works on Unislicer (receiving all visits per DDF, not just those at the center). Added CoaddPerNight metric. Fixed SeasonLength plot labels. 
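As a reference for how the new pieces fit together, a sketch of the per-night coadded depth
histogram for a single DDF; the bin edges and the empty sql constraint are placeholders (the batch
uses per-filter limits and per-field queries):

    import numpy as np
    import rubin_sim.maf as maf

    # Histogram the coadded depth per night and plot it with the
    # SummaryHistogram plotter; the UniSlicer receives all visits
    # selected by the sql constraint for the field.
    magbins = np.arange(22.0, 27.0, 0.05)
    metric = maf.CoaddM5PerNightMetric(
        night_col="night", m5_col="fiveSigmaDepth", bins=magbins
    )
    slicer = maf.UniSlicer()
    plot_dict = {"bins": magbins, "xlabel": "Coadded Depth Per Night"}
    bundle = maf.MetricBundle(
        metric, slicer, "", plot_dict=plot_dict, plot_funcs=[maf.SummaryHistogram()]
    )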
--- rubin_sim/maf/batches/ddf_batch.py | 102 ++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 24 deletions(-) diff --git a/rubin_sim/maf/batches/ddf_batch.py b/rubin_sim/maf/batches/ddf_batch.py index 7a283c1d4..d09c58c65 100644 --- a/rubin_sim/maf/batches/ddf_batch.py +++ b/rubin_sim/maf/batches/ddf_batch.py @@ -232,23 +232,8 @@ def ddfBatch( ) # Weak lensing visits - # The "magic numbers" here scale the final depth into - # approximately consistent visits per year - final depth is - # determined by arbitrary definition of 'good sample' lim_ebv = 0.2 - offset = 0.1 - mag_cuts = { - 1: 24.75 - offset, - 2: 25.12 - offset, - 3: 25.35 - offset, - 4: 25.5 - offset, - 5: 25.62 - offset, - 6: 25.72 - offset, - 7: 25.8 - offset, - 8: 25.87 - offset, - 9: 25.94 - offset, - 10: 26.0 - offset, - } + mag_cuts = 26.0 displayDict["group"] = "Weak Lensing" displayDict["subgroup"] = "" displayDict["caption"] = f"Weak lensing metric in the {fieldname} DDF." @@ -261,7 +246,7 @@ def ddfBatch( for sql in sqls_gri: metric = maf.WeakLensingNvisits( lsst_filter="i", - depth_cut=mag_cuts[10], + depth_cut=mag_cuts, ebvlim=lim_ebv, min_exp_time=20.0, metric_name="WeakLensingNvisits_" + sql, @@ -389,7 +374,7 @@ def ddfBatch( display_dict=displayDict, ) ) - + ####### # Coadded depth per filter, and count per filter displayDict["group"] = "Basics" for f in "ugrizy": @@ -464,11 +449,22 @@ def ddfBatch( ) ) - # Now to compute some things at just the center of the DDF + # Now to compute some things ~~at just the center of the DDF~~ NOPE + # (will compute these "per DDF" not just at the center, since + # the dithering pattern is not yet set and that will influence the + # result -- once dithering is better determined, could add ptslicer). # For these metrics, add a requirement that the 'note' label # match the DDF, to avoid WFD visits skewing the results # (we want to exclude non-DD visits), - ptslicer = maf.UserPointsSlicer(np.mean(ddfs[ddf]["ra"]), np.mean(ddfs[ddf]["dec"])) + + if fieldname == "WFD": + ptslicer = maf.UserPointsSlicer(np.mean(ddfs[ddf]["ra"]), np.mean(ddfs[ddf]["dec"])) + else: + ptslicer = maf.UniSlicer() # rely on query to remove non-DD visits + # Add RA and Dec to slice_point data (for season calculations) + # slice_points store ra/dec internally in radians. + ptslicer.slice_points["ra"] = np.radians(np.mean(ddfs[ddf]["ra"])) + ptslicer.slice_points["dec"] = np.radians(np.mean(ddfs[ddf]["dec"])) displayDict["group"] = "Cadence" displayDict["order"] = order @@ -488,7 +484,7 @@ def ddfBatch( displayDict["subgroup"] = "Sequence length" # Number of observations per night, any filter (sequence length) - # Histogram the number of visits per night at the center of the DDF + # Histogram the number of visits per night countbins = np.arange(0, 200, 5) metric = maf.NVisitsPerNightMetric( night_col="night", @@ -497,7 +493,7 @@ def ddfBatch( ) plotDict = {"bins": countbins, "xlabel": "Number of visits per night"} displayDict["caption"] = ( - f"Histogram of the number of visits in each night, at the center of {fieldname}." + f"Histogram of the number of visits in each night per DDF." 
) plotFunc = maf.SummaryHistogram() bundle = maf.MetricBundle( @@ -511,17 +507,49 @@ def ddfBatch( ) bundle_list.append(bundle) + # Coadded depth of observations per night, each filter + # "magic numbers" to fill plot come from baseline v3.4 + min_coadds = {'u': 22.3, 'g': 22.3, 'r': 22.9, 'i': 23.1, 'z': 21.7, 'y': 21.5} + max_coadds = {'u': 26, 'g': 27.2, 'r': 27, 'i': 26.5, 'z': 26.5, 'y': 25.1} + # Histogram the coadded depth per night, per filter + for f in 'ugrizy': + magbins = np.arange(min_coadds[f], max_coadds[f], 0.05) + metric = maf.CoaddM5PerNightMetric( + night_col="night", + m5_col="fiveSigmaDepth", + bins=magbins, + metric_name=f"{fieldname} CoaddM5PerNight", + ) + plotDict = {"bins": magbins, "xlabel": "Coadded Depth Per Night"} + displayDict["caption"] = ( + f"Histogram of the coadded depth in {f} in each night per DDF." + ) + plotFunc = maf.SummaryHistogram() + bundle = maf.MetricBundle( + metric, + ptslicer, + fieldsqls[f], + info_label=info_labels[f], + plot_dict=plotDict, + display_dict=displayDict, + plot_funcs=[plotFunc], + ) + bundle_list.append(bundle) + + # Plot of number of visits per night over time if fieldname.endswith("WFD"): pass else: displayDict["caption"] = f"Number of visits per night for {fieldname}." metric = maf.CountMetric("observationStartMJD", metric_name=f"{fieldname} Nvisits Per Night") - slicer = maf.OneDSlicer(slice_col_name="night", bin_size=1) + slicer = maf.OneDSlicer(slice_col_name="night", bin_size=1, badval=0) + plot_dict = {'filled_data': True} bundle = maf.MetricBundle( metric, slicer, fieldsqls["all"], info_label=info_labels["all"], + plot_dict=plot_dict, display_dict=displayDict, summary_metrics=[ maf.MedianMetric(), @@ -532,6 +560,31 @@ def ddfBatch( ) bundle_list.append(bundle) + # Likewise, but coadded depth per filter + if fieldname.endswith("WFD"): + pass + else: + for f in 'ugrizy': + displayDict["caption"] = f"Coadded depth per night for {fieldname} in band {f}." + metric = maf.Coaddm5Metric(metric_name=f"{fieldname} CoaddedM5 Per Night") + slicer = maf.OneDSlicer(slice_col_name="night", bin_size=1, badval=min_coadds[f]) + plot_dict = {'filled_data': True} + bundle = maf.MetricBundle( + metric, + slicer, + fieldsqls[f], + info_label=info_labels[f], + plot_dict=plot_dict, + display_dict=displayDict, + summary_metrics=[ + maf.MedianMetric(), + maf.PercentileMetric(percentile=80, metric_name="80thPercentile"), + maf.MinMetric(), + maf.MaxMetric(), + ], + ) + bundle_list.append(bundle) + displayDict["subgroup"] = "Sequence gaps" # Histogram of the number of nights between visits, all filters @@ -582,12 +635,13 @@ def rfunc(simdata): # Sometimes number of seasons is 10, sometimes 11 # (depending on where survey starts/end) # so normalize it so there's always 11 values + # by adding 0 at the end. if len(simdata) < 11: simdata = np.concatenate([simdata, np.array([0], float)]) return simdata metric = maf.SeasonLengthMetric(reduce_func=rfunc, metric_dtype="object") - plotDict = {"bins": np.arange(0, 12), "xlabel": "Season length (days)"} + plotDict = {"bins": np.arange(0, 12), "ylabel": "Season length (days)", "xlabel": "Season"} plotFunc = maf.SummaryHistogram() displayDict["caption"] = f"Plot of the season length per season in the {fieldname} DDF." 
displayDict["order"] = order From 595310df7c833866fd9ea672c4a76b0fc06fb82d Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Tue, 11 Jun 2024 13:43:54 -0700 Subject: [PATCH 13/21] Warning if no pyoorb --- rubin_sim/moving_objects/ooephemerides.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/rubin_sim/moving_objects/ooephemerides.py b/rubin_sim/moving_objects/ooephemerides.py index 9c61de20b..b077b5515 100644 --- a/rubin_sim/moving_objects/ooephemerides.py +++ b/rubin_sim/moving_objects/ooephemerides.py @@ -7,7 +7,11 @@ import numpy as np import pandas as pd -import pyoorb as oo + +try: + import pyoorb as oo +except ModuleNotFoundError: + NO_PYOORB = True def dtime(time_prev): @@ -56,6 +60,11 @@ class PyOrbEphemerides: """ def __init__(self, ephfile=None): + + if NO_PYOORB: + warnings.warn("No pyoorb available, use another ephemeris generator.") + raise ModuleNotFoundError + # Set translation from timescale to OpenOrb numerical representation. # Note all orbits are assumed to be in TT timescale. # Also, all dates are expected to be in MJD. From 1aae39b9efdf6d26270dcd11d3cd629791531422 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Tue, 21 May 2024 11:02:41 -0700 Subject: [PATCH 14/21] Black/isort/ruff Black fix test failures Black again Ruff updates in import names --- rubin_sim/maf/batches/ddf_batch.py | 22 +++++++--------- rubin_sim/maf/metadata_dir.py | 1 - rubin_sim/maf/metrics/tgaps.py | 25 ++++++++++++------- rubin_sim/maf/run_moving_calc.py | 8 +++--- rubin_sim/maf/run_moving_fractions.py | 4 +-- rubin_sim/maf/scimaf_dir.py | 4 +-- .../maf/slicers/healpix_subset_slicer.py | 19 +++++++++----- rubin_sim/maf/utils/maf_utils.py | 11 +++----- rubin_sim/moving_objects/cheby_values.py | 2 +- rubin_sim/moving_objects/ooephemerides.py | 6 +++-- rubin_sim/moving_objects/orbits.py | 4 +-- tests/maf/test_archive.py | 2 +- tests/moving_objects/test_ephemerides.py | 2 +- 13 files changed, 59 insertions(+), 51 deletions(-) diff --git a/rubin_sim/maf/batches/ddf_batch.py b/rubin_sim/maf/batches/ddf_batch.py index d09c58c65..9caa2ee84 100644 --- a/rubin_sim/maf/batches/ddf_batch.py +++ b/rubin_sim/maf/batches/ddf_batch.py @@ -460,7 +460,7 @@ def ddfBatch( if fieldname == "WFD": ptslicer = maf.UserPointsSlicer(np.mean(ddfs[ddf]["ra"]), np.mean(ddfs[ddf]["dec"])) else: - ptslicer = maf.UniSlicer() # rely on query to remove non-DD visits + ptslicer = maf.UniSlicer() # rely on query to remove non-DD visits # Add RA and Dec to slice_point data (for season calculations) # slice_points store ra/dec internally in radians. ptslicer.slice_points["ra"] = np.radians(np.mean(ddfs[ddf]["ra"])) @@ -492,9 +492,7 @@ def ddfBatch( metric_name=f"{fieldname} NVisitsPerNight", ) plotDict = {"bins": countbins, "xlabel": "Number of visits per night"} - displayDict["caption"] = ( - f"Histogram of the number of visits in each night per DDF." - ) + displayDict["caption"] = "Histogram of the number of visits in each night per DDF." 
plotFunc = maf.SummaryHistogram() bundle = maf.MetricBundle( metric, @@ -509,10 +507,10 @@ def ddfBatch( # Coadded depth of observations per night, each filter # "magic numbers" to fill plot come from baseline v3.4 - min_coadds = {'u': 22.3, 'g': 22.3, 'r': 22.9, 'i': 23.1, 'z': 21.7, 'y': 21.5} - max_coadds = {'u': 26, 'g': 27.2, 'r': 27, 'i': 26.5, 'z': 26.5, 'y': 25.1} + min_coadds = {"u": 22.3, "g": 22.3, "r": 22.9, "i": 23.1, "z": 21.7, "y": 21.5} + max_coadds = {"u": 26, "g": 27.2, "r": 27, "i": 26.5, "z": 26.5, "y": 25.1} # Histogram the coadded depth per night, per filter - for f in 'ugrizy': + for f in "ugrizy": magbins = np.arange(min_coadds[f], max_coadds[f], 0.05) metric = maf.CoaddM5PerNightMetric( night_col="night", @@ -521,9 +519,7 @@ def ddfBatch( metric_name=f"{fieldname} CoaddM5PerNight", ) plotDict = {"bins": magbins, "xlabel": "Coadded Depth Per Night"} - displayDict["caption"] = ( - f"Histogram of the coadded depth in {f} in each night per DDF." - ) + displayDict["caption"] = f"Histogram of the coadded depth in {f} in each night per DDF." plotFunc = maf.SummaryHistogram() bundle = maf.MetricBundle( metric, @@ -543,7 +539,7 @@ def ddfBatch( displayDict["caption"] = f"Number of visits per night for {fieldname}." metric = maf.CountMetric("observationStartMJD", metric_name=f"{fieldname} Nvisits Per Night") slicer = maf.OneDSlicer(slice_col_name="night", bin_size=1, badval=0) - plot_dict = {'filled_data': True} + plot_dict = {"filled_data": True} bundle = maf.MetricBundle( metric, slicer, @@ -564,11 +560,11 @@ def ddfBatch( if fieldname.endswith("WFD"): pass else: - for f in 'ugrizy': + for f in "ugrizy": displayDict["caption"] = f"Coadded depth per night for {fieldname} in band {f}." metric = maf.Coaddm5Metric(metric_name=f"{fieldname} CoaddedM5 Per Night") slicer = maf.OneDSlicer(slice_col_name="night", bin_size=1, badval=min_coadds[f]) - plot_dict = {'filled_data': True} + plot_dict = {"filled_data": True} bundle = maf.MetricBundle( metric, slicer, diff --git a/rubin_sim/maf/metadata_dir.py b/rubin_sim/maf/metadata_dir.py index fca199656..3df044e10 100755 --- a/rubin_sim/maf/metadata_dir.py +++ b/rubin_sim/maf/metadata_dir.py @@ -6,7 +6,6 @@ import shutil import matplotlib -import numpy as np matplotlib.use("Agg") diff --git a/rubin_sim/maf/metrics/tgaps.py b/rubin_sim/maf/metrics/tgaps.py index 799eada82..62c81bee9 100644 --- a/rubin_sim/maf/metrics/tgaps.py +++ b/rubin_sim/maf/metrics/tgaps.py @@ -11,11 +11,11 @@ import numpy as np from scipy.stats import binned_statistic + from ..utils import coadd_m5 from .base_metric import BaseMetric - class GapsMetric(BaseMetric): """Compute the number of times a gaps of a specified timescale (+/- 1/2 timescale) is sampled. @@ -129,7 +129,8 @@ class TgapsPercentMetric(BaseMetric): Measure the gaps between observations. By default, only gaps between neighboring visits are computed. If all_gaps is set to true, all gaps are computed (i.e., if there are observations at 10, 20, 30 and - 40 the default will Compute the percent of gaps between specified endpoints. + 40 the default will compute the percent of gaps between + specified endpoints. This is different from the TgapsMetric in that this only looks at what percent of intervals fall into the specified range, rather than @@ -314,8 +315,9 @@ class CoaddM5PerNightMetric(BaseMetric): 'SummaryHistogram plotter'. 
""" - def __init__(self, night_col="night", m5_col="fiveSigmaDepth", bins=np.arange(20, 25, 0.05), - units="mag", **kwargs): + def __init__( + self, night_col="night", m5_col="fiveSigmaDepth", bins=np.arange(20, 25, 0.05), units="mag", **kwargs + ): # Pass the same bins to the plotter. self.night_col = night_col self.m5_col = m5_col @@ -323,9 +325,12 @@ def __init__(self, night_col="night", m5_col="fiveSigmaDepth", bins=np.arange(20 super().__init__(col=[self.night_col, self.m5_col], metric_dtype="object", units=units, **kwargs) def run(self, data_slice, slice_point=None): - m5_per_night, be, bn = binned_statistic(data_slice[self.night_col], data_slice[self.m5_col], - statistic=coadd_m5, - bins=np.arange(0, 3653, 1)) + m5_per_night, be, bn = binned_statistic( + data_slice[self.night_col], + data_slice[self.m5_col], + statistic=coadd_m5, + bins=np.arange(0, 3653, 1), + ) # Drop the nights with no observations (-inf) m5_per_night = m5_per_night[np.where(m5_per_night > 0)] result, bins = np.histogram(m5_per_night, self.bins) @@ -335,7 +340,8 @@ def run(self, data_slice, slice_point=None): class MaxGapMetric(BaseMetric): """Find the maximum gap (in days) in between successive observations. - Useful for making sure there is an image within the last year that would make a good template image. + Useful for making sure there is an image within the last year that + would make a good template image. Parameters ---------- @@ -363,7 +369,8 @@ def run(self, data_slice, slice_point=None): class NightTimespanMetric(BaseMetric): - """Calculate the maximum time span covered in each night, report the `percentile` value of all timespans. + """Calculate the maximum time span covered in each night, + report the `percentile` value of all timespans. Parameters ---------- diff --git a/rubin_sim/maf/run_moving_calc.py b/rubin_sim/maf/run_moving_calc.py index 9b26ae468..fba08610f 100755 --- a/rubin_sim/maf/run_moving_calc.py +++ b/rubin_sim/maf/run_moving_calc.py @@ -10,7 +10,7 @@ from . import batches as batches from . import db as db -from . import metricBundles as mmb +from . import metricBundles as mmB def run_moving_calc(): @@ -171,7 +171,7 @@ def run_moving_calc(): ) # Run these discovery metrics print("Calculating quick discovery metrics with simple trailing losses.") - bg = mmb.MoMetricBundleGroup(bdictT, out_dir=args.out_dir, results_db=results_db) + bg = mmB.MoMetricBundleGroup(bdictT, out_dir=args.out_dir, results_db=results_db) bg.run_all() # Run all discovery metrics using 'detection' losses @@ -201,7 +201,7 @@ def run_moving_calc(): # Run these discovery metrics print("Calculating full discovery metrics with detection losses.") - bg = mmb.MoMetricBundleGroup(bdictD, out_dir=args.out_dir, results_db=results_db) + bg = mmB.MoMetricBundleGroup(bdictD, out_dir=args.out_dir, results_db=results_db) bg.run_all() # Run all characterization metrics @@ -229,5 +229,5 @@ def run_moving_calc(): ) # Run these characterization metrics print("Calculating characterization metrics.") - bg = mmb.MoMetricBundleGroup(bdictC, out_dir=args.out_dir, results_db=results_db) + bg = mmB.MoMetricBundleGroup(bdictC, out_dir=args.out_dir, results_db=results_db) bg.run_all() diff --git a/rubin_sim/maf/run_moving_fractions.py b/rubin_sim/maf/run_moving_fractions.py index f308ec348..82cae208d 100755 --- a/rubin_sim/maf/run_moving_fractions.py +++ b/rubin_sim/maf/run_moving_fractions.py @@ -9,7 +9,7 @@ from . import batches as batches from . import db as db -from . import metricBundles as mmb +from . 
import metricBundles as mmB def run_moving_fractions(): @@ -74,7 +74,7 @@ def run_moving_fractions(): bdict = {} for mName, mFile in zip(metric_names, metricfiles): - bdict[mName] = mmb.create_empty_mo_metric_bundle() + bdict[mName] = mmB.create_empty_mo_metric_bundle() bdict[mName].read(mFile) first = bdict[metric_names[0]] diff --git a/rubin_sim/maf/scimaf_dir.py b/rubin_sim/maf/scimaf_dir.py index 5238aa7d0..f516f1fc1 100755 --- a/rubin_sim/maf/scimaf_dir.py +++ b/rubin_sim/maf/scimaf_dir.py @@ -14,7 +14,7 @@ from . import batches as batches from . import db as db -from . import metricBundles as mb +from . import metricBundles as mmB def scimaf_dir(): @@ -70,7 +70,7 @@ def scimaf_dir(): mjd0=mjd0, ) # Run them, including generating plots - group = mb.MetricBundleGroup( + group = mmB.MetricBundleGroup( bdict, filename, out_dir=out_dir, results_db=results_db, save_early=False ) group.run_all(clear_memory=True, plot_now=True) diff --git a/rubin_sim/maf/slicers/healpix_subset_slicer.py b/rubin_sim/maf/slicers/healpix_subset_slicer.py index 9c92a1204..2eed222f0 100644 --- a/rubin_sim/maf/slicers/healpix_subset_slicer.py +++ b/rubin_sim/maf/slicers/healpix_subset_slicer.py @@ -1,4 +1,8 @@ -__all__ = ("make_circle_subset_slicer", "make_wfd_subset_slicer", "HealpixSubsetSlicer",) +__all__ = ( + "make_circle_subset_slicer", + "make_wfd_subset_slicer", + "HealpixSubsetSlicer", +) from functools import wraps @@ -9,6 +13,7 @@ from .healpix_slicer import HealpixSlicer + def make_circle_subset_slicer(ra_cen, dec_cen, radius=3.0, nside=512, use_cache=False): """Create a circular healpix subset slicer, centered on ra_cen/dec_cen. @@ -38,10 +43,11 @@ def make_circle_subset_slicer(ra_cen, dec_cen, radius=3.0, nside=512, use_cache= dist = simsUtils.angular_separation(ra_cen, dec_cen, ra, dec) close = np.where(dist <= radius)[0] subsetslicer = HealpixSubsetSlicer(nside, close, use_cache=use_cache) - plot_dict = {"visufunc": hp.gnomview, - "rot": (ra_cen, dec_cen, 0), - "xsize": 500, - } + plot_dict = { + "visufunc": hp.gnomview, + "rot": (ra_cen, dec_cen, 0), + "xsize": 500, + } return subsetslicer, plot_dict @@ -66,12 +72,13 @@ def make_wfd_subset_slicer(nside=64, use_cache=True, wfd_labels=None): version of the scheduler footprint using `get_current_footprint`. """ if wfd_labels is None: - wfd_labels = ['lowdust', 'euclid_overlap', 'virgo', 'bulgy', 'LMC_SMC'] + wfd_labels = ["lowdust", "euclid_overlap", "virgo", "bulgy", "LMC_SMC"] footprints, labels = get_current_footprint(nside=nside) wfdpix = np.where(np.in1d(labels, wfd_labels))[0] slicer = HealpixSubsetSlicer(nside=nside, hpid=wfdpix, use_cache=use_cache) return slicer + class HealpixSubsetSlicer(HealpixSlicer): """A spatial slicer that evaluates pointings on a subset of a healpix grid. 
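For reference, the two helpers reformatted above are typically used along these lines; the field
coordinates below are placeholders, and the explicit module import is only to keep the sketch
self-contained (not part of this patch):

    from rubin_sim.maf.slicers.healpix_subset_slicer import (
        make_circle_subset_slicer,
        make_wfd_subset_slicer,
    )

    # High-resolution subset slicer plus a gnomview plot_dict centered on one field.
    ddf_slicer, ddf_plot_dict = make_circle_subset_slicer(150.1, 2.18, radius=3.0, nside=512)
    # WFD-only subset slicer, using the current footprint labels.
    wfd_slicer = make_wfd_subset_slicer(nside=64)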
diff --git a/rubin_sim/maf/utils/maf_utils.py b/rubin_sim/maf/utils/maf_utils.py index f4dcb094b..533cbe878 100644 --- a/rubin_sim/maf/utils/maf_utils.py +++ b/rubin_sim/maf/utils/maf_utils.py @@ -20,8 +20,7 @@ def load_inst_zeropoints(): - """Load up and return instrumental zeropoints and atmospheric extinctions - """ + """Load up and return instrumental zeropoints and atmospheric extinctions""" zp_inst = {} datadir = get_data_dir() for filtername in "ugrizy": @@ -90,8 +89,7 @@ def collapse_night( return night_slice -def optimal_bins(datain, binmin=None, binmax=None, nbin_max=200, nbin_min=1, - verbose=False): +def optimal_bins(datain, binmin=None, binmax=None, nbin_max=200, nbin_min=1, verbose=False): """ Set an 'optimal' number of bins using the Freedman-Diaconis rule. @@ -167,8 +165,8 @@ def optimal_bins(datain, binmin=None, binmax=None, nbin_max=200, nbin_min=1, def percentile_clipping(data, percentile=95.0): """ - Calculate the minimum and maximum values of a distribution of points, after - discarding data more than 'percentile' from the median. + Calculate the minimum and maximum values of a distribution of points, + after discarding data more than 'percentile' from the median. This is useful for determining useful data ranges for plots. Note that 'percentile' percent of the data is retained. @@ -212,4 +210,3 @@ def radec2pix(nside, ra, dec): lat = np.pi / 2.0 - dec hpid = hp.ang2pix(nside, lat, ra) return hpid - diff --git a/rubin_sim/moving_objects/cheby_values.py b/rubin_sim/moving_objects/cheby_values.py index 34553b6bd..f1204bf9b 100644 --- a/rubin_sim/moving_objects/cheby_values.py +++ b/rubin_sim/moving_objects/cheby_values.py @@ -68,7 +68,7 @@ def read_coefficients(self, cheby_fits_file): if not os.path.isfile(cheby_fits_file): raise IOError("Could not find cheby_fits_file at %s" % (cheby_fits_file)) # Read the coefficients file. - coeffs = pd.read_table(cheby_fits_file, delim_whitespace=True) + coeffs = pd.read_table(cheby_fits_file, sep="\s+") # The header line provides information on the number of # coefficients for each parameter. datacols = coeffs.columns.values diff --git a/rubin_sim/moving_objects/ooephemerides.py b/rubin_sim/moving_objects/ooephemerides.py index b077b5515..ebb4da264 100644 --- a/rubin_sim/moving_objects/ooephemerides.py +++ b/rubin_sim/moving_objects/ooephemerides.py @@ -10,8 +10,10 @@ try: import pyoorb as oo + PYOORB_PRESENT = True + except ModuleNotFoundError: - NO_PYOORB = True + PYOORB_PRESENT = False def dtime(time_prev): @@ -61,7 +63,7 @@ class PyOrbEphemerides: def __init__(self, ephfile=None): - if NO_PYOORB: + if not PYOORB_PRESENT: warnings.warn("No pyoorb available, use another ephemeris generator.") raise ModuleNotFoundError diff --git a/rubin_sim/moving_objects/orbits.py b/rubin_sim/moving_objects/orbits.py index b1ce56561..c140f705f 100644 --- a/rubin_sim/moving_objects/orbits.py +++ b/rubin_sim/moving_objects/orbits.py @@ -388,7 +388,7 @@ def read_orbits(self, orbit_file, delim=None, skiprows=None): "COMPCODE", ) # First use names_com, and then change if required. 
- orbits = pd.read_csv(orbit_file, delim_whitespace=True, header=None, names=names_com) + orbits = pd.read_csv(orbit_file, sep="\s+", header=None, names=names_com) if orbits["FORMAT"][0] == "KEP": orbits.columns = names_kep @@ -397,7 +397,7 @@ def read_orbits(self, orbit_file, delim=None, skiprows=None): else: if delim is None: - orbits = pd.read_csv(orbit_file, delim_whitespace=True, skiprows=skiprows, names=names) + orbits = pd.read_csv(orbit_file, sep="\s+", skiprows=skiprows, names=names) else: orbits = pd.read_csv(orbit_file, sep=delim, skiprows=skiprows, names=names) diff --git a/tests/maf/test_archive.py b/tests/maf/test_archive.py index f9ed7625c..d9f2f7859 100644 --- a/tests/maf/test_archive.py +++ b/tests/maf/test_archive.py @@ -16,7 +16,7 @@ URLROOT = "https://raw.githubusercontent.com/lsst-pst/survey_strategy/main/fbs_2.0/" FAMILY_SOURCE = URLROOT + "runs_v2.1.json" METRIC_SUBSET_SOURCE = os.path.join(get_data_dir(), "maf", "metric_subsets.json") -SUMMARY_SOURCE = os.path.join(get_data_dir(), "maf", "summary_v34.h5") +SUMMARY_SOURCE = os.path.join(get_data_dir(), "maf", "baseline_summary.h5") # exception classes diff --git a/tests/moving_objects/test_ephemerides.py b/tests/moving_objects/test_ephemerides.py index 1dd48a3b4..348104623 100644 --- a/tests/moving_objects/test_ephemerides.py +++ b/tests/moving_objects/test_ephemerides.py @@ -154,7 +154,7 @@ def setUp(self): self.jpl_dir = os.path.join(get_data_dir(), "tests", "jpl_testdata") self.orbits.read_orbits(os.path.join(self.jpl_dir, "S0_n747.des"), skiprows=1) # Read JPL ephems. - self.jpl = pd.read_csv(os.path.join(self.jpl_dir, "807_n747.txt"), delim_whitespace=True) + self.jpl = pd.read_csv(os.path.join(self.jpl_dir, "807_n747.txt"), sep="\s+") # Temp key fix self.jpl["obj_id"] = self.jpl["objId"] # Add times in TAI and UTC, because. From a5dc99cf9d631a8d187acfa2f7dfdcbf46e24245 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Thu, 13 Jun 2024 17:24:34 -0700 Subject: [PATCH 15/21] Update data download versions --- rubin_sim/data/rs_download_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rubin_sim/data/rs_download_data.py b/rubin_sim/data/rs_download_data.py index e9594b0b3..b3defe1be 100644 --- a/rubin_sim/data/rs_download_data.py +++ b/rubin_sim/data/rs_download_data.py @@ -52,12 +52,12 @@ def data_dict(): # to create tar files and follow any sym links, run: e.g. # ``tar -chvzf maf_may_2021.tgz maf`` file_dict = { - "maf": "maf_2022_08_26.tgz", + "maf": "maf_2024_06_13.tgz", "maps": "maps_2022_2_28.tgz", "movingObjects": "movingObjects_oct_2021.tgz", "orbits": "orbits_2022_3_1.tgz", "orbits_precompute": "orbits_precompute_2023_05_23.tgz", - "sim_baseline": "sim_baseline_2023_09_22.tgz", + "sim_baseline": "sim_baseline_2024_06_13.tgz", "skybrightness": "skybrightness_2023_09_11.tgz", "throughputs": "throughputs_2023_09_22.tgz", "tests": "tests_2022_10_18.tgz", From 85075ea02a4ec6cb1ce380b21b038c448d13cb81 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Fri, 14 Jun 2024 16:37:06 -0700 Subject: [PATCH 16/21] Wider tolerance for skybrightness: intel vs. 
m2 --- tests/skybrightness/test_skymodel.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/skybrightness/test_skymodel.py b/tests/skybrightness/test_skymodel.py index 83372f242..220336043 100644 --- a/tests/skybrightness/test_skymodel.py +++ b/tests/skybrightness/test_skymodel.py @@ -147,7 +147,7 @@ def test_setups(self): dummy, spec1 = sm1.return_wave_spec() dummy, spec2 = sm2.return_wave_spec() - np.testing.assert_array_equal(spec1, spec2) + np.testing.assert_allclose(spec1, spec2, rtol=1e-13) # Check that the degrees kwarg works sm2.set_params( @@ -192,7 +192,8 @@ def test_setups(self): def test_mags(self): """ - Test that the interpolated mags are similar to mags computed from interpolated spectra + Test that the interpolated mags are similar to mags computed + from interpolated spectra """ through_path = os.path.join(get_data_dir(), "throughputs", "baseline") @@ -217,8 +218,10 @@ def test_mags(self): mag2 = sm2.return_mags() # Let's set the tolerance for matching the throughputs to be 0.001 - # This allows for small changes in the filter throughputs without requiring recomputation of - # sims_skybrighntess_pre, while still requiring a reasonable match against the measured sky + # This allows for small changes in the filter throughputs + # without requiring recomputation of + # sims_skybrighntess_pre, while still requiring a + # reasonable match against the measured sky for i, filtername in enumerate(filters): np.testing.assert_allclose(mags1[filtername], mag2[filtername], rtol=5e-3) From 052947be74480d7f9cb0cc9345de2d0556b88124 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Fri, 14 Jun 2024 16:47:16 -0700 Subject: [PATCH 17/21] Black for ooephemerides again --- rubin_sim/moving_objects/ooephemerides.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rubin_sim/moving_objects/ooephemerides.py b/rubin_sim/moving_objects/ooephemerides.py index ebb4da264..8cbb6d749 100644 --- a/rubin_sim/moving_objects/ooephemerides.py +++ b/rubin_sim/moving_objects/ooephemerides.py @@ -10,6 +10,7 @@ try: import pyoorb as oo + PYOORB_PRESENT = True except ModuleNotFoundError: From 7a22073954a34057b7b20abe6db3ecedd2147496 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 17 Jun 2024 13:11:33 -0700 Subject: [PATCH 18/21] Update to newer test datafile, without skybrightness pre files --- rubin_sim/data/rs_download_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rubin_sim/data/rs_download_data.py b/rubin_sim/data/rs_download_data.py index b3defe1be..1b443d572 100644 --- a/rubin_sim/data/rs_download_data.py +++ b/rubin_sim/data/rs_download_data.py @@ -60,7 +60,7 @@ def data_dict(): "sim_baseline": "sim_baseline_2024_06_13.tgz", "skybrightness": "skybrightness_2023_09_11.tgz", "throughputs": "throughputs_2023_09_22.tgz", - "tests": "tests_2022_10_18.tgz", + "tests": "tests_2024_04_23.tgz", } return file_dict From 2979237c8581a3f1866a2c7a5ca002cbd7ef841d Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 17 Jun 2024 15:29:19 -0700 Subject: [PATCH 19/21] Update tests to use new example database --- tests/maf/test_3x2fom.py | 3 +- tests/maf/test_batches.py | 4 +- tests/maf/test_metricbundle.py | 3 +- tests/maf/test_opsimutils.py | 4 +- tests/maf/test_stackers.py | 105 +++------------------------------ 5 files changed, 15 insertions(+), 104 deletions(-) diff --git a/tests/maf/test_3x2fom.py b/tests/maf/test_3x2fom.py index 36839a9b1..243569fb1 100644 --- a/tests/maf/test_3x2fom.py +++ b/tests/maf/test_3x2fom.py @@ -8,6 +8,7 @@ import 
rubin_sim.maf as maf +TEST_DB = "example_v3.4_0yrs.db" class Test3x2(unittest.TestCase): @classmethod @@ -55,7 +56,7 @@ def test_3x2(self): ) ) - database = os.path.join(get_data_dir(), "tests", "example_dbv1.7_0yrs.db") + database = os.path.join(get_data_dir(), "tests", TEST_DB) results_db = maf.db.ResultsDb(out_dir=self.out_dir) bd = maf.metric_bundles.make_bundles_dict_from_list(bundle_list) bg = maf.metric_bundles.MetricBundleGroup(bd, database, out_dir=self.out_dir, results_db=results_db) diff --git a/tests/maf/test_batches.py b/tests/maf/test_batches.py index dd0e6e0c7..42980dfd9 100644 --- a/tests/maf/test_batches.py +++ b/tests/maf/test_batches.py @@ -12,7 +12,7 @@ from rubin_sim.data import get_data_dir from rubin_sim.maf.slicers import MoObjSlicer - +TEST_DB = "example_v3.4_0yrs.db" class TestBatches(unittest.TestCase): @classmethod def tearDown_class(cls): @@ -125,7 +125,7 @@ def test_science_radar(self): ) def test_glance(self): ack = batches.glanceBatch() - database = os.path.join(get_data_dir(), "tests", "example_dbv1.7_0yrs.db") + database = os.path.join(get_data_dir(), "tests", TEST_DB) results_db = db.ResultsDb(out_dir=self.out_dir) bgroup = metric_bundles.MetricBundleGroup(ack, database, out_dir=self.out_dir, results_db=results_db) bgroup.run_all() diff --git a/tests/maf/test_metricbundle.py b/tests/maf/test_metricbundle.py index 118db45a0..e887cf2f6 100644 --- a/tests/maf/test_metricbundle.py +++ b/tests/maf/test_metricbundle.py @@ -14,6 +14,7 @@ import rubin_sim.maf.slicers as slicers import rubin_sim.maf.stackers as stackers +TEST_DB = "example_v3.4_0yrs.db" class TestMetricBundle(unittest.TestCase): @classmethod @@ -38,7 +39,7 @@ def test_out(self): metric_b = metric_bundles.MetricBundle( metric, slicer, sql, stacker_list=[stacker1, stacker2], maps_list=[map] ) - database = os.path.join(get_data_dir(), "tests", "example_dbv1.7_0yrs.db") + database = os.path.join(get_data_dir(), "tests", TEST_DB) results_db = db.ResultsDb(out_dir=self.out_dir) diff --git a/tests/maf/test_opsimutils.py b/tests/maf/test_opsimutils.py index 956a79a52..84a90bd4a 100644 --- a/tests/maf/test_opsimutils.py +++ b/tests/maf/test_opsimutils.py @@ -7,7 +7,7 @@ import rubin_sim.maf.utils.opsim_utils as opsimUtils - +TEST_DB = "example_v3.4_0yrs.db" class TestOpsimUtils(unittest.TestCase): def test_scale_benchmarks(self): """Test scaling the design and stretch benchmarks for the @@ -51,7 +51,7 @@ def test_calc_coadded_depth(self): def test_get_sim_data(self): """Test that we can get simulation data""" - database_file = os.path.join(get_data_dir(), "tests", "example_dbv1.7_0yrs.db") + database_file = os.path.join(get_data_dir(), "tests", TEST_DB) dbcols = ["fieldRA", "fieldDec", "note"] sql = "night < 10" full_sql = "SELECT fieldRA, fieldDec, note FROM observations where night < 10;" diff --git a/tests/maf/test_stackers.py b/tests/maf/test_stackers.py index 68d8a6806..70b639b0f 100644 --- a/tests/maf/test_stackers.py +++ b/tests/maf/test_stackers.py @@ -18,11 +18,12 @@ except ModuleNotFoundError: pass +TEST_DB = "example_v3.4_0yrs.db" class TestStackerClasses(unittest.TestCase): def setUp(self): # get some of the test data - test_db = os.path.join(get_data_dir(), "tests", "example_dbv1.7_0yrs.db") + test_db = os.path.join(get_data_dir(), "tests", TEST_DB) query = "select * from observations limit 1000" self.test_data = get_sim_data(test_db, None, [], full_sql_query=query) @@ -145,34 +146,8 @@ def _t_dither_per_night(self, diffsra, diffsdec, ra, dec, nights): 
self.assertAlmostEqual(dra_on_night.max(), 0) self.assertAlmostEqual(ddec_on_night.max(), 0) - @unittest.skip("Dither Stackers deprecated") - def test_setup_dither_stackers(self): - # Test that we get no stacker when using default columns. - ra_col = "fieldRA" - dec_col = "fieldDec" - degrees = True - stackerlist = stackers.setup_dither_stackers(ra_col, dec_col, degrees) - self.assertEqual(len(stackerlist), 0) - # Test that we get one (and the right one) - # when using particular columns. - ra_col = "hexDitherFieldPerNightRa" - dec_col = "hexDitherFieldPerNightDec" - stackerlist = stackers.setup_dither_stackers(ra_col, dec_col, degrees) - self.assertEqual(len(stackerlist), 1) - self.assertEqual(stackerlist[0], stackers.HexDitherFieldPerNightStacker()) - # Test that kwargs are passed along. - stackerlist = stackers.setup_dither_stackers(ra_col, dec_col, degrees, max_dither=0.5) - self.assertEqual(stackerlist[0].max_dither, np.radians(0.5)) - - @unittest.skip("Dither Stackers deprecated") - def test_base_dither_stacker(self): - # Test that the base dither stacker matches the type of a stacker. - s = stackers.HexDitherFieldPerNightStacker() - self.assertTrue(isinstance(s, stackers.BaseDitherStacker)) - s = stackers.ParallaxFactorStacker() - self.assertFalse(isinstance(s, stackers.BaseDitherStacker)) - - @unittest.skip("Dither Stackers deprecated") + + def test_random_dither(self): """ Test the random dither pattern. @@ -185,16 +160,15 @@ def test_random_dither(self): # comparisons. data["fieldRA"] = np.degrees(rng.random_sample(600) * (np.pi) + np.pi / 2.0) data["fieldDec"] = np.degrees(rng.random_sample(600) * np.pi / 2.0 - np.pi / 4.0) - stacker = stackers.RandomDitherFieldPerVisitStacker(max_dither=max_dither) + stacker = stackers.RandomDitherPerVisitStacker(max_dither=max_dither) data = stacker.run(data) - diffsra = (data["fieldRA"] - data["randomDitherFieldPerVisitRa"]) * np.cos( + diffsra = (data["fieldRA"] - data["randomDitherPerVisitRa"]) * np.cos( np.radians(data["fieldDec"]) ) - diffsdec = data["fieldDec"] - data["randomDitherFieldPerVisitDec"] + diffsdec = data["fieldDec"] - data["randomDitherPerVisitDec"] # Check dithers within expected range. self._t_dither_range(diffsra, diffsdec, data["fieldRA"], data["fieldDec"], max_dither) - @unittest.skip("Dither Stackers deprecated") def test_random_dither_per_night(self): """ Test the per-night random dither pattern. @@ -215,7 +189,6 @@ def test_random_dither_per_night(self): ) data["fieldRA"] = rng.rand(ndata) * (np.pi) + np.pi / 2.0 data["fieldDec"] = rng.rand(ndata) * np.pi / 2.0 - np.pi / 4.0 - data["fieldId"] = np.floor(rng.rand(ndata) * ndata) data["night"] = np.floor(rng.rand(ndata) * 10).astype("int") stacker = stackers.RandomDitherPerNightStacker(max_dither=max_dither) data = stacker.run(data) @@ -227,71 +200,7 @@ def test_random_dither_per_night(self): # Check that dithers on the same night are the same. self._t_dither_per_night(diffsra, diffsdec, data["fieldRA"], data["fieldDec"], data["night"]) - @unittest.skip("Dither Stackers deprecated") - def test_spiral_dither_per_night(self): - """ - Test the per-night spiral dither pattern. 
- """ - max_dither = 0.5 - ndata = 2000 - # Set seed so the test is stable - rng = np.random.RandomState(42) - - data = np.zeros( - ndata, - dtype=list( - zip( - ["fieldRA", "fieldDec", "fieldId", "night"], - [float, float, int, int], - ) - ), - ) - data["fieldRA"] = rng.rand(ndata) * (np.pi) + np.pi / 2.0 - data["fieldRA"] = np.zeros(ndata) + np.pi / 2.0 - data["fieldDec"] = rng.rand(ndata) * np.pi / 2.0 - np.pi / 4.0 - data["fieldDec"] = np.zeros(ndata) - data["fieldId"] = np.floor(rng.rand(ndata) * ndata) - data["night"] = np.floor(rng.rand(ndata) * 20).astype("int") - stacker = stackers.SpiralDitherPerNightStacker(max_dither=max_dither) - data = stacker.run(data) - diffsra = (data["fieldRA"] - data["spiralDitherPerNightRa"]) * np.cos(np.radians(data["fieldDec"])) - diffsdec = data["fieldDec"] - data["spiralDitherPerNightDec"] - self._t_dither_range(diffsra, diffsdec, data["fieldRA"], data["fieldDec"], max_dither) - # Check that dithers on the same night are the same. - self._t_dither_per_night(diffsra, diffsdec, data["fieldRA"], data["fieldDec"], data["night"]) - - @unittest.skip("Dither Stackers deprecated") - def test_hex_dither_per_night(self): - """ - Test the per-night hex dither pattern. - """ - max_dither = 0.5 - ndata = 2000 - # Set seed so the test is stable - rng = np.random.RandomState(42) - - data = np.zeros( - ndata, - dtype=list( - zip( - ["fieldRA", "fieldDec", "fieldId", "night"], - [float, float, int, int], - ) - ), - ) - data["fieldRA"] = rng.rand(ndata) * (np.pi) + np.pi / 2.0 - data["fieldDec"] = rng.rand(ndata) * np.pi / 2.0 - np.pi / 4.0 - data["fieldId"] = np.floor(rng.rand(ndata) * ndata) - data["night"] = np.floor(rng.rand(ndata) * 217).astype("int") - stacker = stackers.HexDitherPerNightStacker(max_dither=max_dither) - data = stacker.run(data) - diffsra = (data["fieldRA"] - data["hexDitherPerNightRa"]) * np.cos(np.radians(data["fieldDec"])) - diffsdec = data["fieldDec"] - data["hexDitherPerNightDec"] - self._t_dither_range(diffsra, diffsdec, data["fieldRA"], data["fieldDec"], max_dither) - # Check that dithers on the same night are the same. - self._t_dither_per_night(diffsra, diffsdec, data["fieldRA"], data["fieldDec"], data["night"]) - @unittest.skip("Dither Stackers deprecated") def test_random_rot_dither_per_filter_change_stacker(self): """ Test the rotational dither stacker. 
From ed8f38238af42e9f2a2404d76b3fbbd2f974627e Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 17 Jun 2024 15:29:52 -0700 Subject: [PATCH 20/21] Remove obsolete dither code and re-enable remaining dither options --- rubin_sim/maf/stackers/__init__.py | 1 + rubin_sim/maf/stackers/dither_stackers.py | 897 +++------------------- 2 files changed, 97 insertions(+), 801 deletions(-) diff --git a/rubin_sim/maf/stackers/__init__.py b/rubin_sim/maf/stackers/__init__.py index e3fef24d3..1efe6855b 100644 --- a/rubin_sim/maf/stackers/__init__.py +++ b/rubin_sim/maf/stackers/__init__.py @@ -11,3 +11,4 @@ from .sdss_stackers import * from .sn_stacker import * from .teff_stacker import * +from .dither_stackers import * diff --git a/rubin_sim/maf/stackers/dither_stackers.py b/rubin_sim/maf/stackers/dither_stackers.py index 982e3ecab..18912f57d 100644 --- a/rubin_sim/maf/stackers/dither_stackers.py +++ b/rubin_sim/maf/stackers/dither_stackers.py @@ -5,15 +5,8 @@ "in_hexagon", "polygon_coords", "BaseDitherStacker", - "RandomDitherFieldPerVisitStacker", - "RandomDitherFieldPerNightStacker", - "SpiralDitherFieldPerVisitStacker", - "SpiralDitherFieldPerNightStacker", - "HexDitherFieldPerVisitStacker", - "HexDitherFieldPerNightStacker", + "RandomDitherPerVisitStacker", "RandomDitherPerNightStacker", - "SpiralDitherPerNightStacker", - "HexDitherPerNightStacker", "RandomRotDitherPerFilterChangeStacker", ) @@ -25,15 +18,15 @@ from .base_stacker import BaseStacker # Stacker naming scheme: -# [Pattern]Dither[Field]Per[Timescale]. +# [Pattern]DitherPer[Timescale]. # Timescale indicates how often the dither offset is changed. -# The presence of 'Field' indicates that a new offset is chosen per field, on the indicated timescale. -# The absence of 'Field' indicates that all visits within the indicated timescale use the same dither offset. - -# Original dither stackers (Random, Spiral, Hex) written by Lynne Jones (lynnej@uw.edu) -# Additional dither stackers written by Humna Awan (humna.awan@rutgers.edu), with addition of -# constraining dither offsets to be within an inscribed hexagon (code modifications for use here by LJ). +# Original dither stackers (Random, Spiral, Hex) written by Lynne Jones +# (lynnej@uw.edu) +# Additional dither stackers written by Humna Awan (humna.awan@rutgers.edu), +# with addition of +# constraining dither offsets to be within an inscribed hexagon +# (code modifications for use here by LJ). def setup_dither_stackers(ra_col, dec_col, degrees, **kwargs): @@ -109,7 +102,8 @@ def in_hexagon(x_off, y_off, max_dither): Returns ------- numpy.ndarray - Indexes of the offsets which are within the hexagon inscribed inside the 'max_dither' radius circle. + Indexes of the offsets which are within the hexagon + inscribed inside the 'max_dither' radius circle. """ # Set up the hexagon limits. # y = mx + b, 2h is the height. @@ -161,7 +155,8 @@ def polygon_coords(nside, radius, rotation_angle): class BaseDitherStacker(BaseStacker): """Base class for dither stackers. - The base class just adds an easy way to define a stacker as one of the 'dither' types of stackers. + The base class just adds an easy way to define a stacker as + one of the 'dither' types of stackers. These run first, before any other stackers. Parameters @@ -178,8 +173,10 @@ class BaseDitherStacker(BaseStacker): The radius of the maximum dither offset, in degrees. Default 1.75 degrees. in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. 
- If False, offsets can lie anywhere out to the edges of the max_dither circle. + If True, offsets are constrained to lie within a hexagon + inscribed within the max_dither circle. + If False, offsets can lie anywhere out to the edges of + the max_dither circle. Default True. """ @@ -205,14 +202,15 @@ def __init__( self.units = ["deg", "deg"] else: self.units = ["rad", "rad"] - # Values required for framework operation: this specifies the data columns required from the database. + # Values required for framework operation: this specifies + # the data columns required from the database. self.cols_req = [self.ra_col, self.dec_col] -class RandomDitherFieldPerVisitStacker(BaseDitherStacker): +class RandomDitherPerVisitStacker(BaseDitherStacker): """ - Randomly dither the RA and Dec pointings up to max_dither degrees from center, - with a different offset for each field, for each visit. + Randomly dither the RA and Dec pointings up to max_dither degrees + from center, with a different offset for each visit. Parameters ---------- @@ -228,16 +226,20 @@ class RandomDitherFieldPerVisitStacker(BaseDitherStacker): The radius of the maximum dither offset, in degrees. Default 1.75 degrees. in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. + If True, offsets are constrained to lie within a + hexagon inscribed within the max_dither circle. + If False, offsets can lie anywhere out to the edges + of the max_dither circle. Default True. random_seed : int or None, optional - If set, then used as the random seed for the numpy random number generation for the dither offsets. + If set, then used as the random seed for the numpy random + number generation for the dither offsets. Default None. """ - # Values required for framework operation: this specifies the name of the new columns. - cols_added = ["randomDitherFieldPerVisitRa", "randomDitherFieldPerVisitDec"] + # Values required for framework operation: + # this specifies the name of the new columns. + cols_added = ["randomDitherPerVisitRa", "randomDitherPerVisitDec"] def __init__( self, @@ -288,9 +290,11 @@ def _generate_random_offsets(self, noffsets): def _run(self, sim_data, cols_present=False): if cols_present: - # Column already present in data; assume it is correct and does not need recalculating. + # Column already present in data; assume it is correct + # and does not need recalculating. return sim_data - # Generate random numbers for dither, using defined seed value if desired. + # Generate random numbers for dither, + # using defined seed value if desired. if not hasattr(self, "_rng"): if self.random_seed is not None: self._rng = np.random.RandomState(self.random_seed) @@ -307,15 +311,15 @@ def _run(self, sim_data, cols_present=False): else: ra = sim_data[self.ra_col] dec = sim_data[self.dec_col] - sim_data["randomDitherFieldPerVisitRa"] = ra + self.x_off / np.cos(dec) - sim_data["randomDitherFieldPerVisitDec"] = dec + self.y_off + sim_data["randomDitherPerVisitRa"] = ra + self.x_off / np.cos(dec) + sim_data["randomDitherPerVisitDec"] = dec + self.y_off # Wrap back into expected range. 
( - sim_data["randomDitherFieldPerVisitRa"], - sim_data["randomDitherFieldPerVisitDec"], + sim_data["randomDitherPerVisitRa"], + sim_data["randomDitherPerVisitDec"], ) = wrap_ra_dec( - sim_data["randomDitherFieldPerVisitRa"], - sim_data["randomDitherFieldPerVisitDec"], + sim_data["randomDitherPerVisitRa"], + sim_data["randomDitherPerVisitDec"], ) # Convert to degrees if self.degrees: @@ -324,126 +328,11 @@ def _run(self, sim_data, cols_present=False): return sim_data -class RandomDitherFieldPerNightStacker(RandomDitherFieldPerVisitStacker): - """ - Randomly dither the RA and Dec pointings up to max_dither degrees from center, - one dither offset per new night of observation of a field. - e.g. visits within the same night, to the same field, have the same offset. - - Parameters - ---------- - ra_col : str, optional - The name of the RA column in the data. - Default 'fieldRA'. - dec_col : str, optional - The name of the Dec column in the data. - Default 'fieldDec'. - degrees : bool, optional - Flag whether RA/Dec should be treated as (and kept as) degrees. - field_id_col : str, optional - The name of the fieldId column in the data. - Used to identify fields which should be identified as the 'same'. - Default 'fieldId'. - night_col : str, optional - The name of the night column in the data. - Default 'night'. - max_dither : float, optional - The radius of the maximum dither offset, in degrees. - Default 1.75 degrees. - in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. - Default True. - random_seed : int or None, optional - If set, then used as the random seed for the numpy random number generation for the dither offsets. - Default None. - """ - - # Values required for framework operation: this specifies the names of the new columns. - cols_added = ["randomDitherFieldPerNightRa", "randomDitherFieldPerNightDec"] - - def __init__( - self, - ra_col="fieldRA", - dec_col="fieldDec", - degrees=True, - field_id_col="fieldId", - night_col="night", - max_dither=1.75, - in_hex=True, - random_seed=None, - ): - """ - @ MaxDither in degrees - """ - # Instantiate the RandomDither object and set internal variables. - super().__init__( - ra_col=ra_col, - dec_col=dec_col, - degrees=degrees, - max_dither=max_dither, - in_hex=in_hex, - random_seed=random_seed, - ) - self.night_col = night_col - self.field_id_col = field_id_col - # Values required for framework operation: this specifies the data columns required from the database. - self.cols_req = [self.ra_col, self.dec_col, self.night_col, self.field_id_col] - - def _run(self, sim_data, cols_present=False): - if cols_present: - return sim_data - # Generate random numbers for dither, using defined seed value if desired. - if not hasattr(self, "_rng"): - if self.random_seed is not None: - self._rng = np.random.RandomState(self.random_seed) - else: - self._rng = np.random.RandomState(872453) - - # Generate the random dither values, one per night per field. 
- fields = np.unique(sim_data[self.field_id_col]) - nights = np.unique(sim_data[self.night_col]) - self._generate_random_offsets(len(fields) * len(nights)) - if self.degrees: - ra = np.radians(sim_data[self.ra_col]) - dec = np.radians(sim_data[self.dec_col]) - else: - ra = sim_data[self.ra_col] - dec = sim_data[self.dec_col] - # counter to ensure new random numbers are chosen every time - delta = 0 - for fieldid in np.unique(sim_data[self.field_id_col]): - # Identify observations of this field. - match = np.where(sim_data[self.field_id_col] == fieldid)[0] - # Apply dithers, increasing each night. - nights = sim_data[self.night_col][match] - vertex_idxs = np.searchsorted(np.unique(nights), nights) - vertex_idxs = vertex_idxs % len(self.x_off) - # ensure that the same xOff/yOff entries are not chosen - delta = delta + len(vertex_idxs) - sim_data["randomDitherFieldPerNightRa"][match] = ra[match] + self.x_off[vertex_idxs] / np.cos( - dec[match] - ) - sim_data["randomDitherFieldPerNightDec"][match] = dec[match] + self.y_off[vertex_idxs] - # Wrap into expected range. - ( - sim_data["randomDitherFieldPerNightRa"], - sim_data["randomDitherFieldPerNightDec"], - ) = wrap_ra_dec( - sim_data["randomDitherFieldPerNightRa"], - sim_data["randomDitherFieldPerNightDec"], - ) - if self.degrees: - for col in self.cols_added: - sim_data[col] = np.degrees(sim_data[col]) - return sim_data - - -class RandomDitherPerNightStacker(RandomDitherFieldPerVisitStacker): +class RandomDitherPerNightStacker(RandomDitherPerVisitStacker): """ - Randomly dither the RA and Dec pointings up to max_dither degrees from center, - one dither offset per night. - All fields observed within the same night get the same offset. + Randomly dither the RA and Dec pointings up to max_dither + degrees from center, one dither offset per night. + All pointings observed within the same night get the same offset. Parameters ---------- @@ -462,15 +351,19 @@ class RandomDitherPerNightStacker(RandomDitherFieldPerVisitStacker): The radius of the maximum dither offset, in degrees. Default 1.75 degrees. in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. + If True, offsets are constrained to lie within a hexagon + inscribed within the max_dither circle. + If False, offsets can lie anywhere out to the edges of the + max_dither circle. Default True. random_seed : int or None, optional - If set, then used as the random seed for the numpy random number generation for the dither offsets. + If set, then used as the random seed for the numpy random number + generation for the dither offsets. Default None. """ - # Values required for framework operation: this specifies the names of the new columns. + # Values required for framework operation: this specifies the + # names of the new columns. cols_added = ["randomDitherPerNightRa", "randomDitherPerNightDec"] def __init__( @@ -496,13 +389,15 @@ def __init__( random_seed=random_seed, ) self.night_col = night_col - # Values required for framework operation: this specifies the data columns required from the database. + # Values required for framework operation: + # this specifies the data columns required from the database. self.cols_req = [self.ra_col, self.dec_col, self.night_col] def _run(self, sim_data, cols_present=False): if cols_present: return sim_data - # Generate random numbers for dither, using defined seed value if desired. 
+ # Generate random numbers for dither, + # using defined seed value if desired. if not hasattr(self, "_rng"): if self.random_seed is not None: self._rng = np.random.RandomState(self.random_seed) @@ -534,626 +429,6 @@ def _run(self, sim_data, cols_present=False): return sim_data -class SpiralDitherFieldPerVisitStacker(BaseDitherStacker): - """ - Offset along an equidistant spiral with num_points, out to a maximum radius of max_dither. - Each visit to a field receives a new, sequential offset. - - Parameters - ---------- - ra_col : str, optional - The name of the RA column in the data. - Default 'fieldRA'. - dec_col : str, optional - The name of the Dec column in the data. - Default 'fieldDec'. - degrees : bool, optional - Flag whether RA/Dec should be treated as (and kept as) degrees. - field_id_col : str, optional - The name of the fieldId column in the data. - Used to identify fields which should be identified as the 'same'. - Default 'fieldId'. - num_points : int, optional - The number of points in the spiral. - Default 60. - max_dither : float, optional - The radius of the maximum dither offset, in degrees. - Default 1.75 degrees. - n_coils : int, optional - The number of coils the spiral should have. - Default 5. - in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. - Default True. - """ - - # Values required for framework operation: this specifies the names of the new columns. - cols_added = ["spiralDitherFieldPerVisitRa", "spiralDitherFieldPerVisitDec"] - - def __init__( - self, - ra_col="fieldRA", - dec_col="fieldDec", - degrees=True, - field_id_col="fieldId", - num_points=60, - max_dither=1.75, - n_coils=5, - in_hex=True, - ): - """ - @ MaxDither in degrees - """ - super().__init__( - ra_col=ra_col, - dec_col=dec_col, - degrees=degrees, - max_dither=max_dither, - in_hex=in_hex, - ) - self.field_id_col = field_id_col - # Convert max_dither from degrees (internal units for ra/dec are radians) - self.num_points = num_points - self.n_coils = n_coils - # Values required for framework operation: this specifies the data columns required from the database. - self.cols_req = [self.ra_col, self.dec_col, self.field_id_col] - - def _generate_spiral_offsets(self): - # First generate a full archimedean spiral .. - theta = np.arange(0.0001, self.n_coils * np.pi * 2.0, 0.001) - a = self.max_dither / theta.max() - if self.in_hex: - a = 0.85 * a - r = theta * a - # Then pick out equidistant points along the spiral. - arc = a / 2.0 * (theta * np.sqrt(1 + theta**2) + np.log(theta + np.sqrt(1 + theta**2))) - stepsize = arc.max() / float(self.num_points) - arcpts = np.arange(0, arc.max(), stepsize) - arcpts = arcpts[0 : self.num_points] - rpts = np.zeros(self.num_points, float) - thetapts = np.zeros(self.num_points, float) - for i, ap in enumerate(arcpts): - diff = np.abs(arc - ap) - match = np.where(diff == diff.min())[0][0] - rpts[i] = r[match] - thetapts[i] = theta[match] - # Translate these r/theta points into x/y (ra/dec) offsets. - self.x_off = rpts * np.cos(thetapts) - self.y_off = rpts * np.sin(thetapts) - - def _run(self, sim_data, cols_present=False): - if cols_present: - return sim_data - # Generate the spiral offset vertices. - self._generate_spiral_offsets() - # Now apply to observations. 
- if self.degrees: - ra = np.radians(sim_data[self.ra_col]) - dec = np.radians(sim_data[self.dec_col]) - else: - ra = sim_data[self.ra_col] - dec = sim_data[self.dec_col] - for fieldid in np.unique(sim_data[self.field_id_col]): - match = np.where(sim_data[self.field_id_col] == fieldid)[0] - # Apply sequential dithers, increasing with each visit. - vertex_idxs = np.arange(0, len(match), 1) - vertex_idxs = vertex_idxs % self.num_points - sim_data["spiralDitherFieldPerVisitRa"][match] = ra[match] + self.x_off[vertex_idxs] / np.cos( - dec[match] - ) - sim_data["spiralDitherFieldPerVisitDec"][match] = dec[match] + self.y_off[vertex_idxs] - # Wrap into expected range. - ( - sim_data["spiralDitherFieldPerVisitRa"], - sim_data["spiralDitherFieldPerVisitDec"], - ) = wrap_ra_dec( - sim_data["spiralDitherFieldPerVisitRa"], - sim_data["spiralDitherFieldPerVisitDec"], - ) - if self.degrees: - for col in self.cols_added: - sim_data[col] = np.degrees(sim_data[col]) - return sim_data - - -class SpiralDitherFieldPerNightStacker(SpiralDitherFieldPerVisitStacker): - """ - Offset along an equidistant spiral with num_points, out to a maximum radius of max_dither. - Each field steps along a sequential series of offsets, each night it is observed. - - Parameters - ---------- - ra_col : str, optional - The name of the RA column in the data. - Default 'fieldRA'. - dec_col : str, optional - The name of the Dec column in the data. - Default 'fieldDec'. - degrees : bool, optional - Flag whether RA/Dec should be treated as (and kept as) degrees. - field_id_col : str, optional - The name of the fieldId column in the data. - Used to identify fields which should be identified as the 'same'. - Default 'fieldId'. - night_col : str, optional - The name of the night column in the data. - Default 'night'. - num_points : int, optional - The number of points in the spiral. - Default 60. - max_dither : float, optional - The radius of the maximum dither offset, in degrees. - Default 1.75 degrees. - n_coils : int, optional - The number of coils the spiral should have. - Default 5. - in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. - Default True. - """ - - # Values required for framework operation: this specifies the names of the new columns. - cols_added = ["spiralDitherFieldPerNightRa", "spiralDitherFieldPerNightDec"] - - def __init__( - self, - ra_col="fieldRA", - dec_col="fieldDec", - degrees=True, - field_id_col="fieldId", - night_col="night", - num_points=60, - max_dither=1.75, - n_coils=5, - in_hex=True, - ): - """ - @ MaxDither in degrees - """ - super().__init__( - ra_col=ra_col, - dec_col=dec_col, - degrees=degrees, - field_id_col=field_id_col, - num_points=num_points, - max_dither=max_dither, - n_coils=n_coils, - in_hex=in_hex, - ) - self.night_col = night_col - # Values required for framework operation: this specifies the data columns required from the database. - self.cols_req.append(self.night_col) - - def _run(self, sim_data, cols_present=False): - if cols_present: - return sim_data - self._generate_spiral_offsets() - if self.degrees: - ra = np.radians(sim_data[self.ra_col]) - dec = np.radians(sim_data[self.dec_col]) - else: - ra = sim_data[self.ra_col] - dec = sim_data[self.dec_col] - for fieldid in np.unique(sim_data[self.field_id_col]): - # Identify observations of this field. 
- match = np.where(sim_data[self.field_id_col] == fieldid)[0] - # Apply a sequential dither, increasing each night. - nights = sim_data[self.night_col][match] - vertex_idxs = np.searchsorted(np.unique(nights), nights) - vertex_idxs = vertex_idxs % self.num_points - sim_data["spiralDitherFieldPerNightRa"][match] = ra[match] + self.x_off[vertex_idxs] / np.cos( - dec[match] - ) - sim_data["spiralDitherFieldPerNightDec"][match] = dec[match] + self.y_off[vertex_idxs] - # Wrap into expected range. - ( - sim_data["spiralDitherFieldPerNightRa"], - sim_data["spiralDitherFieldPerNightDec"], - ) = wrap_ra_dec( - sim_data["spiralDitherFieldPerNightRa"], - sim_data["spiralDitherFieldPerNightDec"], - ) - if self.degrees: - for col in self.cols_added: - sim_data[col] = np.degrees(sim_data[col]) - return sim_data - - -class SpiralDitherPerNightStacker(SpiralDitherFieldPerVisitStacker): - """ - Offset along an equidistant spiral with num_points, out to a maximum radius of max_dither. - All fields observed in the same night receive the same sequential offset, changing per night. - - Parameters - ---------- - ra_col : str, optional - The name of the RA column in the data. - Default 'fieldRA'. - dec_col : str, optional - The name of the Dec column in the data. - Default 'fieldDec'. - degrees : bool, optional - Flag whether RA/Dec should be treated as (and kept as) degrees. - field_id_col : str, optional - The name of the fieldId column in the data. - Used to identify fields which should be identified as the 'same'. - Default 'fieldId'. - night_col : str, optional - The name of the night column in the data. - Default 'night'. - num_points : int, optional - The number of points in the spiral. - Default 60. - max_dither : float, optional - The radius of the maximum dither offset, in degrees. - Default 1.75 degrees. - n_coils : int, optional - The number of coils the spiral should have. - Default 5. - in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. - Default True. - """ - - # Values required for framework operation: this specifies the names of the new columns. - cols_added = ["spiralDitherPerNightRa", "spiralDitherPerNightDec"] - - def __init__( - self, - ra_col="fieldRA", - dec_col="fieldDec", - degrees=True, - field_id_col="fieldId", - night_col="night", - num_points=60, - max_dither=1.75, - n_coils=5, - in_hex=True, - ): - """ - @ MaxDither in degrees - """ - super().__init__( - ra_col=ra_col, - dec_col=dec_col, - degrees=degrees, - field_id_col=field_id_col, - num_points=num_points, - max_dither=max_dither, - n_coils=n_coils, - in_hex=in_hex, - ) - self.night_col = night_col - # Values required for framework operation: this specifies the data columns required from the database. - self.cols_req.append(self.night_col) - - def _run(self, sim_data, cols_present=False): - if cols_present: - return sim_data - self._generate_spiral_offsets() - nights = np.unique(sim_data[self.night_col]) - if self.degrees: - ra = np.radians(sim_data[self.ra_col]) - dec = np.radians(sim_data[self.dec_col]) - else: - ra = sim_data[self.ra_col] - dec = sim_data[self.dec_col] - # Add to RA and dec values. 
- vertex_idxs = np.searchsorted(nights, sim_data[self.night_col]) - vertex_idxs = vertex_idxs % self.num_points - sim_data["spiralDitherPerNightRa"] = ra + self.x_off[vertex_idxs] / np.cos(dec) - sim_data["spiralDitherPerNightDec"] = dec + self.y_off[vertex_idxs] - # Wrap RA/Dec into expected range. - ( - sim_data["spiralDitherPerNightRa"], - sim_data["spiralDitherPerNightDec"], - ) = wrap_ra_dec(sim_data["spiralDitherPerNightRa"], sim_data["spiralDitherPerNightDec"]) - if self.degrees: - for col in self.cols_added: - sim_data[col] = np.degrees(sim_data[col]) - return sim_data - - -class HexDitherFieldPerVisitStacker(BaseDitherStacker): - """ - Use offsets from the hexagonal grid of 'hexdither', but visit each vertex sequentially. - Sequential offset for each visit. - - Parameters - ---------- - ra_col : str, optional - The name of the RA column in the data. - Default 'fieldRA'. - dec_col : str, optional - The name of the Dec column in the data. - Default 'fieldDec'. - degrees : bool, optional - Flag whether RA/Dec should be treated as (and kept as) degrees. - field_id_col : str, optional - The name of the fieldId column in the data. - Used to identify fields which should be identified as the 'same'. - Default 'fieldId'. - max_dither : float, optional - The radius of the maximum dither offset, in degrees. - Default 1.75 degrees. - in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. - Default True. - """ - - # Values required for framework operation: this specifies the names of the new columns. - cols_added = ["hexDitherFieldPerVisitRa", "hexDitherFieldPerVisitDec"] - - def __init__( - self, - ra_col="fieldRA", - dec_col="fieldDec", - degrees=True, - field_id_col="fieldId", - max_dither=1.75, - in_hex=True, - ): - """ - @ MaxDither in degrees - """ - super().__init__( - ra_col=ra_col, - dec_col=dec_col, - degrees=degrees, - max_dither=max_dither, - in_hex=in_hex, - ) - self.field_id_col = field_id_col - # Values required for framework operation: this specifies the data columns required from the database. - self.cols_req = [self.ra_col, self.dec_col, self.field_id_col] - - def _generate_hex_offsets(self): - # Set up basics of dither pattern. - dith_level = 4 - nrows = 2**dith_level - halfrows = int(nrows / 2.0) - # Calculate size of each offset - dith_size_x = self.max_dither * 2.0 / float(nrows) - dith_size_y = np.sqrt(3) * self.max_dither / float(nrows) # sqrt 3 comes from hexagon - if self.in_hex: - dith_size_x = 0.95 * dith_size_x - dith_size_y = 0.95 * dith_size_y - # Calculate the row identification number, going from 0 at center - nid_row = np.arange(-halfrows, halfrows + 1, 1) - # and calculate the number of vertices in each row. - vert_in_row = np.arange(-halfrows, halfrows + 1, 1) - # First calculate how many vertices we will create in each row. - total_vert = 0 - for i in range(-halfrows, halfrows + 1, 1): - vert_in_row[i] = (nrows + 1) - abs(nid_row[i]) - total_vert += vert_in_row[i] - self.num_points = total_vert - self.x_off = [] - self.y_off = [] - # Calculate offsets over hexagonal grid. 
- for i in range(0, nrows + 1, 1): - for j in range(0, vert_in_row[i], 1): - self.x_off.append(dith_size_x * (j - (vert_in_row[i] - 1) / 2.0)) - self.y_off.append(dith_size_y * nid_row[i]) - self.x_off = np.array(self.x_off) - self.y_off = np.array(self.y_off) - - def _run(self, sim_data, cols_present=False): - if cols_present: - return sim_data - self._generate_hex_offsets() - if self.degrees: - ra = np.radians(sim_data[self.ra_col]) - dec = np.radians(sim_data[self.dec_col]) - else: - ra = sim_data[self.ra_col] - dec = sim_data[self.dec_col] - for fieldid in np.unique(sim_data[self.field_id_col]): - # Identify observations of this field. - match = np.where(sim_data[self.field_id_col] == fieldid)[0] - # Apply sequential dithers, increasing with each visit. - vertex_idxs = np.arange(0, len(match), 1) - vertex_idxs = vertex_idxs % self.num_points - sim_data["hexDitherFieldPerVisitRa"][match] = ra[match] + self.x_off[vertex_idxs] / np.cos( - dec[match] - ) - sim_data["hexDitherFieldPerVisitDec"][match] = dec[match] + self.y_off[vertex_idxs] - # Wrap into expected range. - ( - sim_data["hexDitherFieldPerVisitRa"], - sim_data["hexDitherFieldPerVisitDec"], - ) = wrap_ra_dec(sim_data["hexDitherFieldPerVisitRa"], sim_data["hexDitherFieldPerVisitDec"]) - if self.degrees: - for col in self.cols_added: - sim_data[col] = np.degrees(sim_data[col]) - return sim_data - - -class HexDitherFieldPerNightStacker(HexDitherFieldPerVisitStacker): - """ - Use offsets from the hexagonal grid of 'hexdither', but visit each vertex sequentially. - Sequential offset for each night of visits. - - Parameters - ---------- - ra_col : str, optional - The name of the RA column in the data. - Default 'fieldRA'. - dec_col : str, optional - The name of the Dec column in the data. - Default 'fieldDec'. - degrees : bool, optional - Flag whether RA/Dec should be treated as (and kept as) degrees. - field_id_col : str, optional - The name of the fieldId column in the data. - Used to identify fields which should be identified as the 'same'. - Default 'fieldId'. - night_col : str, optional - The name of the night column in the data. - Default 'night'. - max_dither : float, optional - The radius of the maximum dither offset, in degrees. - Default 1.75 degrees. - in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. - Default True. - """ - - # Values required for framework operation: this specifies the names of the new columns. - cols_added = ["hexDitherFieldPerNightRa", "hexDitherFieldPerNightDec"] - - def __init__( - self, - ra_col="fieldRA", - dec_col="fieldDec", - degrees=True, - field_id_col="fieldId", - night_col="night", - max_dither=1.75, - in_hex=True, - ): - """ - @ MaxDither in degrees - """ - super().__init__( - ra_col=ra_col, - dec_col=dec_col, - field_id_col=field_id_col, - degrees=degrees, - max_dither=max_dither, - in_hex=in_hex, - ) - self.night_col = night_col - # Values required for framework operation: this specifies the data columns required from the database. 
- self.cols_req.append(self.night_col) - - def _run(self, sim_data, cols_present=False): - if cols_present: - return sim_data - self._generate_hex_offsets() - if self.degrees: - ra = np.radians(sim_data[self.ra_col]) - dec = np.radians(sim_data[self.dec_col]) - else: - ra = sim_data[self.ra_col] - dec = sim_data[self.dec_col] - for fieldid in np.unique(sim_data[self.field_id_col]): - # Identify observations of this field. - match = np.where(sim_data[self.field_id_col] == fieldid)[0] - # Apply a sequential dither, increasing each night. - vertex_idxs = np.arange(0, len(match), 1) - nights = sim_data[self.night_col][match] - vertex_idxs = np.searchsorted(np.unique(nights), nights) - vertex_idxs = vertex_idxs % self.num_points - sim_data["hexDitherFieldPerNightRa"][match] = ra[match] + self.x_off[vertex_idxs] / np.cos( - dec[match] - ) - sim_data["hexDitherFieldPerNightDec"][match] = dec[match] + self.y_off[vertex_idxs] - # Wrap into expected range. - ( - sim_data["hexDitherFieldPerNightRa"], - sim_data["hexDitherFieldPerNightDec"], - ) = wrap_ra_dec(sim_data["hexDitherFieldPerNightRa"], sim_data["hexDitherFieldPerNightDec"]) - if self.degrees: - for col in self.cols_added: - sim_data[col] = np.degrees(sim_data[col]) - return sim_data - - -class HexDitherPerNightStacker(HexDitherFieldPerVisitStacker): - """ - Use offsets from the hexagonal grid of 'hexdither', but visit each vertex sequentially. - Sequential offset per night for all fields. - - Parameters - ---------- - ra_col : str, optional - The name of the RA column in the data. - Default 'fieldRA'. - dec_col : str, optional - The name of the Dec column in the data. - Default 'fieldDec'. - degrees : bool, optional - Flag whether RA/Dec should be treated as (and kept as) degrees. - field_id_col : str, optional - The name of the fieldId column in the data. - Used to identify fields which should be identified as the 'same'. - Default 'fieldId'. - night_col : str, optional - The name of the night column in the data. - Default 'night'. - max_dither : float, optional - The radius of the maximum dither offset, in degrees. - Default 1.75 degrees. - in_hex : bool, optional - If True, offsets are constrained to lie within a hexagon inscribed within the max_dither circle. - If False, offsets can lie anywhere out to the edges of the max_dither circle. - Default True. - """ - - # Values required for framework operation: this specifies the names of the new columns. - cols_added = ["hexDitherPerNightRa", "hexDitherPerNightDec"] - - def __init__( - self, - ra_col="fieldRA", - dec_col="fieldDec", - degrees=True, - field_id_col="fieldId", - night_col="night", - max_dither=1.75, - in_hex=True, - ): - """ - @ MaxDither in degrees - """ - super().__init__( - ra_col=ra_col, - dec_col=dec_col, - degrees=degrees, - field_id_col=field_id_col, - max_dither=max_dither, - in_hex=in_hex, - ) - self.night_col = night_col - # Values required for framework operation: this specifies the data columns required from the database. - self.cols_req.append(self.night_col) - self.added_ra = self.cols_added[0] - self.added_dec = self.cols_added[1] - - def _run(self, sim_data, cols_present=False): - if cols_present: - return sim_data - # Generate the spiral dither values - self._generate_hex_offsets() - nights = np.unique(sim_data[self.night_col]) - if self.degrees: - ra = np.radians(sim_data[self.ra_col]) - dec = np.radians(sim_data[self.dec_col]) - else: - ra = sim_data[self.ra_col] - dec = sim_data[self.dec_col] - # Add to RA and dec values. 
- vertex_id = 0 - for n in nights: - match = np.where(sim_data[self.night_col] == n)[0] - vertex_id = vertex_id % self.num_points - sim_data[self.added_ra][match] = ra[match] + self.x_off[vertex_id] / np.cos(dec[match]) - sim_data[self.added_dec][match] = dec[match] + self.y_off[vertex_id] - vertex_id += 1 - # Wrap RA/Dec into expected range. - sim_data[self.added_ra], sim_data[self.added_dec] = wrap_ra_dec( - sim_data[self.added_ra], sim_data[self.added_dec] - ) - if self.degrees: - for col in self.cols_added: - sim_data[col] = np.degrees(sim_data[col]) - return sim_data class RandomRotDitherPerFilterChangeStacker(BaseDitherStacker): @@ -1181,9 +456,11 @@ class RandomRotDitherPerFilterChangeStacker(BaseDitherStacker): between -max_dither to max_dither. Default: 90 degrees. max_rot_angle : float, optional - Maximum rotator angle possible for the camera (degrees). Default 90 degrees. + Maximum rotator angle possible for the camera (degrees). + Default 90 degrees. min_rot_angle : float, optional - Minimum rotator angle possible for the camera (degrees). Default -90 degrees. + Minimum rotator angle possible for the camera (degrees). + Default -90 degrees. random_seed: int, optional If set, then used as the random seed for the numpy random number generation for the dither offsets. @@ -1194,7 +471,8 @@ class RandomRotDitherPerFilterChangeStacker(BaseDitherStacker): Default: False """ - # Values required for framework operation: this specifies the names of the new columns. + # Values required for framework operation: this specifies + # the names of the new columns. cols_added = ["randomDitherPerFilterChangeRotTelPos"] def __init__( @@ -1227,7 +505,8 @@ def __init__( self.min_rot_angle = np.radians(self.min_rot_angle) self.debug = debug - # Values required for framework operation: specify the data columns required from the database. + # Values required for framework operation: + # specify the data columns required from the database. self.cols_req = [self.rot_tel_col, self.filter_col] def _run(self, sim_data, cols_present=False): @@ -1238,8 +517,10 @@ def _run(self, sim_data, cols_present=False): if cols_present: return sim_data - # Generate random numbers for dither, using defined seed value if desired. - # Note that we must define the random state for np.random, to ensure consistency in the build system. + # Generate random numbers for dither, using defined seed value + # if desired. + # Note that we must define the random state for np.random, + # to ensure consistency in the build system. if not hasattr(self, "_rng"): if self.random_seed is not None: self._rng = np.random.RandomState(self.random_seed) @@ -1266,24 +547,32 @@ def _run(self, sim_data, cols_present=False): rot_dither = self.cols_added[0] if len(change_idxs) == 0: - # There are no filter changes, so nothing to dither. Just use original values. + # There are no filter changes, so nothing to dither. + # Just use original values. sim_data[rot_dither] = sim_data[self.rot_tel_col] else: - # For each filter change, generate a series of random values for the offsets, - # between +/- self.max_dither. These are potential values for the rotational offset. - # The offset actually used will be confined to ensure that rotTelPos for all visits in + # For each filter change, generate a series of random + # values for the offsets, + # between +/- self.max_dither. These are potential values + # for the rotational offset. 
+ # The offset actually used will be confined to ensure that + # rotTelPos for all visits in # that set of observations (between filter changes) fall within - # the specified min/maxRotAngle -- without truncating the rotTelPos values. + # the specified min/maxRotAngle -- without truncating the + # rotTelPos values. - # Generate more offsets than needed - either 2x filter changes or 2500, whichever is bigger. + # Generate more offsets than needed - either 2x filter changes + # or 2500, whichever is bigger. # 2500 is an arbitrary number. max_num = max(len(change_idxs) * 2, 2500) rot_offset = np.zeros(len(sim_data), float) - # Some sets of visits will not be assigned dithers: it was too hard to find an offset. + # Some sets of visits will not be assigned dithers: + # it was too hard to find an offset. n_problematic_ones = 0 - # Loop over the filter change indexes (current filter change, next filter change) to identify + # Loop over the filter change indexes (current filter change, + # next filter change) to identify # sets of visits that should have the same offset. for c, cn in zip(change_idxs, change_idxs[1:]): random_offsets = self._rng.rand(max_num + 1) * 2.0 * self.max_dither - self.max_dither @@ -1291,7 +580,8 @@ def _run(self, sim_data, cols_present=False): potential_offset = random_offsets[i] # Calculate new rotTelPos values, if we used this offset. new_rot_tel = sim_data[self.rot_tel_col][c + 1 : cn + 1] + potential_offset - # Does it work? Do all values fall within minRotAngle / maxRotAngle? + # Does it work? + # Do all values fall within minRotAngle / maxRotAngle? good_to_go = (new_rot_tel >= self.min_rot_angle).all() and ( new_rot_tel <= self.max_rot_angle ).all() @@ -1304,13 +594,17 @@ def _run(self, sim_data, cols_present=False): new_rot_tel <= self.max_rot_angle ).all() - if not good_to_go: # i.e. no good offset was found after max_num tries + if not good_to_go: + # i.e. no good offset was found after max_num tries n_problematic_ones += 1 - rot_offset[c + 1 : cn + 1] = 0.0 # no dither + rot_offset[c + 1 : cn + 1] = 0.0 + # no dither else: - rot_offset[c + 1 : cn + 1] = random_offsets[i] # assign the chosen offset + rot_offset[c + 1 : cn + 1] = random_offsets[i] + # assign the chosen offset - # Handle the last set of observations (after the last filter change to the end of the survey). + # Handle the last set of observations (after the last filter + # change to the end of the survey). random_offsets = self._rng.rand(max_num + 1) * 2.0 * self.max_dither - self.max_dither i = 0 potential_offset = random_offsets[i] @@ -1327,7 +621,8 @@ def _run(self, sim_data, cols_present=False): new_rot_tel <= self.max_rot_angle ).all() - if not good_to_go: # i.e. no good offset was found after max_num tries + if not good_to_go: + # i.e. 
no good offset was found after max_num tries n_problematic_ones += 1 rot_offset[c + 1 : cn + 1] = 0.0 else: From c9831a7aa093a5168e7a0424b904f09d7ae1dfd6 Mon Sep 17 00:00:00 2001 From: Lynne Jones Date: Mon, 17 Jun 2024 15:50:08 -0700 Subject: [PATCH 21/21] black and isort --- rubin_sim/maf/stackers/__init__.py | 2 +- rubin_sim/maf/stackers/dither_stackers.py | 2 -- tests/maf/test_3x2fom.py | 1 + tests/maf/test_batches.py | 2 ++ tests/maf/test_metricbundle.py | 1 + tests/maf/test_opsimutils.py | 2 ++ tests/maf/test_stackers.py | 8 ++------ 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rubin_sim/maf/stackers/__init__.py b/rubin_sim/maf/stackers/__init__.py index 1efe6855b..97f492ec4 100644 --- a/rubin_sim/maf/stackers/__init__.py +++ b/rubin_sim/maf/stackers/__init__.py @@ -1,6 +1,7 @@ from .base_stacker import * from .coord_stackers import * from .date_stackers import * +from .dither_stackers import * from .general_stackers import * from .get_col_info import * from .label_stackers import * @@ -11,4 +12,3 @@ from .sdss_stackers import * from .sn_stacker import * from .teff_stacker import * -from .dither_stackers import * diff --git a/rubin_sim/maf/stackers/dither_stackers.py b/rubin_sim/maf/stackers/dither_stackers.py index 18912f57d..e43d2c37e 100644 --- a/rubin_sim/maf/stackers/dither_stackers.py +++ b/rubin_sim/maf/stackers/dither_stackers.py @@ -429,8 +429,6 @@ def _run(self, sim_data, cols_present=False): return sim_data - - class RandomRotDitherPerFilterChangeStacker(BaseDitherStacker): """ Randomly dither the physical angle of the telescope rotator wrt the mount, diff --git a/tests/maf/test_3x2fom.py b/tests/maf/test_3x2fom.py index 243569fb1..802fcad05 100644 --- a/tests/maf/test_3x2fom.py +++ b/tests/maf/test_3x2fom.py @@ -10,6 +10,7 @@ TEST_DB = "example_v3.4_0yrs.db" + class Test3x2(unittest.TestCase): @classmethod def tearDownClass(cls): diff --git a/tests/maf/test_batches.py b/tests/maf/test_batches.py index 42980dfd9..6dfec00a6 100644 --- a/tests/maf/test_batches.py +++ b/tests/maf/test_batches.py @@ -13,6 +13,8 @@ from rubin_sim.maf.slicers import MoObjSlicer TEST_DB = "example_v3.4_0yrs.db" + + class TestBatches(unittest.TestCase): @classmethod def tearDown_class(cls): diff --git a/tests/maf/test_metricbundle.py b/tests/maf/test_metricbundle.py index e887cf2f6..29318dc7d 100644 --- a/tests/maf/test_metricbundle.py +++ b/tests/maf/test_metricbundle.py @@ -16,6 +16,7 @@ TEST_DB = "example_v3.4_0yrs.db" + class TestMetricBundle(unittest.TestCase): @classmethod def tearDown_class(cls): diff --git a/tests/maf/test_opsimutils.py b/tests/maf/test_opsimutils.py index 84a90bd4a..b385b1f2b 100644 --- a/tests/maf/test_opsimutils.py +++ b/tests/maf/test_opsimutils.py @@ -8,6 +8,8 @@ import rubin_sim.maf.utils.opsim_utils as opsimUtils TEST_DB = "example_v3.4_0yrs.db" + + class TestOpsimUtils(unittest.TestCase): def test_scale_benchmarks(self): """Test scaling the design and stretch benchmarks for the diff --git a/tests/maf/test_stackers.py b/tests/maf/test_stackers.py index 70b639b0f..f15b5bdf8 100644 --- a/tests/maf/test_stackers.py +++ b/tests/maf/test_stackers.py @@ -20,6 +20,7 @@ TEST_DB = "example_v3.4_0yrs.db" + class TestStackerClasses(unittest.TestCase): def setUp(self): # get some of the test data @@ -146,8 +147,6 @@ def _t_dither_per_night(self, diffsra, diffsdec, ra, dec, nights): self.assertAlmostEqual(dra_on_night.max(), 0) self.assertAlmostEqual(ddec_on_night.max(), 0) - - def test_random_dither(self): """ Test the random dither pattern. 
@@ -162,9 +161,7 @@ def test_random_dither(self): data["fieldDec"] = np.degrees(rng.random_sample(600) * np.pi / 2.0 - np.pi / 4.0) stacker = stackers.RandomDitherPerVisitStacker(max_dither=max_dither) data = stacker.run(data) - diffsra = (data["fieldRA"] - data["randomDitherPerVisitRa"]) * np.cos( - np.radians(data["fieldDec"]) - ) + diffsra = (data["fieldRA"] - data["randomDitherPerVisitRa"]) * np.cos(np.radians(data["fieldDec"])) diffsdec = data["fieldDec"] - data["randomDitherPerVisitDec"] # Check dithers within expected range. self._t_dither_range(diffsra, diffsdec, data["fieldRA"], data["fieldDec"], max_dither) @@ -200,7 +197,6 @@ def test_random_dither_per_night(self): # Check that dithers on the same night are the same. self._t_dither_per_night(diffsra, diffsdec, data["fieldRA"], data["fieldDec"], data["night"]) - def test_random_rot_dither_per_filter_change_stacker(self): """ Test the rotational dither stacker.
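As a closing illustration, the re-enabled stackers can be exercised directly on a small synthetic visit array, in the same spirit as the updated unit tests above. A minimal sketch, assuming rubin_sim is installed; the array contents are invented:

import numpy as np

import rubin_sim.maf.stackers as stackers

rng = np.random.RandomState(42)
ndata = 100
data = np.zeros(ndata, dtype=list(zip(["fieldRA", "fieldDec", "night"], [float, float, int])))
data["fieldRA"] = np.degrees(rng.random_sample(ndata) * np.pi + np.pi / 2.0)
data["fieldDec"] = np.degrees(rng.random_sample(ndata) * np.pi / 2.0 - np.pi / 4.0)
data["night"] = np.floor(rng.random_sample(ndata) * 10).astype(int)

# All visits from the same night share one offset, drawn within
# max_dither degrees of the field center.
stacker = stackers.RandomDitherPerNightStacker(max_dither=0.5, random_seed=42)
data = stacker.run(data)
print(data["randomDitherPerNightRa"][:5], data["randomDitherPerNightDec"][:5])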