diff --git a/20240808_fix_apo1m_mwm_products.py b/20240808_fix_apo1m_mwm_products.py deleted file mode 100644 index 574678e..0000000 --- a/20240808_fix_apo1m_mwm_products.py +++ /dev/null @@ -1,15 +0,0 @@ -from astra.products.mwm import create_mwmVisit_and_mwmStar_products -from astra.models import ApogeeCoaddedSpectrumInApStar, Source - -from tqdm import tqdm - -s = ( - Source - .select() - .join(ApogeeCoaddedSpectrumInApStar, on=(ApogeeCoaddedSpectrumInApStar.source_pk == Source.pk)) - .where(ApogeeCoaddedSpectrumInApStar.telescope == "apo1m") -) - -for source in tqdm(s): - create_mwmVisit_and_mwmStar_products(source, apreds=("dr17", "1.3"), run2ds=("v6_1_3", ), overwrite=True) - diff --git a/20240808_snow_white.py b/20240808_snow_white.py deleted file mode 100644 index 71629bc..0000000 --- a/20240808_snow_white.py +++ /dev/null @@ -1,76 +0,0 @@ -from astra import __version__ -from astra.models.boss import BossVisitSpectrum -from astra.models.mwm import BossCombinedSpectrum, BossRestFrameVisitSpectrum -from astra.pipelines.snow_white import snow_white -from astra.models import Source, SnowWhite -from peewee import JOIN -from tqdm import tqdm - -q = ( - BossCombinedSpectrum - .select() - .join(Source) - .switch(BossCombinedSpectrum) - .join( - SnowWhite, - JOIN.LEFT_OUTER, - on=( - (SnowWhite.spectrum_pk == BossCombinedSpectrum.spectrum_pk) - & (SnowWhite.v_astra == __version__) - ) - ) - .where( - Source.assigned_to_program("mwm_wd") - & SnowWhite.spectrum_pk.is_null() - & (BossCombinedSpectrum.run2d == "v6_1_3") - ) -) - -for item in tqdm(snow_white(q), total=1): - None - -q = ( - BossVisitSpectrum - .select() - .join(Source) - .switch(BossVisitSpectrum) - .join( - SnowWhite, - JOIN.LEFT_OUTER, - on=( - (SnowWhite.spectrum_pk == BossVisitSpectrum.spectrum_pk) - & (SnowWhite.v_astra == __version__) - ) - ) - .where( - Source.assigned_to_program("mwm_wd") - & SnowWhite.spectrum_pk.is_null() - & (BossVisitSpectrum.run2d == "v6_1_3") - ) -) -for item in tqdm(snow_white(q), total=1): - None - - -q = ( - BossRestFrameVisitSpectrum - .select() - .join(Source) - .switch(BossRestFrameVisitSpectrum) - .join( - SnowWhite, - JOIN.LEFT_OUTER, - on=( - (SnowWhite.spectrum_pk == BossRestFrameVisitSpectrum.spectrum_pk) - & (SnowWhite.v_astra == __version__) - ) - ) - .where( - Source.assigned_to_program("mwm_wd") - & SnowWhite.spectrum_pk.is_null() - & (BossRestFrameVisitSpectrum.run2d == "v6_1_3") - ) -) -for item in tqdm(snow_white(q), total=1): - None - diff --git a/20240815_corv.py b/20240815_corv.py deleted file mode 100644 index 065219f..0000000 --- a/20240815_corv.py +++ /dev/null @@ -1,5 +0,0 @@ -from astra.pipelines.corv import corv -from tqdm import tqdm - -for item in tqdm(corv(max_workers=32), total=0): - None diff --git a/20240820_make_aspcapstar.py b/20240820_make_aspcapstar.py deleted file mode 100644 index 8b4a02b..0000000 --- a/20240820_make_aspcapstar.py +++ /dev/null @@ -1,13 +0,0 @@ -from astra.models import ASPCAP, ApogeeCoaddedSpectrumInApStar -from astra.products.pipeline import create_star_pipeline_products_for_all_sources - -if __name__ == "__main__": - import sys - create_star_pipeline_products_for_all_sources( - ASPCAP, - apogee_where=( - ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3")) - ), - page=int(sys.argv[1]), - limit=int(sys.argv[2]) - ) diff --git a/20240830_remove_proprietary_boss.py b/20240830_remove_proprietary_boss.py deleted file mode 100644 index afb9e57..0000000 --- a/20240830_remove_proprietary_boss.py +++ /dev/null @@ -1,159 +0,0 @@ - -# The DR19 (v0.6.0) release should restrict BOSS spectra to things up and until 60130. -import os -import pickle -from tqdm import tqdm -from astra.models import BossVisitSpectrum, BossCombinedSpectrum, Source - - -# For all these spectra, we need to do the following things: - -# [X] 1. Delete the affected BossVisitSpectrum objects. - -# [X] 2. Delete the affected BossRestFrameVisitSpectrum objects. - -boss_visit_spectrum_path = "20240830_boss_spectra.pkl" -if os.path.exists(boss_visit_spectrum_path): - with open(boss_visit_spectrum_path, "rb") as f: - pks = pickle.load(f) -else: - spectra = list( - BossVisitSpectrum - .select() - .where( - (BossVisitSpectrum.run2d == 'v6_1_3') - & (BossVisitSpectrum.mjd > 60130) - ) - ) - # This first instance is one I deleted by hand to test the cascading. - pks = {"source_pk": [10345532, None, 9337795, 1379544], "spectrum_pk": [36841167, 36887682, 35276769, 36134659], "boss_visit_spectrum_pk": [5038130, 5744638, 7637413, 4180240]} - for spectrum in spectra: - pks["source_pk"].append(spectrum.source_pk) - pks["spectrum_pk"].append(spectrum.spectrum_pk_id) - pks["boss_visit_spectrum_pk"].append(spectrum.pk) - - with open(boss_visit_spectrum_path, "wb") as f: - pickle.dump(pks, f) - - # Delete them with cascade, which impacts the BossRestFrameVisitSpectrum reference. - for spectrum in tqdm(spectra): - spectrum.delete_instance(recursive=True) - -# [X] 3. Delete the affected BossCombinedSpectrum objects. -if False: - source_pks = list(set(pks["source_pk"]).difference({None})) - q = ( - BossCombinedSpectrum - .delete() - .where( - (BossCombinedSpectrum.run2d == 'v6_1_3') - & (BossCombinedSpectrum.v_astra == '0.6.0') - & BossCombinedSpectrum.source_pk.in_(source_pks) - ) - .execute() - ) - -# [X] 4. Delete the affected mwmVisit and mwmStar files. -# -> 0 mwmStar -# -> 0 mwmVisit -if False: - sdss_ids = list( - Source - .select(Source.sdss_id) - .where(Source.pk.in_(source_pks)) - .tuples() - ) - for (sdss_id, ) in tqdm(sdss_ids): - - num = (f"{sdss_id}")[-4:] - if len(num) < 4: - num = ("0" + (4 - len(num))) + num - - u, d = (num[:2], num[2:]) - - mwmStar_path = f"/uufs/chpc.utah.edu/common/home/sdss51/sdsswork/mwm/spectro/astra/0.6.0/star/{u}/{d}/mwmStar-0.6.0-{sdss_id}.fits" - mwmVisit_path = f"/uufs/chpc.utah.edu/common/home/sdss51/sdsswork/mwm/spectro/astra/0.6.0/visit/{u}/{d}/mwmVisit-0.6.0-{sdss_id}.fits" - - if os.path.exists(mwmStar_path): - os.system(f"mv {mwmStar_path} {mwmStar_path}.for_removal") - if os.path.exists(mwmVisit_path): - os.system(f"mv {mwmVisit_path} {mwmVisit_path}.for_removal") - - -# [X] 5. Recreate the mwmVisit and mwmStar files for the affected sources. - -# [X] 6. Go through all the pipeline tables and remove the results for the affected BOSS visit spectra. -from astra.models import (BossNet, SnowWhite, LineForest, MDwarfType, Corv) - -if False: - boss_pipeline_models = (BossNet, SnowWhite, LineForest, MDwarfType, Corv) - for model in boss_pipeline_models: - n = ( - model - .delete() - .where( - (model.v_astra == "0.6.0") - & (model.spectrum_pk.in_(pks["spectrum_pk"])) - ) - .execute() - ) - print(model, n) - - ''' - 1737800 - 33930 - 365344 - 0 - 15435 - ''' - -# [X] Update spectrum counts, min/max mjds. -if False: - from astra.migrations.misc import update_visit_spectra_counts - from astra.models import ApogeeVisitSpectrum, BossVisitSpectrum - update_visit_spectra_counts( - apogee_where=ApogeeVisitSpectrum.apred.in_(("dr17", "1.3")), - boss_where=(BossVisitSpectrum.run2d == "v6_1_3") - ) - - -# 7. Re-build pipeline-level files for affected SnowWhite/Corv things. -if False: - - from astra.products.pipeline import create_visit_pipeline_product - - q = ( - Source - .select() - .distinct(Source.sdss_id) - .join(BossVisitSpectrum, on=(BossVisitSpectrum.source_pk == Source.pk)) - .where( - Source.assigned_to_program("mwm_wd") - & Source.pk.in_(pks["source_pk"]) - ) - ) - from tqdm import tqdm - - for source in tqdm(q): - try: - create_visit_pipeline_product( - source, - SnowWhite, - overwrite=True - ) - except: - print("fail") - try: - Corv.get(source_pk=source.pk) - except: - None - else: - create_visit_pipeline_product( - source, - Corv, - overwrite=True - ) - -# 7. Delete any sources that don't have any APOGEE spectra or BOSS spectra. -> Just restrict mwmTargets/mwmStar etc to those with n_apogee_visits > 0 or n_boss_visits > - -# 8. Construct new summary files for any affected pipelines. diff --git a/20240912_builder.py b/20240912_builder.py deleted file mode 100644 index 48fcf7b..0000000 --- a/20240912_builder.py +++ /dev/null @@ -1,60 +0,0 @@ -from astra.models import (ASPCAP, ApogeeNet, AstroNN, AstroNNdist, BossNet, Corv, LineForest, MDwarfType, Slam, SnowWhite, TheCannon, ThePayne) -from astra.models.mwm import BossVisitSpectrum, BossCombinedSpectrum -from astra.products.mwm_summary import (create_mwm_targets_product, create_mwm_all_star_product, create_mwm_all_visit_product) -from astra.products.pipeline_summary import (create_astra_all_star_product, create_astra_all_visit_product) -from astra.utils import log - -from astra.models import ApogeeCoaddedSpectrumInApStar, ApogeeVisitSpectrumInApStar, ApogeeVisitSpectrum - - -log.info("mwmTargets") -create_mwm_targets_product(overwrite=True) - -log.info("mwmAllStar") -create_mwm_all_star_product( - overwrite=True, - apogee_where=ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3")), - boss_where=(BossCombinedSpectrum.run2d == "v6_1_3"), -) - -log.info("mwmAllVisit") -create_mwm_all_visit_product( - overwrite=True, - apogee_where=ApogeeVisitSpectrum.apred.in_(("dr17", "1.3")), - boss_where=(BossVisitSpectrum.run2d == "v6_1_3"), -) - - -star_level_models = ( - (ASPCAP, dict(apogee_where=ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3")))), - (ApogeeNet, dict(apogee_where=ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3")))), - (AstroNN, dict(apogee_where=ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3")))), - (AstroNNdist, dict(apogee_where=ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3")))), - (BossNet, dict(boss_spectrum_model=BossCombinedSpectrum, boss_where=(BossCombinedSpectrum.run2d == "v6_1_3"))), - (TheCannon, dict(apogee_where=ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3")))), - (ThePayne, dict(apogee_where=ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3")))), - (LineForest, dict(boss_spectrum_model=BossCombinedSpectrum, boss_where=(BossCombinedSpectrum.run2d == "v6_1_3"))), - (MDwarfType, dict(boss_spectrum_model=BossCombinedSpectrum, boss_where=(BossCombinedSpectrum.run2d == "v6_1_3"))), - (SnowWhite, dict(boss_spectrum_model=BossCombinedSpectrum, boss_where=(BossCombinedSpectrum.run2d == "v6_1_3"))), - (Corv, dict(boss_spectrum_model=BossCombinedSpectrum, boss_where=(BossCombinedSpectrum.run2d == "v6_1_3"))), - (Slam, dict(boss_spectrum_model=BossCombinedSpectrum)), -) - - -for model, kwargs in star_level_models: - log.info(f"{model.__name__} star level") - create_astra_all_star_product(model, overwrite=True, **kwargs) - - -visit_level_models = ( - (AstroNN, dict(apogee_spectrum_model=ApogeeVisitSpectrumInApStar,apogee_where=ApogeeVisitSpectrumInApStar.apred.in_(("dr17", "1.3")))), - (BossNet, dict(boss_spectrum_model=BossVisitSpectrum, boss_where=(BossVisitSpectrum.run2d == "v6_1_3"))), - (Corv, dict(boss_spectrum_model=BossVisitSpectrum, boss_where=(BossVisitSpectrum.run2d == "v6_1_3"))), - (LineForest, dict(boss_spectrum_model=BossVisitSpectrum, boss_where=(BossVisitSpectrum.run2d == "v6_1_3"))), - (ThePayne, dict(apogee_spectrum_model=ApogeeVisitSpectrumInApStar, apogee_where=ApogeeVisitSpectrumInApStar.apred.in_(("dr17", "1.3")))), - (SnowWhite, dict(boss_spectrum_model=BossVisitSpectrum, boss_where=(BossVisitSpectrum.run2d == "v6_1_3"))), -) - -for model, kwargs in visit_level_models: - log.info(f"{model.__name__} visit level") - create_astra_all_visit_product(model, overwrite=True, **kwargs) diff --git a/20240912_make_products.py b/20240912_make_products.py deleted file mode 100644 index a3872d4..0000000 --- a/20240912_make_products.py +++ /dev/null @@ -1,47 +0,0 @@ -from astra.models import (ASPCAP, ApogeeNet, AstroNN, AstroNNdist, BossNet, Corv, LineForest, MDwarfType, Slam, SnowWhite, TheCannon, ThePayne) -from astra.models.mwm import BossVisitSpectrum, BossCombinedSpectrum -from astra.products.mwm_summary import (create_mwm_targets_product, create_mwm_all_star_product, create_mwm_all_visit_product) -from astra.utils import log - - -log.info("mwmTargets") -create_mwm_targets_product(overwrite=True) -log.info("mwmAllStar") -create_mwm_all_star_product(overwrite=True) -log.info("mwmAllVisit") -create_mwm_all_visit_product(overwrite=True) - - -star_level_models = ( - (ASPCAP, {}), - (ApogeeNet, {}), - (AstroNN, {}), - (AstroNNdist, {}), - (TheCannon, {}), - (ThePayne, {}), - (LineForest, dict(boss_spectrum_model=BossCombinedSpectrum)), - (MDwarfType, dict(boss_spectrum_model=BossCombinedSpectrum)), - (SnowWhite, dict(boss_spectrum_model=BossCombinedSpectrum)), - (Corv, dict(boss_spectrum_model=BossCombinedSpectrum)), - #(Slam, dict(boss_spectrum_model=BossCombinedSpectrum)), -) -visit_level_models = ( - (AstroNN, {}), - (BossNet, {}), - (Corv, dict(boss_spectrum_model=BossVisitSpectrum)), - (LineForest, dict(boss_spectrum_model=BossVisitSpectrum)), - (MDwarfType, dict(boss_spectrum_model=BossVisitSpectrum)), - (SnowWhite, dict(boss_spectrum_model=BossVisitSpectrum)), - (ThePayne, {}), - #(Slam, dict(boss_spectrum_model=BossVisitSpectrum)), -) - -for model, kwargs in star_level_models: - log.info(f"{model.__name__} star level") - create_mwm_all_star_product(pipeline_model=model, overwrite=True, **kwargs) - raise a - - -for model, kwargs in visit_level_models: - log.info(f"{model.__name__} visit level") - create_mwm_all_visit_product(pipeline_model=model, overwrite=True, **kwargs) diff --git a/agnostic_errors.py b/agnostic_errors.py deleted file mode 100644 index afb477d..0000000 --- a/agnostic_errors.py +++ /dev/null @@ -1,883 +0,0 @@ - -from astropy.table import Table -import os -import numpy as np -from itertools import combinations -from tqdm import tqdm -from astra import __version__ -from astra.utils import expand_path -import pickle - - -overwrite = True - - -def safe_float(x): - try: - return float(x) - except: - return np.nan - - -def select_pairwise_combinations( - output_path, - query, - field_names, - group_by=("source_pk", ), - meta_keys=("spectrum_pk", "task_pk", "snr"), - overwrite=False, - exclude_edges=True, - limit=None -): - path = expand_path(f"$MWM_ASTRA/{__version__}/aux/{output_path}") - os.makedirs(os.path.dirname(path), exist_ok=True) - if os.path.exists(path) and not overwrite: - with open(path, "rb") as fp: - pairwise = pickle.load(fp) - return pairwise - - results = Table(rows=list(query)) - # exclude values on the edge - percentiles = {} - keep = np.ones(len(results), dtype=bool) - for name in field_names: - v = np.array(results[name]).astype(float) - e_v = np.array(results[f"e_{name}"]).astype(float) - lower, upper = np.nanpercentile(v, [5, 95]) - if np.isfinite([lower, upper]).all(): - print(name, lower, upper) - keep *= (upper >= v) & (v >= lower) - keep *= (e_v > 0) & (1000 > e_v) - - results = results[keep] - - pairwise = { k: [] for k in group_by } - for name in field_names: - pairwise.update({ - f"{name}_0": [], - f"{name}_1": [], - f"e_{name}_0": [], - f"e_{name}_1": [], - }) - for name in meta_keys: - pairwise.update({ - f"{name}_0": [], - f"{name}_1": [], - }) - - for group in tqdm(results.group_by(group_by).groups): - G = len(group) - if G == 1: - continue - - group_by_values = [group[k][0] for k in group_by] - - for rows in combinations(group, 2): - - # group-level metadata keys first - for k, v in zip(group_by, group_by_values): - pairwise[k].append(v) - - # Then pairwise metadata keys - for suffix, row in enumerate(rows): - - for name in meta_keys: - pairwise[f"{name}_{suffix}"].append(safe_float(row[name])) - - for name in field_names: - pairwise[f"{name}_{suffix}"].append(safe_float(row[name])) - pairwise[f"e_{name}_{suffix}"].append(safe_float(row[f"e_{name}"])) - - for k in pairwise.keys(): - pairwise[k] = np.array(pairwise[k]) - - with open(path, "wb") as fp: - pickle.dump(pairwise, fp) - - print(f"Results written to {path}") - return pairwise - - -from scipy import stats - - - -def get_names(pairwise): - names = [] - for k in pairwise.keys(): - if k.startswith("e_") and k.endswith("_0"): - names.append(k[2:-2]) - return tuple(names) - -# Compute grid of corrections. -def compute_corrections( - output_path, - pairwise, - z_bins=np.linspace(-5, 5, 100), - scales=np.array([0.5, 0.75, 1.0, 1.25, 1.5, 2]), - offsets=dict( - teff=np.logspace(-1, 2, 10), - default=np.logspace(-2, 0, 10), - ), - overwrite=False, -): - - path = expand_path(f"$MWM_ASTRA/{__version__}/aux/{output_path}") - os.makedirs(os.path.dirname(path), exist_ok=True) - if os.path.exists(path) and not overwrite: - with open(path, "rb") as fp: - r = pickle.load(fp) - return r - - names = get_names(pairwise) - - meta = { - "bin_edges": None, - "reference_pdf": None, - } - results = {} - - for name in names: - - var_0, var_1 = (pairwise[f"e_{name}_0"]**2 , pairwise[f"e_{name}_1"]**2) - delta = (pairwise[f"{name}_0"] - pairwise[f"{name}_1"]) - - use = ( - np.isfinite(delta * var_0 * var_1) - & (pairwise[f"e_{name}_0"] >= 0) - & (pairwise[f"e_{name}_1"] >= 0) - ) - delta = delta[use] - var_0, var_1 = (var_0[use], var_1[use]) - - x, y = (offsets.get(name, offsets["default"]), scales) - grid = np.meshgrid(x, y) - grid_offsets, grid_scales = (grid[0].flatten(), grid[1].flatten()) - - costs, best_index, best_cost, best_z_pdf = ([], None, None, None) - for i, (o, s) in tqdm(enumerate(zip(grid_offsets, grid_scales)), total=x.size * y.size): - inv_z_e = 1/np.sqrt((s**2 * var_0) + (s**2 * var_1) + (2*o)**2) - z_pdf, bin_edges = np.histogram(delta * inv_z_e, bins=z_bins, density=True) - if meta["reference_pdf"] is None: - meta.update( - bin_edges=bin_edges, - reference_pdf=stats.norm.pdf(bin_edges[:-1] + 0.5 * np.diff(bin_edges)[0], loc=0, scale=1) - ) - cost = np.sum((z_pdf - meta["reference_pdf"])**2) - if best_cost is None or cost < best_cost: - best_index, best_cost, best_z_pdf = (i, cost, z_pdf) - costs.append(cost) - - results[name] = dict( - N=delta.size, - costs=costs, - offsets=grid_offsets, - scales=grid_scales, - offset=grid_offsets[best_index], - scale=grid_scales[best_index], - best_index=best_index, - best_cost=best_cost, - best_z_pdf=best_z_pdf, - ) - - with open(path, "wb") as fp: - pickle.dump((results, meta), fp) - print(f"Results written to {path}") - return (results, meta) - - -from astra.models import ASPCAP, ApogeeCoaddedSpectrumInApStar - -pipeline_model = ASPCAP - -q = ( - pipeline_model - .select( - ApogeeCoaddedSpectrumInApStar.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.raw_teff.alias("teff"), - pipeline_model.raw_e_teff.alias("e_teff"), - pipeline_model.raw_logg.alias("logg"), - pipeline_model.raw_e_logg.alias("e_logg"), - pipeline_model.raw_v_micro.alias("v_micro"), - (pipeline_model.raw_e_v_micro / pipeline_model.raw_v_micro).alias("e_v_micro"), - pipeline_model.raw_v_sini.alias("v_sini"), - (pipeline_model.raw_e_v_sini / pipeline_model.raw_v_sini).alias("e_v_sini"), - pipeline_model.raw_m_h_atm.alias("m_h_atm"), - pipeline_model.raw_e_m_h_atm.alias("e_m_h_atm"), - pipeline_model.raw_alpha_m_atm.alias("alpha_m_atm"), - pipeline_model.raw_e_alpha_m_atm.alias("e_alpha_m_atm"), - pipeline_model.raw_c_m_atm.alias("c_m_atm"), - pipeline_model.raw_e_c_m_atm.alias("e_c_m_atm"), - pipeline_model.raw_n_m_atm.alias("n_m_atm"), - pipeline_model.raw_e_n_m_atm.alias("e_n_m_atm"), - pipeline_model.raw_al_h.alias("al_h"), - pipeline_model.raw_e_al_h.alias("e_al_h"), - pipeline_model.raw_c_12_13.alias("c_12_13"), - pipeline_model.raw_e_c_12_13.alias("e_c_12_13"), - pipeline_model.raw_ca_h.alias("ca_h"), - pipeline_model.raw_e_ca_h.alias("e_ca_h"), - pipeline_model.raw_ce_h.alias("ce_h"), - pipeline_model.raw_e_ce_h.alias("e_ce_h"), - pipeline_model.raw_c_1_h.alias("c_1_h"), - pipeline_model.raw_e_c_1_h.alias("e_c_1_h"), - pipeline_model.raw_c_h.alias("c_h"), - pipeline_model.raw_e_c_h.alias("e_c_h"), - pipeline_model.raw_co_h.alias("co_h"), - pipeline_model.raw_e_co_h.alias("e_co_h"), - pipeline_model.raw_cr_h.alias("cr_h"), - pipeline_model.raw_e_cr_h.alias("e_cr_h"), - pipeline_model.raw_cu_h.alias("cu_h"), - pipeline_model.raw_e_cu_h.alias("e_cu_h"), - pipeline_model.raw_fe_h.alias("fe_h"), - pipeline_model.raw_e_fe_h.alias("e_fe_h"), - pipeline_model.raw_k_h.alias("k_h"), - pipeline_model.raw_e_k_h.alias("e_k_h"), - pipeline_model.raw_mg_h.alias("mg_h"), - pipeline_model.raw_e_mg_h.alias("e_mg_h"), - pipeline_model.raw_mn_h.alias("mn_h"), - pipeline_model.raw_e_mn_h.alias("e_mn_h"), - pipeline_model.raw_na_h.alias("na_h"), - pipeline_model.raw_e_na_h.alias("e_na_h"), - pipeline_model.raw_nd_h.alias("nd_h"), - pipeline_model.raw_e_nd_h.alias("e_nd_h"), - pipeline_model.raw_ni_h.alias("ni_h"), - pipeline_model.raw_e_ni_h.alias("e_ni_h"), - pipeline_model.raw_n_h.alias("n_h"), - pipeline_model.raw_e_n_h.alias("e_n_h"), - pipeline_model.raw_o_h.alias("o_h"), - pipeline_model.raw_e_o_h.alias("e_o_h"), - pipeline_model.raw_p_h.alias("p_h"), - pipeline_model.raw_e_p_h.alias("e_p_h"), - pipeline_model.raw_si_h.alias("si_h"), - pipeline_model.raw_e_si_h.alias("e_si_h"), - pipeline_model.raw_s_h.alias("s_h"), - pipeline_model.raw_e_s_h.alias("e_s_h"), - pipeline_model.raw_ti_h.alias("ti_h"), - pipeline_model.raw_e_ti_h.alias("e_ti_h"), - pipeline_model.raw_ti_2_h.alias("ti_2_h"), - pipeline_model.raw_e_ti_2_h.alias("e_ti_2_h"), - pipeline_model.raw_v_h.alias("v_h"), - pipeline_model.raw_e_v_h.alias("e_v_h"), - ) - .distinct(ApogeeCoaddedSpectrumInApStar.spectrum_pk) - .join(ApogeeCoaddedSpectrumInApStar, on=(ApogeeCoaddedSpectrumInApStar.spectrum_pk == pipeline_model.spectrum_pk)) - .where(ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3"))) # exclude daily - .where(~pipeline_model.flag_bad) - .dicts() -) -field_names = ( - "teff", - "logg", - "v_micro", - "v_sini", - "m_h_atm", - "alpha_m_atm", - "c_m_atm", - "n_m_atm", - "al_h", - "c_12_13", - "ca_h", - "ce_h", - "c_1_h", - "c_h", - "co_h", - "cr_h", - "cu_h", - "fe_h", - "k_h", - "mg_h", - "mn_h", - "na_h", - "nd_h", - "ni_h", - "n_h", - "o_h", - "p_h", - "si_h", - "s_h", - "ti_h", - "ti_2_h", - "v_h", -) - -pairwise = select_pairwise_combinations("ASPCAP.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("ASPCAP_corrections.pkl", pairwise, overwrite=overwrite) - -''' - -from astra.models import ApogeeNetV2, ApogeeVisitSpectrumInApStar, ApogeeVisitSpectrum - -pipeline_model = ApogeeNetV2 - -q = ( - pipeline_model - .select( - ApogeeVisitSpectrum.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.teff, - pipeline_model.logg, - pipeline_model.fe_h, - pipeline_model.e_teff, - pipeline_model.e_logg, - pipeline_model.e_fe_h, - ) - .distinct(pipeline_model.spectrum_pk) - .join(ApogeeVisitSpectrumInApStar, on=(ApogeeVisitSpectrumInApStar.spectrum_pk == pipeline_model.spectrum_pk)) - .join(ApogeeVisitSpectrum, on=(ApogeeVisitSpectrum.spectrum_pk == ApogeeVisitSpectrumInApStar.drp_spectrum_pk)) - .where(pipeline_model.result_flags == 0) - .where(pipeline_model.v_astra == __version__) - .dicts() -) - -field_names = ("teff", "logg", "fe_h") -pairwise = select_pairwise_combinations("ApogeeNetV2.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("ApogeeNetV2_corrections.pkl", pairwise, overwrite=overwrite) -''' - - -from astra.models import ApogeeNet, ApogeeVisitSpectrumInApStar, ApogeeVisitSpectrum - -pipeline_model = ApogeeNet - -from astra.models.apogeenet import apply_result_flags, apply_noise_model - -apply_result_flags() - -q = ( - pipeline_model - .select( - ApogeeVisitSpectrum.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.raw_teff.alias("teff"), - pipeline_model.raw_logg.alias("logg"), - pipeline_model.raw_fe_h.alias("fe_h"), - pipeline_model.raw_e_teff.alias("e_teff"), - pipeline_model.raw_e_logg.alias("e_logg"), - pipeline_model.raw_e_fe_h.alias("e_fe_h"), - ) - .distinct(pipeline_model.spectrum_pk) - .join(ApogeeVisitSpectrumInApStar, on=(ApogeeVisitSpectrumInApStar.spectrum_pk == pipeline_model.spectrum_pk)) - .join(ApogeeVisitSpectrum, on=(ApogeeVisitSpectrum.spectrum_pk == ApogeeVisitSpectrumInApStar.drp_spectrum_pk)) - .where(pipeline_model.result_flags == 0) - .where(pipeline_model.v_astra == __version__) - .dicts() -) -field_names = ("teff", "logg", "fe_h") -pairwise = select_pairwise_combinations("ApogeeNet.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("ApogeeNet_corrections.pkl", pairwise, overwrite=overwrite) - -# Apply corrections -apply_noise_model() - - - -from astra.models import AstroNN, ApogeeVisitSpectrumInApStar - -pipeline_model = AstroNN - -q = ( - pipeline_model - .select( - ApogeeVisitSpectrum.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.teff, - pipeline_model.e_teff, - pipeline_model.logg, - pipeline_model.e_logg, - pipeline_model.c_h, - pipeline_model.e_c_h, - pipeline_model.c_1_h, - pipeline_model.e_c_1_h, - pipeline_model.n_h, - pipeline_model.e_n_h, - pipeline_model.o_h, - pipeline_model.e_o_h, - pipeline_model.na_h, - pipeline_model.e_na_h, - pipeline_model.mg_h, - pipeline_model.e_mg_h, - pipeline_model.al_h, - pipeline_model.e_al_h, - pipeline_model.si_h, - pipeline_model.e_si_h, - pipeline_model.p_h, - pipeline_model.e_p_h, - pipeline_model.s_h, - pipeline_model.e_s_h, - pipeline_model.k_h, - pipeline_model.e_k_h, - pipeline_model.ca_h, - pipeline_model.e_ca_h, - pipeline_model.ti_h, - pipeline_model.e_ti_h, - pipeline_model.ti_2_h, - pipeline_model.e_ti_2_h, - pipeline_model.v_h, - pipeline_model.e_v_h, - pipeline_model.cr_h, - pipeline_model.e_cr_h, - pipeline_model.mn_h, - pipeline_model.e_mn_h, - pipeline_model.fe_h, - pipeline_model.e_fe_h, - pipeline_model.co_h, - pipeline_model.e_co_h, - pipeline_model.ni_h, - pipeline_model.e_ni_h, - ) - .distinct(pipeline_model.spectrum_pk) - .join(ApogeeVisitSpectrumInApStar, on=(ApogeeVisitSpectrumInApStar.spectrum_pk == pipeline_model.spectrum_pk)) - .join(ApogeeVisitSpectrum, on=(ApogeeVisitSpectrum.spectrum_pk == ApogeeVisitSpectrumInApStar.drp_spectrum_pk)) - .where(pipeline_model.result_flags == 0) - .where(pipeline_model.v_astra == __version__) - .dicts() -) -field_names = ( - "teff", - "logg", - "c_h", - "c_1_h", - "n_h", - "o_h", - "na_h", - "mg_h", - "al_h", - "si_h", - "p_h", - "s_h", - "k_h", - "ca_h", - "ti_h", - "ti_2_h", - "v_h", - "cr_h", - "mn_h", - "fe_h", - "co_h", - "ni_h", -) - -pairwise = select_pairwise_combinations("AstroNN.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("AstroNN_corrections.pkl", pairwise, overwrite=overwrite) - - - -''' -from astra.models import Corv, BossVisitSpectrum - -pipeline_model = Corv - -q = ( - pipeline_model - .select( - BossVisitSpectrum.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.teff, - pipeline_model.logg, - pipeline_model.e_teff, - pipeline_model.e_logg, - ) - .distinct(pipeline_model.spectrum_pk) - .join(BossVisitSpectrum, on=(BossVisitSpectrum.spectrum_pk == pipeline_model.spectrum_pk)) - .where(BossVisitSpectrum.run2d == "v6_1_3") - #.where(pipeline_model.result_flags == 0) - .dicts() -) -field_names = ("teff", "logg") - -pairwise = select_pairwise_combinations("Corv.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("Corv_corrections.pkl", pairwise, overwrite=overwrite) -''' - -from peewee import fn -from astra.models import SnowWhite, BossVisitSpectrum - -pipeline_model = SnowWhite - -q = ( - pipeline_model - .select( - BossVisitSpectrum.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.teff.alias("teff"), - pipeline_model.logg.alias("logg"), - pipeline_model.e_teff.alias("e_teff"), - pipeline_model.e_logg.alias("e_logg"), - ) - .distinct(pipeline_model.spectrum_pk) - .join(BossVisitSpectrum, on=(BossVisitSpectrum.spectrum_pk == pipeline_model.spectrum_pk)) - .where( - pipeline_model.teff.is_null(False) - & (pipeline_model.logg > 7) - & (9.5 > pipeline_model.logg) - & (fn.abs(pipeline_model.teff - 13_000) > 250) # lots of bunching up at the nodes and edges - ) - .where(pipeline_model.logg > 7) - .where(BossVisitSpectrum.run2d == "v6_1_3") - .where(pipeline_model.v_astra == "0.6.0") - .dicts() -) - -field_names = ("teff", "logg") - -pairwise = select_pairwise_combinations("SnowWhite.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("SnowWhite_corrections.pkl", pairwise, overwrite=overwrite) - -# ThePayne - -from astra.models import ThePayne, ApogeeVisitSpectrumInApStar - -pipeline_model = ThePayne - -q = ( - pipeline_model - .select( - ApogeeVisitSpectrum.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.teff.alias("teff"), - pipeline_model.e_teff.alias("e_teff"), - pipeline_model.v_turb.alias("v_turb"), - pipeline_model.e_v_turb.alias("e_v_turb"), - pipeline_model.logg.alias("logg"), - pipeline_model.e_logg.alias("e_logg"), - pipeline_model.c_h.alias("c_h"), - pipeline_model.e_c_h.alias("e_c_h"), - pipeline_model.n_h.alias("n_h"), - pipeline_model.e_n_h.alias("e_n_h"), - pipeline_model.o_h.alias("o_h"), - pipeline_model.e_o_h.alias("e_o_h"), - pipeline_model.na_h.alias("na_h"), - pipeline_model.e_na_h.alias("e_na_h"), - pipeline_model.mg_h.alias("mg_h"), - pipeline_model.e_mg_h.alias("e_mg_h"), - pipeline_model.al_h.alias("al_h"), - pipeline_model.e_al_h.alias("e_al_h"), - pipeline_model.si_h.alias("si_h"), - pipeline_model.e_si_h.alias("e_si_h"), - pipeline_model.p_h.alias("p_h"), - pipeline_model.e_p_h.alias("e_p_h"), - pipeline_model.s_h.alias("s_h"), - pipeline_model.e_s_h.alias("e_s_h"), - pipeline_model.k_h.alias("k_h"), - pipeline_model.e_k_h.alias("e_k_h"), - pipeline_model.ca_h.alias("ca_h"), - pipeline_model.e_ca_h.alias("e_ca_h"), - pipeline_model.ti_h.alias("ti_h"), - pipeline_model.e_ti_h.alias("e_ti_h"), - pipeline_model.v_h.alias("v_h"), - pipeline_model.e_v_h.alias("e_v_h"), - pipeline_model.cr_h.alias("cr_h"), - pipeline_model.e_cr_h.alias("e_cr_h"), - pipeline_model.mn_h.alias("mn_h"), - pipeline_model.e_mn_h.alias("e_mn_h"), - pipeline_model.fe_h.alias("fe_h"), - pipeline_model.e_fe_h.alias("e_fe_h"), - pipeline_model.co_h.alias("co_h"), - pipeline_model.e_co_h.alias("e_co_h"), - pipeline_model.ni_h.alias("ni_h"), - pipeline_model.e_ni_h.alias("e_ni_h"), - pipeline_model.cu_h.alias("cu_h"), - pipeline_model.e_cu_h.alias("e_cu_h"), - pipeline_model.ge_h.alias("ge_h"), - pipeline_model.e_ge_h.alias("e_ge_h"), - pipeline_model.c12_c13.alias("c12_c13"), - pipeline_model.e_c12_c13.alias("e_c12_c13"), - pipeline_model.v_macro.alias("v_macro"), - pipeline_model.e_v_macro.alias("e_v_macro") - ) - .distinct(pipeline_model.spectrum_pk) - .join(ApogeeVisitSpectrumInApStar, on=(ApogeeVisitSpectrumInApStar.spectrum_pk == pipeline_model.spectrum_pk)) - .join(ApogeeVisitSpectrum, on=(ApogeeVisitSpectrum.spectrum_pk == ApogeeVisitSpectrumInApStar.drp_spectrum_pk)) - .where(pipeline_model.result_flags == 0) - .where(pipeline_model.v_astra == __version__) - .where(ApogeeVisitSpectrum.apred.in_(("dr17", "1.3"))) - .dicts() -) - -field_names = ( - "teff", - "v_turb", - "logg", - "c_h", - "n_h", - "o_h", - "na_h", - "mg_h", - "al_h", - "si_h", - "p_h", - "s_h", - "k_h", - "ca_h", - "ti_h", - "v_h", - "cr_h", - "mn_h", - "fe_h", - "co_h", - "ni_h", - "cu_h", - "ge_h", - "c12_c13", - "v_macro", -) -pairwise = select_pairwise_combinations("ThePayne.pkl", q, field_names, overwrite=overwrite, limit=100_000) -corrections = compute_corrections("ThePayne_corrections.pkl", pairwise, overwrite=overwrite) - -# TheCannon - - -from astra.models import TheCannon, ApogeeCoaddedSpectrumInApStar - -pipeline_model = TheCannon - -q = ( - pipeline_model - .select( - ApogeeCoaddedSpectrumInApStar.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.teff, - pipeline_model.e_teff, - pipeline_model.logg, - pipeline_model.e_logg, - pipeline_model.fe_h, - pipeline_model.e_fe_h, - pipeline_model.v_micro, - pipeline_model.e_v_micro, - pipeline_model.v_macro, - pipeline_model.e_v_macro, - pipeline_model.c_fe, - pipeline_model.e_c_fe, - pipeline_model.n_fe, - pipeline_model.e_n_fe, - pipeline_model.o_fe, - pipeline_model.e_o_fe, - pipeline_model.na_fe, - pipeline_model.e_na_fe, - pipeline_model.mg_fe, - pipeline_model.e_mg_fe, - pipeline_model.al_fe, - pipeline_model.e_al_fe, - pipeline_model.si_fe, - pipeline_model.e_si_fe, - pipeline_model.s_fe, - pipeline_model.e_s_fe, - pipeline_model.k_fe, - pipeline_model.e_k_fe, - pipeline_model.ca_fe, - pipeline_model.e_ca_fe, - pipeline_model.ti_fe, - pipeline_model.e_ti_fe, - pipeline_model.v_fe, - pipeline_model.e_v_fe, - pipeline_model.cr_fe, - pipeline_model.e_cr_fe, - pipeline_model.mn_fe, - pipeline_model.e_mn_fe, - pipeline_model.ni_fe, - pipeline_model.e_ni_fe, - ) - .distinct(pipeline_model.spectrum_pk) - .join(ApogeeCoaddedSpectrumInApStar, on=(ApogeeCoaddedSpectrumInApStar.spectrum_pk == pipeline_model.spectrum_pk)) - .where(pipeline_model.result_flags == 0) - .where(ApogeeCoaddedSpectrumInApStar.apred.in_(("dr17", "1.3"))) - .dicts() -) - -field_names = ( - "teff", - "logg", - "fe_h", - "v_micro", - "v_macro", - "c_fe", - "n_fe", - "o_fe", - "na_fe", - "mg_fe", - "al_fe", - "si_fe", - "s_fe", - "k_fe", - "ca_fe", - "ti_fe", - "v_fe", - "cr_fe", - "mn_fe", - "ni_fe", -) - -pairwise = select_pairwise_combinations("TheCannon.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("TheCannon_corrections.pkl", pairwise, overwrite=overwrite) - - - - - -''' -from astra.models import HotPayne, BossVisitSpectrum - - -pipeline_model = HotPayne - -q = ( - pipeline_model - .select( - BossVisitSpectrum.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.teff, - pipeline_model.logg, - pipeline_model.fe_h, - pipeline_model.v_micro, - pipeline_model.v_sini, - pipeline_model.he_h, - pipeline_model.c_h, - pipeline_model.n_h, - pipeline_model.o_h, - pipeline_model.si_h, - pipeline_model.s_h, - pipeline_model.teff_fullspec, - pipeline_model.logg_fullspec, - pipeline_model.fe_h_fullspec, - pipeline_model.v_micro_fullspec, - pipeline_model.v_sini_fullspec, - pipeline_model.he_h_fullspec, - pipeline_model.c_h_fullspec, - pipeline_model.n_h_fullspec, - pipeline_model.o_h_fullspec, - pipeline_model.si_h_fullspec, - pipeline_model.s_h_fullspec, - pipeline_model.teff_hmasked, - pipeline_model.logg_hmasked, - pipeline_model.fe_h_hmasked, - pipeline_model.v_micro_hmasked, - pipeline_model.v_sini_hmasked, - pipeline_model.he_h_hmasked, - pipeline_model.c_h_hmasked, - pipeline_model.n_h_hmasked, - pipeline_model.o_h_hmasked, - pipeline_model.si_h_hmasked, - pipeline_model.s_h_hmasked, - pipeline_model.e_teff, - pipeline_model.e_logg, - pipeline_model.e_fe_h, - pipeline_model.e_v_micro, - pipeline_model.e_v_sini, - pipeline_model.e_he_h, - pipeline_model.e_c_h, - pipeline_model.e_n_h, - pipeline_model.e_o_h, - pipeline_model.e_si_h, - pipeline_model.e_s_h, - pipeline_model.e_teff_fullspec, - pipeline_model.e_logg_fullspec, - pipeline_model.e_fe_h_fullspec, - pipeline_model.e_v_micro_fullspec, - pipeline_model.e_v_sini_fullspec, - pipeline_model.e_he_h_fullspec, - pipeline_model.e_c_h_fullspec, - pipeline_model.e_n_h_fullspec, - pipeline_model.e_o_h_fullspec, - pipeline_model.e_si_h_fullspec, - pipeline_model.e_s_h_fullspec, - pipeline_model.e_teff_hmasked, - pipeline_model.e_logg_hmasked, - pipeline_model.e_fe_h_hmasked, - pipeline_model.e_v_micro_hmasked, - pipeline_model.e_v_sini_hmasked, - pipeline_model.e_he_h_hmasked, - pipeline_model.e_c_h_hmasked, - pipeline_model.e_n_h_hmasked, - pipeline_model.e_o_h_hmasked, - pipeline_model.e_si_h_hmasked, - pipeline_model.e_s_h_hmasked, - ) - .distinct(pipeline_model.spectrum_pk) - .join(BossVisitSpectrum, on=(BossVisitSpectrum.spectrum_pk == pipeline_model.spectrum_pk)) - #.where(pipeline_model.result_flags == 0) - .dicts() -) -field_names = ( - "teff", - "logg", - "fe_h", - "v_micro", - "v_sini", - "he_h", - "c_h", - "n_h", - "o_h", - "si_h", - "s_h", - "teff_fullspec", - "logg_fullspec", - "fe_h_fullspec", - "v_micro_fullspec", - "v_sini_fullspec", - "he_h_fullspec", - "c_h_fullspec", - "n_h_fullspec", - "o_h_fullspec", - "si_h_fullspec", - "s_h_fullspec", - "teff_hmasked", - "logg_hmasked", - "fe_h_hmasked", - "v_micro_hmasked", - "v_sini_hmasked", - "he_h_hmasked", - "c_h_hmasked", - "n_h_hmasked", - "o_h_hmasked", - "si_h_hmasked", - "s_h_hmasked", -) -pairwise = select_pairwise_combinations("HotPayne.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("HotPayne_corrections.pkl", pairwise, overwrite=overwrite) -''' - -from astra.models import Slam, BossVisitSpectrum - -pipeline_model = Slam - -q = ( - pipeline_model - .select( - BossVisitSpectrum.snr, - pipeline_model.source_pk, - pipeline_model.task_pk, - pipeline_model.spectrum_pk, - pipeline_model.teff, - pipeline_model.fe_h, - pipeline_model.e_teff, - pipeline_model.e_fe_h, - ) - .distinct(pipeline_model.spectrum_pk) - .join(BossVisitSpectrum, on=(BossVisitSpectrum.spectrum_pk == pipeline_model.spectrum_pk)) - #.where(pipeline_model.result_flags == 0) - .dicts() -) -field_names = ("teff", "fe_h") - -pairwise = select_pairwise_combinations("SLAM.pkl", q, field_names, overwrite=overwrite) -corrections = compute_corrections("SLAM_corrections.pkl", pairwise, overwrite=overwrite) - diff --git a/dr19_copy_aspcap_dr17.py b/dr19_copy_aspcap_dr17.py deleted file mode 100644 index e8d7bf6..0000000 --- a/dr19_copy_aspcap_dr17.py +++ /dev/null @@ -1,58 +0,0 @@ -from tqdm import tqdm -from astra import __version__ -from astra.models.base import database -from astra.models import ASPCAP, ApogeeCoaddedSpectrumInApStar -from peewee import chunked - -# Get all DR17 results for v_astra = 0.5.0 -q = ( - ASPCAP - .select() - .distinct(ASPCAP.spectrum_pk) - .join( - ApogeeCoaddedSpectrumInApStar, - on=(ApogeeCoaddedSpectrumInApStar.spectrum_pk == ASPCAP.spectrum_pk) - ) - .where( - (ASPCAP.v_astra == "0.5.0") - & (ApogeeCoaddedSpectrumInApStar.release == "dr17") - ) - .dicts() -) - -rows = [] -for r in tqdm(q): - r.pop("task_pk") - r["v_astra"] = __version__ - # Remove any calibrations. - for k, v in r.items(): - if k.startswith("raw_"): - r[k[4:]] = v - rows.append(r) - -spectrum_pks = [e["spectrum_pk"] for e in rows] - -# Check to make sure we are not duplicating... -q = ( - ASPCAP - .select() - .where( - (ASPCAP.v_astra == __version__) - & ASPCAP.spectrum_pk.in_(spectrum_pks) - ) - .first() -) -assert q is None - -# Bulk insert -batch_size = 1000 -with database.atomic(): - with tqdm(desc="Inserting", total=len(rows)) as pb: - for chunk in chunked(rows, batch_size): - ( - ASPCAP - .insert_many(chunk) - .execute() - ) - pb.update(min(batch_size, len(chunk))) - pb.refresh() \ No newline at end of file diff --git a/move_paths.py b/move_paths.py deleted file mode 100644 index e7c908a..0000000 --- a/move_paths.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from glob import glob -from tqdm import tqdm -from shutil import rmtree -k = 100 -base_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/mwm/spectro/astra/0.2.5/v6_0_9-daily/spectra/" - -for prefix in ("star", "visit"): - for old_path in tqdm(glob(f"{base_dir}{prefix}/*/*/*.fits"), desc=prefix): - catalogid = int(old_path.split("-")[-1].split(".")[0]) - new_path = f"{(catalogid // k) % k:0>2.0f}/{catalogid % k:0>2.0f}/{os.path.basename(old_path)}" - os.makedirs(os.path.dirname(new_path), exist_ok=True) - os.rename(old_path, new_path) - -# remove empty directories -for prefix in ("star", "visit"): - single_col = glob(f"{base_dir}{prefix}/?/*/*.fits") - assert len(single_col) == 0 - for empty_dir in glob(f"{base_dir}{prefix}/?"): - print(f"removing {empty_dir}") - rmtree(empty_dir) - triple_col = glob(f"{base_dir}{prefix}/???/*/*.fits") - assert len(triple_col) == 0 - for empty_dir in glob(f"{base_dir}{prefix}/???"): - print(f"removing {empty_dir}") - rmtree(empty_dir) \ No newline at end of file diff --git a/snow_white_script.py b/snow_white_script.py deleted file mode 100644 index 615584c..0000000 --- a/snow_white_script.py +++ /dev/null @@ -1,49 +0,0 @@ -from astra import __version__ -from astra.pipelines.snow_white import snow_white -from tqdm import tqdm -from astra.models import BossCombinedSpectrum, Source, SnowWhite, BossRestFrameVisitSpectrum -from peewee import JOIN - -q = ( - BossCombinedSpectrum - .select() - .join(Source) - .switch(BossCombinedSpectrum) - .join( - SnowWhite, - JOIN.LEFT_OUTER, - on=( - (SnowWhite.spectrum_pk == BossCombinedSpectrum.spectrum_pk) - & (SnowWhite.v_astra == __version__) - ) - ) - .where( - Source.assigned_to_program("mwm_wd") - & SnowWhite.spectrum_pk.is_null() - ) -) - -for item in tqdm(snow_white(q), total=1): - None - -q = ( - BossRestFrameVisitSpectrum - .select() - .join(Source) - .switch(BossRestFrameVisitSpectrum) - .join( - SnowWhite, - JOIN.LEFT_OUTER, - on=( - (SnowWhite.spectrum_pk == BossRestFrameVisitSpectrum.spectrum_pk) - & (SnowWhite.v_astra == __version__) - ) - ) - .where( - Source.assigned_to_program("mwm_wd") - & SnowWhite.spectrum_pk.is_null() - ) -) - -for item in tqdm(snow_white(q), total=1): - None diff --git a/some_0.7_thoughts.py b/some_0.7_thoughts.py deleted file mode 100644 index dc87bdb..0000000 --- a/some_0.7_thoughts.py +++ /dev/null @@ -1,134 +0,0 @@ - -from typing import Callable, Union - -# For intermediate outputs, there should be an accessor that tries to get it from the real path (eg astraStar file) but if that fails it gets it from the intermediate output path -# and there should be a standard for the intermediate output path (?pickle ?hdf5) -# and the intermediate output path should depend ONLY on spectrum_pk - - -# task must be able to specify: -# - which types of spectra it takes -# - which type it should prioritise (eg coadd > visit) ????? -# - where clauses for what kind of spectra it takes (eg by color, source, targeting) -# - pre loading function -# - slurm profile? requirements? -# - how to handle exceptions? -# - write to database? -# - default execution kwds??? have them read from astra.config -# - needs to be able to extend the specrum query to make additional selecton cuts (eg Corv requiring SnowWhite DA-type), or SLAM only running in some parts of param space -# - allow DEBUG as a special kwarg that will re-raise all exceptions - -# TODO: what if the task runs on a SOURCE and not a SPECTRUM? -# ... I think that is OK, the introspection should just check for the type expected (eg Source, not Spectrum) -# and then it should distribute the work accordingly - - -@task( - pre_process_callable=my_task_pre_loader, # executed once per process - select_query_callable=lambda q: ( - q - .join(SnowWhite) - .where(SnowWhite.classification == "DA") - ), # used before distributing work -) -def my_task(spectrum: ApogeeRestFrameSpectrum, **kwargs) -> OutputType: - """ - Does shit - """ - context = kwargs.get("pre_execution_callable") # result from pre-loader??? - - # write individual file - - return X - - -# So for something like ASPCAP, what does that look like? -# When the astra_execute thing is run, that wants to run on a whole bunch of spectra -# and to distribute across nodes, etc. So *that* is the point where we need to do load -# balancing and set up FERRE, because by the time it gets to the process level, the -# distribution has already happened. - -def post_process_ferre(context, **kwargs): - pwd = context["pre_distribute_result"]["pwd"] - - return { - # spectrum_pk -> result kwds - } - -@task( - pre_distribute_callable=load_balance_and_pre_process_ferre, - pre_process_callable=post_process_ferre -) -def ferre_initial(spectrum: ApogeeRestFrameSpectrumType, context: dict, **kwargs) -> FerreInitial: - - pre_process_result = context["pre_process_result"] - kwds = pre_process_result[spectrum.spectrum_pk] - - return FerreInitial(**kwds) - - -def load_balance_and_pre_process_ferre(): - - # the @task will: - # 1. query for what spectra to run, and how many, etc. - # 2. if there is a pre_distribute_callable, then it is going to run that on the spectra before setting up slurm jobs etc - # -> if there isn't one then the slurm jobs would just be paginating the query out to each process, etc. - # 3. the pre_distribute_callable should return some SlurmJobs or something like that. - # 4. those slurm jobs should run ferre_initial on the pwd when they are complete, with the 'pre_distribute_result' providing the pwd - # 5. the pre_process_callable would process the FERRE run - - - -def design_matrix(path: "some_model.pt", spectrum, **kwargs): - # spectrum must - return A, L, label_names - - -@task(pre_process_callable=design_matrix) -def whow(spectrum: AnySpectrumType, model_path: str, **kwargs) -> WHOW: - - A, L, label_names = kwargs.get("pre_process_callable_result") - - Y = spectrum.flux - Cinv = spectrum.ivar - - ATCinv = A.T @ Cinv - X = np.linalg.solve(ATCinv @ A, ATCinv @ Y) - - rchi2 = ... - labels = L @ X[:32] - - r = WHOW.from_spectrum( - spectrum, - **dict(zip(label_names, labels)) - ) - - # TODO: need to know whether this spectrum is star-level type or not, right? - with fits.open(r.absolute_path) as image: - image.write(...) - - return r - - - -distribute( - my_task, - ApogeeCoaddedSpectrumInApStar, - where=None, - slurm_profile="notchpeak", - nodes=4, - limit=10_000, - threads=32, - processes=10 -) #-> Slurm jobs? - -# steps would be: -# - executor performs a query to get all things not yet analyzed, which could then be paginated to different nodes/procs/etc -# - executor submits slurm jobs across each node/proc/etc -# - within each proc: -# + executor gets its pages to do -# + executor does introspection to see there is a pre-execute hook -# + it runs the pre-execute hook once and creates a context object --> this pre-execute hook must have access to the first spectrum -# + it directly runs the task function for all spectra, supplying the context object each time -# + it adds the processor overhead, how many spectra executed in this batch (since it knows hat), and the time for this single analysis -# + at some intervals, it batch inserts the results to the database, following some rules about what to do if there are integrity conflicts diff --git a/src/astra/glossary.py b/src/astra/glossary.py index 1e49908..afef744 100644 --- a/src/astra/glossary.py +++ b/src/astra/glossary.py @@ -1440,7 +1440,7 @@ def resolve_special_contexts(obj, name): else: return value - warnings.warn(f"There are some missing glossary definitions. See `astra.glossary.MISSING_GLOSSARY_TERMS`.") + #warnings.warn(f"There are some missing glossary definitions. See `astra.glossary.MISSING_GLOSSARY_TERMS`.") MISSING_GLOSSARY_TERMS.add(name) return None