diff --git a/src/leavitt/leavitt.py b/src/leavitt/leavitt.py
index 38a9af7..3df62ca 100644
--- a/src/leavitt/leavitt.py
+++ b/src/leavitt/leavitt.py
@@ -188,30 +188,29 @@ def main(args=None):
               file=stderr)
         return 1
 
-    if args.error_method is not None:
-        try:
-            dependent_vars_error = data[
-                [args.error_prefix + var + args.error_suffix
-                 for var in args.dependent_vars]
-            ].iloc[:-n_coeff].values
-
-        except KeyError as e:
-            key = e.args[0]
-            print("Missing entry in input table for error: {}".format(key),
-                  file=stderr)
-            return 1
-
-        nan_rows = any_nan(dependent_vars,
-                           independent_vars,
-                           dependent_vars_error,
-                           axis=1)
-        non_nan_rows = ~nan_rows
-        dependent_vars       = dependent_vars[      non_nan_rows]
-        independent_vars     = independent_vars[    non_nan_rows]
-        dependent_vars_error = dependent_vars_error[non_nan_rows]
-        mask = numpy.concatenate((non_nan_rows,
-                                  numpy.ones(n_coeff, dtype=bool)))
+    try:
+        dependent_vars_error = data[
+            [args.error_prefix + var + args.error_suffix
+             for var in args.dependent_vars]
+        ].iloc[:-n_coeff].values
+
+    except KeyError as e:
+        key = e.args[0]
+        print("Missing entry in input table for error: {}".format(key),
+              file=stderr)
+        return 1
+    nan_rows = any_nan(dependent_vars,
+                       independent_vars,
+                       dependent_vars_error,
+                       axis=1)
+    non_nan_rows = ~nan_rows
+    dependent_vars       = dependent_vars[      non_nan_rows]
+    independent_vars     = independent_vars[    non_nan_rows]
+    dependent_vars_error = dependent_vars_error[non_nan_rows]
+    mask = numpy.concatenate((non_nan_rows,
+                              numpy.ones(n_coeff, dtype=bool)))
+    if args.error_method is not None:
         fit, fit_err = args.error_method(dependent_vars, independent_vars,
                                          dependent_vars_error,
@@ -224,13 +223,8 @@ def main(args=None):
         data[args.distance_label      ] = fit
         data[args.distance_error_label] = fit_err
     else:
-        nan_rows = any_nan(dependent_vars, independent_vars, axis=1)
-        non_nan_rows = ~nan_rows
-        dependent_vars = dependent_vars[non_nan_rows]
-        independent_vars = independent_vars[non_nan_rows]
-        mask = numpy.concatenate((non_nan_rows,
-                                  numpy.ones(n_coeff, dtype=bool)))
         fit = leavitt_law(dependent_vars, independent_vars,
+                          dependent_vars_error,
                           args.add_const, args.fit_modulus,
                           args.sigma_method, args.sigma,
                           args.mean_modulus, args.units,
diff --git a/src/leavitt/regression.py b/src/leavitt/regression.py
index dcf3c65..da30dbb 100644
--- a/src/leavitt/regression.py
+++ b/src/leavitt/regression.py
@@ -1,4 +1,5 @@
 import numpy
+import statsmodels.api as sm
 
 from leavitt.utils import identity, zscore
 from sys import stdout, stderr
@@ -50,14 +51,19 @@ def design_matrix(dependent_vars, independent_vars, add_const=False):
 
     return design_matrix, numpy.transpose(dependent_vars).flatten()
 
+def modulus_design_matrix(n_bands, n_samples):
+    return numpy.tile(numpy.eye(n_samples),
+                      (n_bands, 1))
+
+def stack_vars(vars):
+    return numpy.transpose(vars).flatten()
 
 def simple_leavitt_law(dependent_vars, independent_vars, add_const, rcond,
                        debug=False):
     n_samples, n_vars = dependent_vars.shape
 
     X = simple_design_matrix(independent_vars, add_const, n_vars)
-    y = numpy.transpose(dependent_vars).flatten()
-
+    y = stack_vars(dependent_vars)
 
     if debug:
         _print_debug(X, y)
@@ -72,14 +78,40 @@ def simple_leavitt_law(dependent_vars, independent_vars, add_const, rcond,
 
     return fit
 
-
-
-
-def leavitt_law(dependent_vars, independent_vars, add_const=False,
-                fit_modulus=False,
+def _err(intrinsic, photometric, slope, zero_point, logP):
+    return numpy.sqrt(
+        intrinsic**2 +
+        photometric**2 +
+        (slope*logP)**2 +
+        zero_point**2
+    )
+
+def error_leavitt_law(intrinsic_error, photometric_error,
+                      slope_error, zero_point_error,
+                      logP):
+    # print(photometric_error.shape,
+    #       slope_error.shape,
+    #       zero_point_error.shape,
+    #       logP.shape,
+    #       file=stderr)
+    # exit()
+    errors = numpy.empty_like(photometric_error)
+    n_bands = slope_error.size
+
+    for i in range(n_bands):
+        errors[:,i] = _err(intrinsic_error,
+                           photometric_error[:,i],
+                           slope_error[i], zero_point_error[i],
+                           logP[:,0])
+    return errors
+
+def leavitt_law(dependent_vars, independent_vars,
+                dependent_vars_error,
+                add_const=False, fit_modulus=False,
                 sigma_method=zscore, sigma=0.0, mean_modulus=0.0,
                 unit_conversion=identity, rcond=1e-3, max_iter=20,
+                intrinsic_error=0.05,
                 debug=False):
     n_samples, n_vars = dependent_vars.shape
     n_coeff = (1+add_const)*n_vars
 
@@ -90,6 +122,35 @@ def leavitt_law(dependent_vars, independent_vars, add_const=False,
                                  add_const, rcond, debug)
     # if sigma is 0 or less, do not perform any outlier detection
     if sigma <= 0:
+        X_pl = simple_design_matrix(independent_vars, add_const, n_vars)
+        y = stack_vars(dependent_vars)
+        model_pl = sm.OLS(y, X_pl)
+        results_pl = model_pl.fit()
+        coeffs_pl = results_pl.params
+
+        stderr_pl = results_pl.HC0_se
+        slope_err = stderr_pl[0::2]
+        zero_point_err = stderr_pl[1::2]
+        error = stack_vars(error_leavitt_law(intrinsic_error,
+                                             dependent_vars_error,
+                                             slope_err, zero_point_err,
+                                             independent_vars))
+#       pl_coeffs, pl_residuals, pl_rank, pl_sv = numpy.linalg.lstsq(X_pl, y)
+
+        fitted_y = results_pl.fittedvalues
+        residuals = y - fitted_y
+
+        X_modulus = modulus_design_matrix(n_vars, n_samples)
+        model_modulus = sm.WLS(residuals, X_modulus, error)
+        results_modulus = model_modulus.fit()
+        coeffs_modulus = results_modulus.params
+
+        dist = unit_conversion(coeffs_modulus + mean_modulus)
+        ret = numpy.concatenate((dist, coeffs_pl))
+        if debug:
+            print(ret, file=stderr)
+        return ret
+
         X, y = design_matrix(dependent_vars, independent_vars, add_const)
         if debug:
             _print_debug(X, y)
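
For reference, the following is a minimal, self-contained sketch of the two-stage fit that the new sigma <= 0 branch in regression.py performs: an OLS fit of the period-luminosity coefficients in every band, error propagation in quadrature (the _err formula), and a WLS fit of the residuals against the tiled-identity modulus design matrix. It runs on synthetic data, not the package's inputs: the block-diagonal layout of pl_design_matrix is only inferred from the [0::2]/[1::2] slicing of HC0_se, the 0.05 mag intrinsic scatter mirrors the new intrinsic_error default, and the weights are passed as inverse variances (statsmodels' WLS convention) rather than as the raw error array, so treat it as an illustration rather than the exact code path.

import numpy
import statsmodels.api as sm

# Synthetic data: 2 photometric bands, 50 stars with known PL coefficients.
rng = numpy.random.default_rng(42)
n_samples, n_bands = 50, 2
logP = rng.uniform(0.4, 1.2, size=(n_samples, 1))      # independent_vars
true_slope = numpy.array([-2.4, -2.9])                  # PL slope per band
true_zero  = numpy.array([-1.5, -2.1])                  # PL zero point per band
true_mu    = rng.normal(18.5, 0.1, size=n_samples)      # per-star distance moduli
phot_err   = numpy.full((n_samples, n_bands), 0.02)     # dependent_vars_error

# Apparent magnitude in each band: m = mu + slope*logP + zero_point + noise.
mags = true_mu[:, None] + logP*true_slope + true_zero + rng.normal(0.0, phot_err)

def stack_vars(v):
    # Band-major stacking of the per-band columns, as in the patch.
    return numpy.transpose(v).flatten()

def pl_design_matrix(logP, n_bands):
    # Block-diagonal [logP, 1] blocks: one (slope, zero point) pair per band.
    block = numpy.hstack([logP, numpy.ones_like(logP)])
    return numpy.kron(numpy.eye(n_bands), block)

def modulus_design_matrix(n_bands, n_samples):
    # Identity tiled once per band: one shared modulus column per star.
    return numpy.tile(numpy.eye(n_samples), (n_bands, 1))

def propagated_error(intrinsic, photometric, slope_err, zero_err, logP):
    # Quadrature sum used by _err: intrinsic scatter, photometric error,
    # slope error scaled by logP, and zero-point error.
    return numpy.sqrt(intrinsic**2 + photometric**2
                      + (slope_err*logP)**2 + zero_err**2)

# Stage 1: OLS fit of the PL relation in all bands at once.
X_pl = pl_design_matrix(logP, n_bands)
y = stack_vars(mags)
results_pl = sm.OLS(y, X_pl).fit()
slope_err = results_pl.HC0_se[0::2]     # robust SEs, interleaved as in the patch
zero_err  = results_pl.HC0_se[1::2]

# Per-point error budget (0.05 mag intrinsic scatter is an assumed default).
err = numpy.column_stack([
    propagated_error(0.05, phot_err[:, i], slope_err[i], zero_err[i], logP[:, 0])
    for i in range(n_bands)])

# Stage 2: WLS fit of the PL residuals for one modulus offset per star.
# statsmodels expects inverse-variance weights, hence the 1/err**2.
residuals = y - results_pl.fittedvalues
X_mod = modulus_design_matrix(n_bands, n_samples)
results_mod = sm.WLS(residuals, X_mod, weights=1.0/stack_vars(err)**2).fit()

# The offsets are relative to the mean modulus absorbed by the zero points,
# so add it back, mirroring coeffs_modulus + mean_modulus in the patch.
dist = results_mod.params + true_mu.mean()
print("recovered moduli (first 5):", numpy.round(dist[:5], 3))
print("true moduli      (first 5):", numpy.round(true_mu[:5], 3))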