Skip to content

Commit

Permalink
Merge pull request #6 from astroswego/nikolaev-leavitt-law
Browse files Browse the repository at this point in the history
Introduced weighted regression for fitting distance moduli
  • Loading branch information
dwysocki committed Oct 18, 2015
2 parents d801ebe + 89a47c7 commit 8337e39
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 36 deletions.
52 changes: 23 additions & 29 deletions src/leavitt/leavitt.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,30 +188,29 @@ def main(args=None):
file=stderr)
return 1

if args.error_method is not None:
try:
dependent_vars_error = data[
[args.error_prefix + var + args.error_suffix
for var in args.dependent_vars]
].iloc[:-n_coeff].values

except KeyError as e:
key = e.args[0]
print("Missing entry in input table for error: {}".format(key),
file=stderr)
return 1

nan_rows = any_nan(dependent_vars,
independent_vars,
dependent_vars_error,
axis=1)
non_nan_rows = ~nan_rows
dependent_vars = dependent_vars[ non_nan_rows]
independent_vars = independent_vars[ non_nan_rows]
dependent_vars_error = dependent_vars_error[non_nan_rows]
mask = numpy.concatenate((non_nan_rows,
numpy.ones(n_coeff, dtype=bool)))
try:
dependent_vars_error = data[
[args.error_prefix + var + args.error_suffix
for var in args.dependent_vars]
].iloc[:-n_coeff].values

except KeyError as e:
key = e.args[0]
print("Missing entry in input table for error: {}".format(key),
file=stderr)
return 1

nan_rows = any_nan(dependent_vars,
independent_vars,
dependent_vars_error,
axis=1)
non_nan_rows = ~nan_rows
dependent_vars = dependent_vars[ non_nan_rows]
independent_vars = independent_vars[ non_nan_rows]
dependent_vars_error = dependent_vars_error[non_nan_rows]
mask = numpy.concatenate((non_nan_rows,
numpy.ones(n_coeff, dtype=bool)))
if args.error_method is not None:
fit, fit_err = args.error_method(dependent_vars,
independent_vars,
dependent_vars_error,
Expand All @@ -224,13 +223,8 @@ def main(args=None):
data[args.distance_label ] = fit
data[args.distance_error_label] = fit_err
else:
nan_rows = any_nan(dependent_vars, independent_vars, axis=1)
non_nan_rows = ~nan_rows
dependent_vars = dependent_vars[non_nan_rows]
independent_vars = independent_vars[non_nan_rows]
mask = numpy.concatenate((non_nan_rows,
numpy.ones(n_coeff, dtype=bool)))
fit = leavitt_law(dependent_vars, independent_vars,
dependent_vars_error,
args.add_const, args.fit_modulus,
args.sigma_method, args.sigma,
args.mean_modulus, args.units,
Expand Down
75 changes: 68 additions & 7 deletions src/leavitt/regression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy
import statsmodels.api as sm
from leavitt.utils import identity, zscore

from sys import stdout, stderr
Expand Down Expand Up @@ -50,14 +51,19 @@ def design_matrix(dependent_vars, independent_vars, add_const=False):

return design_matrix, numpy.transpose(dependent_vars).flatten()

def modulus_design_matrix(n_bands, n_samples):
    """Build the distance-modulus design matrix.

    Stacks one (n_samples x n_samples) identity block per band, so each
    sample's modulus parameter multiplies its own residual in every band.
    Returns an array of shape (n_bands * n_samples, n_samples).
    """
    identity_block = numpy.eye(n_samples)
    return numpy.vstack([identity_block] * n_bands)

def stack_vars(variables):
    """Flatten a 2-D (n_samples, n_vars) array into a single 1-D vector.

    The array is transposed before flattening, so the output is
    column-major: all samples of the first variable come first, then all
    samples of the second variable, and so on.  This matches the row
    ordering produced by the stacked design matrices used elsewhere in
    this module.

    The parameter was renamed from ``vars`` to avoid shadowing the
    builtin of the same name; callers in this module use it positionally.
    """
    return numpy.transpose(variables).flatten()

def simple_leavitt_law(dependent_vars, independent_vars, add_const, rcond,
debug=False):
n_samples, n_vars = dependent_vars.shape

X = simple_design_matrix(independent_vars, add_const, n_vars)
y = numpy.transpose(dependent_vars).flatten()

y = stack_vars(dependent_vars)

if debug:
_print_debug(X, y)
Expand All @@ -72,14 +78,40 @@ def simple_leavitt_law(dependent_vars, independent_vars, add_const, rcond,

return fit




def leavitt_law(dependent_vars, independent_vars, add_const=False,
fit_modulus=False,
def _err(intrinsic, photometric, slope, zero_point, logP):
return numpy.sqrt(
intrinsic**2 +
photometric**2 +
(slope*logP)**2 +
zero_point**2
)

def error_leavitt_law(intrinsic_error, photometric_error,
                      slope_error, zero_point_error,
                      logP):
    """Propagate the error sources of a Leavitt-law fit in quadrature.

    Parameters
    ----------
    intrinsic_error : float
        Scatter intrinsic to the period-luminosity relation, shared by
        every sample and band.
    photometric_error : ndarray, shape (n_samples, n_bands)
        Measurement error on each magnitude.
    slope_error : ndarray, shape (n_bands,)
        Standard error of the fitted slope in each band.
    zero_point_error : ndarray, shape (n_bands,)
        Standard error of the fitted zero point in each band.
    logP : ndarray, 2-D
        Log-periods; only the first column is used (every band shares the
        same log-period), matching the original per-band loop.

    Returns
    -------
    ndarray, shape (n_samples, n_bands)
        sqrt(intrinsic^2 + photometric^2 + (slope*logP)^2 + zero_point^2)
        evaluated elementwise.

    Notes
    -----
    The explicit loop over bands (and the leftover commented-out debug
    prints) were replaced with a single broadcast expression: keeping
    logP as an (n_samples, 1) column lets the per-band error vectors
    broadcast across it.  This also avoids the ``numpy.empty_like``
    pitfall where an integer-typed input array would silently truncate
    the assigned float results.
    """
    # (n_samples, 1) column broadcasts against the (n_bands,) vectors.
    logP_col = logP[:, :1]
    return numpy.sqrt(
        intrinsic_error**2 +
        photometric_error**2 +
        (slope_error * logP_col)**2 +
        zero_point_error**2
    )

def leavitt_law(dependent_vars, independent_vars,
dependent_vars_error,
add_const=False, fit_modulus=False,
sigma_method=zscore, sigma=0.0,
mean_modulus=0.0, unit_conversion=identity,
rcond=1e-3, max_iter=20,
intrinsic_error=0.05,
debug=False):
n_samples, n_vars = dependent_vars.shape
n_coeff = (1+add_const)*n_vars
Expand All @@ -90,6 +122,35 @@ def leavitt_law(dependent_vars, independent_vars, add_const=False,
add_const, rcond, debug)
# if sigma is 0 or less, do not perform any outlier detection
if sigma <= 0:
X_pl = simple_design_matrix(independent_vars, add_const, n_vars)
y = stack_vars(dependent_vars)
model_pl = sm.OLS(y, X_pl)
results_pl = model_pl.fit()
coeffs_pl = results_pl.params

stderr_pl = results_pl.HC0_se
slope_err = stderr_pl[0::2]
zero_point_err = stderr_pl[1::2]
error = stack_vars(error_leavitt_law(intrinsic_error,
dependent_vars_error,
slope_err, zero_point_err,
independent_vars))
# pl_coeffs, pl_residuals, pl_rank, pl_sv = numpy.linalg.lstsq(X_pl, y)

fitted_y = results_pl.fittedvalues
residuals = y - fitted_y

X_modulus = modulus_design_matrix(n_vars, n_samples)
model_modulus = sm.WLS(residuals, X_modulus, error)
results_modulus = model_modulus.fit()
coeffs_modulus = results_modulus.params

dist = unit_conversion(coeffs_modulus + mean_modulus)
ret = numpy.concatenate((dist, coeffs_pl))
if debug:
print(ret, file=stderr)
return ret

X, y = design_matrix(dependent_vars, independent_vars, add_const)
if debug:
_print_debug(X, y)
Expand Down

0 comments on commit 8337e39

Please sign in to comment.