diff --git a/setup.py b/setup.py index d8452e7..65432b2 100644 --- a/setup.py +++ b/setup.py @@ -7,8 +7,6 @@ Plotypus is built on top of numpy, matplotlib, and scikit. """ - - DOCLINES = __doc__.split("\n") CLASSIFIERS = """\ @@ -46,12 +44,13 @@ def setup_package(): version = get_version_info(), package_dir = {'': 'src'}, packages = [ - 'plotypus' + 'plotypus', + 'plotypus_scripts' ], entry_points = { 'console_scripts': [ - 'plotypus_demo = plotypus.demo:main', - 'plotypus = plotypus.plotypus:main' + 'plotypus_demo = plotypus_scripts.demo:main', + 'plotypus = plotypus_scripts.plotypus:main' ] }, keywords = [ diff --git a/src/plotypus_scripts/__init__.py b/src/plotypus_scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/plotypus/demo.py b/src/plotypus_scripts/demo.py similarity index 96% rename from src/plotypus/demo.py rename to src/plotypus_scripts/demo.py index 0337f74..6c2cfe7 100644 --- a/src/plotypus/demo.py +++ b/src/plotypus_scripts/demo.py @@ -1,8 +1,8 @@ import numpy as np from sklearn.linear_model import LinearRegression, LassoCV from sklearn.pipeline import Pipeline -from .Fourier import Fourier -from .utils import colvec +from plotypus.Fourier import Fourier +from plotypus.utils import colvec import matplotlib matplotlib.use('PDF') diff --git a/src/plotypus/plotypus.py b/src/plotypus_scripts/plotypus.py similarity index 97% rename from src/plotypus/plotypus.py rename to src/plotypus_scripts/plotypus.py index 2f1142c..9ba370f 100644 --- a/src/plotypus/plotypus.py +++ b/src/plotypus_scripts/plotypus.py @@ -2,7 +2,7 @@ from sys import exit from os import path, listdir from optparse import OptionParser -from .lightcurve import get_lightcurve, plot_lightcurve +from plotypus.lightcurve import get_lightcurve, plot_lightcurve def get_ops(): parser = OptionParser() diff --git a/src/src/plotypus/Fourier.py b/src/src/plotypus/Fourier.py new file mode 100644 index 0000000..bab80e0 --- /dev/null +++ b/src/src/plotypus/Fourier.py @@ -0,0 +1,35 @@ +import numpy + +__all__ = [ + 'Fourier', + 'trigonometric_coefficient_matrix' +] + +class Fourier(): + def __init__(self, degree=3): + self.degree = degree + + def fit(self, X, y=None): + return self + + def transform(self, X, y=None): + data = numpy.array(list(zip(numpy.array(X).T[0], range(len(X))))) + phase, order = data[data[:,0].argsort()].T + coefficients = trigonometric_coefficient_matrix(phase, self.degree) + return numpy.array([mag for (orig, mag) # Put back in original order + in sorted(zip(order, coefficients), + key=lambda pair: pair[0])]) + + def get_params(self, deep): + return {'degree': self.degree} + + def set_params(self, **params): + if 'degree' in params: + self.degree = params['degree'] + +def trigonometric_coefficient_matrix(phases, degree): + return numpy.array([[numpy.cos(numpy.pi*(j+1)*phases[i]) if j % 2 == 0 #even + else numpy.sin(numpy.pi*j*phases[i]) #odd + for j in range(2*degree)] + for i in range(len(phases))]) + diff --git a/src/src/plotypus/__init__.py b/src/src/plotypus/__init__.py new file mode 100644 index 0000000..e219da5 --- /dev/null +++ b/src/src/plotypus/__init__.py @@ -0,0 +1,6 @@ +__all__ = [ + 'Fourier', + 'lightcurve', + 'periodogram', + 'utils' +] diff --git a/src/src/plotypus/lightcurve.py b/src/src/plotypus/lightcurve.py new file mode 100644 index 0000000..8071f27 --- /dev/null +++ b/src/src/plotypus/lightcurve.py @@ -0,0 +1,116 @@ +import numpy +from math import floor +from os import path +from .utils import make_sure_path_exists, get_signal, get_noise, colvec +from .periodogram import find_period, rephase, get_phase +from .Fourier import Fourier +from sklearn.linear_model import LassoCV +from sklearn.pipeline import Pipeline +from sklearn.grid_search import GridSearchCV +import warnings +import matplotlib as mpl +mpl.use('Agg') +import matplotlib.pyplot as plt + +__all__ = [ + 'get_lightcurve', + 'find_outliers', + 'plot_lightcurve' +] + +def get_lightcurve(filename, fourier_degree=15, cv=10, + min_period=0.2, max_period=32, + coarse_precision=0.001, fine_precision=0.0000001, + sigma=5, min_phase_cover=2/3., + phases=numpy.arange(0, 1, 0.01), **options): + + # Initialize predictor + pipeline = Pipeline([('Fourier', Fourier()), ('Lasso', LassoCV(cv=cv))]) + params = {'Fourier__degree': list(range(3, 1+fourier_degree))} + predictor = GridSearchCV(pipeline, params) + + # Load file + data = numpy.ma.masked_array(data=numpy.loadtxt(filename), mask=None) + + while True: + # Find the period of the inliers + signal = get_signal(data) + period = find_period(signal.T[0], signal.T[1], min_period, max_period, + coarse_precision, fine_precision) + phase, mag, err = rephase(signal, period).T + + # Determine whether there is sufficient phase coverage + coverage = numpy.zeros((100)) + for p in phase: + coverage[int(floor(p*100))] = 1 + if sum(coverage)/100. < min_phase_cover: + print(sum(coverage)/100., min_phase_cover) + print("Insufficient phase coverage") + return None + + # Predict light curve + with warnings.catch_warnings(record=True) as w: + try: + predictor = predictor.fit(colvec(phase), mag) + except Warning: + print(w) + return None + + # Reject outliers and repeat the process if there are any + if sigma: + outliers = find_outliers(data.data, period, predictor, sigma) + if set.issubset(set(numpy.nonzero(outliers.T[0])[0]), + set(numpy.nonzero(data.mask.T[0])[0])): + break + print("Rejecting", sum(outliers)[0], "outliers") + data.mask = numpy.ma.mask_or(data.mask, outliers) + + # Build light curve + lc = predictor.predict([[i] for i in phases]) + + # Shift to max light + arg_max_light = lc.argmin() + lc = numpy.concatenate((lc[arg_max_light:], lc[:arg_max_light])) + data.T[0] = numpy.array([get_phase(p, period, arg_max_light / 100.) + for p in data.data.T[0]]) + + return period, lc, data + +def find_outliers(data, period, predictor, sigma): + phase, mag, err = rephase(data, period).T + phase = numpy.resize(phase, (phase.shape[0], 1)) + residuals = abs(predictor.predict(phase) - mag) + mse = numpy.array([0 if residual < error else (residual - error)**2 + for residual, error in zip(residuals, err)]) + return numpy.tile(numpy.vstack(mse > sigma * mse.std()), data.shape[1]) + +def plot_lightcurve(output, filename, lc, period, data, + phases=numpy.arange(0, 1, 0.01), + grid=True, invert=True): + ax = plt.gca() + ax.grid(grid) + if invert: + ax.invert_yaxis() + plt.xlim(-0.1,2.1) + + # Plot the fitted light curve + plt.plot(numpy.hstack((phases,1+phases)), numpy.hstack((lc, lc)), + linewidth=1.5, color='r') + + # Plot points used + phase, mag, err = get_signal(data).T + plt.errorbar(numpy.hstack((phase,1+phase)), numpy.hstack((mag, mag)), + yerr = numpy.hstack((err,err)), ls='None', ms=.01, mew=.01) + + # Plot outliers rejected + phase, mag, err = get_noise(data).T + plt.errorbar(numpy.hstack((phase,1+phase)), numpy.hstack((mag, mag)), + yerr = numpy.hstack((err,err)), ls='None', ms=.01, mew=.01, + color='r') + + plt.xlabel('Phase ({0:0.7} day period)'.format(period)) + plt.ylabel('Magnitude') + plt.title(filename.split('.')[0]) + make_sure_path_exists(output) + plt.savefig(path.join(output, filename + '.png')) + plt.clf() diff --git a/src/src/plotypus/periodogram.py b/src/src/plotypus/periodogram.py new file mode 100644 index 0000000..1ef6ab7 --- /dev/null +++ b/src/src/plotypus/periodogram.py @@ -0,0 +1,36 @@ +import numpy +from scipy.signal import lombscargle +from math import modf + +__all__ = [ + 'find_period', + 'LombScargle', + 'rephase', + 'get_phase' +] + +def find_period(time, mags, min_period, max_period, + coarse_precision, fine_precision, method=None): + if min_period >= max_period: return min_period + scaled_mags = (mags-mags.mean())/mags.std() + coarse_period = LombScargle(time, scaled_mags, coarse_precision, + min_period, max_period) + if coarse_precision <= fine_precision: return coarse_period + return LombScargle(time, scaled_mags, fine_precision, + coarse_period - coarse_precision, + coarse_period + coarse_precision) + +def LombScargle(time, scaled_mags, precision, min_period, max_period): + minf, maxf = 2*numpy.pi/max_period, 2*numpy.pi/min_period + freqs = numpy.arange(minf, maxf, precision) + pgram = lombscargle(time, scaled_mags, freqs) + return 2*numpy.pi/freqs[numpy.argmax(pgram)] + +def rephase(data, period=1, col=0): + rephased = numpy.ma.copy(data) + rephased.T[col] = [get_phase(x[col], period) + for x in rephased] + return rephased + +def get_phase(time, period=1, offset=0): + return (modf(time/period)[0]-offset)%1 diff --git a/src/src/plotypus/utils.py b/src/src/plotypus/utils.py new file mode 100644 index 0000000..9519bb5 --- /dev/null +++ b/src/src/plotypus/utils.py @@ -0,0 +1,30 @@ +from os import makedirs +from os.path import isdir +from numpy import resize + +__all__ = [ + 'make_sure_path_exists', + 'get_signal', + 'get_noise', + 'colvec' +] + +def make_sure_path_exists(path): + """Creates the supplied path. Raises OS error if the path cannot be + created.""" + try: + makedirs(path) + except OSError: + if not isdir(path): + raise + +def get_signal(data): + """Returns all of the values that are not outliers.""" + return data[~data.mask].data.reshape(-1, data.shape[1]) + +def get_noise(data): + """Returns all identified outliers""" + return data[data.mask].data.reshape(-1, data.shape[1]) + +def colvec(X): + return resize(X, (X.shape[0], 1)) diff --git a/src/src/plotypus_scripts/__init__.py b/src/src/plotypus_scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/src/plotypus_scripts/demo.py b/src/src/plotypus_scripts/demo.py new file mode 100644 index 0000000..6c2cfe7 --- /dev/null +++ b/src/src/plotypus_scripts/demo.py @@ -0,0 +1,63 @@ +import numpy as np +from sklearn.linear_model import LinearRegression, LassoCV +from sklearn.pipeline import Pipeline +from plotypus.Fourier import Fourier +from plotypus.utils import colvec + +import matplotlib +matplotlib.use('PDF') +from matplotlib import rc +rc('font',**{'family':'serif','serif':['Latin Modern']}) +rc('text', usetex=True) +import matplotlib.pyplot as plt + +def lc(X): + return 10 + np.cos(2*np.pi*X) + 0.1*np.cos(18*np.pi*X) + +def main(): + X_true = np.linspace(0, 1, 101) + y_true = lc(X_true) + + n_samples = 50 + X_sample = np.random.uniform(size=n_samples) + y_sample = lc(X_sample) + np.random.normal(0, 0.1, n_samples) + + predictor = Pipeline([('Fourier', Fourier(9)), + ('OLS', LinearRegression())]) + predictor = predictor.fit(colvec(X_sample), y_sample) + y_pred = predictor.predict(colvec(X_true)) + + predictor = Pipeline([('Fourier', Fourier(9)), + ('Lasso', LassoCV())]) + predictor = predictor.fit(colvec(X_sample), y_sample) + y_lasso = predictor.predict(colvec(X_true)) + + ax = plt.gca() + signal, = plt.plot(np.hstack((X_true,1+X_true)), + np.hstack((y_true, y_true)), + linewidth=1.5, + color='black') + + fd, = plt.plot(np.hstack((X_true,1+X_true)), np.hstack((y_pred, y_pred)), + linewidth=1.5, color='black', ls='dotted') + + lasso, = plt.plot(np.hstack((X_true,1+X_true)), + np.hstack((y_lasso, y_lasso)), + linewidth=1.5, + color='black', + ls='dashed') + + sc = plt.scatter(np.hstack((X_sample,1+X_sample)), + np.hstack((y_sample, y_sample)), + color='black') + + plt.legend([signal, sc, fd, lasso], + ["Signal", "Noisy Data", "FD", "Lasso FD"], + loc='best') + + plt.xlim(0,2) + plt.xlabel('Phase') + plt.ylabel('Magnitude') + plt.title('Simulated Lightcurve Example') + plt.savefig('demo.pdf') + plt.clf() diff --git a/src/src/plotypus_scripts/plotypus.py b/src/src/plotypus_scripts/plotypus.py new file mode 100644 index 0000000..9ba370f --- /dev/null +++ b/src/src/plotypus_scripts/plotypus.py @@ -0,0 +1,56 @@ +import numpy +from sys import exit +from os import path, listdir +from optparse import OptionParser +from plotypus.lightcurve import get_lightcurve, plot_lightcurve + +def get_ops(): + parser = OptionParser() + parser.add_option('-i', '--input', type='string', + default=path.join('..', 'data', 'lmc', 'i', 'cep', 'f'), + help='location of stellar observations',) + parser.add_option('-o', '--output', type='string', + default=path.join('..', 'results'), + help='location of results') + parser.add_option('--min_period', dest='min_period', type='float', + default=0.2, help='minimum period of each star') + parser.add_option('--max_period', dest='max_period', type='float', + default=32., help='maximum period of each star') + parser.add_option('--coarse_precision', dest='coarse_precision', type='int', + default=0.001, help='level of granularity on first pass') + parser.add_option('--fine_precision', dest='fine_precision', type='int', + default=0.0000001, help='level of granularity on second pass') + parser.add_option('--fourier_degree', dest='fourier_degree', type='int', + default=15, help='number of coefficients to generate') + parser.add_option('--sigma', dest='sigma', type='float', + default=4, help='rejection criterion for outliers') + parser.add_option('--cv', dest='cv', type='int', + default=10, help='number of folds in the L1-regularization search') + parser.add_option('--min_phase_cover', dest='min_phase_cover', type='float', + default=1/2., help='minimum fraction of phases that must have points') + (options, args) = parser.parse_args() + return options + +def main(): + ops = get_ops() + lcs = [] + for filename in sorted(listdir(ops.input)): + print(filename) + star = get_lightcurve(path.join(ops.input, filename), + ops.fourier_degree, ops.cv, + ops.min_period, ops.max_period, + ops.coarse_precision, ops.fine_precision, + ops.sigma, ops.min_phase_cover) + + if star is not None: + period, lc, data = star + lcs += [[period] + list(lc)] + plot_lightcurve(ops.output, filename, lc, period, data) + + numpy.savetxt(path.join(ops.output, 'lightcurves.dat'), + numpy.array(lcs), fmt='%.5f', + header='Period ' + \ + ' '.join(['Phase' + str(i) for i in range(100)])) + +if __name__ == "__main__": + exit(main())