-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfit_spectra.py
285 lines (225 loc) · 11 KB
/
fit_spectra.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
#!/usr/bin/env python
"""
This fits SpFrame spectra with airglow lines and a simple model for the continuum,
including zodiacal light. The fit is done using a simple linear regression.
After running the least squares fit, the fitted lines are separated from the continuum and
the lines, continuum and residuals are returned independently.
INPUT: * SpFrame flux files as .npy files with wavelength and sky spectra
* Also needs a list of airglow lines; these should be in the GitHub repo.
OUTPUT: numpy files identified in the same way as the sky_flux files, with the plate number
and the file identifier "split_fit". The numpy arrays have the following fields: PLATE, COLOR, SPECNO, WAVE, LINES, CONT, RESIDS, R
Before running, identify the directory that the spframe flux files are kept and where the airglow lines are
saved. Also identify where you want to save the files generated by this program (SAVE_DIR)
Title: SpFrame Flux Spectra Fit
Author: P. Fagrelius
Date: June, 2017
"""
import glob
import os
from datetime import datetime
import multiprocessing
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.special import eval_legendre
class FitSpectra(object):
    """Split SpFrame sky spectra into airglow-line, continuum and residual parts.

    Each spectrum is modelled as a linear combination of Gaussian airglow-line
    profiles and a few Legendre polynomials (the continuum).  The coefficients
    come from an ordinary least-squares fit, after which the line part, the
    continuum part and the residuals are stored separately.

    Typical use: ``FitSpectra().run()``.
    """

    # Default locations; override them through the constructor arguments.
    DEFAULT_SAVE_DIR = '/global/cscratch1/sd/parkerf/split_flux/blue_split/'
    DEFAULT_SPECTRA_DIR = '/global/cscratch1/sd/parkerf/sky_flux/'

    # Record layout of one fitted spectrum (one row per wavelength pixel).
    FIT_DTYPE = [('PLATE', 'i4'), ('COLOR', 'S2'), ('SPECNO', 'i4'), ('WAVE', 'f8'),
                 ('LINES', 'f8'), ('CONT', 'f8'), ('RESIDS', 'f8'), ('R', 'f8')]

    def __init__(self, save_dir=None, spectra_dir=None, ttype='blue'):
        """Locate the input/output files and load the metadata table.

        Parameters
        ----------
        save_dir : str, optional
            Directory that receives the ``<plate>_split_fit.npy`` files.
            Defaults to ``DEFAULT_SAVE_DIR``.
        spectra_dir : str, optional
            Directory holding the ``*_calibrated_sky.npy`` SpFrame flux files
            and ``meta_rich.npy``.  Defaults to ``DEFAULT_SPECTRA_DIR``.
        ttype : str, optional
            Which spectra to fit: 'test' (10 total), 'blue', 'red' or 'full'.
        """
        # Directory to save data
        self.SAVE_DIR = self.DEFAULT_SAVE_DIR if save_dir is None else save_dir
        # Directory where all SpFrame flux files reside
        self.SPECTRA_DIR = self.DEFAULT_SPECTRA_DIR if spectra_dir is None else spectra_dir

        # Files already produced by an earlier run, and all available inputs.
        self.SAVED_FILES = glob.glob(os.path.join(self.SAVE_DIR, "*_split_fit.npy"))
        self.SPECTRA_FILES = glob.glob(os.path.join(self.SPECTRA_DIR, "*_calibrated_sky.npy"))

        # Get metadata
        self.MetaData = np.load(os.path.join(self.SPECTRA_DIR, 'meta_rich.npy'))
        print("got MetaData")

        # Identify which data you want to look at
        self.ttype = ttype

    @staticmethod
    def _plate_id(path):
        """Return the plate identifier: the file name up to its first underscore."""
        return os.path.basename(path).split('_', 1)[0]

    @staticmethod
    def _pack_results(data):
        """Bundle per-spectrum fit arrays into a single array for ``np.save``.

        Equal-length fits become a regular 2-D array.  Fits of differing length
        (cleaning can drop a different number of pixels per spectrum) are
        stored in a 1-D object array, because recent NumPy versions refuse to
        build a ragged array implicitly.  An object array is pickled by
        ``np.save`` and needs ``allow_pickle=True`` when it is loaded.
        """
        if len(set(len(d) for d in data)) <= 1:
            return np.array(data)
        packed = np.empty(len(data), dtype=object)
        for i, d in enumerate(data):
            packed[i] = d
        return packed

    def run(self, processes=12):
        """Fit every outstanding plate file, one plate per worker process.

        Parameters
        ----------
        processes : int, optional
            Size of the multiprocessing pool.
        """
        self.get_plates_needed()
        self.get_airglow_spectra()
        self.get_vac_lines()
        print("got Airglow Lines")

        pool = multiprocessing.Pool(processes=processes)
        try:
            pool.map(self.fit_and_separate_spectra, self.SPECTRA)
        finally:
            # Always release the workers, even if one of the fits raised.
            pool.terminate()

    def get_plates_needed(self):
        """Select the spectra files that have no saved fit yet.

        Comparing against the files already in SAVE_DIR means an interrupted
        run can be restarted without redoing finished plates.  Sets
        ``self.SPECTRA``.
        """
        complete = set(self._plate_id(path) for path in self.SAVED_FILES)
        self.SPECTRA = [path for path in self.SPECTRA_FILES
                        if self._plate_id(path) not in complete]
        print('Will be analyzing %d plate files' % len(self.SPECTRA))

    def get_airglow_spectra(self):
        """Load the airglow line lists (Cosby et al.) into one record array.

        Reads every ``*.txt`` file in ``<cwd>/AirglowSpectra/cosby/`` as a
        whitespace-delimited table and stacks them into ``self.AirglowLines``.

        Raises
        ------
        ValueError
            If no line-list file is found.
        """
        self.AIRGLOW_DIR = os.getcwd()+'/AirglowSpectra/cosby/'
        line_files = sorted(glob.glob(os.path.join(self.AIRGLOW_DIR, '*.txt')))
        if not line_files:
            raise ValueError("No airglow line lists (*.txt) found in %s" % self.AIRGLOW_DIR)

        tables = []
        for path in line_files:
            # sep=r'\s+' is the documented equivalent of the deprecated
            # delim_whitespace=True.
            table = pd.read_csv(path, sep=r'\s+')
            tables.append(np.array(table.to_records(index=False)))
        self.AirglowLines = np.hstack(tables)

    def get_vac_lines(self):
        """Pick the airglow lines used in the fit for each CCD.

        Lines with ``obs_eint`` > 5 are kept and converted to vacuum
        wavelengths.  The blue set keeps lines below 700 and the red set lines
        above 560, in the units of ``obs_wave`` (air_to_vac treats its input
        as nanometres).  Sets ``self.BlueVacLines`` and ``self.RedVacLines``.
        """
        strong = self.AirglowLines['obs_eint'] > 5
        vac_lines = self.air_to_vac(self.AirglowLines['obs_wave'])[strong]
        self.BlueVacLines = vac_lines[vac_lines < 700]
        self.RedVacLines = vac_lines[vac_lines > 560]

    def clean_spectra(self, spectrum):
        """Drop pixels whose sky flux is nan/inf so the least-squares fit runs.

        Stores the surviving WAVE, SKY, IVAR and DISP values as ``self.wave``,
        ``self.sky``, ``self.ivar`` and ``self.disp``.
        """
        ok = np.isfinite(spectrum['SKY'])
        self.wave = spectrum['WAVE'][ok]
        self.sky = spectrum['SKY'][ok]
        self.ivar = spectrum['IVAR'][ok]
        self.disp = spectrum['DISP'][ok]

    def air_to_vac(self, wave):
        """Convert air wavelengths to vacuum wavelengths (Edlen 1966).

        ``vac_wave = n * air_wave`` with the refractive index of air

            n = 1 + 1e-8 * (8342.13 + 2406030/(130 - s2) + 15997/(38.9 - s2))

        where ``s2`` is the squared wavenumber in inverse microns squared.

        Parameters
        ----------
        wave : float or array-like
            Wavelength(s) in air, in nanometres.

        Returns
        -------
        numpy.ndarray or numpy.float64
            Vacuum wavelength(s), same shape and units as ``wave``.
        """
        wave = np.asarray(wave, dtype=float)
        # Convert to um, then to squared wavenumber
        sigma2 = (1. / (wave * .001)) ** 2
        # Refractive index at every wavelength.  The last denominator is
        # 38.9 in Edlen's formula (not 389).
        n = 1 + 1e-8 * (8342.13 + 2406030 / (130. - sigma2) + 15997 / (38.9 - sigma2))
        # Get new wavelength by multiplying by index of refraction
        return n * wave

    def airglow_line_components(self, vaclines, wave_range, disp_range):
        """Build one unit-amplitude Gaussian profile per airglow line.

        Parameters
        ----------
        vaclines : sequence of float
            Line centres to model.
        wave_range : numpy.ndarray
            Wavelength grid of the spectrum.
        disp_range : numpy.ndarray or float
            Gaussian width (wavelength dispersion) at each grid point.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(len(vaclines), len(wave_range))``; row ``i`` is
            the profile of line ``i``.  With no lines, the result has zero
            rows.
        """
        profiles = [np.exp(-0.5 * ((wave_range - line) / disp_range) ** 2)
                    for line in vaclines]
        if not profiles:
            # np.vstack cannot stack an empty list; return an empty basis.
            return np.zeros((0, np.size(wave_range)))
        return np.vstack(profiles)

    def linear_model(self, spectrum, num_cont, vaclines):
        """Fit one spectrum by ordinary least squares and split the model.

        This is the heart of the program.  The design matrix holds
        ``num_cont`` Legendre polynomials (evaluated directly on the
        wavelength values) followed by one Gaussian per airglow line.  The fit
        is unweighted: ``self.ivar`` is stored by clean_spectra but not used.

        Parameters
        ----------
        spectrum : numpy structured array
            One sky spectrum with WAVE, SKY, IVAR and DISP fields.
        num_cont : int
            Number of polynomials used for the continuum.
        vaclines : sequence of float
            Airglow line centres to fit (different for blue and red).

        Returns
        -------
        list
            ``[wave, lines, cont, res, R]``: the cleaned wavelength grid, the
            fitted line component, the fitted continuum, the residuals
            (sky - model) and the R^2 goodness of fit.
        """
        self.clean_spectra(spectrum)
        line_basis = self.airglow_line_components(vaclines, self.wave, self.disp)

        # Continuum model
        cont_basis = np.array([eval_legendre(order, self.wave)
                               for order in range(num_cont)])

        # One column per basis function: continuum first, then lines.
        A = np.ascontiguousarray(np.vstack((cont_basis, line_basis)).T)

        # Create model
        results = sm.OLS(self.sky, A).fit()
        params = results.params
        model = np.dot(A, params)

        # Separate
        cont = np.dot(A[:, :num_cont], params[:num_cont])
        lines = np.dot(A[:, num_cont:], params[num_cont:])
        res = self.sky - model

        # R^2
        ss_res = np.sum(res ** 2)
        ss_tot = np.sum((self.sky - np.mean(self.sky)) ** 2)
        R = 1 - (ss_res / ss_tot)

        return [self.wave, lines, cont, res, R]

    def get_specnos(self, spectra_file):
        """Load one plate file and choose which of its spectra to fit.

        Sets ``self.plate_num``, ``self.spectra``, ``self.this_plate`` (the
        metadata rows of this plate), ``self.specnos`` (selected according to
        ``self.ttype``) and ``self.spectra_length``.  Called once per plate
        from fit_and_separate_spectra, inside the multiprocessing workers.
        """
        self.plate_num = self._plate_id(spectra_file)
        print("Fitting spectra in plate %s" % self.plate_num)
        self.spectra = np.load(spectra_file)
        self.this_plate = self.MetaData[self.MetaData['PLATE'] == int(self.plate_num)]

        def on_cameras(first, second):
            # SPECNO values of the rows taken with either of the two cameras.
            cameras = self.this_plate['CAMERAS']
            return self.this_plate[(cameras == first) | (cameras == second)]['SPECNO']

        if self.ttype == 'test':
            self.specnos = self.this_plate[0:10]['SPECNO']
        elif self.ttype == 'blue':
            self.specnos = on_cameras(b'b1', b'b2')
        elif self.ttype == 'red':
            self.specnos = on_cameras(b'r1', b'r2')
        elif self.ttype == 'full':
            self.specnos = self.this_plate['SPECNO']
        else:
            print("not a valid type. Going to test")
            # Random sample of 10 spectra; mostly for debugging.
            self.specnos = np.random.choice(self.this_plate['SPECNO'], size=10)
        self.spectra_length = len(self.specnos)

    def fit_and_separate_spectra(self, spectra_file):
        """Fit all selected spectra of one plate file and save the result.

        The output is ``<SAVE_DIR>/<plate>_split_fit.npy``.  It holds, for
        every fitted spectrum, the model components (lines, continuum,
        residuals), the wavelengths and some metadata (see FIT_DTYPE).
        Spectra that could not be fitted (unknown camera, no metadata) are
        left out.
        """
        start = datetime.now()
        self.get_specnos(spectra_file)

        fits = (self.fit_and_split_spectrum(specno) for specno in self.specnos)
        data = [fit for fit in fits if fit is not None]

        np.save(os.path.join(self.SAVE_DIR, self.plate_num + '_split_fit'),
                self._pack_results(data))
        total_time = (datetime.now() - start).total_seconds()
        print("Total time for spectra %s: %f" %(self.plate_num,total_time))

    def fit_and_split_spectrum(self, specno):
        """Fit a single spectrum of the current plate.

        Blue cameras (b1/b2) use 3 continuum terms and the blue line list.
        Red cameras (r1/r2) use 2 continuum terms and the red line list.

        Parameters
        ----------
        specno : int
            SPECNO of the spectrum; also its index into ``self.spectra``.

        Returns
        -------
        numpy structured array or None
            One record per wavelength pixel with the FIT_DTYPE fields, or
            ``None`` when the spectrum has no metadata row or its camera is
            not recognised.
        """
        print('splitting spectra %d/%d for plate %s' % (specno, self.spectra_length, self.plate_num))
        matches = self.this_plate[self.this_plate['SPECNO'] == specno]
        if len(matches) == 0:
            print("No metadata found for specno %d" % specno)
            return None
        if len(matches) > 1:
            print("This observation had more than one specno with that number")
        this_obs = matches[0]

        camera = this_obs['CAMERAS']
        if camera in (b'b1', b'b2'):
            model = self.linear_model(self.spectra[specno], 3, self.BlueVacLines)
        elif camera in (b'r1', b'r2'):
            model = self.linear_model(self.spectra[specno], 2, self.RedVacLines)
        else:
            print("Don't recognize the camera")
            return None

        wave, lines, cont, resids, r_squared = model
        model_fit = np.zeros(len(wave), dtype=self.FIT_DTYPE)
        model_fit['PLATE'] = int(self.plate_num)
        model_fit['COLOR'] = camera
        model_fit['SPECNO'] = specno
        model_fit['WAVE'] = wave
        model_fit['LINES'] = lines
        model_fit['CONT'] = cont
        model_fit['RESIDS'] = resids
        model_fit['R'] = r_squared
        return model_fit
if __name__ == "__main__":
    # Script entry point: build the fitter with its default settings and
    # process every plate that does not have a saved fit yet.
    FitSpectra().run()