Commit 54fb34d

Author: James White

Merge pull request #144 from spacetelescope/130-update-nuvosmshift
130 update nuvosmshift
2 parents 7c6b21c + 4c579a5 commit 54fb34d

24 files changed: +569, -109 lines

.travis.yml

Lines changed: 4 additions & 1 deletion
@@ -28,6 +28,8 @@ env:
   - SETUP_CMD='test'
   - COSMO_CONFIG='/home/travis/build/spacetelescope/cosmo/tests/cosmoconfig_test.yaml'
   - MONITOR_CONFIG='/home/travis/build/spacetelescope/cosmo/tests/cosmoconfig_test.yaml'
+  - CRDS_SERVER_URL='https://hst-crds.stsci.edu'
+  - CRDS_PATH='/home/travis/build/spacetelescope/cosmo/tests/data/test_crds_cache'
 
 install:
   # USE UTF8 ENCODING. SHOULD BE DEFAULT, BUT THIS IS INSURANCE AGAINST FUTURE CHANGES
@@ -59,6 +61,7 @@ install:
   # Install package with pip
   - pip install .
 
-script: coverage run -m pytest
+script:
+  - coverage run -m pytest
 
 after_success: codecov
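
The two new environment variables configure the crds client that the reference-file lookups added in cosmo/filesystem.py (below) rely on. A minimal sketch of how they are typically consumed, assuming standard crds client behavior; the cache path and reference filename here are illustrative:

import os

# Same variables the Travis config above exports for the test run; values are illustrative.
os.environ['CRDS_SERVER_URL'] = 'https://hst-crds.stsci.edu'
os.environ['CRDS_PATH'] = '/path/to/local/crds_cache'

import crds  # imported after the environment is configured

# Resolve a bare reference filename (e.g. the value of a LAMPTAB header keyword with the
# 'lref$' prefix stripped) to its expected location inside the local CRDS cache.
reference_path = crds.locate_file('x1v17414l_lamp.fits', 'hst')  # illustrative filename
print(reference_path)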

cosmo/filesystem.py

Lines changed: 134 additions & 11 deletions
@@ -1,6 +1,9 @@
 import os
 import dask
 import re
+import crds
+import numpy as np
+import warnings
 
 from glob import glob
 from astropy.io import fits
@@ -43,7 +46,7 @@ class FileData(dict):
     def __init__(self, filename: str, header_keywords: Sequence, header_extensions: Sequence,
                  spt_suffix: str = 'spt.fits.gz', spt_keywords: Sequence = None, spt_extensions: Sequence = None,
                  data_keywords: Sequence = None, data_extensions: Sequence = None,
-                 header_defaults: Dict[str, Any] = None):
+                 header_defaults: Dict[str, Any] = None, reference_request: Dict[str, Dict[str, list]] = None):
         """Initialize and create the possible corresponding spt file name."""
         super().__init__(self)
 
@@ -69,15 +72,40 @@ def __init__(self, filename: str, header_keywords: Sequence, header_extensions:
         if len(data_keywords) != len(data_extensions):
             raise ValueError('data_keywords and data_extensions must be the same length.')
 
+        if reference_request:
+            for reference in reference_request.keys():
+                if not ('match' in reference_request[reference] and 'columns' in reference_request[reference]):
+                    raise ValueError('reference_requests require "columns", and "match" keys.')
+
+                if not isinstance(reference_request[reference]['columns'], list):
+                    raise TypeError('"columns" value in reference_request must be a list')
+
+                if not isinstance(reference_request[reference]['match'], list):
+                    raise TypeError('"match" value in reference_request must be a list')
+
         with fits.open(filename) as hdu:
             self.get_header_data(hdu, header_keywords, header_extensions, header_defaults)
 
             if data_keywords:
                 self.get_table_data(hdu, data_keywords, data_extensions)
 
+            if reference_request:
+                self.get_reference_data(hdu, reference_request)
+
         if spt_keywords:
             self.get_spt_header_data(spt_file, spt_keywords, spt_extensions)
 
+        self._convert_bytes_to_strings()
+
+    def _convert_bytes_to_strings(self):
+        """Convert byte-string arrays to strings. This affects reference files in particular, but can also be an issue
+        for older COS datatypes.
+        """
+        for key, value in self.items():
+            if isinstance(value, np.ndarray):
+                if value.dtype in ['S3', 'S4']:
+                    self[key] = value.astype(np.unicode_)
+
     @staticmethod
     def _create_spt_filename(filename: str, spt_suffix: str) -> Union[str, None]:
         """Create an spt filename based on the input filename."""
@@ -90,36 +118,130 @@ def _create_spt_filename(filename: str, spt_suffix: str) -> Union[str, None]:
 
         return
 
-    def get_header_data(self, hdu: fits.HDUList, header_keywords: Sequence,
-                        header_extensions: Sequence, header_defaults: dict = None):
+    def get_header_data(self, hdu: fits.HDUList, header_keywords: Sequence, header_extensions: Sequence,
+                        header_defaults: dict = None):
         """Get header data."""
         for key, ext in zip(header_keywords, header_extensions):
             if header_defaults is not None and key in header_defaults:
-                self.update({key: hdu[ext].header.get(key, default=header_defaults[key])})
+                self[key] = hdu[ext].header.get(key, default=header_defaults[key])
 
             else:
-                self.update({key: hdu[ext].header[key]})
+                self[key] = hdu[ext].header[key]
 
     def get_spt_header_data(self, spt_file: str, spt_keywords: Sequence, spt_extensions: Sequence):
         """Open the spt file and collect requested data."""
         with fits.open(spt_file) as spt:
-            self.update({key: spt[ext].header[key] for key, ext in zip(spt_keywords, spt_extensions)})
+            for key, ext in zip(spt_keywords, spt_extensions):
+                self[key] = spt[ext].header[key]
 
     def get_table_data(self, hdu: fits.HDUList, data_keywords: Sequence, data_extensions: Sequence):
-        """Get table data."""
-        self.update({key: hdu[ext].data[key] for key, ext in zip(data_keywords, data_extensions)})
+        """Get table data from the TableHDU."""
+        for key, ext in zip(data_keywords, data_extensions):
+            if key in self:
+                self[f'{key}_{ext}'] = hdu[ext].data[key]
+
+            else:
+                self[key] = hdu[ext].data[key]
+
+    @staticmethod
+    def _get_match_values(hdu: fits.HDUList, match_list: list):
+        """Get match key values from the input data."""
+        return {key: hdu[0].header[key] for key in match_list}
+
+    @staticmethod
+    def _get_reference_table(hdu: fits.HDUList, reference_name: str) -> Union[fits.fitsrec.FITS_rec, None]:
+        """Locate and read the requested reference file."""
+        # noinspection PyUnresolvedReferences
+        reference_path = crds.locate_file(hdu[0].header[reference_name].split('$')[-1], 'hst')
+
+        # Check for gzipped files
+        if not os.path.exists(reference_path):
+            reference_path += '.gz'
+
+            if not os.path.exists(reference_path):
+                return
+
+        try:  # Some older reference files actually have bad formats for some columns and are unreadable.
+            return fits.getdata(reference_path)
+
+        except ValueError:
+            return
+
+    def _get_matching_values(self, match_values: dict, reference_table: fits.fitsrec.FITS_rec, request: dict,
+                             reference_name: str):
+        """Find the row in the reference file data that corresponds to the values provided in match_values."""
+        for key, value in match_values.items():
+            try:
+                if isinstance(value, str):  # Different "generations" of ref files stored strings in different ways...
+                    reference_table = reference_table[
+                        (reference_table[key] == value) |
+                        (reference_table[key] == value + ' ') |
+                        (reference_table[key] == value.encode())
+                    ]
+
+                else:
+                    reference_table = reference_table[reference_table[key] == value]
+
+            except KeyError:
+                continue
+
+        if not len(reference_table):
+            raise ValueError(
+                f'A matching row could not be determined with the given parameters: {request["match"]}'
+                f'\nAvailable columns: {reference_table.names}'
+            )
+
+        for column in request['columns']:
+            if column in self:
+                try:
+                    self[f'{column}_{reference_name}'] = np.array(reference_table[column])  # No masked arrays
+
+                except KeyError:
+                    self[f'{column}_{reference_name}'] = np.zeros(1)
+
+            else:
+                try:
+                    self[column] = np.array(reference_table[column])
+
+                except KeyError:
+                    self[column] = np.zeros(1)
+
+    def get_reference_data(self, hdu: fits.HDUList, reference_request: Dict[str, Dict[str, list]]):
+        """Get data from requested reference files."""
+        for reference in reference_request.keys():
+            request = reference_request[reference]
+
+            ref_data = self._get_reference_table(hdu, reference)
+
+            if ref_data is not None:  # Unreadable reference files are set to empty numpy arrays
+                match_values = self._get_match_values(hdu, request['match'])
+
+                self._get_matching_values(match_values, ref_data, request, reference)
+
+            else:
+                for column in request['columns']:
+                    if column in self:
+                        self[f'{column}_{reference}'] = np.zeros(1)
+
+                    else:
+                        self[column] = np.zeros(1)
 
 
 def get_file_data(fitsfiles: List[str], keywords: Sequence, extensions: Sequence, spt_keywords: Sequence = None,
                   spt_extensions: Sequence = None, data_keywords: Sequence = None,
-                  data_extensions: Sequence = None, header_defaults: Dict[str, Any] = None) -> List[dict]:
+                  data_extensions: Sequence = None, header_defaults: Dict[str, Any] = None,
+                  reference_request: dict = None) -> List[dict]:
     @dask.delayed
     def _get_file_data(fitsfile: str, *args, **kwargs) -> Union[FileData, None]:
         """Get specified data from a fitsfile and optionally its corresponding spt file."""
         try:
             return FileData(fitsfile, *args, **kwargs)
 
-        except (ValueError, OSError):
+        # Occasionally there are empty or corrupt files that will throw an OSError; This shouldn't break the process,
+        # but users should be warned.
+        except OSError as e:
+            warnings.warn(f'Bad file found: {fitsfile}\n{str(e)}', Warning)
+
             return
 
     delayed_results = [
@@ -131,7 +253,8 @@ def _get_file_data(fitsfile: str, *args, **kwargs) -> Union[FileData, None]:
             spt_extensions=spt_extensions,
             data_keywords=data_keywords,
             data_extensions=data_extensions,
-            header_defaults=header_defaults
+            header_defaults=header_defaults,
+            reference_request=reference_request
         ) for fitsfile in fitsfiles
     ]
 
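
For context, a rough sketch of how the new reference_request argument is meant to be used. The LAMPTAB/WCPTAB request mirrors the one added in cosmo/monitors/data_models.py below; the header keywords, extensions, and file search arguments are illustrative:

from cosmo.filesystem import find_files, get_file_data

# Each key names a header keyword that points to a reference file; every request needs a
# 'match' list (header keywords used to select the matching row) and a 'columns' list
# (columns to pull from that row).
reference_request = {
    'LAMPTAB': {
        'match': ['OPT_ELEM', 'CENWAVE', 'FPOFFSET'],
        'columns': ['SEGMENT', 'FP_PIXEL_SHIFT']
    },
    'WCPTAB': {
        'match': ['OPT_ELEM'],
        'columns': ['XC_RANGE', 'SEARCH_OFFSET']
    }
}

files = find_files('*lampflash*', data_dir='/path/to/cos/data', cosmo_layout=False)  # illustrative arguments
results = get_file_data(
    files,
    ['ROOTNAME', 'OPT_ELEM', 'CENWAVE'],  # illustrative header keywords
    [0, 0, 0],
    data_keywords=['TIME', 'SHIFT_DISP', 'SHIFT_XDISP', 'SEGMENT'],
    data_extensions=[1, 1, 1, 1],
    reference_request=reference_request
)

Because 'SEGMENT' is already populated from the table data, the LAMPTAB copy is stored under 'SEGMENT_LAMPTAB' (column name plus reference keyword), which is why 'SEGMENT_LAMPTAB' is added to array_cols in cosmo/monitor_helpers.py; columns not already present ('XC_RANGE', 'SEARCH_OFFSET', 'FP_PIXEL_SHIFT') keep their own names.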

cosmo/monitor_helpers.py

Lines changed: 10 additions & 2 deletions
@@ -108,8 +108,16 @@ def get_osm_data(datamodel, detector: str) -> pd.DataFrame:
         data = data.append(
             datamodel.query_to_pandas(
                 query,
-                array_cols=['TIME', 'SHIFT_DISP', 'SHIFT_XDISP', 'SEGMENT'],
-                array_dtypes=[float, float, float, str]
+                array_cols=[
+                    'TIME',
+                    'SHIFT_DISP',
+                    'SHIFT_XDISP',
+                    'SEGMENT',
+                    'XC_RANGE',
+                    'SEGMENT_LAMPTAB',
+                    'SEARCH_OFFSET',
+                    'FP_PIXEL_SHIFT'
+                ],
             ),
             sort=True,
             ignore_index=True

cosmo/monitors/data_models.py

Lines changed: 23 additions & 6 deletions
@@ -2,6 +2,7 @@
 
 from typing import List
 from monitorframe.datamodel import BaseDataModel
+from peewee import OperationalError
 
 from ..filesystem import find_files, get_file_data
 from ..sms import SMSTable
@@ -78,6 +79,17 @@ def get_new_data(self):
         data_keys = ('TIME', 'SHIFT_DISP', 'SHIFT_XDISP', 'SEGMENT')
         data_extensions = (1, 1, 1, 1)
 
+        reference_request = {
+            'LAMPTAB': {
+                'match': ['OPT_ELEM', 'CENWAVE', 'FPOFFSET'],
+                'columns': ['SEGMENT', 'FP_PIXEL_SHIFT']
+            },
+            'WCPTAB': {
+                'match': ['OPT_ELEM'],
+                'columns': ['XC_RANGE', 'SEARCH_OFFSET']
+            }
+        }
+
         files = find_files('*lampflash*', data_dir=self.files_source, cosmo_layout=self.cosmo_layout)
 
         if self.model is not None:
@@ -95,7 +107,8 @@ def get_new_data(self):
                 header_keys,
                 header_extensions,
                 data_keywords=data_keys,
-                data_extensions=data_extensions
+                data_extensions=data_extensions,
+                reference_request=reference_request
             )
         )
 
@@ -105,11 +118,15 @@ def get_new_data(self):
         ).reset_index(drop=True)
 
         # Add tsince data from SMSTable.
-        sms_data = pd.DataFrame(
-            SMSTable.select(SMSTable.ROOTNAME, SMSTable.TSINCEOSM1, SMSTable.TSINCEOSM2).where(
-                # x << y -> x IN y (y must be a list)
-                SMSTable.ROOTNAME + 'q' << data_results.ROOTNAME.to_list()).dicts()
-        )
+        try:
+            sms_data = pd.DataFrame(
+                SMSTable.select(SMSTable.ROOTNAME, SMSTable.TSINCEOSM1, SMSTable.TSINCEOSM2).where(
+                    # x << y -> x IN y (y must be a list)
+                    SMSTable.ROOTNAME + 'q' << data_results.ROOTNAME.to_list()).dicts()
+            )
+
+        except OperationalError as e:
+            raise type(e)(str(e) + '\nSMS database is required.')
 
         # It's possible that there could be a lag in between when the SMS data is updated and when new lampflashes
         # are added.
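
The new try/except exists because the OSM shift monitor cannot run without the SMS database; the peewee OperationalError is re-raised with extra context rather than swallowed. A minimal standalone sketch of the query and re-raise pattern, using a hypothetical in-memory stand-in for SMSTable (model, fields, and rootnames are illustrative):

from peewee import CharField, FloatField, Model, OperationalError, SqliteDatabase

db = SqliteDatabase(':memory:')  # stand-in for the real SMS database


class FakeSMSTable(Model):
    """Hypothetical model mirroring the SMSTable fields used above."""
    ROOTNAME = CharField()
    TSINCEOSM1 = FloatField()
    TSINCEOSM2 = FloatField()

    class Meta:
        database = db


db.connect()
db.create_tables([FakeSMSTable])
FakeSMSTable.create(ROOTNAME='ldxt01ab', TSINCEOSM1=10.0, TSINCEOSM2=20.0)  # illustrative row

try:
    # peewee's << operator translates to SQL IN; the real query also appends 'q' to ROOTNAME
    # before matching because lampflash rootnames end in 'q'.
    rows = list(
        FakeSMSTable.select(FakeSMSTable.ROOTNAME, FakeSMSTable.TSINCEOSM1, FakeSMSTable.TSINCEOSM2)
        .where(FakeSMSTable.ROOTNAME << ['ldxt01ab'])
        .dicts()
    )

except OperationalError as e:
    # If the SMS database (or table) is missing, re-raise the same exception type with added context.
    raise type(e)(str(e) + '\nSMS database is required.')

print(rows)  # [{'ROOTNAME': 'ldxt01ab', 'TSINCEOSM1': 10.0, 'TSINCEOSM2': 20.0}]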
