1
1
import os
2
2
import dask
3
3
import re
4
+ import crds
5
+ import numpy as np
6
+ import warnings
4
7
5
8
from glob import glob
6
9
from astropy .io import fits
@@ -43,7 +46,7 @@ class FileData(dict):
43
46
def __init__ (self , filename : str , header_keywords : Sequence , header_extensions : Sequence ,
44
47
spt_suffix : str = 'spt.fits.gz' , spt_keywords : Sequence = None , spt_extensions : Sequence = None ,
45
48
data_keywords : Sequence = None , data_extensions : Sequence = None ,
46
- header_defaults : Dict [str , Any ] = None ):
49
+ header_defaults : Dict [str , Any ] = None , reference_request : Dict [ str , Dict [ str , list ]] = None ):
47
50
"""Initialize and create the possible corresponding spt file name."""
48
51
super ().__init__ (self )
49
52
@@ -69,15 +72,40 @@ def __init__(self, filename: str, header_keywords: Sequence, header_extensions:
69
72
if len (data_keywords ) != len (data_extensions ):
70
73
raise ValueError ('data_keywords and data_extensions must be the same length.' )
71
74
75
+ if reference_request :
76
+ for reference in reference_request .keys ():
77
+ if not ('match' in reference_request [reference ] and 'columns' in reference_request [reference ]):
78
+ raise ValueError ('reference_requests require "columns", and "match" keys.' )
79
+
80
+ if not isinstance (reference_request [reference ]['columns' ], list ):
81
+ raise TypeError ('"columns" value in reference_request must be a list' )
82
+
83
+ if not isinstance (reference_request [reference ]['match' ], list ):
84
+ raise TypeError ('"match" value in reference_request must be a list' )
85
+
72
86
with fits .open (filename ) as hdu :
73
87
self .get_header_data (hdu , header_keywords , header_extensions , header_defaults )
74
88
75
89
if data_keywords :
76
90
self .get_table_data (hdu , data_keywords , data_extensions )
77
91
92
+ if reference_request :
93
+ self .get_reference_data (hdu , reference_request )
94
+
78
95
if spt_keywords :
79
96
self .get_spt_header_data (spt_file , spt_keywords , spt_extensions )
80
97
98
+ self ._convert_bytes_to_strings ()
99
+
100
+ def _convert_bytes_to_strings (self ):
101
+ """Convert byte-string arrays to strings. This affects reference files in particular, but can also be an issue
102
+ for older COS datatypes.
103
+ """
104
+ for key , value in self .items ():
105
+ if isinstance (value , np .ndarray ):
106
+ if value .dtype in ['S3' , 'S4' ]:
107
+ self [key ] = value .astype (np .unicode_ )
108
+
81
109
@staticmethod
82
110
def _create_spt_filename (filename : str , spt_suffix : str ) -> Union [str , None ]:
83
111
"""Create an spt filename based on the input filename."""
@@ -90,36 +118,130 @@ def _create_spt_filename(filename: str, spt_suffix: str) -> Union[str, None]:
90
118
91
119
return
92
120
93
def get_header_data(self, hdu: fits.HDUList, header_keywords: Sequence, header_extensions: Sequence,
                    header_defaults: dict = None):
    """Read header keywords from the open file and store each one on this object under its keyword name.

    ``header_keywords`` and ``header_extensions`` are walked in lockstep: the i-th keyword is read from the
    header of the i-th extension. When ``header_defaults`` supplies an entry for a keyword, that entry is
    used as the fallback if the keyword is absent from the header; otherwise the header is indexed directly.
    """
    fallbacks = header_defaults if header_defaults is not None else {}

    for keyword, extension in zip(header_keywords, header_extensions):
        header = hdu[extension].header

        if keyword in fallbacks:
            self[keyword] = header.get(keyword, default=fallbacks[keyword])
            continue

        # No fallback registered for this keyword: a missing keyword surfaces as the header's own error.
        self[keyword] = header[keyword]
102
130
103
131
def get_spt_header_data(self, spt_file: str, spt_keywords: Sequence, spt_extensions: Sequence):
    """Open the spt file and copy the requested header keywords onto this object.

    ``spt_keywords`` and ``spt_extensions`` are paired positionally; each keyword is read from the header of
    its paired extension and stored under the keyword name.
    """
    with fits.open(spt_file) as spt_hdu:
        for keyword, extension in zip(spt_keywords, spt_extensions):
            spt_header = spt_hdu[extension].header
            self[keyword] = spt_header[keyword]
107
136
108
137
def get_table_data(self, hdu: fits.HDUList, data_keywords: Sequence, data_extensions: Sequence):
    """Copy table columns from the open file onto this object.

    ``data_keywords`` and ``data_extensions`` are paired positionally. A column is normally stored under its
    own name; if that name is already present on this object, it is stored as ``<name>_<extension>`` instead
    so the existing entry is not overwritten.
    """
    for column, extension in zip(data_keywords, data_extensions):
        target = f'{column}_{extension}' if column in self else column
        self[target] = hdu[extension].data[column]
145
+
146
+ @staticmethod
147
+ def _get_match_values (hdu : fits .HDUList , match_list : list ):
148
+ """Get match key values from the input data."""
149
+ return {key : hdu [0 ].header [key ] for key in match_list }
150
+
151
+ @staticmethod
152
def _get_reference_table(hdu: fits.HDUList, reference_name: str) -> Union[fits.fitsrec.FITS_rec, None]:
    """Locate the reference file named in the header of ``hdu[0]`` and return its table data.

    Returns ``None`` when neither the located path nor its ``.gz`` variant exists, or when reading the file
    raises ``ValueError``.
    """
    # Only the text after the last '$' in the header value is handed to CRDS.
    reference_file = hdu[0].header[reference_name].split('$')[-1]

    # noinspection PyUnresolvedReferences
    located = crds.locate_file(reference_file, 'hst')

    # Try the located path first, then the same path with a gzip suffix.
    for candidate in (located, located + '.gz'):
        if os.path.exists(candidate):
            break

    else:
        return None

    try:  # Some older reference files actually have bad formats for some columns and are unreadable.
        return fits.getdata(candidate)

    except ValueError:
        return None
169
+
170
+ def _get_matching_values (self , match_values : dict , reference_table : fits .fitsrec .FITS_rec , request : dict ,
171
+ reference_name : str ):
172
+ """Find the row in the reference file data that corresponds to the values provided in match_values."""
173
+ for key , value in match_values .items ():
174
+ try :
175
+ if isinstance (value , str ): # Different "generations" of ref files stored strings in different ways...
176
+ reference_table = reference_table [
177
+ (reference_table [key ] == value ) |
178
+ (reference_table [key ] == value + ' ' ) |
179
+ (reference_table [key ] == value .encode ())
180
+ ]
181
+
182
+ else :
183
+ reference_table = reference_table [reference_table [key ] == value ]
184
+
185
+ except KeyError :
186
+ continue
187
+
188
+ if not len (reference_table ):
189
+ raise ValueError (
190
+ f'A matching row could not be determined with the given parameters: { request ["match" ]} '
191
+ f'\n Available columns: { reference_table .names } '
192
+ )
193
+
194
+ for column in request ['columns' ]:
195
+ if column in self :
196
+ try :
197
+ self [f'{ column } _{ reference_name } ' ] = np .array (reference_table [column ]) # No masked arrays
198
+
199
+ except KeyError :
200
+ self [f'{ column } _{ reference_name } ' ] = np .zeros (1 )
201
+
202
+ else :
203
+ try :
204
+ self [column ] = np .array (reference_table [column ])
205
+
206
+ except KeyError :
207
+ self [column ] = np .zeros (1 )
208
+
209
def get_reference_data(self, hdu: fits.HDUList, reference_request: Dict[str, Dict[str, list]]):
    """Collect the requested columns from each requested reference file.

    ``reference_request`` maps a reference file keyword to a request with a ``'match'`` list and a
    ``'columns'`` list. When the reference table cannot be obtained, every requested column is stored as
    ``np.zeros(1)``; a column whose name is already present on this object is stored as
    ``<column>_<reference>`` instead.
    """
    for reference_name, request in reference_request.items():
        table = self._get_reference_table(hdu, reference_name)

        if table is None:
            # No table was returned for this reference file: store placeholder arrays.
            for column in request['columns']:
                target = f'{column}_{reference_name}' if column in self else column
                self[target] = np.zeros(1)

            continue

        match_values = self._get_match_values(hdu, request['match'])
        self._get_matching_values(match_values, table, request, reference_name)
111
228
112
229
113
230
def get_file_data (fitsfiles : List [str ], keywords : Sequence , extensions : Sequence , spt_keywords : Sequence = None ,
114
231
spt_extensions : Sequence = None , data_keywords : Sequence = None ,
115
- data_extensions : Sequence = None , header_defaults : Dict [str , Any ] = None ) -> List [dict ]:
232
+ data_extensions : Sequence = None , header_defaults : Dict [str , Any ] = None ,
233
+ reference_request : dict = None ) -> List [dict ]:
116
234
@dask .delayed
117
235
def _get_file_data (fitsfile : str , * args , ** kwargs ) -> Union [FileData , None ]:
118
236
"""Get specified data from a fitsfile and optionally its corresponding spt file."""
119
237
try :
120
238
return FileData (fitsfile , * args , ** kwargs )
121
239
122
- except (ValueError , OSError ):
240
+ # Occasionally there are empty or corrupt files that will throw an OSError; This shouldn't break the process,
241
+ # but users should be warned.
242
+ except OSError as e :
243
+ warnings .warn (f'Bad file found: { fitsfile } \n { str (e )} ' , Warning )
244
+
123
245
return
124
246
125
247
delayed_results = [
@@ -131,7 +253,8 @@ def _get_file_data(fitsfile: str, *args, **kwargs) -> Union[FileData, None]:
131
253
spt_extensions = spt_extensions ,
132
254
data_keywords = data_keywords ,
133
255
data_extensions = data_extensions ,
134
- header_defaults = header_defaults
256
+ header_defaults = header_defaults ,
257
+ reference_request = reference_request
135
258
) for fitsfile in fitsfiles
136
259
]
137
260
0 commit comments