Skip to content

Commit a7616d1

Browse files
committed
Essentially patched in the multi-mission version of EventList. For issue #1428
1 parent 4f930b0 commit a7616d1

File tree

1 file changed

+176
-6
lines changed

1 file changed

+176
-6
lines changed

xga/products/misc.py

Lines changed: 176 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,201 @@
11
# This code is a part of X-ray: Generate and Analyse (XGA), a module designed for the XMM Cluster Survey (XCS).
2-
# Last modified by David J Turner (turne540@msu.edu) 20/02/2023, 14:04. Copyright (c) The Contributors
2+
# Last modified by David J Turner (turne540@msu.edu) 25/08/2025, 17:34. Copyright (c) The Contributors
3+
from typing import List
34

5+
import fitsio
6+
import pandas as pd
7+
from fitsio import FITSHDR, read_header
48

59
from . import BaseProduct
10+
from ..exceptions import XGADeveloperError
611

712

813
class EventList(BaseProduct):
    """
    A product class for event lists, it stores information about the event list. The header and the event
    table are only read from the file when they are first requested, and can be unloaded again to free memory.

    :param str path: The path to the event list file, OR an S3-bucket (or S3-bucket-like) path/url to stream
        the event list data from.
    :param str obs_id: The ObsID related to the event list being declared.
    :param str instrument: The instrument related to the event list being declared.
    :param str stdout_str: The stdout from calling the terminal command.
    :param str stderr_str: The stderr from calling the terminal command.
    :param str gen_cmd: The command used to generate the event list.
    :param str telescope: The telescope that is the source of this event list. The default is None.
    :param List[str] obs_ids: The obs ids that were combined to make this event list. The default is None.
    """

    def __init__(self, path: str, obs_id: str = None, instrument: str = None, stdout_str: str = None,
                 stderr_str: str = None, gen_cmd: str = None, telescope: str = None, obs_ids: List[str] = None):
        """
        The init method of the EventList class, a product class for event lists, it stores information about
        the event list.

        :param str path: The path to the event list file, OR an S3-bucket (or S3-bucket-like) path/url to stream
            the event list data from.
        :param str obs_id: The ObsID related to the event list being declared.
        :param str instrument: The instrument related to the event list being declared.
        :param str stdout_str: The stdout from calling the terminal command.
        :param str stderr_str: The stderr from calling the terminal command.
        :param str gen_cmd: The command used to generate the event list.
        :param str telescope: The telescope that is the source of this event list. The default is None.
        :param List[str] obs_ids: The obs ids that were combined to make this event list. The default is None.
        :raises XGADeveloperError: If the parent class exposes a 'telescope' attribute, which signals that the
            call to the BaseProduct init below needs to be updated.
        :raises ValueError: If 'obs_ids' is passed and is not a list of strings.
        """
        # A deliberate tripwire for developers - once the parent class has a 'telescope' attribute, the
        #  BaseProduct init call below is out of date, so we fail loudly rather than carry on silently
        if hasattr(super(), 'telescope'):
            raise XGADeveloperError("S3 streaming event lists have been merged into multi-mission XGA, and the "
                                    "call to BaseProduct init in EventList needs to be updated.")

        super().__init__(path, obs_id, instrument, stdout_str, stderr_str, gen_cmd)
        self._prod_type = "events"
        # These store the header of the event list fits file (if read in), as well as the main table of event
        #  information (again if read in).
        self._header = None
        self._data = None
        self._telescope = telescope

        # None is allowed (nothing was passed), anything else has to be a list made up entirely of strings
        if obs_ids is not None and (not isinstance(obs_ids, list) or
                                    not all(isinstance(obs, str) for obs in obs_ids)):
            raise ValueError("The 'obs_ids' argument must be a list of strings.")

        self._obs_ids = obs_ids

    @property
    def obs_ids(self) -> list:
        """
        Property getter for the ObsIDs that were passed as the 'obs_ids' argument when this EventList
        was declared (i.e. those that were combined to make it).

        :return: List of ObsIDs involved in this EventList, or None if 'obs_ids' was not passed on declaration.
        :rtype: list
        """
        return self._obs_ids

    # This absolutely doesn't get a setter considering it's the header object
    @property
    def header(self) -> FITSHDR:
        """
        Property getter allowing access to the fitsio header object of this event list. The header is read
        from the file the first time it is requested, and then kept in memory.

        :return: The header of the event list file.
        :rtype: FITSHDR
        """
        # If the header attribute is None then we know we have to read the header in
        if self._header is None:
            self._read_header_on_demand()
        return self._header

    @header.deleter
    def header(self):
        """
        Property deleter for the header of this EventList instance. The self._header attribute is removed from
        memory, and then self._header is explicitly set to None so that self._read_header_on_demand() will be
        triggered if you ever want the header from this object again.
        """
        del self._header
        self._header = None

    @property
    def data(self) -> pd.DataFrame:
        """
        Property getter allowing access to the event table of this event list. The table is read from the
        file the first time it is requested, and then kept in memory.

        :return: The event table (read from extension 1 of the event list file) as a dataframe.
        :rtype: pd.DataFrame
        """
        # If the data attribute is None then we know we have to read the event table in
        if self._data is None:
            self._read_data_on_demand()
        return self._data

    @data.deleter
    def data(self):
        """
        Property deleter for the data of this EventList instance. The self._data attribute is removed from
        memory, and then self._data is explicitly set to None so that self._read_data_on_demand() will be
        triggered if you ever want the data from this object again.
        """
        del self._data
        self._data = None

    @staticmethod
    def _to_native_byteorder(arr):
        """
        Makes sure that every field of a numpy (record) array is stored in the byte order native to this
        machine, without altering the values.

        :param arr: The numpy array (structured or not) to check.
        :return: The passed array if it was already entirely native, otherwise a converted copy.
        """
        # Numpy reports native order as '=', never as an explicit '<' or '>', so asking for the native version
        #  of the dtype and comparing is the reliable way to check every field at once
        native_dtype = arr.dtype.newbyteorder('=')
        if native_dtype == arr.dtype:
            return arr
        # astype converts the values field-by-field, so they are preserved
        return arr.astype(native_dtype)

    def _read_header_on_demand(self):
        """
        This will read the event list header into memory, without loading the data from the event list main table. That
        way the user can get access to the summary information stored in the header without wasting a lot of memory.

        :raises FileNotFoundError: If fitsio raises an OSError when trying to read the header.
        """
        try:
            # Reads only the header information
            self._header = read_header(self.path)
        except OSError as err:
            # The original error is chained on so the underlying cause isn't lost
            raise FileNotFoundError("FITSIO read_header cannot open {f}, possibly because there is a problem with "
                                    "the file, it doesn't exist, or maybe an SFTP problem? This product is associated "
                                    "with {s}.".format(f=self.path, s=self.src_name)) from err

    def _read_data_on_demand(self):
        """
        This will read the event list table into memory.

        :raises FileNotFoundError: If fitsio raises an OSError when trying to read the table.
        """
        try:
            # reads the events table into a np.recarray
            arr = fitsio.read(self.path, ext=1)
            # nicer to return a df than an array
            self._data = pd.DataFrame.from_records(arr)

        except OSError as err:
            # The original error is chained on so the underlying cause isn't lost
            raise FileNotFoundError("FITSIO read method cannot open {f}, possibly because there is a problem with "
                                    "the file, it doesn't exist, or maybe an SFTP problem? This product is associated "
                                    "with {s}.".format(f=self.path, s=self.src_name)) from err

    def unload(self, unload_data: bool = True, unload_header: bool = True):
        """
        This method allows you to safely remove the header and/or data information stored in memory.

        :param bool unload_data: Specifies whether the data should be unloaded from memory. Default is True, as the
            event list data is liable to take up far more memory than the header, meaning it is more likely to need to
            be removed.
        :param bool unload_header: Specifies whether the header should be unloaded from memory. Default is True.
        :raises ValueError: If both 'unload_data' and 'unload_header' are False.
        """
        # Doesn't make sense in this case, as the method wouldn't do anything - as it was probably a mistake to call
        #  the method like this I throw an error so the user knows
        if not unload_data and not unload_header:
            raise ValueError("At least one of the 'unload_data' and 'unload_header' arguments must be True.")

        # Pretty simple, if the user wants the data gone then we use the existing property delete method for data
        if unload_data:
            del self.data

        # And if they want the header gone then we use the property delete method for header
        if unload_header:
            del self.header

    def get_columns_from_data(self, col_names: List[str]) -> pd.DataFrame:
        """
        This method allows you to retrieve specific columns from the event list table, without loading the whole table
        into memory.

        :param List[str] col_names: A list of column names to retrieve.
        :return: A dataframe containing only the requested columns.
        :rtype: pd.DataFrame
        :raises FileNotFoundError: If fitsio raises an OSError when trying to read the columns.
        """
        # There is no sense reading in the columns again, if the whole event list is already in memory
        if self._data is not None:
            return self.data.loc[:, col_names]

        try:
            # Reads the requested columns of the events table into a np.recarray - any ValueError raised
            #  by fitsio is deliberately left to propagate, as its message is informative enough
            arr = fitsio.read(self.path, columns=col_names, ext=1)
        except OSError as err:
            # The original error is chained on so the underlying cause isn't lost
            raise FileNotFoundError("FITSIO read method cannot open {f}, possibly because there is a problem with "
                                    "the file, it doesn't exist, or maybe an SFTP problem? This product is associated "
                                    "with {s}.".format(f=self.path, s=self.src_name)) from err

        # Makes sure that the byte order of every column is native to this machine
        arr = self._to_native_byteorder(arr)

        # Much nicer to have a dataframe than a recarray
        return pd.DataFrame.from_records(arr)

0 commit comments

Comments
 (0)