1
1
# This code is a part of X-ray: Generate and Analyse (XGA), a module designed for the XMM Cluster Survey (XCS).
2
- # Last modified by David J Turner (turne540@msu.edu) 20/02/2023, 14:04. Copyright (c) The Contributors
2
+ # Last modified by David J Turner (turne540@msu.edu) 25/08/2025, 17:34. Copyright (c) The Contributors
3
+ from typing import List
3
4
5
+ import fitsio
6
+ import pandas as pd
7
+ from fitsio import FITSHDR , read_header
4
8
5
9
from . import BaseProduct
10
+ from ..exceptions import XGADeveloperError
6
11
7
12
8
13
class EventList(BaseProduct):
    """
    A product class for event lists, it stores information about the event list, and can lazily read the
    header and the event table of the underlying file into memory (and unload them again).

    :param str path: The path to the event list file, OR an S3-bucket (or S3-bucket-like) path/url to stream
        the event list data from.
    :param str obs_id: The ObsID related to the event list being declared.
    :param str instrument: The instrument related to the event list being declared.
    :param str stdout_str: The stdout from calling the terminal command.
    :param str stderr_str: The stderr from calling the terminal command.
    :param str gen_cmd: The command used to generate the event list.
    :param str telescope: The telescope that is the source of this event list. The default is None.
    :param List[str] obs_ids: The obs ids that were combined to make this event list. The default is None.
    """

    def __init__(self, path: str, obs_id: str = None, instrument: str = None, stdout_str: str = None,
                 stderr_str: str = None, gen_cmd: str = None, telescope: str = None, obs_ids: List[str] = None):
        """
        The init method of the EventList class, a product class for event lists, it stores information about
        the event list.

        :param str path: The path to the event list file, OR an S3-bucket (or S3-bucket-like) path/url to stream
            the event list data from.
        :param str obs_id: The ObsID related to the event list being declared.
        :param str instrument: The instrument related to the event list being declared.
        :param str stdout_str: The stdout from calling the terminal command.
        :param str stderr_str: The stderr from calling the terminal command.
        :param str gen_cmd: The command used to generate the event list.
        :param str telescope: The telescope that is the source of this event list. The default is None.
        :param List[str] obs_ids: The obs ids that were combined to make this event list. The default is None.
        :raises XGADeveloperError: If the parent class exposes a 'telescope' attribute.
        :raises ValueError: If 'obs_ids' is passed, but is not a list containing only strings.
        """
        # NOTE(review): this looks like a deliberate developer tripwire - once the parent class knows about
        #  telescopes, the super().__init__ call below presumably needs to pass 'telescope' along - confirm
        if hasattr(super(), 'telescope'):
            raise XGADeveloperError("S3 streaming event lists have been merged into multi-mission XGA, and the "
                                    "call to BaseProduct init in EventList needs to be updated.")

        super().__init__(path, obs_id, instrument, stdout_str, stderr_str, gen_cmd)
        self._prod_type = "events"
        # These store the header of the event list fits file (if read in), as well as the main table of event
        #  information (again if read in).
        self._header = None
        self._data = None
        self._telescope = telescope

        # None is the 'not supplied' value, anything else has to be a genuine list holding only strings
        if obs_ids is not None and (not isinstance(obs_ids, list) or
                                    not all(isinstance(obs, str) for obs in obs_ids)):
            raise ValueError("The 'obs_ids' argument must be a list of strings.")

        self._obs_ids = obs_ids

    @property
    def obs_ids(self) -> list:
        """
        Property getter for the ObsIDs that were combined to make this EventList, exactly as they were
        passed to the 'obs_ids' argument on declaration.

        :return: List of ObsIDs involved in this EventList, or None if 'obs_ids' was not supplied when the
            event list was declared.
        :rtype: list
        """
        # NOTE(review): an earlier version of this docstring promised a one-element list for event lists that
        #  are not combined, but no such fallback is implemented - confirm which behaviour is intended
        return self._obs_ids

    # This absolutely doesn't get a setter considering it's the header object
    @property
    def header(self) -> FITSHDR:
        """
        Property getter allowing access to the fitsio header object of this event list. The header is read
        from disk the first time it is requested (or the first time after it has been unloaded).

        :return: The header read from the event list file.
        :rtype: FITSHDR
        """
        # If the header attribute is None then we know we have to read the header in
        if self._header is None:
            self._read_header_on_demand()
        return self._header

    @header.deleter
    def header(self):
        """
        Property deleter for the header of this EventList instance. The self._header attribute is reset to
        None, which releases this object's reference to the header and means self._read_header_on_demand()
        will be triggered if you ever want the header from this object again.
        """
        self._header = None

    @property
    def data(self) -> pd.DataFrame:
        """
        Property getter allowing access to the event table of this event list. The table is read from disk
        the first time it is requested (or the first time after it has been unloaded).

        :return: The events table (extension 1 of the file), as a dataframe.
        :rtype: pd.DataFrame
        """
        # If the data attribute is None then we know we have to read the event table in
        if self._data is None:
            self._read_data_on_demand()
        return self._data

    @data.deleter
    def data(self):
        """
        Property deleter for the data of this EventList instance. The self._data attribute is reset to
        None, which releases this object's reference to the table and means self._read_data_on_demand() will
        be triggered if you ever want the data from this object again.
        """
        self._data = None

    def _read_failure(self, reader: str) -> FileNotFoundError:
        """
        Builds (but does not raise) the exception used by every read method of this class when fitsio cannot
        open the event list file, so that the message is defined in a single place.

        :param str reader: The description of the fitsio reader that failed, inserted into the message.
        :return: The exception that the calling method should raise.
        :rtype: FileNotFoundError
        """
        return FileNotFoundError("FITSIO {r} cannot open {f}, possibly because there is a problem with "
                                 "the file, it doesn't exist, or maybe an SFTP problem? This product is associated "
                                 "with {s}.".format(r=reader, f=self.path, s=self.src_name))

    @staticmethod
    def _to_native_byte_order(arr):
        """
        Converts a (record) array to the byte order of the machine running the code, field by field. Values
        are preserved, and fields that are already native (or have no byte order) are left alone.

        :param arr: The array read from the event list file.
        :return: An array with the same values, where every field has native byte order. The input array
            itself is returned if no conversion was required.
        """
        # FITS files store their tables big-endian, and pandas can fail on non-native byte orders - asking
        #  for the native version of the whole dtype handles every field, rather than judging by the first
        return arr.astype(arr.dtype.newbyteorder('='), copy=False)

    def _read_header_on_demand(self):
        """
        This will read the event list header into memory, without loading the data from the event list main table. That
        way the user can get access to the summary information stored in the header without wasting a lot of memory.

        :raises FileNotFoundError: If fitsio raises an OSError when trying to open the file.
        """
        try:
            # Reads only the header information
            self._header = read_header(self.path)
        except OSError as err:
            # Chaining keeps the underlying fitsio error visible in the traceback
            raise self._read_failure("read_header") from err

    def _read_data_on_demand(self):
        """
        This will read the event list table into memory.

        :raises FileNotFoundError: If fitsio raises an OSError when trying to open the file.
        """
        try:
            # Reads the events table into a np.recarray
            arr = fitsio.read(self.path, ext=1)
        except OSError as err:
            raise self._read_failure("read method") from err

        # Same byte order treatment as get_columns_from_data, so both routes give equivalent dataframes, and
        #  it is nicer to return a df than an array
        self._data = pd.DataFrame.from_records(self._to_native_byte_order(arr))

    def unload(self, unload_data: bool = True, unload_header: bool = True):
        """
        This method allows you to safely remove the header and/or data information stored in memory.

        :param bool unload_data: Specifies whether the data should be unloaded from memory. Default is True, as the
            event list data is liable to take up far more memory than the header, meaning it is more likely to need to
            be removed.
        :param bool unload_header: Specifies whether the header should be unloaded from memory. Default is True.
        :raises ValueError: If both 'unload_data' and 'unload_header' are False.
        """
        # Doesn't make sense in this case, as the method wouldn't do anything - as it was probably a mistake to call
        #  the method like this I throw an error so the user knows
        if not unload_data and not unload_header:
            raise ValueError("At least one of the 'unload_data' and 'unload_header' arguments must be True.")

        # Pretty simple, if the user wants the data gone then we use the existing property delete method for data
        if unload_data:
            del self.data

        # And if they want the header gone then we use the property delete method for header
        if unload_header:
            del self.header

    def get_columns_from_data(self, col_names: List[str]) -> pd.DataFrame:
        """
        This method allows you to retrieve specific columns from the event list table, without loading the whole table
        into memory.

        :param List[str] col_names: A list of column names to retrieve.
        :return: A dataframe containing only the requested columns.
        :rtype: pd.DataFrame
        :raises FileNotFoundError: If fitsio raises an OSError when trying to open the file.
        """
        # There is no sense reading in the columns again, if the whole event list is already in memory
        if self._data is not None:
            return self.data.loc[:, col_names]

        try:
            # Reads just the requested columns of the events table into a np.recarray. Any ValueError raised by
            #  fitsio is deliberately left to propagate, as its message is informative enough
            arr = fitsio.read(self.path, columns=col_names, ext=1)
        except OSError as err:
            raise self._read_failure("read method") from err

        # Makes sure that the byte order is correct - much nicer to have a dataframe than a recarray
        return pd.DataFrame.from_records(self._to_native_byte_order(arr))
0 commit comments