diff --git a/CHANGES.rst b/CHANGES.rst index b0d7964ee2..67d3b7ed77 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -27,6 +27,7 @@ linelists.cdms ^^^^^^^^^^^^^^ - Add a keyword to control writing of new species cache files. This is needed to prevent tests from overwriting those files. [#3297] +- Add more complete support for CDMS quantum number and other value parsing. [#3302] heasarc ^^^^^^^ @@ -76,10 +77,10 @@ mast - Fix bug in ``utils.remove_duplicate_products`` that does not retain the order of the products in an input table. [#3314] -- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs +- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs to the returned cloud URIs. [#3314] -- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot +- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot be found in the cloud. [#3314] diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index 95c739b304..8fe26164b5 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -12,6 +12,7 @@ # import configurable items declared in __init__.py from astroquery.linelists.cdms import conf from astroquery.exceptions import InvalidQueryError, EmptyResponseError +from astroquery import log import re import string @@ -54,7 +55,8 @@ def query_lines_async(self, min_frequency, max_frequency, *, min_strength : int, optional Minimum strength in catalog units, the default is -500 - molecule : list, string of regex if parse_name_locally=True, optional + molecule : list or string if parse_name_locally=False, + string of regex if parse_name_locally=True, optional Identifiers of the molecules to search for. If this parameter is not provided the search will match any species. Default is 'All'. As a first pass, the molecule will be searched for with a direct @@ -134,18 +136,21 @@ def query_lines_async(self, min_frequency, max_frequency, *, # changes interpretation of query self._last_query_temperature = temperature_for_intensity - if molecule is not None: - if parse_name_locally: - self.lookup_ids = build_lookup() - luts = self.lookup_ids.find(molecule, flags) - if len(luts) == 0: - raise InvalidQueryError('No matching species found. Please ' - 'refine your search or read the Docs ' - 'for pointers on how to search.') - payload['Molecules'] = tuple(f"{val:06d} {key}" - for key, val in luts.items())[0] - else: - payload['Molecules'] = molecule + if molecule == 'All': + payload['Moleculesgrp'] = 'all species' + else: + if molecule is not None: + if parse_name_locally: + self.lookup_ids = build_lookup() + luts = self.lookup_ids.find(molecule, flags) + if len(luts) == 0: + raise InvalidQueryError('No matching species found. Please ' + 'refine your search or read the Docs ' + 'for pointers on how to search.') + payload['Molecules'] = tuple(f"{val:06d} {key}" + for key, val in luts.items())[0] + else: + payload['Molecules'] = molecule if get_query_payload: return payload @@ -180,7 +185,7 @@ def query_lines_async(self, min_frequency, max_frequency, *, # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S' badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) - if payload['Molecules'] in badlist: + if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist: raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. " f"Try get_molecule({payload['Molecules']}) instead.") @@ -233,15 +238,32 @@ def _parse_result(self, response, *, verbose=False): soup = BeautifulSoup(response.text, 'html.parser') text = soup.find('pre').text + need_to_filter_bad_molecules = False + for bad_molecule in self.MALFORMATTED_MOLECULE_LIST: + if text.find(bad_molecule.split()[1]) > -1: + need_to_filter_bad_molecules = True + break + if need_to_filter_bad_molecules: + text_new = '' + text = text.split('\n') + for line in text: + need_to_include_line = True + for bad_molecule in self.MALFORMATTED_MOLECULE_LIST: + if line.find(bad_molecule.split()[1]) > -1: + need_to_include_line = False + break + if need_to_include_line: + text_new = text_new + '\n' + line + text = text_new + starts = {'FREQ': 0, 'ERR': 14, 'LGINT': 25, 'DR': 36, 'ELO': 38, 'GUP': 47, - 'MOLWT': 51, - 'TAG': 54, - 'QNFMT': 58, + 'TAG': 50, + 'QNFMT': 57, 'Ju': 61, 'Ku': 63, 'vu': 65, @@ -265,6 +287,7 @@ def _parse_result(self, response, *, verbose=False): result['FREQ'].unit = u.MHz result['ERR'].unit = u.MHz + result['MOLWT'] = [int(x/1e3) for x in result['TAG']] result['Lab'] = result['MOLWT'] < 0 result['MOLWT'] = np.abs(result['MOLWT']) result['MOLWT'].unit = u.Da @@ -387,7 +410,7 @@ def tryfloat(x): return result - def get_molecule(self, molecule_id, *, cache=True): + def get_molecule(self, molecule_id, *, cache=True, return_response=False): """ Retrieve the whole molecule table for a given molecule id """ @@ -396,6 +419,8 @@ def get_molecule(self, molecule_id, *, cache=True): url = f'{self.CLASSIC_URL}/entries/c{molecule_id}.cat' response = self._request(method='GET', url=url, timeout=self.TIMEOUT, cache=cache) + if return_response: + return response result = self._parse_cat(response) species_table = self.get_species_table() @@ -426,21 +451,21 @@ def _parse_cat(self, response, *, verbose=False): 'ELO': 32, 'GUP': 42, 'TAG': 44, - 'QNFMT': 52, - 'Q1': 56, - 'Q2': 58, - 'Q3': 60, - 'Q4': 62, - 'Q5': 64, - 'Q6': 66, - 'Q7': 68, - 'Q8': 70, - 'Q9': 72, - 'Q10': 74, - 'Q11': 76, - 'Q12': 78, - 'Q13': 80, - 'Q14': 82, + 'QNFMT': 51, + 'Q1': 55, + 'Q2': 57, + 'Q3': 59, + 'Q4': 61, + 'Q5': 63, + 'Q6': 65, + 'Q7': 67, + 'Q8': 69, + 'Q9': 71, + 'Q10': 73, + 'Q11': 75, + 'Q12': 77, + 'Q13': 79, + 'Q14': 81, } result = ascii.read(text, header_start=None, data_start=0, @@ -450,7 +475,7 @@ def _parse_cat(self, response, *, verbose=False): format='fixed_width', fast_reader=False) # int truncates - which is what we want - result['MOLWT'] = [int(x/1e4) for x in result['TAG']] + result['MOLWT'] = [int(x/1e3) for x in result['TAG']] result['FREQ'].unit = u.MHz result['ERR'].unit = u.MHz @@ -460,15 +485,18 @@ def _parse_cat(self, response, *, verbose=False): result['MOLWT'].unit = u.Da fix_keys = ['GUP'] - for suf in '': - for qn in (f'Q{ii}' for ii in range(1, 15)): - qnind = qn+suf - fix_keys.append(qnind) + for qn in (f'Q{ii}' for ii in range(1, 15)): + fix_keys.append(qn) + log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP") for key in fix_keys: if not np.issubdtype(result[key].dtype, np.integer): intcol = np.array(list(map(parse_letternumber, result[key])), dtype=int) + if any(intcol == -999999): + intcol = np.ma.masked_where(intcol == -999999, intcol) result[key] = intcol + if not np.issubdtype(result[key].dtype, np.integer): + raise ValueError(f"Failed to parse {key} as integer") result['LGINT'].unit = u.nm**2 * u.MHz result['ELO'].unit = u.cm**(-1) @@ -486,13 +514,16 @@ def parse_letternumber(st): From the CDMS docs: "Exactly two characters are available for each quantum number. Therefore, half integer quanta are rounded up ! In addition, capital letters are used to - indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small - types are used to signal corresponding negative quantum numbers." + indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters + are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc." """ + if np.ma.is_masked(st): + return -999999 + asc = string.ascii_lowercase ASC = string.ascii_uppercase - newst = ''.join(['-' + str(asc.index(x)+10) if x in asc else - str(ASC.index(x)+10) if x in ASC else + newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else + str((ASC.index(x)+10)) if x in ASC else x for x in st]) return int(newst) diff --git a/astroquery/linelists/cdms/tests/test_cdms.py b/astroquery/linelists/cdms/tests/test_cdms.py index 597311d715..0b8059105f 100644 --- a/astroquery/linelists/cdms/tests/test_cdms.py +++ b/astroquery/linelists/cdms/tests/test_cdms.py @@ -83,6 +83,7 @@ def test_query(patch_post): assert tbl['LGINT'][0] == -7.1425 assert tbl['GUP'][0] == 3 assert tbl['GUP'][7] == 17 + assert tbl['MOLWT'][0] == 28 def test_parseletternumber(): @@ -99,9 +100,12 @@ def test_parseletternumber(): assert parse_letternumber("Z9") == 359 # inferred? - assert parse_letternumber("z9") == -359 + assert parse_letternumber("a0") == -10 + assert parse_letternumber("b0") == -20 assert parse_letternumber("ZZ") == 3535 + assert parse_letternumber(np.ma.masked) == -999999 + def test_hc7s(patch_post): """ diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py index 5c2a2059fb..96b5ee96ec 100644 --- a/astroquery/linelists/cdms/tests/test_cdms_remote.py +++ b/astroquery/linelists/cdms/tests/test_cdms_remote.py @@ -38,6 +38,55 @@ def test_remote_300K(): assert tbl['FREQ'][0] == 505366.7875 assert tbl['ERR'][0] == 49.13 assert tbl['LGINT'][0] == -4.2182 + assert tbl['MOLWT'][0] == 18 + assert tbl['TAG'][0] == 18505 + + +@pytest.mark.remote_data +def test_co_basics(): + tbl = CDMS.get_molecule('028503') + assert tbl['Q1'][0] == 1 + assert tbl['Q7'][0] == 0 + assert tbl['Q1'][10] == 11 + assert tbl['Q7'][10] == 10 + assert tbl['MOLWT'][0] == 28 + assert tbl['TAG'][0] == -28503 + + +@pytest.mark.remote_data +def test_ch3cn_negqn(): + # 041505 = CH3CN on 2025-05-21 + tbl = CDMS.get_molecule('041505') + assert tbl.meta['molecule'] == 'CH3CN, v=0' + fourtominusthree = tbl[(tbl['Q1'] == 4) & (tbl['Q2'] == -3)] + assert len(fourtominusthree) >= 1 + + # check specifically for -21, which is encoded as `b1` + twentytwominustwentyone = tbl[(tbl['Q1'] == 22) & (tbl['Q2'] == -21)] + assert len(twentytwominustwentyone) >= 1 + + assert tbl['TAG'][0] == 41505 + + twentythreeminustwentyone = tbl[(tbl['Q1'] == 23) & (tbl['Q2'] == -21)] + assert len(twentythreeminustwentyone) >= 1 + assert twentythreeminustwentyone['TAG'][0] == -41505 + + +@pytest.mark.remote_data +def test_propanediol(): + tbl1 = CDMS.get_molecule('076513') + assert 'int' in tbl1['Q2'].dtype.name + + tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz, + max_frequency=100.5 * u.GHz, + molecule='076513') + assert isinstance(tbl, Table) + assert len(tbl) >= 1 + assert 'aG\'g-1,2-Propanediol' in tbl['name'] + # check that the parser worked - this will be string or obj otherwise + assert 'int' in tbl['Ku'].dtype.name + assert tbl['MOLWT'][0] == 76 + assert tbl['TAG'][0] == 76513 @pytest.mark.remote_data @@ -66,16 +115,16 @@ def test_molecule_with_parens(): MC = np.ma.core.MaskedConstant() - for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66, - 506, 303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', False)): + for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66506, + 303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', 66, False)): if val is MC: assert tbl[0][col].mask else: assert tbl[0][col] == val # this test row includes degeneracy = 1225, which covers one of the weird letter-is-number parser cases - for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66, - 506, 303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', False),): + for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66506, + 303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', 66, False),): if val is MC: assert tbl[16][col].mask else: @@ -121,6 +170,20 @@ def test_retrieve_species_table(): assert 'float' in species_table['lg(Q(1000))'].dtype.name +@pytest.mark.remote_data +def test_remote_all_species(): + tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz, + max_frequency=100.5 * u.GHz, + min_strength=-5) + assert isinstance(tbl, Table) + + AlS_is_in_table = (tbl['name'] == 'AlS').sum() > 0 + Propanediol_is_in_table = (tbl['name'] == "aG'g-1,2-Propanediol").sum() > 0 + + assert AlS_is_in_table + assert Propanediol_is_in_table + + @pytest.mark.bigdata @pytest.mark.remote_data class TestRegressionAllCats: