From 7383528c412719631f8da8bd8b42ac47e5012571 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Wed, 9 Apr 2025 16:39:08 +0200 Subject: [PATCH 1/3] maint: UCD cleanup upstream in SIMBAD --- astroquery/simbad/core.py | 68 +++++++++++++------------- astroquery/simbad/tests/test_simbad.py | 6 ++- docs/simbad/query_tap.rst | 32 ++++++------ 3 files changed, 54 insertions(+), 52 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index e74cae49cf..63abc402f3 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -242,18 +242,18 @@ def list_votable_fields(self): >>> options = Simbad.list_votable_fields() # doctest: +REMOTE_DATA >>> # to print only the available bundles of columns >>> options[options["type"] == "bundle of basic columns"][["name", "description"]] # doctest: +REMOTE_DATA - - name description - object object - ------------- ------------------------------------------------------- - coordinates all fields related with coordinates - dim major and minor axis, angle and inclination - dimensions all fields related to object dimensions - morphtype all fields related to the morphological type - parallax all fields related to parallaxes - propermotions all fields related with the proper motions - sp all fields related with the spectral type - velocity all fields related with radial velocity and redshift +
+ name description + object object + ------------- ---------------------------------------------------- + coordinates all fields related with coordinates + dim major and minor axis, angle and inclination + dimensions all fields related to object dimensions + morphtype all fields related to the morphological type + parallax all fields related to parallaxes + propermotions all fields related with the proper motions + sp all fields related with the spectral type + velocity all fields related with radial velocity and redshift """ # get the tables with a simple link to basic query_tables = """SELECT DISTINCT table_name AS name, tables.description @@ -1276,37 +1276,37 @@ def list_columns(self, *tables: str, keyword=None, get_query_payload=False): >>> from astroquery.simbad import Simbad >>> Simbad.list_columns("ids", "ident") # doctest: +REMOTE_DATA
- table_name column_name datatype ... unit ucd - object object object ... object object - ---------- ----------- -------- ... ------ ------- - ident id VARCHAR ... meta.id - ident oidref BIGINT ... - ids ids VARCHAR ... meta.id - ids oidref BIGINT ... + table_name column_name datatype ... unit ucd + object object object ... object object + ---------- ----------- -------- ... ------ ----------- + ident id VARCHAR ... meta.id + ident oidref BIGINT ... meta.record + ids ids VARCHAR ... meta.id + ids oidref BIGINT ... meta.record >>> from astroquery.simbad import Simbad >>> Simbad.list_columns(keyword="filter") # doctest: +REMOTE_DATA
- table_name column_name datatype ... unit ucd - object object object ... object object - ----------- ----------- ----------- ... ------ ---------------------- - filter description UNICODECHAR ... meta.note;instr.filter - filter filtername VARCHAR ... instr.filter - filter unit VARCHAR ... meta.unit - flux filter VARCHAR ... instr.filter - mesDiameter filter CHAR ... instr.filter + table_name column_name datatype ... unit ucd + object object object ... object object + ----------- ----------- ----------- ... ------ --------------------------- + filter description UNICODECHAR ... meta.note;instr.filter + filter filtername VARCHAR ... instr.bandpass;instr.filter + filter unit VARCHAR ... meta.unit + flux filter VARCHAR ... instr.bandpass;instr.filter + mesDiameter filter CHAR ... instr.bandpass;instr.filter >>> from astroquery.simbad import Simbad >>> Simbad.list_columns("basic", keyword="object") # doctest: +REMOTE_DATA
- table_name column_name datatype ... unit ucd - object object object ... object object - ---------- ----------- -------- ... ------ ------------------- - basic main_id VARCHAR ... meta.id;meta.main - basic otype_txt VARCHAR ... src.class - basic oid BIGINT ... meta.record;meta.id - basic otype VARCHAR ... src.class + table_name column_name datatype ... unit ucd + object object object ... object object + ---------- ----------- -------- ... ------ ----------------- + basic main_id VARCHAR ... meta.id;meta.main + basic otype_txt VARCHAR ... src.class + basic oid BIGINT ... meta.record + basic otype VARCHAR ... src.class """ query = ("SELECT table_name, column_name, datatype, description, unit, ucd" " FROM TAP_SCHEMA.columns" diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py index d8a6ad4d7d..3917bbef16 100644 --- a/astroquery/simbad/tests/test_simbad.py +++ b/astroquery/simbad/tests/test_simbad.py @@ -563,14 +563,16 @@ def test_query_tap_errors(): @pytest.mark.usefixtures("_mock_simbad_class") def test_query_tap_cache_call(monkeypatch): msg = "called_cached_query_tap" - monkeypatch.setattr(simbad.core, "_cached_query_tap", lambda tap, query, maxrec: msg) + monkeypatch.setattr(simbad.core, "_cached_query_tap", + lambda tap, query, maxrec, async_job: msg) assert simbad.Simbad.query_tap("select top 1 * from basic") == msg @pytest.mark.usefixtures("_mock_simbad_class") def test_empty_response_warns(monkeypatch): # return something of length zero - monkeypatch.setattr(simbad.core.Simbad, "query_tap", lambda _, get_query_payload, maxrec: []) + monkeypatch.setattr(simbad.core.Simbad, "query_tap", + lambda _, get_query_payload, maxrec, async_job: []) msg = ("The request executed correctly, but there was no data corresponding to these" " criteria in SIMBAD") with pytest.warns(NoResultsWarning, match=msg): diff --git a/docs/simbad/query_tap.rst b/docs/simbad/query_tap.rst index 79f303a3d1..fad87aedb6 100644 --- 
a/docs/simbad/query_tap.rst +++ b/docs/simbad/query_tap.rst @@ -139,22 +139,22 @@ some tables, add their name. To get the columns of the tables ``ref`` and ``bibl >>> from astroquery.simbad import Simbad >>> Simbad.list_columns("ref", "biblio")
- table_name column_name datatype ... unit ucd - object object object ... object object - ---------- ----------- ----------- ... ------ -------------------- - biblio biblio VARCHAR ... meta.record;meta.bib - biblio oidref BIGINT ... meta.record;meta.id - ref "year" SMALLINT ... meta.note;meta.bib - ref abstract UNICODECHAR ... meta.record - ref bibcode CHAR ... meta.bib.bibcode - ref doi VARCHAR ... meta.code;meta.bib - ref journal VARCHAR ... meta.bib.journal - ref last_page INTEGER ... meta.bib.page - ref nbobject INTEGER ... meta.number - ref oidbib BIGINT ... meta.record;meta.bib - ref page INTEGER ... meta.bib.page - ref title UNICODECHAR ... meta.title - ref volume INTEGER ... meta.bib.volume + table_name column_name datatype ... unit ucd + object object object ... object object + ---------- ----------- ----------- ... ------ ----------------- + biblio biblio VARCHAR ... meta.bib.bibcode + biblio oidref BIGINT ... meta.record + ref "year" SMALLINT ... time.publiYear + ref abstract UNICODECHAR ... meta.record + ref bibcode CHAR ... meta.bib.bibcode + ref doi VARCHAR ... meta.ref.doi + ref journal VARCHAR ... meta.bib.journal + ref last_page INTEGER ... meta.bib.page + ref nbobject INTEGER ... meta.id;arith.sum + ref oidbib BIGINT ... meta.record + ref page INTEGER ... meta.bib.page + ref title UNICODECHAR ... meta.title + ref volume INTEGER ... meta.bib.volume `~astroquery.simbad.SimbadClass.list_columns` can also be called with a keyword argument. 
This returns columns from any table for witch the given keyword is either in the table name, From 2881639300e1b12435cb05e44c47248032a2808d Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Mon, 28 Apr 2025 11:57:33 +0200 Subject: [PATCH 2/3] feat: add async_job option --- CHANGES.rst | 7 + astroquery/simbad/__init__.py | 7 +- astroquery/simbad/core.py | 149 ++++++++++++++---- astroquery/simbad/tests/test_simbad.py | 24 ++- astroquery/simbad/tests/test_simbad_remote.py | 6 + docs/simbad/simbad.rst | 24 +++ 6 files changed, 185 insertions(+), 32 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index fb1948305d..9b4f2ef3ed 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -44,6 +44,13 @@ imcce - Changing RuntimeError to NoResultsWarning when an empty result is returned. [#3307] +SIMBAD +^^^^^^ + +- add ``async_job`` option in all query methods. This executes the query in asynchronous + mode. Asynchronous queries are slower to start but more robust, and their timeout can + be increased (with the ``timeout`` property or with the configuration file) [#3305] + utils.tap ^^^^^^^^^ diff --git a/astroquery/simbad/__init__.py b/astroquery/simbad/__init__.py index 55c09db211..b918f230ff 100644 --- a/astroquery/simbad/__init__.py +++ b/astroquery/simbad/__init__.py @@ -22,8 +22,11 @@ class Conf(_config.ConfigNamespace): 'Name of the SIMBAD mirror to use.') timeout = _config.ConfigItem( - 60, - 'Time limit for connecting to Simbad server.') + 1080, + # this is the default value in SIMBAD's main mirror + # https://simbad.cds.unistra.fr/simbad/sim-tap/capabilities + "Time limit for the execution of asynchronous queries, " + "in seconds.") row_limit = _config.ConfigItem( # defaults to the maximum limit diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 63abc402f3..4020e5c12a 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -50,7 +50,7 @@ def _adql_parameter(entry: str): @lru_cache(256) -def _cached_query_tap(tap, query: str, *, 
maxrec=10000): +def _cached_query_tap(tap, query: str, *, maxrec=10000, async_job=False, timeout=None): """Cache version of query TAP. This private function is called when query_tap is executed without an @@ -67,12 +67,22 @@ def _cached_query_tap(tap, query: str, *, maxrec=10000): Astronomical Data Query Language (ADQL). maxrec : int, optional The number of records to be returned. Its maximum value is 2000000. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. + timeout: int, optional + The execution duration for the asynchronous query. If 'async_job' is true, then + this has to be provided. Returns ------- `~astropy.table.Table` The response returned by SIMBAD. """ + if async_job: + return tap.run_async(query, maxrec=maxrec, execution_duration=timeout).to_table() return tap.search(query, maxrec=maxrec).to_table() @@ -103,7 +113,7 @@ class SimbadClass(BaseVOQuery): """ SIMBAD_URL = 'https://' + conf.server + '/simbad/sim-script' - def __init__(self, ROW_LIMIT=None): + def __init__(self, ROW_LIMIT=None, *, timeout=None): super().__init__() # to create the TAPService self._server = conf.server @@ -115,6 +125,7 @@ def __init__(self, ROW_LIMIT=None): self.joins = [] # a list of _Join self.criteria = [] # a list of strings self.ROW_LIMIT = ROW_LIMIT + self.timeout = timeout @property def ROW_LIMIT(self): @@ -131,6 +142,28 @@ def ROW_LIMIT(self, ROW_LIMIT): "maximum capability, 0 to retrieve an empty table, " "or a positive integer.") + @property + def timeout(self): + """The execution time for asynchronous queries. + + Returns + ------- + int + The execution time before the query times out, in seconds. 
+ """ + return self._timeout + + @timeout.setter + def timeout(self, timeout): + if timeout is None: + self._timeout = conf.timeout + elif timeout <= self.tap.capabilities[0].executionduration.hard: + self._timeout = timeout + else: + raise ValueError( + "'timeout' cannot exceed the maximum time duration set by this mirror: " + f"{self.tap.capabilities[0].executionduration.hard} seconds.") + @property def server(self): """The SIMBAD mirror to use.""" @@ -539,8 +572,8 @@ def get_field_description(self, field_name): @deprecated_renamed_argument(["verbose"], new_name=[None], since=['0.4.8'], relax=True) - def query_object(self, object_name, *, wildcard=False, - criteria=None, get_query_payload=False, verbose=False): + def query_object(self, object_name, *, wildcard=False, criteria=None, + get_query_payload=False, async_job=False, verbose=False): """Query SIMBAD for the given object. Object names may also be specified with wildcards. See examples below. @@ -560,6 +593,11 @@ def query_object(self, object_name, *, wildcard=False, When set to `True` the method returns the HTTP request parameters without querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. 
Returns ------- @@ -616,12 +654,13 @@ def query_object(self, object_name, *, wildcard=False, instance_criteria.append(f"({criteria})") return self._query(top, columns, joins, instance_criteria, - get_query_payload=get_query_payload) + get_query_payload=get_query_payload, async_job=async_job) @deprecated_renamed_argument(["verbose", "cache"], new_name=[None, None], since=['0.4.8', '0.4.8'], relax=True) def query_objects(self, object_names, *, wildcard=False, criteria=None, - get_query_payload=False, verbose=False, cache=False): + get_query_payload=False, async_job=False, verbose=False, + cache=False): """Query SIMBAD for the specified list of objects. Object names may be specified with wildcards. @@ -643,6 +682,11 @@ def query_objects(self, object_names, *, wildcard=False, criteria=None, When set to `True` the method returns the HTTP request parameters without querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. cache : Deprecated since 0.4.8. The cache is now automatically emptied at the end of the python session. It can also be emptied manually with `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. @@ -681,7 +725,7 @@ def query_objects(self, object_names, *, wildcard=False, criteria=None, instance_criteria += [f'({" OR ".join(list_criteria)})'] return self._query(top, columns, joins, instance_criteria, - get_query_payload=get_query_payload) + get_query_payload=get_query_payload, async_job=async_job) # There is a faster way to do the query if there is no wildcard: the first table # can be the uploaded one and we use a LEFT JOIN for the other ones. 
@@ -707,7 +751,7 @@ def query_objects(self, object_names, *, wildcard=False, criteria=None, new_name=[None]*3, since=['0.4.8']*3, relax=True) def query_region(self, coordinates, radius=2*u.arcmin, *, - criteria=None, get_query_payload=False, + criteria=None, get_query_payload=False, async_job=False, equinox=None, epoch=None, cache=None): """Query SIMBAD in a cone around the specified coordinates. @@ -724,6 +768,11 @@ def query_region(self, coordinates, radius=2*u.arcmin, *, When set to `True` the method returns the HTTP request parameters without querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. cache : Deprecated since 0.4.8. The cache is now automatically emptied at the end of the python session. It can also be emptied manually with `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. @@ -820,13 +869,13 @@ def query_region(self, coordinates, radius=2*u.arcmin, *, "('ICRS', centers.ra, centers.dec, centers.radius)) = 1 ") return self._query(top, columns, joins, instance_criteria, - from_table=f"{sub_query}, basic", + from_table=f"{sub_query}, basic", async_job=async_job, get_query_payload=get_query_payload, centers=upload_centers) @deprecated_renamed_argument(["verbose", "cache"], new_name=[None, None], since=['0.4.8', '0.4.8'], relax=True) def query_catalog(self, catalog, *, criteria=None, get_query_payload=False, - verbose=False, cache=True): + async_job=False, verbose=False, cache=True): """Query a whole catalog. Parameters @@ -840,6 +889,11 @@ def query_catalog(self, catalog, *, criteria=None, get_query_payload=False, When set to `True` the method returns the HTTP request parameters without querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. 
Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. cache : Deprecated since 0.4.8. The cache is now automatically emptied at the end of the python session. It can also be emptied manually with `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. @@ -883,11 +937,11 @@ def query_catalog(self, catalog, *, criteria=None, get_query_payload=False, instance_criteria.append(f"({criteria})") return self._query(top, columns, joins, instance_criteria, - get_query_payload=get_query_payload) + get_query_payload=get_query_payload, async_job=async_job) def query_hierarchy(self, name, hierarchy, *, detailed_hierarchy=True, - criteria=None, get_query_payload=False): + criteria=None, get_query_payload=False, async_job=False): """Query either the parents or the children of the object. Parameters @@ -914,6 +968,11 @@ def query_hierarchy(self, name, hierarchy, *, When set to `True` the method returns the HTTP request parameters without querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. 
Returns ------- @@ -964,12 +1023,12 @@ def query_hierarchy(self, name, hierarchy, *, return self._query(top, columns, joins, instance_criteria, from_table=f"{sub_query}, basic", distinct=True, - get_query_payload=get_query_payload) + get_query_payload=get_query_payload, async_job=async_job) @deprecated_renamed_argument(["verbose"], new_name=[None], since=['0.4.8'], relax=True) def query_bibobj(self, bibcode, *, criteria=None, - get_query_payload=False, + get_query_payload=False, async_job=False, verbose=False): """Query all the objects mentioned in an article. @@ -981,6 +1040,11 @@ def query_bibobj(self, bibcode, *, criteria=None, When set to `True` the method returns the HTTP request parameters without querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. Returns ------- @@ -1000,13 +1064,13 @@ def query_bibobj(self, bibcode, *, criteria=None, instance_criteria.append(f"({criteria})") return self._query(top, columns, joins, instance_criteria, - get_query_payload=get_query_payload) + get_query_payload=get_query_payload, async_job=async_job) @deprecated_renamed_argument(["verbose", "cache"], new_name=[None, None], since=['0.4.8', '0.4.8'], relax=True) def query_bibcode(self, bibcode, *, wildcard=False, - abstract=False, get_query_payload=False, criteria=None, - verbose=None, cache=None, ): + abstract=False, criteria=None, get_query_payload=False, + async_job=False, verbose=None, cache=None, ): """Query the references corresponding to a given bibcode. Wildcards may be used to specify bibcodes. @@ -1028,6 +1092,11 @@ def query_bibcode(self, bibcode, *, wildcard=False, When set to `True` the method returns the HTTP request parameters without querying SIMBAD. 
The ADQL string is in the 'QUERY' key of the payload. Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. cache : Deprecated since 0.4.8. The cache is now automatically emptied at the end of the python session. It can also be emptied manually with `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. @@ -1073,12 +1142,13 @@ def query_bibcode(self, bibcode, *, wildcard=False, query += " ORDER BY bibcode" - return self.query_tap(query, get_query_payload=get_query_payload) + return self.query_tap(query, get_query_payload=get_query_payload, + async_job=async_job) @deprecated_renamed_argument(["verbose", "cache"], new_name=[None, None], since=['0.4.8', '0.4.8'], relax=True) - def query_objectids(self, object_name, *, verbose=None, cache=None, - get_query_payload=False, criteria=None): + def query_objectids(self, object_name, *, criteria=None, get_query_payload=False, + async_job=False, verbose=None, cache=None): """Query SIMBAD with an object name. This returns a table of all names associated with that object. @@ -1095,6 +1165,11 @@ def query_objectids(self, object_name, *, verbose=None, cache=None, When set to `True` the method returns the HTTP request parameters without querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. cache : Deprecated since 0.4.8. The cache is now automatically emptied at the end of the python session. It can also be emptied manually with `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. 
@@ -1137,7 +1212,8 @@ def query_objectids(self, object_name, *, verbose=None, cache=None, f"WHERE id_typed.id = '{_adql_parameter(object_name)}'") if criteria is not None: query += f" AND {criteria}" - return self.query_tap(query, get_query_payload=get_query_payload) + return self.query_tap(query, get_query_payload=get_query_payload, + async_job=async_job) @deprecated(since="v0.4.8", message=("'query_criteria' is deprecated. It uses the former sim-script " @@ -1365,7 +1441,8 @@ def list_linked_tables(self, table: str, *, get_query_payload=False): f" OR (target_table = '{_adql_parameter(table)}')") return self.query_tap(query, get_query_payload=get_query_payload) - def query_tap(self, query: str, *, maxrec=10000, get_query_payload=False, **uploads): + def query_tap(self, query: str, *, maxrec=10000, async_job=False, + get_query_payload=False, **uploads): """Query SIMBAD TAP service. Parameters @@ -1380,10 +1457,14 @@ def query_tap(self, query: str, *, maxrec=10000, get_query_payload=False, **uplo Any number of local tables to be used in the *query*. In the *query*, these tables are referred as *TAP_UPLOAD.table_alias* where *TAP_UPLOAD* is imposed and *table_alias* is the keyword name you chose. The maximum number of lines for the uploaded tables is 200000. - get_query_payload : bool, optional - When set to `True` the method returns the HTTP request parameters without - querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. Defaults to `False`. + get_query_payload : bool, default=False + When set to ``True`` the method returns the HTTP request parameters without + querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. 
Returns ------- @@ -1462,9 +1543,14 @@ def query_tap(self, query: str, *, maxrec=10000, get_query_payload=False, **uplo return dict(TAPQuery(self.SIMBAD_URL, query, maxrec=maxrec, uploads=uploads)) # without uploads we call the version with cache if uploads == {}: - return _cached_query_tap(self.tap, query, maxrec=maxrec) + return _cached_query_tap(self.tap, query, maxrec=maxrec, + async_job=async_job, timeout=self.timeout) # with uploads it has to be without cache - return self.tap.run_async(query, maxrec=maxrec, uploads=uploads).to_table() + if async_job: + return self.tap.run_async(query, maxrec=maxrec, + execution_duration=self.timeout, + uploads=uploads).to_table() + return self.tap.run_sync(query, maxrec=maxrec, uploads=uploads).to_table() @staticmethod def clear_cache(): @@ -1481,7 +1567,7 @@ def _get_query_parameters(self): return tuple(map(copy.deepcopy, (self.ROW_LIMIT, self.columns_in_output, self.joins, self.criteria))) def _query(self, top, columns, joins, criteria, from_table="basic", distinct=False, - get_query_payload=False, **uploads): + async_job=False, get_query_payload=False, **uploads): """Generate an ADQL string from the given query parameters and executes the query. Parameters @@ -1503,6 +1589,11 @@ def _query(self, top, columns, joins, criteria, from_table="basic", distinct=Fal When set to `True` the method returns the HTTP request parameters without querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload. Defaults to `False`. + async_job: bool, optional + When set to `True`, the query will be executed in asynchronous mode. This is + better for very long queries, as it prevents transient failures to abort the + query execution. + Defaults to `False`. uploads : `~astropy.table.Table` Any number of local tables to be used in the *query*. 
In the *query*, these tables are referred as *TAP_UPLOAD.table_alias* where *TAP_UPLOAD* is imposed and *table_alias* @@ -1550,7 +1641,7 @@ def _query(self, top, columns, joins, criteria, from_table="basic", distinct=Fal query = f"SELECT{distinct_results}{top_part}{columns} FROM {from_table}{join}{criteria}" response = self.query_tap(query, get_query_payload=get_query_payload, - maxrec=self.hardlimit, + maxrec=self.hardlimit, async_job=async_job, **uploads) if len(response) == 0 and top != 0: diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py index 3917bbef16..9a458cf52f 100644 --- a/astroquery/simbad/tests/test_simbad.py +++ b/astroquery/simbad/tests/test_simbad.py @@ -8,9 +8,11 @@ import astropy.units as u from astropy.utils.exceptions import AstropyDeprecationWarning from pyvo.dal.tap import TAPService +from pyvo.io.vosi import tapregext import pytest +from .. import conf from ... import simbad from .test_simbad_remote import multicoords from astroquery.exceptions import NoResultsWarning @@ -155,6 +157,26 @@ def test_mocked_simbad(): # and the uploadlimit assert simbad_instance.uploadlimit == 200000 + +def test_simbad_timeout(monkeypatch): + simbad_instance = simbad.Simbad() + assert simbad_instance.timeout == conf.timeout # default value + + class PatchedCapability: + @property + def executionduration(self): + time_limit = tapregext.TimeLimits() + time_limit.hard = 2000 + return time_limit + + monkeypatch.setattr(TAPService, "capabilities", [PatchedCapability()]) + # good value + simbad_instance.timeout = 10 + assert simbad_instance.timeout == 10 + # too high + with pytest.raises(ValueError, match="'timeout' cannot exceed*"): + simbad_instance.timeout = 10000 + # ---------------------------- # Test output options settings # ---------------------------- @@ -564,7 +586,7 @@ def test_query_tap_errors(): def test_query_tap_cache_call(monkeypatch): msg = "called_cached_query_tap" monkeypatch.setattr(simbad.core, 
"_cached_query_tap", - lambda tap, query, maxrec, async_job: msg) + lambda tap, query, maxrec, async_job, timeout: msg) assert simbad.Simbad.query_tap("select top 1 * from basic") == msg diff --git a/astroquery/simbad/tests/test_simbad_remote.py b/astroquery/simbad/tests/test_simbad_remote.py index 20efa02f19..4c40e8ca84 100644 --- a/astroquery/simbad/tests/test_simbad_remote.py +++ b/astroquery/simbad/tests/test_simbad_remote.py @@ -168,6 +168,12 @@ def test_query_tap(self): Simbad.clear_cache() assert _cached_query_tap.cache_info().currsize == 0 + def test_async_query(self): + adql = "select top 1 main_id from basic" + sync_job = Simbad.query_tap(adql) + async_job = Simbad.query_tap(adql, async_job=True) + assert sync_job["main_id"] == async_job["main_id"] + def test_empty_response_warns(self): with pytest.warns(NoResultsWarning, match="The request executed correctly, but *"): # a catalog that does not exists should return an empty response diff --git a/docs/simbad/simbad.rst b/docs/simbad/simbad.rst index 80054d7123..b68764b327 100644 --- a/docs/simbad/simbad.rst +++ b/docs/simbad/simbad.rst @@ -829,6 +829,30 @@ Query TAP Troubleshooting =============== +Longer queries +-------------- + +It can be useful to execute longer queries in asynchronous mode by setting the +``async_job`` argument to ``True``. This may take longer to start, depending on the +current number of other people using the asynchronous SIMBAD queue, but it is more +robust against transient errors. Asynchronous queries will take the ``timeout`` property +into account: + +.. doctest-remote-data:: + + >>> from astroquery.simbad import Simbad + >>> simbad = Simbad(timeout=2000) # in seconds + >>> simbad.query_tap("select otype, description from otypedef where otype = 'N*'", + ... async_job=True) +
+ otype description + object object + ------ ------------ + N* Neutron Star + +Clearing the cache +------------------ + If you are repeatedly getting failed queries, or bad/out-of-date results, try clearing your cache: From f1ee46b7d173d17fb6f0e544df8f822c8ebcc695 Mon Sep 17 00:00:00 2001 From: MARCHAND MANON Date: Wed, 30 Apr 2025 11:11:15 +0200 Subject: [PATCH 3/3] docs: clean deprecated 'cache' parameter out of docstrings --- astroquery/simbad/core.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py index 4020e5c12a..0c03893915 100644 --- a/astroquery/simbad/core.py +++ b/astroquery/simbad/core.py @@ -687,9 +687,6 @@ def query_objects(self, object_names, *, wildcard=False, criteria=None, better for very long queries, as it prevents transient failures to abort the query execution. Defaults to `False`. - cache : Deprecated since 0.4.8. The cache is now automatically emptied at the - end of the python session. It can also be emptied manually with - `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. Returns ------- @@ -773,9 +770,6 @@ def query_region(self, coordinates, radius=2*u.arcmin, *, better for very long queries, as it prevents transient failures to abort the query execution. Defaults to `False`. - cache : Deprecated since 0.4.8. The cache is now automatically emptied at the - end of the python session. It can also be emptied manually with - `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. Returns ------- @@ -894,9 +888,6 @@ def query_catalog(self, catalog, *, criteria=None, get_query_payload=False, better for very long queries, as it prevents transient failures to abort the query execution. Defaults to `False`. - cache : Deprecated since 0.4.8. The cache is now automatically emptied at the - end of the python session. It can also be emptied manually with - `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. 
Returns ------- @@ -1097,9 +1088,6 @@ def query_bibcode(self, bibcode, *, wildcard=False, better for very long queries, as it prevents transient failures to abort the query execution. Defaults to `False`. - cache : Deprecated since 0.4.8. The cache is now automatically emptied at the - end of the python session. It can also be emptied manually with - `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. Returns ------- @@ -1170,9 +1158,6 @@ def query_objectids(self, object_name, *, criteria=None, get_query_payload=False better for very long queries, as it prevents transient failures to abort the query execution. Defaults to `False`. - cache : Deprecated since 0.4.8. The cache is now automatically emptied at the - end of the python session. It can also be emptied manually with - `~astroquery.simbad.SimbadClass.clear_cache` but cannot be deactivated. Returns -------