Skip to content

GAIA: update the authentication implementation to read the cookies sent by the new ESAC tap mechanism #3289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ heasarc
no data associated with that row rather than filtering it out. [#3275]


utils.tap
^^^^^^^^^

- Read the session cookie from either the ``JSESSIONID`` or the ``SESSION`` key, following the new TAP library release at ESAC. [#3289]


Infrastructure, Utility and Other Changes and Additions
-------------------------------------------------------

Expand Down
30 changes: 23 additions & 7 deletions astroquery/utils/tap/conn/tapconn.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@

import http.client as httplib
import mimetypes
import os
import platform
import requests
import time
import os
from astroquery.utils.tap.xmlparser import utils
from astroquery.utils.tap import taputils
from astroquery import version

import requests
from astroquery import version
from astroquery.utils.tap import taputils
from astroquery.utils.tap.xmlparser import utils

__all__ = ['TapConn']

Expand Down Expand Up @@ -485,6 +485,22 @@ def find_header(self, headers, key):
"""
return taputils.taputil_find_header(headers, key)

def find_all_headers(self, headers, key):
    """Collects every value stored under the specified header key

    Parameters
    ----------
    headers : HTTP(s) headers object, mandatory
        HTTP(s) response headers
    key : str, mandatory
        header key to be searched for

    Returns
    -------
    A list with the values of all the requested headers, or an empty list
    if no header is found
    """
    # The actual lookup lives in taputils so that it can be shared with
    # other connection handlers.
    matching_values = taputils.taputil_find_all_headers(headers, key)
    return matching_values

def dump_to_file(self, output, response):
"""Writes the connection response into the specified output

Expand Down Expand Up @@ -585,7 +601,7 @@ def get_file_from_header(self, headers):
if content_disposition is not None:
p = content_disposition.find('filename="')
if p >= 0:
filename = os.path.basename(content_disposition[p+10:len(content_disposition)-1])
filename = os.path.basename(content_disposition[p + 10:len(content_disposition) - 1])
content_encoding = self.find_header(headers, 'Content-Encoding')

if content_encoding is not None:
Expand Down Expand Up @@ -722,7 +738,7 @@ def encode_multipart(self, fields, files):

def __str__(self):
return f"\tHost: {self.__connHost}\n\tUse HTTPS: {self.__isHttps}" \
f"\n\tPort: {self.__connPort}\n\tSSL Port: {self.__connPortSsl}"
f"\n\tPort: {self.__connPort}\n\tSSL Port: {self.__connPortSsl}"


class ConnectionHandler:
Expand Down
3 changes: 3 additions & 0 deletions astroquery/utils/tap/conn/tests/DummyConnHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ def get_file_from_header(self, headers):
def find_header(self, headers, key):
    """Delegates the header lookup to ``taputils.taputil_find_header``"""
    header_value = taputils.taputil_find_header(headers, key)
    return header_value

def find_all_headers(self, headers, key):
    """Delegates the lookup to ``taputils.taputil_find_all_headers``"""
    header_values = taputils.taputil_find_all_headers(headers, key)
    return header_values

def execute_table_edit(self, data,
content_type="application/x-www-form-urlencoded",
verbose=False):
Expand Down
56 changes: 56 additions & 0 deletions astroquery/utils/tap/conn/tests/test_conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,59 @@ def test_login():
assert r.get_method() == 'POST'
assert r.get_context() == context
assert r.get_body() == data


def test_find_header():
    """find_header yields the JSESSIONID cookie, the first 'Set-Cookie' entry in the list"""
    jsessionid_cookie = "JSESSIONID=E677B51BA5C4837347D1E17D4E36647E; Path=/data-server; Secure; HttpOnly"
    session_cookie = "SESSION=ZjQ3MjIzMDAt; Path=/data-server; Secure; HttpOnly; SameSite=Lax"

    # Two 'Set-Cookie' entries are present on purpose
    response_headers = [
        ('Date', 'Sat, 12 Apr 2025 05:10:47 GMT'),
        ('Server', 'Apache/2.4.6 (Red Hat Enterprise Linux) OpenSSL/1.0.2k-fips mod_jk/1.2.43'),
        ('Set-Cookie', jsessionid_cookie),
        ('X-Content-Type-Options', 'nosniff'),
        ('X-XSS-Protection', '0'),
        ('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate'),
        ('Pragma', 'no-cache'),
        ('Expires', '0'),
        ('X-Frame-Options', 'SAMEORIGIN'),
        ('Set-Cookie', session_cookie),
        ('Transfer-Encoding', 'chunked'),
        ('Content-Type', 'text/plain; charset=UTF-8'),
    ]

    conn = TapConn(ishttps=False, host="testHost")
    found = conn.find_header(response_headers, 'Set-Cookie')

    assert found == jsessionid_cookie


def test_find_all_headers():
    """find_all_headers returns both 'Set-Cookie' values, in the order they appear"""
    jsessionid_cookie = "JSESSIONID=E677B51BA5C4837347D1E17D4E36647E; Path=/data-server; Secure; HttpOnly"
    session_cookie = "SESSION=ZjQ3MjIzMDAtNjNiYy00Mj; Path=/data-server; Secure; HttpOnly; SameSite=Lax"

    response_headers = [
        ('Date', 'Sat, 12 Apr 2025 05:10:47 GMT'),
        ('Server', 'Apache/2.4.6 (Red Hat Enterprise Linux) OpenSSL/1.0.2k-fips mod_jk/1.2.43'),
        ('Set-Cookie', jsessionid_cookie),
        ('X-Content-Type-Options', 'nosniff'),
        ('X-XSS-Protection', '0'),
        ('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate'),
        ('Pragma', 'no-cache'),
        ('Expires', '0'),
        ('X-Frame-Options', 'SAMEORIGIN'),
        ('Set-Cookie', session_cookie),
        ('Transfer-Encoding', 'chunked'),
        ('Content-Type', 'text/plain; charset=UTF-8'),
    ]

    conn = TapConn(ishttps=False, host="testHost")
    found = conn.find_all_headers(response_headers, 'Set-Cookie')

    assert found[0] == jsessionid_cookie
    assert found[1] == session_cookie


def test_get_file_from_header():
    """The file name is extracted from the 'Content-Disposition' header of a gzip response"""
    response_headers = [
        ('Date', 'Sat, 12 Apr 2025 05:10:47 GMT'),
        ('Server', 'Apache/2.4.6 (Red Hat Enterprise Linux) OpenSSL/1.0.2k-fips mod_jk/1.2.43'),
        ('Set-Cookie', 'JSESSIONID=E677B51BA5C4837347D1E17D4E36647E; Path=/data-server; Secure; HttpOnly'),
        ('X-Content-Type-Options', 'nosniff'),
        ('X-XSS-Protection', '0'),
        ('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate'),
        ('Pragma', 'no-cache'),
        ('Expires', '0'),
        ('X-Frame-Options', 'SAMEORIGIN'),
        ('Set-Cookie', 'SESSION=ZjQ3MjIzMDAtNjNiYy00Mj; Path=/data-server; Secure; HttpOnly; SameSite=Lax'),
        ('Transfer-Encoding', 'chunked'),
        ('Content-Type', 'text/plain; charset=UTF-8'),
        ('Content-Disposition', 'filename="my_file.vot.gz"'),
        ('Content-Encoding', 'gzip'),
    ]

    conn = TapConn(ishttps=False, host="testHost")
    file_name = conn.get_file_from_header(response_headers)

    assert file_name == "my_file.vot.gz"
24 changes: 15 additions & 9 deletions astroquery/utils/tap/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@
"""
import getpass
import os
import tempfile
from urllib.parse import urlencode

import requests
import tempfile
from astropy.table.table import Table
from urllib.parse import urlencode

from astroquery import log
from astroquery.utils.tap import taputils
Expand Down Expand Up @@ -661,16 +660,23 @@ def __extract_sync_subcontext(self, location):
return location[pos:]

def __findCookieInHeader(self, headers, *, verbose=False):
cookies = self.__connHandler.find_header(headers, 'Set-Cookie')
cookies = self.__connHandler.find_all_headers(headers, 'Set-Cookie')
if verbose:
print(cookies)
if cookies is None:
if not cookies:
return None
else:
items = cookies.split(';')
for i in items:
if i.startswith("JSESSIONID="):
return i
for cook in cookies:
items = cook.split(';')
for item in items:
if item.startswith("SESSION="):
return item

for cook in cookies:
items = cook.split(';')
for item in items:
if item.startswith("JSESSIONID="):
return item
return None

def __parseUrl(self, url, *, verbose=False):
Expand Down
22 changes: 22 additions & 0 deletions astroquery/utils/tap/taputils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,28 @@ def taputil_find_header(headers, key):
return None


def taputil_find_all_headers(headers, key):
    """Collects every value stored under the specified header key

    Header names are compared case-insensitively, and the values are returned
    in the same order in which they appear in ``headers``.

    Parameters
    ----------
    headers : HTTP(s) headers object, mandatory
        HTTP(s) response headers
    key : str, mandatory
        header key to be searched for

    Returns
    -------
    A list of requested header values or an empty list if no header is found
    """
    # Each entry is indexed as (name, value); keep the value of every match.
    return [entry[1] for entry in headers if key.lower() == entry[0].lower()]


def taputil_create_sorted_dict_key(dictionaryObject):
"""Searches for the specified keyword

Expand Down
71 changes: 50 additions & 21 deletions astroquery/utils/tap/tests/test_tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,21 @@

Created on 30 jun. 2016
"""
import gzip
from pathlib import Path
from unittest.mock import patch
from urllib.parse import quote_plus, urlencode

import gzip
import numpy as np
import pytest
from astropy.table import Table
from requests import HTTPError

from astroquery.utils.tap.model.tapcolumn import TapColumn

from astroquery.utils.tap import taputils
from astroquery.utils.tap.conn.tests.DummyConnHandler import DummyConnHandler
from astroquery.utils.tap.conn.tests.DummyResponse import DummyResponse
from astroquery.utils.tap.core import TapPlus
from astroquery.utils.tap import taputils
from astropy.table import Table
from astroquery.utils.tap.model.tapcolumn import TapColumn


def read_file(filename):
Expand Down Expand Up @@ -115,8 +114,8 @@ def test_load_tables_parameters():


def test_load_table():
connHandler = DummyConnHandler()
tap = TapPlus(url="http://test:1111/tap", connhandler=connHandler)
conn_handler = DummyConnHandler()
tap = TapPlus(url="http://test:1111/tap", connhandler=conn_handler)

# No arguments
with pytest.raises(Exception):
Expand All @@ -128,7 +127,7 @@ def test_load_table():
tableName = "table1"
fullQualifiedTableName = f"{tableSchema}.{tableName}"
tableRequest = f"tables?tables={fullQualifiedTableName}"
connHandler.set_response(tableRequest, responseLoadTable)
conn_handler.set_response(tableRequest, responseLoadTable)

with pytest.raises(Exception):
tap.load_table(fullQualifiedTableName)
Expand Down Expand Up @@ -871,26 +870,26 @@ def test_rename_table():


def __find_table(schemaName, tableName, tables):
qualifiedName = f"{schemaName}.{tableName}"
for table in (tables):
if table.get_qualified_name() == qualifiedName:
qualified_name = f"{schemaName}.{tableName}"
for table in tables:
if table.get_qualified_name() == qualified_name:
return table
# not found: raise exception
pytest.fail(f"Table '{qualifiedName}' not found")
pytest.fail(f"Table '{qualified_name}' not found")


def __find_column(columnName, columns):
for c in (columns):
if c.name == columnName:
def __find_column(column_name, columns):
for c in columns:
if c.name == column_name:
Comment on lines -882 to +883
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for doing all this cleanup, too!

return c
# not found: raise exception
pytest.fail(f"Column '{columnName}' not found")
pytest.fail(f"Column '{column_name}' not found")


def __check_column(column, description, unit, dataType, flag):
def __check_column(column, description, unit, data_type, flag):
assert column.description == description
assert column.unit == unit
assert column.data_type == dataType
assert column.data_type == data_type
assert column.flag == flag


Expand All @@ -906,11 +905,11 @@ def __check_results_column(results, columnName, description, unit,
def test_login(mock_login):
conn_handler = DummyConnHandler()
tap = TapPlus(url="http://test:1111/tap", connhandler=conn_handler)
tap.login("user", "password")
tap.login(user="user", password="password")
assert (mock_login.call_count == 1)
mock_login.side_effect = HTTPError("Login error")
with pytest.raises(HTTPError):
tap.login("user", "password")
tap.login(user="user", password="password")
assert (mock_login.call_count == 2)


Expand All @@ -923,7 +922,7 @@ def test_login_gui(mock_login_gui, mock_login):
assert (mock_login_gui.call_count == 0)
mock_login_gui.side_effect = HTTPError("Login error")
with pytest.raises(HTTPError):
tap.login("user", "password")
tap.login(user="user", password="password")
assert (mock_login.call_count == 1)


Expand Down Expand Up @@ -951,3 +950,33 @@ def test_upload_table():
tap.upload_table(upload_resource=table, table_name=table_name)

assert str(exc_info.value) == f"Table name is not allowed to contain a dot: {table_name}"


def test___findCookieInHeader():
    """The SESSION cookie is returned when present, otherwise the JSESSIONID cookie is"""
    tap = TapPlus(url="http://test:1111/tap", connhandler=DummyConnHandler())

    session_header = ('Set-Cookie',
                      'SESSION=ZjQ3MjIzMDAtNjNiYy00Mj; Path=/data-server; Secure; HttpOnly; SameSite=Lax')

    # Headers placed before / after the position of the SESSION cookie
    leading_headers = [
        ('Date', 'Sat, 12 Apr 2025 05:10:47 GMT'),
        ('Server', 'Apache/2.4.6 (Red Hat Enterprise Linux) OpenSSL/1.0.2k-fips mod_jk/1.2.43'),
        ('Set-Cookie', 'JSESSIONID=E677B51BA5C4837347D1E17D4E36647E; Path=/data-server; Secure; HttpOnly'),
        ('X-Content-Type-Options', 'nosniff'),
        ('X-XSS-Protection', '0'),
        ('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate'),
        ('Pragma', 'no-cache'),
        ('Expires', '0'),
        ('X-Frame-Options', 'SAMEORIGIN'),
    ]
    trailing_headers = [
        ('Transfer-Encoding', 'chunked'),
        ('Content-Type', 'text/plain; charset=UTF-8'),
    ]

    # Both cookies are sent: SESSION is the one expected back
    headers_with_both = leading_headers + [session_header] + trailing_headers
    cookie = tap._Tap__findCookieInHeader(headers_with_both)
    assert cookie == "SESSION=ZjQ3MjIzMDAtNjNiYy00Mj"

    # Only JSESSIONID is sent
    headers_jsessionid_only = leading_headers + trailing_headers
    cookie = tap._Tap__findCookieInHeader(headers_jsessionid_only)
    assert cookie == "JSESSIONID=E677B51BA5C4837347D1E17D4E36647E"
1 change: 1 addition & 0 deletions docs/gaia/gaia.rst
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,7 @@ The following example shows how to retrieve the DataLink products associated wit
... data_release=data_release, retrieval_type=retrieval_type, data_structure=data_structure)

The DataLink products are stored inside a Python Dictionary. Each of its elements (keys) contains a one-element list that can be extracted as follows:

.. code-block:: python

>>> dl_keys = [inp for inp in datalink.keys()]
Expand Down
Loading