Skip to content

Commit deb043f

Browse files
authored
Jobs pagination (geopython#1779)
* Add pagination for job list
  Adds `limit` and `offset` parameters to `get_jobs`. The process manager's `get_jobs` now also returns the number of matched jobs in addition to the jobs themselves, so that we can determine whether a next link is needed. Note that this is a breaking change.
* Add pagination support to jobs UI
  This works exactly the same way as for itemtypes.
* Add note regarding job sorting
* Formatting fixes
1 parent 0677c2e commit deb043f

File tree

8 files changed

+215
-18
lines changed

8 files changed

+215
-18
lines changed

pygeoapi/api/processes.py

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import json
4747
import logging
4848
from typing import Tuple
49+
import urllib.parse
4950

5051
from pygeoapi import l10n
5152
from pygeoapi.util import (
@@ -240,17 +241,59 @@ def get_jobs(api: API, request: APIRequest,
240241

241242
headers = request.get_response_headers(SYSTEM_LOCALE,
242243
**api.api_headers)
244+
LOGGER.debug('Processing limit parameter')
245+
try:
246+
limit = int(request.params.get('limit'))
247+
248+
if limit <= 0:
249+
msg = 'limit value should be strictly positive'
250+
return api.get_exception(
251+
HTTPStatus.BAD_REQUEST, headers, request.format,
252+
'InvalidParameterValue', msg)
253+
except TypeError:
254+
limit = int(api.config['server']['limit'])
255+
LOGGER.debug('returning all jobs')
256+
except ValueError:
257+
msg = 'limit value should be an integer'
258+
return api.get_exception(
259+
HTTPStatus.BAD_REQUEST, headers, request.format,
260+
'InvalidParameterValue', msg)
261+
262+
LOGGER.debug('Processing offset parameter')
263+
try:
264+
offset = int(request.params.get('offset'))
265+
if offset < 0:
266+
msg = 'offset value should be positive or zero'
267+
return api.get_exception(
268+
HTTPStatus.BAD_REQUEST, headers, request.format,
269+
'InvalidParameterValue', msg)
270+
except TypeError as err:
271+
LOGGER.warning(err)
272+
offset = 0
273+
except ValueError:
274+
msg = 'offset value should be an integer'
275+
return api.get_exception(
276+
HTTPStatus.BAD_REQUEST, headers, request.format,
277+
'InvalidParameterValue', msg)
278+
243279
if job_id is None:
244-
jobs = sorted(api.manager.get_jobs(),
280+
jobs_data = api.manager.get_jobs(limit=limit, offset=offset)
281+
# TODO: For pagination to work, the provider has to do the sorting.
282+
# Here we do sort again in case the provider doesn't support
283+
# pagination yet and always returns all jobs.
284+
jobs = sorted(jobs_data['jobs'],
245285
key=lambda k: k['job_start_datetime'],
246286
reverse=True)
287+
numberMatched = jobs_data['numberMatched']
288+
247289
else:
248290
try:
249291
jobs = [api.manager.get_job(job_id)]
250292
except JobNotFoundError:
251293
return api.get_exception(
252294
HTTPStatus.NOT_FOUND, headers, request.format,
253295
'InvalidParameterValue', job_id)
296+
numberMatched = 1
254297

255298
serialized_jobs = {
256299
'jobs': [],
@@ -309,6 +352,44 @@ def get_jobs(api: API, request: APIRequest,
309352

310353
serialized_jobs['jobs'].append(job2)
311354

355+
serialized_query_params = ''
356+
for k, v in request.params.items():
357+
if k not in ('f', 'offset'):
358+
serialized_query_params += '&'
359+
serialized_query_params += urllib.parse.quote(k, safe='')
360+
serialized_query_params += '='
361+
serialized_query_params += urllib.parse.quote(str(v), safe=',')
362+
363+
uri = f'{api.base_url}/jobs'
364+
365+
if offset > 0:
366+
prev = max(0, offset - limit)
367+
serialized_jobs['links'].append(
368+
{
369+
'href': f'{uri}?offset={prev}{serialized_query_params}',
370+
'type': FORMAT_TYPES[F_JSON],
371+
'rel': 'prev',
372+
'title': l10n.translate('Items (prev)', request.locale),
373+
})
374+
375+
next_link = False
376+
377+
if numberMatched > (limit + offset):
378+
next_link = True
379+
elif len(jobs) == limit:
380+
next_link = True
381+
382+
if next_link:
383+
next_ = offset + limit
384+
next_href = f'{uri}?offset={next_}{serialized_query_params}'
385+
serialized_jobs['links'].append(
386+
{
387+
'href': next_href,
388+
'rel': 'next',
389+
'type': FORMAT_TYPES[F_JSON],
390+
'title': l10n.translate('Items (next)', request.locale),
391+
})
392+
312393
if job_id is None:
313394
j2_template = 'jobs/index.html'
314395
else:
@@ -318,6 +399,7 @@ def get_jobs(api: API, request: APIRequest,
318399
if request.format == F_HTML:
319400
data = {
320401
'jobs': serialized_jobs,
402+
'offset': offset,
321403
'now': datetime.now(timezone.utc).strftime(DATETIME_FORMAT)
322404
}
323405
response = render_j2_template(api.tpl_config, j2_template, data,

pygeoapi/process/manager/base.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,21 @@ def get_processor(self, process_id: str) -> BaseProcessor:
108108
else:
109109
return load_plugin('process', process_conf['processor'])
110110

111-
def get_jobs(self, status: JobStatus = None) -> list:
111+
def get_jobs(self,
112+
status: JobStatus = None,
113+
limit: Optional[int] = None,
114+
offset: Optional[int] = None
115+
) -> dict:
112116
"""
113117
Get process jobs, optionally filtered by status
114118
115119
:param status: job status (accepted, running, successful,
116120
failed, results) (default is all)
121+
:param limit: number of jobs to return
122+
:param offset: pagination offset
117123
118-
:returns: `list` of jobs (identifier, status, process identifier)
124+
:returns: dict of list of jobs (identifier, status, process identifier)
125+
and numberMatched
119126
"""
120127

121128
raise NotImplementedError()

pygeoapi/process/manager/dummy.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,21 @@ def __init__(self, manager_def: dict):
5656

5757
super().__init__(manager_def)
5858

59-
def get_jobs(self, status: JobStatus = None) -> list:
59+
def get_jobs(self, status: JobStatus = None, limit=None, offset=None
60+
) -> dict:
6061
"""
6162
Get process jobs, optionally filtered by status
6263
6364
:param status: job status (accepted, running, successful,
6465
failed, results) (default is all)
66+
:param limit: number of jobs to return
67+
:param offset: pagination offset
6568
66-
:returns: `list` of jobs (identifier, status, process identifier)
69+
:returns: dict of list of jobs (identifier, status, process identifier)
70+
and numberMatched
6771
"""
6872

69-
return []
73+
return {'jobs': [], 'numberMatched': 0}
7074

7175
def execute_process(
7276
self,

pygeoapi/process/manager/mongodb_.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def destroy(self):
7070
exc_info=(traceback))
7171
return False
7272

73-
def get_jobs(self, status=None):
73+
def get_jobs(self, status=None, limit=None, offset=None):
7474
try:
7575
self._connect()
7676
database = self.db.job_manager_pygeoapi
@@ -80,7 +80,10 @@ def get_jobs(self, status=None):
8080
else:
8181
jobs = list(collection.find({}))
8282
LOGGER.info("JOBMANAGER - MongoDB jobs queried")
83-
return jobs
83+
return {
84+
'jobs': jobs,
85+
'numberMatched': len(jobs)
86+
}
8487
except Exception:
8588
LOGGER.error("JOBMANAGER - get_jobs error",
8689
exc_info=(traceback))

pygeoapi/process/manager/postgresql.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,16 +116,18 @@ def __init__(self, manager_def: dict):
116116
LOGGER.error(f'{msg}: {err}')
117117
raise ProcessorGenericError(msg)
118118

119-
def get_jobs(self, status: JobStatus = None) -> list:
119+
def get_jobs(self, status: JobStatus = None, limit=None, offset=None
120+
) -> dict:
120121
"""
121122
Get jobs
122123
123124
:param status: job status (accepted, running, successful,
124125
failed, results) (default is all)
126+
:param limit: number of jobs to return
127+
:param offset: pagination offset
125128
126-
:returns: 'list` of jobs (type (default='process'), identifier,
127-
status, process_id, job_start_datetime, job_end_datetime, location,
128-
mimetype, message, progress)
129+
:returns: dict of list of jobs (identifier, status, process identifier)
130+
and numberMatched
129131
"""
130132

131133
LOGGER.debug('Querying for jobs')
@@ -135,7 +137,11 @@ def get_jobs(self, status: JobStatus = None) -> list:
135137
column = getattr(self.table_model, 'status')
136138
results = results.filter(column == status.value)
137139

138-
return [r.__dict__ for r in results.all()]
140+
jobs = [r.__dict__ for r in results.all()]
141+
return {
142+
'jobs': jobs,
143+
'numberMatched': len(jobs)
144+
}
139145

140146
def add_job(self, job_metadata: dict) -> str:
141147
"""

pygeoapi/process/manager/tinydb_.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,35 @@ def destroy(self) -> bool:
8282

8383
return True
8484

85-
def get_jobs(self, status: JobStatus = None) -> list:
85+
def get_jobs(self, status: JobStatus = None, limit=None, offset=None
86+
) -> dict:
8687
"""
8788
Get jobs
8889
8990
:param status: job status (accepted, running, successful,
9091
failed, results) (default is all)
92+
:param limit: number of jobs to return
93+
:param offset: pagination offset
9194
92-
:returns: 'list` of jobs (identifier, status, process identifier)
95+
:returns: dict of list of jobs (identifier, status, process identifier)
96+
and numberMatched
9397
"""
9498

9599
with self._db() as db:
96100
jobs_list = db.all()
97101

98-
return jobs_list
102+
number_matched = len(jobs_list)
103+
104+
if offset:
105+
jobs_list = jobs_list[offset:]
106+
107+
if limit:
108+
jobs_list = jobs_list[:limit]
109+
110+
return {
111+
'jobs': jobs_list,
112+
'numberMatched': number_matched
113+
}
99114

100115
def add_job(self, job_metadata: dict) -> str:
101116
"""

pygeoapi/templates/jobs/index.html

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,38 @@
4848
</table>
4949
</div>
5050
</div>
51+
<div class="row">
52+
<div class="col-sm-12">
53+
{% trans %}Limit{% endtrans %}:
54+
<select id="limits">
55+
<option value="{{ config['server']['limit'] }}">{{ config['server']['limit'] }} ({% trans %}default{% endtrans %})</option>
56+
<option value="100">100</option>
57+
<option value="1000">1,000</option>
58+
<option value="2000">2,000</option>
59+
</select>
60+
<script>
61+
var select = document.getElementById('limits');
62+
var defaultValue = select.getElementsByTagName('option')[0].value;
63+
let params = (new URL(document.location)).searchParams;
64+
select.value = params.get('limit') || defaultValue;
65+
select.addEventListener('change', ev => {
66+
var limit = ev.target.value;
67+
document.location.search = `limit=${limit}`;
68+
});
69+
</script>
70+
</div>
71+
</div>
72+
<div class="row">
73+
<div class="col-sm-12">
74+
{% for link in data['jobs']['links'] %}
75+
{% if link['rel'] == 'prev' and data['offset'] > 0 %}
76+
<a role="button" href="{{ link['href'] }}">{% trans %}Prev{% endtrans %}</a>
77+
{% elif link['rel'] == 'next' and data['jobs']['jobs'] %}
78+
<a role="button" href="{{ link['href'] }}">{% trans %}Next{% endtrans %}</a>
79+
{% endif %}
80+
{% endfor %}
81+
</div>
82+
</div>
83+
5184
</section>
5285
{% endblock %}

tests/api/test_processes.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939

4040
from pygeoapi.api import FORMAT_TYPES, F_HTML, F_JSON
4141
from pygeoapi.api.processes import (
42-
describe_processes, execute_process, delete_job, get_job_result,
42+
describe_processes, execute_process, delete_job, get_job_result, get_jobs
4343
)
4444

4545
from tests.util import mock_api_request
@@ -442,4 +442,51 @@ def test_get_job_result(api_):
442442
)
443443
assert code == HTTPStatus.OK
444444
assert rsp_headers['Content-Type'] == 'application/json'
445-
assert json.loads(response)['value'] == "Hello Sync Test!"
445+
assert json.loads(response)['value'] == 'Hello Sync Test!'
446+
447+
448+
def test_get_jobs_single(api_):
449+
job_id = _execute_a_job(api_)
450+
headers, code, response = get_jobs(api_, mock_api_request(), job_id=job_id)
451+
assert code == HTTPStatus.OK
452+
453+
job = json.loads(response)
454+
assert job['jobID'] == job_id
455+
assert job['status'] == 'successful'
456+
457+
458+
def test_get_jobs_pagination(api_):
459+
# generate test jobs for querying
460+
for _ in range(11):
461+
_execute_a_job(api_)
462+
463+
# test default pagination limit
464+
headers, code, response = get_jobs(api_, mock_api_request(), job_id=None)
465+
job_response = json.loads(response)
466+
assert len(job_response['jobs']) == 10
467+
assert next(
468+
link for link in job_response['links'] if link['rel'] == 'next'
469+
)['href'].endswith('/jobs?offset=10')
470+
471+
headers, code, response = get_jobs(
472+
api_,
473+
mock_api_request({'limit': 10, 'offset': 9}),
474+
job_id=None)
475+
job_response_offset = json.loads(response)
476+
# check to get 1 same job id with an offset of 9 and limit of 10
477+
same_job_ids = {job['jobID'] for job in job_response['jobs']}.intersection(
478+
{job['jobID'] for job in job_response_offset['jobs']}
479+
)
480+
assert len(same_job_ids) == 1
481+
assert next(
482+
link for link in job_response_offset['links'] if link['rel'] == 'prev'
483+
)['href'].endswith('/jobs?offset=0&limit=10')
484+
485+
# test custom limit
486+
headers, code, response = get_jobs(
487+
api_,
488+
mock_api_request({'limit': 20}),
489+
job_id=None)
490+
job_response = json.loads(response)
491+
# might be more than 11 due to test interaction
492+
assert len(job_response['jobs']) > 10

0 commit comments

Comments
 (0)