Skip to content

Commit 678bf0b

Browse files
committed
Convert job ID to UUID
In some CI runs, unexpected results were observed being returned for middleware jobs. This commit converts our job IDs from monotonically incrementing integers to proper UUIDs, so that the job ID a client is trying to track is guaranteed to identify that job uniquely, regardless of which HA node the client is connected to. This commit also has the benefit of making it much harder to guess a job ID when using the public download endpoints for job results.
1 parent 7d0fed2 commit 678bf0b

File tree

10 files changed

+25
-25
lines changed

10 files changed

+25
-25
lines changed

src/middlewared/middlewared/apps/file_app.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
)
1414
from middlewared.service_exception import CallError
1515
from truenas_api_client import json
16+
from uuid import UUID
1617

1718
__all__ = ("FileApplication",)
1819

@@ -47,12 +48,16 @@ async def _cleanup_job(self, job_id):
4748

4849
async def download(self, request):
4950
path = request.path.split("/")
50-
if not request.path[-1].isdigit():
51+
try:
52+
UUID(path[-1])
53+
except ValueError:
54+
self.middleware.logger.error('XXX: failed to parse %s', request.path, exc_info=True)
55+
# The job id should be a valid UUID
5156
resp = web.Response()
5257
resp.set_status(404)
5358
return resp
5459

55-
job_id = int(path[-1])
60+
job_id = path[-1]
5661

5762
qs = parse_qs(request.query_string)
5863
denied = False

src/middlewared/middlewared/job.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import asyncio
22
import contextlib
3-
from collections import OrderedDict
43
import copy
54
import enum
65
import errno
@@ -17,6 +16,7 @@
1716
from middlewared.pipe import Pipes
1817
from middlewared.utils.privilege import credential_is_limited_to_own_jobs, credential_has_full_admin
1918
from middlewared.utils.time_utils import utc_now
19+
from uuid import uuid4
2020

2121

2222
logger = logging.getLogger(__name__)
@@ -225,8 +225,7 @@ class JobsDeque:
225225

226226
def __init__(self, maxlen=1000):
227227
self.maxlen = maxlen
228-
self.count = 0
229-
self.__dict = OrderedDict()
228+
self.__dict = {}
230229
with contextlib.suppress(FileNotFoundError):
231230
shutil.rmtree(LOGS_DIR)
232231

@@ -244,7 +243,6 @@ def _get_next_id(self):
244243
return self.count
245244

246245
def add(self, job):
247-
job.set_id(self._get_next_id())
248246
if len(self.__dict) > self.maxlen:
249247
for old_job_id, old_job in self.__dict.items():
250248
if old_job.state in (State.SUCCESS, State.FAILED, State.ABORTED):
@@ -291,7 +289,7 @@ def __init__(self, middleware, method_name, serviceobj, method, args, options, p
291289
self.app = app
292290
self.audit_callback = audit_callback
293291

294-
self.id = None
292+
self.id = str(uuid4())
295293
self.lock = None
296294
self.result = None
297295
self.error = None
@@ -377,9 +375,6 @@ def get_lock_name(self):
377375
errno.EINVAL)
378376
return lock_name
379377

380-
def set_id(self, id_):
381-
self.id = id_
382-
383378
def set_result(self, result):
384379
self.result = result
385380

src/middlewared/middlewared/plugins/rdma/rdma.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,5 @@ async def capable_protocols(self):
133133
if is_ent and 'MINI' not in await self.middleware.call('truenas.get_chassis_hardware'):
134134
if await self.middleware.call('rdma.get_link_choices', True):
135135
result.extend([RDMAprotocols.NFS.value, RDMAprotocols.ISER.value])
136+
136137
return result

src/middlewared/middlewared/service/core_service.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def __job_by_credential_and_id(self, credential, job_id, access):
106106
@filterable
107107
@filterable_returns(Dict(
108108
'job',
109-
Int('id'),
109+
Str('id'),
110110
Str('method'),
111111
List('arguments'),
112112
Bool('transient'),
@@ -175,7 +175,7 @@ def get_jobs(self, app, filters, options):
175175
return jobs
176176

177177
@no_authz_required
178-
@accepts(Int('id'), Str('filename'), Bool('buffered', default=False))
178+
@accepts(Str('id'), Str('filename'), Bool('buffered', default=False))
179179
@pass_app(rest=True)
180180
async def job_download_logs(self, app, id_, filename, buffered):
181181
"""
@@ -192,15 +192,15 @@ async def job_download_logs(self, app, id_, filename, buffered):
192192
return (await self._download(app, 'filesystem.get', [job.logs_path], filename, buffered))[1]
193193

194194
@no_authz_required
195-
@accepts(Int('id'))
195+
@accepts(Str('id'))
196196
@job()
197197
async def job_wait(self, job, id_):
198198
target_job = self.__job_by_credential_and_id(job.credentials, id_, JobAccess.READ)
199199

200200
return await job.wrap(target_job)
201201

202202
@private
203-
@accepts(Int('id'), Dict(
203+
@accepts(Str('id'), Dict(
204204
'job-update',
205205
Dict('progress', additional_attrs=True),
206206
))
@@ -234,7 +234,7 @@ def notify_postinit(self):
234234
self.middleware._setup_periodic_tasks()
235235

236236
@no_authz_required
237-
@accepts(Int('id'))
237+
@accepts(Str('id'))
238238
@pass_app(rest=True)
239239
def job_abort(self, app, id_):
240240
job = self._job_by_app_and_id(app, id_, JobAccess.ABORT)

tests/api2/test_006_pool_and_sysds.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def test_002_create_permanent_zpool(request, ws_client):
118118
try:
119119
sysdataset_update = ws_client.call('core.get_jobs', [
120120
['method', '=', 'systemdataset.update']
121-
], {'order_by': ['-id'], 'get': True})
121+
], {'order_by': ['-time_started'], 'get': True})
122122
except Exception:
123123
fail('Failed to get status of systemdataset update')
124124

tests/api2/test_011_user.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ def test_037_move_homedir_to_new_directory(request):
472472
)
473473

474474
filters = [['method', '=', 'user.do_home_copy']]
475-
opts = {'get': True, 'order_by': ['-id']}
475+
opts = {'get': True, 'order_by': ['-time_started']}
476476
move_job_timeout = 300 # 5 mins
477477
move_job1 = call('core.get_jobs', filters, opts)
478478
assert move_job1
@@ -518,7 +518,7 @@ def test_038_change_homedir_to_existing_path(request):
518518
{'home': new_home}
519519
)
520520
filters = [['method', '=', 'user.do_home_copy']]
521-
opts = {'get': True, 'order_by': ['-id']}
521+
opts = {'get': True, 'order_by': ['-time_started']}
522522
move_job_timeout = 300 # 5 mins
523523
home_move_job = call('core.get_jobs', filters, opts)
524524
rv = wait_on_job(home_move_job['id'], move_job_timeout)

tests/api2/test_040_ad_user_group_cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def do_ad_connection(request):
2323
cache_fill_job = call(
2424
'core.get_jobs',
2525
[['method', '=', 'directoryservices.cache.refresh_impl']],
26-
{'order_by': ['-id'], 'get': True}
26+
{'order_by': ['-time_started'], 'get': True}
2727
)
2828
if cache_fill_job['state'] == 'RUNNING':
2929
call('core.job_wait', cache_fill_job['id'], job=True)

tests/api2/test_110_certificate.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ def test_certificate():
4141
certificate_id = results["certificate"]["id"]
4242

4343
# successful delete
44-
results = call("certificate.delete", certificate_id, True)
45-
job_id = int(results)
44+
job_id = call("certificate.delete", certificate_id, True)
4645

4746
# failed delete
4847
while True:

tests/api2/test_cloud_backup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ def test_snapshot(s3_credential):
428428
def test_script_shebang(cloud_backup_task, expected):
429429
ssh(f"touch /mnt/{cloud_backup_task.local_dataset}/blob")
430430
run_task(cloud_backup_task.task)
431-
job = call("core.get_jobs", [["method", "=", "cloud_backup.sync"]], {"order_by": ["-id"], "get": True})
431+
job = call("core.get_jobs", [["method", "=", "cloud_backup.sync"]], {"order_by": ["-time_started"], "get": True})
432432
assert job["logs_excerpt"].strip().split("\n")[-2] == expected
433433

434434

@@ -454,5 +454,5 @@ def test_pre_script_failure(cloud_backup_task, error, expected):
454454

455455
assert ve.value.error == error
456456

457-
job = call("core.get_jobs", [["method", "=", "cloud_backup.sync"]], {"order_by": ["-id"], "get": True})
457+
job = call("core.get_jobs", [["method", "=", "cloud_backup.sync"]], {"order_by": ["-time_started"], "get": True})
458458
assert job["logs_excerpt"].strip() == expected

tests/api2/test_cloud_sync_script.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def test_pre_script_failure():
1414

1515
assert ve.value.error == "[EFAULT] Pre-script failed with exit code 123"
1616

17-
job = call("core.get_jobs", [["method", "=", "cloudsync.sync"]], {"order_by": ["-id"], "get": True})
17+
job = call("core.get_jobs", [["method", "=", "cloudsync.sync"]], {"order_by": ["-time_started"], "get": True})
1818
assert job["logs_excerpt"] == "[Pre-script] Custom error\n"
1919

2020

@@ -59,5 +59,5 @@ def test_script_shebang():
5959
}) as task:
6060
run_task(task)
6161

62-
job = call("core.get_jobs", [["method", "=", "cloudsync.sync"]], {"order_by": ["-id"], "get": True})
62+
job = call("core.get_jobs", [["method", "=", "cloudsync.sync"]], {"order_by": ["-time_started"], "get": True})
6363
assert job["logs_excerpt"].endswith("[Post-script] TestTest\n")

0 commit comments

Comments
 (0)