Skip to content

Commit 5e22ac2

Browse files
authored
Merge pull request #54 from ecotaxa/tests_classif_update
Tests classif update
2 parents a0f96d9 + db28628 commit 5e22ac2

File tree

4 files changed

+171
-12
lines changed

4 files changed

+171
-12
lines changed

QA/py/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ scikit-learn==1.0
5656
#tensorflow-hub==0.12.0
5757
#tensorflow_addons==0.14.0
5858
# Used same place as TF, CNN generation
59-
#pandas==1.3.3
59+
pandas==1.3.3
6060
##lycon==0.2.0 # Conflicts with the version required by TF
6161
#opencv-python-headless==4.5.3.56
6262
#imgaug==0.4.0

QA/py/tests/test_classification.py

Lines changed: 111 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Copyright (C) 2015-2020 Picheral, Colin, Irisson (UPMC-CNRS)
44
#
55
import logging
6+
import pytest
67

78
from typing import List
89
from API_models.filters import ProjectFilters, ProjectFiltersDict
@@ -30,6 +31,7 @@ def _prj_query(fastapi, auth, prj_id, **kwargs) -> List[int]:
3031
OBJECT_SET_DELETE_URL = "/object_set/"
3132
OBJECT_SET_SUMMARY_URL = "/object_set/{project_id}/summary?only_total=False"
3233
OBJECT_SET_PARENTS_URL = "/object_set/parents"
34+
OBJECT_QUERY_URL = "/object/{object_id}"
3335

3436
PROJECT_SET_USER_STATS = "/project_set/user_stats?ids={prj_ids}"
3537

@@ -61,15 +63,43 @@ def classify_all(fastapi, obj_ids, classif_id):
6163
assert rsp.status_code == status.HTTP_200_OK
6264

6365

64-
def classify_auto_all(fastapi, obj_ids, classif_id, scores=None):
    """Auto-classify every object in `obj_ids` as `classif_id` and expect success.

    :param fastapi: test client used to POST the request.
    :param obj_ids: IDs of the objects to classify.
    :param classif_id: category ID applied to all the objects.
    :param scores: optional list of scores, one per object. When missing
        (or empty), each object gets a score of 0.52.
    """
    nb_objs = len(obj_ids)
    # Fall back to a constant score when the caller did not provide any
    if not scores:
        scores = [0.52] * nb_objs
    payload = {"target_ids": obj_ids,
               "classifications": [classif_id] * nb_objs,
               "scores": scores,
               "keep_log": True}
    rsp = fastapi.post(OBJECT_SET_CLASSIFY_AUTO_URL, headers=ADMIN_AUTH, json=payload)
    assert rsp.status_code == status.HTTP_200_OK
76+
77+
78+
def classify_auto_incorrect(fastapi, obj_ids):
    """Send three malformed auto-classification requests and check each is refused.

    The requests differ only by their list of scores:
    - one score fewer than there are objects,
    - scores outside [0, 1],
    - scores of the wrong type (None).

    The first two are expected to surface as an AssertionError in the test
    itself (presumably the endpoint's own assertions re-raised by the test
    client -- verify against the client configuration), the last one as
    an HTTP 422 response.

    :param fastapi: test client used to POST the requests.
    :param obj_ids: IDs of the objects targeted by the requests.
    """
    nb_objs = len(obj_ids)

    def post_with_scores(bad_scores):
        # Same payload for all cases, only the scores vary
        return fastapi.post(OBJECT_SET_CLASSIFY_AUTO_URL, headers=ADMIN_AUTH,
                            json={"target_ids": obj_ids,
                                  "classifications": [-1] * nb_objs,
                                  "scores": bad_scores,
                                  "keep_log": True})

    # List of scores of a different length, should raise an error
    with pytest.raises(AssertionError):
        post_with_scores([0.1] * (nb_objs - 1))

    # List of scores outside [0, 1], should raise an error
    with pytest.raises(AssertionError):
        post_with_scores([2.] * nb_objs)

    # List of scores with wrong type, should fail
    rsp = post_with_scores([None] * nb_objs)
    assert rsp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
73103

74104

75105
# Note: to go faster in a local dev environment, use "filled_database" instead of "database" below
@@ -179,9 +209,26 @@ def get_object_set_stats():
179209
rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
180210
assert rsp.status_code == status.HTTP_200_OK
181211

212+
# Incorrect ML results
213+
classify_auto_incorrect(fastapi, obj_ids[:4])
214+
182215
# Super ML result, 4 first objects are crustacea
183216
classify_auto_all(fastapi, obj_ids[:4], crustacea)
184217

218+
assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
219+
'nb_predicted': 4,
220+
'nb_unclassified': 4,
221+
'nb_validated': 0,
222+
'projid': prj_id,
223+
'used_taxa': [-1, crustacea]}
224+
225+
# New ML results with a different score for the second object
226+
classify_auto_all(fastapi, [obj_ids[1]], crustacea, [0.8])
227+
url = OBJECT_QUERY_URL.format(object_id=obj_ids[1])
228+
rsp = fastapi.get(url, headers=ADMIN_AUTH)
229+
assert rsp.status_code == status.HTTP_200_OK
230+
assert rsp.json()['classif_auto_score'] == 0.8
231+
185232
assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
186233
'nb_predicted': 4,
187234
'nb_unclassified': 4,
@@ -198,16 +245,44 @@ def get_object_set_stats():
198245
'nb_unclassified': 0,
199246
'nb_validated': 8,
200247
'projid': prj_id,
201-
'used_taxa': [25828]} # No more Unclassified and Copepod is in +
248+
'used_taxa': [copepod_id]} # No more Unclassified and Copepod is in +
202249

203250
# No history yet as the object was just created
204251
classif = classif_history(fastapi, obj_ids[0])
205252
assert len(classif) == 1
206253
assert classif[0]['classif_date'] is not None # e.g. 2021-09-12T09:28:03.278626
207254
classif[0]['classif_date'] = "now"
208255
assert classif == [
209-
{'objid': obj_ids[0], 'classif_id': 12846, 'classif_date': 'now', 'classif_who': None,
256+
{'objid': obj_ids[0], 'classif_id': crustacea, 'classif_date': 'now', 'classif_who': None,
210257
'classif_type': 'A', 'classif_qual': 'P', 'classif_score': 0.52, 'user_name': None, 'taxon_name': 'Crustacea'}]
258+
259+
# Revert on validated objects
260+
url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="")
261+
rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
262+
assert rsp.status_code == status.HTTP_200_OK
263+
stats = rsp.json()
264+
265+
assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
266+
'nb_predicted': 4,
267+
'nb_unclassified': 4,
268+
'nb_validated': 0,
269+
'projid': prj_id,
270+
'used_taxa': [-1, crustacea]}
271+
272+
# Second revert: nothing should change, since the last record in history is the same
273+
rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
274+
assert rsp.status_code == status.HTTP_200_OK
275+
stats = rsp.json()
276+
277+
assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
278+
'nb_predicted': 4,
279+
'nb_unclassified': 4,
280+
'nb_validated': 0,
281+
'projid': prj_id,
282+
'used_taxa': [-1, crustacea]}
283+
284+
# Apply validation again after revert
285+
classify_all(fastapi, obj_ids, copepod_id)
211286

212287
# Not a copepod :(
213288
classify_all(fastapi, obj_ids, entomobryomorpha_id)
@@ -228,7 +303,7 @@ def classify_all_no_change(classif_id):
228303
classif2[0]['classif_date'] = 'hopefully just now'
229304
classif2[1]['classif_date'] = 'a bit before'
230305
assert classif2 == [{'classif_date': 'hopefully just now',
231-
'classif_id': 25828,
306+
'classif_id': copepod_id,
232307
'classif_qual': 'V',
233308
'classif_score': None,
234309
'classif_type': 'M',
@@ -237,7 +312,7 @@ def classify_all_no_change(classif_id):
237312
'taxon_name': 'Copepoda',
238313
'user_name': 'Application Administrator'},
239314
{'classif_date': 'a bit before',
240-
'classif_id': 12846,
315+
'classif_id': crustacea,
241316
'classif_qual': 'P',
242317
'classif_score': 0.52,
243318
'classif_type': 'A',
@@ -261,9 +336,35 @@ def classify_all_no_change(classif_id):
261336
'nb_unclassified': 0,
262337
'nb_validated': 8,
263338
'projid': prj_id,
264-
'used_taxa': [
265-
25835]}] # <- copepod is gone, unclassified as well, replaced with entomobryomorpha
266-
339+
'used_taxa':
340+
[entomobryomorpha_id]}] # <- copepod is gone, unclassified as well, replaced with entomobryomorpha
341+
342+
# Reset to predicted on validated objects
343+
url = OBJECT_SET_RESET_PREDICTED_URL.format(project_id=prj_id)
344+
rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
345+
assert rsp.status_code == status.HTTP_200_OK
346+
stats = rsp.json()
347+
348+
assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
349+
'nb_predicted': 8,
350+
'nb_unclassified': 0,
351+
'nb_validated': 0,
352+
'projid': prj_id,
353+
'used_taxa': [entomobryomorpha_id]}
354+
355+
# Revert after reset to predicted
356+
url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="")
357+
rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
358+
assert rsp.status_code == status.HTTP_200_OK
359+
stats = rsp.json()
360+
361+
assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
362+
'nb_predicted': 0,
363+
'nb_unclassified': 0,
364+
'nb_validated': 8,
365+
'projid': prj_id,
366+
'used_taxa': [entomobryomorpha_id]}
367+
267368
# Delete some objects via API, why not?
268369
rsp = fastapi.delete(OBJECT_SET_DELETE_URL, headers=ADMIN_AUTH, json=obj_ids[:4])
269370
assert rsp.status_code == status.HTTP_200_OK
@@ -285,7 +386,7 @@ def classify_all_no_change(classif_id):
285386
ref_stats = [{"projid": prj_id,
286387
"annotators": [{"id": 1,
287388
"name": "Application Administrator"}],
288-
"activities": [{"id": 1, "nb_actions": 8,
389+
"activities": [{"id": 1, "nb_actions": 12,
289390
"last_annot": "2022-05-12T14:21:15"}]}]
290391
# Fix the date on both sides
291392
ref_stats[0]["activities"][0]["last_annot"] = "FIXED DATE"

QA/py/tests/test_prediction.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
import logging
2+
import pytest
3+
import pandas as pd
4+
import numpy as np
25

36
from starlette import status
47

5-
from tests.credentials import ADMIN_AUTH
8+
from tests.credentials import ADMIN_AUTH, CREATOR_AUTH
69
from tests.test_jobs import get_job_and_wait_until_ok
10+
from tests.test_classification import _prj_query
11+
12+
from BO.Prediction import DeepFeatures
13+
14+
from API_operations.CRUD.ObjectParents import SamplesService
715

816
OBJECT_SET_PREDICT_URL = "/object_set/predict"
917

@@ -37,3 +45,51 @@ def no_test_basic_prediction(config, database, fastapi, caplog):
3745
assert rsp.status_code == status.HTTP_200_OK
3846

3947
job_id = get_job_and_wait_until_ok(fastapi, rsp)
48+
49+
50+
def test_prediction_functions(config, database, fastapi, caplog):
    """Exercise the DeepFeatures storage functions on a freshly imported project.

    Steps, in order: save fake features for the 8 imported objects, read
    them back, check that none is reported missing, delete them all, check
    that all are then reported missing, and finally check that reading
    them raises an AssertionError.
    """
    caplog.set_level(logging.ERROR)
    # Imported locally, as in the rest of this test module
    from tests.test_import import test_import
    prj_id = test_import(config, database, caplog, "Test Prediction")

    obj_ids = _prj_query(fastapi, CREATOR_AUTH, prj_id)
    assert len(obj_ids) == 8

    # Fake CNN features: one row of 50 identical values per object,
    # the value being 0.1 for the first object, 0.2 for the second, etc.
    features = [[rank * .1] * 50 for rank, _objid in enumerate(obj_ids, start=1)]
    features_df = pd.DataFrame(features, index=obj_ids)

    # Insertion
    with SamplesService() as sce:
        nb_saved = DeepFeatures.save(sce.session, features_df)
        assert nb_saved == 8
        sce.session.commit()

    # Retrieval, values come back as float32
    with SamplesService() as sce:
        read_back = DeepFeatures.np_read_for_objects(sce.session, obj_ids)
        expected = np.array(features, dtype='float32')
        assert (read_back == expected).all()

    # Nothing is missing at this point
    with SamplesService() as sce:
        missing = DeepFeatures.find_missing(sce.session, prj_id)
        assert missing == {}

    # Deletion
    with SamplesService() as sce:
        nb_deleted = DeepFeatures.delete_all(sce.session, prj_id)
        assert nb_deleted == 8
        sce.session.commit()

    # Everything is missing after deletion
    with SamplesService() as sce:
        missing = DeepFeatures.find_missing(sce.session, prj_id)
        assert len(missing) == 8

    # Retrieval from the now empty table, should raise an error
    with SamplesService() as sce:
        with pytest.raises(AssertionError):
            DeepFeatures.np_read_for_objects(sce.session, obj_ids)
95+

py/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,6 +1571,8 @@ def classify_auto_object_set(req: ClassifyAutoReq = Body(...),
15711571
"""
15721572
assert len(req.target_ids) == len(req.classifications) == len(req.scores), \
15731573
"Need the same number of objects, classifications and scores"
1574+
assert all(isinstance(score, float) and 0 <= score <= 1 for score in req.scores), \
1575+
"Scores should be floats between 0 and 1"
15741576
with ObjectManager() as sce:
15751577
with RightsThrower():
15761578
ret, prj_id, changes = sce.classify_auto_set(current_user, req.target_ids, req.classifications, req.scores,

0 commit comments

Comments
 (0)