Merge pull request #54 from ecotaxa/tests_classif_update

grololo06 · web-flow · commit 5e22ac25bbc5 · 2023-02-19T07:16:22.000+01:00
Tests classif update
diff --git a/QA/py/requirements.txt b/QA/py/requirements.txt
@@ -56,7 +56,7 @@ scikit-learn==1.0
 #tensorflow-hub==0.12.0
 #tensorflow_addons==0.14.0
 # Used same place as TF, CNN generation
-#pandas==1.3.3
+pandas==1.3.3
 ##lycon==0.2.0 # Conflicts with the version required by TF
 #opencv-python-headless==4.5.3.56
 #imgaug==0.4.0
diff --git a/QA/py/tests/test_classification.py b/QA/py/tests/test_classification.py
@@ -3,6 +3,7 @@
 # Copyright (C) 2015-2020  Picheral, Colin, Irisson (UPMC-CNRS)
 #
 import logging
+import pytest
 
 from typing import List
 from API_models.filters import ProjectFilters, ProjectFiltersDict
@@ -30,6 +31,7 @@ def _prj_query(fastapi, auth, prj_id, **kwargs) -> List[int]:
 OBJECT_SET_DELETE_URL = "/object_set/"
 OBJECT_SET_SUMMARY_URL = "/object_set/{project_id}/summary?only_total=False"
 OBJECT_SET_PARENTS_URL = "/object_set/parents"
+OBJECT_QUERY_URL = "/object/{object_id}"
 
 PROJECT_SET_USER_STATS = "/project_set/user_stats?ids={prj_ids}"
 
@@ -61,15 +63,43 @@ def classify_all(fastapi, obj_ids, classif_id):
     assert rsp.status_code == status.HTTP_200_OK
 
 
-def classify_auto_all(fastapi, obj_ids, classif_id):
+def classify_auto_all(fastapi, obj_ids, classif_id, scores=None):
     url = OBJECT_SET_CLASSIFY_AUTO_URL
     classifications = [classif_id for _obj in obj_ids]
-    scores = [0.52 for _obj in obj_ids]
+    if not scores:
+        scores = [0.52 for _obj in obj_ids]
     rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
                                                       "classifications": classifications,
                                                       "scores": scores,
                                                       "keep_log": True})
     assert rsp.status_code == status.HTTP_200_OK
+    
+    
+def classify_auto_incorrect(fastapi, obj_ids):
+    url = OBJECT_SET_CLASSIFY_AUTO_URL
+    classifications = [-1 for _obj in obj_ids]
+    
+    # One score fewer than there are objects: the length mismatch should raise an error
+    scores = [0.1 for _obj in obj_ids[:-1]]
+    with pytest.raises(AssertionError):
+        rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
+                                                          "classifications": classifications,
+                                                          "scores": scores,
+                                                          "keep_log": True})
+    # List of scores outside [0, 1], should raise an error
+    scores = [2. for _obj in obj_ids]
+    with pytest.raises(AssertionError):
+        rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
+                                                          "classifications": classifications,
+                                                          "scores": scores,
+                                                          "keep_log": True})
+    # Scores of the wrong type (None instead of float) should be rejected with HTTP 422
+    scores = [None for _obj in obj_ids]
+    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
+                                                      "classifications": classifications,
+                                                      "scores": scores,
+                                                      "keep_log": True})
+    assert rsp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
 
 
 # Note: to go faster in a local dev environment, use "filled_database" instead of "database" below
@@ -179,9 +209,26 @@ def get_object_set_stats():
     rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
     assert rsp.status_code == status.HTTP_200_OK
 
+    # Incorrect ML results (wrong length, out-of-range or wrongly typed scores) must be rejected
+    classify_auto_incorrect(fastapi, obj_ids[:4])
+    
     # Super ML result, 4 first objects are crustacea
     classify_auto_all(fastapi, obj_ids[:4], crustacea)
 
+    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
+                                          'nb_predicted': 4,
+                                          'nb_unclassified': 4,
+                                          'nb_validated': 0,
+                                          'projid': prj_id,
+                                          'used_taxa': [-1, crustacea]}
+    
+    # New ML results with a different score for the second object
+    classify_auto_all(fastapi, [obj_ids[1]], crustacea, [0.8])
+    url = OBJECT_QUERY_URL.format(object_id=obj_ids[1])
+    rsp = fastapi.get(url, headers=ADMIN_AUTH)
+    assert rsp.status_code == status.HTTP_200_OK
+    assert rsp.json()['classif_auto_score'] == 0.8
+    
     assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
                                           'nb_predicted': 4,
                                           'nb_unclassified': 4,
@@ -198,16 +245,44 @@ def get_object_set_stats():
                                           'nb_unclassified': 0,
                                           'nb_validated': 8,
                                           'projid': prj_id,
-                                          'used_taxa': [25828]}  # No more Unclassified and Copepod is in +
+                                          'used_taxa': [copepod_id]}  # No more Unclassified and Copepod is in +
 
     # No history yet as the object was just created
     classif = classif_history(fastapi, obj_ids[0])
     assert len(classif) == 1
     assert classif[0]['classif_date'] is not None  # e.g. 2021-09-12T09:28:03.278626
     classif[0]['classif_date'] = "now"
     assert classif == [
-        {'objid': obj_ids[0], 'classif_id': 12846, 'classif_date': 'now', 'classif_who': None,
+        {'objid': obj_ids[0], 'classif_id': crustacea, 'classif_date': 'now', 'classif_who': None,
          'classif_type': 'A', 'classif_qual': 'P', 'classif_score': 0.52, 'user_name': None, 'taxon_name': 'Crustacea'}]
+    
+    # Revert on validated objects
+    url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="")
+    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
+    assert rsp.status_code == status.HTTP_200_OK
+    stats = rsp.json()
+    
+    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
+                                          'nb_predicted': 4,
+                                          'nb_unclassified': 4,
+                                          'nb_validated': 0,
+                                          'projid': prj_id,
+                                          'used_taxa': [-1, crustacea]}
+    
+    # Second revert: the stats should not change, since the last record in history is the same
+    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
+    assert rsp.status_code == status.HTTP_200_OK
+    stats = rsp.json()
+    
+    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
+                                          'nb_predicted': 4,
+                                          'nb_unclassified': 4,
+                                          'nb_validated': 0,
+                                          'projid': prj_id,
+                                          'used_taxa': [-1, crustacea]}
+    
+    # Apply validation again after revert
+    classify_all(fastapi, obj_ids, copepod_id)
 
     # Not a copepod :(
     classify_all(fastapi, obj_ids, entomobryomorpha_id)
@@ -228,7 +303,7 @@ def classify_all_no_change(classif_id):
     classif2[0]['classif_date'] = 'hopefully just now'
     classif2[1]['classif_date'] = 'a bit before'
     assert classif2 == [{'classif_date': 'hopefully just now',
-                         'classif_id': 25828,
+                         'classif_id': copepod_id,
                          'classif_qual': 'V',
                          'classif_score': None,
                          'classif_type': 'M',
@@ -237,7 +312,7 @@ def classify_all_no_change(classif_id):
                          'taxon_name': 'Copepoda',
                          'user_name': 'Application Administrator'},
                         {'classif_date': 'a bit before',
-                         'classif_id': 12846,
+                         'classif_id': crustacea,
                          'classif_qual': 'P',
                          'classif_score': 0.52,
                          'classif_type': 'A',
@@ -261,9 +336,35 @@ def classify_all_no_change(classif_id):
                            'nb_unclassified': 0,
                            'nb_validated': 8,
                            'projid': prj_id,
-                           'used_taxa': [
-                               25835]}]  # <- copepod is gone, unclassified as well, replaced with entomobryomorpha
-
+                           'used_taxa': 
+                           [entomobryomorpha_id]}]  # <- copepod is gone, unclassified as well, replaced with entomobryomorpha
+    
+    # Reset to predicted on validated objects
+    url = OBJECT_SET_RESET_PREDICTED_URL.format(project_id=prj_id)
+    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
+    assert rsp.status_code == status.HTTP_200_OK
+    stats = rsp.json()
+    
+    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
+                                          'nb_predicted': 8,
+                                          'nb_unclassified': 0,
+                                          'nb_validated': 0,
+                                          'projid': prj_id,
+                                          'used_taxa': [entomobryomorpha_id]}
+    
+    # Revert after reset to predicted
+    url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="")
+    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
+    assert rsp.status_code == status.HTTP_200_OK
+    stats = rsp.json()
+    
+    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
+                                          'nb_predicted': 0,
+                                          'nb_unclassified': 0,
+                                          'nb_validated': 8,
+                                          'projid': prj_id,
+                                          'used_taxa': [entomobryomorpha_id]}
+  
     # Delete some object via API, why not?
     rsp = fastapi.delete(OBJECT_SET_DELETE_URL, headers=ADMIN_AUTH, json=obj_ids[:4])
     assert rsp.status_code == status.HTTP_200_OK
@@ -285,7 +386,7 @@ def classify_all_no_change(classif_id):
     ref_stats = [{"projid": prj_id,
                   "annotators": [{"id": 1,
                                   "name": "Application Administrator"}],
-                  "activities": [{"id": 1, "nb_actions": 8,
+                  "activities": [{"id": 1, "nb_actions": 12,
                                   "last_annot": "2022-05-12T14:21:15"}]}]
     # Fix the date on both sides
     ref_stats[0]["activities"][0]["last_annot"] = "FIXED DATE"
diff --git a/QA/py/tests/test_prediction.py b/QA/py/tests/test_prediction.py
@@ -1,9 +1,17 @@
 import logging
+import pytest
+import pandas as pd
+import numpy as np
 
 from starlette import status
 
-from tests.credentials import ADMIN_AUTH
+from tests.credentials import ADMIN_AUTH, CREATOR_AUTH
 from tests.test_jobs import get_job_and_wait_until_ok
+from tests.test_classification import _prj_query
+
+from BO.Prediction import DeepFeatures
+
+from API_operations.CRUD.ObjectParents import SamplesService
 
 OBJECT_SET_PREDICT_URL = "/object_set/predict"
 
@@ -37,3 +45,51 @@ def no_test_basic_prediction(config, database, fastapi, caplog):
     assert rsp.status_code == status.HTTP_200_OK
 
     job_id = get_job_and_wait_until_ok(fastapi, rsp)
+    
+    
+def test_prediction_functions(config, database, fastapi, caplog):
+    caplog.set_level(logging.ERROR)
+    from tests.test_import import test_import
+    prj_id = test_import(config, database, caplog, "Test Prediction")
+
+    obj_ids = _prj_query(fastapi, CREATOR_AUTH, prj_id)
+    assert len(obj_ids) == 8
+    
+    # Prepare fake CNN features to insert
+    features = list()
+    for i, oi in enumerate(obj_ids):
+        features.append([(i+1) * .1] * 50)
+    features_df = pd.DataFrame(features, index=obj_ids)
+    
+    # Test features insertion
+    with SamplesService() as sce:
+        n_inserts = DeepFeatures.save(sce.session, features_df)
+        assert n_inserts == 8
+        sce.session.commit()
+    
+    # Test features retrieval
+    with SamplesService() as sce:
+        ret = DeepFeatures.np_read_for_objects(sce.session, obj_ids)
+        assert (ret == np.array(features, dtype='float32')).all()
+    
+    # Test find_missing without any missing features
+    with SamplesService() as sce:
+        ret = DeepFeatures.find_missing(sce.session, prj_id)
+        assert ret == {}
+        
+    # Test deletion
+    with SamplesService() as sce:
+        n_deletes = DeepFeatures.delete_all(sce.session, prj_id)
+        assert n_deletes == 8
+        sce.session.commit()
+    
+    # Test find_missing after deletion
+    with SamplesService() as sce:
+        ret = DeepFeatures.find_missing(sce.session, prj_id)
+        assert len(ret) == 8
+    
+    # Test features retrieval after deletion (no features left for these objects), should raise an error
+    with SamplesService() as sce:
+        with pytest.raises(AssertionError):
+            ret = DeepFeatures.np_read_for_objects(sce.session, obj_ids)
+
diff --git a/py/main.py b/py/main.py
@@ -1571,6 +1571,8 @@ def classify_auto_object_set(req: ClassifyAutoReq = Body(...),
     """
     assert len(req.target_ids) == len(req.classifications) == len(req.scores), \
         "Need the same number of objects, classifications and scores"
+    assert all(isinstance(score, float) and 0 <= score <= 1 for score in req.scores), \
+        "Scores should be floats between 0 and 1"
     with ObjectManager() as sce:
         with RightsThrower():
             ret, prj_id, changes = sce.classify_auto_set(current_user, req.target_ids, req.classifications, req.scores,