gerritjandebruin
diff --git a/‎01-preprocessing-stats.ipynb
Lines changed: 281 additions & 110 deletions b/‎01-preprocessing-stats.ipynb
Lines changed: 281 additions & 110 deletions
diff --git a/‎03-baseline-ship-risk-model.ipynb
Lines changed: 1 addition & 145 deletions b/‎03-baseline-ship-risk-model.ipynb
Lines changed: 1 addition & 145 deletions
diff --git a/‎06-confusion-matrix.ipynb
Lines changed: 15 additions & 15 deletions b/‎06-confusion-matrix.ipynb
Lines changed: 15 additions & 15 deletions
diff --git a/‎Makefile
Lines changed: 6 additions & 1 deletion b/‎Makefile
Lines changed: 6 additions & 1 deletion
diff --git a/‎fig/confusion_matrix.pdf
0 Bytes b/‎fig/confusion_matrix.pdf
0 Bytes
diff --git a/‎fig/network.pdf
134 KB b/‎fig/network.pdf
134 KB
diff --git a/‎run.py
Lines changed: 5 additions & 0 deletions b/‎run.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/get_folds.py
Lines changed: 0 additions & 1 deletion b/‎src/get_folds.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/import_portcalls.py
Lines changed: 1 addition & 1 deletion b/‎src/import_portcalls.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/learn.py
Lines changed: 1 addition & 1 deletion b/‎src/learn.py
Lines changed: 1 addition & 1 deletion
@@ -851,7 +851,7 @@
     "    .assign(Model='Baseline')\n",
     ")\n",
     "display(df)\n",
-    "df.to_pickle('cache/confusion-matrix-baseline.pkl')"
+    "df.to_pickle('models/confusion-matrix-baseline.pkl')"
    ]
   },
   {
@@ -968,150 +968,6 @@
     ")"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "76ba2db4-9d93-48cc-82b3-e008debe36cf",
-   "metadata": {},
-   "source": [
-    "# Fairness measures"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "3334bf80-57bd-4cbb-bda7-d79cc3ab127e",
-   "metadata": {},
-   "source": [
-    "NOTE SWAPPING! From `y=0` (compliant), `y=1` (deficiency), `y=2` (detention) to `y=False` (non-compliant) and `y=True` (compliant)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "c7e8e0f3-55f3-4169-96b2-0490239e7532",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2022-02-07T11:13:07.774047Z",
-     "iopub.status.busy": "2022-02-07T11:13:07.773930Z",
-     "iopub.status.idle": "2022-02-07T11:13:08.226414Z",
-     "shell.execute_reply": "2022-02-07T11:13:08.225929Z",
-     "shell.execute_reply.started": "2022-02-07T11:13:07.774033Z"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th>group</th>\n",
-       "      <th>sensitive</th>\n",
-       "      <th>non sensitive</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>measure</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>PPR</th>\n",
-       "      <td>0.640</td>\n",
-       "      <td>0.957</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>TPR</th>\n",
-       "      <td>0.676</td>\n",
-       "      <td>0.959</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>FPR</th>\n",
-       "      <td>0.592</td>\n",
-       "      <td>0.952</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "group    sensitive  non sensitive\n",
-       "measure                          \n",
-       "PPR          0.640          0.957\n",
-       "TPR          0.676          0.959\n",
-       "FPR          0.592          0.952"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data = [\n",
-    "    {'measure': 'PPR', 'group': 'sensitive',     'value': (y_score <= 1)[s ].mean()},\n",
-    "    {'measure': 'PPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s].mean()},\n",
-    "    {'measure': 'FPR', 'group': 'sensitive',     'value': (y_score <= 1)[s  & ~y_true].mean()},\n",
-    "    {'measure': 'FPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s & ~y_true].mean()},\n",
-    "    {'measure': 'TPR', 'group': 'sensitive',     'value': (y_score <= 1)[s  &  y_true].mean()},\n",
-    "    {'measure': 'TPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s &  y_true].mean()},\n",
-    "]\n",
-    "data = pd.DataFrame(data).pivot('measure', 'group', 'value')\n",
-    "data.round(3).reindex(['PPR', 'TPR', 'FPR'], axis=0).reindex(['sensitive', 'non sensitive'], axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "6e67eb70-c37a-4ad6-a12b-af8491119ee2",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2022-02-07T11:13:08.227232Z",
-     "iopub.status.busy": "2022-02-07T11:13:08.227116Z",
-     "iopub.status.idle": "2022-02-07T11:13:08.665396Z",
-     "shell.execute_reply": "2022-02-07T11:13:08.664824Z",
-     "shell.execute_reply.started": "2022-02-07T11:13:08.227219Z"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'ε_impact': -0.49599292325481104, 'ε_odds': 0.36028816577626677}"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ε_impact = 1-(data.at['PPR', 'non sensitive'] / data.at['PPR', 'sensitive'])\n",
-    "ε_odds = max(\n",
-    "    [\n",
-    "        abs(data.at['TPR', 'non sensitive'] - data.at['TPR', 'sensitive']),\n",
-    "        abs(data.at['FPR', 'non sensitive'] - data.at['FPR', 'sensitive'])\n",
-    "    ]\n",
-    ")   \n",
-    "{'ε_impact': ε_impact, 'ε_odds': ε_odds}"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
 
@@ -1,4 +1,9 @@
 PHONY: clean
 clean:
 	find . -name '__pycache__' -exec rm -fr {} +
-	find . -name '.ipynb_checkpoints' -exec rm -fr {} +
+	find . -name '.ipynb_checkpoints' -exec rm -fr {} +
+
+.PHONY: clean install  # the leading dot is required; a bare `PHONY:` only defines an ordinary target
+install:  # fetch teexgraph (unless already cloned), pin it to a fixed commit, build its `listener` target
+	test -d teexgraph || git clone https://github.com/franktakes/teexgraph.git
+	cd teexgraph/ && git reset --hard 0c4ebef4ee938aa842bf40d1aec8a66d95fd8a82 && make listener
@@ -2,6 +2,7 @@
 
 import json
 import os
+import subprocess
 
 import joblib
 import networkx as nx
@@ -30,6 +31,10 @@
 filepath_performance_folds = 'models/performance_folds.pkl'
 
 def run():
+    src.logger.info('#0 Check installs')
+    if not os.path.isfile('teexgraph/teexgraph'):
+        subprocess.run(['make', 'install'])
+    
     src.logger.info("#1 Import inspections.") 
     if not os.path.isfile(filepath_inspections_cleaned):
         inspections_cleaned = src.import_inspections(filepath_inspections_raw)
 
@@ -5,7 +5,6 @@
 def get_folds(X: pd.DataFrame, y: pd.Series, s: pd.Series, 
               random_state: int=42) -> tuple[list, list]:
     """Provide the outer and inner folds for the given instances."""
-    print(y)
     X = np.ascontiguousarray(X.values)
     y = np.ascontiguousarray(y.values.ravel())
     s = np.ascontiguousarray(s.values.ravel())
 
@@ -36,7 +36,6 @@ def import_portcalls(filepath: str, flag_performance: dict):
         .assign(
             flag_code=lambda x: x['flag'],
             flag=lambda x: x['flag'].replace(flag_performance))
-        .dropna(subset=['flag'])
         .replace({
             'risk': {'HRS': 2, 'SRS': 1, 'LRS': 0},
             'flag': {
@@ -46,6 +45,7 @@ def import_portcalls(filepath: str, flag_performance: dict):
                 'MZ': pd.NA
             }
         })
+        .fillna({'flag': 1})
         .astype({'port': str, 'ship': str, 'risk': 'Int8', 'flag': 'Int8'})
         .sort_values('arrival')
         [[
 
@@ -31,7 +31,7 @@ def learn(X: pd.DataFrame, y: pd.DataFrame, s: pd.DataFrame, outer_folds: list,
 
     # Convert X, y, s to np.arrays for compatibility reasons.
     X = np.ascontiguousarray(X.values)
-    y = np.ascontiguousarray(y.values.ravel())
+    y = np.ascontiguousarray(y.values.ravel()) > 1
     s = np.ascontiguousarray(s.values.ravel())
 
     params = [