Skip to content

Commit c805c15

Browse files
🎨 Refactor code
1 parent e205a79 commit c805c15

11 files changed

+310
-275
lines changed

01-preprocessing-stats.ipynb

Lines changed: 281 additions & 110 deletions
Large diffs are not rendered by default.

03-baseline-ship-risk-model.ipynb

Lines changed: 1 addition & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,7 @@
851851
" .assign(Model='Baseline')\n",
852852
")\n",
853853
"display(df)\n",
854-
"df.to_pickle('cache/confusion-matrix-baseline.pkl')"
854+
"df.to_pickle('models/confusion-matrix-baseline.pkl')"
855855
]
856856
},
857857
{
@@ -968,150 +968,6 @@
968968
")"
969969
]
970970
},
971-
{
972-
"cell_type": "markdown",
973-
"id": "76ba2db4-9d93-48cc-82b3-e008debe36cf",
974-
"metadata": {},
975-
"source": [
976-
"# Fairness measures"
977-
]
978-
},
979-
{
980-
"cell_type": "markdown",
981-
"id": "3334bf80-57bd-4cbb-bda7-d79cc3ab127e",
982-
"metadata": {},
983-
"source": [
984-
"NOTE SWAPPING! From `y=0` (compliant), `y=1` (deficiency), `y=2` (detention) to `y=False` (non-compliant) and `y=True` (compliant)"
985-
]
986-
},
987-
{
988-
"cell_type": "code",
989-
"execution_count": 14,
990-
"id": "c7e8e0f3-55f3-4169-96b2-0490239e7532",
991-
"metadata": {
992-
"execution": {
993-
"iopub.execute_input": "2022-02-07T11:13:07.774047Z",
994-
"iopub.status.busy": "2022-02-07T11:13:07.773930Z",
995-
"iopub.status.idle": "2022-02-07T11:13:08.226414Z",
996-
"shell.execute_reply": "2022-02-07T11:13:08.225929Z",
997-
"shell.execute_reply.started": "2022-02-07T11:13:07.774033Z"
998-
},
999-
"tags": []
1000-
},
1001-
"outputs": [
1002-
{
1003-
"data": {
1004-
"text/html": [
1005-
"<div>\n",
1006-
"<style scoped>\n",
1007-
" .dataframe tbody tr th:only-of-type {\n",
1008-
" vertical-align: middle;\n",
1009-
" }\n",
1010-
"\n",
1011-
" .dataframe tbody tr th {\n",
1012-
" vertical-align: top;\n",
1013-
" }\n",
1014-
"\n",
1015-
" .dataframe thead th {\n",
1016-
" text-align: right;\n",
1017-
" }\n",
1018-
"</style>\n",
1019-
"<table border=\"1\" class=\"dataframe\">\n",
1020-
" <thead>\n",
1021-
" <tr style=\"text-align: right;\">\n",
1022-
" <th>group</th>\n",
1023-
" <th>sensitive</th>\n",
1024-
" <th>non sensitive</th>\n",
1025-
" </tr>\n",
1026-
" <tr>\n",
1027-
" <th>measure</th>\n",
1028-
" <th></th>\n",
1029-
" <th></th>\n",
1030-
" </tr>\n",
1031-
" </thead>\n",
1032-
" <tbody>\n",
1033-
" <tr>\n",
1034-
" <th>PPR</th>\n",
1035-
" <td>0.640</td>\n",
1036-
" <td>0.957</td>\n",
1037-
" </tr>\n",
1038-
" <tr>\n",
1039-
" <th>TPR</th>\n",
1040-
" <td>0.676</td>\n",
1041-
" <td>0.959</td>\n",
1042-
" </tr>\n",
1043-
" <tr>\n",
1044-
" <th>FPR</th>\n",
1045-
" <td>0.592</td>\n",
1046-
" <td>0.952</td>\n",
1047-
" </tr>\n",
1048-
" </tbody>\n",
1049-
"</table>\n",
1050-
"</div>"
1051-
],
1052-
"text/plain": [
1053-
"group sensitive non sensitive\n",
1054-
"measure \n",
1055-
"PPR 0.640 0.957\n",
1056-
"TPR 0.676 0.959\n",
1057-
"FPR 0.592 0.952"
1058-
]
1059-
},
1060-
"execution_count": 14,
1061-
"metadata": {},
1062-
"output_type": "execute_result"
1063-
}
1064-
],
1065-
"source": [
1066-
"data = [\n",
1067-
" {'measure': 'PPR', 'group': 'sensitive', 'value': (y_score <= 1)[s ].mean()},\n",
1068-
" {'measure': 'PPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s].mean()},\n",
1069-
" {'measure': 'FPR', 'group': 'sensitive', 'value': (y_score <= 1)[s & ~y_true].mean()},\n",
1070-
" {'measure': 'FPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s & ~y_true].mean()},\n",
1071-
" {'measure': 'TPR', 'group': 'sensitive', 'value': (y_score <= 1)[s & y_true].mean()},\n",
1072-
" {'measure': 'TPR', 'group': 'non sensitive', 'value': (y_score <= 1)[~s & y_true].mean()},\n",
1073-
"]\n",
1074-
"data = pd.DataFrame(data).pivot('measure', 'group', 'value')\n",
1075-
"data.round(3).reindex(['PPR', 'TPR', 'FPR'], axis=0).reindex(['sensitive', 'non sensitive'], axis=1)"
1076-
]
1077-
},
1078-
{
1079-
"cell_type": "code",
1080-
"execution_count": 15,
1081-
"id": "6e67eb70-c37a-4ad6-a12b-af8491119ee2",
1082-
"metadata": {
1083-
"execution": {
1084-
"iopub.execute_input": "2022-02-07T11:13:08.227232Z",
1085-
"iopub.status.busy": "2022-02-07T11:13:08.227116Z",
1086-
"iopub.status.idle": "2022-02-07T11:13:08.665396Z",
1087-
"shell.execute_reply": "2022-02-07T11:13:08.664824Z",
1088-
"shell.execute_reply.started": "2022-02-07T11:13:08.227219Z"
1089-
},
1090-
"tags": []
1091-
},
1092-
"outputs": [
1093-
{
1094-
"data": {
1095-
"text/plain": [
1096-
"{'ε_impact': -0.49599292325481104, 'ε_odds': 0.36028816577626677}"
1097-
]
1098-
},
1099-
"execution_count": 15,
1100-
"metadata": {},
1101-
"output_type": "execute_result"
1102-
}
1103-
],
1104-
"source": [
1105-
"ε_impact = 1-(data.at['PPR', 'non sensitive'] / data.at['PPR', 'sensitive'])\n",
1106-
"ε_odds = max(\n",
1107-
" [\n",
1108-
" abs(data.at['TPR', 'non sensitive'] - data.at['TPR', 'sensitive']),\n",
1109-
" abs(data.at['FPR', 'non sensitive'] - data.at['FPR', 'sensitive'])\n",
1110-
" ]\n",
1111-
") \n",
1112-
"{'ε_impact': ε_impact, 'ε_odds': ε_odds}"
1113-
]
1114-
},
1115971
{
1116972
"cell_type": "code",
1117973
"execution_count": null,

06-confusion-matrix.ipynb

Lines changed: 15 additions & 15 deletions
Large diffs are not rendered by default.

Makefile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
PHONY: clean
22
clean:
33
find . -name '__pycache__' -exec rm -fr {} +
4-
find . -name '.ipynb_checkpoints' -exec rm -fr {} +
4+
find . -name '.ipynb_checkpoints' -exec rm -fr {} +
5+
6+
PHONY: install
7+
install:
8+
git clone https://github.com/franktakes/teexgraph.git
9+
cd teexgraph/ && git reset --hard 0c4ebef4ee938aa842bf40d1aec8a66d95fd8a82 && make listener

fig/confusion_matrix.pdf

0 Bytes
Binary file not shown.

fig/network.pdf

134 KB
Binary file not shown.

run.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import json
44
import os
5+
import subprocess
56

67
import joblib
78
import networkx as nx
@@ -30,6 +31,10 @@
3031
filepath_performance_folds = 'models/performance_folds.pkl'
3132

3233
def run():
34+
src.logger.info('#0 Check installs')
35+
if not os.path.isfile('teexgraph/teexgraph'):
36+
subprocess.run(['make', 'install'])
37+
3338
src.logger.info("#1 Import inspections.")
3439
if not os.path.isfile(filepath_inspections_cleaned):
3540
inspections_cleaned = src.import_inspections(filepath_inspections_raw)

src/get_folds.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
def get_folds(X: pd.DataFrame, y: pd.Series, s: pd.Series,
66
random_state: int=42) -> tuple[list, list]:
77
"""Provide the outer and inner folds for the given instances."""
8-
print(y)
98
X = np.ascontiguousarray(X.values)
109
y = np.ascontiguousarray(y.values.ravel())
1110
s = np.ascontiguousarray(s.values.ravel())

src/import_portcalls.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ def import_portcalls(filepath: str, flag_performance: dict):
3636
.assign(
3737
flag_code=lambda x: x['flag'],
3838
flag=lambda x: x['flag'].replace(flag_performance))
39-
.dropna(subset=['flag'])
4039
.replace({
4140
'risk': {'HRS': 2, 'SRS': 1, 'LRS': 0},
4241
'flag': {
@@ -46,6 +45,7 @@ def import_portcalls(filepath: str, flag_performance: dict):
4645
'MZ': pd.NA
4746
}
4847
})
48+
.fillna({'flag': 1})
4949
.astype({'port': str, 'ship': str, 'risk': 'Int8', 'flag': 'Int8'})
5050
.sort_values('arrival')
5151
[[

src/learn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def learn(X: pd.DataFrame, y: pd.DataFrame, s: pd.DataFrame, outer_folds: list,
3131

3232
# Convert X, y, s to np.arrays for compatibility reasons.
3333
X = np.ascontiguousarray(X.values)
34-
y = np.ascontiguousarray(y.values.ravel())
34+
y = np.ascontiguousarray(y.values.ravel()) > 1
3535
s = np.ascontiguousarray(s.values.ravel())
3636

3737
params = [

0 commit comments

Comments
 (0)