1 change: 1 addition & 0 deletions .github/workflows/continuous_integration.yml
@@ -157,4 +157,5 @@ jobs:
       - name: Test with pytest
         if: ${{ github.event_name == 'merge_group' }}
         run: |
+          mkdir ~/Data
           uv run python -m pytest -vv -n 2 --dist loadgroup --cov=ethicml --cov-fail-under=80 tests/
35 changes: 17 additions & 18 deletions ethicml/data/tabular_data/acs.py
@@ -8,7 +8,7 @@
 import contextlib
 import os
 from pathlib import Path
-from typing import Literal, TypeAlias, get_args
+from typing import Literal, TypeAlias, cast, get_args
 from typing_extensions import override

 import numpy as np
@@ -98,12 +98,13 @@ def _download_dir(root: Path) -> Generator[None, None, None]:
 class _AcsBase(Dataset):
     split: str
     target: str
+    year: Literal["2014", "2015", "2016", "2017", "2018"]

     def __init__(
         self,
         name: str,
         root: str | Path,
-        year: str,
+        year: Literal["2014", "2015", "2016", "2017", "2018"],
         horizon: int,
         states: list[StateList],
         class_label_spec: str,
@@ -119,7 +120,7 @@ def __init__(

         self.year = year
         self.horizon = horizon
-        self.survey = "person"
+        self.survey: Literal["person", "household"] = "person"
         self.states = states
         self._invert_s = invert_s

@@ -294,7 +295,7 @@ class AcsIncome(_AcsBase):
     def __init__(
         self,
         root: str | Path,
-        year: str,
+        year: Literal["2014", "2015", "2016", "2017", "2018"],
         horizon: int,
         states: list[StateList],
         split: str = "Sex",
@@ -341,9 +342,8 @@ def load(
     ) -> DataTuple:
         from folktables import ACSDataSource, adult_filter, folktables

-        datasource = ACSDataSource(
-            survey_year=self.year, horizon=f"{self.horizon}-Year", survey=self.survey
-        )
+        horizon = cast(Literal["1-Year", "5-Year"], f"{self.horizon}-Year")
+        datasource = ACSDataSource(survey_year=self.year, horizon=horizon, survey=self.survey)

         with _download_dir(self.root):
             dataframe = datasource.get_data(states=self.states, download=True)
@@ -374,8 +374,8 @@ def load(
             postprocess=lambda x: np.nan_to_num(x, nan=-1),
         )

-        dataframe = data_obj._preprocess(dataframe)
-        dataframe[data_obj.target] = dataframe[data_obj.target].apply(data_obj._target_transform)
+        dataframe = data_obj._preprocess(dataframe)  # type: ignore[attr-defined]
+        dataframe[data_obj.target] = dataframe[data_obj.target].apply(data_obj._target_transform)  # type: ignore[attr-defined]

         for feat in disc_feats:
             dataframe[feat] = (
@@ -388,7 +388,7 @@ def load(

         dataframe = pd.get_dummies(dataframe[disc_feats + continuous_features])

-        dataframe = dataframe.apply(data_obj._postprocess)
+        dataframe = dataframe.apply(data_obj._postprocess)  # type: ignore[attr-defined]

         cow_cols = [col for col in dataframe.columns if col.startswith("COW")]
         mar_cols = [col for col in dataframe.columns if col.startswith("MAR")]
@@ -451,7 +451,7 @@ class AcsEmployment(_AcsBase):
     def __init__(
         self,
         root: str | Path,
-        year: str,
+        year: Literal["2014", "2015", "2016", "2017", "2018"],
         horizon: int,
         states: list[StateList],
         split: str = "Sex",
@@ -506,9 +506,8 @@ def load(
     ) -> DataTuple:
         from folktables import ACSDataSource, folktables

-        datasource = ACSDataSource(
-            survey_year=self.year, horizon=f"{self.horizon}-Year", survey=self.survey
-        )
+        horizon = cast(Literal["1-Year", "5-Year"], f"{self.horizon}-Year")
+        datasource = ACSDataSource(survey_year=self.year, horizon=horizon, survey=self.survey)

         with _download_dir(self.root):
             dataframe = datasource.get_data(states=self.states, download=True)
@@ -545,9 +544,9 @@ def load(
             postprocess=lambda x: np.nan_to_num(x, nan=-1),
         )

-        dataframe = data_obj._preprocess(dataframe)
-        dataframe[data_obj.target] = dataframe[data_obj.target].apply(data_obj._target_transform)
-        dataframe = dataframe.apply(data_obj._postprocess)
+        dataframe = data_obj._preprocess(dataframe)  # type: ignore[attr-defined]
+        dataframe[data_obj.target] = dataframe[data_obj.target].apply(data_obj._target_transform)  # type: ignore[attr-defined]
+        dataframe = dataframe.apply(data_obj._postprocess)  # type: ignore[attr-defined]

         for feat in disc_feats:
             dataframe[feat] = (
@@ -560,7 +559,7 @@ def load(

         dataframe = pd.get_dummies(dataframe[disc_feats + continuous_features])

-        dataframe = dataframe.apply(data_obj._postprocess)
+        dataframe = dataframe.apply(data_obj._postprocess)  # type: ignore[attr-defined]

         schl_cols = [col for col in dataframe.columns if col.startswith("SCHL")]
         mar_cols = [col for col in dataframe.columns if col.startswith("MAR")]
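Note on the `cast` pattern introduced above: an f-string is always typed as plain `str`, so `f"{self.horizon}-Year"` cannot be passed directly to a parameter annotated with `Literal["1-Year", "5-Year"]`. A minimal, self-contained sketch of the idea (toy names, not EthicML code):

```python
from typing import Literal, cast

Horizon = Literal["1-Year", "5-Year"]

def fetch(horizon: Horizon) -> None:
    """Stand-in for an API that only accepts the two literal strings."""
    print(f"fetching {horizon} data")

def make_horizon(years: int) -> Horizon:
    """Build a Horizon from an int, narrowing the str to the Literal type."""
    if years not in (1, 5):
        raise ValueError("horizon must be 1 or 5 years")
    # The f-string types as `str`; after the runtime check above, `cast`
    # tells the checker it is one of the two allowed literals.
    return cast(Horizon, f"{years}-Year")

fetch(make_horizon(1))  # prints: fetching 1-Year data
```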
2 changes: 1 addition & 1 deletion ethicml/implementations/dro_tabular.py
@@ -50,7 +50,7 @@ def train_model(
         num_epochs = len(train_loader.dataset)  # type: ignore[arg-type]
         print(
             f"train Epoch: {epoch} [{batch_idx * len(data_x)}/{num_epochs}"
-            f"\t({100. * batch_idx / len(train_loader):.0f}%)]"
+            f"\t({100.0 * batch_idx / len(train_loader):.0f}%)]"
             f"\tLoss: {loss.item() / len(data_x):.6f}"
         )

4 changes: 2 additions & 2 deletions ethicml/implementations/vfae.py
@@ -114,7 +114,7 @@ def train_model(
         if flags["supervised"]:
             print(
                 f"train Epoch: {epoch} [{batch_idx * len(data_x)}/{num_epochs}"
-                f"({100. * batch_idx / len(train_loader):.0f}%)]\t"
+                f"({100.0 * batch_idx / len(train_loader):.0f}%)]\t"
                 f"Loss: {loss.item() / len(data_x):.6f}\t"
                 f"pred_loss: {prediction_loss.item():.6f}\t"
                 f"recon_loss: {reconstruction_loss.item():.6f}\t"
@@ -124,7 +124,7 @@
         else:
             print(
                 f"train Epoch: {epoch} [{batch_idx * len(data_x)}/{num_epochs}"
-                f"({100. * batch_idx / len(train_loader):.0f}%)]\t"
+                f"({100.0 * batch_idx / len(train_loader):.0f}%)]\t"
                 f"Loss: {loss.item() / len(data_x):.6f}\t"
                 f"recon_loss: {reconstruction_loss.item():.6f}\t"
                 f"mmd_loss: {flags['batch_size'] * mmd_loss.item():.6f}"
6 changes: 3 additions & 3 deletions ethicml/implementations/vfae_modules/encoder.py
@@ -19,11 +19,11 @@ def __init__(self, enc_size: list[int], init_size: int, ld: int) -> None:
         self.encoder.add_module("batch norm 0", nn.BatchNorm1d(enc_size[0]))
         for k in range(len(enc_size) - 1):
             self.encoder.add_module(
-                f"encoder layer {k+1}", nn.Linear(enc_size[k], enc_size[k + 1])
+                f"encoder layer {k + 1}", nn.Linear(enc_size[k], enc_size[k + 1])
             )
-            self.encoder.add_module(f"encoder activation {k+1}", activation)
+            self.encoder.add_module(f"encoder activation {k + 1}", activation)
             self.encoder.add_module(
-                f"encoder batch norm {k+1}", nn.BatchNorm1d(enc_size[k + 1])
+                f"encoder batch norm {k + 1}", nn.BatchNorm1d(enc_size[k + 1])
             )
         self.z1_enc_mu = nn.Linear(enc_size[-1], ld)
         self.z1_enc_logvar = nn.Linear(enc_size[-1], ld)
12 changes: 6 additions & 6 deletions ethicml/implementations/zemel.py
@@ -10,7 +10,7 @@
 import pandas as pd
 import scipy.optimize as optim
 from scipy.spatial.distance import cdist
-from scipy.special import softmax  # type: ignore[attr-defined]
+from scipy.special import softmax

 from ethicml.implementations.utils import load_data_from_flags, save_transformations
 from ethicml.utility import DataTuple, SubgroupTuple, TestTuple
@@ -39,7 +39,7 @@ def LFR_optim_objective(  # noqa: N802, PLR0913
     A_z: float,  # noqa: N803
     print_interval: int,
     verbose: bool,  # noqa: FBT001 # disabled because scipy needs positional args
-) -> np.number:
+) -> np.floating:
     """LFR optim objective."""
     _, features_dim = x_unprivileged.shape

@@ -135,8 +135,8 @@ def fit(train: DataTuple, flags: "ZemelArgs", seed: int) -> Model:
     bnd = [(0, 1)] * flags["clusters"] + [(None, None)] * features_dim * flags["clusters"]
     LFR_optim_objective.steps = 0  # type: ignore[attr-defined]

-    learned_model = optim.fmin_l_bfgs_b(  # type: ignore[attr-defined]
-        LFR_optim_objective,
+    learned_model = optim.fmin_l_bfgs_b(
+        func=LFR_optim_objective,
         x0=parameters_initialization,
         epsilon=1e-5,
         args=(
@@ -151,11 +151,11 @@ def fit(train: DataTuple, flags: "ZemelArgs", seed: int) -> Model:
             print_interval,
             verbose,
         ),
-        bounds=bnd,
+        bounds=bnd,  # type: ignore[arg-type]
         approx_grad=True,
         maxfun=flags["maxfun"],
         maxiter=flags["max_iter"],
-        disp=verbose,
+        disp=verbose,  # type: ignore[arg-type]
     )[0]
     w = learned_model[: flags["clusters"]]
     prototypes = learned_model[flags["clusters"] :].reshape((flags["clusters"], features_dim))
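For context on the `fmin_l_bfgs_b` changes: with scipy calls now type-checked (see the stub changes in `pyproject.toml` below), the objective is passed as `func=` and the two remaining mismatches (`bounds` and the bool `disp`) get targeted `type: ignore[arg-type]` comments. A minimal sketch of the same scipy API on a toy objective, not the LFR objective:

```python
import numpy as np
import scipy.optimize as optim

def objective(params: np.ndarray) -> np.floating:
    """Toy objective: squared distance from the point (1, 2)."""
    return np.sum((params - np.array([1.0, 2.0])) ** 2)

x_opt, f_opt, info = optim.fmin_l_bfgs_b(
    func=objective,
    x0=np.zeros(2),
    approx_grad=True,  # no analytic gradient; use finite differences
    bounds=[(0, 1), (None, None)],  # same kind of bounds list as in the diff
    epsilon=1e-5,
    maxfun=1000,
    maxiter=100,
)
print(x_opt)  # approximately [1.0, 2.0]
```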
2 changes: 1 addition & 1 deletion ethicml/models/postprocess/dp_flip.py
@@ -72,7 +72,7 @@ def _flip(
     idx_s_y = _y.index.intersection(_s.index)
     rng = np.random.RandomState(seed)
     idxs = list(rng.permutation(idx_s_y))
-    update = pd.Series({idx: post_y_val for idx in idxs[:num_to_flip]}, dtype=preds.hard.dtype)
+    update = pd.Series(dict.fromkeys(idxs[:num_to_flip], post_y_val), dtype=preds.hard.dtype)
     preds.hard.update(update)
     return preds

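The `dict.fromkeys` rewrite above is a behaviour-preserving cleanup (the pattern lint rules such as flake8-comprehensions' C420 flag): a comprehension that maps every key to the same constant is better written as `dict.fromkeys`, which is safe here because the shared value is an immutable scalar. A small illustration:

```python
import pandas as pd

idxs = [7, 3, 9, 1]
post_y_val = 1
num_to_flip = 2

# Two equivalent ways to build {7: 1, 3: 1}.
via_comprehension = {idx: post_y_val for idx in idxs[:num_to_flip]}
via_fromkeys = dict.fromkeys(idxs[:num_to_flip], post_y_val)
assert via_comprehension == via_fromkeys

# As in the diff: wrap it in a Series to update predictions by index.
update = pd.Series(via_fromkeys, dtype="int64")
print(update)  # index [7, 3], both values 1
```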
2 changes: 1 addition & 1 deletion ethicml/models/postprocess/hardt.py
@@ -6,7 +6,7 @@
 import numpy as np
 from numpy.random import RandomState
 import pandas as pd
-from scipy.optimize import OptimizeResult, linprog  # type: ignore[attr-defined]
+from scipy.optimize import OptimizeResult, linprog

 from ethicml.metrics.per_sensitive_attribute import metric_per_sens
 from ethicml.metrics.tnr import TNR
2 changes: 1 addition & 1 deletion ethicml/utility/data_structures.py
@@ -480,7 +480,7 @@ def write_as_npz(
         for entry, values in data.items()
     }

-    np.savez(data_path, **as_numpy, **column_names, **extra)
+    np.savez(data_path, allow_pickle=False, **as_numpy, **column_names, **extra)


 def concat(datatup_list: Sequence[T], *, ignore_index: bool = False) -> T:
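On the `np.savez` change: `allow_pickle=False` makes the save fail loudly if any array would need pickling (for example `object` dtype), rather than silently writing a pickle-bearing archive that `np.load` would later refuse by default. Note that `savez` only accepts this keyword on sufficiently recent NumPy versions. A round-trip sketch under that assumption:

```python
import numpy as np

xs = np.array([[1.0, 2.0], [3.0, 4.0]])
cols = np.array(["a1", "a2"])  # fixed-width unicode dtype, no pickling needed

# Rejects object arrays at save time, so the archive always loads without pickle.
np.savez("/tmp/example.npz", allow_pickle=False, x=xs, columns=cols)

with np.load("/tmp/example.npz") as data:  # allow_pickle=False is load's default
    print(data["x"].shape, list(data["columns"]))  # (2, 2) ['a1', 'a2']
```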
11 changes: 5 additions & 6 deletions pyproject.toml
@@ -52,6 +52,7 @@ data = [
 plot = [
     "matplotlib>=3.8",
     "seaborn>=0.9.0",
+    "imageio",
 ]
 metrics = [
     "scikit-learn>=0.20.1",
@@ -69,9 +70,10 @@ test = [
     "pip<23.0.0,>=22.3.1",
 ]
 typecheck = [
+    "microsoft-python-type-stubs @ git+https://github.com/microsoft/python-type-stubs.git@76ca370",
     "mypy>=0.990",
     "pandas-stubs>=1.4.2.220626",
-    "python-type-stubs @ git+https://github.com/wearepal/python-type-stubs.git@8d5f608",
+    "scipy-stubs>=1.15.3.0",
     "types-seaborn<1.0.0.0,>=0.13.2.20240205",
 ]
 lint = [
@@ -157,10 +159,7 @@ module = [
     "aif360.*",
     "cloudpickle",
     "fairlearn.*",
-    "folktables.*",
-    "imageio",
-    "scipy.*",
     "sklearn.*",
 ]
 ignore_missing_imports = true

@@ -186,8 +185,8 @@ reportUnknownVariableType = "none"
 reportUnknownParameterType = "none"
 reportUnknownArgumentType = "none"
 reportUnknownLambdaType = "none"
-venvPath = "/home/tmk/.cache/pypoetry/virtualenvs"
-venv = "ethicml-dzQunYke-py3.10"
+venvPath = "."
+venv = ".venv"

 [tool.ruff]
 line-length = 100
6 changes: 3 additions & 3 deletions tests/models_test/inprocess_test/models_inprocessing_test.py
@@ -58,7 +58,7 @@ class InprocessTest(NamedTuple):


 INPROCESS_TESTS = [
-    InprocessTest(name="Adversarial Debiasing", model=AdvDebiasing(dir=TMPDIR), num_pos=45),
+    InprocessTest(name="Adversarial Debiasing", model=AdvDebiasing(dir=TMPDIR), num_pos=40),
     InprocessTest(name="Agarwal, lr, dp, 0.1", model=Agarwal(dir=TMPDIR), num_pos=45),
     InprocessTest(
         name="Agarwal, gbt, dp, 0.1",
@@ -103,9 +103,9 @@ class InprocessTest(NamedTuple):
         name="HGR linear_model", model=HGR(dir=TMPDIR, model_type=ModelType.linear), num_pos=60
     ),
     InprocessTest(
-        name="HGR deep_model", model=HGR(dir=TMPDIR, model_type=ModelType.deep), num_pos=69
+        name="HGR deep_model", model=HGR(dir=TMPDIR, model_type=ModelType.deep), num_pos=68
     ),
-    InprocessTest(name="Fair Dummies deep_model", model=FairDummies(dir=TMPDIR), num_pos=59),
+    InprocessTest(name="Fair Dummies deep_model", model=FairDummies(dir=TMPDIR), num_pos=58),
     InprocessTest(name="Kamiran & Calders lr C=1.0", model=Reweighting(), num_pos=44),
     InprocessTest(name="Logistic Regression (C=1.0)", model=LR(), num_pos=44),
     InprocessTest(name="LRCV", model=LRCV(), num_pos=40),
3 changes: 1 addition & 2 deletions tests/run_algorithm_test.py
@@ -1,7 +1,6 @@
 """Test that an algorithm can run against some data."""

 from dataclasses import dataclass
-import os
 from pathlib import Path
 from typing import Literal
 from typing_extensions import Self
@@ -186,7 +185,7 @@ def test_run_alg_suite() -> None:
         topic="pytest",
     )

-    files = os.listdir(Path() / "results")
+    files = [p.name for p in (Path() / "results").iterdir()]
     file_names = [
         "pytest_Adult Race-Binary_Upsample uniform.csv",
         "pytest_Adult Race-Binary_no_transform.csv",
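The `os.listdir` to `Path.iterdir` swap keeps the test on `pathlib` throughout and lets the `import os` go. The two forms yield the same set of names:

```python
import os
from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as tmp:
    results = Path(tmp) / "results"
    results.mkdir()
    (results / "example.csv").touch()
    # Old style: plain strings from os.listdir.
    # New style: Path objects from iterdir, reduced to their names.
    assert sorted(os.listdir(results)) == sorted(p.name for p in results.iterdir())
```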
14 changes: 7 additions & 7 deletions tests/saving_data_test.py
@@ -90,8 +90,8 @@ def test_dataset_name_none() -> None:
     """Tests that a DataTuple can be saved without the name property."""
     datatup = DataTuple.from_df(
         x=pd.DataFrame([3.0], columns=["a1"]),
-        s=pd.Series([4.0], name="b2"),
-        y=pd.Series([6.0], name="c3"),
+        s=pd.Series([4], name="b2"),
+        y=pd.Series([6], name="c3"),
         name=None,
     )
     with TemporaryDirectory() as tmpdir:
@@ -110,7 +110,7 @@ def test_dataset_name_with_spaces() -> None:
     """Tests that a dataset name can contain spaces and special chars."""
     name = "This is a very@#$%^&*((())) complicated name"
     datatup = SubgroupTuple.from_df(
-        x=pd.DataFrame([3.0], columns=["a1"]), s=pd.Series([4.0], name="b2"), name=name
+        x=pd.DataFrame([3.0], columns=["a1"]), s=pd.Series([4], name="b2"), name=name
     )
     with TemporaryDirectory() as tmpdir:
         tmp_path = Path(tmpdir)
@@ -127,8 +127,8 @@ def test_apply_to_joined_df() -> None:
     """Tests apply_to_joined_df_function."""
     datatup = DataTuple.from_df(
         x=pd.DataFrame([3.0], columns=["a1"]),
-        s=pd.Series([4.0], name="b2"),
-        y=pd.Series([6.0], name="c3"),
+        s=pd.Series([4], name="b2"),
+        y=pd.Series([6], name="c3"),
         name=None,
     )

@@ -152,8 +152,8 @@ def test_data_tuple_len() -> None:

     datatup_equal_len = DataTuple.from_df(
         x=pd.DataFrame([3.0, 2.0, 1.0], columns=["a1"]),
-        s=pd.Series([4.0, 5.0, 9.0], name="b2"),
-        y=pd.Series([6.0, 4.2, 6.7], name="c3"),
+        s=pd.Series([4, 5, 9], name="b2"),
+        y=pd.Series([6, 4, 6], name="c3"),
         name=None,
     )
     assert len(datatup_equal_len) == 3
14 changes: 14 additions & 0 deletions typings/folktables/__init__.pyi
@@ -0,0 +1,14 @@
+from .acs import ACSDataSource as ACSDataSource
+from .acs import ACSEmployment as ACSEmployment
+from .acs import ACSEmploymentFiltered as ACSEmploymentFiltered
+from .acs import ACSHealthInsurance as ACSHealthInsurance
+from .acs import ACSIncome as ACSIncome
+from .acs import ACSIncomePovertyRatio as ACSIncomePovertyRatio
+from .acs import ACSMobility as ACSMobility
+from .acs import ACSPublicCoverage as ACSPublicCoverage
+from .acs import ACSTravelTime as ACSTravelTime
+from .acs import adult_filter as adult_filter
+from .folktables import BasicProblem as BasicProblem
+from .folktables import DataSource as DataSource
+from .folktables import Problem as Problem
+from .load_acs import generate_categories as generate_categories
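This stub package is what allows the `"folktables.*"` entry to come out of the mypy ignore-list in `pyproject.toml` above: folktables itself ships no type information, so the `typings/` directory supplies it (pyright picks up `typings/` by default; mypy presumably finds it via `mypy_path`). For illustration only, the referenced `typings/folktables/acs.pyi` would declare `adult_filter` roughly like this; the signature is hypothetical, inferred from how `load()` uses it:

```python
# typings/folktables/acs.pyi (sketch; the real stub file may differ)
import pandas as pd

def adult_filter(data: pd.DataFrame) -> pd.DataFrame:
    """Mimic the row filters of the original UCI Adult dataset."""
    ...
```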