Add explicit type annotations to make mypy happy

michaelosthege · michaelosthege · commit 99cba96be367 · 2023-02-04T22:05:21.000+01:00
diff --git a/mcbackend/backends/clickhouse.py b/mcbackend/backends/clickhouse.py
@@ -5,7 +5,18 @@
 import logging
 import time
 from datetime import datetime, timezone
-from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+)
 
 import clickhouse_driver
 import numpy
@@ -156,7 +167,7 @@ def __init__(
         self._client = client
         # The following attributes belong to the batched insert mechanism.
         # Inserting in batches is much faster than inserting single rows.
-        self._str_cols = set()
+        self._str_cols: Set[str] = set()
         self._insert_query: str = ""
         self._insert_queue: List[Dict[str, Any]] = []
         self._last_insert = time.time()
@@ -176,13 +187,16 @@ def append(
             self._insert_query = f"INSERT INTO {self.cid} (`_draw_idx`,`{names}`) VALUES"
             self._str_cols = {k for k, v in params.items() if "str" in numpy.asarray(v).dtype.name}
 
-        # Convert str ndarrays to lists
+        params_ins: Dict[str, Union[numpy.ndarray, int, float, List[str]]] = {
+            "_draw_idx": self._draw_idx,
+            **params,
+        }
+        # Convert str-dtyped ndarrays to lists
         for col in self._str_cols:
-            params[col] = params[col].tolist()
+            params_ins[col] = params[col].tolist()
 
         # Queue up for insertion
-        params["_draw_idx"] = self._draw_idx
-        self._insert_queue.append(params)
+        self._insert_queue.append(params_ins)
         self._draw_idx += 1
 
         if (
@@ -242,13 +256,14 @@ def _get_rows(
 
         # Without draws return empty arrays of the correct shape/dtype
         if not draws:
-            if is_rigid(nshape):
-                return numpy.empty(shape=[0] + nshape, dtype=dtype)
+            if is_rigid(nshape) and nshape is not None:
+                return numpy.empty(shape=[0, *nshape], dtype=dtype)
             return numpy.array([], dtype=object)
 
         # The unpacking must also account for non-rigid shapes
         # and str-dtyped empty arrays default to fixed length 1 strings.
         # The [None] list is slower, but more flexible in this regard.
+        buffer: Union[numpy.ndarray, Sequence]
         if is_rigid(nshape) and dtype != "str":
             assert nshape is not None
             buffer = numpy.empty((draws, *nshape), dtype)
@@ -292,7 +307,7 @@ def __init__(
         self,
         meta: RunMeta,
         *,
-        created_at: datetime = None,
+        created_at: Optional[datetime] = None,
         client_fn: Callable[[], clickhouse_driver.Client],
     ) -> None:
         self._client_fn = client_fn
@@ -331,8 +346,8 @@ class ClickHouseBackend(Backend):
 
     def __init__(
         self,
-        client: clickhouse_driver.Client = None,
-        client_fn: Callable[[], clickhouse_driver.Client] = None,
+        client: Optional[clickhouse_driver.Client] = None,
+        client_fn: Optional[Callable[[], clickhouse_driver.Client]] = None,
     ):
         """Create a ClickHouse backend around a database client.
 
diff --git a/mcbackend/core.py b/mcbackend/core.py
@@ -3,31 +3,20 @@
 """
 import collections
 import logging
-from typing import (
-    TYPE_CHECKING,
-    Dict,
-    List,
-    Mapping,
-    Optional,
-    Sequence,
-    Sized,
-    TypeVar,
-)
+from typing import Dict, List, Mapping, Optional, Sequence, Sized, TypeVar, Union, cast
 
 import numpy
 
 from .meta import ChainMeta, RunMeta, Variable
 from .npproto.utils import ndarray_to_numpy
 from .utils import as_array_from_ragged
 
-InferenceData = TypeVar("InferenceData")
 try:
-    from arviz import from_dict
+    from arviz import InferenceData, from_dict
 
-    if not TYPE_CHECKING:
-        from arviz import InferenceData
     _HAS_ARVIZ = True
 except ModuleNotFoundError:
+    InferenceData = TypeVar("InferenceData")  # type: ignore
     _HAS_ARVIZ = False
 
 Shape = Sequence[int]
@@ -262,20 +251,22 @@ def to_inferencedata(self, *, equalize_chain_lengths: bool = True, **kwargs) ->
                 warmup_sample_stats[svar.name].append(stats[tune])
                 sample_stats[svar.name].append(stats[~tune])
 
+        w_pst = cast(Dict[str, Union[Sequence, numpy.ndarray]], warmup_posterior)
+        w_ss = cast(Dict[str, Union[Sequence, numpy.ndarray]], warmup_sample_stats)
+        pst = cast(Dict[str, Union[Sequence, numpy.ndarray]], posterior)
+        ss = cast(Dict[str, Union[Sequence, numpy.ndarray]], sample_stats)
         if not equalize_chain_lengths:
             # Convert ragged arrays to object-dtyped ndarray because NumPy >=1.24.0 no longer does that automatically
-            warmup_posterior = {k: as_array_from_ragged(v) for k, v in warmup_posterior.items()}
-            warmup_sample_stats = {
-                k: as_array_from_ragged(v) for k, v in warmup_sample_stats.items()
-            }
-            posterior = {k: as_array_from_ragged(v) for k, v in posterior.items()}
-            sample_stats = {k: as_array_from_ragged(v) for k, v in sample_stats.items()}
+            w_pst = {k: as_array_from_ragged(v) for k, v in warmup_posterior.items()}
+            w_ss = {k: as_array_from_ragged(v) for k, v in warmup_sample_stats.items()}
+            pst = {k: as_array_from_ragged(v) for k, v in posterior.items()}
+            ss = {k: as_array_from_ragged(v) for k, v in sample_stats.items()}
 
         idata = from_dict(
-            warmup_posterior=warmup_posterior,
-            warmup_sample_stats=warmup_sample_stats,
-            posterior=posterior,
-            sample_stats=sample_stats,
+            warmup_posterior=w_pst,
+            warmup_sample_stats=w_ss,
+            posterior=pst,
+            sample_stats=ss,
             coords=self.coords,
             dims=self.dims,
             attrs=self.meta.attributes,