Skip to content

Commit 5b1229b

Browse files
markov
1 parent 64ea962 commit 5b1229b

File tree

5 files changed

+206
-7
lines changed

5 files changed

+206
-7
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
66

77
## [Unreleased]
88

9+
### Added
10+
- Added Markov model to simulate timestamps of the household loads [#4](https://github.com/ie3-institute/simonaMarkovLoad/issues/4)
11+
912
### Changed
1013
- Compute instantaneous kW from cumulative kWh via 15-minute differencing [#1](https://github.com/ie3-institute/simonaMarkovLoad/issues/1)

src/main.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
1+
from pathlib import Path
2+
13
import matplotlib.pyplot as plt
4+
import numpy as np
25

6+
from src.markov.plothelper import plot_transition_matrix
7+
from src.markov.transmats import build_transition_matrices_parallel
8+
from src.preprocessing.bucketing import add_bucket_columns
39
from src.preprocessing.loader import load_timeseries
410

11+
PROCESSED_DIR = Path(__file__).resolve().parents[2] / "data" / "processed"
12+
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)
13+
514

615
def plot_state_distribution(df):
716

@@ -17,13 +26,32 @@ def plot_state_distribution(df):
1726

1827

1928
def main():
20-
df = load_timeseries()
21-
print(df)
22-
df_norm = load_timeseries(normalize=True)
23-
print(df_norm)
24-
df_disc = load_timeseries(normalize=True, discretize=True)
25-
print(df_disc)
26-
plot_state_distribution(df_disc)
29+
df = load_timeseries(normalize=True, discretize=True)
30+
df = add_bucket_columns(df)
31+
32+
n_states = int(df["state"].max()) + 1
33+
34+
counts, probs = build_transition_matrices_parallel(
35+
df,
36+
n_states=n_states,
37+
alpha=0.5,
38+
n_jobs=-1,
39+
)
40+
41+
out_file = PROCESSED_DIR / "transition_matrices.npz"
42+
np.savez_compressed(out_file, counts=counts, probs=probs)
43+
print(
44+
f"Gespeichert unter {out_file} "
45+
f"(counts: {counts.shape}, probs: {probs.shape})"
46+
)
47+
48+
data = np.load(PROCESSED_DIR / "transition_matrices.npz")
49+
probs = data["probs"]
50+
51+
bucket_ids_to_plot = [0, 1767]
52+
53+
for bid in bucket_ids_to_plot:
54+
plot_transition_matrix(probs[bid], bid)
2755

2856

2957
if __name__ == "__main__":

src/markov/plothelper.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import Tuple
2+
3+
import matplotlib.pyplot as plt
4+
import numpy as np
5+
6+
7+
def decode_bucket(bid: int) -> Tuple[int, int, int]:
8+
month = bid // 192 + 1
9+
weekend = (bid % 192) // 96
10+
quarter = bid % 96
11+
return month, weekend, quarter
12+
13+
14+
def plot_transition_matrix(P: np.ndarray, bid: int) -> None:
15+
month, weekend, quarter = decode_bucket(bid)
16+
17+
plt.figure(figsize=(5, 4))
18+
plt.imshow(P, origin="lower")
19+
plt.colorbar(label="P(i → j)")
20+
21+
plt.title(
22+
f"Bucket {bid} (Monat {month}, "
23+
f"{'WE' if weekend else 'WD'}, Q{quarter:02d})"
24+
)
25+
26+
plt.xlabel("Folge‑State j")
27+
plt.ylabel("Ausgangs‑State i")
28+
plt.xticks(range(P.shape[0]))
29+
plt.yticks(range(P.shape[0]))
30+
plt.tight_layout()
31+
plt.show()

src/markov/transmats.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from typing import Tuple
2+
3+
import numpy as np
4+
import pandas as pd
5+
from joblib import Parallel, delayed
6+
7+
from src.preprocessing.bucketing import TOTAL_BUCKETS
8+
9+
10+
def _counts_for_bucket(
11+
bid: int, bucket_ids: np.ndarray, idx_flat: np.ndarray, n_states: int
12+
) -> Tuple[int, np.ndarray]:
13+
14+
rows = idx_flat[bucket_ids == bid]
15+
C = (
16+
np.bincount(rows, minlength=n_states * n_states)
17+
.astype(np.uint32)
18+
.reshape(n_states, n_states)
19+
)
20+
return bid, C
21+
22+
23+
def build_transition_matrices_parallel(
24+
df: pd.DataFrame,
25+
*,
26+
bucket_col: str = "bucket_id",
27+
state_col: str = "state",
28+
n_states: int,
29+
alpha: float = 0.5,
30+
n_jobs: int | None = -1,
31+
) -> Tuple[np.ndarray, np.ndarray]:
32+
33+
df = df.sort_values("timestamp")
34+
df["next_state"] = df[state_col].shift(-1)
35+
df["next_bucket"] = df[bucket_col].shift(-1)
36+
37+
mask = df[bucket_col] == df["next_bucket"]
38+
pairs = df.loc[mask, [bucket_col, state_col, "next_state"]].to_numpy(np.uint16)
39+
40+
bucket_ids = pairs[:, 0]
41+
idx_flat = pairs[:, 1] * n_states + pairs[:, 2]
42+
43+
unique_buckets = np.arange(TOTAL_BUCKETS, dtype=np.uint16)
44+
results = Parallel(n_jobs=n_jobs, backend="loky", verbose=5)(
45+
delayed(_counts_for_bucket)(bid, bucket_ids, idx_flat, n_states)
46+
for bid in unique_buckets
47+
)
48+
49+
counts = np.zeros((TOTAL_BUCKETS, n_states, n_states), dtype=np.uint32)
50+
for bid, C in results:
51+
counts[bid] = C
52+
53+
counts_sm = counts.astype(np.float32) + alpha
54+
row_sums = counts_sm.sum(axis=2, keepdims=True)
55+
probs = (counts_sm / row_sums).astype(np.float32)
56+
57+
return counts, probs

src/preprocessing/bucketing.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from __future__ import annotations
2+
3+
from dataclasses import dataclass
4+
from typing import Tuple
5+
6+
import pandas as pd
7+
8+
QUARTERS_PER_DAY: int = 96
9+
WEEKEND_BUCKETS_PER_MONTH: int = 2 * QUARTERS_PER_DAY # 192
10+
TOTAL_BUCKETS: int = 12 * WEEKEND_BUCKETS_PER_MONTH # 2_304
11+
12+
13+
@dataclass(frozen=True, slots=True)
14+
class Bucket:
15+
16+
month: int
17+
weekend: int
18+
quarter: int
19+
20+
def key(self) -> Tuple[int, int, int]:
21+
22+
return (self.month, self.weekend, self.quarter)
23+
24+
def index(self) -> int:
25+
26+
return (
27+
(self.month - 1) * WEEKEND_BUCKETS_PER_MONTH
28+
+ self.weekend * QUARTERS_PER_DAY
29+
+ self.quarter
30+
)
31+
32+
@classmethod
33+
def from_timestamp(cls, ts: pd.Timestamp) -> "Bucket":
34+
35+
return cls(
36+
month=ts.month,
37+
weekend=int(ts.day_of_week >= 5),
38+
quarter=ts.hour * 4 + ts.minute // 15,
39+
)
40+
41+
42+
def _timestamp_series(
43+
df: pd.DataFrame, *, timestamp_col: str = "timestamp"
44+
) -> pd.Series:
45+
if not pd.api.types.is_datetime64_any_dtype(df[timestamp_col]):
46+
return pd.to_datetime(df[timestamp_col], utc=False, infer_datetime_format=True)
47+
return df[timestamp_col]
48+
49+
50+
def add_bucket_columns(
51+
df: pd.DataFrame,
52+
*,
53+
timestamp_col: str = "timestamp",
54+
inplace: bool = False,
55+
) -> pd.DataFrame:
56+
57+
base = df if inplace else df.copy()
58+
59+
ts = _timestamp_series(base, timestamp_col=timestamp_col)
60+
61+
base["month"] = ts.dt.month
62+
base["weekend"] = (ts.dt.dayofweek >= 5).astype("uint8")
63+
base["quarter"] = (ts.dt.hour * 4 + ts.dt.minute // 15).astype("uint16")
64+
65+
base["bucket_id"] = (
66+
(base["month"] - 1) * WEEKEND_BUCKETS_PER_MONTH
67+
+ base["weekend"] * QUARTERS_PER_DAY
68+
+ base["quarter"]
69+
).astype("uint16")
70+
71+
return base
72+
73+
74+
__all__ = [
75+
"Bucket",
76+
"add_bucket_columns",
77+
"QUARTERS_PER_DAY",
78+
"WEEKEND_BUCKETS_PER_MONTH",
79+
"TOTAL_BUCKETS",
80+
]

0 commit comments

Comments
 (0)