Skip to content

Commit 4b79f06

Browse files
Added full Markov-model pipeline (#11)
* added full markov model * comments * use of config * test loader * added tests
1 parent 6a85bd4 commit 4b79f06

15 files changed

+263
-46
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88

99
### Added
1010
- `CODEOWNERS` file and dependabot automation [#7](https://github.com/ie3-institute/simonaMarkovLoad/issues/7)
11+
- Added full Markov-model pipeline [#10](https://github.com/ie3-institute/simonaMarkovLoad/issues/10)
1112

1213
### Changed
1314
- Compute instantaneous kW from cumulative kWh via 15-minute differencing [#1](https://github.com/ie3-institute/simonaMarkovLoad/issues/1)

config.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,7 @@ input:
33
timestamp_col: "Zeitstempel"
44
value_col: "Messwert"
55
factor: 4 # kWh per 15-min interval × 4 = kW; adjust if the sampling interval changes
6+
7+
model:
8+
n_states: 10
9+
laplace_alpha: 1.0

src/config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# src/config.py
21
import os
32
from pathlib import Path
43

src/main.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,31 @@
11
import matplotlib.pyplot as plt
2+
import numpy as np
23

4+
from src.markov.transition_counts import build_transition_counts
5+
from src.markov.transitions import build_transition_matrices
36
from src.preprocessing.loader import load_timeseries
47

8+
# Demo script: load the discretised time series, build the per-bucket
# transition counts and probabilities, and plot one bucket as a heat map.
# NOTE(review): everything below runs at import time; consider restoring a
# `main()` function behind an `if __name__ == "__main__":` guard.
df = load_timeseries(normalize=True, discretize=True)

counts = build_transition_counts(df)
# The Laplace smoothing strength comes from `model.laplace_alpha` in the
# config, so it is not repeated here.
probs = build_transition_matrices(df)

print("counts shape :", counts.shape)
print("probs shape :", probs.shape)

# Pick the first bucket that has at least one observed transition so the
# plot shows real data; fall back to bucket 0 when nothing was observed.
active_buckets = np.where(counts.sum(axis=(1, 2)) > 0)[0]
bucket = int(active_buckets[0]) if active_buckets.size else 0
print(f"\nUsing bucket {bucket}")

# Each row of the chosen bucket's matrix is printed as a quick sanity check
# of the normalisation.
print("row sums :", probs[bucket].sum(axis=1))

plt.imshow(probs[bucket], aspect="auto")
plt.title(f"Bucket {bucket} – transition probabilities")
plt.xlabel("state t+1")
plt.ylabel("state t")
plt.colorbar()
plt.tight_layout()
plt.show()

src/markov/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from .buckets import NUM_BUCKETS, bucket_id
2+
from .transition_counts import build_transition_counts
3+
from .transitions import build_transition_matrices
4+
5+
__all__ = [
6+
"bucket_id",
7+
"NUM_BUCKETS",
8+
"build_transition_counts",
9+
"build_transition_matrices",
10+
]

src/markov/_core.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
from src.config import CONFIG
5+
6+
from .buckets import NUM_BUCKETS
7+
8+
N_STATES = int(CONFIG["model"]["n_states"])
9+
10+
11+
def _transition_counts(
12+
df: pd.DataFrame, *, state_col="state", bucket_col="bucket", dtype=np.uint32
13+
) -> np.ndarray:
14+
df = df.sort_values("timestamp")
15+
s_t = df[state_col].to_numpy(dtype=int)[:-1]
16+
s_tp1 = df[state_col].to_numpy(dtype=int)[1:]
17+
buckets = df[bucket_col].to_numpy(dtype=int)[:-1]
18+
19+
counts = np.zeros((NUM_BUCKETS, N_STATES, N_STATES), dtype=dtype)
20+
np.add.at(counts, (buckets, s_t, s_tp1), 1)
21+
return counts

src/markov/buckets.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import pandas as pd
2+
3+
_NUM_MONTHS = 12
_NUM_QH = 96  # quarter-hour slots per day (24 h * 4)
# Bucket layout is month-major, then weekday/weekend, then quarter-hour, so
# the strides follow directly from the number of quarter-hour slots.
_WEEKEND_FACTOR = _NUM_QH
_MONTH_FACTOR = 2 * _NUM_QH
NUM_BUCKETS = _NUM_MONTHS * 2 * _NUM_QH  # 2 304


def _datetime_fields(ts):
    """Return the object exposing ``month``/``hour``/``minute``/``dayofweek``.

    A Series exposes these through its ``.dt`` accessor and a DatetimeIndex
    exposes them directly. Anything else is treated as a scalar and coerced
    with ``pd.Timestamp`` so that ``datetime.datetime`` and
    ``numpy.datetime64`` values work as well as ``pd.Timestamp``.
    """
    if isinstance(ts, pd.Series):
        return ts.dt
    if isinstance(ts, pd.DatetimeIndex):
        return ts
    return pd.Timestamp(ts)


def _is_weekend(ts):
    """Return whether ``ts`` falls on Saturday or Sunday (element-wise)."""
    # pandas numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
    return _datetime_fields(ts).dayofweek >= 5


def bucket_id(ts):
    """Map timestamp(s) to a bucket index in ``[0, NUM_BUCKETS)``.

    The index combines the month (0-11), a weekend flag (0/1) and the
    quarter-hour of the day (0-95):
    ``month * _MONTH_FACTOR + weekend * _WEEKEND_FACTOR + quarter_hour``.

    Args:
        ts: A datetime Series, a DatetimeIndex, or a single timestamp-like
            scalar.

    Returns:
        Integer bucket ids, element-wise for Series / DatetimeIndex input and
        a single integer for scalar input.
    """
    fields = _datetime_fields(ts)

    weekend = _is_weekend(ts)
    if isinstance(ts, (pd.Series, pd.DatetimeIndex)):
        weekend = weekend.astype(int)
    else:
        weekend = int(weekend)

    quarter_hour = fields.hour * 4 + fields.minute // 15
    month = fields.month - 1

    return month * _MONTH_FACTOR + weekend * _WEEKEND_FACTOR + quarter_hour


def assign_buckets(
    df: pd.DataFrame,
    *,
    ts_col: str = "timestamp",
    bucket_col: str = "bucket",
    inplace: bool = False,
) -> pd.DataFrame:
    """Add a bucket-id column computed from a timestamp column.

    Args:
        df: Input frame containing ``ts_col``.
        ts_col: Name of the datetime column to read.
        bucket_col: Name of the column to write.
        inplace: When true, ``df`` itself is modified and returned; otherwise
            a copy is modified and ``df`` is left untouched.

    Returns:
        The frame carrying the new ``bucket_col`` column, stored as ``uint16``
        (the largest bucket id, ``NUM_BUCKETS - 1``, fits in 16 bits).
    """
    tgt = df if inplace else df.copy()
    tgt[bucket_col] = bucket_id(tgt[ts_col]).astype("uint16")
    return tgt

src/markov/model.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/markov/transition_counts.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
from src.config import CONFIG
5+
6+
from .buckets import NUM_BUCKETS
7+
8+
N_STATES = int(CONFIG["model"]["n_states"])
9+
10+
11+
def build_transition_counts(
12+
df: pd.DataFrame,
13+
*,
14+
state_col: str = "state",
15+
bucket_col: str = "bucket",
16+
dtype=np.uint32,
17+
) -> np.ndarray:
18+
"""
19+
Absolute transition counts:
20+
C[b, i, j] = # of times state_t=i → state_{t+1}=j in bucket b
21+
Shape = (2 304, 10, 10).
22+
"""
23+
df = df.sort_values("timestamp")
24+
25+
s_t = df[state_col].to_numpy(dtype=int)[:-1]
26+
s_tp1 = df[state_col].to_numpy(dtype=int)[1:]
27+
buckets = df[bucket_col].to_numpy(dtype=int)[:-1]
28+
29+
counts = np.zeros((NUM_BUCKETS, N_STATES, N_STATES), dtype=dtype)
30+
np.add.at(counts, (buckets, s_t, s_tp1), 1)
31+
return counts

src/markov/transitions.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
from src.config import CONFIG
5+
6+
from ._core import _transition_counts
7+
8+
alpha = float(CONFIG["model"]["laplace_alpha"])
9+
10+
11+
def build_transition_matrices(df: pd.DataFrame, *, dtype=np.float32) -> np.ndarray:
12+
counts = _transition_counts(df, dtype=dtype)
13+
counts += alpha
14+
counts /= counts.sum(axis=2, keepdims=True)
15+
return counts.astype(dtype)

0 commit comments

Comments
 (0)