|
1 | 1 | import matplotlib.pyplot as plt
|
2 | 2 | import numpy as np
|
| 3 | +import pandas as pd |
| 4 | +from matplotlib.colors import Normalize |
3 | 5 |
|
| 6 | +from src.markov.buckets import assign_buckets, bucket_id |
| 7 | +from src.markov.gmm import fit_gmms, sample_value |
4 | 8 | from src.markov.transition_counts import build_transition_counts
|
5 | 9 | from src.markov.transitions import build_transition_matrices
|
6 | 10 | from src.preprocessing.loader import load_timeseries
|
7 | 11 |
|
8 |
| -df = load_timeseries(normalize=True, discretize=True) |
# Number of days to simulate; main() multiplies this by 96 steps per day
# (the simulated series uses a 15-minute frequency).
SIM_DAYS = 3
9 | 13 |
|
10 | 14 |
|
11 |
| -counts = build_transition_counts(df) |
12 |
| -probs = build_transition_matrices(df, alpha=1.0) |
| 15 | +def _detect_value_col(df: pd.DataFrame) -> str: |
| 16 | + candidate_cols = ["x", "value", "load", "power", "p_norm", "load_norm"] |
| 17 | + for c in candidate_cols: |
| 18 | + if c in df.columns and np.issubdtype(df[c].dtype, np.number): |
| 19 | + return c |
| 20 | + raise KeyError("No numeric load column found – please inspect the dataframe.") |
13 | 21 |
|
14 |
| -print("counts shape :", counts.shape) |
15 |
| -print("probs shape :", probs.shape) |
16 | 22 |
|
| 23 | +def _simulate_series( |
| 24 | + probs: np.ndarray, |
| 25 | + gmms, |
| 26 | + start_ts: pd.Timestamp, |
| 27 | + start_state: int, |
| 28 | + periods: int, |
| 29 | + rng: np.random.Generator | None = None, |
| 30 | +) -> pd.DataFrame: |
| 31 | + """Generate synthetic 15‑min series (timestamp, state, x).""" |
| 32 | + rng = np.random.default_rng() if rng is None else rng |
| 33 | + timestamps = pd.date_range(start_ts, periods=periods, freq="15min") |
| 34 | + states = np.empty(periods, dtype=int) |
| 35 | + xs = np.empty(periods, dtype=float) |
17 | 36 |
|
18 |
| -active_buckets = np.where(counts.sum(axis=(1, 2)) > 0)[0] |
19 |
| -bucket = int(active_buckets[0]) if active_buckets.size else 0 |
20 |
| -print(f"\nUsing bucket {bucket}") |
| 37 | + s = start_state |
| 38 | + for i, ts in enumerate(timestamps): |
| 39 | + b = bucket_id(ts) |
| 40 | + s = rng.choice(probs.shape[1], p=probs[b, s]) |
| 41 | + states[i] = s |
| 42 | + xs[i] = sample_value(gmms, b, s, rng=rng) |
| 43 | + return pd.DataFrame({"timestamp": timestamps, "state": states, "x_sim": xs}) |
21 | 44 |
|
22 | 45 |
|
23 |
| -print("row sums :", probs[bucket].sum(axis=1)) |
def main() -> None:
    """Run the full demo: load data, fit the model, simulate and plot.

    Steps, in order: load the time series, make sure a ``bucket`` column is
    present, build transition counts and probabilities, plot the per-bucket
    heat-maps, fit the mixture models, simulate ``SIM_DAYS`` days and show the
    diagnostic plots.
    """
    df = load_timeseries(normalize=True, discretize=True)
    has_buckets = "bucket" in df.columns
    if not has_buckets:
        df = assign_buckets(df)

    value_col = _detect_value_col(df)
    print("Using load column:", value_col)

    counts = build_transition_counts(df)
    probs = build_transition_matrices(df)
    _plot_first_25_buckets(counts, probs)

    print("Fitting GMMs … (this may take a moment)")
    gmms = fit_gmms(df, value_col=value_col)

    # Start at midnight of the earliest day, seeded with the first row's state.
    first_midnight = df["timestamp"].min().normalize()
    initial_state = int(df["state"].iloc[0])
    n_steps = SIM_DAYS * 96
    sim_df = _simulate_series(
        probs,
        gmms,
        start_ts=first_midnight,
        start_state=initial_state,
        periods=n_steps,
    )

    _plot_simulation_diagnostics(df, sim_df, value_col)
| 72 | + |
| 73 | + |
def _plot_first_25_buckets(counts: np.ndarray, probs: np.ndarray) -> None:
    """Show a 5x5 grid of transition-probability heat-maps for the first buckets.

    Up to 25 buckets are drawn, fewer when the arrays hold fewer. Buckets
    whose transition counts sum to zero are left blank. All panels share one
    colour scale so they can be compared directly.

    Args:
        counts: Transition counts, indexed by bucket along the first axis.
        probs: Transition probabilities, indexed by bucket along the first axis.
    """
    # Never index past the available buckets.
    n_shown = min(25, len(probs), len(counts))
    if n_shown == 0:
        return  # nothing to draw

    buckets = list(range(n_shown))
    fig, axes = plt.subplots(5, 5, figsize=(15, 15), sharex=True, sharey=True)
    panels = axes.ravel()
    norm = Normalize(vmin=0, vmax=probs[buckets].max())

    im = None  # stays None when every shown bucket is empty
    for ax, b in zip(panels, buckets):
        if counts[b].sum() == 0:
            ax.axis("off")
            continue
        im = ax.imshow(probs[b], aspect="auto", origin="lower", norm=norm)
        ax.set_title(f"Bucket {b}", fontsize=8)
        ax.set_xticks([])
        ax.set_yticks([])

    # Hide grid cells that have no bucket assigned.
    for ax in panels[n_shown:]:
        ax.axis("off")

    # A colour bar needs at least one drawn image to attach to.
    if im is not None:
        fig.colorbar(im, ax=panels.tolist(), shrink=0.6, label="p")
    fig.suptitle(f"Transition probabilities – buckets 0‑{n_shown - 1}", fontsize=14)
    fig.tight_layout(rect=[0, 0, 0.97, 0.96])
    plt.show()
| 98 | + |
| 99 | + |
def _plot_simulation_diagnostics(
    df: pd.DataFrame, sim: pd.DataFrame, value_col: str
) -> None:
    """Show three diagnostic plots comparing simulated and original data.

    1. Line plot of the first 96 simulated steps.
    2. Overlaid density histograms of ``df[value_col]`` and ``sim["x_sim"]``.
    3. Box plot of the simulated values grouped by hour of day.

    Args:
        df: Original data containing ``value_col``.
        sim: Simulated data with ``timestamp`` and ``x_sim`` columns. It is
            not modified.
        value_col: Name of the original load column in ``df``.
    """
    first_day = sim.iloc[:96]
    plt.figure(figsize=(10, 3))
    plt.plot(first_day["timestamp"], first_day["x_sim"], marker=".")
    plt.title("Simulated power – first day")
    plt.ylabel("normalised load x")
    plt.tight_layout()
    plt.show()

    plt.figure(figsize=(6, 4))
    # NaNs would make the automatic histogram range non-finite, so drop them.
    plt.hist(df[value_col].dropna(), bins=50, alpha=0.6, density=True, label="original")
    plt.hist(sim["x_sim"].dropna(), bins=50, alpha=0.6, density=True, label="simulated")
    plt.title("Original vs simulated load distribution")
    plt.xlabel("normalised load x")
    plt.ylabel("density")
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Work on a copy so the caller's frame does not gain an "hour" column.
    by_hour = sim.assign(hour=sim["timestamp"].dt.hour)
    # Pass the axes explicitly: with ``by=`` and no ``ax`` pandas opens its
    # own figure, which would leave this one empty.
    fig, ax = plt.subplots(figsize=(10, 4))
    by_hour.boxplot(column="x_sim", by="hour", grid=False, ax=ax)
    fig.suptitle("")  # remove the automatic "Boxplot grouped by ..." title
    ax.set_title("Simulated power by hour of day")
    ax.set_xlabel("hour of day")
    ax.set_ylabel("normalised load x")
    fig.tight_layout()
    plt.show()
| 130 | + |
| 131 | + |
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments