9
9
from src .markov .transitions import build_transition_matrices
10
10
from src .preprocessing .loader import load_timeseries
11
11
12
# Length of the synthetic series, in days.
SIM_DAYS = 10
# Number of 15-minute steps in one day (24 h * 4 steps per hour); the
# simulation generates its timestamps at a 15-minute frequency.
PER_DAY = 96
13
14
14
15
15
16
def _detect_value_col (df : pd .DataFrame ) -> str :
16
- candidate_cols = ["x" , "value" , "load" , "power" , "p_norm" , "load_norm" ]
17
- for c in candidate_cols :
17
+ for c in ["x" , "value" , "load" , "power" , "p_norm" , "load_norm" ]:
18
18
if c in df .columns and np .issubdtype (df [c ].dtype , np .number ):
19
19
return c
20
- raise KeyError ("No numeric load column found – please inspect the dataframe. " )
20
+ raise KeyError ("numeric load column missing " )
21
21
22
22
23
23
def _simulate_series (
@@ -28,54 +28,25 @@ def _simulate_series(
28
28
periods : int ,
29
29
rng : np .random .Generator | None = None ,
30
30
) -> pd .DataFrame :
31
- """Generate synthetic 15‑min series (timestamp, state, x)."""
32
31
rng = np .random .default_rng () if rng is None else rng
33
- timestamps = pd .date_range (start_ts , periods = periods , freq = "15min" )
32
+ ts = pd .date_range (start_ts , periods = periods , freq = "15min" )
34
33
states = np .empty (periods , dtype = int )
35
34
xs = np .empty (periods , dtype = float )
36
35
37
36
s = start_state
38
- for i , ts in enumerate (timestamps ):
39
- b = bucket_id (ts )
37
+ for i , t in enumerate (ts ):
38
+ b = bucket_id (t )
40
39
s = rng .choice (probs .shape [1 ], p = probs [b , s ])
41
40
states [i ] = s
42
41
xs [i ] = sample_value (gmms , b , s , rng = rng )
43
- return pd .DataFrame ({"timestamp" : timestamps , "state" : states , "x_sim" : xs })
44
42
45
-
46
- def main () -> None :
47
- df = load_timeseries (normalize = True , discretize = True )
48
- if "bucket" not in df .columns :
49
- df = assign_buckets (df )
50
-
51
- value_col = _detect_value_col (df )
52
- print ("Using load column:" , value_col )
53
-
54
- counts = build_transition_counts (df )
55
- probs = build_transition_matrices (df )
56
-
57
- _plot_first_25_buckets (counts , probs )
58
-
59
- print ("Fitting GMMs … (this may take a moment)" )
60
- gmms = fit_gmms (df , value_col = value_col )
61
-
62
- periods = SIM_DAYS * 96
63
- sim_df = _simulate_series (
64
- probs ,
65
- gmms ,
66
- start_ts = df ["timestamp" ].min ().normalize (),
67
- start_state = int (df ["state" ].iloc [0 ]),
68
- periods = periods ,
69
- )
70
-
71
- _plot_simulation_diagnostics (df , sim_df , value_col )
43
+ return pd .DataFrame ({"timestamp" : ts , "state" : states , "x_sim" : xs })
72
44
73
45
74
46
def _plot_first_25_buckets (counts : np .ndarray , probs : np .ndarray ) -> None :
75
- """Heat‑map grid for buckets 0‑24."""
76
- buckets = list (range (25 ))
47
+ buckets = range (25 )
77
48
fig , axes = plt .subplots (5 , 5 , figsize = (15 , 15 ), sharex = True , sharey = True )
78
- vmax = probs [buckets ].max ()
49
+ vmax = probs [list ( buckets ) ].max ()
79
50
norm = Normalize (vmin = 0 , vmax = vmax )
80
51
81
52
for idx , b in enumerate (buckets ):
@@ -92,15 +63,15 @@ def _plot_first_25_buckets(counts: np.ndarray, probs: np.ndarray) -> None:
92
63
ax .axis ("off" )
93
64
94
65
fig .colorbar (im , ax = axes .ravel ().tolist (), shrink = 0.6 , label = "p" )
95
- fig .suptitle ("Transition probabilities – buckets 0‑ 24" , fontsize = 14 )
96
- fig .tight_layout (rect = [0 , 0 , 0.97 , 0.96 ])
66
+ fig .suptitle ("Transition probabilities – buckets 0– 24" , fontsize = 14 )
67
+ plt .tight_layout (rect = [0 , 0 , 0.97 , 0.96 ])
97
68
plt .show ()
98
69
99
70
100
71
def _plot_simulation_diagnostics (
101
72
df : pd .DataFrame , sim : pd .DataFrame , value_col : str
102
73
) -> None :
103
- first_day = sim .iloc [:96 ]
74
+ first_day = sim .iloc [:PER_DAY ]
104
75
plt .figure (figsize = (10 , 3 ))
105
76
plt .plot (first_day ["timestamp" ], first_day ["x_sim" ], marker = "." )
106
77
plt .title ("Simulated power – first day" )
@@ -121,13 +92,43 @@ def _plot_simulation_diagnostics(
121
92
sim ["hour" ] = sim ["timestamp" ].dt .hour
122
93
plt .figure (figsize = (10 , 4 ))
123
94
sim .boxplot (column = "x_sim" , by = "hour" , grid = False )
124
- plt .suptitle ("" )
125
95
plt .title ("Simulated power by hour of day" )
126
96
plt .xlabel ("hour of day" )
127
97
plt .ylabel ("normalised load x" )
128
98
plt .tight_layout ()
129
99
plt .show ()
130
100
131
101
102
def main() -> None:
    """Run the full pipeline: load, model, simulate, and plot.

    The steps run in this order:

    1. Load the time series (normalised and discretised) and add a
       ``bucket`` column when the loader did not provide one.
    2. Pick the numeric load column.
    3. Build the transition counts and probabilities and plot them.
    4. Fit the GMMs on the load column.
    5. Simulate ``SIM_DAYS * PER_DAY`` steps, starting at midnight of the
       earliest timestamp and from the state of the first row.
    6. Plot the simulation diagnostics.
    """
    data = load_timeseries(normalize=True, discretize=True)
    if "bucket" not in data.columns:
        data = assign_buckets(data)

    load_col = _detect_value_col(data)

    transition_counts = build_transition_counts(data)
    transition_probs = build_transition_matrices(data)
    _plot_first_25_buckets(transition_counts, transition_probs)

    mixtures = fit_gmms(data, value_col=load_col, verbose=1, heartbeat_seconds=60)

    n_steps = SIM_DAYS * PER_DAY
    # normalize() truncates the earliest timestamp to midnight so the
    # simulated series starts on a day boundary.
    first_midnight = data["timestamp"].min().normalize()
    initial_state = int(data["state"].iloc[0])
    simulated = _simulate_series(
        transition_probs,
        mixtures,
        start_ts=first_midnight,
        start_state=initial_state,
        periods=n_steps,
    )

    _plot_simulation_diagnostics(data, simulated, load_col)
131
+
132
+
132
133
if __name__ == "__main__" :
133
134
main ()
0 commit comments