diskretisieren der Daten und plotten

PhilippSchmelter · PhilippSchmelter · commit 139c26bf8c0d · 2025-04-29T00:43:02.000+02:00
diff --git a/README.md b/README.md
@@ -39,6 +39,8 @@ poetry install
 # Install and activate the pre-commit hooks
 poetry run setup
 ```
+#### Alternative
+To run the format checks manually instead, use `poetry run pre-commit run --all-files`.
 
 ## ▶️ Usage
 
diff --git a/src/main.py b/src/main.py
@@ -1,12 +1,30 @@
+import matplotlib.pyplot as plt
+
 from src.preprocessing.loader import load_raw_timeseries
-from src.preprocessing.scaling import normalize_power
+from src.preprocessing.scaling import discretize_power, normalize_power
+
+
+def plot_state_distribution(df):
+    """Show a bar chart of how many rows fall into each value of ``df["state"]``.
+
+    Counts the occurrences of every distinct value in the ``"state"`` column,
+    orders the counts by state value, and displays the chart via ``plt.show()``.
+    Nothing is returned.
+    """
+
+    counts = df["state"].value_counts().sort_index()
+
+    plt.figure()
+    plt.bar(counts.index, counts.values)
+    plt.xlabel("State")
+    plt.ylabel("Anzahl Einträge")
+    plt.title("Verteilung der Einträge nach State")
+    # Put a tick on every state that occurs instead of matplotlib's automatic ticks.
+    plt.xticks(counts.index)
+    plt.show()
 
 
 def main():
+    """Load the raw time series, normalize and discretize it, then plot the states.
+
+    The DataFrame is printed after loading and after each processing step.
+    """
     df = load_raw_timeseries()
     print(df)
-    df = normalize_power(df, col="power")
+    # NOTE(review): now relies on normalize_power's default column — confirm
+    # that default is "power", since the signature is not visible here.
+    df = normalize_power(df)
+    print(df)
+    df = discretize_power(df)
     print(df)
+    plot_state_distribution(df)
 
 
 if __name__ == "__main__":
diff --git a/src/preprocessing/scaling.py b/src/preprocessing/scaling.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 
 
@@ -16,3 +17,18 @@ def normalize_power(
 
     df[col] = (df[col] - p_min) / denom
     return df
+
+
+def discretize_power(
+    df: pd.DataFrame,
+    *,
+    col: str = "power",
+    state_col: str = "state",
+) -> pd.DataFrame:
+    """Assign each row an integer state using quadratically spaced thresholds.
+
+    The thresholds are ``(k / 10) ** 2`` for ``k = 1..9`` (0.01, 0.04, ..., 0.81),
+    so the resulting states range from 0 to 9. A value exactly equal to a
+    threshold is placed in the higher state.
+
+    Args:
+        df: Frame containing the column ``col``; it is modified in place.
+        col: Name of the column to discretize.
+        state_col: Name of the column that receives the computed states.
+
+    Returns:
+        The same ``df`` object with ``state_col`` added or overwritten.
+    """
+    # NOTE(review): these thresholds presumably assume values already scaled
+    # to [0, 1] (e.g. by normalize_power) — confirm with the callers.
+    taus = np.array([(k / 10) ** 2 for k in range(1, 10)], dtype=float)
+
+    values = df[col].to_numpy()
+    # side="right" yields the number of thresholds <= value, which is the state.
+    # NOTE(review): NumPy orders NaN after every threshold, so missing values
+    # would end up in state 9 — confirm that this is intended.
+    states = np.searchsorted(taus, values, side="right")
+
+    df[state_col] = states
+    return df