Initial commit
sasax7 committed Dec 19, 2024
0 parents commit 6a07224
Showing 7 changed files with 291 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
keys.txt
Binary file added __pycache__/get_trend_data.cpython-311.pyc
Binary file not shown.
148 changes: 148 additions & 0 deletions corelation.py
@@ -0,0 +1,148 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pytz
from datetime import datetime
from get_trend_data import fetch_pandas_data


def compute_cross_correlation(df, lags, frequency_label):
    """
    Compute the lagged cross-correlation for every pair of columns in the
    module-level `numerical_columns`, keeping, for each pair, the lag whose
    correlation has the largest absolute value.
    """
    best_correlation_results = []
    for col1 in numerical_columns:
        for col2 in numerical_columns:
            max_corr = 0
            best_lag = 0
            for lag in lags:
                shifted_series = df[col2].shift(lag)
                correlation = df[col1].corr(shifted_series)
                # Skip NaN correlations (constant columns, or too little
                # overlap after shifting) so they never win the comparison
                if pd.notna(correlation) and abs(correlation) > abs(max_corr):
                    max_corr = correlation
                    best_lag = lag
            best_correlation_results.append(
                {
                    "Attribute 1": col1,
                    "Attribute 2": col2,
                    "Best Lag": best_lag,
                    "Max Correlation": max_corr,
                    "Frequency": frequency_label,
                }
            )
    return pd.DataFrame(best_correlation_results)
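

# Interpretation of "Best Lag": df[col2].shift(lag) with a positive lag moves
# col2 forward in time, so the loop correlates col1 at time t with col2 at
# time t - lag. A positive best lag therefore means col2 leads col1 by that
# many periods; a negative one means col2 trails it.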


def plot_heatmap(results_df, frequency_label):
    """
    Plot a heatmap of the best correlation for each attribute pair,
    annotated with the lag at which that correlation occurs.
    """
    heatmap_corr = pd.DataFrame(
        index=numerical_columns, columns=numerical_columns, dtype=float
    )
    # Lags are held as floats here because an empty frame cannot store NaN
    # in an integer column; they are cast back to int for the annotations
    heatmap_lag = pd.DataFrame(
        index=numerical_columns, columns=numerical_columns, dtype=float
    )

    # Fill heatmap data. results_df already contains every ordered pair, so
    # no symmetric mirroring is needed; the matrix is generally not symmetric,
    # since the best lag for (A, B) is the negative of the one for (B, A).
    for _, row in results_df.iterrows():
        attr1 = row["Attribute 1"]
        attr2 = row["Attribute 2"]
        heatmap_corr.loc[attr1, attr2] = row["Max Correlation"]
        heatmap_lag.loc[attr1, attr2] = row["Best Lag"]

    # Fill any remaining NaN values with 0 for correlations and lags
    heatmap_corr = heatmap_corr.fillna(0)
    heatmap_lag = heatmap_lag.fillna(0)

    # Build an annotation matrix combining correlation and lag
    # (object dtype so the cells can hold strings)
    annotations = heatmap_corr.astype(object)
    for i in annotations.index:
        for j in annotations.columns:
            corr_value = heatmap_corr.loc[i, j]
            lag_value = heatmap_lag.loc[i, j]
            annotations.loc[i, j] = f"{corr_value:.2f}\nLag: {int(lag_value)}"

# Plot heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(
heatmap_corr,
annot=annotations,
fmt="",
cmap="coolwarm",
cbar=True,
linewidths=0.5,
vmin=-1, # Minimum value for color scale
vmax=1, # Maximum value for color scale
)
plt.title(f"Cross-Correlation Heatmap ({frequency_label})")
plt.show()


# Fetch data
tz = pytz.timezone("Europe/Berlin")
start_date_str = "2024-01-01"
start_date = tz.localize(datetime.strptime(start_date_str, "%Y-%m-%d"))
end_date_str = "2025-01-01"
end_date = tz.localize(datetime.strptime(end_date_str, "%Y-%m-%d"))
df = fetch_pandas_data(1603, start_date, end_date)

# Print initial data
print("Initial Data:")
print(df.head())

# Combine rows within the same minute by grouping on rounded timestamps
df["timestamp"] = pd.to_datetime(df["timestamp"])
df["minute"] = df["timestamp"].dt.floor("min")  # Round down to the minute
# numeric_only=True keeps the original timestamp column out of the mean, so
# it cannot collide with the renamed "minute" column afterwards
df = df.groupby("minute").mean(numeric_only=True)
df = df.reset_index().rename(columns={"minute": "timestamp"})

# Forward-fill missing values (fillna(method="ffill") is deprecated)
df = df.ffill()

# Print cleaned data
print("Cleaned Data:")
print(df.head())

# Select numerical attributes
numerical_columns = df.select_dtypes(include=[np.number]).columns

# Define time scales for analysis (pandas offset aliases; the deprecated
# "T"/"H" aliases are replaced with "min"/"h")
time_scales = [
    {"label": "Minutes", "groupby": "min", "lags": range(-60, 61)},
    {"label": "Hours", "groupby": "h", "lags": range(-24, 25)},
    {"label": "Days", "groupby": "D", "lags": range(-30, 31)},
    {"label": "Months", "groupby": "M", "lags": range(-12, 13)},
]

# Analyze at each time scale
for scale in time_scales:
    print(f"Analyzing at the {scale['label']} scale...")
    df_grouped = df.copy()

    # Make sure no duplicate column names remain
    df_grouped = df_grouped.loc[:, ~df_grouped.columns.duplicated()]

    # Round the timestamp down to the current time scale. to_period() is used
    # instead of dt.floor(), which raises on non-fixed frequencies like months.
    df_grouped["time_unit"] = (
        pd.to_datetime(df_grouped["timestamp"])
        .dt.to_period(scale["groupby"])
        .dt.to_timestamp()
    )

    # Group by the time scale and average the values
    df_grouped = df_grouped.groupby("time_unit").mean(numeric_only=True).reset_index()

    # Forward-fill missing values
    df_grouped = df_grouped.ffill()

    # Compute the cross-correlation
    results = compute_cross_correlation(df_grouped, scale["lags"], scale["label"])
    print(f"Results for {scale['label']} scale:")
    print(results)

    # Plot the heatmap
    plot_heatmap(results, scale["label"])
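
A quick sanity check for the lag search, run with corelation.py's definitions in scope (the toy frame, the seed, and the reassignment of `numerical_columns` below are illustrative only, not part of the commit):

rng = np.random.default_rng(0)
base = rng.normal(size=500)
toy = pd.DataFrame({"a": base[5:], "b": base[:-5]})  # "a" at t equals "b" at t + 5
numerical_columns = toy.columns
print(compute_cross_correlation(toy, range(-10, 11), "toy"))
# The ("a", "b") row should report Best Lag -5 with Max Correlation ~1.00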
Empty file added get_all_attributes.py
Empty file.
101 changes: 101 additions & 0 deletions get_trend_data.py
@@ -0,0 +1,101 @@
import os
import logging
from datetime import timedelta

import pandas as pd
import eliona.api_client2
from eliona.api_client2.rest import ApiException
from eliona.api_client2.api.data_api import DataApi

# Initialize the logger
logger = logging.getLogger(__name__)
# Set up configuration for the Eliona API
configuration = eliona.api_client2.Configuration(host=os.getenv("API_ENDPOINT"))
configuration.api_key["ApiKeyAuth"] = os.getenv("API_TOKEN")

# Create an instance of the API client
api_client = eliona.api_client2.ApiClient(configuration)
data_api = DataApi(api_client)


def get_trend_data(asset_id, start_date, end_date):
asset_id = int(asset_id)
from_date = start_date.isoformat()
to_date = end_date.isoformat()
try:
logger.info(f"Fetching data for asset {asset_id} from {from_date} to {to_date}")
result = data_api.get_data_trends(
from_date=from_date,
to_date=to_date,
asset_id=asset_id,
data_subtype="input",
)
logger.info(f"Received {len(result)} data points")
return result
    except ApiException as e:
        logger.error(f"Exception when calling DataApi->get_data_trends: {e}")
return None


def fetch_data_in_chunks(asset_id, start_date, end_date):
    """Fetch trend data in 5-day chunks to keep each API request small."""
    all_data = []
    current_start = start_date
    while current_start < end_date:
        current_end = min(current_start + timedelta(days=5), end_date)
        data_chunk = get_trend_data(asset_id, current_start, current_end)
        if data_chunk:
            all_data.extend(data_chunk)
        # Advance one second past the chunk end so the boundary timestamp
        # is not fetched twice
        current_start = current_end + timedelta(seconds=1)
    return all_data


def convert_to_pandas(data):
# Dictionary to hold the rows, using the timestamp as the key
formatted_data = {}

for entry in data:
# Extract timestamp and data
timestamp = entry.timestamp
data_dict = entry.data

# If this timestamp already exists, update the existing row
if timestamp in formatted_data:
formatted_data[timestamp].update(data_dict)
else:
# Create a new row for this timestamp
formatted_data[timestamp] = data_dict

# Convert the dictionary to a pandas DataFrame
df = pd.DataFrame.from_dict(formatted_data, orient="index")

# Set the index (timestamp) as a proper datetime index
df.index = pd.to_datetime(df.index, utc=True)

# Convert the index to the desired timezone (e.g., Europe/Berlin)
df.index = df.index.tz_convert("Europe/Berlin")

    # Sort the DataFrame by its timestamp index
df.sort_index(inplace=True)

# Reset index to have 'timestamp' as a column
df.reset_index(inplace=True)
df.rename(columns={"index": "timestamp"}, inplace=True)

return df


def fetch_pandas_data(asset_id, start_date, end_date):
# Fetch all data without filtering by attributes
print(f"Fetching data for asset {asset_id} from {start_date} to {end_date}")
data = fetch_data_in_chunks(asset_id, start_date, end_date)

# Convert data to pandas DataFrame
df = convert_to_pandas(data)

return df
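
A minimal call sketch for fetch_pandas_data, assuming API_ENDPOINT and API_TOKEN are exported before the module is imported (the asset id 1603 and the Europe/Berlin timezone mirror corelation.py; the one-week window is illustrative):

import pytz
from datetime import datetime

from get_trend_data import fetch_pandas_data

tz = pytz.timezone("Europe/Berlin")
start = tz.localize(datetime(2024, 1, 1))
end = tz.localize(datetime(2024, 1, 7))
print(fetch_pandas_data(1603, start, end).head())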
6 changes: 6 additions & 0 deletions main.py
@@ -0,0 +1,6 @@
import os
import uvicorn

def start_api():
port = int(os.getenv("API_SERVER_PORT", 3000))
uvicorn.run("api.openapi:app", host="0.0.0.0", port=port)
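
As committed, nothing calls start_api, and the "api.openapi:app" module it points at is not part of this commit. If main.py is meant to be run directly, a conventional entry-point guard would be (a sketch, assuming direct execution):

if __name__ == "__main__":
    start_api()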
35 changes: 35 additions & 0 deletions register_app.py
@@ -0,0 +1,35 @@
import logging
import os
from eliona.api_client2 import (
AppsApi,
ApiClient,
Configuration,
)


host = os.getenv("API_ENDPOINT")
api_key = os.getenv("API_TOKEN")


configuration = Configuration(host=host)
configuration.api_key["ApiKeyAuth"] = api_key
api_client = ApiClient(configuration)


apps_api = AppsApi(api_client)


# Initialize the logger
logger = logging.getLogger(__name__)


def Initialize():
    app = apps_api.get_app_by_name("correlations")

if not app.registered:
apps_api.patch_app_by_name("correlations", True)
logger.info("App 'correlations' registered.")

else:
logger.info("App 'correlations' already active.")
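
Initialize() is likewise never invoked in this commit; a minimal sketch of the expected startup call, assuming it runs once before the app starts serving:

from register_app import Initialize

Initialize()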
