diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..90cfd59
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+keys.txt
diff --git a/__pycache__/get_trend_data.cpython-311.pyc b/__pycache__/get_trend_data.cpython-311.pyc
new file mode 100644
index 0000000..2d6fb9c
Binary files /dev/null and b/__pycache__/get_trend_data.cpython-311.pyc differ
diff --git a/corelation.py b/corelation.py
new file mode 100644
index 0000000..767db23
--- /dev/null
+++ b/corelation.py
@@ -0,0 +1,148 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import pytz
+from datetime import datetime
+from get_trend_data import fetch_pandas_data
+
+
+def compute_cross_correlation(df, lags, frequency_label):
+    """
+    Compute the cross-correlation for all pairs in the module-level numerical_columns over the given lags.
+    """
+    best_correlation_results = []
+    for col1 in numerical_columns:
+        for col2 in numerical_columns:
+            max_corr = 0
+            best_lag = 0
+            for lag in lags:
+                shifted_series = df[col2].shift(lag)
+                correlation = df[col1].corr(shifted_series)
+                if abs(correlation) > abs(max_corr):
+                    max_corr = correlation
+                    best_lag = lag
+            best_correlation_results.append(
+                {
+                    "Attribute 1": col1,
+                    "Attribute 2": col2,
+                    "Best Lag": best_lag,
+                    "Max Correlation": max_corr,
+                    "Frequency": frequency_label,
+                }
+            )
+    return pd.DataFrame(best_correlation_results)
+
+
+def plot_heatmap(results_df, frequency_label):
+    """
+    Plot a heatmap of the best correlations and lags for the given frequency.
+    """
+    heatmap_corr = pd.DataFrame(
+        index=numerical_columns, columns=numerical_columns, dtype=float
+    )
+    heatmap_lag = pd.DataFrame(
+        index=numerical_columns, columns=numerical_columns, dtype=int
+    )
+
+    # Fill heatmap data
+    for _, row in results_df.iterrows():
+        attr1 = row["Attribute 1"]
+        attr2 = row["Attribute 2"]
+        heatmap_corr.loc[attr1, attr2] = row["Max Correlation"]
+        heatmap_corr.loc[attr2, attr1] = row["Max Correlation"]  # Symmetric matrix
+        heatmap_lag.loc[attr1, attr2] = row["Best Lag"]
+        heatmap_lag.loc[attr2, attr1] = row["Best Lag"]  # Symmetric matrix
+
+    # Fill NaN values with 0 for correlations and lags
+    heatmap_corr = heatmap_corr.fillna(0)
+    heatmap_lag = heatmap_lag.fillna(0)
+
+    # Create annotation matrix with both correlation and lag
+    annotations = heatmap_corr.copy().astype(object)  # object dtype so string labels can be stored
+    for i in annotations.index:
+        for j in annotations.columns:
+            corr_value = heatmap_corr.loc[i, j]
+            lag_value = heatmap_lag.loc[i, j]
+            annotations.loc[i, j] = f"{corr_value:.2f}\nLag: {int(lag_value)}"
+
+    # Plot heatmap
+    plt.figure(figsize=(12, 8))
+    sns.heatmap(
+        heatmap_corr,
+        annot=annotations,
+        fmt="",
+        cmap="coolwarm",
+        cbar=True,
+        linewidths=0.5,
+        vmin=-1,  # Minimum value for color scale
+        vmax=1,  # Maximum value for color scale
+    )
+    plt.title(f"Cross-Correlation Heatmap ({frequency_label})")
+    plt.show()
+
+
+# Fetch data
+tz = pytz.timezone("Europe/Berlin")
+start_date_str = "2024-1-1"
+start_date = tz.localize(datetime.strptime(start_date_str, "%Y-%m-%d"))
+end_date_str = "2025-1-1"
+end_date = tz.localize(datetime.strptime(end_date_str, "%Y-%m-%d"))
+df = fetch_pandas_data(1603, start_date, end_date)
+
+# Print initial data
+print("Initial Data:")
+print(df.head())
+
+# Combine rows with the same minute by grouping by rounded timestamps
+df["timestamp"] = pd.to_datetime(df["timestamp"])
+df["minute"] = df["timestamp"].dt.floor("T")  # Round down to the minute
+df = df.groupby("minute").mean()  # Average the values for each minute
+df = df.reset_index().rename(columns={"minute": "timestamp"})
+
+# Forward-fill missing values
+df = df.ffill()
+
+# Print cleaned data
+print("Cleaned Data:")
+print(df.head())
+
+# Select numerical attributes
+numerical_columns = df.select_dtypes(include=[np.number]).columns
+
+# Define time scales for analysis
+time_scales = [
+    {"label": "Minutes", "groupby": "T", "lags": range(-60, 61)},
+    {"label": "Hours", "groupby": "H", "lags": range(-24, 25)},
+    {"label": "Days", "groupby": "D", "lags": range(-30, 31)},
+    {"label": "Months", "groupby": "M", "lags": range(-12, 13)},
+]
+
+# Analyze for each time scale
+for scale in time_scales:
+    print(f"Analyzing for {scale['label']} scale...")
+    df_grouped = df.copy()
+
+    # Make sure no duplicate column names remain
+    df_grouped = df_grouped.loc[
+        :, ~df_grouped.columns.duplicated()
+    ]  # Drop duplicate columns
+
+    # Bin the timestamp to the current time scale (to_period also handles monthly bins, unlike dt.floor)
+    df_grouped["time_unit"] = (
+        pd.to_datetime(df_grouped["timestamp"]).dt.to_period(scale["groupby"]).dt.to_timestamp()
+    )
+
+    # Group by the time scale and compute the mean values
+    df_grouped = df_grouped.groupby("time_unit").mean().reset_index()
+
+    # Forward-fill missing values
+    df_grouped = df_grouped.ffill()
+
+    # Compute the cross-correlation
+    results = compute_cross_correlation(df_grouped, scale["lags"], scale["label"])
+    print(f"Results for {scale['label']} scale:")
+    print(results)
+
+    # Plot the heatmap
+    plot_heatmap(results, scale["label"])
diff --git a/get_all_attributes.py b/get_all_attributes.py
new file mode 100644
index 0000000..e69de29
diff --git a/get_trend_data.py b/get_trend_data.py
new file mode 100644
index 0000000..9e0f709
--- /dev/null
+++ b/get_trend_data.py
@@ -0,0 +1,101 @@
+import pandas as pd
+import numpy as np
+from datetime import timedelta
+import eliona.api_client2
+from eliona.api_client2.rest import ApiException
+from eliona.api_client2.api.data_api import DataApi
+import os
+import logging
+import pytz
+from datetime import datetime
+
+# Initialize the logger
+logger = logging.getLogger(__name__)
+# Set up configuration for the Eliona API
+configuration = eliona.api_client2.Configuration(host=os.getenv("API_ENDPOINT"))
+configuration.api_key["ApiKeyAuth"] = os.getenv("API_TOKEN")
+
+# Create an instance of the API client
+api_client = eliona.api_client2.ApiClient(configuration)
+data_api = DataApi(api_client)
+
+
+def get_trend_data(asset_id, start_date, end_date):
+    asset_id = int(asset_id)
+    from_date = start_date.isoformat()
+    to_date = end_date.isoformat()
+    try:
+        logger.info(f"Fetching data for asset {asset_id} from {from_date} to {to_date}")
+        result = data_api.get_data_trends(
+            from_date=from_date,
+            to_date=to_date,
+            asset_id=asset_id,
+            data_subtype="input",
+        )
+        logger.info(f"Received {len(result)} data points")
+        return result
+    except ApiException as e:
+        logger.error(f"Exception when calling DataApi->get_data_trends: {e}")
+        return None
+
+
+def fetch_data_in_chunks(asset_id, start_date, end_date):
+    all_data = []
+    current_start = start_date
+    while current_start < end_date:
+        current_end = min(current_start + timedelta(days=5), end_date)
+        data_chunk = get_trend_data(asset_id, current_start, current_end)
+        if data_chunk:
+            all_data.extend(data_chunk)
+        current_start = current_end + timedelta(seconds=1)
+    return all_data
+
+
+def convert_to_pandas(data):
+    # Dictionary to hold the rows, using the timestamp as the key
+    formatted_data = {}
+
+    for entry in data:
+        # Extract timestamp and data
+        timestamp = entry.timestamp
+        data_dict = entry.data
+
+        # If this timestamp already exists, update the existing row
+        if timestamp in formatted_data:
+            formatted_data[timestamp].update(data_dict)
+        else:
+            # Create a new row for this timestamp
+            formatted_data[timestamp] = data_dict
+
+    # Convert the dictionary to a pandas DataFrame
+    df = pd.DataFrame.from_dict(formatted_data, orient="index")
+
+    # Set the index (timestamp) as a proper datetime index
+    df.index = pd.to_datetime(df.index, utc=True)
+
+    # Convert the index to the desired timezone (e.g., Europe/Berlin)
+    df.index = df.index.tz_convert("Europe/Berlin")
+
+    # Optional: sort the DataFrame by index (timestamp)
+    df.sort_index(inplace=True)
+
+    # Reset index to have 'timestamp' as a column
+    df.reset_index(inplace=True)
+    df.rename(columns={"index": "timestamp"}, inplace=True)
+
+    return df
+
+
+def fetch_pandas_data(
+    asset_id,
+    start_date,
+    end_date,
+):
+    # Fetch all data without filtering by attributes
+    print(f"Fetching data for asset {asset_id} from {start_date} to {end_date}")
+    data = fetch_data_in_chunks(asset_id, start_date, end_date)
+
+    # Convert data to pandas DataFrame
+    df = convert_to_pandas(data)
+
+    return df
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..9b8ef00
--- /dev/null
+++ b/main.py
@@ -0,0 +1,6 @@
+import os
+import uvicorn
+
+def start_api():
+    port = int(os.getenv("API_SERVER_PORT", 3000))
+    uvicorn.run("api.openapi:app", host="0.0.0.0", port=port)
diff --git a/register_app.py b/register_app.py
new file mode 100644
index 0000000..5a9786a
--- /dev/null
+++ b/register_app.py
@@ -0,0 +1,35 @@
+import logging
+import os
+from eliona.api_client2 import (
+    AppsApi,
+    ApiClient,
+    Configuration,
+)
+
+
+host = os.getenv("API_ENDPOINT")
+api_key = os.getenv("API_TOKEN")
+
+
+configuration = Configuration(host=host)
+configuration.api_key["ApiKeyAuth"] = api_key
+api_client = ApiClient(configuration)
+
+
+apps_api = AppsApi(api_client)
+
+
+# Initialize the logger
+logger = logging.getLogger(__name__)
+
+
+def Initialize():
+
+    app = apps_api.get_app_by_name("correlations")
+
+    if not app.registered:
+        apps_api.patch_app_by_name("correlations", True)
+        logger.info("App 'correlations' registered.")
+
+    else:
+        logger.info("App 'correlations' already active.")
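
As a quick, offline sanity check of the lag scan used in compute_cross_correlation (corelation.py), the following is a minimal, self-contained sketch of the same inner loop on synthetic data. It is not part of the diff: the column names "a" and "b", the 10-sample shift, and the lag range are illustrative assumptions, and the sketch deliberately does not import corelation.py, since that module fetches live data at import time.

    import numpy as np
    import pandas as pd

    # Synthetic series: "b" is "a" delayed by 10 samples, plus a little noise.
    rng = np.random.default_rng(0)
    a = np.sin(np.linspace(0, 20, 500)) + rng.normal(0, 0.05, 500)
    df = pd.DataFrame({"a": a, "b": np.roll(a, 10)})

    # Same lag scan as compute_cross_correlation, for one column pair.
    best_lag, max_corr = 0, 0.0
    for lag in range(-60, 61):
        corr = df["a"].corr(df["b"].shift(lag))
        if abs(corr) > abs(max_corr):
            best_lag, max_corr = lag, corr

    # Expect a best lag of about -10 (pandas shift convention) with correlation near 1.
    print(f"best lag: {best_lag}, correlation: {max_corr:.2f}")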