Initial commit
sasax7 committed Dec 19, 2024
0 parents commit 6a07224
Showing 7 changed files with 291 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
keys.txt
Binary file added __pycache__/get_trend_data.cpython-311.pyc
Binary file not shown.
148 changes: 148 additions & 0 deletions corelation.py
@@ -0,0 +1,148 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pytz
from datetime import datetime
from get_trend_data import fetch_pandas_data


def compute_cross_correlation(df, lags, frequency_label):
    """
    Compute the lagged cross-correlation for every pair of columns in the
    module-level `numerical_columns`, keeping, for each pair, the lag whose
    correlation has the largest absolute value.
    """
    best_correlation_results = []
    for col1 in numerical_columns:
        for col2 in numerical_columns:
            max_corr = 0
            best_lag = 0
            for lag in lags:
                shifted_series = df[col2].shift(lag)
                correlation = df[col1].corr(shifted_series)
                # Skip NaN correlations (constant columns, or too little
                # overlap after shifting) so they never win the comparison
                if pd.notna(correlation) and abs(correlation) > abs(max_corr):
                    max_corr = correlation
                    best_lag = lag
            best_correlation_results.append(
                {
                    "Attribute 1": col1,
                    "Attribute 2": col2,
                    "Best Lag": best_lag,
                    "Max Correlation": max_corr,
                    "Frequency": frequency_label,
                }
            )
    return pd.DataFrame(best_correlation_results)
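

# Interpretation of "Best Lag": df[col2].shift(lag) with a positive lag moves
# col2 forward in time, so the loop correlates col1 at time t with col2 at
# time t - lag. A positive best lag therefore means col2 leads col1 by that
# many periods; a negative one means col2 trails it.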


def plot_heatmap(results_df, frequency_label):
    """
    Plot a heatmap of the best correlation for each attribute pair,
    annotated with the lag at which that correlation occurs.
    """
    heatmap_corr = pd.DataFrame(
        index=numerical_columns, columns=numerical_columns, dtype=float
    )
    # Lags are held as floats here because an empty frame cannot store NaN
    # in an integer column; they are cast back to int for the annotations
    heatmap_lag = pd.DataFrame(
        index=numerical_columns, columns=numerical_columns, dtype=float
    )

    # Fill heatmap data. results_df already contains every ordered pair, so
    # no symmetric mirroring is needed; the matrix is generally not symmetric,
    # since the best lag for (A, B) is the negative of the one for (B, A).
    for _, row in results_df.iterrows():
        attr1 = row["Attribute 1"]
        attr2 = row["Attribute 2"]
        heatmap_corr.loc[attr1, attr2] = row["Max Correlation"]
        heatmap_lag.loc[attr1, attr2] = row["Best Lag"]

    # Fill any remaining NaN values with 0 for correlations and lags
    heatmap_corr = heatmap_corr.fillna(0)
    heatmap_lag = heatmap_lag.fillna(0)

    # Build an annotation matrix combining correlation and lag
    # (object dtype so the cells can hold strings)
    annotations = heatmap_corr.astype(object)
    for i in annotations.index:
        for j in annotations.columns:
            corr_value = heatmap_corr.loc[i, j]
            lag_value = heatmap_lag.loc[i, j]
            annotations.loc[i, j] = f"{corr_value:.2f}\nLag: {int(lag_value)}"

# Plot heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(
heatmap_corr,
annot=annotations,
fmt="",
cmap="coolwarm",
cbar=True,
linewidths=0.5,
vmin=-1, # Minimum value for color scale
vmax=1, # Maximum value for color scale
)
plt.title(f"Cross-Correlation Heatmap ({frequency_label})")
plt.show()


# Fetch data
tz = pytz.timezone("Europe/Berlin")
start_date_str = "2024-01-01"
start_date = tz.localize(datetime.strptime(start_date_str, "%Y-%m-%d"))
end_date_str = "2025-01-01"
end_date = tz.localize(datetime.strptime(end_date_str, "%Y-%m-%d"))
df = fetch_pandas_data(1603, start_date, end_date)

# Print initial data
print("Initial Data:")
print(df.head())

# Combine rows within the same minute by grouping on rounded timestamps
df["timestamp"] = pd.to_datetime(df["timestamp"])
df["minute"] = df["timestamp"].dt.floor("min")  # Round down to the minute
# numeric_only=True keeps the original timestamp column out of the mean, so
# it cannot collide with the renamed "minute" column afterwards
df = df.groupby("minute").mean(numeric_only=True)
df = df.reset_index().rename(columns={"minute": "timestamp"})

# Forward-fill missing values (fillna(method="ffill") is deprecated)
df = df.ffill()

# Print cleaned data
print("Cleaned Data:")
print(df.head())

# Select numerical attributes
numerical_columns = df.select_dtypes(include=[np.number]).columns

# Define time scales for analysis (pandas offset aliases; the deprecated
# "T"/"H" aliases are replaced with "min"/"h")
time_scales = [
    {"label": "Minutes", "groupby": "min", "lags": range(-60, 61)},
    {"label": "Hours", "groupby": "h", "lags": range(-24, 25)},
    {"label": "Days", "groupby": "D", "lags": range(-30, 31)},
    {"label": "Months", "groupby": "M", "lags": range(-12, 13)},
]

# Analyze at each time scale
for scale in time_scales:
    print(f"Analyzing at the {scale['label']} scale...")
    df_grouped = df.copy()

    # Make sure no duplicate column names remain
    df_grouped = df_grouped.loc[:, ~df_grouped.columns.duplicated()]

    # Round the timestamp down to the current time scale. to_period() is used
    # instead of dt.floor(), which raises on non-fixed frequencies like months.
    df_grouped["time_unit"] = (
        pd.to_datetime(df_grouped["timestamp"])
        .dt.to_period(scale["groupby"])
        .dt.to_timestamp()
    )

    # Group by the time scale and average the values
    df_grouped = df_grouped.groupby("time_unit").mean(numeric_only=True).reset_index()

    # Forward-fill missing values
    df_grouped = df_grouped.ffill()

    # Compute the cross-correlation
    results = compute_cross_correlation(df_grouped, scale["lags"], scale["label"])
    print(f"Results for {scale['label']} scale:")
    print(results)

    # Plot the heatmap
    plot_heatmap(results, scale["label"])
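
A quick sanity check for the lag search, run with corelation.py's definitions in scope (the toy frame, the seed, and the reassignment of `numerical_columns` below are illustrative only, not part of the commit):

rng = np.random.default_rng(0)
base = rng.normal(size=500)
toy = pd.DataFrame({"a": base[5:], "b": base[:-5]})  # "a" at t equals "b" at t + 5
numerical_columns = toy.columns
print(compute_cross_correlation(toy, range(-10, 11), "toy"))
# The ("a", "b") row should report Best Lag -5 with Max Correlation ~1.00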
Empty file added get_all_attributes.py
Empty file.
101 changes: 101 additions & 0 deletions get_trend_data.py
@@ -0,0 +1,101 @@
import os
import logging
from datetime import timedelta

import pandas as pd
import eliona.api_client2
from eliona.api_client2.rest import ApiException
from eliona.api_client2.api.data_api import DataApi

# Initialize the logger
logger = logging.getLogger(__name__)
# Set up configuration for the Eliona API
configuration = eliona.api_client2.Configuration(host=os.getenv("API_ENDPOINT"))
configuration.api_key["ApiKeyAuth"] = os.getenv("API_TOKEN")

# Create an instance of the API client
api_client = eliona.api_client2.ApiClient(configuration)
data_api = DataApi(api_client)


def get_trend_data(asset_id, start_date, end_date):
asset_id = int(asset_id)
from_date = start_date.isoformat()
to_date = end_date.isoformat()
try:
logger.info(f"Fetching data for asset {asset_id} from {from_date} to {to_date}")
result = data_api.get_data_trends(
from_date=from_date,
to_date=to_date,
asset_id=asset_id,
data_subtype="input",
)
logger.info(f"Received {len(result)} data points")
return result
    except ApiException as e:
        logger.error(f"Exception when calling DataApi->get_data_trends: {e}")
return None


def fetch_data_in_chunks(asset_id, start_date, end_date):
    """Fetch trend data in 5-day chunks to keep each API request small."""
    all_data = []
    current_start = start_date
    while current_start < end_date:
        current_end = min(current_start + timedelta(days=5), end_date)
        data_chunk = get_trend_data(asset_id, current_start, current_end)
        if data_chunk:
            all_data.extend(data_chunk)
        # Advance one second past the chunk end so the boundary timestamp
        # is not fetched twice
        current_start = current_end + timedelta(seconds=1)
    return all_data


def convert_to_pandas(data):
# Dictionary to hold the rows, using the timestamp as the key
formatted_data = {}

for entry in data:
# Extract timestamp and data
timestamp = entry.timestamp
data_dict = entry.data

# If this timestamp already exists, update the existing row
if timestamp in formatted_data:
formatted_data[timestamp].update(data_dict)
else:
# Create a new row for this timestamp
formatted_data[timestamp] = data_dict

# Convert the dictionary to a pandas DataFrame
df = pd.DataFrame.from_dict(formatted_data, orient="index")

# Set the index (timestamp) as a proper datetime index
df.index = pd.to_datetime(df.index, utc=True)

# Convert the index to the desired timezone (e.g., Europe/Berlin)
df.index = df.index.tz_convert("Europe/Berlin")

    # Sort the DataFrame by its timestamp index
df.sort_index(inplace=True)

# Reset index to have 'timestamp' as a column
df.reset_index(inplace=True)
df.rename(columns={"index": "timestamp"}, inplace=True)

return df


def fetch_pandas_data(asset_id, start_date, end_date):
# Fetch all data without filtering by attributes
print(f"Fetching data for asset {asset_id} from {start_date} to {end_date}")
data = fetch_data_in_chunks(asset_id, start_date, end_date)

# Convert data to pandas DataFrame
df = convert_to_pandas(data)

return df
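
A minimal call sketch for fetch_pandas_data, assuming API_ENDPOINT and API_TOKEN are exported before the module is imported (the asset id 1603 and the Europe/Berlin timezone mirror corelation.py; the one-week window is illustrative):

import pytz
from datetime import datetime

from get_trend_data import fetch_pandas_data

tz = pytz.timezone("Europe/Berlin")
start = tz.localize(datetime(2024, 1, 1))
end = tz.localize(datetime(2024, 1, 7))
print(fetch_pandas_data(1603, start, end).head())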
6 changes: 6 additions & 0 deletions main.py
@@ -0,0 +1,6 @@
import os
import uvicorn

def start_api():
port = int(os.getenv("API_SERVER_PORT", 3000))
uvicorn.run("api.openapi:app", host="0.0.0.0", port=port)
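
As committed, nothing calls start_api, and the "api.openapi:app" module it points at is not part of this commit. If main.py is meant to be run directly, a conventional entry-point guard would be (a sketch, assuming direct execution):

if __name__ == "__main__":
    start_api()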
35 changes: 35 additions & 0 deletions register_app.py
@@ -0,0 +1,35 @@
import logging
import os
from eliona.api_client2 import (
AppsApi,
ApiClient,
Configuration,
)


host = os.getenv("API_ENDPOINT")
api_key = os.getenv("API_TOKEN")


configuration = Configuration(host=host)
configuration.api_key["ApiKeyAuth"] = api_key
api_client = ApiClient(configuration)


apps_api = AppsApi(api_client)


# Initialize the logger
logger = logging.getLogger(__name__)


def Initialize():
    app = apps_api.get_app_by_name("correlations")

if not app.registered:
apps_api.patch_app_by_name("correlations", True)
logger.info("App 'correlations' registered.")

else:
logger.info("App 'correlations' already active.")
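
Initialize() is likewise never invoked in this commit; a minimal sketch of the expected startup call, assuming it runs once before the app starts serving:

from register_app import Initialize

Initialize()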
