From 64415c8c0b6373bcefa8d76fce82410ba2ed013f Mon Sep 17 00:00:00 2001 From: sasax7 Date: Fri, 3 Jan 2025 18:00:26 +0100 Subject: [PATCH] done creating report and sending as email Stash changes before rewriting history --- api/correlation.py | 2 +- get_trend_data.py => api/get_trend_data.py | 4 +- api/models.py | 5 +- api/openapi.py | 129 +++++++++++++-- api/pdf_template.py | 179 +++++++++++++++++++++ api/plot_correlation.py | 5 +- api/sendEmail.py | 71 ++++++++ get_all_attributes.py | 0 main.py | 2 +- requirements.txt | 3 +- 10 files changed, 377 insertions(+), 23 deletions(-) rename get_trend_data.py => api/get_trend_data.py (98%) create mode 100644 api/pdf_template.py create mode 100644 api/sendEmail.py delete mode 100644 get_all_attributes.py diff --git a/api/correlation.py b/api/correlation.py index 382b983..aa03ddb 100644 --- a/api/correlation.py +++ b/api/correlation.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, ConfigDict import pytz -from get_trend_data import fetch_pandas_data +from api.get_trend_data import fetch_pandas_data from api.models import CorrelationRequest, LagUnit diff --git a/get_trend_data.py b/api/get_trend_data.py similarity index 98% rename from get_trend_data.py rename to api/get_trend_data.py index 3cb023e..de7725b 100644 --- a/get_trend_data.py +++ b/api/get_trend_data.py @@ -1,5 +1,4 @@ import pandas as pd -import numpy as np from datetime import timedelta import eliona.api_client2 from eliona.api_client2.rest import ApiException @@ -7,8 +6,7 @@ from eliona.api_client2.api.assets_api import AssetsApi import os import logging -import pytz -from datetime import datetime + from api.models import AssetAttribute # Initialize the logger diff --git a/api/models.py b/api/models.py index 071aa11..cdc719b 100644 --- a/api/models.py +++ b/api/models.py @@ -1,8 +1,7 @@ -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel from typing import List, Dict, Optional from enum import Enum from datetime import datetime -import pandas as pd class LagUnit(str, Enum): @@ -24,6 +23,7 @@ class CorrelationRequest(BaseModel): lags: Optional[List[Dict[LagUnit, int]]] = None start_time: Optional[datetime] = None end_time: Optional[datetime] = None + to_email: Optional[str] = None class CorrelateChildrenRequest(BaseModel): @@ -31,6 +31,7 @@ class CorrelateChildrenRequest(BaseModel): lags: Optional[List[Dict[LagUnit, int]]] = None start_time: Optional[datetime] = None end_time: Optional[datetime] = None + to_email: Optional[str] = None class CorrelationResult(BaseModel): diff --git a/api/openapi.py b/api/openapi.py index 624566e..adcddf0 100644 --- a/api/openapi.py +++ b/api/openapi.py @@ -3,15 +3,17 @@ from datetime import datetime import pytz import yaml -from api.models import CorrelationRequest, CorrelateChildrenRequest, AssetAttribute +from api.models import CorrelationRequest, CorrelateChildrenRequest from api.correlation import get_data, compute_correlation from api.plot_correlation import ( create_best_correlation_heatmap, in_depth_plot_scatter, plot_lag_correlations, ) -from get_trend_data import get_all_asset_children - +from api.get_trend_data import get_all_asset_children +from api.pdf_template import create_pdf +from fastapi.responses import FileResponse +from api.sendEmail import send_evaluation_report_as_mail # Create the FastAPI app instance app = FastAPI( @@ -42,18 +44,40 @@ def correlate_assets(request: CorrelationRequest): dataframes = get_data(request) correlations = compute_correlation(dataframes, request) create_best_correlation_heatmap(correlations) + + include_heatmap: bool = True + include_scatter: bool = False + include_lag_plots: bool = False + include_details: bool = True + + pdf_file_path = "/tmp/correlation_report.pdf" + create_pdf( + request.start_time, + request.end_time, + pdf_file_path, + correlations, + include_heatmap, + include_scatter, + include_lag_plots, + include_details, + ) + html_file_path = "/tmp/report.html" + with open(html_file_path, "r", encoding="utf-8") as html_file: + html_content = html_file.read() + if request.to_email: + send_evaluation_report_as_mail(pdf_file_path, request.to_email) return { "assets": request.assets, "lags": request.lags, "start_time": request.start_time, "end_time": end_time, "correlation": correlations, + "report_html": html_content, } @app.post("/v1/correlate-children") def correlate_asset_children(request: CorrelateChildrenRequest): - end_time = request.end_time or datetime.now() child_asset_ids = get_all_asset_children(request.asset_id) print(f"Found {len(child_asset_ids)} children for asset {request.asset_id}") correlation_request = CorrelationRequest( @@ -61,16 +85,20 @@ def correlate_asset_children(request: CorrelateChildrenRequest): lags=request.lags, start_time=request.start_time, end_time=request.end_time, + to_email=request.to_email, ) - correlations = correlate_assets(correlation_request) + response = correlate_assets(correlation_request) + correlations = response["correlation"] + html_content = response["report_html"] return { "assets": child_asset_ids, "lags": request.lags, "start_time": request.start_time, - "end_time": end_time, + "end_time": request.end_time, "correlation": correlations, + "report_html": html_content, } @@ -95,7 +123,9 @@ def in_depth_correlation(request: CorrelationRequest): correlations = compute_correlation(df_infos, request) - lag_plots = plot_lag_correlations(correlations, output_dir="/tmp/lag_plots") + lag_plot_filenames = plot_lag_correlations( + correlations, output_dir="/tmp/lag_plots" + ) try: scatter_result = in_depth_plot_scatter( @@ -103,16 +133,89 @@ def in_depth_correlation(request: CorrelationRequest): ) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) + include_heatmap: bool = False + include_scatter: bool = True + include_lag_plots: bool = True + include_details: bool = True + + pdf_file_path = "/tmp/correlation_report.pdf" + create_pdf( + request.start_time, + request.end_time, + pdf_file_path, + correlations, + include_heatmap, + include_scatter, + include_lag_plots, + include_details, + lag_plot_filenames, + ) + html_file_path = "/tmp/report.html" + with open(html_file_path, "r", encoding="utf-8") as html_file: + html_content = html_file.read() - end_time = request.end_time or datetime.now(pytz.timezone("Europe/Berlin")) - + if request.to_email: + send_evaluation_report_as_mail(pdf_file_path, request.to_email) return { "assets": request.assets, "lags": request.lags, "start_time": request.start_time, - "end_time": end_time, + "end_time": request.end_time, "correlation": correlations, - "scatter_plot": scatter_result["plot_base64_png"], - "columns": scatter_result["columns"], - "lag_plots": lag_plots, + "scatter_result_columns": scatter_result["columns"], + "report_html": html_content, } + + +@app.post("/v1/generate-report") +def generate_report(request: CorrelationRequest): + """ + Generate a PDF report for the correlation analysis. + """ + include_heatmap: bool = False + include_scatter: bool = True + include_lag_plots: bool = True + include_details: bool = True + if len(request.assets) != 2: + raise HTTPException(status_code=400, detail="Exactly two assets are required.") + + # 1) Fetch data + df_infos = get_data(request) + if len(df_infos) != 2: + raise HTTPException( + status_code=400, + detail="Could not retrieve data for both assets/attributes. Check logs.", + ) + + correlations = compute_correlation(df_infos, request) + + lag_plot_filenames = plot_lag_correlations( + correlations, output_dir="/tmp/lag_plots" + ) + + try: + scatter_result = in_depth_plot_scatter( + df_infos, output_file="/tmp/in_depth_scatter.png" + ) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + end_time = request.end_time or datetime.now(pytz.timezone("Europe/Berlin")) + + pdf_file_path = "/tmp/correlation_report.pdf" + create_pdf( + request.start_time, + request.end_time, + pdf_file_path, + correlations, + include_heatmap, + include_scatter, + include_lag_plots, + include_details, + lag_plot_filenames, + ) + if request.to_email: + send_evaluation_report_as_mail(pdf_file_path, request.to_email) + return FileResponse( + pdf_file_path, media_type="application/pdf", filename="correlation_report.pdf" + ) diff --git a/api/pdf_template.py b/api/pdf_template.py new file mode 100644 index 0000000..ec5a2f8 --- /dev/null +++ b/api/pdf_template.py @@ -0,0 +1,179 @@ +import os +import socket +import time +from threading import Thread +from http.server import HTTPServer, SimpleHTTPRequestHandler +from weasyprint import HTML +from datetime import datetime + + +def create_html( + fromdate, + todate, + correlations, + lag_plots, + include_heatmap=True, + include_scatter=True, + include_lag_plots=True, + include_details=True, +): + html_content = f""" + + + + + + + Correlation Analysis Report + + + + +
+

Correlation Analysis Report

+

Data is analyzed

+ +

from Date: {fromdate.strftime('%d %B %Y')} to Date: {todate.strftime('%d %B %Y')}

+

created at Date: {datetime.now().strftime('%d %B %Y')}

+
+ + {"

Best Correlation Heatmap

Best Correlation Heatmap
" if include_heatmap else ""} + + {"

In-Depth Scatter Plot

In-Depth Scatter Plot
" if include_scatter else ""} + + {"

Lag Correlation Plots

" + ''.join(f"
{os.path.basename(filename)}
" for filename in lag_plots) + "
" if include_lag_plots else ""} + + {"

Correlation Details

The following table provides detailed correlation values for each pair of columns analyzed:

" + ''.join(f"" for pair, info in correlations.items()) + "
Column PairBest CorrelationBest LagLag Unit
{pair}{info['best_correlation']}{info['best_lag']}{info['lag_unit']}
" if include_details else ""} + + + +""" + return html_content + + +def create_pdf( + fromdate, + todate, + file_path, + correlations, + include_heatmap=True, + include_scatter=True, + include_lag_plots=True, + include_details=True, + lag_plots=[], +): + print("Generating PDF report...") + + # Create HTML content + html_content = create_html( + fromdate, + todate, + correlations, + lag_plots, + include_heatmap, + include_scatter, + include_lag_plots, + include_details, + ) + + # Save the HTML content to a local file in /tmp + html_file_path = "/tmp/report.html" + with open(html_file_path, "w", encoding="utf-8") as html_file: + html_file.write(html_content) + + # Define the handler and server + class SilentHTTPRequestHandler(SimpleHTTPRequestHandler): + def log_message(self, format, *args): + pass # Silence the logging + + def start_server(): + os.chdir("/tmp") + httpd = HTTPServer(("localhost", 8000), SilentHTTPRequestHandler) + # Store the server object so it can be shut down later + server_info["httpd"] = httpd + httpd.serve_forever() + + server_info = {} + server_thread = Thread(target=start_server) + server_thread.daemon = True + server_thread.start() + + # Wait for the server to start + for _ in range(10): # Retry up to 10 times + try: + with socket.create_connection(("localhost", 8000), timeout=2): + break # Connection successful, proceed with PDF generation + except (ConnectionRefusedError, OSError): + time.sleep(0.5) + print("Waiting for server to start...") + else: + print("Failed to start server.") + return + + # Generate PDF with WeasyPrint with custom headers + try: + HTML("http://localhost:8000/report.html").write_pdf(file_path) + print("PDF file generated successfully.") + except Exception as e: + print(f"Error generating PDF: {e}") + finally: + # Shutdown the server after the work is done + if "httpd" in server_info: + server_info["httpd"].shutdown() + server_info["httpd"].server_close() + print("Server shut down successfully.") + # Adding a delay to ensure port release + time.sleep(1) diff --git a/api/plot_correlation.py b/api/plot_correlation.py index ddc44cf..2c028e8 100644 --- a/api/plot_correlation.py +++ b/api/plot_correlation.py @@ -4,7 +4,6 @@ import seaborn as sns import base64 import io -import os def create_best_correlation_heatmap(correlations_dict, output_file="/tmp/heatmap.png"): @@ -166,6 +165,7 @@ def plot_lag_correlations(correlations_dict, output_dir="/tmp/lag_plots"): seen_pairs = set() # Track pairs we've already plotted plot_images = {} # Dictionary to store base64-encoded images + plot_filenames = [] # List to store filenames of generated plots for pair_name, info in correlations_dict.items(): # Split on " and " to get the two column names. @@ -219,6 +219,7 @@ def plot_lag_correlations(correlations_dict, output_dir="/tmp/lag_plots"): plt.tight_layout() plt.savefig(filepath) + plot_filenames.append(filename) # Add filename to the list # Save the figure to a buffer and encode it in base64 buf = io.BytesIO() @@ -230,4 +231,4 @@ def plot_lag_correlations(correlations_dict, output_dir="/tmp/lag_plots"): # Store the base64 image in the dictionary plot_images[f"{pair_label}_{safe_unit}"] = img_base64 - return plot_images + return plot_filenames diff --git a/api/sendEmail.py b/api/sendEmail.py new file mode 100644 index 0000000..a5477b9 --- /dev/null +++ b/api/sendEmail.py @@ -0,0 +1,71 @@ +import smtplib +from email.mime.multipart import MIMEMultipart +from email.mime.base import MIMEBase +from email.mime.text import MIMEText +from email.utils import formatdate +from email import encoders +import os + + +def send_email( + file_path, + to_email, + from_email, + subject, + body, + smtp_server, + smtp_port, + smtp_user, + smtp_password, +): + if not os.path.exists(file_path): + raise FileNotFoundError(f"The file {file_path} does not exist.") + + msg = MIMEMultipart() + msg["From"] = from_email + msg["To"] = to_email + msg["Date"] = formatdate(localtime=True) + msg["Subject"] = subject + + msg.attach(MIMEText(body, "plain")) + + part = MIMEBase("application", "octet-stream") + with open(file_path, "rb") as file: + part.set_payload(file.read()) + encoders.encode_base64(part) + part.add_header( + "Content-Disposition", f'attachment; filename="{os.path.basename(file_path)}"' + ) + msg.attach(part) + + with smtplib.SMTP(smtp_server, smtp_port) as server: + server.starttls() + server.login(smtp_user, smtp_password) + server.sendmail(from_email, to_email, msg.as_string()) + + +def send_evaluation_report_as_mail(filepath, toEmail): + smtp_server = os.getenv("SMTP_SERVER") + smtp_port = int(os.getenv("SMTP_PORT")) + smtp_user = os.getenv("SMTP_USER") + smtp_password = os.getenv("SMTP_PASSWORD") + from_email = smtp_user + to_email = toEmail + subject = "Correlation Analysis Report" + + # Format the body with customer information + body = "Dear Customer,\n\nPlease find attached the correlation analysis report.\n\nBest regards,\nYour Data Science Team" + + # Send the email + send_email( + filepath, + to_email, + from_email, + subject, + body, + smtp_server, + smtp_port, + smtp_user, + smtp_password, + ) + print("Email sent successfully.") diff --git a/get_all_attributes.py b/get_all_attributes.py deleted file mode 100644 index e69de29..0000000 diff --git a/main.py b/main.py index 38ee6b2..d0beb38 100644 --- a/main.py +++ b/main.py @@ -8,5 +8,5 @@ def start_api(): uvicorn.run("api.openapi:app", host="0.0.0.0", port=port) -# Initialize() +Initialize() start_api() diff --git a/requirements.txt b/requirements.txt index 42fe6dd..f0ccd61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ pytz scipy seaborn uvicorn -pyyaml \ No newline at end of file +pyyaml +weasyprint \ No newline at end of file