From 5c7fa1661792e815a8b465c5bfecd4c4d0aa46a9 Mon Sep 17 00:00:00 2001 From: "Dixing (Dex) Xu" Date: Tue, 26 Nov 2024 23:07:18 +0800 Subject: [PATCH] Feat: Add WebUI (please merge with squash) (#27) * :lipstick: Add webUI for aideml * add gradio webui * update requirements * update gitignore * :lipstick: Update webUI and use logs to display results * :lipstick: Add gradio theme and streamlit app * :lipstick: Update streamlit webui * :lipstick: fix issues for serialization on config display * :lipstick: Fix results at bottom issue * :lipstick: Update the streamlit webui theme * :lipstick: Update the streamlit app theme * :fire: Remove gradio and use streamlit * :rotating_light: add linter workflow and fix issues * Add github action for linter * Fix current linter issues * :construction_worker: Add github templates * Add issue templates * Add pr template * :rotating_light: Add black formatter (#25) * :rotating_light: Add black formatter * Add black to linter * Fix the formatting * :rotating_light: Update black linter to see the suggestion change * :lipstick: Add webUI for aideml * add gradio webui * update requirements * update gitignore * :lipstick: Update streamlit app * :rotating_light: fix linter issue * :construction_worker: Fix styling issues for tree viz * :construction_worker: Finalize the theme * :recycle: Refactor webui code * :memo: Update README * :rotating_light: fix linter * :bug: fix conflict * :heavy_plus_sign: Add dependency --- .gitignore | 2 +- .streamlit/config.toml | 17 ++ README.md | 149 ++++++++---- aide/webui/__init__.py | 0 aide/webui/app.py | 535 +++++++++++++++++++++++++++++++++++++++++ aide/webui/style.css | 156 ++++++++++++ requirements.txt | 1 + 7 files changed, 816 insertions(+), 44 deletions(-) create mode 100644 .streamlit/config.toml create mode 100644 aide/webui/__init__.py create mode 100644 aide/webui/app.py create mode 100644 aide/webui/style.css diff --git a/.gitignore b/.gitignore index 74062f1..fe04838 100644 --- a/.gitignore +++
b/.gitignore @@ -167,4 +167,4 @@ logs .trunk .gradio/ -.ruff_cache/ \ No newline at end of file +.ruff_cache/ diff --git a/.streamlit/config.toml b/.streamlit/config.toml new file mode 100644 index 0000000..3668599 --- /dev/null +++ b/.streamlit/config.toml @@ -0,0 +1,17 @@ +[theme] +# Primary colors +primaryColor="#0D0F18" # --wecopink: 343 98% 63% +backgroundColor="#F0EFE9" # --background: 49 10% 94% +secondaryBackgroundColor="#FFFFFF" # --card: 60 33.3% 98% +textColor="#0A0A0A" # --primary: 0 0% 17% + +# Font +font="sans serif" + +[ui] +hideTopBar = true + +[client] +toolbarMode = "minimal" +showErrorDetails = true +showSidebarNavigation = false diff --git a/README.md b/README.md index 8fbaba1..6e8333a 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ AIDE is an LLM agent that generates solutions for machine learning tasks just fr AIDE is the state-of-the-art agent on OpenAI's [MLE-bench](https://arxiv.org/pdf/2410.07095), a benchmark composed of 75 Kaggle machine learning tasks, where we achieved four times more medals compared to the runner-up agent architecture. In our own benchmark composed of over 60 Kaggle data science competitions, AIDE demonstrated impressive performance, surpassing 50% of Kaggle participants on average (see our [technical report](https://www.weco.ai/blog/technical-report) for details). + More specifically, AIDE has the following features: 1. **Instruct with Natural Language**: Describe your problem or additional requirements and expert insights, all in natural language. @@ -18,7 +19,7 @@ More specifically, AIDE has the following features: 3. **Iterative Optimization**: AIDE iteratively runs, debugs, evaluates, and improves the ML code, all by itself. 4. **Visualization**: We also provide tools to visualize the solution tree produced by AIDE for a better understanding of its experimentation process. This gives you insights not only about what works but also what doesn't. -# How to use AIDE? +# How to Use AIDE? 
## Setup @@ -38,7 +39,7 @@ export OPENAI_API_KEY= export ANTHROPIC_API_KEY= ``` -## Running AIDE via the command line +## Running AIDE via the Command Line To run AIDE: @@ -54,9 +55,9 @@ aide data_dir="example_tasks/house_prices" goal="Predict the sales price for eac Options: -- `data_dir` (required): a directory containing all the data relevant for your task (`.csv` files, images, etc.). -- `goal`: describe what you want the models to predict in your task, for example, "Build a timeseries forcasting model for bitcoin close price" or "Predict sales price for houses". -- `eval`: the evaluation metric used to evaluate the ML models for the task (e.g., accuracy, F1, Root-Mean-Squared-Error, etc.) +- `data_dir` (required): A directory containing all the data relevant for your task (`.csv` files, images, etc.). +- `goal`: Describe what you want the models to predict in your task, for example, "Build a time series forecasting model for bitcoin close price" or "Predict sales price for houses". +- `eval`: The evaluation metric used to evaluate the ML models for the task (e.g., accuracy, F1, Root-Mean-Squared-Error, etc.). Alternatively, you can provide the entire task description as a `desc_str` string, or write it in a plaintext file and pass its path as `desc_file` ([example file](aide/example_tasks/house_prices.md)). @@ -66,19 +67,19 @@ aide data_dir="my_data_dir" desc_file="my_task_description.txt" The result of the run will be stored in the `logs` directory. -- `logs//best_solution.py`: Python code of _best solution_ according to the validation metric -- `logs//journal.json`: a JSON file containing the metadata of the experiment runs, including all the code generated in intermediate steps, plan, evaluation results, etc. -- `logs//tree_plot.html`: you can open it in your browser. It contains visualization of solution tree, which details the experimentation process of finding and optimizing ML code. 
You can explore and interact with the tree visualization to view what plan and code AIDE comes up with in each step. +- `logs//best_solution.py`: Python code of the _best solution_ according to the validation metric. +- `logs//journal.json`: A JSON file containing the metadata of the experiment runs, including all the code generated in intermediate steps, plan, evaluation results, etc. +- `logs//tree_plot.html`: You can open it in your browser. It contains a visualization of the solution tree, which details the experimentation process of finding and optimizing ML code. You can explore and interact with the tree visualization to view what plan and code AIDE comes up with in each step. The `workspaces` directory will contain all the files and data that the agent generated. ### Advanced Usage -To further customize the behaviour of AIDE, some useful options might be: +To further customize the behavior of AIDE, some useful options might be: -- `agent.code.model=...` to configure which model the agent should use for coding (default is `gpt-4-turbo`) -- `agent.steps=...` to configure how many improvement iterations the agent should run (default is 20) -- `agent.search.num_drafts=...` to configure the number of initial drafts the agent should generate (default is 5) +- `agent.code.model=...` to configure which model the agent should use for coding (default is `gpt-4-turbo`). +- `agent.steps=...` to configure how many improvement iterations the agent should run (default is 20). +- `agent.search.num_drafts=...` to configure the number of initial drafts the agent should generate (default is 5). You can check the [`config.yaml`](aide/utils/config.yaml) file for more options. @@ -88,23 +89,73 @@ AIDE supports using local LLMs through OpenAI-compatible APIs. Here's how to set 1. Set up a local LLM server with an OpenAI-compatible API endpoint. You can use: - [Ollama](https://github.com/ollama/ollama) - - or similar solutions + - or similar solutions. 2. 
Configure your environment to use the local endpoint: + + ```bash + export OPENAI_BASE_URL="http://localhost:11434/v1" # For Ollama + export OPENAI_API_KEY="local-llm" # Can be any string if your local server doesn't require authentication + ``` + +3. Update the model configuration in your AIDE command or config. For example, with Ollama: + + ```bash + # Example with house prices dataset + aide agent.code.model="qwen2.5" agent.feedback.model="qwen2.5" report.model="qwen2.5" \ + data_dir="example_tasks/house_prices" \ + goal="Predict the sales price for each house" \ + eval="Use the RMSE metric between the logarithm of the predicted and observed values." + ``` + +## Running AIDE via the Web UI + +We have developed a user-friendly Web UI using Streamlit to make it even easier to interact with AIDE. + +### Prerequisites + +Ensure you have installed the development version of AIDE and its dependencies as described in the [Development](#development) section. + +### Running the Web UI + +Navigate to the `aide/webui` directory and run the Streamlit application: + ```bash -export OPENAI_BASE_URL="http://localhost:11434/v1" # For Ollama -export OPENAI_API_KEY="local-llm" # Can be any string if your local server doesn't require authentication +cd aide/webui +streamlit run app.py ``` -3. Update the model configuration in your AIDE command or config. For example, with Ollama: +Alternatively, you can run it from the root directory: + ```bash -# Example with house prices dataset -aide agent.code.model="qwen2.5" agent.feedback.model="qwen2.5" report.model="qwen2.5" \ - data_dir="example_tasks/house_prices" \ - goal="Predict the sales price for each house" \ - eval="Use the RMSE metric between the logarithm of the predicted and observed values." +streamlit run aide/webui/app.py ``` +### Using the Web UI + +1. **API Key Configuration**: In the sidebar, input your OpenAI API key or Anthropic API key and click "Save API Keys". + +2. 
**Input Data**: + - You can either **upload your dataset files** (`.csv`, `.txt`, `.json`, `.md`) using the "Upload Data Files" feature. + - Or click on "Load Example Experiment" to use the example house prices dataset. + +3. **Define Goal and Evaluation Criteria**: + - In the "Goal" text area, describe what you want the model to achieve (e.g., "Predict the sales price for each house"). + - In the "Evaluation Criteria" text area, specify the evaluation metric (e.g., "Use the RMSE metric between the logarithm of the predicted and observed values."). + +4. **Configure Steps**: + - Use the slider to set the number of steps (iterations) for the experiment. + +5. **Run the Experiment**: + - Click on "Run AIDE" to start the experiment. + - Progress and status updates will be displayed in the "Results" section. + +6. **View Results**: + - **Tree Visualization**: Explore the solution tree to understand how AIDE experimented and optimized the models. + - **Best Solution**: View the Python code of the best solution found. + - **Config**: Review the configuration used for the experiment. + - **Journal**: Examine the detailed journal entries for each step. + ## Using AIDE in Python Using AIDE within your Python script/project is easy. Follow the setup steps above, and then create an AIDE experiment like below and start running: @@ -113,7 +164,7 @@ Using AIDE within your Python script/project is easy. 
Follow the setup steps abo import aide exp = aide.Experiment( data_dir="example_tasks/bitcoin_price", # replace this with your own directory - goal="Build a timeseries forcasting model for bitcoin close price.", # replace with your own goal description + goal="Build a time series forecasting model for bitcoin close price.", # replace with your own goal description eval="RMSLE" # replace with your own evaluation metric ) @@ -125,7 +176,7 @@ print(f"Best solution code: {best_solution.code}") ## Development -To install AIDE for development, clone this repository and install it locally. +To install AIDE for development, clone this repository and install it locally: ```bash git clone https://github.com/WecoAI/aideml.git @@ -133,33 +184,45 @@ cd aideml pip install -e . ``` -## Using AIDE with Docker +### Running the Web UI in Development Mode -You can also run AIDE using Docker: +Ensure that you have all the required development dependencies installed. Then, you can run the Web UI as follows: -1. Build the Docker image: ```bash -docker build -t aide . +cd aide/webui +streamlit run app.py ``` -2. Run AIDE with Docker (example with house prices task): -```bash -# Set custom workspace and logs location (optional) -export WORKSPACE_BASE=$(pwd)/workspaces -export LOGS_DIR=$(pwd)/logs - -docker run -it --rm \ - -v "${LOGS_DIR:-$(pwd)/logs}:/app/logs" \ - -v "${WORKSPACE_BASE:-$(pwd)/workspaces}:/app/workspaces" \ - -v "$(pwd)/aide/example_tasks:/app/data" \ - -e OPENAI_API_KEY="your-actual-api-key" \ - aide \ - data_dir=/app/data/house_prices \ - goal="Predict the sales price for each house" \ - eval="Use the RMSE metric between the logarithm of the predicted and observed values." -``` +## Using AIDE with Docker + +You can also run AIDE using Docker: + +1. **Build the Docker Image**: + + ```bash + docker build -t aide . + ``` + +2. 
**Run AIDE with Docker** (example with house prices task): + + ```bash + # Set custom workspace and logs location (optional) + export WORKSPACE_BASE=$(pwd)/workspaces + export LOGS_DIR=$(pwd)/logs + + docker run -it --rm \ + -v "${LOGS_DIR:-$(pwd)/logs}:/app/logs" \ + -v "${WORKSPACE_BASE:-$(pwd)/workspaces}:/app/workspaces" \ + -v "$(pwd)/aide/example_tasks:/app/data" \ + -e OPENAI_API_KEY="your-actual-api-key" \ + aide \ + data_dir=/app/data/house_prices \ + goal="Predict the sales price for each house" \ + eval="Use the RMSE metric between the logarithm of the predicted and observed values." + ``` You can customize the location of workspaces and logs by setting environment variables before running the container: + - `WORKSPACE_BASE`: Sets the base directory for AIDE workspaces (default: `$(pwd)/workspaces`) - `LOGS_DIR`: Sets the directory for AIDE logs (default: `$(pwd)/logs`) diff --git a/aide/webui/__init__.py b/aide/webui/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/aide/webui/app.py b/aide/webui/app.py new file mode 100644 index 0000000..e43d465 --- /dev/null +++ b/aide/webui/app.py @@ -0,0 +1,535 @@ +import streamlit as st +import streamlit.components.v1 as components +from pathlib import Path +import tempfile +import shutil +import os +import json +from omegaconf import OmegaConf +from rich.console import Console +import sys +from dotenv import load_dotenv +import logging +from aide import Experiment + +# Set up logging configuration +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[logging.StreamHandler(sys.stderr)], +) + +logger = logging.getLogger("aide") +logger.setLevel(logging.INFO) + +console = Console(file=sys.stderr) + + +class WebUI: + """ + WebUI encapsulates the Streamlit application logic for the AIDE Machine Learning Engineer Agent. + """ + + def __init__(self): + """ + Initialize the WebUI with environment variables and session state. 
+ """ + self.env_vars = self.load_env_variables() + self.project_root = Path(__file__).parent.parent.parent + self.config_session_state() + self.setup_page() + + @staticmethod + def load_env_variables(): + """ + Load API keys and environment variables from .env file. + + Returns: + dict: Dictionary containing API keys. + """ + load_dotenv() + return { + "openai_key": os.getenv("OPENAI_API_KEY", ""), + "anthropic_key": os.getenv("ANTHROPIC_API_KEY", ""), + } + + @staticmethod + def config_session_state(): + """ + Configure default values for Streamlit session state. + """ + if "is_running" not in st.session_state: + st.session_state.is_running = False + if "current_step" not in st.session_state: + st.session_state.current_step = 0 + if "total_steps" not in st.session_state: + st.session_state.total_steps = 0 + if "progress" not in st.session_state: + st.session_state.progress = 0 + if "results" not in st.session_state: + st.session_state.results = None + + @staticmethod + def setup_page(): + """ + Set up the Streamlit page configuration and load custom CSS. + """ + st.set_page_config( + page_title="AIDE: Machine Learning Engineer Agent", + layout="wide", + ) + WebUI.load_css() + + @staticmethod + def load_css(): + """ + Load custom CSS styles from 'style.css' file. + """ + css_file = Path(__file__).parent / "style.css" + if css_file.exists(): + with open(css_file) as f: + st.markdown(f"", unsafe_allow_html=True) + else: + st.warning(f"CSS file not found at: {css_file}") + + def run(self): + """ + Run the main logic of the Streamlit application. + """ + self.render_sidebar() + input_col, results_col = st.columns([1, 3]) + with input_col: + self.render_input_section(results_col) + with results_col: + self.render_results_section() + + def render_sidebar(self): + """ + Render the sidebar with API key settings. + """ + with st.sidebar: + st.header("⚙️ Settings") + st.markdown( + "
<p>OpenAI API Key</p>
", + unsafe_allow_html=True, + ) + openai_key = st.text_input( + "OpenAI API Key", + value=self.env_vars["openai_key"], + type="password", + label_visibility="collapsed", + ) + st.markdown( + "
<p>Anthropic API Key</p>
", + unsafe_allow_html=True, + ) + anthropic_key = st.text_input( + "Anthropic API Key", + value=self.env_vars["anthropic_key"], + type="password", + label_visibility="collapsed", + ) + if st.button("Save API Keys", use_container_width=True): + st.session_state.openai_key = openai_key + st.session_state.anthropic_key = anthropic_key + st.success("API keys saved!") + + def render_input_section(self, results_col): + """ + Render the input section of the application. + + Args: + results_col (st.delta_generator.DeltaGenerator): The results column to pass to methods. + """ + st.header("Input") + uploaded_files = self.handle_file_upload() + goal_text, eval_text, num_steps = self.handle_user_inputs() + if st.button("Run AIDE", type="primary", use_container_width=True): + with st.spinner("AIDE is running..."): + results = self.run_aide( + uploaded_files, goal_text, eval_text, num_steps, results_col + ) + st.session_state.results = results + + def handle_file_upload(self): + """ + Handle file uploads and example file loading. + + Returns: + list: List of uploaded or example files. + """ + if st.button( + "Load Example Experiment", type="primary", use_container_width=True + ): + st.session_state.example_files = self.load_example_files() + + if st.session_state.get("example_files"): + st.info("Example files loaded! Click 'Run AIDE' to proceed.") + with st.expander("View Loaded Files", expanded=False): + for file in st.session_state.example_files: + st.text(f"📄 {file['name']}") + uploaded_files = st.session_state.example_files + else: + uploaded_files = st.file_uploader( + "Upload Data Files", + accept_multiple_files=True, + type=["csv", "txt", "json", "md"], + ) + return uploaded_files + + def handle_user_inputs(self): + """ + Handle goal, evaluation criteria, and number of steps inputs. + + Returns: + tuple: Goal text, evaluation criteria text, and number of steps. 
+ """ + goal_text = st.text_area( + "Goal", + value=st.session_state.get("goal", ""), + placeholder="Example: Predict house prices", + ) + eval_text = st.text_area( + "Evaluation Criteria", + value=st.session_state.get("eval", ""), + placeholder="Example: Use RMSE metric", + ) + num_steps = st.slider( + "Number of Steps", + min_value=1, + max_value=20, + value=st.session_state.get("steps", 10), + ) + return goal_text, eval_text, num_steps + + @staticmethod + def load_example_files(): + """ + Load example files from the 'example_tasks/house_prices' directory. + + Returns: + list: List of example files with their paths. + """ + package_root = Path(__file__).parent.parent + example_dir = package_root / "example_tasks" / "house_prices" + + if not example_dir.exists(): + st.error(f"Example directory not found at: {example_dir}") + return [] + + example_files = [] + + for file_path in example_dir.glob("*"): + if file_path.suffix.lower() in [".csv", ".txt", ".json", ".md"]: + with tempfile.NamedTemporaryFile( + delete=False, suffix=file_path.suffix + ) as tmp_file: + tmp_file.write(file_path.read_bytes()) + example_files.append( + {"name": file_path.name, "path": tmp_file.name} + ) + + if not example_files: + st.warning("No example files found in the example directory") + + st.session_state["goal"] = "Predict the sales price for each house" + st.session_state["eval"] = ( + "Use the RMSE metric between the logarithm of the predicted and observed values." + ) + + return example_files + + def run_aide(self, files, goal_text, eval_text, num_steps, results_col): + """ + Run the AIDE experiment with the provided inputs. + + Args: + files (list): List of uploaded or example files. + goal_text (str): The goal of the experiment. + eval_text (str): The evaluation criteria. + num_steps (int): Number of steps to run. + results_col (st.delta_generator.DeltaGenerator): Results column for displaying progress. + + Returns: + dict: Dictionary containing the results of the experiment. 
+ """ + try: + self.initialize_run_state(num_steps) + self.set_api_keys() + + input_dir = self.prepare_input_directory(files) + if not input_dir: + return None + + experiment = self.initialize_experiment(input_dir, goal_text, eval_text) + placeholders = self.create_results_placeholders(results_col, experiment) + + for step in range(num_steps): + st.session_state.current_step = step + 1 + progress = (step + 1) / num_steps + self.update_results_placeholders(placeholders, progress) + experiment.run(steps=1) + + self.clear_run_state(placeholders) + + return self.collect_results(experiment) + + except Exception as e: + st.session_state.is_running = False + console.print_exception() + st.error(f"Error occurred: {str(e)}") + return None + + @staticmethod + def initialize_run_state(num_steps): + """ + Initialize the running state for the experiment. + + Args: + num_steps (int): Total number of steps in the experiment. + """ + st.session_state.is_running = True + st.session_state.current_step = 0 + st.session_state.total_steps = num_steps + st.session_state.progress = 0 + + @staticmethod + def set_api_keys(): + """ + Set the API keys in the environment variables from the session state. + """ + if st.session_state.get("openai_key"): + os.environ["OPENAI_API_KEY"] = st.session_state.openai_key + if st.session_state.get("anthropic_key"): + os.environ["ANTHROPIC_API_KEY"] = st.session_state.anthropic_key + + def prepare_input_directory(self, files): + """ + Prepare the input directory and handle uploaded files. + + Args: + files (list): List of uploaded or example files. + + Returns: + Path: The input directory path, or None if files are missing. 
+ """ + input_dir = self.project_root / "input" + input_dir.mkdir(parents=True, exist_ok=True) + + if files: + for file in files: + if isinstance(file, dict): # Example files + shutil.copy2(file["path"], input_dir / file["name"]) + else: # Uploaded files + with open(input_dir / file.name, "wb") as f: + f.write(file.getbuffer()) + else: + st.error("Please upload data files") + return None + return input_dir + + @staticmethod + def initialize_experiment(input_dir, goal_text, eval_text): + """ + Initialize the AIDE Experiment. + + Args: + input_dir (Path): Path to the input directory. + goal_text (str): The goal of the experiment. + eval_text (str): The evaluation criteria. + + Returns: + Experiment: The initialized Experiment object. + """ + experiment = Experiment(data_dir=str(input_dir), goal=goal_text, eval=eval_text) + return experiment + + @staticmethod + def create_results_placeholders(results_col, experiment): + """ + Create placeholders in the results column for dynamic content. + + Args: + results_col (st.delta_generator.DeltaGenerator): The results column. + experiment (Experiment): The Experiment object. + + Returns: + dict: Dictionary of placeholders. 
+ """ + with results_col: + status_placeholder = st.empty() + step_placeholder = st.empty() + config_title_placeholder = st.empty() + config_placeholder = st.empty() + progress_placeholder = st.empty() + + step_placeholder.markdown( + f"### 🔥 Running Step {st.session_state.current_step}/{st.session_state.total_steps}" + ) + config_title_placeholder.markdown("### 📋 Configuration") + config_placeholder.code(OmegaConf.to_yaml(experiment.cfg), language="yaml") + progress_placeholder.progress(0) + + placeholders = { + "status": status_placeholder, + "step": step_placeholder, + "config_title": config_title_placeholder, + "config": config_placeholder, + "progress": progress_placeholder, + } + return placeholders + + @staticmethod + def update_results_placeholders(placeholders, progress): + """ + Update the placeholders with the current progress. + + Args: + placeholders (dict): Dictionary of placeholders. + progress (float): Current progress value. + """ + placeholders["step"].markdown( + f"### 🔥 Running Step {st.session_state.current_step}/{st.session_state.total_steps}" + ) + placeholders["progress"].progress(progress) + + @staticmethod + def clear_run_state(placeholders): + """ + Clear the running state and placeholders after the experiment. + + Args: + placeholders (dict): Dictionary of placeholders. + """ + st.session_state.is_running = False + placeholders["status"].empty() + placeholders["step"].empty() + placeholders["config_title"].empty() + placeholders["config"].empty() + placeholders["progress"].empty() + + @staticmethod + def collect_results(experiment): + """ + Collect the results from the experiment. + + Args: + experiment (Experiment): The Experiment object. + + Returns: + dict: Dictionary containing the collected results. 
+ """ + solution_path = experiment.cfg.log_dir / "best_solution.py" + if solution_path.exists(): + solution = solution_path.read_text() + else: + solution = "No solution found" + + journal_data = [ + { + "step": node.step, + "code": str(node.code), + "metric": str(node.metric.value) if node.metric else None, + "is_buggy": node.is_buggy, + } + for node in experiment.journal.nodes + ] + + results = { + "solution": solution, + "config": OmegaConf.to_yaml(experiment.cfg), + "journal": json.dumps(journal_data, indent=2, default=str), + "tree_path": str(experiment.cfg.log_dir / "tree_plot.html"), + } + return results + + def render_results_section(self): + """ + Render the results section with tabs for different outputs. + """ + st.header("Results") + if st.session_state.get("results"): + results = st.session_state.results + tabs = st.tabs(["Tree Visualization", "Best Solution", "Config", "Journal"]) + + with tabs[0]: + self.render_tree_visualization(results) + with tabs[1]: + self.render_best_solution(results) + with tabs[2]: + self.render_config(results) + with tabs[3]: + self.render_journal(results) + else: + st.info("No results to display. Please run an experiment.") + + @staticmethod + def render_tree_visualization(results): + """ + Render the tree visualization from the experiment results. + + Args: + results (dict): The results dictionary containing paths and data. + """ + if "tree_path" in results: + tree_path = Path(results["tree_path"]) + logger.info(f"Loading tree visualization from: {tree_path}") + if tree_path.exists(): + with open(tree_path, "r", encoding="utf-8") as f: + html_content = f.read() + components.html(html_content, height=600, scrolling=True) + else: + st.error(f"Tree visualization file not found at: {tree_path}") + logger.error(f"Tree file not found at: {tree_path}") + else: + st.info("No tree visualization available for this run.") + + @staticmethod + def render_best_solution(results): + """ + Display the best solution code. 
+ + Args: + results (dict): The results dictionary containing the solution. + """ + if "solution" in results: + solution_code = results["solution"] + st.code(solution_code, language="python") + else: + st.info("No solution available.") + + @staticmethod + def render_config(results): + """ + Display the configuration used in the experiment. + + Args: + results (dict): The results dictionary containing the config. + """ + if "config" in results: + st.code(results["config"], language="yaml") + else: + st.info("No configuration available.") + + @staticmethod + def render_journal(results): + """ + Display the experiment journal as JSON. + + Args: + results (dict): The results dictionary containing the journal. + """ + if "journal" in results: + try: + journal_data = json.loads(results["journal"]) + formatted_journal = json.dumps(journal_data, indent=2) + st.code(formatted_journal, language="json") + except json.JSONDecodeError: + st.code(results["journal"], language="json") + else: + st.info("No journal available.") + + +if __name__ == "__main__": + app = WebUI() + app.run() diff --git a/aide/webui/style.css b/aide/webui/style.css new file mode 100644 index 0000000..0219363 --- /dev/null +++ b/aide/webui/style.css @@ -0,0 +1,156 @@ +/* Main colors */ +:root { + --background: #F2F0E7; + --background-shaded: #EBE8DD; + --card: #FFFFFF; + --primary: #0D0F18; + --accent: #F04370; + --border: #D4D1C7; + --accent-hover: #E13D68; + --accent-light: #FEE5EC; +} + +.stVerticalBlock { + padding-top: 0rem; + padding-bottom: 0rem; +} + +.block-container { + padding-top: 0rem; + padding-bottom: 0rem; +} +header.stAppHeader { + display: none; +} +section.stMain .block-container { + padding-top: 0rem; + z-index: 1; +} + +/* Main container */ +.stApp { + background-color: var(--background); + height: auto; + overflow: visible; +} + +/* Widgets */ +.stSelectbox, +.stTextInput, +.stNumberInput { + background-color: var(--card); + border: 1px solid var(--border); + border-radius: 0.4rem; 
+} + +.stMarkdown { + color: var(--primary); +} + +/* Code block styling */ +.stCodeBlock { + max-height: 400px; + overflow-y: auto !important; + border: 1px solid var(--border); + border-radius: 0.4rem; + background-color: var(--background-shaded); +} + +/* Custom scrollbar for code blocks */ +.stCodeBlock::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +.stCodeBlock::-webkit-scrollbar-track { + background: var(--background-shaded); + border-radius: 4px; +} + +.stCodeBlock::-webkit-scrollbar-thumb { + background: var(--accent); + border-radius: 4px; +} + +.stCodeBlock::-webkit-scrollbar-thumb:hover { + background: #e13d68; +} + + + +.scrollable-code-container { + height: 600px; + overflow-y: auto; + border: 1px solid var(--border); + padding: 15px; + border-radius: 5px; + background-color: var(--background-shaded); +} + +.scrollable-code-container pre { + margin: 0; + white-space: pre; + overflow-x: auto; + font-family: monospace; +} + +.scrollable-code-container code { + display: block; + min-width: 100%; + padding: 0; + tab-size: 4; +} + +/* Add custom scrollbar styling for code containers */ +.scrollable-code-container::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +.scrollable-code-container::-webkit-scrollbar-track { + background: var(--background-shaded); + border-radius: 4px; +} + +.scrollable-code-container::-webkit-scrollbar-thumb { + background: var(--accent); + border-radius: 4px; +} + +.scrollable-code-container::-webkit-scrollbar-thumb:hover { + background: #e13d68; +} + +/* Style for expander */ +.streamlit-expanderHeader { + background-color: var(--card); + border: 1px solid var(--border); + border-radius: 0.4rem; + padding: 0.5rem !important; +} + +.streamlit-expanderHeader:hover { + border-color: var(--accent); +} + +/* Style for expander content */ +.streamlit-expanderContent { + background-color: var(--background-shaded); + border: 1px solid var(--border); + border-radius: 0 0 0.4rem 0.4rem; + margin-top: -1px; + padding: 0.5rem 
!important; +} + +/* Style for st.code() blocks */ +.stCode { + max-height: 600px; + overflow-y: auto; + background-color: var(--background-shaded) !important; + border: 1px solid var(--border) !important; + border-radius: 5px !important; +} + +.stCode pre { + background-color: var(--background-shaded) !important; +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6c2c6c7..8af47e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -90,3 +90,4 @@ pyocr pyarrow xlrd backoff +streamlit
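
The journal handling in `WebUI.collect_results` above serializes each journal node into a plain dict and then into JSON with `json.dumps(..., default=str)`, so non-JSON values such as metric objects degrade to strings instead of raising. A minimal stdlib-only sketch of that pattern, using hypothetical stand-in `Node`/`Metric` classes in place of AIDE's real journal types:

```python
import json
from dataclasses import dataclass
from typing import Optional


@dataclass
class Metric:
    """Stand-in for an AIDE metric object (illustration only)."""
    value: float


@dataclass
class Node:
    """Stand-in for an AIDE journal node (illustration only)."""
    step: int
    code: str
    metric: Optional[Metric]
    is_buggy: bool


nodes = [
    Node(step=0, code="model.fit(X, y)", metric=Metric(0.42), is_buggy=False),
    Node(step=1, code="raise ValueError", metric=None, is_buggy=True),
]

# Mirrors the journal-building comprehension in collect_results:
# stringify code and metric, keep None for nodes without a metric.
journal_data = [
    {
        "step": node.step,
        "code": str(node.code),
        "metric": str(node.metric.value) if node.metric else None,
        "is_buggy": node.is_buggy,
    }
    for node in nodes
]

# default=str is a safety net for any remaining non-serializable values.
journal_json = json.dumps(journal_data, indent=2, default=str)
print(journal_json)
```

This is why `render_journal` can fall back to showing the raw string on `json.JSONDecodeError`: the journal is stored as a JSON string, not as live objects.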
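
The file-staging logic in `prepare_input_directory` handles two shapes of input: example files, which arrive as `{"name": ..., "path": ...}` dicts pointing at temp files on disk, and Streamlit uploads, which carry their bytes in memory. A stdlib-only sketch of that branch (not AIDE's actual API; the Streamlit `UploadedFile` branch is reduced to a `write_bytes` call for illustration):

```python
import shutil
import tempfile
from pathlib import Path


def prepare_input_directory(files, project_root: Path):
    """Stage inputs under <project_root>/input, as the WebUI does.

    Example files are dicts copied from disk with shutil.copy2;
    uploaded files expose .name and .getbuffer(). Returns None when
    nothing was provided, mirroring the WebUI's early exit.
    """
    input_dir = project_root / "input"
    input_dir.mkdir(parents=True, exist_ok=True)
    if not files:
        return None
    for file in files:
        if isinstance(file, dict):  # example files already on disk
            shutil.copy2(file["path"], input_dir / file["name"])
        else:  # uploaded files: write the in-memory buffer out
            (input_dir / file.name).write_bytes(file.getbuffer())
    return input_dir


# Usage with a temporary project root and one fake "example file":
root = Path(tempfile.mkdtemp())
src = root / "train.csv"
src.write_text("id,price\n1,100\n")
staged = prepare_input_directory([{"name": "train.csv", "path": str(src)}], root)
print(staged)
```

Copying into a fixed `input` directory is what lets the subsequent `Experiment(data_dir=str(input_dir), ...)` call treat uploads and the bundled example task identically.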
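
The Docker instructions rely on shell parameter expansion, `${LOGS_DIR:-$(pwd)/logs}`, to fall back to a default when the variable is unset or empty. For readers wiring the same behavior into a Python launcher, a small sketch of that fallback (the helper name is an assumption, not part of AIDE):

```python
import os
from pathlib import Path


def resolve_dir(var_name: str, default: Path) -> Path:
    """Mirror the shell's ${VAR:-default}: use the environment variable
    when it is set and non-empty, otherwise fall back to the default."""
    value = os.environ.get(var_name, "")
    return Path(value) if value else default


cwd = Path.cwd()
workspace_base = resolve_dir("WORKSPACE_BASE", cwd / "workspaces")
logs_dir = resolve_dir("LOGS_DIR", cwd / "logs")
print(workspace_base, logs_dir)
```

Note that `:-` (as used in the Dockerfile invocation) treats an empty string the same as unset, which is why the helper checks truthiness rather than key presence.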