diff --git a/.gitignore b/.gitignore
index 46d70ff..9a614c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,6 @@ cython_debug/
 
 # PyCharm
 .idea/
+
+env_run_dmrg
+notebooks/solution_jsons
\ No newline at end of file
diff --git a/examples/run_dmrg.ipynb b/examples/run_dmrg.ipynb
new file mode 100644
index 0000000..b16fdb3
--- /dev/null
+++ b/examples/run_dmrg.ipynb
@@ -0,0 +1,467 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Running Instructions\n",
+    "\n",
+    "Below instructions tested in Ubuntu 22.04.3 (Windows Subsystem for Linux 2, WSL2). This notebook will likely also work on MaxOS X.\n",
+    "1. Download the GSEE benchmark from Github: https://github.com/isi-usc-edu/qb-gsee-benchmark/archive/refs/heads/main.zip\n",
+    "2. Using a terminal, unzip the benchmark code: `unzip qb-gsee-benchmark-main.zip`\n",
+    "3. Enter the benchmark folder: `cd qb-gsee-benchmark-main`\n",
+    "4. Create a clean Python 3.10 virtual environment ([Python Website](https://www.python.org/downloads/)) with this command: `python -m venv env_run_dmrg`\n",
+    "5. Activate environment: `source env_run_dmrg/bin/activate`\n",
+    "6. Install Jupyter notebook: `python -m pip install notebook`\n",
+    "7. Open this notebook: `jupyter notebook notebooks/run_dmrg.ipynb` \n",
+    "8. Update relevant parameters as needed; see [General Parameters](#general-parameters) and [DMRG Parameters](#dmrg-parameters.)\n",
+    "9. Run the notebook.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%reset -f \n",
+    "# Install pip packages in the current Jupyter kernel (from https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/)\n",
+    "import sys\n",
+    "# Install DMRG\n",
+    "!{sys.executable} -m pip install --extra-index-url=https://block-hczhai.github.io/block2-preview/pypi/ git+https://github.com/jtcantin/dmrghandler\n",
+    "\n",
+    "!{sys.executable} -m pip install paramiko h5py numpy\n",
+    "\n",
+    "# This took about 8 minutes on my machine for the first time, but only about 7 seconds after that."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import urllib.request\n",
+    "from pathlib import Path\n",
+    "import json\n",
+    "import dmrghandler.config_io as config_io\n",
+    "import dmrghandler.slurm_scripts as slurm_scripts\n",
+    "import h5py\n",
+    "import jsonschema\n",
+    "import run_support as rs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# General Parameters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "remove_temp_files = True # IMPORTANT: If you want to keep the DRMG output files, not just the jsons, set this to False\n",
+    "environment_path=Path(\"../env_run_dmrg\")\n",
+    "ppk_path=\"/mnt/c/darpa-qb-key.ppk\" # Update this to the path of your private key\n",
+    "sftp_username=\"darpa-qb\" # Update this to your username\n",
+    "local_store_path = Path(\"dmrg_calculation_storage\")\n",
+    "repository_url = \"https://github.com/isi-usc-edu/qb-gsee-benchmark/archive/refs/heads/main.zip\"\n",
+    "\n",
+    "solution_save_location = \"solution_jsons\"\n",
+    "problem_instance_files_repository_path = (\n",
+    "    \"problem_instances\"\n",
+    ")\n",
+    "\n",
+    "chosen_problem_instance_files = [\n",
+    "    # \"problem_instance.mn_mono.cb40f3f7-ffe8-40e8-4544-f26aad5a8bd8.json\",\n",
+    "    # \"problem_instance.planted_solution_0007.3aaf0bb4-b412-4746-922a-c380b4024d00.json\",\n",
+    "    # \"problem_instance.planted_solution_0010.bf3fb654-72c7-4fd4-bfd1-9ffef5aaebd8.json\",\n",
+    "    \"problem_instance.planted_solution_0001.3d3b9e8a-7842-4ca1-bd82-eddb9804972d.json\",\n",
+    "    \"problem_instance.planted_solution_0008.b57eb979-5c09-4974-97b4-5862e109a1ae.json\",\n",
+    "]\n",
+    "\n",
+    "\n",
+    "json_solution_schema_url = \"https://raw.githubusercontent.com/isi-usc-edu/qb-gsee-benchmark/main/schemas/solution.schema.0.0.1.json\"\n",
+    "json_solution_schema_url_file = json_solution_schema_url\n",
+    "\n",
+    "contact_info = [{\n",
+    "    \"name\": \"Example E. Example\",\n",
+    "    \"email\": \"example@example.ca\",\n",
+    "    \"institution\": \"University of Example\",\n",
+    "}]\n",
+    "compute_details = {\n",
+    "    \"computing_environment_name\": \"Example Computer\",\n",
+    "    \"cpu_description\": 'Intel i5-1135G7 @ 2.40GHz',\n",
+    "    \"ram_available_gb\": \"16GB\",\n",
+    "    \"clock_speed\": \"2.4 GHz\",\n",
+    "    \"total_num_cores\" : 4,\n",
+    "}\n",
+    "\n",
+    "solver_details = {\n",
+    "\"solver_uuid\":\"13474cee-e648-48d3-9526-0314533ae30d\",\n",
+    "\"solver_short_name\":\"DMRG_surface_lowest_energy\",\n",
+    "\"compute_hardware_type\":\"classical_computer\",\n",
+    "\"classical_hardware_details\":compute_details,\n",
+    "\"algorithm_details\":\"DMRG with the lowest variational energy obtained so far.\",\n",
+    "\"software_details\":\"Block2 v0.5.3rc16 with dmrghandler, commit version d603fdc6409fc194a416aa3a519362d5d91790d9 or later.\",\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Obtain Problem Instance and FCIDUMP Files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download problem instance files\n",
+    "repository_filepath = Path(\"repository.zip\")\n",
+    "\n",
+    "repository_path = Path(\"qb-gsee-benchmark-main\")\n",
+    "if not repository_path.exists():\n",
+    "    # Download repository\n",
+    "    urllib.request.urlretrieve(repository_url, repository_filepath.name)\n",
+    "    # unzip repository\n",
+    "    os.system(f\"unzip {repository_filepath}\")\n",
+    "\n",
+    "problem_instance_files_path = repository_path / problem_instance_files_repository_path\n",
+    "problem_instance_files = list(problem_instance_files_path.glob(\"*.json\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download and save FCIDUMPs as listed in chosen_problem_instance_files\n",
+    "data_file_list = rs.download_task_fcidump_files(\n",
+    "    chosen_problem_instance_files,\n",
+    "    problem_instance_files_path,\n",
+    "    local_store_path,\n",
+    "    ppk_path,\n",
+    "    sftp_username,\n",
+    ")\n",
+    "print(data_file_list)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# DMRG Parameters "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# DMRG parameters for dmrghanlder\n",
+    "# dmrghandler is a wrapper for Block2: https://block2.readthedocs.io/en/latest/#\n",
+    "\n",
+    "# To simply run the script, you only need to update the following:\n",
+    "#       num_threads_list              # Ensure this is not more than the number of cores (or threads) on your machine\n",
+    "#       n_mkl_threads_list            # Ensure this is not more than the number of cores (or threads) on your machine\n",
+    "#       stack_mem                     # Ensure this is less than the total memory available on your machine; not a hard memory limit on Block2, though\n",
+    "#       python_environment_location   # Update this to the path of your python environment where this notebook is running; if the running instructions are followed, this need not be changed\n",
+    "\n",
+    "# For more control of the calculation, consider also changing:\n",
+    "#       max_time_limit_sec_list\n",
+    "#       starting_bond_dimension_list\n",
+    "#       max_num_sweeps_list\n",
+    "#       sweep_schedule_bond_dims_parameters\n",
+    "#       sweep_schedule_noise_list\n",
+    "#       sweep_schedule_davidson_threshold_list\n",
+    "#       init_state_seed_list\n",
+    "#       symmetry_type_list\n",
+    "#       reordering_method_list\n",
+    "#       config_file_prefix\n",
+    "#       job_name\n",
+    "\n",
+    "# For all list parameters, if a list with more than one value is given, the length of the list\n",
+    "# must be the same as the length of data_file_list\n",
+    "# If a single value is given, it will be used for all FCIDUMP files\n",
+    "config_dict = {\n",
+    "    \"plot_filename_prefix_list\": [\n",
+    "        Path(fcidump_file).stem for fcidump_file in data_file_list\n",
+    "    ],\n",
+    "    \"main_storage_folder_path_prefix\": str(local_store_path),\n",
+    "    \"max_bond_dimension_list\": [10],\n",
+    "    \"max_time_limit_sec_list\": [5 * 60],  # Max time limit per FCIDUMP file in seconds\n",
+    "    \"min_energy_change_hartree_list\": [\n",
+    "        1e-4\n",
+    "    ],  # Convergence threshold as the bond dimension is increased\n",
+    "    \"extrapolation_type_list\": [\"discard_weights\"],  # Auto extrapolation, unreliable\n",
+    "    \"starting_bond_dimension_list\": [4],\n",
+    "    \"max_num_sweeps_list\": [20],\n",
+    "    \"energy_convergence_threshold_list\": [1e-8],\n",
+    "    \"sweep_schedule_bond_dims_parameters\": [\n",
+    "        [(2, 4), (1, 5)]\n",
+    "    ],  # (division_factor, count),\n",
+    "    # e.g. [(2, 4), (1, 5)] and bond dimension of 3 -> [1, 1, 1, 1, 3, 3, 3, 3, 3]\n",
+    "    \"sweep_schedule_noise_list\": [[1e-4] * 4 + [1e-5] * 4 + [0]],\n",
+    "    \"sweep_schedule_davidson_threshold_list\": [[1e-10] * 9],\n",
+    "    \"init_state_bond_dimension_division_factor_list\": [2],\n",
+    "    \"init_state_seed_list\": [\n",
+    "        658724\n",
+    "    ],  # Random number generator seed for choosing the initial MPS state\n",
+    "    \"initial_mps_method_list\": [\"random\"],\n",
+    "    \"factor_half_convention_list\": [True],  # True for standard FCIDUMP files\n",
+    "    \"symmetry_type_list\": [\"SU(2)\"],  # \"SZ\" or \"SU(2)\"\n",
+    "    \"num_threads_list\": [4],\n",
+    "    \"n_mkl_threads_list\": [4],\n",
+    "    \"track_mem\": [False],\n",
+    "    \"reordering_method_list\": [\"fiedler, interaction matrix\"],\n",
+    "    \"calc_v_score_bool_list\": [True],\n",
+    "}\n",
+    "\n",
+    "dmrg_advanced_config = {\n",
+    "    \"occupancy_hint\": None,\n",
+    "    \"full_fci_space_bool\": True,\n",
+    "    \"init_state_direct_two_site_construction_bool\": False,\n",
+    "    \"davidson_type\": None,  # Default is None, for \"Normal\"\n",
+    "    \"eigenvalue_cutoff\": 1e-20,  # Cutoff of eigenvalues, default is 1e-20\n",
+    "    \"davidson_max_iterations\": 4000,  # Default is 4000\n",
+    "    \"davidson_max_krylov_subspace_size\": 50,  # Default is 50\n",
+    "    \"lowmem_noise_bool\": False,  # Whether to use a lower memory version of the noise, default is False\n",
+    "    \"sweep_start\": 0,  # Default is 0, where to start sweep\n",
+    "    \"initial_sweep_direction\": None,  # Default is None, True means forward sweep (left-to-right)\n",
+    "    \"stack_mem\": 10\n",
+    "    * 1024\n",
+    "    * 1024\n",
+    "    * 1024,  # in bytes; ensure that this value is less than the total memory available\n",
+    "    \"stack_mem_ratio\": 0.9,\n",
+    "    # \"do_single_calc\": False,\n",
+    "    \"num_states\": 1,  # Number of states to calculate, default is 1, the ground state\n",
+    "}\n",
+    "\n",
+    "# Generate configuration files\n",
+    "config_files_list, config_dict_single_file_list = config_io.gen_config_files(\n",
+    "    data_file_list=data_file_list,\n",
+    "    config_dict=config_dict,\n",
+    "    dmrg_advanced_config=dmrg_advanced_config,\n",
+    "    config_file_prefix=\"dmrg_example_run_\",\n",
+    ")\n",
+    "print(f\"config_files_list: {config_files_list}\")\n",
+    "# print(f\"config_dict_single_file_list: {config_dict_single_file_list}\")\n",
+    "\n",
+    "# Parameters for when using SLURM on a cluster\n",
+    "submit_dict = {\n",
+    "    \"time_cap_string\": \"00-23:59:00\",\n",
+    "    \"job_name\": \"dmrg_example_run_\",\n",
+    "    \"email\": \"eample_email@example.com\",\n",
+    "    \"account_name\": \"example\",\n",
+    "    \"tasks_per_node\": \"1\",\n",
+    "    \"cpus_per_task\": \"40\",\n",
+    "    \"partition\": \"debug\",\n",
+    "    \"python_environment_location\": \"../env_run_dmrg\",\n",
+    "}\n",
+    "\n",
+    "# Generate python and SLURM submission scripts\n",
+    "slurm_scripts.gen_run_files(submit_dict, config_dict_single_file_list)\n",
+    "\n",
+    "\n",
+    "# submit_commands only for use on a cluster with SLURM installed\n",
+    "submit_commands = slurm_scripts.gen_submit_commands(config_dict_single_file_list)\n",
+    "print(f\"submit_commands: \\n{submit_commands}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Run DMRG"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run DMRG on the FCIDUMPs\n",
+    "scratch_sim_path = Path(local_store_path) / Path(\"scratch_sim\")\n",
+    "scratch_sim_path.mkdir(parents=True, exist_ok=True)\n",
+    "scratch_sim_path_absolute = scratch_sim_path.resolve()\n",
+    "dmrg_hdf5_files = []\n",
+    "for config_dict in config_dict_single_file_list:\n",
+    "\n",
+    "    data_config = config_dict[\"data_config\"]\n",
+    "    python_run_file_name = data_config[\"python_run_file\"]\n",
+    "    os.environ[\"SCRATCH\"] = str(scratch_sim_path_absolute)\n",
+    "\n",
+    "    # Run DMRG\n",
+    "    os.system(f\"{str(environment_path)}/bin/python {python_run_file_name}\")\n",
+    "    print(\"DMRG NOW EXITED\")\n",
+    "\n",
+    "    # Get results\n",
+    "    main_storage_folder_path = data_config[\"main_storage_folder_path\"]\n",
+    "    hdf5_file_path = Path(main_storage_folder_path) / Path(\"dmrg_results.hdf5\")\n",
+    "    dmrg_hdf5_files.append(hdf5_file_path)\n",
+    "\n",
+    "    with h5py.File(hdf5_file_path, \"r\") as f:\n",
+    "        dmrg_energies = f[\"/final_dmrg_results/past_energies_dmrg\"][:]\n",
+    "        dmrg_bond_dimensions = f[\"/final_dmrg_results/bond_dims_used\"][:]\n",
+    "        discarded_weights = f[\"/final_dmrg_results/past_discarded_weights\"][:]\n",
+    "\n",
+    "        h_min_e_optket_norm = float(\n",
+    "            f[\"/first_preloop_calc/dmrg_results/h_min_e_optket_norm\"][()]\n",
+    "        )\n",
+    "        variance = float(f[\"/first_preloop_calc/dmrg_results/optket_variance\"][()])\n",
+    "        v_score_numerator = float(\n",
+    "            f[\"/first_preloop_calc/dmrg_results/v_score_numerator\"][()]\n",
+    "        )\n",
+    "        deviation_init_ket = float(\n",
+    "            f[\"/first_preloop_calc/dmrg_results/deviation_init_ket\"][()][0]\n",
+    "        )\n",
+    "        v_score_init_ket = float(\n",
+    "            f[\"/first_preloop_calc/dmrg_results/v_score_init_ket\"][()][0]\n",
+    "        )\n",
+    "        hf_energy = float(f[\"/first_preloop_calc/dmrg_results/hf_energy\"][()])\n",
+    "        deviation_hf = float(f[\"/first_preloop_calc/dmrg_results/deviation_hf\"][()][0])\n",
+    "        v_score_hartree_fock = float(\n",
+    "            f[\"/first_preloop_calc/dmrg_results/v_score_hartree_fock\"][()][0]\n",
+    "        )\n",
+    "        initial_ket_energy = float(\n",
+    "            f[\"/first_preloop_calc/dmrg_results/initial_ket_energy\"][()]\n",
+    "        )\n",
+    "\n",
+    "    print(f\"dmrg_energies: {dmrg_energies}\")\n",
+    "    print(f\"dmrg_bond_dimensions: {dmrg_bond_dimensions}\")\n",
+    "    print(f\"discarded_weights: {discarded_weights}\")\n",
+    "    print(f\"h_min_e_optket_norm: {h_min_e_optket_norm}\")\n",
+    "    print(f\"variance: {variance}\")\n",
+    "    print(f\"v_score_numerator: {v_score_numerator}\")\n",
+    "    print(f\"deviation_init_ket: {deviation_init_ket}\")\n",
+    "    print(f\"v_score_init_ket: {v_score_init_ket}\")\n",
+    "    print(f\"hf_energy: {hf_energy}\")\n",
+    "    print(f\"deviation_hf: {deviation_hf}\")\n",
+    "    print(f\"v_score_hartree_fock: {v_score_hartree_fock}\")\n",
+    "    print(f\"initial_ket_energy: {initial_ket_energy}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Collect Solution Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "uuid_fcidump_mapping_dict = rs.get_uuid_fcidump_mapping(submit_commands, local_store_path)\n",
+    "print(uuid_fcidump_mapping_dict)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_dmrgh5_data = rs.collect_dmrg_data(dmrg_hdf5_files, uuid_fcidump_mapping_dict)\n",
+    "all_dmrgh5_data = rs.filter_lowest_energy_data(all_dmrgh5_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prob_inst_data_sol_dict = rs.map_fcidump_to_problem_instances(\n",
+    "    chosen_problem_instance_files, problem_instance_files_path, all_dmrgh5_data\n",
+    ")\n",
+    "print(prob_inst_data_sol_dict)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Save solution json files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "rs.create_solution_files(\n",
+    "    prob_inst_data_sol_dict,\n",
+    "    solution_save_location,\n",
+    "    json_solution_schema_url_file,\n",
+    "    contact_info,\n",
+    "    solver_details,\n",
+    ")\n",
+    "\n",
+    "rs.validate_solution_files(json_solution_schema_url, prob_inst_data_sol_dict)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Remove Temporary Files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if remove_temp_files:\n",
+    "    os.remove(repository_filepath)\n",
+    "    os.remove(\"dmrghandler.log\")\n",
+    "    os.system(f\"rm -r {repository_path}\")\n",
+    "    os.system(f\"rm -r {scratch_sim_path}\")\n",
+    "    os.system(f\"rm -r {local_store_path}\")\n",
+    "    os.system(f\"rm -r config_store\")\n",
+    "    os.system(f\"rm -r tmp_dir\")\n",
+    "    "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "env_run_dmrg",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/run_support.py b/examples/run_support.py
new file mode 100644
index 0000000..e80f9b2
--- /dev/null
+++ b/examples/run_support.py
@@ -0,0 +1,471 @@
+from urllib.parse import urlparse
+import urllib.request
+import json
+import jsonschema
+import paramiko
+from pathlib import Path
+import gzip
+import shutil
+import io
+import dmrghandler.data_processing as dp
+import h5py
+import numpy as np
+import copy
+import uuid
+import datetime
+import os
+
+
+def fetch_file_from_sftp(
+    url=None, local_path=None, ppk_path=None, username=None, port=None
+):
+    """
+    Download a file from an SFTP server using a private key file (.ppk).
+    Code by John Penuel, with slight modifications; originates in https://github.com/isi-usc-edu/qb-gsee-benchmark/blob/main/examples/sftp-fetch.ipynb
+    Args:
+        url (str): The URL of the file to download.
+        local_path (str): The local path to save the file.
+        ppk_path (str): The path to the private key file (.ppk).
+        username (str): The username to use for the SFTP connection.
+        port (int): The port to use for the SFTP connection.
+    """
+
+    parsed_url = urlparse(url)
+    hostname = parsed_url.hostname
+    remote_path = parsed_url.path.lstrip("/")
+
+    try:
+        # Create an SSH client
+        with paramiko.SSHClient() as client:
+            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+
+            # Connect using the private key file (.ppk)
+            client.connect(
+                hostname=hostname, port=port, username=username, key_filename=ppk_path
+            )
+
+            # Open an SFTP session
+            with client.open_sftp() as sftp:
+                sftp.get(remote_path, local_path)
+
+        print(
+            f"File fetched successfully from {hostname}: {Path(remote_path).name} -> {local_path}"
+        )
+    except Exception as e:
+        print(f"Error: {e}")
+
+
+def download_task_fcidump_files(
+    chosen_problem_instance_files,
+    problem_instance_files_path,
+    local_store_path,
+    ppk_path,
+    sftp_username,
+):
+    data_file_list = []
+    for problem_instance_file in chosen_problem_instance_files:
+        with open(problem_instance_files_path / problem_instance_file) as f:
+            problem_instance = json.load(f)
+        tasks_list = problem_instance["tasks"]
+
+        for task in tasks_list:
+            for file_dict in task["supporting_files"]:
+                if (
+                    "fcidump" in file_dict["instance_data_object_url"]
+                    or "FCIDUMP" in file_dict["instance_data_object_url"]
+                ):
+                    fcidump_url = file_dict["instance_data_object_url"]
+                    fcidump_store_path = local_store_path / "fcidumps"
+                    fcidump_store_path.mkdir(parents=True, exist_ok=True)
+                    local_path = fcidump_store_path / fcidump_url.split("/")[-1]
+                    fetch_file_from_sftp(
+                        url=fcidump_url,
+                        local_path=local_path,
+                        ppk_path=ppk_path,
+                        username=sftp_username,
+                        port=22,
+                    )
+
+                    # If compressed, decompress
+                    if str(local_path).endswith(".gz"):
+                        print(f"Uncompressing {local_path.name}")
+                        with gzip.open(local_path, "rb") as f_in:
+                            local_path = Path(str(local_path).split(".gz")[0])
+                            with open(local_path, "wb") as f_out:
+                                shutil.copyfileobj(f_in, f_out)
+
+                    data_file_list.append(local_path)
+
+    return data_file_list
+
+
+def get_uuid_fcidump_mapping(submit_commands, local_store_path):
+    # Get uuid to fcidump mapping
+    buf = io.StringIO(submit_commands.split("cd config_store/submit_dir")[0])
+    orig_data_dict_list = []
+    uuid_fcidump_mapping_dict = {}
+    data_lines = buf.readlines()
+    for line_iter, line in enumerate(data_lines):
+        # print(line)
+        if line.startswith("### "):
+            continue
+        if line.startswith(f"# {local_store_path}/fcidumps/"):
+            fcidump_name_temp = line.split("/")[-1].split("#")[0].strip()
+            next_line = data_lines[line_iter + 1]
+            if not next_line.startswith("sbatch ../"):
+                raise ValueError("Expected sbatch line")
+            uuid_value = next_line.split("/")[1].strip()
+            uuid_fcidump_mapping_dict[uuid_value] = fcidump_name_temp
+    return uuid_fcidump_mapping_dict
+
+
+def collect_dmrg_data(dmrg_hdf5_files, uuid_fcidump_mapping_dict):
+    all_dmrgh5_data = {}
+
+    for file in dmrg_hdf5_files:
+        print(file)
+        with h5py.File(file, "r") as hdf5_file:
+            solution_uuid = hdf5_file["parent_folder_name"][()].decode("utf-8")
+            FCIDUMP_name = uuid_fcidump_mapping_dict[solution_uuid]
+            FCIDUMP_short_name_temp = FCIDUMP_name.split("fcidump.")[-1].split(
+                "FCIDUMP_"
+            )[-1]
+
+            if len(FCIDUMP_short_name_temp.split(".")[-1]) == len(
+                "66d35e58-89f8-4f9c-baa9-e7cbd0c846e4"
+            ):
+                FCIDUMP_short_name = ".".join(FCIDUMP_short_name_temp.split(".")[:-1])
+            else:
+                FCIDUMP_short_name = FCIDUMP_short_name_temp
+
+        if FCIDUMP_short_name not in all_dmrgh5_data:
+            all_dmrgh5_data[FCIDUMP_short_name] = {}
+
+        if solution_uuid in all_dmrgh5_data[FCIDUMP_short_name]:
+            raise ValueError("Duplicate solution uuid")
+
+        (
+            dmrg_energies,
+            bond_dimensions,
+            discarded_weights,
+            num_loops,
+            num_dmrg_calculations,
+            loop_cpu_times_s,
+            loop_wall_times_s,
+            num_sweeps_list,
+            final_sweep_delta_energies_list,
+            reordering_method_list,
+            reordering_method_cpu_times_s,
+            reordering_method_wall_times_s,
+            extra_dict,
+        ) = dp.get_data_from_incomplete_processing(file)
+
+        min_dmrg_energy_arg = np.argmin(dmrg_energies)
+        min_dmrg_energy = dmrg_energies[min_dmrg_energy_arg]
+        min_dmrg_energy_bond_dim = bond_dimensions[min_dmrg_energy_arg]
+        min_dmrg_energy_discarded_weight = discarded_weights[min_dmrg_energy_arg]
+        min_dmrg_energy_loop_cpu_time = loop_cpu_times_s[
+            min_dmrg_energy_arg
+        ]  # overall_time
+        min_dmrg_energy_loop_wall_time = loop_wall_times_s[
+            min_dmrg_energy_arg
+        ]  # overall_time
+        min_dmrg_energy_num_sweeps = num_sweeps_list[min_dmrg_energy_arg]
+        min_dmrg_energy_final_sweep_delta_energy = final_sweep_delta_energies_list[
+            min_dmrg_energy_arg
+        ]
+        min_dmrg_energy_reordering_method = reordering_method_list[min_dmrg_energy_arg]
+        min_dmrg_energy_reordering_method_cpu_time = reordering_method_cpu_times_s[
+            min_dmrg_energy_arg
+        ]
+        min_dmrg_energy_reordering_method_wall_time = reordering_method_wall_times_s[
+            min_dmrg_energy_arg
+        ]
+
+        min_dmrg_energy_preprocessing_sum = (
+            float(
+                extra_dict["loop_driver_initialize_system_wall_time_s_list"][
+                    min_dmrg_energy_arg
+                ]
+            )
+            + float(
+                extra_dict["loop_generate_initial_mps_wall_time_s_list"][
+                    min_dmrg_energy_arg
+                ]
+            )
+            + float(
+                extra_dict["loop_get_qchem_hami_mpo_wall_time_s_list"][
+                    min_dmrg_energy_arg
+                ]
+            )
+            + float(
+                extra_dict["loop_reorder_integrals_wall_time_s_list"][
+                    min_dmrg_energy_arg
+                ]
+            )
+            + float(
+                extra_dict["loop_make_driver_wall_time_s_list"][min_dmrg_energy_arg]
+            )
+        )
+
+        min_dmrg_energy_preprocessing_sum_cpu = (
+            float(
+                extra_dict["loop_driver_initialize_system_cpu_time_s_list"][
+                    min_dmrg_energy_arg
+                ]
+            )
+            + float(
+                extra_dict["loop_generate_initial_mps_cpu_time_s_list"][
+                    min_dmrg_energy_arg
+                ]
+            )
+            + float(
+                extra_dict["loop_get_qchem_hami_mpo_cpu_time_s_list"][
+                    min_dmrg_energy_arg
+                ]
+            )
+            + float(
+                extra_dict["loop_reorder_integrals_cpu_time_s_list"][
+                    min_dmrg_energy_arg
+                ]
+            )
+            + float(extra_dict["loop_make_driver_cpu_time_s_list"][min_dmrg_energy_arg])
+        )
+
+        min_dmrg_energy_algorithm_run_time = float(
+            extra_dict["loop_dmrg_optimization_wall_time_s_list"][min_dmrg_energy_arg]
+        )
+
+        min_dmrg_energy_postprocessing_time = float(
+            extra_dict["loop_copy_mps_wall_time_s_list"][min_dmrg_energy_arg]
+        )
+
+        min_dmrg_energy_algorithm_run_cpu_time = float(
+            extra_dict["loop_dmrg_optimization_cpu_time_s_list"][min_dmrg_energy_arg]
+        )
+
+        min_dmrg_energy_postprocessing_cpu_time = float(
+            extra_dict["loop_copy_mps_cpu_time_s_list"][min_dmrg_energy_arg]
+        )
+
+        dmrg_data_dict = {
+            "dmrg_energy": min_dmrg_energy,
+            "bond_dimension": min_dmrg_energy_bond_dim,
+            "discarded_weight": min_dmrg_energy_discarded_weight,
+            "loop_cpu_time": min_dmrg_energy_loop_cpu_time,
+            "loop_wall_time": min_dmrg_energy_loop_wall_time,
+            "num_sweeps": min_dmrg_energy_num_sweeps,
+            "final_sweep_delta_energy": min_dmrg_energy_final_sweep_delta_energy,
+            "reordering_method": min_dmrg_energy_reordering_method,
+            "reordering_method_cpu_time": min_dmrg_energy_reordering_method_cpu_time,
+            "reordering_method_wall_time": min_dmrg_energy_reordering_method_wall_time,
+            "preprocessing_time": min_dmrg_energy_preprocessing_sum,
+            "preprocessing_cpu_time": min_dmrg_energy_preprocessing_sum_cpu,
+            "algorithm_run_time": min_dmrg_energy_algorithm_run_time,
+            "postprocessing_time": min_dmrg_energy_postprocessing_time,
+            "algorithm_run_cpu_time": min_dmrg_energy_algorithm_run_cpu_time,
+            "postprocessing_cpu_time": min_dmrg_energy_postprocessing_cpu_time,
+        }
+
+        all_dmrgh5_data[FCIDUMP_short_name][solution_uuid] = copy.deepcopy(
+            dmrg_data_dict
+        )
+
+    return all_dmrgh5_data
+
+
+def filter_lowest_energy_data(all_dmrgh5_data):
+    filtered_data = {}
+
+    for FCIDUMP_short_name, solution_data in all_dmrgh5_data.items():
+        min_energy = np.inf
+        min_energy_uuid = None
+        for solution_uuid, dmrg_data in solution_data.items():
+            if dmrg_data["dmrg_energy"] < min_energy:
+                min_energy = dmrg_data["dmrg_energy"]
+                min_energy_uuid = solution_uuid
+
+        filtered_data[FCIDUMP_short_name] = {"solution_uuid": min_energy_uuid}
+        filtered_data[FCIDUMP_short_name].update(solution_data[min_energy_uuid])
+
+    return filtered_data
+
+
+def map_fcidump_to_problem_instances(
+    chosen_problem_instance_files, problem_instance_files_path, all_dmrgh5_data
+):
+    prob_inst_data_sol_dict = {}
+    print(chosen_problem_instance_files)
+
+    for prob_inst_file in chosen_problem_instance_files:
+        prob_inst_file_path = problem_instance_files_path / prob_inst_file
+        with open(prob_inst_file_path) as f:
+            prob_inst_data = json.load(f)
+        prob_inst_uuid = prob_inst_data["problem_instance_uuid"]
+        task_list = prob_inst_data["tasks"]
+
+        for task in task_list:
+            task_uuid = task["task_uuid"]
+            supporting_files = task["supporting_files"]
+            for supp_file in supporting_files:
+                if (
+                    "fcidump" in supp_file["instance_data_object_url"]
+                    or "FCIDUMP" in supp_file["instance_data_object_url"]
+                ):
+                    FCIDUMP_url = supp_file["instance_data_object_url"]
+                    FCIDUMP_uuid = supp_file["instance_data_object_uuid"]
+                    break
+
+            relevant_fcidump_short_name = None
+            FCIDUMP_url_test = FCIDUMP_url
+            print(FCIDUMP_url_test)
+            if "benzene" in FCIDUMP_url_test:
+                print(
+                    "Benzene found----------------------------------------------------"
+                )
+                FCIDUMP_url_test = FCIDUMP_url_test.replace("benzene", "b")
+                print(FCIDUMP_url_test)
+
+            for dmrg_solution_fcidump_short_name in all_dmrgh5_data.keys():
+                if "V1" in dmrg_solution_fcidump_short_name:
+                    print(dmrg_solution_fcidump_short_name)
+
+                if (
+                    "/fcidump." + dmrg_solution_fcidump_short_name + "."
+                    in FCIDUMP_url_test
+                    or "/FCIDUMP_" + dmrg_solution_fcidump_short_name + "."
+                    in FCIDUMP_url_test
+                    or "/FCIDUMP_" + dmrg_solution_fcidump_short_name + "_"
+                    in FCIDUMP_url_test
+                ):
+                    relevant_fcidump_short_name = dmrg_solution_fcidump_short_name
+                    break
+
+            if prob_inst_uuid not in prob_inst_data_sol_dict:
+                prob_inst_data_sol_dict[prob_inst_uuid] = {
+                    "prob_inst_file": prob_inst_file_path,
+                    "prob_inst_short_name": prob_inst_data["short_name"],
+                }
+
+            if relevant_fcidump_short_name is None:
+                print(
+                    f"Could not find relevant fcidump short name for {FCIDUMP_url}, assuming solution not available."
+                )
+                print(prob_inst_uuid)
+                print(task_uuid)
+                prob_inst_data_sol_dict[prob_inst_uuid].update(
+                    {task_uuid: "NO SOLUTION"}
+                )
+                continue
+
+            prob_inst_data_sol_dict[prob_inst_uuid][task_uuid] = {
+                "solution_uuid": all_dmrgh5_data[relevant_fcidump_short_name][
+                    "solution_uuid"
+                ],
+                "instance_data_object_uuid": FCIDUMP_uuid,
+                "instance_data_object_url": FCIDUMP_url,
+                "dmrg_data": all_dmrgh5_data[relevant_fcidump_short_name],
+            }
+
+    return prob_inst_data_sol_dict
+
+
+def create_solution_files(
+    prob_inst_data_sol_dict,
+    solution_save_location,
+    json_solution_schema_url_file,
+    contact_info,
+    solver_details,
+):
+    for prob_inst_uuid, prob_inst_data in prob_inst_data_sol_dict.items():
+        solution_file_uuid = str(uuid.uuid4())
+        print(prob_inst_data.keys())
+        print(prob_inst_data.items())
+        Path(solution_save_location).mkdir(parents=True, exist_ok=True)
+        prob_inst_sol_file = Path(solution_save_location) / Path(
+            f"solution.{prob_inst_data['prob_inst_short_name']}.{prob_inst_uuid}_{solution_file_uuid}.json"
+        )
+
+        # Timestamp in ISO 8601 format in UTC (note the `Z`) with final Z
+        creation_timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
+        # Replace the time zone shift with Z
+        creation_timestamp = creation_timestamp[:-6] + "Z"
+        prob_inst_sol_data = {
+            "$schema": json_solution_schema_url_file,
+            "solution_uuid": solution_file_uuid,
+            "problem_instance_uuid": prob_inst_uuid,
+            "creation_timestamp": creation_timestamp,
+            "contact_info": contact_info,
+            "is_resource_estimate": False,
+            "solution_data": [],
+            "solver_details": solver_details,
+            "digital_signature": None,
+        }
+        for task_uuid, task_data in prob_inst_data.items():
+            if len(task_uuid) != len("66d35e58-89f8-4f9c-baa9-e7cbd0c846e4"):
+                continue
+
+            if task_data == "NO SOLUTION":
+                continue
+            dmrg_data = task_data["dmrg_data"]
+
+            overall_time = dmrg_data["loop_wall_time"]
+            preprocessing_time = dmrg_data["preprocessing_time"]
+            algorithm_run_time = dmrg_data["algorithm_run_time"]
+            postprocessing_time = dmrg_data["postprocessing_time"]
+            overall_cpu_time = dmrg_data["loop_cpu_time"]
+            preprocessing_cpu_time = dmrg_data["preprocessing_cpu_time"]
+            algorithm_run_cpu_time = dmrg_data["algorithm_run_cpu_time"]
+            postprocessing_cpu_time = dmrg_data["postprocessing_cpu_time"]
+            # Verify algorithm_run_time is a float
+            if not isinstance(algorithm_run_time, float):
+                raise ValueError("algorithm_run_time is not a float")
+
+            run_time = {
+                "overall_time": {"seconds": overall_time},
+                "preprocessing_time": {"seconds": preprocessing_time},
+                "algorithm_run_time": {"seconds": algorithm_run_time},
+                "postprocessing_time": {"seconds": postprocessing_time},
+            }
+            run_time_cpu = {
+                "overall_time": {"seconds": overall_cpu_time},
+                "preprocessing_time": {"seconds": preprocessing_cpu_time},
+                "algorithm_run_time": {"seconds": algorithm_run_cpu_time},
+                "postprocessing_time": {"seconds": postprocessing_cpu_time},
+            }
+            solution_details = {
+                "instance_data_object_url": task_data["instance_data_object_url"],
+                "bond_dimension": dmrg_data["bond_dimension"],
+                "discarded_weight": dmrg_data["discarded_weight"],
+                "calculation_uuid": task_data["solution_uuid"],
+            }
+            solution_dict = {
+                "task_uuid": task_uuid,
+                "energy": dmrg_data["dmrg_energy"],
+                "energy_units": "Hartree",
+                "run_time": run_time,
+                "run_time_cpu": run_time_cpu,
+                "solution_details": solution_details,
+            }
+            prob_inst_sol_data["solution_data"].append(solution_dict)
+        with open(prob_inst_sol_file, "w") as f:
+            json.dump(prob_inst_sol_data, f, indent=4)
+            print(f"Saved {prob_inst_sol_file}")
+        prob_inst_data["file_name"] = prob_inst_sol_file
+
+def validate_solution_files(json_solution_schema_url, prob_inst_data_sol_dict):
+    # Validate the solution json files against the schema
+    schema_filepath = Path("temp_sol_schema.json")
+
+    # Download schema
+    urllib.request.urlretrieve(json_solution_schema_url, schema_filepath.name)
+    schema = json.load(open(schema_filepath))
+
+    for prob_inst_uuid, prob_inst_data in prob_inst_data_sol_dict.items():
+        prob_inst_sol_file = prob_inst_data["file_name"]
+        print(prob_inst_sol_file)
+        jsonschema.validate(json.load(open(prob_inst_sol_file)), schema)
+        print(f"Validated {prob_inst_sol_file}")
+
+    # Remove schema file
+    schema_filepath.unlink()