diff --git a/.gitignore b/.gitignore index 46d70ff..9a614c9 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,6 @@ cython_debug/ # PyCharm .idea/ + +env_run_dmrg +notebooks/solution_jsons \ No newline at end of file diff --git a/examples/run_dmrg.ipynb b/examples/run_dmrg.ipynb new file mode 100644 index 0000000..b16fdb3 --- /dev/null +++ b/examples/run_dmrg.ipynb @@ -0,0 +1,467 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Running Instructions\n", + "\n", + "The instructions below were tested on Ubuntu 22.04.3 (Windows Subsystem for Linux 2, WSL2). This notebook will likely also work on macOS.\n", + "1. Download the GSEE benchmark from GitHub: https://github.com/isi-usc-edu/qb-gsee-benchmark/archive/refs/heads/main.zip\n", + "2. Using a terminal, unzip the benchmark code: `unzip qb-gsee-benchmark-main.zip`\n", + "3. Enter the benchmark folder: `cd qb-gsee-benchmark-main`\n", + "4. Create a clean Python 3.10 virtual environment ([Python Website](https://www.python.org/downloads/)) with this command: `python -m venv env_run_dmrg`\n", + "5. Activate environment: `source env_run_dmrg/bin/activate`\n", + "6. Install Jupyter notebook: `python -m pip install notebook`\n", + "7. Open this notebook: `jupyter notebook notebooks/run_dmrg.ipynb` \n", + "8. Update relevant parameters as needed; see [General Parameters](#general-parameters) and [DMRG Parameters](#dmrg-parameters).\n", + "9. 
Run the notebook.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%reset -f \n", + "# Install pip packages in the current Jupyter kernel (from https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/)\n", + "import sys\n", + "# Install DMRG\n", + "!{sys.executable} -m pip install --extra-index-url=https://block-hczhai.github.io/block2-preview/pypi/ git+https://github.com/jtcantin/dmrghandler\n", + "\n", + "!{sys.executable} -m pip install paramiko h5py numpy\n", + "\n", + "# This took about 8 minutes on my machine for the first time, but only about 7 seconds after that." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request\n", + "from pathlib import Path\n", + "import json\n", + "import dmrghandler.config_io as config_io\n", + "import dmrghandler.slurm_scripts as slurm_scripts\n", + "import h5py\n", + "import jsonschema\n", + "import run_support as rs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# General Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remove_temp_files = True # IMPORTANT: If you want to keep the DRMG output files, not just the jsons, set this to False\n", + "environment_path=Path(\"../env_run_dmrg\")\n", + "ppk_path=\"/mnt/c/darpa-qb-key.ppk\" # Update this to the path of your private key\n", + "sftp_username=\"darpa-qb\" # Update this to your username\n", + "local_store_path = Path(\"dmrg_calculation_storage\")\n", + "repository_url = \"https://github.com/isi-usc-edu/qb-gsee-benchmark/archive/refs/heads/main.zip\"\n", + "\n", + "solution_save_location = \"solution_jsons\"\n", + "problem_instance_files_repository_path = (\n", + " \"problem_instances\"\n", + ")\n", + "\n", + "chosen_problem_instance_files = [\n", + " # 
\"problem_instance.mn_mono.cb40f3f7-ffe8-40e8-4544-f26aad5a8bd8.json\",\n", + " # \"problem_instance.planted_solution_0007.3aaf0bb4-b412-4746-922a-c380b4024d00.json\",\n", + " # \"problem_instance.planted_solution_0010.bf3fb654-72c7-4fd4-bfd1-9ffef5aaebd8.json\",\n", + " \"problem_instance.planted_solution_0001.3d3b9e8a-7842-4ca1-bd82-eddb9804972d.json\",\n", + " \"problem_instance.planted_solution_0008.b57eb979-5c09-4974-97b4-5862e109a1ae.json\",\n", + "]\n", + "\n", + "\n", + "json_solution_schema_url = \"https://raw.githubusercontent.com/isi-usc-edu/qb-gsee-benchmark/main/schemas/solution.schema.0.0.1.json\"\n", + "json_solution_schema_url_file = json_solution_schema_url\n", + "\n", + "contact_info = [{\n", + " \"name\": \"Example E. Example\",\n", + " \"email\": \"example@example.ca\",\n", + " \"institution\": \"University of Example\",\n", + "}]\n", + "compute_details = {\n", + " \"computing_environment_name\": \"Example Computer\",\n", + " \"cpu_description\": 'Intel i5-1135G7 @ 2.40GHz',\n", + " \"ram_available_gb\": \"16GB\",\n", + " \"clock_speed\": \"2.4 GHz\",\n", + " \"total_num_cores\" : 4,\n", + "}\n", + "\n", + "solver_details = {\n", + "\"solver_uuid\":\"13474cee-e648-48d3-9526-0314533ae30d\",\n", + "\"solver_short_name\":\"DMRG_surface_lowest_energy\",\n", + "\"compute_hardware_type\":\"classical_computer\",\n", + "\"classical_hardware_details\":compute_details,\n", + "\"algorithm_details\":\"DMRG with the lowest variational energy obtained so far.\",\n", + "\"software_details\":\"Block2 v0.5.3rc16 with dmrghandler, commit version d603fdc6409fc194a416aa3a519362d5d91790d9 or later.\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Obtain Problem Instance and FCIDUMP Files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download problem instance files\n", + "repository_filepath = Path(\"repository.zip\")\n", + "\n", + "repository_path = 
Path(\"qb-gsee-benchmark-main\")\n", + "if not repository_path.exists():\n", + " # Download repository\n", + " urllib.request.urlretrieve(repository_url, repository_filepath.name)\n", + " # unzip repository\n", + " os.system(f\"unzip {repository_filepath}\")\n", + "\n", + "problem_instance_files_path = repository_path / problem_instance_files_repository_path\n", + "problem_instance_files = list(problem_instance_files_path.glob(\"*.json\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download and save FCIDUMPs as listed in chosen_problem_instance_files\n", + "data_file_list = rs.download_task_fcidump_files(\n", + " chosen_problem_instance_files,\n", + " problem_instance_files_path,\n", + " local_store_path,\n", + " ppk_path,\n", + " sftp_username,\n", + ")\n", + "print(data_file_list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DMRG Parameters " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# DMRG parameters for dmrghanlder\n", + "# dmrghandler is a wrapper for Block2: https://block2.readthedocs.io/en/latest/#\n", + "\n", + "# To simply run the script, you only need to update the following:\n", + "# num_threads_list # Ensure this is not more than the number of cores (or threads) on your machine\n", + "# n_mkl_threads_list # Ensure this is not more than the number of cores (or threads) on your machine\n", + "# stack_mem # Ensure this is less than the total memory available on your machine; not a hard memory limit on Block2, though\n", + "# python_environment_location # Update this to the path of your python environment where this notebook is running; if the running instructions are followed, this need not be changed\n", + "\n", + "# For more control of the calculation, consider also changing:\n", + "# max_time_limit_sec_list\n", + "# starting_bond_dimension_list\n", + "# 
max_num_sweeps_list\n", + "# sweep_schedule_bond_dims_parameters\n", + "# sweep_schedule_noise_list\n", + "# sweep_schedule_davidson_threshold_list\n", + "# init_state_seed_list\n", + "# symmetry_type_list\n", + "# reordering_method_list\n", + "# config_file_prefix\n", + "# job_name\n", + "\n", + "# For all list parameters, if a list with more than one value is given, the length of the list\n", + "# must be the same as the length of data_file_list\n", + "# If a single value is given, it will be used for all FCIDUMP files\n", + "config_dict = {\n", + " \"plot_filename_prefix_list\": [\n", + " Path(fcidump_file).stem for fcidump_file in data_file_list\n", + " ],\n", + " \"main_storage_folder_path_prefix\": str(local_store_path),\n", + " \"max_bond_dimension_list\": [10],\n", + " \"max_time_limit_sec_list\": [5 * 60], # Max time limit per FCIDUMP file in seconds\n", + " \"min_energy_change_hartree_list\": [\n", + " 1e-4\n", + " ], # Convergence threshold as the bond dimension is increased\n", + " \"extrapolation_type_list\": [\"discard_weights\"], # Auto extrapolation, unreliable\n", + " \"starting_bond_dimension_list\": [4],\n", + " \"max_num_sweeps_list\": [20],\n", + " \"energy_convergence_threshold_list\": [1e-8],\n", + " \"sweep_schedule_bond_dims_parameters\": [\n", + " [(2, 4), (1, 5)]\n", + " ], # (division_factor, count),\n", + " # e.g. 
[(2, 4), (1, 5)] and bond dimension of 3 -> [1, 1, 1, 1, 3, 3, 3, 3, 3]\n", + " \"sweep_schedule_noise_list\": [[1e-4] * 4 + [1e-5] * 4 + [0]],\n", + " \"sweep_schedule_davidson_threshold_list\": [[1e-10] * 9],\n", + " \"init_state_bond_dimension_division_factor_list\": [2],\n", + " \"init_state_seed_list\": [\n", + " 658724\n", + " ], # Random number generator seed for choosing the initial MPS state\n", + " \"initial_mps_method_list\": [\"random\"],\n", + " \"factor_half_convention_list\": [True], # True for standard FCIDUMP files\n", + " \"symmetry_type_list\": [\"SU(2)\"], # \"SZ\" or \"SU(2)\"\n", + " \"num_threads_list\": [4],\n", + " \"n_mkl_threads_list\": [4],\n", + " \"track_mem\": [False],\n", + " \"reordering_method_list\": [\"fiedler, interaction matrix\"],\n", + " \"calc_v_score_bool_list\": [True],\n", + "}\n", + "\n", + "dmrg_advanced_config = {\n", + " \"occupancy_hint\": None,\n", + " \"full_fci_space_bool\": True,\n", + " \"init_state_direct_two_site_construction_bool\": False,\n", + " \"davidson_type\": None, # Default is None, for \"Normal\"\n", + " \"eigenvalue_cutoff\": 1e-20, # Cutoff of eigenvalues, default is 1e-20\n", + " \"davidson_max_iterations\": 4000, # Default is 4000\n", + " \"davidson_max_krylov_subspace_size\": 50, # Default is 50\n", + " \"lowmem_noise_bool\": False, # Whether to use a lower memory version of the noise, default is False\n", + " \"sweep_start\": 0, # Default is 0, where to start sweep\n", + " \"initial_sweep_direction\": None, # Default is None, True means forward sweep (left-to-right)\n", + " \"stack_mem\": 10\n", + " * 1024\n", + " * 1024\n", + " * 1024, # in bytes; ensure that this value is less than the total memory available\n", + " \"stack_mem_ratio\": 0.9,\n", + " # \"do_single_calc\": False,\n", + " \"num_states\": 1, # Number of states to calculate, default is 1, the ground state\n", + "}\n", + "\n", + "# Generate configuration files\n", + "config_files_list, config_dict_single_file_list = 
config_io.gen_config_files(\n", + " data_file_list=data_file_list,\n", + " config_dict=config_dict,\n", + " dmrg_advanced_config=dmrg_advanced_config,\n", + " config_file_prefix=\"dmrg_example_run_\",\n", + ")\n", + "print(f\"config_files_list: {config_files_list}\")\n", + "# print(f\"config_dict_single_file_list: {config_dict_single_file_list}\")\n", + "\n", + "# Parameters for when using SLURM on a cluster\n", + "submit_dict = {\n", + " \"time_cap_string\": \"00-23:59:00\",\n", + " \"job_name\": \"dmrg_example_run_\",\n", + " \"email\": \"eample_email@example.com\",\n", + " \"account_name\": \"example\",\n", + " \"tasks_per_node\": \"1\",\n", + " \"cpus_per_task\": \"40\",\n", + " \"partition\": \"debug\",\n", + " \"python_environment_location\": \"../env_run_dmrg\",\n", + "}\n", + "\n", + "# Generate python and SLURM submission scripts\n", + "slurm_scripts.gen_run_files(submit_dict, config_dict_single_file_list)\n", + "\n", + "\n", + "# submit_commands only for use on a cluster with SLURM installed\n", + "submit_commands = slurm_scripts.gen_submit_commands(config_dict_single_file_list)\n", + "print(f\"submit_commands: \\n{submit_commands}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run DMRG" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run DMRG on the FCIDUMPs\n", + "scratch_sim_path = Path(local_store_path) / Path(\"scratch_sim\")\n", + "scratch_sim_path.mkdir(parents=True, exist_ok=True)\n", + "scratch_sim_path_absolute = scratch_sim_path.resolve()\n", + "dmrg_hdf5_files = []\n", + "for config_dict in config_dict_single_file_list:\n", + "\n", + " data_config = config_dict[\"data_config\"]\n", + " python_run_file_name = data_config[\"python_run_file\"]\n", + " os.environ[\"SCRATCH\"] = str(scratch_sim_path_absolute)\n", + "\n", + " # Run DMRG\n", + " os.system(f\"{str(environment_path)}/bin/python {python_run_file_name}\")\n", + " print(\"DMRG NOW 
EXITED\")\n", + "\n", + " # Get results\n", + " main_storage_folder_path = data_config[\"main_storage_folder_path\"]\n", + " hdf5_file_path = Path(main_storage_folder_path) / Path(\"dmrg_results.hdf5\")\n", + " dmrg_hdf5_files.append(hdf5_file_path)\n", + "\n", + " with h5py.File(hdf5_file_path, \"r\") as f:\n", + " dmrg_energies = f[\"/final_dmrg_results/past_energies_dmrg\"][:]\n", + " dmrg_bond_dimensions = f[\"/final_dmrg_results/bond_dims_used\"][:]\n", + " discarded_weights = f[\"/final_dmrg_results/past_discarded_weights\"][:]\n", + "\n", + " h_min_e_optket_norm = float(\n", + " f[\"/first_preloop_calc/dmrg_results/h_min_e_optket_norm\"][()]\n", + " )\n", + " variance = float(f[\"/first_preloop_calc/dmrg_results/optket_variance\"][()])\n", + " v_score_numerator = float(\n", + " f[\"/first_preloop_calc/dmrg_results/v_score_numerator\"][()]\n", + " )\n", + " deviation_init_ket = float(\n", + " f[\"/first_preloop_calc/dmrg_results/deviation_init_ket\"][()][0]\n", + " )\n", + " v_score_init_ket = float(\n", + " f[\"/first_preloop_calc/dmrg_results/v_score_init_ket\"][()][0]\n", + " )\n", + " hf_energy = float(f[\"/first_preloop_calc/dmrg_results/hf_energy\"][()])\n", + " deviation_hf = float(f[\"/first_preloop_calc/dmrg_results/deviation_hf\"][()][0])\n", + " v_score_hartree_fock = float(\n", + " f[\"/first_preloop_calc/dmrg_results/v_score_hartree_fock\"][()][0]\n", + " )\n", + " initial_ket_energy = float(\n", + " f[\"/first_preloop_calc/dmrg_results/initial_ket_energy\"][()]\n", + " )\n", + "\n", + " print(f\"dmrg_energies: {dmrg_energies}\")\n", + " print(f\"dmrg_bond_dimensions: {dmrg_bond_dimensions}\")\n", + " print(f\"discarded_weights: {discarded_weights}\")\n", + " print(f\"h_min_e_optket_norm: {h_min_e_optket_norm}\")\n", + " print(f\"variance: {variance}\")\n", + " print(f\"v_score_numerator: {v_score_numerator}\")\n", + " print(f\"deviation_init_ket: {deviation_init_ket}\")\n", + " print(f\"v_score_init_ket: {v_score_init_ket}\")\n", + " 
print(f\"hf_energy: {hf_energy}\")\n", + " print(f\"deviation_hf: {deviation_hf}\")\n", + " print(f\"v_score_hartree_fock: {v_score_hartree_fock}\")\n", + " print(f\"initial_ket_energy: {initial_ket_energy}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Collect Solution Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uuid_fcidump_mapping_dict = rs.get_uuid_fcidump_mapping(submit_commands, local_store_path)\n", + "print(uuid_fcidump_mapping_dict)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "all_dmrgh5_data = rs.collect_dmrg_data(dmrg_hdf5_files, uuid_fcidump_mapping_dict)\n", + "all_dmrgh5_data = rs.filter_lowest_energy_data(all_dmrgh5_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "prob_inst_data_sol_dict = rs.map_fcidump_to_problem_instances(\n", + " chosen_problem_instance_files, problem_instance_files_path, all_dmrgh5_data\n", + ")\n", + "print(prob_inst_data_sol_dict)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save solution json files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "rs.create_solution_files(\n", + " prob_inst_data_sol_dict,\n", + " solution_save_location,\n", + " json_solution_schema_url_file,\n", + " contact_info,\n", + " solver_details,\n", + ")\n", + "\n", + "rs.validate_solution_files(json_solution_schema_url, prob_inst_data_sol_dict)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remove Temporary Files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if remove_temp_files:\n", + " os.remove(repository_filepath)\n", + " os.remove(\"dmrghandler.log\")\n", + " os.system(f\"rm -r 
def fetch_file_from_sftp(
    url=None, local_path=None, ppk_path=None, username=None, port=None
):
    """Download a file from an SFTP server using a private key file (.ppk).

    Code by John Penuel, with slight modifications; originates in
    https://github.com/isi-usc-edu/qb-gsee-benchmark/blob/main/examples/sftp-fetch.ipynb

    The download is best-effort: a failure is reported on stdout rather than
    raised, and the outcome is signalled through the return value.

    Args:
        url (str): The URL of the file to download.
        local_path (str | Path): The local path to save the file.
        ppk_path (str): The path to the private key file (.ppk).
        username (str): The username to use for the SFTP connection.
        port (int | None): The port to use for the SFTP connection. When
            ``None``, the port embedded in ``url`` is used, falling back to
            the standard SSH port 22.

    Returns:
        bool: ``True`` when the file was downloaded, ``False`` otherwise.
    """
    parsed_url = urlparse(url)
    hostname = parsed_url.hostname
    remote_path = parsed_url.path.lstrip("/")

    try:
        if port is None:
            # Do not forward ``None`` to paramiko; resolve a concrete port.
            port = parsed_url.port or 22

        with paramiko.SSHClient() as client:
            # NOTE(security): AutoAddPolicy trusts any unknown host key, so the
            # server identity is not verified. Consider loading known host
            # keys and rejecting unknown hosts for anything beyond examples.
            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

            # Connect using the private key file (.ppk)
            client.connect(
                hostname=hostname, port=port, username=username, key_filename=ppk_path
            )

            with client.open_sftp() as sftp:
                sftp.get(remote_path, local_path)
    except Exception as e:
        # Deliberately broad: this is the best-effort boundary of the download.
        print(f"Error: {e}")
        return False

    print(
        f"File fetched successfully from {hostname}: {Path(remote_path).name} -> {local_path}"
    )
    return True


def download_task_fcidump_files(
    chosen_problem_instance_files,
    problem_instance_files_path,
    local_store_path,
    ppk_path,
    sftp_username,
):
    """Fetch (and decompress) the FCIDUMP files of the chosen problem instances.

    Every supporting file whose URL contains ``fcidump`` or ``FCIDUMP`` is
    downloaded into ``<local_store_path>/fcidumps``. Files ending in ``.gz``
    are decompressed next to the archive.

    Args:
        chosen_problem_instance_files (list[str]): Problem instance JSON file
            names, relative to ``problem_instance_files_path``.
        problem_instance_files_path (str | Path): Folder with the JSON files.
        local_store_path (str | Path): Root folder for downloaded data.
        ppk_path (str): Path to the private key used for SFTP.
        sftp_username (str): User name used for SFTP.

    Returns:
        list[Path]: Paths of the usable (decompressed) FCIDUMP files. A file
        that could not be downloaded and has no local copy is skipped with a
        warning instead of being listed.
    """
    problem_instance_dir = Path(problem_instance_files_path)
    fcidump_store_path = Path(local_store_path) / "fcidumps"

    data_file_list = []
    for problem_instance_file in chosen_problem_instance_files:
        with open(problem_instance_dir / problem_instance_file) as f:
            problem_instance = json.load(f)

        for task in problem_instance["tasks"]:
            for file_dict in task["supporting_files"]:
                fcidump_url = file_dict["instance_data_object_url"]
                if "fcidump" not in fcidump_url and "FCIDUMP" not in fcidump_url:
                    continue

                fcidump_store_path.mkdir(parents=True, exist_ok=True)
                local_path = fcidump_store_path / fcidump_url.split("/")[-1]
                fetched = fetch_file_from_sftp(
                    url=fcidump_url,
                    local_path=local_path,
                    ppk_path=ppk_path,
                    username=sftp_username,
                    port=22,
                )

                if not local_path.exists():
                    # Nothing to decompress or hand to DMRG; do not list a
                    # path that does not exist.
                    print(
                        f"Warning: {local_path} is not available locally; skipping {fcidump_url}"
                    )
                    continue
                if not fetched:
                    print(
                        f"Warning: download failed; using existing local copy {local_path}"
                    )

                # If compressed, decompress
                if local_path.name.endswith(".gz"):
                    print(f"Uncompressing {local_path.name}")
                    # Strip only the trailing ".gz"; the name itself may
                    # contain ".gz" elsewhere (e.g. "x.gzipped.gz").
                    decompressed_path = local_path.with_name(
                        local_path.name[: -len(".gz")]
                    )
                    with gzip.open(local_path, "rb") as f_in, open(
                        decompressed_path, "wb"
                    ) as f_out:
                        shutil.copyfileobj(f_in, f_out)
                    local_path = decompressed_path

                data_file_list.append(local_path)

    return data_file_list
# Keys of the per-loop timing lists (in ``extra_dict``) that together make up
# the preprocessing time reported for a DMRG loop.
_PREPROCESSING_WALL_TIME_KEYS = (
    "loop_driver_initialize_system_wall_time_s_list",
    "loop_generate_initial_mps_wall_time_s_list",
    "loop_get_qchem_hami_mpo_wall_time_s_list",
    "loop_reorder_integrals_wall_time_s_list",
    "loop_make_driver_wall_time_s_list",
)
_PREPROCESSING_CPU_TIME_KEYS = (
    "loop_driver_initialize_system_cpu_time_s_list",
    "loop_generate_initial_mps_cpu_time_s_list",
    "loop_get_qchem_hami_mpo_cpu_time_s_list",
    "loop_reorder_integrals_cpu_time_s_list",
    "loop_make_driver_cpu_time_s_list",
)


def get_uuid_fcidump_mapping(submit_commands, local_store_path):
    """Map calculation UUIDs to FCIDUMP file names found in ``submit_commands``.

    Only the text before ``cd config_store/submit_dir`` is scanned. A line
    starting with ``# <local_store_path>/fcidumps/`` names an FCIDUMP file and
    must be followed directly by an ``sbatch ../<uuid>/...`` line.

    Args:
        submit_commands (str): Text produced for the SLURM submission.
        local_store_path (str | Path): Storage root used in the comment lines.

    Returns:
        dict[str, str]: ``{uuid: fcidump_file_name}``.

    Raises:
        ValueError: If an FCIDUMP comment line is not followed by an sbatch
            line (including when it is the last line).
    """
    header_text = submit_commands.split("cd config_store/submit_dir")[0]
    lines = header_text.split("\n")
    fcidump_comment_prefix = f"# {local_store_path}/fcidumps/"

    uuid_fcidump_mapping_dict = {}
    for index, line in enumerate(lines):
        if line.startswith("### "):
            continue
        if not line.startswith(fcidump_comment_prefix):
            continue

        fcidump_name = line.split("/")[-1].split("#")[0].strip()
        # Guard the look-ahead so a trailing comment yields the documented
        # ValueError rather than an IndexError.
        next_line = lines[index + 1] if index + 1 < len(lines) else ""
        if not next_line.startswith("sbatch ../"):
            raise ValueError("Expected sbatch line")
        uuid_value = next_line.split("/")[1].strip()
        uuid_fcidump_mapping_dict[uuid_value] = fcidump_name

    return uuid_fcidump_mapping_dict


def _fcidump_short_name(fcidump_name):
    """Strip the ``fcidump.``/``FCIDUMP_`` prefix and a trailing UUID segment."""
    stem = fcidump_name.split("fcidump.")[-1].split("FCIDUMP_")[-1]
    segments = stem.split(".")
    # A final dot-separated segment as long as a canonical UUID string is
    # treated as a UUID suffix and dropped.
    if len(segments[-1]) == len("66d35e58-89f8-4f9c-baa9-e7cbd0c846e4"):
        return ".".join(segments[:-1])
    return stem


def _sum_timings(extra_dict, keys, index):
    """Add up entry ``index`` of each timing list named in ``keys``."""
    total = 0.0
    for key in keys:
        total += float(extra_dict[key][index])
    return total


def collect_dmrg_data(dmrg_hdf5_files, uuid_fcidump_mapping_dict):
    """Collect the lowest-energy DMRG loop of every result file.

    Args:
        dmrg_hdf5_files (list): Paths of ``dmrg_results.hdf5`` files.
        uuid_fcidump_mapping_dict (dict[str, str]): ``{uuid: fcidump name}``
            as returned by ``get_uuid_fcidump_mapping``.

    Returns:
        dict: ``{fcidump_short_name: {solution_uuid: data}}`` where ``data``
        holds the energy, bond dimension, discarded weight and timings of the
        loop with the minimum DMRG energy in that file.

    Raises:
        ValueError: If the same solution UUID occurs twice for one FCIDUMP.
        KeyError: If a file's UUID is missing from the mapping.
    """
    all_dmrgh5_data = {}

    for file in dmrg_hdf5_files:
        print(file)
        with h5py.File(file, "r") as hdf5_file:
            solution_uuid = hdf5_file["parent_folder_name"][()].decode("utf-8")

        short_name = _fcidump_short_name(uuid_fcidump_mapping_dict[solution_uuid])
        runs_for_fcidump = all_dmrgh5_data.setdefault(short_name, {})
        if solution_uuid in runs_for_fcidump:
            raise ValueError("Duplicate solution uuid")

        # NOTE(review): presumably every returned sequence has one entry per
        # DMRG loop, indexed consistently - confirm against dmrghandler.
        (
            dmrg_energies,
            bond_dimensions,
            discarded_weights,
            _num_loops,
            _num_dmrg_calculations,
            loop_cpu_times_s,
            loop_wall_times_s,
            num_sweeps_list,
            final_sweep_delta_energies_list,
            reordering_method_list,
            reordering_method_cpu_times_s,
            reordering_method_wall_times_s,
            extra_dict,
        ) = dp.get_data_from_incomplete_processing(file)

        # Index of the loop with the lowest variational energy.
        best = np.argmin(dmrg_energies)

        dmrg_data_dict = {
            "dmrg_energy": dmrg_energies[best],
            "bond_dimension": bond_dimensions[best],
            "discarded_weight": discarded_weights[best],
            "loop_cpu_time": loop_cpu_times_s[best],  # overall_time
            "loop_wall_time": loop_wall_times_s[best],  # overall_time
            "num_sweeps": num_sweeps_list[best],
            "final_sweep_delta_energy": final_sweep_delta_energies_list[best],
            "reordering_method": reordering_method_list[best],
            "reordering_method_cpu_time": reordering_method_cpu_times_s[best],
            "reordering_method_wall_time": reordering_method_wall_times_s[best],
            "preprocessing_time": _sum_timings(
                extra_dict, _PREPROCESSING_WALL_TIME_KEYS, best
            ),
            "preprocessing_cpu_time": _sum_timings(
                extra_dict, _PREPROCESSING_CPU_TIME_KEYS, best
            ),
            "algorithm_run_time": float(
                extra_dict["loop_dmrg_optimization_wall_time_s_list"][best]
            ),
            "postprocessing_time": float(
                extra_dict["loop_copy_mps_wall_time_s_list"][best]
            ),
            "algorithm_run_cpu_time": float(
                extra_dict["loop_dmrg_optimization_cpu_time_s_list"][best]
            ),
            "postprocessing_cpu_time": float(
                extra_dict["loop_copy_mps_cpu_time_s_list"][best]
            ),
        }

        # Deep copy so the stored record does not alias the loaded data.
        runs_for_fcidump[solution_uuid] = copy.deepcopy(dmrg_data_dict)

    return all_dmrgh5_data


def filter_lowest_energy_data(all_dmrgh5_data):
    """Keep, for every FCIDUMP, only the run with the lowest DMRG energy.

    Args:
        all_dmrgh5_data (dict): ``{fcidump_short_name: {solution_uuid: data}}``
            where each ``data`` dict has a ``"dmrg_energy"`` entry.

    Returns:
        dict: ``{fcidump_short_name: {"solution_uuid": uuid, **data}}``.
        An FCIDUMP without any run of finite energy (no runs, or only NaN/inf
        energies) is left out and reported on stdout.
    """
    filtered_data = {}

    for fcidump_short_name, solution_data in all_dmrgh5_data.items():
        min_energy = np.inf
        min_energy_uuid = None
        for solution_uuid, dmrg_data in solution_data.items():
            # Strict "<" keeps the first of tied runs and ignores NaN.
            if dmrg_data["dmrg_energy"] < min_energy:
                min_energy = dmrg_data["dmrg_energy"]
                min_energy_uuid = solution_uuid

        if min_energy_uuid is None:
            print(
                f"Warning: no run with a finite energy for {fcidump_short_name}; skipping."
            )
            continue

        filtered_data[fcidump_short_name] = {
            "solution_uuid": min_energy_uuid,
            **solution_data[min_energy_uuid],
        }

    return filtered_data
def _match_fcidump_short_name(fcidump_url, all_dmrgh5_data):
    """Return the key of ``all_dmrgh5_data`` whose name occurs in the URL, or None."""
    url_for_matching = fcidump_url
    print(url_for_matching)
    if "benzene" in url_for_matching:
        print("Benzene found----------------------------------------------------")
        # NOTE(review): the URL spells "benzene" where the result keys
        # apparently use "b"; kept as in the original - confirm the naming.
        url_for_matching = url_for_matching.replace("benzene", "b")
        print(url_for_matching)

    for short_name in all_dmrgh5_data:
        if "V1" in short_name:
            print(short_name)

        if (
            "/fcidump." + short_name + "." in url_for_matching
            or "/FCIDUMP_" + short_name + "." in url_for_matching
            or "/FCIDUMP_" + short_name + "_" in url_for_matching
        ):
            return short_name
    return None


def map_fcidump_to_problem_instances(
    chosen_problem_instance_files, problem_instance_files_path, all_dmrgh5_data
):
    """Attach the DMRG results to the tasks of the chosen problem instances.

    For every task, the first supporting file whose URL contains ``fcidump``
    or ``FCIDUMP`` is matched by name against the keys of ``all_dmrgh5_data``.

    Args:
        chosen_problem_instance_files (list[str]): Problem instance JSON file
            names, relative to ``problem_instance_files_path``.
        problem_instance_files_path (str | Path): Folder with the JSON files.
        all_dmrgh5_data (dict): ``{fcidump_short_name: data}`` where ``data``
            contains at least ``"solution_uuid"``.

    Returns:
        dict: ``{problem_instance_uuid: entry}``. Each entry holds
        ``"prob_inst_file"``, ``"prob_inst_short_name"`` and one item per task
        UUID: either a dict with the solution data or the string
        ``"NO SOLUTION"`` when the task has no FCIDUMP file or no matching
        DMRG result.
    """
    prob_inst_data_sol_dict = {}
    print(chosen_problem_instance_files)

    for prob_inst_file in chosen_problem_instance_files:
        prob_inst_file_path = Path(problem_instance_files_path) / prob_inst_file
        with open(prob_inst_file_path) as f:
            prob_inst_data = json.load(f)
        prob_inst_uuid = prob_inst_data["problem_instance_uuid"]

        for task in prob_inst_data["tasks"]:
            task_uuid = task["task_uuid"]

            # Reset per task so a task without an FCIDUMP file cannot inherit
            # the file (and the solution) of the previous task.
            fcidump_url = None
            fcidump_uuid = None
            for supp_file in task["supporting_files"]:
                candidate_url = supp_file["instance_data_object_url"]
                if "fcidump" in candidate_url or "FCIDUMP" in candidate_url:
                    fcidump_url = candidate_url
                    fcidump_uuid = supp_file["instance_data_object_uuid"]
                    break

            relevant_fcidump_short_name = None
            if fcidump_url is not None:
                relevant_fcidump_short_name = _match_fcidump_short_name(
                    fcidump_url, all_dmrgh5_data
                )

            if prob_inst_uuid not in prob_inst_data_sol_dict:
                prob_inst_data_sol_dict[prob_inst_uuid] = {
                    "prob_inst_file": prob_inst_file_path,
                    "prob_inst_short_name": prob_inst_data["short_name"],
                }

            if relevant_fcidump_short_name is None:
                if fcidump_url is None:
                    print(
                        f"No FCIDUMP supporting file for task {task_uuid}, assuming solution not available."
                    )
                else:
                    print(
                        f"Could not find relevant fcidump short name for {fcidump_url}, assuming solution not available."
                    )
                print(prob_inst_uuid)
                print(task_uuid)
                prob_inst_data_sol_dict[prob_inst_uuid][task_uuid] = "NO SOLUTION"
                continue

            dmrg_data = all_dmrgh5_data[relevant_fcidump_short_name]
            prob_inst_data_sol_dict[prob_inst_uuid][task_uuid] = {
                "solution_uuid": dmrg_data["solution_uuid"],
                "instance_data_object_uuid": fcidump_uuid,
                "instance_data_object_url": fcidump_url,
                "dmrg_data": dmrg_data,
            }

    return prob_inst_data_sol_dict
def _json_fallback(value):
    """Convert values the ``json`` module cannot encode natively."""
    if isinstance(value, np.generic):
        # NumPy scalars (e.g. int64) -> the equivalent Python scalar.
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, Path):
        return str(value)
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def create_solution_files(
    prob_inst_data_sol_dict,
    solution_save_location,
    json_solution_schema_url_file,
    contact_info,
    solver_details,
):
    """Write one solution JSON file per problem instance.

    Args:
        prob_inst_data_sol_dict (dict): Output of
            ``map_fcidump_to_problem_instances``. Each entry is updated in
            place with ``"file_name"``, the path of the written file.
        solution_save_location (str | Path): Folder for the solution files;
            created when missing.
        json_solution_schema_url_file (str): Value stored under ``"$schema"``.
        contact_info (list[dict]): Stored under ``"contact_info"``.
        solver_details (dict): Stored under ``"solver_details"``.

    Raises:
        ValueError: If a task's ``algorithm_run_time`` is not a float.
    """
    save_dir = Path(solution_save_location)
    save_dir.mkdir(parents=True, exist_ok=True)
    uuid_string_length = len("66d35e58-89f8-4f9c-baa9-e7cbd0c846e4")

    for prob_inst_uuid, prob_inst_data in prob_inst_data_sol_dict.items():
        solution_file_uuid = str(uuid.uuid4())
        print(prob_inst_data.keys())
        print(prob_inst_data.items())
        prob_inst_sol_file = save_dir / Path(
            f"solution.{prob_inst_data['prob_inst_short_name']}.{prob_inst_uuid}_{solution_file_uuid}.json"
        )

        # Timestamp in ISO 8601 format in UTC, with the "+00:00" offset
        # written as a final "Z".
        creation_timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
        creation_timestamp = creation_timestamp[:-6] + "Z"

        prob_inst_sol_data = {
            "$schema": json_solution_schema_url_file,
            "solution_uuid": solution_file_uuid,
            "problem_instance_uuid": prob_inst_uuid,
            "creation_timestamp": creation_timestamp,
            "contact_info": contact_info,
            "is_resource_estimate": False,
            "solution_data": [],
            "solver_details": solver_details,
            "digital_signature": None,
        }

        for task_uuid, task_data in prob_inst_data.items():
            # Task entries are keyed by UUID strings; the other keys
            # (file path, short name, ...) are metadata of the instance.
            if len(task_uuid) != uuid_string_length:
                continue
            if task_data == "NO SOLUTION":
                continue

            dmrg_data = task_data["dmrg_data"]
            algorithm_run_time = dmrg_data["algorithm_run_time"]
            if not isinstance(algorithm_run_time, float):
                raise ValueError("algorithm_run_time is not a float")

            run_time = {
                "overall_time": {"seconds": dmrg_data["loop_wall_time"]},
                "preprocessing_time": {"seconds": dmrg_data["preprocessing_time"]},
                "algorithm_run_time": {"seconds": algorithm_run_time},
                "postprocessing_time": {"seconds": dmrg_data["postprocessing_time"]},
            }
            run_time_cpu = {
                "overall_time": {"seconds": dmrg_data["loop_cpu_time"]},
                "preprocessing_time": {"seconds": dmrg_data["preprocessing_cpu_time"]},
                "algorithm_run_time": {"seconds": dmrg_data["algorithm_run_cpu_time"]},
                "postprocessing_time": {
                    "seconds": dmrg_data["postprocessing_cpu_time"]
                },
            }
            solution_details = {
                "instance_data_object_url": task_data["instance_data_object_url"],
                "bond_dimension": dmrg_data["bond_dimension"],
                "discarded_weight": dmrg_data["discarded_weight"],
                "calculation_uuid": task_data["solution_uuid"],
            }
            prob_inst_sol_data["solution_data"].append(
                {
                    "task_uuid": task_uuid,
                    "energy": dmrg_data["dmrg_energy"],
                    "energy_units": "Hartree",
                    "run_time": run_time,
                    "run_time_cpu": run_time_cpu,
                    "solution_details": solution_details,
                }
            )

        # Serialize before opening the file so a serialization error cannot
        # leave a truncated solution file behind.
        serialized = json.dumps(prob_inst_sol_data, indent=4, default=_json_fallback)
        with open(prob_inst_sol_file, "w") as f:
            f.write(serialized)
        print(f"Saved {prob_inst_sol_file}")
        prob_inst_data["file_name"] = prob_inst_sol_file


def validate_solution_files(json_solution_schema_url, prob_inst_data_sol_dict):
    """Validate the written solution JSON files against the solution schema.

    Args:
        json_solution_schema_url (str): URL the JSON schema is downloaded from.
        prob_inst_data_sol_dict (dict): Entries must carry ``"file_name"``, as
            set by ``create_solution_files``.

    Raises:
        jsonschema.ValidationError: If a solution file violates the schema.
    """
    # Read the schema straight from the response: no temporary file in the
    # working directory, hence nothing to clean up when validation fails.
    with urllib.request.urlopen(json_solution_schema_url) as response:
        schema = json.load(response)

    for prob_inst_data in prob_inst_data_sol_dict.values():
        prob_inst_sol_file = prob_inst_data["file_name"]
        print(prob_inst_sol_file)
        with open(prob_inst_sol_file) as f:
            solution = json.load(f)
        jsonschema.validate(solution, schema)
        print(f"Validated {prob_inst_sol_file}")