diff --git a/aiidalab_qe/report.py b/aiidalab_qe/report.py
index f2b78dbc1..06aab11de 100644
--- a/aiidalab_qe/report.py
+++ b/aiidalab_qe/report.py
@@ -30,7 +30,7 @@ def _generate_report_dict(qeapp_wc):
 
     # Properties
     run_relax = builder_parameters["relax_type"] != "none"
-    run_bands = builder_parameters.get("run_bands", False)
+    run_bands = builder_parameters["run_bands"]
     run_pdos = builder_parameters.get("run_pdos", False)
 
     yield "relaxed", run_relax
diff --git a/aiidalab_qe/steps.py b/aiidalab_qe/steps.py
index 01dcac8c0..0f99f09f7 100644
--- a/aiidalab_qe/steps.py
+++ b/aiidalab_qe/steps.py
@@ -4,6 +4,7 @@
 
     * Carl Simon Adorf <simon.adorf@epfl.ch>
 """
+from math import ceil
 from pprint import pformat
 
 import ipywidgets as ipw
@@ -251,9 +252,13 @@ def __init__(self, **kwargs):
 class SubmitQeAppWorkChainStep(ipw.VBox, WizardAppWidgetStep):
     """Step for submission of a bands workchain."""
 
-    # The app will issue a warning to the user if the ratio between the total
-    # number of sites and the total number of CPUs is larger than this value:
-    MAX_NUM_SITES_PER_CPU_WARN_THRESHOLD = 6
+    # This number provides a rough estimate for how many MPI tasks are needed
+    # for a given structure.
+    NUM_SITES_PER_MPI_TASK_DEFAULT = 6
+
+    # Warn the user if they are trying to run calculations for a large
+    # structure on localhost.
+    RUN_ON_LOCALHOST_NUM_SITES_WARN_THRESHOLD = 10
 
     input_structure = traitlets.Instance(StructureData, allow_none=True)
     process = traitlets.Instance(WorkChainNode, allow_none=True)
@@ -273,7 +278,9 @@ def __init__(self, **kwargs):
         self._setup_builder_parameters_update()
 
         self.codes_selector.pw.observe(self._update_state, "selected_code")
-        self.codes_selector.pw.observe(self._update_cpus_per_node, "selected_code")
+        self.codes_selector.pw.observe(
+            self._set_num_mpi_tasks_to_default, "selected_code"
+        )
 
         self.tab = ipw.Tab(
             children=[
@@ -361,7 +368,7 @@ def _update_state(self, _=None):
         &times;
         &times;
-        {message}
+        {message}
""" def _show_alert_message(self, message, alert_class="info"): @@ -372,42 +379,72 @@ def _show_alert_message(self, message, alert_class="info"): ) ) - def _get_default_cpus_per_node(self): - """Determine the default number of cpus per node based on the code configuration.""" + def _get_default_num_mpi_tasks(self): + """Determine a reasonable value for the number of MPI tasks for the selected structure.""" if self.codes_selector.pw.selected_code: - selected_code = self.codes_selector.pw.selected_code - return selected_code.computer.metadata["default_mpiprocs_per_machine"] + num_sites = len(self.input_structure.sites) if self.input_structure else 1 + num_mpi_tasks = max( + 1, ceil(num_sites / self.NUM_SITES_PER_MPI_TASK_DEFAULT) + ) + return num_mpi_tasks + return 1 - def _update_cpus_per_node(self, change): - """Update the configured cpus per node based on the current code selection.""" - if change["new"]: - current_value = self.resources_config.cpus_per_node.value - new_value = self._get_default_cpus_per_node() - self.resources_config.cpus_per_node.max = new_value - if current_value != new_value: - self.resources_config.cpus_per_node.value = new_value - self._show_alert_message( - "The number cpus per node was automatically adjusted to " - f"the number of cores per node for the selected code ({new_value})." - ) - self._check_resources() + def _set_num_mpi_tasks_to_default(self, _=None): + """Set the number of MPI tasks to a reasonable value for the selected structure.""" + self.resources_config.num_mpi_tasks.value = self._get_default_num_mpi_tasks() + self._check_resources() def _check_resources(self): """Check whether the currently selected resources will be sufficient and warn if not.""" - if self.input_structure: - num_sites = len(self.input_structure.sites) - num_cpus = self.resources_config.total_num_cpus.value - if num_sites // num_cpus > self.MAX_NUM_SITES_PER_CPU_WARN_THRESHOLD: - self._show_alert_message( - "The ratio of the number of sites in the selected structure " - f"({num_sites}) and the number of total CPUs available for the " - f"calculations ({num_cpus}) is very large. Consider to increase " - "the number of cores or nodes and select a code running on a " - 'larger computer if necessary (see the "Codes & Resources" tab).', - alert_class="warning", - ) - self.expert_mode = True + num_mpi_tasks = self.resources_config.num_mpi_tasks.value + on_localhost = ( + self.codes_selector.pw.selected_code.computer.get_hostname() == "localhost" + ) + if self.codes_selector.pw.selected_code and on_localhost and num_mpi_tasks > 1: + self._show_alert_message( + "The selected code would be executed on the local host, but " + "the number of MPI tasks is larger than one. Please review " + "the configuration and consider to select a code that runs " + 'on a larger system if necessary (see the "Codes & ' + 'Resources" tab).', + alert_class="warning", + ) + self.expert_mode = True + elif ( + self.input_structure + and on_localhost + and len(self.input_structure.sites) + > self.RUN_ON_LOCALHOST_NUM_SITES_WARN_THRESHOLD + ): + self._show_alert_message( + "The selected code would be executed on the local host, but the " + "number of sites of the selected structure is relatively large. 
" + "Consider to select a code that runs on a larger system if " + 'necessary (see the "Codes & Resources" tab).', + alert_class="warning", + ) + self.expert_mode = True + + def _get_cpus_per_node(self): + """Determine the default number of CPUs per node based on the code configuration.""" + if self.codes_selector.pw.selected_code: + selected_code = self.codes_selector.pw.selected_code + return selected_code.computer.metadata["default_mpiprocs_per_machine"] + return 1 + + def _determine_resources(self): + """Calculate the number of nodes and tasks per node.""" + cpus_per_node = self._get_cpus_per_node() + num_mpi_tasks_selected = self.resources_config.num_mpi_tasks.value + + num_nodes = max(1, ceil(num_mpi_tasks_selected / cpus_per_node)) + num_mpi_tasks_per_node = ceil(num_mpi_tasks_selected / num_nodes) + + return { + "num_machines": num_nodes, + "num_mpiprocs_per_machine": num_mpi_tasks_per_node, + } @traitlets.observe("state") def _observe_state(self, change): @@ -422,7 +459,7 @@ def _observe_state(self, change): def _observe_input_structure(self, change): self.set_trait("builder_parameters", self._default_builder_parameters()) self._update_state() - self._check_resources() + self._set_num_mpi_tasks_to_default() @traitlets.observe("process") def _observe_process(self, change): @@ -550,10 +587,7 @@ def submit(self, _=None): if not run_bands: builder.pop("bands") - resources = { - "num_machines": self.resources_config.number_of_nodes.value, - "num_mpiprocs_per_machine": self.resources_config.cpus_per_node.value, - } + resources = self._determine_resources() update_resources(builder, resources) self.process = submit(builder) diff --git a/aiidalab_qe/widgets.py b/aiidalab_qe/widgets.py index 9cd3e3d38..152d9d2c7 100644 --- a/aiidalab_qe/widgets.py +++ b/aiidalab_qe/widgets.py @@ -231,82 +231,39 @@ def _observe_node(self, change): display(viewer(change["new"])) -class ResourceSelectionWidget(ipw.HBox): - """Widget for the selection of compute (CPU) resources.""" +class ResourceSelectionWidget(ipw.VBox): + """Widget for the selection of compute resources.""" - resource_selection_title = ipw.HTML( + title = ipw.HTML( """

Resources

""" ) - resource_selection_prompt = ipw.HTML( - "Select the compute resources for this calculation." - ) - resource_selection_help = ipw.HTML( + prompt = ipw.HTML( """

- Select the amount of resources you want to use for the calculations. - Although specifying the optimal configuration of resources is a complex issue, in general: -

- -

Note that the amount of resources is limited by the computer on - which you are running the calculations on. The default computer - (localhost) is only suitable for single core operations which will be - insufficient for larger calculations. Make sure to setup a code for - these by clicking on "Setup new code".

""" + Specify the number of MPI tasks for this calculation. + In general, larger structures will require a larger number of tasks. +

""" ) def __init__(self, **kwargs): extra = { "style": {"description_width": "150px"}, - "layout": {"max_width": "200px"}, + # "layout": {"max_width": "200px"}, + "layout": {"min_width": "310px"}, } - self.number_of_nodes = ipw.BoundedIntText( - value=1, step=1, min=1, description="# nodes", disabled=False, **extra - ) - self.cpus_per_node = ipw.BoundedIntText( - value=1, step=1, min=1, max=1, description="# cpus per node", **extra - ) - self.total_num_cpus = ipw.BoundedIntText( - value=1, step=1, min=1, description="# total cpus", disabled=True, **extra - ) - # Update the total # of CPUs int text: - self.number_of_nodes.observe(self._update_total_num_cpus, "value") - self.cpus_per_node.observe(self._update_total_num_cpus, "value") + self.num_mpi_tasks = ipw.BoundedIntText( + value=1, step=1, min=1, description="# MPI tasks", **extra + ) super().__init__( children=[ - ipw.VBox( - children=[ - self.resource_selection_title, - ipw.HBox( - children=[ - self.resource_selection_help, - ipw.VBox( - children=[ - self.number_of_nodes, - self.cpus_per_node, - self.total_num_cpus, - ], - layout=ipw.Layout(min_width="310px"), - ), - ] - ), - ] - ) + self.title, + ipw.HBox(children=[self.prompt, self.num_mpi_tasks]), ] ) - def _update_total_num_cpus(self, change): - self.total_num_cpus.value = ( - self.number_of_nodes.value * self.cpus_per_node.value - ) - def reset(self): - with self.hold_trait_notifications(): - self.number_of_nodes.value = 1 - self.cpus_per_node.value = 1 + self.num_mpi_tasks.value = 1