From 32a9996956b21d89f78db2da8e15fc6693778c04 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 9 Jul 2022 06:20:04 -0400 Subject: [PATCH] add simplify jdata docs; fix and check example (#787) * add simplify jdata docs; fix and check example Fix #779. * `use_ele_temp` should be optional * `sys_format` should be optional * `sys_configs` should be list * `labeled` should be optional * `use_clusters` and `cluster_cutoff` are optional * fix keyword option * add to toc --- doc/index.rst | 1 + doc/simplify/simplify-jdata.rst | 6 ++ dpgen/generator/arginfo.py | 14 ++--- dpgen/simplify/arginfo.py | 107 +++++++++++++++++++++++++++++++- examples/simplify/qm7.json | 11 +--- tests/test_check_examples.py | 5 ++ 6 files changed, 127 insertions(+), 17 deletions(-) create mode 100644 doc/simplify/simplify-jdata.rst diff --git a/doc/index.rst b/doc/index.rst index acd30efbd..65161739d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -53,6 +53,7 @@ DPGEN's documentation :maxdepth: 2 :caption: Simplify + simplify/simplify-jdata simplify/simplify-mdata diff --git a/doc/simplify/simplify-jdata.rst b/doc/simplify/simplify-jdata.rst new file mode 100644 index 000000000..520c889ab --- /dev/null +++ b/doc/simplify/simplify-jdata.rst @@ -0,0 +1,6 @@ +dpgen simplify parameters +========================= + +.. dargs:: + :module: dpgen.simplify.arginfo + :func: simplify_jdata_arginfo diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 4cc055ad7..9357ee912 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -26,7 +26,7 @@ def basic_args() -> List[Argument]: return [ Argument("type_map", list, optional=False, doc=doc_type_map), Argument("mass_map", list, optional=False, doc=doc_mass_map), - Argument("use_ele_temp", int, optional=False, + Argument("use_ele_temp", int, optional=True, default=0, doc=doc_use_ele_temp), ] @@ -34,7 +34,7 @@ def basic_args() -> List[Argument]: def data_args() -> List[Argument]: doc_init_data_prefix = 'Prefix of initial data directories.' doc_init_data_sys = 'Directories of initial data. You may use either absolute or relative path here. Systems will be detected recursively in the directories.' - doc_sys_format = 'Format of initial data. It will be vasp/poscar if not set.' + doc_sys_format = 'Format of initial data.' doc_init_batch_size = 'Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms.' doc_sys_configs_prefix = 'Prefix of sys_configs.' doc_sys_configs = 'Containing directories of structures to be explored in iterations.Wildcard characters are supported here.' @@ -45,12 +45,12 @@ def data_args() -> List[Argument]: doc=doc_init_data_prefix), Argument("init_data_sys", list, optional=False, doc=doc_init_data_sys), - Argument("sys_format", str, optional=False, doc=doc_sys_format), + Argument("sys_format", str, optional=True, default='vasp/poscar', doc=doc_sys_format), Argument("init_batch_size", str, optional=True, doc=doc_init_batch_size), Argument("sys_configs_prefix", str, optional=True, doc=doc_sys_configs_prefix), - Argument("sys_configs", str, optional=False, doc=doc_sys_configs), + Argument("sys_configs", list, optional=False, doc=doc_sys_configs), Argument("sys_batch_size", list, optional=True, doc=doc_sys_batch_size), ] @@ -195,7 +195,7 @@ def fp_style_gaussian_args() -> List[Argument]: doc_nproc = 'The number of processors for Gaussian input.' args = [ - Argument("doc_keywords", [str or list], + Argument("keywords", [str or list], optional=False, doc=doc_keywords), Argument("multiplicity", [int or str], optional=False, doc=doc_multiplicity), @@ -207,9 +207,9 @@ def fp_style_gaussian_args() -> List[Argument]: doc_fp_params_gaussian = 'Parameters for Gaussian calculation.' return [ - Argument("use_clusters", bool, optional=False, doc=doc_use_clusters), + Argument("use_clusters", bool, optional=True, default=False, doc=doc_use_clusters), Argument("cluster_cutoff", float, - optional=False, doc=doc_cluster_cutoff), + optional=True, doc=doc_cluster_cutoff), Argument("fp_params", dict, args, [], optional=False, doc=doc_fp_params_gaussian), ] diff --git a/dpgen/simplify/arginfo.py b/dpgen/simplify/arginfo.py index 0fbfe606e..c325c5628 100644 --- a/dpgen/simplify/arginfo.py +++ b/dpgen/simplify/arginfo.py @@ -1,10 +1,113 @@ -from dargs import Argument +from typing import List +from dargs import Argument, Variant from dpgen.arginfo import general_mdata_arginfo +from dpgen.generator.arginfo import ( + basic_args, + data_args, + training_args, + fp_style_vasp_args, + fp_style_gaussian_args, +) + + +def general_simplify_arginfo() -> Argument: + """General simplify arginfo. + + Returns + ------- + Argument + arginfo + """ + doc_labeled = "If true, the initial data is labeled." + doc_pick_data = "Path to the directory with the pick data with the deepmd/npy format. Systems are detected recursively." + doc_init_pick_number = "The number of initial pick data." + doc_iter_pick_number = "The number of pick data in each iteration." + doc_model_devi_f_trust_lo = "The lower bound of forces for the selection for the model deviation." + doc_model_devi_f_trust_hi = "The higher bound of forces for the selection for the model deviation." + + return [ + Argument("labeled", bool, optional=True, default=False, doc=doc_labeled), + Argument("pick_data", str, doc=doc_pick_data), + Argument("init_pick_number", int, doc=doc_init_pick_number), + Argument("iter_pick_number", int, doc=doc_iter_pick_number), + Argument("model_devi_f_trust_lo", float, optional=False, doc=doc_model_devi_f_trust_lo), + Argument("model_devi_f_trust_hi", float, optional=False, doc=doc_model_devi_f_trust_hi), + ] + + +def fp_style_variant_type_args() -> Variant: + """Generate variant for fp style variant type. + + Returns + ------- + Variant + variant for fp style + """ + doc_fp_style = 'Software for First Principles, if `labeled` is false. Options include “vasp”, “gaussian” up to now.' + doc_fp_style_none = 'No fp.' + doc_fp_style_vasp = 'VASP.' + doc_fp_style_gaussian = 'Gaussian. The command should be set as `g16 < input`.' + + return Variant("fp_style", [ + Argument("none", dict, doc=doc_fp_style_none), + # simplify use the same fp method as run + Argument("vasp", dict, fp_style_vasp_args(), doc=doc_fp_style_vasp), + Argument("gaussian", dict, fp_style_gaussian_args(), + doc=doc_fp_style_gaussian), + ], + optional=True, + default_tag="none", + doc=doc_fp_style) + + +def fp_args() -> List[Argument]: + """Generate arginfo for fp. + + Returns + ------- + List[Argument] + arginfo + """ + doc_fp_task_max = 'Maximum of structures to be calculated in 02.fp of each iteration.' + doc_fp_task_min = 'Minimum of structures to be calculated in 02.fp of each iteration.' + + return [ + Argument("fp_task_max", int, optional=True, doc=doc_fp_task_max), + Argument("fp_task_min", int, optional=True, doc=doc_fp_task_min), + ] + + +def simplify_jdata_arginfo() -> Argument: + """Generate arginfo for dpgen simplify jdata. + + Returns + ------- + Argument + arginfo + """ + doc_run_jdata = "Parameters for simplify.json, the first argument of `dpgen simplify`." + return Argument("simplify_jdata", + dict, + sub_fields=[ + *basic_args(), + # TODO: we may remove sys_configs; it is required in train method + *data_args(), + *general_simplify_arginfo(), + # simplify use the same training method as run + *training_args(), + *fp_args(), + ], + sub_variants=[ + fp_style_variant_type_args(), + ], + doc=doc_run_jdata, + ) + def simplify_mdata_arginfo() -> Argument: """Generate arginfo for dpgen simplify mdata. - + Returns ------- Argument diff --git a/examples/simplify/qm7.json b/examples/simplify/qm7.json index 648c589e7..131a903a6 100644 --- a/examples/simplify/qm7.json +++ b/examples/simplify/qm7.json @@ -16,11 +16,11 @@ "pick_data": "/scratch/jz748/simplify/qm7", "init_data_prefix": "", "init_data_sys": [], + "sys_configs": [null], "sys_batch_size": [ "auto" ], "numb_models": 4, - "train_param": "input.json", "default_training_param": { "model": { "type_map": [ @@ -92,11 +92,8 @@ }, "use_clusters": true, "fp_style": "gaussian", - "shuffle_poscar": false, "fp_task_max": 1000, "fp_task_min": 10, - "fp_pp_path": "/home/jzzeng/", - "fp_pp_files": [], "fp_params": { "keywords": "mn15/6-31g** force nosymm scf(maxcyc=512)", "nproc": 28, @@ -105,9 +102,7 @@ }, "init_pick_number":100, "iter_pick_number":100, - "e_trust_lo":1e10, - "e_trust_hi":1e10, - "f_trust_lo":0.25, - "f_trust_hi":0.45, + "model_devi_f_trust_lo":0.10, + "model_devi_f_trust_hi":0.30, "_comment": " that's all " } diff --git a/tests/test_check_examples.py b/tests/test_check_examples.py index 102e98490..61dbbc977 100644 --- a/tests/test_check_examples.py +++ b/tests/test_check_examples.py @@ -9,8 +9,12 @@ from dpgen.data.arginfo import ( init_reaction_jdata_arginfo, ) +from dpgen.simplify.arginfo import ( + simplify_jdata_arginfo, +) init_reaction_jdata = init_reaction_jdata_arginfo() +simplify_jdata = simplify_jdata_arginfo() # directory of examples p_examples = Path(__file__).parent.parent / "examples" @@ -19,6 +23,7 @@ # tuple of example list input_files = ( (init_reaction_jdata, p_examples / "init" / "reaction.json"), + (simplify_jdata, p_examples / "simplify" / "qm7.json"), )