From 32a9996956b21d89f78db2da8e15fc6693778c04 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 9 Jul 2022 06:20:04 -0400
Subject: [PATCH] add simplify jdata docs; fix and check example (#787)

* add simplify jdata docs; fix and check example

Fix #779.

* `use_ele_temp` should be optional

* `sys_format` should be optional

* `sys_configs` should be list

* `labeled` should be optional

* `use_clusters` and `cluster_cutoff` are optional

* fix keyword option

* add to toc
---
 doc/index.rst                   |   1 +
 doc/simplify/simplify-jdata.rst |   6 ++
 dpgen/generator/arginfo.py      |  14 ++---
 dpgen/simplify/arginfo.py       | 107 +++++++++++++++++++++++++++++++-
 examples/simplify/qm7.json      |  11 +---
 tests/test_check_examples.py    |   5 ++
 6 files changed, 127 insertions(+), 17 deletions(-)
 create mode 100644 doc/simplify/simplify-jdata.rst

diff --git a/doc/index.rst b/doc/index.rst
index acd30efbd..65161739d 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -53,6 +53,7 @@ DPGEN's documentation
    :maxdepth: 2
    :caption: Simplify  
 
+   simplify/simplify-jdata
    simplify/simplify-mdata
 
 
diff --git a/doc/simplify/simplify-jdata.rst b/doc/simplify/simplify-jdata.rst
new file mode 100644
index 000000000..520c889ab
--- /dev/null
+++ b/doc/simplify/simplify-jdata.rst
@@ -0,0 +1,6 @@
+dpgen simplify parameters
+=========================
+
+.. dargs::
+   :module: dpgen.simplify.arginfo
+   :func: simplify_jdata_arginfo
diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py
index 4cc055ad7..9357ee912 100644
--- a/dpgen/generator/arginfo.py
+++ b/dpgen/generator/arginfo.py
@@ -26,7 +26,7 @@ def basic_args() -> List[Argument]:
     return [
         Argument("type_map", list, optional=False, doc=doc_type_map),
         Argument("mass_map", list, optional=False, doc=doc_mass_map),
-        Argument("use_ele_temp", int, optional=False,
+        Argument("use_ele_temp", int, optional=True,
                  default=0, doc=doc_use_ele_temp),
     ]
 
@@ -34,7 +34,7 @@ def basic_args() -> List[Argument]:
 def data_args() -> List[Argument]:
     doc_init_data_prefix = 'Prefix of initial data directories.'
     doc_init_data_sys = 'Directories of initial data. You may use either absolute or relative path here. Systems will be detected recursively in the directories.'
-    doc_sys_format = 'Format of initial data. It will be vasp/poscar if not set.'
+    doc_sys_format = 'Format of initial data.'
     doc_init_batch_size = 'Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms.'
     doc_sys_configs_prefix = 'Prefix of sys_configs.'
     doc_sys_configs = 'Containing directories of structures to be explored in iterations.Wildcard characters are supported here.'
@@ -45,12 +45,12 @@ def data_args() -> List[Argument]:
                  doc=doc_init_data_prefix),
         Argument("init_data_sys", list,
                  optional=False, doc=doc_init_data_sys),
-        Argument("sys_format", str, optional=False, doc=doc_sys_format),
+        Argument("sys_format", str, optional=True, default='vasp/poscar', doc=doc_sys_format),
         Argument("init_batch_size", str, optional=True,
                  doc=doc_init_batch_size),
         Argument("sys_configs_prefix", str, optional=True,
                  doc=doc_sys_configs_prefix),
-        Argument("sys_configs", str, optional=False, doc=doc_sys_configs),
+        Argument("sys_configs", list, optional=False, doc=doc_sys_configs),
         Argument("sys_batch_size", list, optional=True,
                  doc=doc_sys_batch_size),
     ]
@@ -195,7 +195,7 @@ def fp_style_gaussian_args() -> List[Argument]:
     doc_nproc = 'The number of processors for Gaussian input.'
 
     args = [
-        Argument("doc_keywords", [str or list],
+        Argument("keywords", [str or list],
                  optional=False, doc=doc_keywords),
         Argument("multiplicity", [int or str],
                  optional=False, doc=doc_multiplicity),
@@ -207,9 +207,9 @@ def fp_style_gaussian_args() -> List[Argument]:
     doc_fp_params_gaussian = 'Parameters for Gaussian calculation.'
 
     return [
-        Argument("use_clusters", bool, optional=False, doc=doc_use_clusters),
+        Argument("use_clusters", bool, optional=True, default=False, doc=doc_use_clusters),
         Argument("cluster_cutoff", float,
-                 optional=False, doc=doc_cluster_cutoff),
+                 optional=True, doc=doc_cluster_cutoff),
         Argument("fp_params", dict, args, [],
                  optional=False, doc=doc_fp_params_gaussian),
     ]
diff --git a/dpgen/simplify/arginfo.py b/dpgen/simplify/arginfo.py
index 0fbfe606e..c325c5628 100644
--- a/dpgen/simplify/arginfo.py
+++ b/dpgen/simplify/arginfo.py
@@ -1,10 +1,113 @@
-from dargs import Argument
+from typing import List
+from dargs import Argument, Variant
 
 from dpgen.arginfo import general_mdata_arginfo
+from dpgen.generator.arginfo import (
+    basic_args,
+    data_args,
+    training_args,
+    fp_style_vasp_args,
+    fp_style_gaussian_args,
+)
+
+
+def general_simplify_arginfo() -> List[Argument]:
+    """Generate arginfo for the general simplify parameters.
+
+    Returns
+    -------
+    List[Argument]
+        arginfo for labeled, the pick data settings and the force trust bounds
+    """
+    doc_labeled = "If true, the initial data is labeled."
+    doc_pick_data = "Path to the directory with the pick data with the deepmd/npy format. Systems are detected recursively."
+    doc_init_pick_number = "The number of initial pick data."
+    doc_iter_pick_number = "The number of pick data in each iteration."
+    doc_model_devi_f_trust_lo = "The lower bound of forces for the selection for the model deviation."
+    doc_model_devi_f_trust_hi = "The higher bound of forces for the selection for the model deviation."
+
+    return [
+        Argument("labeled", bool, optional=True, default=False, doc=doc_labeled),
+        Argument("pick_data", str, optional=False, doc=doc_pick_data),
+        Argument("init_pick_number", int, optional=False, doc=doc_init_pick_number),
+        Argument("iter_pick_number", int, optional=False, doc=doc_iter_pick_number),
+        Argument("model_devi_f_trust_lo", float, optional=False, doc=doc_model_devi_f_trust_lo),
+        Argument("model_devi_f_trust_hi", float, optional=False, doc=doc_model_devi_f_trust_hi),
+    ]
+
+
+def fp_style_variant_type_args() -> Variant:
+    """Generate the variant that selects sub-arguments by `fp_style`.
+
+    Returns
+    -------
+    Variant
+        variant for fp style (none, vasp or gaussian; the default tag is none)
+    """
+    doc_fp_style = 'Software for First Principles, if `labeled` is false. Options include “vasp”, “gaussian” up to now.'
+    doc_fp_style_none = 'No fp.'
+    doc_fp_style_vasp = 'VASP.'
+    doc_fp_style_gaussian = 'Gaussian. The command should be set as `g16 < input`.'
+
+    return Variant("fp_style", [
+        Argument("none", dict, doc=doc_fp_style_none),
+        # simplify uses the same fp arguments as run (reused from dpgen.generator.arginfo)
+        Argument("vasp", dict, fp_style_vasp_args(), doc=doc_fp_style_vasp),
+        Argument("gaussian", dict, fp_style_gaussian_args(),
+                 doc=doc_fp_style_gaussian),
+    ],
+        optional=True,
+        default_tag="none",
+        doc=doc_fp_style)
+
+
+def fp_args() -> List[Argument]:
+    """Generate arginfo for the fp task limits.
+
+    Returns
+    -------
+    List[Argument]
+        arginfo for the optional integers fp_task_max and fp_task_min
+    """
+    doc_fp_task_max = 'Maximum of structures to be calculated in 02.fp of each iteration.'
+    doc_fp_task_min = 'Minimum of structures to be calculated in 02.fp of each iteration.'
+
+    return [
+        Argument("fp_task_max", int, optional=True, doc=doc_fp_task_max),
+        Argument("fp_task_min", int, optional=True, doc=doc_fp_task_min),
+    ]
+
+
+def simplify_jdata_arginfo() -> Argument:
+    """Generate arginfo for dpgen simplify jdata.
+
+    Returns
+    -------
+    Argument
+        dict-typed arginfo named simplify_jdata, with an fp_style sub-variant
+    """
+    doc_run_jdata = "Parameters for simplify.json, the first argument of `dpgen simplify`."
+    return Argument("simplify_jdata",
+                    dict,
+                    sub_fields=[
+                        *basic_args(),
+                        # TODO: we may remove sys_configs; it is required in train method
+                        *data_args(),
+                        *general_simplify_arginfo(),
+                        # simplify uses the same training arguments as run
+                        *training_args(),
+                        *fp_args(),
+                    ],
+                    sub_variants=[
+                        fp_style_variant_type_args(),
+                    ],
+                    doc=doc_run_jdata,
+                    )
+
 
 def simplify_mdata_arginfo() -> Argument:
     """Generate arginfo for dpgen simplify mdata.
-    
+
     Returns
     -------
     Argument
diff --git a/examples/simplify/qm7.json b/examples/simplify/qm7.json
index 648c589e7..131a903a6 100644
--- a/examples/simplify/qm7.json
+++ b/examples/simplify/qm7.json
@@ -16,11 +16,11 @@
     "pick_data": "/scratch/jz748/simplify/qm7",
     "init_data_prefix": "",
     "init_data_sys": [],
+    "sys_configs": [null],
     "sys_batch_size": [
         "auto"
     ],
     "numb_models": 4,
-    "train_param": "input.json",
     "default_training_param": {
         "model": {
             "type_map": [
@@ -92,11 +92,8 @@
     },
     "use_clusters": true,
     "fp_style": "gaussian",
-    "shuffle_poscar": false,
     "fp_task_max": 1000,
     "fp_task_min": 10,
-    "fp_pp_path": "/home/jzzeng/",
-    "fp_pp_files": [],
     "fp_params": {
         "keywords": "mn15/6-31g** force nosymm scf(maxcyc=512)",
         "nproc": 28,
@@ -105,9 +102,7 @@
     },
     "init_pick_number":100,
     "iter_pick_number":100,
-    "e_trust_lo":1e10,
-    "e_trust_hi":1e10,
-    "f_trust_lo":0.25,
-    "f_trust_hi":0.45,
+    "model_devi_f_trust_lo":0.10,
+    "model_devi_f_trust_hi":0.30,
     "_comment": " that's all "
 }
diff --git a/tests/test_check_examples.py b/tests/test_check_examples.py
index 102e98490..61dbbc977 100644
--- a/tests/test_check_examples.py
+++ b/tests/test_check_examples.py
@@ -9,8 +9,12 @@
 from dpgen.data.arginfo import (
     init_reaction_jdata_arginfo,
 )
+from dpgen.simplify.arginfo import (
+    simplify_jdata_arginfo,
+)
 
 init_reaction_jdata = init_reaction_jdata_arginfo()
+simplify_jdata = simplify_jdata_arginfo()
 
 # directory of examples
 p_examples = Path(__file__).parent.parent / "examples"
@@ -19,6 +23,7 @@
 #   tuple of example list
 input_files = (
     (init_reaction_jdata, p_examples / "init" / "reaction.json"),
+    (simplify_jdata, p_examples / "simplify" / "qm7.json"),
 )