feat(emp_sk): add command to generate empirical SK parameters

QG-phy · QG-phy · commit 3fa09449a913 · 2025-04-01T23:17:47.000+08:00
Introduce a new command `esk` that generates initial empirical SK parameters. It is implemented in the new `emp_sk.py` module, which builds a model from a bundled base checkpoint (`poly2` or `poly4`) for the basis given in the input file and saves the resulting parameters as `sktb.json` in the output directory. Also fix a minor bug in `gen_inputs.py`, where `model_options` was called as a method although it is an attribute.
diff --git a/dptb/entrypoints/emp_sk.py b/dptb/entrypoints/emp_sk.py
@@ -0,0 +1,104 @@
+import torch
+import numpy as np
+from dptb.nn.build import build_model
+import json
+import logging
+from dptb.nn.sktb.onsiteDB import onsite_energy_database
+import re
+import os
+from dptb.utils.gen_inputs import gen_inputs
+import json
+log = logging.getLogger(__name__)
+
+def to_empsk(
+    INPUT,
+    output='./',
+    basemodel='poly2',
+    **kwargs):
+    """
+    Generate empirical SK parameters for the basis given in an input file.
+
+    Args:
+        INPUT: path of a JSON file; its `common_options` section is passed
+            to `EmpSK`.
+        output: directory passed to `EmpSK.to_json` as `outdir`.
+        basemodel: base model type passed to `EmpSK`.
+        **kwargs: ignored. Accepted so that the whole dictionary of parsed
+            command line arguments can be forwarded to this function.
+
+    Raises:
+        ValueError: if `INPUT` is None.
+        KeyError: if the input file has no `common_options` section.
+    """
+    if INPUT is None:
+        raise ValueError('INPUT is None.')
+    # Named `config` rather than `input` so the builtin is not shadowed.
+    with open(INPUT, 'r') as f:
+        config = json.load(f)
+    if 'common_options' not in config:
+        # Same exception type as a plain lookup, but naming the offending file.
+        raise KeyError(f'common_options is not given in {INPUT}.')
+    EmpSK(config['common_options'], basemodel=basemodel).to_json(outdir=output)
+
+class EmpSK(object):
+    """
+    Empirical SK parameters.
+
+    Builds a model from one of the base checkpoints located in `nn/dftb`
+    (relative to the parent of this file's directory) for the basis given in
+    `common_options`, and exports it as `sktb.json`.
+    """
+    def __init__(self, common_options, basemodel='poly2'):
+        """
+        Args:
+            common_options: common options for the model. especially contain the basis information.
+            basemodel: base model type for the empirical SK parameters  either 'poly2' or 'poly4'.
+
+        Raises:
+            ValueError: if `basemodel` is neither 'poly2' nor 'poly4', or if
+                the basis information is invalid.
+            TypeError: if the basis information has the wrong type.
+        """
+        self.common_options, self.basisref = self.format_common_options(common_options)
+        if basemodel not in ('poly2', 'poly4'):
+            raise ValueError(f'basemodel {basemodel} is not supported.')
+        model_ckpt = os.path.join(os.path.dirname(__file__), '..', 'nn', 'dftb', f'base_{basemodel}.pth')
+
+        # Pass the formatted copy explicitly: `format_common_options` no longer
+        # rewrites the caller's dictionary in place.
+        self.model = build_model(model_ckpt, common_options=self.common_options, no_check=True)
+
+    def to_json(self, outdir='./'):
+        """
+        Convert the model to json format and save it as `sktb.json`.
+
+        Args:
+            outdir: directory in which `sktb.json` is written. It is created
+                when it does not exist yet.
+
+        Returns:
+            The object returned by `self.model.to_json`, i.e. what was dumped
+            to the file.
+        """
+        json_dict = self.model.to_json(basisref=self.basisref)
+        # An empty `outdir` means the current directory; os.makedirs('') fails.
+        if outdir:
+            os.makedirs(outdir, exist_ok=True)
+        outfile = os.path.join(outdir, 'sktb.json')
+        with open(outfile, 'w') as f:
+            json.dump(json_dict, f, indent=4)
+
+        log.info(f'Empirical SK parameters are saved in {outfile}')
+        log.info('If you want to further train the model, please use `dptb config` command to generate input template.')
+        return json_dict
+
+    def format_common_options(self, common_options):
+        """
+        Format the common options for the model.
+
+        The basis of every element is either written with plain one-character
+        orbital symbols (e.g. s, p, d) or with longer reference orbital names
+        (e.g. ns, np, d*) that must be present in `onsite_energy_database`.
+        The two notations can not be mixed. Reference names are reduced to
+        their first letter in the returned options.
+
+        Args:
+            common_options: options containing a `basis` dictionary that maps
+                each element to a list of orbital names. It is not modified.
+
+        Returns:
+            A tuple `(formatted_options, basisref)`. `formatted_options` is a
+            shallow copy of `common_options` with a new `basis` dictionary.
+            `basisref` maps element -> orbital letter -> reference orbital
+            name, or is None when plain orbital symbols are used.
+
+        Raises:
+            ValueError: if `basis` is missing, if the two notations are mixed,
+                or if a reference orbital is not in `onsite_energy_database`.
+            TypeError: if `basis` is not a dictionary, a basis entry is not a
+                list, or an orbital name is not a string.
+        """
+        if 'basis' not in common_options:
+            raise ValueError('basis information is not given in common_options.')
+        basis = common_options['basis']
+        if not isinstance(basis, dict):
+            raise TypeError('basis information is not a dictionary.')
+
+        # First pass: type checks and notation detection. Both flags are
+        # collected over all elements so that mixing is found regardless of
+        # the order in which the orbitals are listed.
+        has_plain = False
+        has_ref = False
+        for ie, orbs in basis.items():
+            if not isinstance(orbs, list):
+                raise TypeError(f'basis information for {ie} is not a list.')
+            for ieorb in orbs:
+                if not isinstance(ieorb, str):
+                    raise TypeError(f'basis information for {ie} is not a string.')
+                if len(ieorb) == 1:
+                    has_plain = True
+                else:
+                    has_ref = True
+        if has_plain and has_ref:
+            raise ValueError('wrong basis setting: use either s, p, d or ns, np, d*. The two notations can not be mixed.')
+
+        sys_ele = "".join(basis)
+        log.info(f'Extracting empirical SK parameters for {sys_ele}')
+
+        formatted = dict(common_options)
+        if not has_ref:
+            formatted['basis'] = {ie: list(orbs) for ie, orbs in basis.items()}
+            return formatted, None
+
+        # Second pass: check the reference orbitals against the database and
+        # reduce them to their orbital letter.
+        basisref = {}
+        std_basis = {}
+        for ie, orbs in basis.items():
+            known_orbs = onsite_energy_database[ie] if ie in onsite_energy_database else {}
+            basisref[ie] = {}
+            std_basis[ie] = []
+            for ieorb in orbs:
+                if ieorb not in known_orbs:
+                    raise ValueError(f'basis information for {ie} is not in onsite_energy_database : {list(known_orbs)}.')
+                orbsymb = re.findall(r'[A-Za-z]', ieorb)[0]
+                basisref[ie][orbsymb] = ieorb
+                std_basis[ie].append(orbsymb)
+        formatted['basis'] = std_basis
+        return formatted, basisref
diff --git a/dptb/entrypoints/main.py b/dptb/entrypoints/main.py
@@ -13,6 +13,8 @@
 from dptb.utils.loggers import set_log_handles
 from dptb.utils.config_check import check_config_train
 from dptb.entrypoints.collectskf import skf2pth, skf2nnsk
+from dptb.entrypoints.emp_sk import to_empsk
+
 from dptb import __version__
 
 
@@ -402,7 +404,30 @@ def main_parser() -> argparse.ArgumentParser:
         help="The output files in training.",
     )
 
-
+    # `esk`: generate initial empirical SK parameters from an input file.
+    parser_esk = subparsers.add_parser(
+        "esk",
+        parents=[parser_log],
+        help="Generate initial empirical SK parameters.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    # Positional, hence required; `to_empsk` reads this file with json.load.
+    parser_esk.add_argument(
+        "INPUT",
+        type=str,
+        help="The input parameter file in JSON format, containing the basis in `common_options`.",
+    )
+    parser_esk.add_argument(
+        "-o",
+        "--output",
+        type=str,
+        default="./",
+        help="The directory in which `sktb.json` is written.",
+    )
+    # Restricting the choices reports an unsupported base model at parse time
+    # instead of after the input file has been read.
+    parser_esk.add_argument(
+        "-m",
+        "--basemodel",
+        type=str,
+        default="poly2",
+        choices=["poly2", "poly4"],
+        help="The base model used to generate the empirical SK parameters.",
+    )
     return parser
 
 def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
@@ -466,3 +491,6 @@ def main():
 
     elif args.command == 'skf2nn':
         skf2nnsk(**dict_args)
+
+    elif args.command == 'esk':
+        to_empsk(**dict_args)
diff --git a/dptb/utils/gen_inputs.py b/dptb/utils/gen_inputs.py
@@ -54,7 +54,7 @@ def gen_inputs(mode, task='train', model=None):
             "overlap": is_overlap,
         }
         input_dict["common_options"].update(common_options)
-        input_dict["model_options"].update(model.model_options())
+        input_dict["model_options"].update(model.model_options)
     #with open(os.path.join(outdir,'input_template.json'), 'w') as f:
     #    json.dump(input_dict, f, indent=4)
     return input_dict

Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,7 @@ def gen_inputs(mode, task='train', model=None):`
`54`	`54`	`"overlap": is_overlap,`
`55`	`55`	`}`
`56`	`56`	`input_dict["common_options"].update(common_options)`
`57`		`- input_dict["model_options"].update(model.model_options())`
	`57`	`+ input_dict["model_options"].update(model.model_options)`
`58`	`58`	`#with open(os.path.join(outdir,'input_template.json'), 'w') as f:`
`59`	`59`	`# json.dump(input_dict, f, indent=4)`
`60`	`60`	`return input_dict`