@@ -102,8 +102,8 @@ def __init__(self, covariance='pckl', calculator=None, process_group=None, meta=
         include_tape for training from an sgpr tape

     *** important ***
-        You may wants to wrap atoms with FilterDeltas if you intend to
-        carry out molecular dynamics simulations.
+        You may want to wrap atoms with FilterDeltas if you intend to
+        carry out molecular dynamics simulations.

     *** important ***
         For training the model with existing data use the following
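The FilterDeltas wrapper mentioned above is applied to the Atoms object itself, not to the calculator. A minimal sketch, assuming FilterDeltas is importable from theforce.calculator.active as in the repository's examples (my_dft is a placeholder for any ASE ab initio calculator):

    from theforce.calculator.active import ActiveCalculator, FilterDeltas

    atoms.calc = ActiveCalculator(calculator=my_dft)  # atoms: any ase.Atoms object
    atoms = FilterDeltas(atoms)  # smooths the force jumps that occur when the
                                 # model is updated on the fly
    # ... proceed with molecular dynamics on the wrapped atoms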
@@ -135,25 +135,25 @@ def __init__(self, covariance='pckl', calculator=None, process_group=None, meta=

     calculator:
         The ab initio calculator which the model intends to learn.
-        In case one wishes to use an existing model without further updates,
+        In case one wishes to use an existing model without further updates,
         then pass "calculator=None".

     process_group:
         For parallelism, import "mpi_init" from theforce.util.parallel,
-        then set
+        then set
             process_group = mpi_init()
         as kwarg when creating the ActiveCalculator.

     pckl:
         The model will be pickled after every update in this folder,
         which can be loaded in future simulations simply by
             calc = ActiveCalculator(pckl)
-        This way, there is no overhead for rebuilding the model from
+        This way, there is no overhead for rebuilding the model from
         scratch.

     tape:
-        The tape arg is the name of the file used for saving the
-        updates (the added data and inducing LCEs).
+        The tape arg is the name of the file used for saving the
+        updates (the added data and inducing LCEs).
         "tape" files are never overwritten (always appended).
         These files can be used for rebuilding the model with different
         parameters, combining models, and in general post processing.
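Putting the calculator, process_group, pckl, and tape arguments together, construction would look roughly like this (a sketch based on the docstring above; my_dft and the file names are placeholders):

    from theforce.util.parallel import mpi_init
    from theforce.calculator.active import ActiveCalculator

    calc = ActiveCalculator(calculator=my_dft,         # ab initio calculator to learn
                            process_group=mpi_init(),  # enable MPI parallelism
                            pckl='model.pckl/',        # folder for pickling the model
                            tape='model.sgpr')         # updates are appended here

    # later, reuse the pickled model without further updates:
    calc = ActiveCalculator('model.pckl/', calculator=None)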
@@ -163,12 +163,12 @@ def __init__(self, covariance='pckl', calculator=None, process_group=None, meta=

     test:
         For instance, if test=100 and 100 steps have passed since the last
-        exact calculation, an exact calculation will be performed.
-        This can be used for monitoring the on-the-fly ML accuracy and
+        exact calculation, an exact calculation will be performed.
+        This can be used for monitoring the on-the-fly ML accuracy and
         has no effect on training. The exact calculation (FP) will be saved
         in 'active_FP.traj' while the model's predictions (ML) will be saved
-        in 'active_ML.traj'. These files will be overwritten in the next
-        simulation. The following command can be used for a quick description
+        in 'active_ML.traj'. These files will be overwritten by the next
+        simulation. The following command can be used for a quick summary
         of ML errors:

             python -m theforce.regression.scores active_ML.traj active_FP.traj
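The gist of such an ML-vs-FP comparison can also be sketched directly with ASE (an illustration only, not the scores module's actual implementation; it assumes both trajectories store energies and forces):

    import numpy as np
    from ase.io import read

    ml = read('active_ML.traj', ':')   # model predictions
    fp = read('active_FP.traj', ':')   # exact (first-principles) results
    de = [a.get_potential_energy() - b.get_potential_energy()
          for a, b in zip(ml, fp)]
    df = np.concatenate([a.get_forces() - b.get_forces()
                         for a, b in zip(ml, fp)])
    print('energy MAE:', np.abs(de).mean())
    print('force RMSE:', np.sqrt((df ** 2).mean()))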
@@ -391,7 +391,8 @@ def grads(self, energy, retain_graph=False):

     def initiate_model(self):
         data = AtomsData([self.snapshot()])
-        i = self.atoms.first_of_each_atom_type()
+        # i = self.atoms.first_of_each_atom_type()
+        i = self.get_unique_lces()
         inducing = LocalsData([self.atoms.local(j, detach=True) for j in i])
         self.model.set_data(data, inducing)
         # data is stored in _exact, thus we only store the inducing
@@ -401,10 +402,28 @@ def initiate_model(self):
         self.log('seed size: {} {} details: {}'.format(
             *self.size, details))
         if self.tune_for_md:
-            self.sample_rand_lces(repeat=1)
+            self.sample_rand_lces(indices=i, repeat=1)
         self.optimize()

-    def sample_rand_lces(self, repeat=1, extend_cov=False):
+    def get_unique_lces(self, thresh=0.95):
+        tmp = (self.atoms.as_ase() if self.to_ase else self.atoms).copy()
+        tmp.calc = None
+        atoms = TorchAtoms(ase_atoms=tmp)
+        atoms.update(posgrad=False, cellgrad=False, dont_save_grads=True,
+                     cutoff=self.model.cutoff, descriptors=self.model.descriptors)
+        k = self.model.gp.kern(atoms, atoms)
+        unique = []
+        for i in range(k.shape[0]):
+            is_unique = True
+            for j in unique:
+                if k[i, j] >= thresh:
+                    is_unique = False
+                    break
+            if is_unique:
+                unique.append(i)
+        return unique
+
+    def sample_rand_lces(self, indices=None, repeat=1, extend_cov=False):
         added = 0
         for _ in range(repeat):
             tmp = (self.atoms.as_ase() if self.to_ase else self.atoms).copy()
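The new get_unique_lces is a greedy similarity filter: an LCE is kept only if its kernel value with every previously kept LCE stays below thresh, so the seed inducing set starts from mutually dissimilar environments. The same logic, stripped of the model machinery (a self-contained illustration assuming a similarity matrix normalized so that k[i, i] = 1):

    import numpy as np

    def unique_indices(k, thresh=0.95):
        # greedily keep rows whose similarity to all kept rows is < thresh
        unique = []
        for i in range(k.shape[0]):
            if all(k[i, j] < thresh for j in unique):
                unique.append(i)
        return unique

    rng = np.random.default_rng(0)
    x = rng.normal(size=(6, 3))
    x /= np.linalg.norm(x, axis=1, keepdims=True)
    k = x @ x.T                     # cosine similarities, k[i, i] = 1
    print(unique_indices(k))        # indices of mutually dissimilar rows

Because the scan runs in input order, the first member of each similarity cluster is the one retained.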
@@ -414,8 +433,9 @@ def sample_rand_lces(self, repeat=1, extend_cov=False):
             atoms = TorchAtoms(ase_atoms=tmp)
             atoms.update(posgrad=False, cellgrad=False, dont_save_grads=True,
                          cutoff=self.model.cutoff, descriptors=self.model.descriptors)
-            m = len(atoms.loc)
-            for k in np.random.permutation(m):
+            if indices is None:
+                indices = np.random.permutation(len(atoms.loc))
+            for k in indices:
                 res = abs(self.update_lce(atoms.loc[k]))
                 added += res
                 if res > 0 and extend_cov:
@@ -605,7 +625,7 @@ def update_data(self, try_fake=True):
         #
         n = self.model.ndata
         new = self.snapshot(fake=try_fake)
-        #self.model.add_1atoms(new, self.ediff_tot, self.fdiff)
+        # self.model.add_1atoms(new, self.ediff_tot, self.fdiff)
         a, de, df = self.model.add_1atoms_fast(new, self.ediff_tot, self.fdiff, self.atoms.xyz,
                                                self.cov, self.atoms.is_distributed)
         added = self.model.ndata - n
@@ -872,8 +892,8 @@ def log_to_figure(file, figsize=(10, 5), window=(None, None), meta_ax=True, plot
     wall.plot(*zip(*elapsed), color='cyan', alpha=0.5)
     wall.set_ylabel('minutes')
     axes[2].axhline(y=settings['ediff:'], ls='--', color='k')
-    #axes[2].axhline(y=settings['ediff_lb:'], ls='--', color='k')
-    #axes[2].axhline(y=settings['ediff_ub:'], ls='--', color='k', alpha=0.3)
+    # axes[2].axhline(y=settings['ediff_lb:'], ls='--', color='k')
+    # axes[2].axhline(y=settings['ediff_ub:'], ls='--', color='k', alpha=0.3)
     axes[2].grid()
     if len(DF) > 0:
         steps, df, add = zip(*DF)
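A plausible way to use the plotting helper patched above (a sketch; it assumes log_to_figure returns the matplotlib figure and that the log file is named 'active.log'):

    from theforce.calculator.active import log_to_figure

    fig = log_to_figure('active.log')   # 'active.log': assumed log file name
    fig.savefig('active_log.png')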