Commit 09a1982

initiate model: use unique LCEs
1 parent e63f2d8 commit 09a1982

File tree

1 file changed: +39 -19 lines


theforce/calculator/active.py

Lines changed: 39 additions & 19 deletions
@@ -102,8 +102,8 @@ def __init__(self, covariance='pckl', calculator=None, process_group=None, meta=
         include_tape for training from a sgpr tape
 
         *** important ***
-        You may want to wrap atoms with FilterDeltas if you intend to
-        carry out molecular dynamics simulations.
+        You may want to wrap atoms with FilterDeltas if you intend to
+        carry out molecular dynamics simulations.
 
         *** important ***
         For training the model with existing data use the following
@@ -135,25 +135,25 @@ def __init__(self, covariance='pckl', calculator=None, process_group=None, meta=
 
         calculator:
             The ab initio calculator which the model intends to learn.
-            In case one wishes to use an existing model without further updates,
+            In case one wishes to use an existing model without further updates,
             then pass "calculator=None".
 
         process_group:
             For parallelism, import "mpi_init" from theforce.util.parallel,
-            then set
+            then set
                 process_group = mpi_init()
             as kwarg when creating the ActiveCalculator.
 
         pckl:
             The model will be pickled after every update in this folder
             which can be loaded in the future simulations simply by
                 calc = ActiveCalculator(pckl)
-            This way, there is no overhead for rebuilding the model from
+            This way, there is no overhead for rebuilding the model from
             scratch.
 
         tape:
-            The tape arg is the name of the file used for saving the
-            updates (the added data and inducing LCEs).
+            The tape arg is the name of the file used for saving the
+            updates (the added data and inducing LCEs).
             "tape" files are never overwritten (always appended).
             These files can be used for rebuilding the model with different
             parameters, combining models, and in general post processing.
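
A minimal construction sketch based on the docstring above, not part of this commit: EMT is only a stand-in for a real ab initio calculator, 'model.pckl/' and 'model.sgpr' are placeholder names for the pckl folder and tape file, and the keyword names are taken from the docstring sections rather than from the full signature (which is truncated here).

from ase.calculators.emt import EMT              # placeholder for a real ab initio calculator
from theforce.util.parallel import mpi_init      # import path given in the docstring
from theforce.calculator.active import ActiveCalculator

process_group = mpi_init()                       # enables MPI parallelism
calc = ActiveCalculator(calculator=EMT(),        # the calculator the model learns from
                        process_group=process_group,
                        pckl='model.pckl/',      # model re-pickled here after every update
                        tape='model.sgpr')       # updates appended here, never overwritten

# Later, reuse the pickled model without further updates (per the docstring):
# calc = ActiveCalculator('model.pckl/', calculator=None)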
@@ -163,12 +163,12 @@ def __init__(self, covariance='pckl', calculator=None, process_group=None, meta=
 
         test:
             For instance, if test=100 and 100 steps have passed since the last
-            exact calculation, an exact calculation will be performed.
-            This can be used for monitoring the on-the-fly ML accuracy and
+            exact calculation, an exact calculation will be performed.
+            This can be used for monitoring the on-the-fly ML accuracy and
            has no effect on training. The exact calculation (FP) will be saved
            in 'active_FP.traj' while the model's predictions (ML) will be saved
-            in 'active_ML.traj'. These files will be overwritten in the next
-            simulation. The following command can be used for a quick description
+            in 'active_ML.traj'. These files will be overwritten in the next
+            simulation. The following command can be used for a quick description
            of ML errors:
 
                python -m theforce.regression.scores active_ML.traj active_FP.traj
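
The scores module above prints a summary of the ML/FP discrepancies. As an illustration only (this is not the theforce script), roughly equivalent error numbers can be pulled from the two trajectories with ASE and numpy; test=N is assumed to have been passed to the ActiveCalculator so that both files exist.

import numpy as np
from ase.io import read

ml = read('active_ML.traj', index=':')   # model predictions
fp = read('active_FP.traj', index=':')   # exact (FP) calculations

de = [abs(a.get_potential_energy() - b.get_potential_energy()) for a, b in zip(ml, fp)]
df = [np.abs(a.get_forces() - b.get_forces()).max() for a, b in zip(ml, fp)]
print(f'MAE(energy) = {np.mean(de):.4f} eV   max|dF| = {np.max(df):.4f} eV/A')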
@@ -391,7 +391,8 @@ def grads(self, energy, retain_graph=False):
 
     def initiate_model(self):
         data = AtomsData([self.snapshot()])
-        i = self.atoms.first_of_each_atom_type()
+        #i = self.atoms.first_of_each_atom_type()
+        i = self.get_unique_lces()
         inducing = LocalsData([self.atoms.local(j, detach=True) for j in i])
         self.model.set_data(data, inducing)
         # data is stored in _exact, thus we only store the inducing
@@ -401,10 +402,28 @@ def initiate_model(self):
         self.log('seed size: {} {} details: {}'.format(
             *self.size, details))
         if self.tune_for_md:
-            self.sample_rand_lces(repeat=1)
+            self.sample_rand_lces(indices=i, repeat=1)
         self.optimize()
 
-    def sample_rand_lces(self, repeat=1, extend_cov=False):
+    def get_unique_lces(self, thresh=0.95):
+        tmp = (self.atoms.as_ase() if self.to_ase else self.atoms).copy()
+        tmp.calc = None
+        atoms = TorchAtoms(ase_atoms=tmp)
+        atoms.update(posgrad=False, cellgrad=False, dont_save_grads=True,
+                     cutoff=self.model.cutoff, descriptors=self.model.descriptors)
+        k = self.model.gp.kern(atoms, atoms)
+        unique = []
+        for i in range(k.shape[0]):
+            is_unique = True
+            for j in unique:
+                if k[i, j] >= thresh:
+                    is_unique = False
+                    break
+            if is_unique:
+                unique.append(i)
+        return unique
+
+    def sample_rand_lces(self, indices=None, repeat=1, extend_cov=False):
         added = 0
         for _ in range(repeat):
             tmp = (self.atoms.as_ase() if self.to_ase else self.atoms).copy()
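
The new get_unique_lces seeds the inducing set greedily: an LCE is kept only if its kernel value against every LCE already kept stays below thresh, which (given the 0.95 default) assumes the kernel behaves like a similarity that approaches 1 for near-identical environments. The same selection rule on a plain numpy matrix, shown only to clarify the loop above:

import numpy as np

def select_unique(k, thresh=0.95):
    # Greedy pass mirroring get_unique_lces: keep index i only if its
    # similarity to every previously kept index stays below thresh.
    unique = []
    for i in range(k.shape[0]):
        if all(k[i, j] < thresh for j in unique):
            unique.append(i)
    return unique

# Toy similarity matrix: LCEs 0 and 1 are nearly identical, LCE 2 is distinct.
k = np.array([[1.00, 0.99, 0.10],
              [0.99, 1.00, 0.12],
              [0.10, 0.12, 1.00]])
print(select_unique(k))   # -> [0, 2]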
@@ -414,8 +433,9 @@ def sample_rand_lces(self, repeat=1, extend_cov=False):
             atoms = TorchAtoms(ase_atoms=tmp)
             atoms.update(posgrad=False, cellgrad=False, dont_save_grads=True,
                          cutoff=self.model.cutoff, descriptors=self.model.descriptors)
-            m = len(atoms.loc)
-            for k in np.random.permutation(m):
+            if indices is None:
+                indices = np.random.permutation(len(atoms.loc))
+            for k in indices:
                 res = abs(self.update_lce(atoms.loc[k]))
                 added += res
                 if res > 0 and extend_cov:
@@ -605,7 +625,7 @@ def update_data(self, try_fake=True):
         #
         n = self.model.ndata
         new = self.snapshot(fake=try_fake)
-        #self.model.add_1atoms(new, self.ediff_tot, self.fdiff)
+        # self.model.add_1atoms(new, self.ediff_tot, self.fdiff)
         a, de, df = self.model.add_1atoms_fast(new, self.ediff_tot, self.fdiff, self.atoms.xyz,
                                                self.cov, self.atoms.is_distributed)
         added = self.model.ndata - n
@@ -872,8 +892,8 @@ def log_to_figure(file, figsize=(10, 5), window=(None, None), meta_ax=True, plot
     wall.plot(*zip(*elapsed), color='cyan', alpha=0.5)
     wall.set_ylabel('minutes')
     axes[2].axhline(y=settings['ediff:'], ls='--', color='k')
-    #axes[2].axhline(y=settings['ediff_lb:'], ls='--', color='k')
-    #axes[2].axhline(y=settings['ediff_ub:'], ls='--', color='k', alpha=0.3)
+    # axes[2].axhline(y=settings['ediff_lb:'], ls='--', color='k')
+    # axes[2].axhline(y=settings['ediff_ub:'], ls='--', color='k', alpha=0.3)
     axes[2].grid()
     if len(DF) > 0:
         steps, df, add = zip(*DF)
