Skip to content

Commit 654d096

Browse files
committed
no more hard-coded dtypes for Molecule properties. Upgraded Molecule.time from float32 to float64 to support large simulation times. Upgraded Molecule.step from np.int32 to np.uint32 since we can't have negative simulation steps
1 parent eda28be commit 654d096

File tree

4 files changed

+98
-78
lines changed

4 files changed

+98
-78
lines changed

moleculekit/molecule.py

Lines changed: 44 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,8 @@ class Molecule(object):
364364
"boxangles": np.float32,
365365
"formalcharge": np.int32,
366366
"virtualsite": bool,
367+
"step": np.uint32,
368+
"time": np.float64,
367369
}
368370

369371
_dims = {
@@ -392,6 +394,8 @@ class Molecule(object):
392394
"boxangles": (3, 0),
393395
"formalcharge": (0,),
394396
"virtualsite": (0,),
397+
"step": (0,),
398+
"time": (0,),
395399
}
396400

397401
def __init__(self, filename=None, name=None, **kwargs):
@@ -402,8 +406,6 @@ def __init__(self, filename=None, name=None, **kwargs):
402406
self.ssbonds = []
403407
self._frame = 0
404408
self.fileloc = []
405-
self.time = np.array([], dtype=np.float32)
406-
self.step = np.array([], dtype=np.int32)
407409
self.crystalinfo = None
408410

409411
self.reps = Representations(self)
@@ -968,16 +970,16 @@ def _getBonds(self, fileBonds=True, guessBonds=True):
968970
bonds : np.ndarray
969971
An array of bonds
970972
"""
971-
bonds = np.empty((0, 2), dtype=np.uint32)
973+
bonds = np.empty((0, 2), dtype=Molecule._dtypes["bonds"])
972974
if fileBonds:
973975
if (
974976
len(self.bonds) == 0
975977
): # This is a patch for the other readers not returning correct empty dimensions
976-
self.bonds = np.empty((0, 2), dtype=np.uint32)
978+
self.bonds = np.empty((0, 2), dtype=Molecule._dtypes["bonds"])
977979
bonds = np.vstack((bonds, self.bonds))
978980
if guessBonds:
979981
bonds = np.vstack((bonds, self._guessBonds()))
980-
return bonds.astype(np.uint32)
982+
return bonds.astype(Molecule._dtypes["bonds"])
981983

982984
def atomselect(
983985
self,
@@ -1176,7 +1178,9 @@ def _updateBondsAnglesDihedrals(self, idx):
11761178
tempdata[:] = map[tempdata[:]]
11771179
stays = np.invert(np.any(tempdata == -1, axis=1))
11781180
# Delete bonds/angles/dihedrals between non-existent atoms
1179-
self.__dict__[field] = tempdata[stays, ...].astype(np.uint32).copy()
1181+
self.__dict__[field] = (
1182+
tempdata[stays, ...].astype(Molecule._dtypes[field]).copy()
1183+
)
11801184
if field == "bonds" and len(self.bondtype):
11811185
self.bondtype = self.bondtype[stays]
11821186

@@ -1915,7 +1919,7 @@ def wrap(
19151919
if wrapcenter is None:
19161920
centersel = self.atomselect(
19171921
wrapsel, indexes=True, guessBonds=guessAtomSelBonds
1918-
)
1922+
).astype(np.uint32)
19191923
wrapcenter = np.array([0, 0, 0], dtype=np.float32)
19201924
else:
19211925
wrapcenter = np.array(wrapcenter, dtype=np.float32)
@@ -1936,48 +1940,36 @@ def wrap(
19361940

19371941
is_triclinic = np.any(self.boxangles != 90)
19381942
if not is_triclinic:
1939-
wrapping.wrap_box(
1940-
groups,
1941-
self.coords,
1942-
self.box,
1943-
centersel.astype(np.uint32),
1944-
wrapcenter.astype(np.float32),
1945-
)
1943+
wrapping.wrap_box(groups, self.coords, self.box, centersel, wrapcenter)
19461944
else:
19471945
if unitcell == "triclinic":
19481946
wrapping.wrap_triclinic_unitcell(
1949-
groups,
1950-
self.coords,
1951-
self.boxvectors,
1952-
centersel.astype(np.uint32),
1953-
wrapcenter.astype(np.float32),
1947+
groups, self.coords, self.boxvectors, centersel, wrapcenter
19541948
)
19551949
elif unitcell == "compact":
19561950
wrapping.wrap_compact_unitcell(
1957-
groups,
1958-
self.coords,
1959-
self.boxvectors,
1960-
centersel.astype(np.uint32),
1961-
wrapcenter.astype(np.float32),
1962-
1, # compact wrapping
1951+
groups, self.coords, self.boxvectors, centersel, wrapcenter, 1
19631952
)
19641953
elif unitcell == "rectangular":
19651954
wrapping.wrap_compact_unitcell(
1966-
groups,
1967-
self.coords,
1968-
self.boxvectors,
1969-
centersel.astype(np.uint32),
1970-
wrapcenter.astype(np.float32),
1971-
0, # rectangular wrapping
1955+
groups, self.coords, self.boxvectors, centersel, wrapcenter, 0
19721956
)
19731957

19741958
def _emptyTopo(self, numAtoms):
19751959
for field in Molecule._atom_fields:
19761960
self.__dict__[field] = self._empty(numAtoms, field)
19771961

19781962
def _emptyTraj(self, numAtoms):
1979-
self.coords = self._empty(numAtoms, "coords")
1980-
self.box = np.zeros(self._dims["box"], dtype=np.float32)
1963+
for field in Molecule._traj_fields:
1964+
if field == "fileloc":
1965+
continue
1966+
if field == "coords":
1967+
self.coords = self._empty(numAtoms, field)
1968+
else:
1969+
self.__dict__[field] = np.zeros(
1970+
self._dims[field], dtype=Molecule._dtypes[field]
1971+
)
1972+
self.fileloc = []
19811973

19821974
def empty(self, numAtoms):
19831975
"""Creates an empty molecule of `numAtoms` atoms.
@@ -2533,8 +2525,16 @@ def addBond(self, idx1, idx2, btype):
25332525
if hasb:
25342526
self.bondtype[oldidx] = btype
25352527
else:
2536-
self.bonds = np.vstack((self.bonds, [idx1, idx2])).astype(np.uint32).copy()
2537-
self.bondtype = np.hstack((self.bondtype, [btype])).astype(object).copy()
2528+
self.bonds = (
2529+
np.vstack((self.bonds, [idx1, idx2]))
2530+
.astype(Molecule._dtypes["bonds"])
2531+
.copy()
2532+
)
2533+
self.bondtype = (
2534+
np.hstack((self.bondtype, [btype]))
2535+
.astype(Molecule._dtypes["bondtype"])
2536+
.copy()
2537+
)
25382538

25392539
def removeBond(self, idx1, idx2):
25402540
"""Remove an existing bond between a pair of atoms
@@ -3024,23 +3024,22 @@ def calculateUniqueBonds(bonds, bondtype):
30243024
)
30253025
)
30263026
)
3027-
bonds = unique_sorted[:, :2].astype(np.uint32)
3028-
bondtypes = unique_sorted[:, 2].astype(object)
3027+
bonds = unique_sorted[:, :2].astype(Molecule._dtypes["bonds"])
3028+
bondtypes = unique_sorted[:, 2].astype(Molecule._dtypes["bondtype"])
30293029
# Sort both arrays for prettiness by the first then second bond index
30303030
sortidx = np.lexsort((bonds[:, 1], bonds[:, 0]))
30313031
return bonds[sortidx].copy(), bondtypes[sortidx].copy()
30323032
else:
30333033
bonds = np.array(list(set(tuple(bb) for bb in np.sort(bonds, axis=1).tolist())))
30343034
# Sort both arrays for prettiness by the first then second bond index
30353035
sortidx = np.lexsort((bonds[:, 1], bonds[:, 0]))
3036-
return (
3037-
bonds[sortidx].astype(np.uint32).copy(),
3038-
(
3039-
None
3040-
if not len(bondtype)
3041-
else np.array([bondtype[0]] * bonds.shape[0], dtype=object)
3042-
),
3043-
)
3036+
if len(bondtype):
3037+
bondtype = np.array(
3038+
[bondtype[0]] * bonds.shape[0], dtype=Molecule._dtypes["bondtype"]
3039+
)
3040+
else:
3041+
bondtype = None
3042+
return (bonds[sortidx].astype(Molecule._dtypes["bonds"]).copy(), bondtype)
30443043

30453044

30463045
def getBondedGroups(mol, bonds=None):

moleculekit/readers.py

Lines changed: 42 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -155,27 +155,27 @@ def __init__(
155155

156156
nframes = self.numFrames
157157
if box is None:
158-
self.box = np.zeros((3, nframes), np.float32)
158+
self.box = np.zeros((3, nframes), Molecule._dtypes["box"])
159159
if boxangles is None:
160-
self.boxangles = np.zeros((3, nframes), np.float32)
160+
self.boxangles = np.zeros((3, nframes), Molecule._dtypes["boxangles"])
161161
if step is None:
162-
self.step = np.arange(nframes, dtype=int)
162+
self.step = np.arange(nframes, dtype=Molecule._dtypes["step"])
163163
if time is None:
164-
self.time = np.zeros(nframes, dtype=np.float32)
164+
self.time = np.zeros(nframes, dtype=Molecule._dtypes["time"])
165165
if box is not None:
166-
self.box = box
166+
self.box = np.array(box, dtype=Molecule._dtypes["box"])
167167
if boxangles is None:
168168
raise ValueError("boxangles must be provided if box is provided")
169169
if boxangles is not None:
170-
self.boxangles = boxangles
170+
self.boxangles = np.array(boxangles, dtype=Molecule._dtypes["boxangles"])
171171
if box is None:
172172
raise ValueError("box must be provided if boxangles is provided")
173173
if fileloc is not None:
174174
self.fileloc = fileloc
175175
if step is not None:
176-
self.step = step
176+
self.step = np.array(step, dtype=Molecule._dtypes["step"])
177177
if time is not None:
178-
self.time = time
178+
self.time = np.array(time, dtype=Molecule._dtypes["time"])
179179

180180
@property
181181
def numFrames(self):
@@ -410,25 +410,27 @@ def _parseTraj(mol, traj, filename, frame):
410410
setattr(traj, field, np.ascontiguousarray(getattr(traj, field)))
411411

412412
if len(traj.coords):
413-
mol.coords = traj.coords
413+
mol.coords = np.array(traj.coords, dtype=Molecule._dtypes["coords"])
414414

415415
if traj.box is None:
416416
mol.box = np.zeros((3, 1), dtype=Molecule._dtypes["box"])
417417
else:
418-
mol.box = np.array(traj.box)
418+
mol.box = np.array(traj.box, dtype=Molecule._dtypes["box"])
419419
if mol.box.ndim == 1:
420420
mol.box = mol.box[:, np.newaxis]
421421

422422
if traj.boxangles is None:
423423
mol.boxangles = np.zeros((3, 1), dtype=Molecule._dtypes["boxangles"])
424424
else:
425-
mol.boxangles = np.array(traj.boxangles)
425+
mol.boxangles = np.array(
426+
traj.boxangles, dtype=Molecule._dtypes["boxangles"]
427+
)
426428
if mol.boxangles.ndim == 1:
427429
mol.boxangles = mol.boxangles[:, np.newaxis]
428430

429431
# mol.fileloc = traj.fileloc
430-
mol.step = np.hstack(traj.step).astype(int)
431-
mol.time = np.hstack(traj.time)
432+
mol.step = np.hstack(traj.step).astype(Molecule._dtypes["step"])
433+
mol.time = np.hstack(traj.time).astype(Molecule._dtypes["time"])
432434

433435
if ext in _TRAJECTORY_READERS and frame is None and len(traj.coords):
434436
# Writing hidden index file containing number of frames in trajectory file
@@ -1251,15 +1253,17 @@ def _fix_formal_charge(val):
12511253
]
12521254

12531255
if len(topo.bonds):
1254-
topo.bonds = np.array(topo.bonds, dtype=np.uint32)
1256+
topo.bonds = np.array(topo.bonds, dtype=Molecule._dtypes["bonds"])
12551257
badidx = ~np.all(np.isin(topo.bonds, topo.serial), axis=1)
12561258
if np.any(badidx):
12571259
# Some PDBs have bonds to non-existing serials... go figure
12581260
topo.bonds = topo.bonds[~badidx]
12591261
logger.info(
12601262
f"Discarded {np.sum(badidx)} bonds to non-existing indexes in the PDB file."
12611263
)
1262-
topo.bonds = np.array(mapserials[topo.bonds[:]], dtype=np.uint32)
1264+
topo.bonds = np.array(
1265+
mapserials[topo.bonds[:]], dtype=Molecule._dtypes["bonds"]
1266+
)
12631267

12641268
# If no segid was read, use the TER rows to define segments
12651269
if len(topo.segid) and np.all(np.array(topo.segid) == "") and currter != 0:
@@ -1559,9 +1563,9 @@ def XTCread(filename, frame=None, topoloc=None):
15591563
time *= 1e3 # Convert from ps to fs. This seems to be ACEMD3 specific. GROMACS writes other units in time
15601564
nframes = coords.shape[2]
15611565
if len(step) != nframes or np.sum(step) == 0:
1562-
step = np.arange(nframes)
1566+
step = np.arange(nframes, dtype=Molecule._dtypes["step"])
15631567
if len(time) != nframes or np.sum(time) == 0:
1564-
time = np.zeros(nframes, dtype=np.float32)
1568+
time = np.zeros(nframes, dtype=Molecule._dtypes["time"])
15651569

15661570
bx, by, bz, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
15671571
boxvectors[0].T, boxvectors[1].T, boxvectors[2].T
@@ -1594,16 +1598,16 @@ def XSCread(filename, frame=None, topoloc=None):
15941598
bx, by, bz, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
15951599
pieces[1:4], pieces[4:7], pieces[7:10]
15961600
)
1597-
box = np.array([bx, by, bz], dtype=np.float32)
1598-
boxangles = np.array([alpha, beta, gamma], dtype=np.float32)
1601+
box = np.array([bx, by, bz])
1602+
boxangles = np.array([alpha, beta, gamma])
15991603

16001604
return MolFactory.construct(
16011605
None,
16021606
Trajectory(
16031607
box=box,
16041608
boxangles=boxangles,
16051609
step=[step],
1606-
time=np.zeros(1, dtype=np.float32),
1610+
time=[0],
16071611
),
16081612
filename,
16091613
frame,
@@ -1710,7 +1714,9 @@ def BINCOORread(filename, frame=None, topoloc=None):
17101714
dat = f.read(natoms * 3 * 8)
17111715
fmt = "d" * (natoms * 3)
17121716
coords = struct.unpack(fmt, dat)
1713-
coords = np.array(coords, dtype=np.float32).reshape((natoms, 3, 1))
1717+
coords = np.array(coords, dtype=Molecule._dtypes["coords"]).reshape(
1718+
(natoms, 3, 1)
1719+
)
17141720
return MolFactory.construct(None, Trajectory(coords=coords), filename, frame)
17151721

17161722

@@ -1772,7 +1778,10 @@ def MDTRAJTOPOread(filename, frame=None, topoloc=None, validateElements=True):
17721778
for k in table.keys():
17731779
topo.__dict__[translate[k]] = table[k].tolist()
17741780

1775-
coords = np.array(mdstruct.xyz.swapaxes(0, 1).swapaxes(1, 2) * 10, dtype=np.float32)
1781+
coords = np.array(
1782+
mdstruct.xyz.swapaxes(0, 1).swapaxes(1, 2) * 10,
1783+
dtype=Molecule._dtypes["coords"],
1784+
)
17761785
topo.bonds = bonds[:, :2]
17771786
return MolFactory.construct(
17781787
topo,
@@ -2322,9 +2331,11 @@ def fixDefault(val, dtype):
23222331
if len(ideal_coords) != 0:
23232332
ideal_allcoords.append(ideal_coords)
23242333

2325-
allcoords = np.stack(allcoords, axis=2).astype(np.float32)
2334+
allcoords = np.stack(allcoords, axis=2).astype(Molecule._dtypes["coords"])
23262335
if len(ideal_allcoords):
2327-
ideal_allcoords = np.stack(ideal_allcoords, axis=2).astype(np.float32)
2336+
ideal_allcoords = np.stack(ideal_allcoords, axis=2).astype(
2337+
Molecule._dtypes["coords"]
2338+
)
23282339

23292340
coords = allcoords
23302341
if np.any(np.all(allcoords == 0, axis=1)) and len(ideal_allcoords):
@@ -2423,10 +2434,12 @@ def _guessMass(element):
24232434
topo.element = np.array(
24242435
[element_by_type[t].capitalize() for t in topo.atomtype], dtype=object
24252436
)
2426-
topo.masses = np.array([mass_by_type[t] for t in topo.atomtype], dtype=np.float32)
2427-
topo.bonds = np.vstack(bonds)
2437+
topo.masses = np.array(
2438+
[mass_by_type[t] for t in topo.atomtype], dtype=Molecule._dtypes["masses"]
2439+
)
2440+
topo.bonds = np.vstack(bonds).astype(Molecule._dtypes["bonds"])
24282441

2429-
improper_indices = np.array(improper_indices).astype(np.uint32)
2442+
improper_indices = np.array(improper_indices).astype(Molecule._dtypes["impropers"])
24302443
if improper_indices.ndim == 1:
24312444
improper_indices = improper_indices[:, np.newaxis]
24322445
topo.impropers = improper_indices
@@ -2483,7 +2496,7 @@ def PREPIread(filename, frame=None, topoloc=None):
24832496
continue
24842497
impropers.append([names.index(impn) for impn in impropernames])
24852498

2486-
impropers = np.array(impropers).astype(np.uint32)
2499+
impropers = np.array(impropers).astype(Molecule._dtypes["impropers"])
24872500
if impropers.ndim == 1:
24882501
impropers = impropers[:, np.newaxis]
24892502

@@ -2496,7 +2509,7 @@ def PREPIread(filename, frame=None, topoloc=None):
24962509
topo = Topology()
24972510
topo.name = np.array(names, dtype=object)
24982511
topo.atomtype = np.array(atomtypes, dtype=object)
2499-
topo.charge = np.array(charges, dtype=np.float32)
2512+
topo.charge = np.array(charges, dtype=Molecule._dtypes["charge"])
25002513
topo.impropers = impropers
25012514

25022515
return MolFactory.construct(topo, None, filename, frame)

0 commit comments

Comments
 (0)