Skip to content

Commit 70f3506

Browse files
committed
fix DCD to read/write timestep information
1 parent 7c5251d commit 70f3506

File tree

6 files changed

+171
-73
lines changed

6 files changed

+171
-73
lines changed

moleculekit/fileformats/dcd/dcd.pyx

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,20 @@ cdef class DCDTrajectoryFile:
201201
free(self.filename)
202202
self.close()
203203

204-
def _initialize_write(self, int n_atoms, int with_unitcell):
204+
def read_header(self):
205+
"""
206+
Returns
207+
-------
208+
istart : int
209+
The starting step
210+
nsavc : int
211+
The number of steps between each saved frame
212+
delta : float
213+
The timestep of the simulation
214+
"""
215+
return self.fh.istart, self.fh.nsavc, self.fh.delta
216+
217+
def _initialize_write(self, int n_atoms, int with_unitcell, int istart, int nsavc, double delta):
205218
"""We don't actually want to open the dcd file during write mode
206219
until we know how many atoms the user wants to save. so this
207220
is delayed until the first call to write()
@@ -210,7 +223,7 @@ cdef class DCDTrajectoryFile:
210223

211224
self.n_atoms = n_atoms
212225
self.with_unitcell = with_unitcell
213-
self.fh = open_dcd_write(self.filename, "dcd", self.n_atoms, with_unitcell)
226+
self.fh = open_dcd_write(self.filename, "dcd", self.n_atoms, with_unitcell, istart, nsavc, delta)
214227
if self.fh is NULL:
215228
raise IOError('There was an error opening the file: %s' % self.filename)
216229
self.is_open = True
@@ -435,7 +448,7 @@ cdef class DCDTrajectoryFile:
435448
# If we got some other status, thats a "real" error.
436449
raise IOError("Error: %s", ERROR_MESSAGES(status))
437450

438-
def write(self, xyz, cell_lengths=None, cell_angles=None):
451+
def write(self, xyz, cell_lengths=None, cell_angles=None, istart=0, nsavc=1, delta=1.0):
439452
"""write(xyz, cell_lengths=None, cell_angles=None)
440453
441454
Write one or more frames of data to the DCD file
@@ -453,6 +466,12 @@ cdef class DCDTrajectoryFile:
453466
Organized analogously to cell_lengths. Gives the alpha, beta and
454467
gamma angles respectively. By convention for
455468
this trajectory format, the angles should be in units of degrees.
469+
istart : int, optional
470+
The starting step
471+
nsavc : int, optional
472+
The number of steps between written frames
473+
delta : float, optional
474+
The time-step of the simulation.
456475
"""
457476
if str(self.mode) != 'w':
458477
raise ValueError('write() is only available when the file is opened in mode="w"')
@@ -478,7 +497,7 @@ cdef class DCDTrajectoryFile:
478497
warn_on_cast=False)
479498

480499
if self._needs_write_initialization:
481-
self._initialize_write(xyz.shape[1], (cell_lengths is not None) and (cell_angles is not None))
500+
self._initialize_write(xyz.shape[1], (cell_lengths is not None) and (cell_angles is not None), istart, nsavc, delta)
482501
else:
483502
if not self.n_atoms == xyz.shape[1]:
484503
raise ValueError('Number of atoms doesnt match')

moleculekit/fileformats/dcd/dcdlib.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ cdef extern from "include/dcdplugin.h":
2323
int read_next_timestep(dcdhandle *v, int natoms, molfile_timestep_t *ts)
2424
void close_file_read(dcdhandle *v)
2525

26-
dcdhandle* open_dcd_write(const char *path, const char *filetype, const int natoms, const int with_unitcell)
26+
dcdhandle* open_dcd_write(const char *path, const char *filetype, const int natoms, const int with_unitcell, const int istart, const int nsavc, const double delta)
2727
int write_timestep(dcdhandle *v, molfile_timestep_t *ts)
2828
void close_file_write(dcdhandle *v)
2929
int dcd_nsets(dcdhandle* v)

moleculekit/fileformats/dcd/include/dcdplugin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ void close_file_read(dcdhandle *v);
6868
int read_next_timestep(dcdhandle *v, int natoms, molfile_timestep_t *ts);
6969

7070
dcdhandle* open_dcd_write(const char *path, const char *filetype, const int natoms,
71-
const int with_unitcell);
71+
const int with_unitcell, const int istart, const int nsavc, const double delta);
7272
int write_timestep(dcdhandle *v, const molfile_timestep_t *ts);
7373
void close_file_write(dcdhandle *v);
7474
int dcd_nsets(dcdhandle* v);

moleculekit/fileformats/dcd/src/dcdplugin.c

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,13 +1144,11 @@ void close_file_read(dcdhandle *v)
11441144
}
11451145

11461146
dcdhandle *open_dcd_write(const char *path, const char *filetype, const int natoms,
1147-
const int with_unitcell)
1147+
const int with_unitcell, const int istart, const int nsavc, const double delta)
11481148
{
11491149
dcdhandle *dcd;
11501150
fio_fd fd;
11511151
int rc;
1152-
int istart, nsavc;
1153-
double delta;
11541152
int charmm;
11551153

11561154
if (fio_open(path, FIO_WRITE, &fd) < 0)
@@ -1163,10 +1161,6 @@ dcdhandle *open_dcd_write(const char *path, const char *filetype, const int nato
11631161
memset(dcd, 0, sizeof(dcdhandle));
11641162
dcd->fd = fd;
11651163

1166-
istart = 0; /* starting timestep of DCD file */
1167-
nsavc = 1; /* number of timesteps between written DCD frames */
1168-
delta = 1.0; /* length of a timestep */
1169-
11701164
charmm = DCD_IS_CHARMM; /* charmm-formatted DCD file */
11711165
if (with_unitcell)
11721166
charmm |= DCD_HAS_EXTRA_BLOCK;

moleculekit/readers.py

Lines changed: 127 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2523,15 +2523,14 @@ def SDFread(filename, frame=None, topoloc=None, mol_idx=None):
25232523
"MoleculeKit will only read the first molecule from the SDF file."
25242524
)
25252525

2526+
v3000 = False
25262527
with openFileOrStringIO(filename, "r") as f:
25272528
curr_mol = 0
25282529
lines = []
25292530
for line in f:
25302531
lines.append(line)
25312532
if "V3000" in line:
2532-
raise RuntimeError(
2533-
"Moleculekit does not support parsing V3000 SDF files yet."
2534-
)
2533+
v3000 = True
25352534
if line.strip() == "$$$$":
25362535
if curr_mol == mol_idx:
25372536
break
@@ -2544,61 +2543,119 @@ def SDFread(filename, frame=None, topoloc=None, mol_idx=None):
25442543
f"SDF file contains only {curr_mol} molecules. Cannot read the requested mol_idx {mol_idx}"
25452544
)
25462545

2547-
topo = Topology()
2548-
coords = []
2549-
mol_start = 0
2550-
2551-
molname = lines[0].strip().split()
2552-
if len(molname):
2553-
molname = molname[0]
2554-
if len(molname) == 3:
2555-
resname = molname[:3].upper()
2546+
if v3000:
2547+
topo, coords = parseV3000SDF(lines, chargemap, bondmap)
25562548
else:
2557-
resname = "MOL"
2558-
2559-
n_atoms = int(lines[mol_start + 3][:3])
2560-
n_bonds = int(lines[mol_start + 3][3:6])
2549+
topo = Topology()
2550+
coords = []
2551+
mol_start = 0
25612552

2562-
coord_start = mol_start + 4
2563-
bond_start = coord_start + n_atoms
2564-
bond_end = bond_start + n_bonds
2565-
for n in range(coord_start, bond_start):
2566-
line = lines[n]
2567-
coords.append(
2568-
[
2569-
float(line[:10].strip()),
2570-
float(line[10:20].strip()),
2571-
float(line[20:30].strip()),
2572-
]
2573-
)
2574-
atom_symbol = line[31:34].strip()
2575-
topo.record.append("HETATM")
2576-
topo.element.append(atom_symbol)
2577-
topo.name.append(atom_symbol)
2578-
topo.serial.append(n - coord_start)
2579-
topo.formalcharge.append(chargemap[line[36:39].strip()])
2580-
topo.resname.append(resname)
2553+
molname = lines[0].strip().split()
2554+
if len(molname):
2555+
molname = molname[0]
2556+
if len(molname) == 3:
2557+
resname = molname[:3].upper()
2558+
else:
2559+
resname = "MOL"
25812560

2582-
for n in range(bond_start, bond_end):
2583-
line = lines[n]
2584-
idx1 = line[:3].strip()
2585-
idx2 = line[3:6].strip()
2586-
bond_type = line[6:9].strip()
2587-
topo.bonds.append([int(idx1) - 1, int(idx2) - 1])
2588-
topo.bondtype.append(bondmap[bond_type])
2561+
n_atoms = int(lines[mol_start + 3][:3])
2562+
n_bonds = int(lines[mol_start + 3][3:6])
25892563

2590-
for line in lines[bond_end:]:
2591-
if line.strip() == "$$$$":
2592-
break
2593-
if line.startswith("M CHG"): # These charges are more correct
2594-
pairs = line.strip().split()[3:]
2595-
for cc in range(0, len(pairs), 2):
2596-
topo.formalcharge[int(pairs[cc]) - 1] = int(pairs[cc + 1])
2564+
coord_start = mol_start + 4
2565+
bond_start = coord_start + n_atoms
2566+
bond_end = bond_start + n_bonds
2567+
for n in range(coord_start, bond_start):
2568+
line = lines[n]
2569+
coords.append(
2570+
[
2571+
float(line[:10].strip()),
2572+
float(line[10:20].strip()),
2573+
float(line[20:30].strip()),
2574+
]
2575+
)
2576+
atom_symbol = line[31:34].strip()
2577+
topo.record.append("HETATM")
2578+
topo.element.append(atom_symbol)
2579+
topo.name.append(atom_symbol)
2580+
topo.serial.append(n - coord_start)
2581+
topo.formalcharge.append(chargemap[line[36:39].strip()])
2582+
topo.resname.append(resname)
2583+
2584+
for n in range(bond_start, bond_end):
2585+
line = lines[n]
2586+
idx1 = line[:3].strip()
2587+
idx2 = line[3:6].strip()
2588+
bond_type = line[6:9].strip()
2589+
topo.bonds.append([int(idx1) - 1, int(idx2) - 1])
2590+
topo.bondtype.append(bondmap[bond_type])
2591+
2592+
for line in lines[bond_end:]:
2593+
if line.strip() == "$$$$":
2594+
break
2595+
if line.startswith("M CHG"): # These charges are more correct
2596+
pairs = line.strip().split()[3:]
2597+
for cc in range(0, len(pairs), 2):
2598+
topo.formalcharge[int(pairs[cc]) - 1] = int(pairs[cc + 1])
25972599

25982600
traj = Trajectory(coords=np.vstack(coords))
25992601
return MolFactory.construct(topo, traj, filename, frame)
26002602

26012603

2604+
def parseV3000SDF(lines, chargemap, bondmap):
2605+
topo = Topology()
2606+
coords = []
2607+
2608+
molname = lines[0].strip().split()
2609+
if len(molname):
2610+
molname = molname[0]
2611+
if len(molname) == 3:
2612+
resname = molname[:3].upper()
2613+
else:
2614+
resname = "MOL"
2615+
2616+
section = None
2617+
for line in lines[4:]:
2618+
line = line.strip()
2619+
if line == "$$$$":
2620+
break
2621+
if "v30 end" in line.lower() or "m end" in line.lower():
2622+
section = None
2623+
continue
2624+
if "begin ctab" in line.lower():
2625+
section = "ctab"
2626+
continue
2627+
if "begin atom" in line.lower():
2628+
section = "atom"
2629+
continue
2630+
if "begin bond" in line.lower():
2631+
section = "bond"
2632+
continue
2633+
2634+
if section is None:
2635+
continue
2636+
if section == "ctab":
2637+
# M V30 COUNTS 49 53 0 0 0
2638+
if "v30 counts" in line.lower():
2639+
n_atoms, n_bonds = map(int, line.split()[3:5])
2640+
if section == "atom":
2641+
# M V30 1 C 42.4990 45.8550 38.9350 0
2642+
pieces = line.split()[2:]
2643+
topo.record.append("HETATM")
2644+
topo.element.append(pieces[1])
2645+
topo.name.append(pieces[1])
2646+
topo.serial.append(int(pieces[0]))
2647+
topo.formalcharge.append(chargemap[pieces[5]])
2648+
topo.resname.append(resname)
2649+
coords.append([float(pieces[2]), float(pieces[3]), float(pieces[4])])
2650+
if section == "bond":
2651+
# M V30 bond_idx bond_type atom1 atom2
2652+
pieces = line.split()[3:]
2653+
topo.bonds.append([int(pieces[1]) - 1, int(pieces[2]) - 1])
2654+
topo.bondtype.append(bondmap[pieces[0]])
2655+
2656+
return topo, coords
2657+
2658+
26022659
def sdf_generator(sdffile):
26032660
"""Generates Molecule objects from an SDF file"""
26042661
from io import StringIO
@@ -2884,29 +2941,40 @@ def NETCDFread(
28842941

28852942

28862943
def DCDread(filename, frame=None, topoloc=None, stride=None, atom_indices=None):
2887-
from moleculekit.dcd import load_dcd
2944+
from moleculekit.fileformats.utils import cast_indices
2945+
from moleculekit.dcd import DCDTrajectoryFile
2946+
2947+
atom_indices = cast_indices(atom_indices)
2948+
with DCDTrajectoryFile(str(filename)) as f:
2949+
if frame is not None:
2950+
f.seek(frame)
2951+
n_frames = 1
2952+
else:
2953+
n_frames = None
2954+
xyz, cell_lengths, cell_angles = f.read(
2955+
n_frames=n_frames, stride=stride, atom_indices=atom_indices
2956+
)
2957+
istart, nsavc, delta = f.read_header()
2958+
# Timestep conversion factor found in OpenMM
2959+
delta = np.round(delta * 0.04888821, decimals=8)
2960+
steps = np.arange(istart, (nsavc * xyz.shape[0]) + 1, nsavc)
2961+
if stride is not None:
2962+
steps = steps[::stride]
28882963

2889-
xyz, cell_lengths, cell_angles = load_dcd(
2890-
filename, stride=stride, atom_indices=atom_indices, frame=frame
2891-
)
28922964
xyz = np.transpose(xyz, (1, 2, 0))
28932965
if cell_lengths is not None:
28942966
cell_lengths = cell_lengths.T
28952967
if cell_angles is not None:
28962968
cell_angles = cell_angles.T
28972969

2898-
if stride is None:
2899-
stride = 1
2900-
initial = 0
2901-
time = (stride * np.arange(xyz.shape[2])) + initial
29022970
return MolFactory.construct(
29032971
None,
29042972
Trajectory(
29052973
coords=xyz,
29062974
box=cell_lengths,
29072975
boxangles=cell_angles,
2908-
step=range(xyz.shape[2]),
2909-
time=time * 1000, # ps to fs
2976+
step=steps,
2977+
time=steps * delta * 1000, # ps to fs
29102978
),
29112979
filename,
29122980
frame,

moleculekit/writers.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,8 +666,25 @@ def DCDwrite(mol, filename):
666666
else:
667667
boxangles = np.zeros((n_frames, 3), dtype=np.float32)
668668

669+
try:
670+
istart = int(mol.step[0])
671+
nsavc = int(mol.step[1] - mol.step[0])
672+
fstep = mol.fstep * 1000 # ns to ps
673+
delta = fstep / nsavc / 0.04888821 # Conversion factor found in OpenMM
674+
except Exception:
675+
istart = 0
676+
nsavc = 1
677+
delta = 1.0
678+
669679
with DCDTrajectoryFile(filename, "w") as fh:
670-
fh.write(xyz, cell_lengths=box, cell_angles=boxangles)
680+
fh.write(
681+
xyz,
682+
cell_lengths=box,
683+
cell_angles=boxangles,
684+
istart=istart,
685+
nsavc=nsavc,
686+
delta=delta,
687+
)
671688

672689

673690
def BINPOSwrite(mol, filename):

0 commit comments

Comments
 (0)