@@ -2523,15 +2523,14 @@ def SDFread(filename, frame=None, topoloc=None, mol_idx=None):
25232523 "MoleculeKit will only read the first molecule from the SDF file."
25242524 )
25252525
2526+ v3000 = False
25262527 with openFileOrStringIO (filename , "r" ) as f :
25272528 curr_mol = 0
25282529 lines = []
25292530 for line in f :
25302531 lines .append (line )
25312532 if "V3000" in line :
2532- raise RuntimeError (
2533- "Moleculekit does not support parsing V3000 SDF files yet."
2534- )
2533+ v3000 = True
25352534 if line .strip () == "$$$$" :
25362535 if curr_mol == mol_idx :
25372536 break
@@ -2544,61 +2543,119 @@ def SDFread(filename, frame=None, topoloc=None, mol_idx=None):
25442543 f"SDF file contains only { curr_mol } molecules. Cannot read the requested mol_idx { mol_idx } "
25452544 )
25462545
2547- topo = Topology ()
2548- coords = []
2549- mol_start = 0
2550-
2551- molname = lines [0 ].strip ().split ()
2552- if len (molname ):
2553- molname = molname [0 ]
2554- if len (molname ) == 3 :
2555- resname = molname [:3 ].upper ()
2546+ if v3000 :
2547+ topo , coords = parseV3000SDF (lines , chargemap , bondmap )
25562548 else :
2557- resname = "MOL"
2558-
2559- n_atoms = int (lines [mol_start + 3 ][:3 ])
2560- n_bonds = int (lines [mol_start + 3 ][3 :6 ])
2549+ topo = Topology ()
2550+ coords = []
2551+ mol_start = 0
25612552
2562- coord_start = mol_start + 4
2563- bond_start = coord_start + n_atoms
2564- bond_end = bond_start + n_bonds
2565- for n in range (coord_start , bond_start ):
2566- line = lines [n ]
2567- coords .append (
2568- [
2569- float (line [:10 ].strip ()),
2570- float (line [10 :20 ].strip ()),
2571- float (line [20 :30 ].strip ()),
2572- ]
2573- )
2574- atom_symbol = line [31 :34 ].strip ()
2575- topo .record .append ("HETATM" )
2576- topo .element .append (atom_symbol )
2577- topo .name .append (atom_symbol )
2578- topo .serial .append (n - coord_start )
2579- topo .formalcharge .append (chargemap [line [36 :39 ].strip ()])
2580- topo .resname .append (resname )
2553+ molname = lines [0 ].strip ().split ()
2554+ if len (molname ):
2555+ molname = molname [0 ]
2556+ if len (molname ) == 3 :
2557+ resname = molname [:3 ].upper ()
2558+ else :
2559+ resname = "MOL"
25812560
2582- for n in range (bond_start , bond_end ):
2583- line = lines [n ]
2584- idx1 = line [:3 ].strip ()
2585- idx2 = line [3 :6 ].strip ()
2586- bond_type = line [6 :9 ].strip ()
2587- topo .bonds .append ([int (idx1 ) - 1 , int (idx2 ) - 1 ])
2588- topo .bondtype .append (bondmap [bond_type ])
2561+ n_atoms = int (lines [mol_start + 3 ][:3 ])
2562+ n_bonds = int (lines [mol_start + 3 ][3 :6 ])
25892563
2590- for line in lines [bond_end :]:
2591- if line .strip () == "$$$$" :
2592- break
2593- if line .startswith ("M CHG" ): # These charges are more correct
2594- pairs = line .strip ().split ()[3 :]
2595- for cc in range (0 , len (pairs ), 2 ):
2596- topo .formalcharge [int (pairs [cc ]) - 1 ] = int (pairs [cc + 1 ])
2564+ coord_start = mol_start + 4
2565+ bond_start = coord_start + n_atoms
2566+ bond_end = bond_start + n_bonds
2567+ for n in range (coord_start , bond_start ):
2568+ line = lines [n ]
2569+ coords .append (
2570+ [
2571+ float (line [:10 ].strip ()),
2572+ float (line [10 :20 ].strip ()),
2573+ float (line [20 :30 ].strip ()),
2574+ ]
2575+ )
2576+ atom_symbol = line [31 :34 ].strip ()
2577+ topo .record .append ("HETATM" )
2578+ topo .element .append (atom_symbol )
2579+ topo .name .append (atom_symbol )
2580+ topo .serial .append (n - coord_start )
2581+ topo .formalcharge .append (chargemap [line [36 :39 ].strip ()])
2582+ topo .resname .append (resname )
2583+
2584+ for n in range (bond_start , bond_end ):
2585+ line = lines [n ]
2586+ idx1 = line [:3 ].strip ()
2587+ idx2 = line [3 :6 ].strip ()
2588+ bond_type = line [6 :9 ].strip ()
2589+ topo .bonds .append ([int (idx1 ) - 1 , int (idx2 ) - 1 ])
2590+ topo .bondtype .append (bondmap [bond_type ])
2591+
2592+ for line in lines [bond_end :]:
2593+ if line .strip () == "$$$$" :
2594+ break
2595+ if line .startswith ("M CHG" ): # These charges are more correct
2596+ pairs = line .strip ().split ()[3 :]
2597+ for cc in range (0 , len (pairs ), 2 ):
2598+ topo .formalcharge [int (pairs [cc ]) - 1 ] = int (pairs [cc + 1 ])
25972599
25982600 traj = Trajectory (coords = np .vstack (coords ))
25992601 return MolFactory .construct (topo , traj , filename , frame )
26002602
26012603
2604+ def parseV3000SDF (lines , chargemap , bondmap ):
2605+ topo = Topology ()
2606+ coords = []
2607+
2608+ molname = lines [0 ].strip ().split ()
2609+ if len (molname ):
2610+ molname = molname [0 ]
2611+ if len (molname ) == 3 :
2612+ resname = molname [:3 ].upper ()
2613+ else :
2614+ resname = "MOL"
2615+
2616+ section = None
2617+ for line in lines [4 :]:
2618+ line = line .strip ()
2619+ if line == "$$$$" :
2620+ break
2621+ if "v30 end" in line .lower () or "m end" in line .lower ():
2622+ section = None
2623+ continue
2624+ if "begin ctab" in line .lower ():
2625+ section = "ctab"
2626+ continue
2627+ if "begin atom" in line .lower ():
2628+ section = "atom"
2629+ continue
2630+ if "begin bond" in line .lower ():
2631+ section = "bond"
2632+ continue
2633+
2634+ if section is None :
2635+ continue
2636+ if section == "ctab" :
2637+ # M V30 COUNTS 49 53 0 0 0
2638+ if "v30 counts" in line .lower ():
2639+ n_atoms , n_bonds = map (int , line .split ()[3 :5 ])
2640+ if section == "atom" :
2641+ # M V30 1 C 42.4990 45.8550 38.9350 0
2642+ pieces = line .split ()[2 :]
2643+ topo .record .append ("HETATM" )
2644+ topo .element .append (pieces [1 ])
2645+ topo .name .append (pieces [1 ])
2646+ topo .serial .append (int (pieces [0 ]))
2647+ topo .formalcharge .append (chargemap [pieces [5 ]])
2648+ topo .resname .append (resname )
2649+ coords .append ([float (pieces [2 ]), float (pieces [3 ]), float (pieces [4 ])])
2650+ if section == "bond" :
2651+ # M V30 bond_idx bond_type atom1 atom2
2652+ pieces = line .split ()[3 :]
2653+ topo .bonds .append ([int (pieces [1 ]) - 1 , int (pieces [2 ]) - 1 ])
2654+ topo .bondtype .append (bondmap [pieces [0 ]])
2655+
2656+ return topo , coords
2657+
2658+
26022659def sdf_generator (sdffile ):
26032660 """Generates Molecule objects from an SDF file"""
26042661 from io import StringIO
@@ -2884,29 +2941,40 @@ def NETCDFread(
28842941
28852942
28862943def DCDread (filename , frame = None , topoloc = None , stride = None , atom_indices = None ):
2887- from moleculekit .dcd import load_dcd
2944+ from moleculekit .fileformats .utils import cast_indices
2945+ from moleculekit .dcd import DCDTrajectoryFile
2946+
2947+ atom_indices = cast_indices (atom_indices )
2948+ with DCDTrajectoryFile (str (filename )) as f :
2949+ if frame is not None :
2950+ f .seek (frame )
2951+ n_frames = 1
2952+ else :
2953+ n_frames = None
2954+ xyz , cell_lengths , cell_angles = f .read (
2955+ n_frames = n_frames , stride = stride , atom_indices = atom_indices
2956+ )
2957+ istart , nsavc , delta = f .read_header ()
2958+ # Timestep conversion factor found in OpenMM
2959+ delta = np .round (delta * 0.04888821 , decimals = 8 )
2960+ steps = np .arange (istart , (nsavc * xyz .shape [0 ]) + 1 , nsavc )
2961+ if stride is not None :
2962+ steps = steps [::stride ]
28882963
2889- xyz , cell_lengths , cell_angles = load_dcd (
2890- filename , stride = stride , atom_indices = atom_indices , frame = frame
2891- )
28922964 xyz = np .transpose (xyz , (1 , 2 , 0 ))
28932965 if cell_lengths is not None :
28942966 cell_lengths = cell_lengths .T
28952967 if cell_angles is not None :
28962968 cell_angles = cell_angles .T
28972969
2898- if stride is None :
2899- stride = 1
2900- initial = 0
2901- time = (stride * np .arange (xyz .shape [2 ])) + initial
29022970 return MolFactory .construct (
29032971 None ,
29042972 Trajectory (
29052973 coords = xyz ,
29062974 box = cell_lengths ,
29072975 boxangles = cell_angles ,
2908- step = range ( xyz . shape [ 2 ]) ,
2909- time = time * 1000 , # ps to fs
2976+ step = steps ,
2977+ time = steps * delta * 1000 , # ps to fs
29102978 ),
29112979 filename ,
29122980 frame ,
0 commit comments