Skip to content

Commit ab6d54d

Browse files
chrisiacovellaChristopher Iacovellapre-commit-ci[bot]daico007Christopher Iacovella
authored
print the hierarchy of a compound (#1097)
* replaced bondgraph with networkx * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * misc fix for docs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * use more particles in residue map testing to ensure actually faster * reduced number of children and grandchildren in test_nested_compound * commented out silica_interface test * added in print hierarchy functionality * changed duplicate checking to also look at names of child compoudns and names of the particles contained by the given compound * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added more tests for print hierarchy * commented out test silica interface test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * small edits to tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added in test for missed lines * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update mbuild/compound.py marking argument as optional Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com> * Update mbuild/compound.py marking argument as optional Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com> * Update mbuild/compound.py marking argument as optional Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com> * addressed PR comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * addressed minor docs formatting * added in treelib to environment-docs.yml file * tried to fix readthedocs issue unrelated to these changes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Christopher Iacovella <cri@MB22.local> Co-authored-by: 
pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com> Co-authored-by: Co Quach <daico007@gmail.com> Co-authored-by: Christopher Iacovella <cri@MB22.hsd1.or.comcast.net>
1 parent 00d8c77 commit ab6d54d

File tree

6 files changed

+286
-0
lines changed

6 files changed

+286
-0
lines changed

docs/environment-docs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ dependencies:
1515
- ele
1616
- sphinx_rtd_theme
1717
- widgetsnbextension
18+
- treelib
1819
- pip:
1920
- sphinxcontrib-svg2pdfconverter[CairoSVG]
2021
- cairosvg

environment-dev-win.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ dependencies:
2828
- python<3.10
2929
- rdkit>=2021
3030
- scipy
31+
- treelib

environment-dev.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ dependencies:
3030
- python<3.10
3131
- rdkit>=2021
3232
- scipy
33+
- treelib

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ dependencies:
1010
- python<3.10
1111
- rdkit>=2021
1212
- scipy
13+
- treelib

mbuild/compound.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import numpy as np
1616
from ele.element import Element, element_from_name, element_from_symbol
1717
from ele.exceptions import ElementError
18+
from treelib import Tree
1819

1920
from mbuild import conversion
2021
from mbuild.bond_graph import BondGraph
@@ -278,6 +279,156 @@ def _contains_only_ports(self):
278279
return False
279280
return True
280281

282+
def print_hierarchy(self, print_full=False, index=None, show_tree=True):
    """Print the hierarchy of the Compound.

    Parameters
    ----------
    print_full: bool, optional, default=False
        The full hierarchy will be printed, rather than condensing
        compounds with identical topologies.
        Topologies are considered identical if they have the same name,
        contain the same number and names of children,
        contain the same number and names of particles,
        and the same number of bonds.
    index: int, optional, default=None
        Print the branch of the first level of the hierarchy
        corresponding to the value specified by index.
        This only applies when print_full is True.
    show_tree: bool, optional, default=True
        If False, do not print the tree to the screen.

    Returns
    -------
    tree, treelib.tree.Tree, hierarchy of the compound as a tree

    Raises
    ------
    MBuildError
        If index is given and is negative or not smaller than the number
        of direct children of this compound.
    """
    # Reject bad indices up front. A negative index can never match the
    # running branch counter below, so it would otherwise silently
    # produce a tree that contains only the root node.
    if index is not None and not 0 <= index < len(self.children):
        raise MBuildError(
            f"Index {index} out of range. The number of first level nodes in the tree is {len(self.children)}."
        )

    def _summary(compound):
        # A compound without children reports n_direct_bonds instead of
        # n_bonds; every other compound reports n_bonds.
        if len(compound.children) == 0:
            n_bonds = compound.n_direct_bonds
        else:
            n_bonds = compound.n_bonds
        return f"{compound.n_particles} particles, {n_bonds} bonds, {len(compound.children)} children"

    tree = Tree()

    # The root node; node identifiers are the stringified id() of the
    # compound so that children can name their parent node.
    tree.create_node(f"{self.name}, {_summary(self)}", f"{id(self)}")

    if print_full:
        entries = self._get_hierarchy()
    else:
        entries = self._get_hierarchy_nodup()

    # Position of the first-level branch currently being walked. Entries
    # arrive depth first, so everything up to the next level-0 entry
    # belongs to the same branch.
    branch = -1
    for entry in entries:
        comp = entry["comp"]
        if entry["level"] == 0:
            branch += 1

        if print_full:
            if index is not None and branch != index:
                # Only the requested first-level branch is shown.
                continue
            label = f"[{comp.name}]: {_summary(comp)}"
        else:
            label = f"[{comp.name} x {entry['n_dup']}], {_summary(comp)}"

        tree.create_node(
            label,
            f"{entry['comp_id']}",
            f"{entry['parent_id']}",
        )

    if show_tree:
        tree.show()
    return tree
366+
367+
def _get_hierarchy(self, level=0):
    """Yield a dictionary for every descendant of the compound, depth first.

    This is a generator: each child is yielded immediately before its own
    descendants, and nothing is yielded for a compound without children.

    Parameters
    ----------
    level: int, optional, default=0
        Depth recorded for the direct children of this compound; each
        deeper generation is recorded with the level increased by one.

    Yields
    ------
    dict
        With keys "level" (depth of the entry), "parent_id" (id() of the
        compound that holds the entry), "comp_id" (id() of the entry's
        compound) and "comp" (the compound itself).
    """
    if not self.children:
        return
    for child in self.children:
        yield {
            "level": level,
            "parent_id": id(self),
            "comp_id": id(child),
            "comp": child,
        }
        # Delegate to the child's own generator for its subtree.
        yield from child._get_hierarchy(level + 1)
380+
381+
def _get_hierarchy_nodup(self, level=0):
    """Yield the hierarchy of the compound depth first, condensing duplicates.

    This is a generator. Among the direct children of a compound, children
    that are considered identical are reported once (through the first of
    them, whose subtree is then walked) together with how many of them
    there are. Two children are considered identical when they have the
    same name, the same names of direct children, the same number and
    names of particles, and the same number of bonds.

    Parameters
    ----------
    level: int, optional, default=0
        Depth recorded for the direct children of this compound; each
        deeper generation is recorded with the level increased by one.

    Yields
    ------
    dict
        With keys "level", "parent_id" (id() of the holding compound),
        "comp_id" (id() of the reported compound), "comp" (the reported
        compound) and "n_dup" (number of identical siblings, including
        the reported one).
    """
    if not self.children:
        return

    def _signature(compound):
        # A compound without children reports n_direct_bonds instead of
        # n_bonds; every other compound reports n_bonds.
        if len(compound.children) == 0:
            n_bonds = compound.n_direct_bonds
        else:
            n_bonds = compound.n_bonds
        # Tuples keep every name separate. Gluing the names into one
        # string would make e.g. ("AB", "C") and ("A", "BC") look alike.
        # The tuple of child names also fixes the number of children.
        return (
            compound.name,
            tuple(sub.name for sub in compound.children),
            compound.n_particles,
            tuple(part.name for part in compound.particles()),
            n_bonds,
        )

    # Compute each signature once; it walks all particles of the child.
    signatures = [_signature(child) for child in self.children]

    counts = {}
    for signature in signatures:
        counts[signature] = counts.get(signature, 0) + 1

    reported = set()
    for child, signature in zip(self.children, signatures):
        if signature in reported:
            # An identical sibling has already been reported.
            continue
        reported.add(signature)
        yield {
            "level": level,
            "parent_id": id(self),
            "comp_id": id(child),
            "comp": child,
            "n_dup": counts[signature],
        }
        yield from child._get_hierarchy_nodup(level + 1)
431+
281432
def ancestors(self):
282433
"""Generate all ancestors of the Compound recursively.
283434

mbuild/tests/test_compound.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,137 @@ def test_direct_bonds_cloning(self, ethane):
138138
for p1, p2 in zip(ethane.particles(), ethane_clone.particles()):
139139
assert p1.n_direct_bonds == p2.n_direct_bonds
140140

141+
def test_hierarchy(self, ethane):
    """Check the hierarchy generators and the tree built by print_hierarchy."""
    # first check the hierarchy returned where we don't print duplicates
    eh = list(ethane._get_hierarchy_nodup())
    assert len(eh) == 3

    first_methyl = ethane.children[0]
    # (level, parent, compound, number of duplicates) for each entry
    expected_nodup = [
        (0, ethane, first_methyl, 2),
        (1, first_methyl, first_methyl.children[0], 1),
        (1, first_methyl, first_methyl.children[1], 3),
    ]
    for entry, (level, parent, comp, n_dup) in zip(eh, expected_nodup):
        assert entry["level"] == level
        assert entry["parent_id"] == id(parent)
        assert entry["comp_id"] == id(comp)
        assert entry["comp"] == comp
        assert entry["n_dup"] == n_dup

    # now check the hierarchy returned with duplicates
    ehf = list(ethane._get_hierarchy())
    # check the length before indexing so that a short result fails with
    # a readable assertion instead of an IndexError
    assert len(ehf) == 10

    # each of the two first-level children is followed by its four children
    expected_full = []
    for i in range(2):
        methyl = ethane.children[i]
        expected_full.append((0, ethane, methyl))
        for j in range(4):
            expected_full.append((1, methyl, methyl.children[j]))

    for entry, (level, parent, comp) in zip(ehf, expected_full):
        assert entry["level"] == level
        assert entry["parent_id"] == id(parent)
        assert entry["comp_id"] == id(comp)
        assert entry["comp"] == comp

    # examine the tree output from print_hierarchy
    ethane_tree = ethane.print_hierarchy()
    assert ethane_tree.depth() == 2
    tree_json = ethane_tree.to_json(with_data=False)
    assert (
        tree_json
        == '{"Ethane, 8 particles, 7 bonds, 2 children": {"children": [{"[CH3 x 2], 4 particles, 3 bonds, 4 children": {"children": ["[C x 1], 1 particles, 4 bonds, 0 children", "[H x 3], 1 particles, 1 bonds, 0 children"]}}]}}'
    )

    ethane_tree_full = ethane.print_hierarchy(print_full=True)
    assert ethane_tree_full.depth() == 2
    tree_json_full = ethane_tree_full.to_json(with_data=False)
    assert (
        tree_json_full
        == '{"Ethane, 8 particles, 7 bonds, 2 children": {"children": [{"[CH3]: 4 particles, 3 bonds, 4 children": {"children": ["[C]: 1 particles, 4 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children"]}}, {"[CH3]: 4 particles, 3 bonds, 4 children": {"children": ["[C]: 1 particles, 4 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children"]}}]}}'
    )

    ethane_tree_full_index = ethane.print_hierarchy(
        print_full=True, index=0
    )
    assert ethane_tree_full_index.depth() == 2
    tree_json_full_index = ethane_tree_full_index.to_json(with_data=False)
    assert (
        tree_json_full_index
        == '{"Ethane, 8 particles, 7 bonds, 2 children": {"children": [{"[CH3]: 4 particles, 3 bonds, 4 children": {"children": ["[C]: 1 particles, 4 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children"]}}]}}'
    )

    system = mb.Compound()
    system.add(mb.clone(ethane))
    system.add(mb.clone(ethane))

    sh_array = list(system._get_hierarchy_nodup())
    assert len(sh_array) == 4

    # let us change the name to ensure that it doesn't see it as a duplicate
    system.children[0].name = "Ethane_new_name"
    sh_array = list(system._get_hierarchy_nodup())
    assert len(sh_array) == 8

    # make sure we throw an error if we try to index out of range
    with pytest.raises(MBuildError):
        system.print_hierarchy(print_full=True, index=10)

    # a compound without children gives a tree that is only the root node
    temp_particle = mb.Compound(name="C", element="C")
    temp_tree = temp_particle.print_hierarchy()
    assert temp_tree.depth() == 0
259+
260+
def test_show_hierarchy(self, capsys):
    """Check what print_hierarchy writes to stdout with and without show_tree."""
    carbon = mb.Compound(name="C", element="C")

    # by default the tree is written to the screen
    carbon.print_hierarchy()
    printed = capsys.readouterr().out
    assert printed.strip() == "C, 1 particles, 0 bonds, 0 children"

    # with show_tree=False nothing may be written
    carbon.print_hierarchy(show_tree=False)
    printed = capsys.readouterr().out
    assert printed.strip() == ""
271+
141272
def test_load_protein(self):
142273
# Testing the loading function with complicated protein,
143274
# The protein file is taken from RCSB protein data bank

0 commit comments

Comments
 (0)