Skip to content

Commit 6f94362

Browse files
authored
Merge pull request #19 from iwyoo/user-given-temp-filepath
feat: Add temporary-file-path argument for multi-processing
2 parents 85af817 + 0e3b890 commit 6f94362

File tree

4 files changed

+24
-6
lines changed

4 files changed

+24
-6
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,11 @@ smiles_str = "CC"
3838
allow_bad_stereo = False,
3939
wildcard_radicals = False,
4040
jar_fpath = "/path/to/opsin.jar",
41+
tmp_fpath = "py2opsin_temp_input.txt",
4142
)
4243
```
4344

44-
The result is returned as a Python string, or False if an unexpected error occurs when calling OPSIN. If a list of IUPAC names is provided, a list is returned. It is __highly__ reccomended to use `py2opsin` in this manner if you need to resolve any more than a couple names -- the performance cost of running `OPSIN` from Python one name at a time is significant (~5 seconds/molecule individually, milliseconds otherwise).
45+
The result is returned as a Python string, or False if an unexpected error occurs when calling OPSIN. If a list of IUPAC names is provided, a list is returned. It is __highly__ recommended to use `py2opsin` in this manner if you need to resolve any more than a couple names -- the performance cost of running `OPSIN` from Python one name at a time is significant (~5 seconds/molecule individually, milliseconds otherwise).
4546

4647
Arguments:
4748
- chemical_name (str): IUPAC name of chemical as a Python string, or a list of strings.
@@ -51,7 +52,10 @@ Arguments:
5152
- allow_bad_stereo (bool, optional): Allow OPSIN to ignore uninterpretable stereochem. Defaults to False.
5253
- wildcard_radicals (bool, optional): Output radicals as wildcards. Defaults to False.
5354
- jar_fpath (str, optional): Filepath to OPSIN jar file. Defaults to "opsin-cli.jar" which is distributed with py2opsin.
55+
- tmp_fpath (str, optional): Name for temporary file used for calling OPSIN. Defaults to "py2opsin_temp_input.txt". When multiprocessing, set this to a unique name for each process.
5456

57+
> [!TIP]
58+
> `OPSIN` will already parallelize itself by creating multiple threads! Be wary when using `py2opsin` with multiprocessing to avoid spawning too many processes.
5559
5660
## Massive speedup from `pubchempy` for batch translations
5761
`py2opsin` runs locally and is smaller in scope in what it provides, which makes it __dramatically__ faster at resolving identifiers. In the code block below, the call to `py2opsin` will execute faster than an equivalent call to `pubchempy`:

py2opsin/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from .py2opsin import py2opsin
22

3-
__version__ = "1.0.6"
3+
__version__ = "1.1.0"

py2opsin/py2opsin.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def py2opsin(
5353
allow_bad_stereo: bool = False,
5454
wildcard_radicals: bool = False,
5555
jar_fpath: str = "default",
56+
tmp_fpath: str = "py2opsin_temp_input.txt",
5657
) -> str:
5758
"""Simple passthrough to opsin, returning results as Python strings.
5859
@@ -65,6 +66,8 @@ def py2opsin(
6566
allow_bad_stereo (bool, optional): Allow OPSIN to ignore uninterpretable stereochem. Defaults to False.
6667
wildcard_radicals (bool, optional): Output radicals as wildcards. Defaults to False.
6768
jar_fpath (str, optional): Filepath to OPSIN jar file. Defaults to "default", which causes py2opsin to use its included jar.
69+
tmp_fpath (str, optional): Name for temporary file used for calling OPSIN. Defaults to "py2opsin_temp_input.txt".
70+
When multiprocessing, set this to a unique name for each process.
6871
6972
Returns:
7073
str: Species in requested format, or False if not found or an error occurred. List of strings if input is list.
@@ -112,15 +115,14 @@ def py2opsin(
112115
)
113116

114117
# write the input to a text file
115-
temp_f = "py2opsin_temp_input.txt"
116-
with open(temp_f, "w") as file:
118+
with open(tmp_fpath, "w") as file:
117119
if type(chemical_name) is str:
118120
file.write(chemical_name)
119121
else:
120122
file.writelines("\n".join(chemical_name) + "\n")
121123

122124
# add the temporary file to the args
123-
arg_list.append(temp_f)
125+
arg_list.append(tmp_fpath)
124126

125127
# grab the optional boolean flags
126128
if allow_acid:
@@ -168,4 +170,4 @@ def py2opsin(
168170
warnings.warn("Unexpected error ocurred! " + e)
169171
return False
170172
finally:
171-
os.remove(temp_f)
173+
os.remove(tmp_fpath)

test/test_py2opsin.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
import os
2+
import multiprocessing
23
import sys
34
import unittest
45

56
from py2opsin import py2opsin
67

78

9+
# multiprocessing test function
10+
def _f(b):
11+
return py2opsin(b[0], tmp_fpath=f"tmp_{b[1]}.txt")
12+
13+
814
class Test_py2opsin(unittest.TestCase):
915
"""
1016
Test the various functionalities of py2opsin.
@@ -97,6 +103,12 @@ def test_invalid_output_helpful_error(self):
97103
"Output format SMOLES is invalid. Did you mean 'SMILES'?",
98104
)
99105

106+
def test_multiprocessing(self):
107+
"""py2opsin should safely work when run with multiprocessing"""
108+
with multiprocessing.Pool(2) as pool:
109+
res = pool.map(_f, [("methanol", 0), ("ethanol", 1)])
110+
self.assertEqual(res, ["CO", "C(C)O"])
111+
100112
def test_name_to_smiles(self):
101113
"""
102114
Tests converting IUPAC names to SMILES strings

0 commit comments

Comments
 (0)