Skip to content

Commit e968956

Browse files
committed
perf: read_off replace pandas.read_csv engine=python with c
Big performance improvement: the slow `engine="python"` is no longer needed, because the relevant slice of the file is now read from an in-memory StringIO buffer. This also fixes a bug where, for OFF files containing more lines than `num_points + num_faces`, the reader tried to parse the extra lines (potentially edges) as faces. As [Wikipedia] says, an OFF file may contain: points, faces (optional), and edges (optional). This still does not cover every OFF file variant described by Wikipedia, but it is an improvement. [Wikipedia]: https://en.wikipedia.org/wiki/OFF_(file_format)
1 parent cab0c7f commit e968956

File tree

1 file changed

+51
-33
lines changed

1 file changed

+51
-33
lines changed

pyntcloud/io/off.py

Lines changed: 51 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,80 @@
1-
import pandas as pd
1+
from contextlib import contextmanager
2+
from io import StringIO
3+
from itertools import islice
4+
25
import numpy as np
6+
import pandas as pd
37

48

59
def read_off(filename):
6-
7-
with open(filename) as off:
8-
9-
first_line = off.readline()
10+
with open(filename) as f:
11+
first_line = f.readline()
1012
if "OFF" not in first_line:
11-
raise ValueError('The file does not start with the word OFF')
12-
color = True if "C" in first_line else False
13+
raise ValueError("The file does not start with the word OFF")
14+
has_color = "C" in first_line
1315

14-
n_points = 0
15-
n_faces = 0
16+
num_rows = None
17+
n_points = None
18+
n_faces = None
1619

17-
count = 1
18-
for line in off:
19-
count += 1
20+
# Read header.
21+
for line in f:
2022
if line.startswith("#"):
2123
continue
2224
line = line.strip().split()
23-
if len(line) > 1:
24-
n_points = int(line[0])
25-
n_faces = int(line[1])
26-
break
25+
if len(line) <= 1:
26+
continue
27+
n_points = int(line[0])
28+
n_faces = int(line[1])
29+
num_rows = n_points + n_faces
30+
break
2731

28-
if (n_points == 0):
29-
raise ValueError('The file has no points')
32+
if num_rows is None:
33+
raise ValueError("The file does not contain a valid header")
3034

31-
data = {}
32-
point_names = ["x", "y", "z"]
33-
point_types = {'x': np.float32, 'y': np.float32, 'z': np.float32}
35+
# Read remaining lines.
36+
lines = [next(f) for _ in range(num_rows)]
3437

35-
if color:
36-
point_names.extend(["red", "green", "blue"])
37-
point_types = dict(point_types, **{'red': np.uint8, 'green': np.uint8, 'blue': np.uint8})
38+
if n_points == 0:
39+
raise ValueError("The file has no points")
3840

41+
data = {}
42+
point_names = ["x", "y", "z"]
43+
point_types = {"x": np.float32, "y": np.float32, "z": np.float32}
44+
45+
if has_color:
46+
point_names.extend(["red", "green", "blue"])
47+
color_point_types = {"red": np.uint8, "green": np.uint8, "blue": np.uint8}
48+
point_types = {**point_types, **color_point_types}
49+
50+
with _file_from_lines(lines, 0, n_points) as f:
3951
data["points"] = pd.read_csv(
40-
off,
52+
f,
4153
sep=" ",
4254
header=None,
4355
engine="c",
44-
nrows=n_points,
4556
names=point_names,
46-
dtype=point_types,
4757
index_col=False,
48-
comment="#"
58+
comment="#",
4959
)
5060

61+
with _file_from_lines(lines, n_points, n_points + n_faces) as f:
5162
data["mesh"] = pd.read_csv(
52-
filename,
63+
f,
5364
sep=" ",
5465
header=None,
5566
engine="c",
56-
skiprows=(count + n_points),
57-
nrows=n_faces,
5867
usecols=[1, 2, 3],
5968
names=["v1", "v2", "v3"],
60-
comment="#"
69+
comment="#",
6170
)
62-
return data
71+
72+
return data
73+
74+
75+
@contextmanager
76+
def _file_from_lines(lines, start=None, stop=None):
77+
with StringIO() as f:
78+
f.writelines("".join(islice(lines, start, stop)))
79+
f.seek(0)
80+
yield f

0 commit comments

Comments
 (0)