Skip to content

Commit 12ee9f2

Browse files
committed
fix: read_off improve robustness of header parsing
Improve robustness of header parsing a bit. In particular, ModelNet40 has faulty headers:

```bash
$ head -n 1 ModelNet40/chair/train/chair_0856.off
OFF6586 5534 0
```

For reference, the correct format is:

```
OFF 6586 5534 0
```

Nonetheless, it is still valuable to parse the faulty header.

Also, reuse already open file for reading instead of opening it twice.
1 parent 248fd0f commit 12ee9f2

File tree

1 file changed

+48
-27
lines changed

1 file changed

+48
-27
lines changed

pyntcloud/io/off.py

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,83 @@
1-
import pandas as pd
1+
import re
2+
23
import numpy as np
4+
import pandas as pd
35

46

57
def read_off(filename):
6-
7-
with open(filename) as off:
8-
9-
first_line = off.readline()
8+
with open(filename) as f:
9+
first_line = f.readline()
1010
if "OFF" not in first_line:
11-
raise ValueError('The file does not start with the word OFF')
12-
color = True if "C" in first_line else False
11+
raise ValueError("The file does not start with the word OFF")
12+
has_color = "C" in first_line
13+
14+
num_rows = None
15+
n_points = None
16+
n_faces = None
17+
n_header = 1
1318

14-
n_points = 0
15-
n_faces = 0
19+
# Backtrack to account for faulty headers, e.g. "OFF4 4 0".
20+
m = re.match(r"^(?P<prefix>\D+)([\d\s]+)$", first_line)
21+
if m:
22+
f.seek(len(m.group("prefix")))
23+
n_header = 0
1624

17-
count = 1
18-
for line in off:
19-
count += 1
25+
# Read header.
26+
for line in f:
27+
n_header += 1
2028
if line.startswith("#"):
2129
continue
2230
line = line.strip().split()
23-
if len(line) > 1:
24-
n_points = int(line[0])
25-
n_faces = int(line[1])
26-
break
31+
if len(line) <= 1:
32+
continue
33+
n_points = int(line[0])
34+
n_faces = int(line[1])
35+
num_rows = n_points + n_faces
36+
break
37+
38+
if num_rows is None:
39+
raise ValueError("The file does not contain a valid header")
2740

28-
if (n_points == 0):
29-
raise ValueError('The file has no points')
41+
if n_points == 0:
42+
raise ValueError("The file contains no points")
3043

3144
data = {}
3245
point_names = ["x", "y", "z"]
33-
point_types = {'x': np.float32, 'y': np.float32, 'z': np.float32}
46+
point_types = {"x": np.float32, "y": np.float32, "z": np.float32}
3447

35-
if color:
48+
if has_color:
3649
point_names.extend(["red", "green", "blue"])
37-
point_types = dict(point_types, **{'red': np.uint8, 'green': np.uint8, 'blue': np.uint8})
50+
color_point_types = {"red": np.uint8, "green": np.uint8, "blue": np.uint8}
51+
point_types = {**point_types, **color_point_types}
3852

3953
data["points"] = pd.read_csv(
40-
off,
54+
f,
4155
sep=" ",
4256
header=None,
4357
engine="c",
4458
nrows=n_points,
4559
names=point_names,
4660
dtype=point_types,
4761
index_col=False,
48-
comment="#"
62+
comment="#",
4963
)
5064

65+
assert len(data["points"]) == n_points
66+
67+
f.seek(0)
68+
5169
data["mesh"] = pd.read_csv(
52-
filename,
70+
f,
5371
sep=" ",
5472
header=None,
5573
engine="c",
56-
skiprows=(count + n_points),
74+
skiprows=n_header + n_points,
5775
nrows=n_faces,
5876
usecols=[1, 2, 3],
5977
names=["v1", "v2", "v3"],
60-
comment="#"
78+
comment="#",
6179
)
62-
return data
80+
81+
assert len(data["mesh"]) == n_faces
82+
83+
return data

0 commit comments

Comments
 (0)