perf: read_off replace pandas.read_csv engine=python with c
Big performance improvement: by slicing the relevant lines into an in-memory `StringIO` buffer, the slow `engine="python"` is no longer needed and `pandas.read_csv` can use the C engine. Also fixes a bug where, for OFF files containing more lines than `num_points + num_faces`, potential edge lines were read as faces. As [Wikipedia] notes, an OFF file may contain:

- points
- faces (optional)
- edges (optional)

This still does not cover every OFF variant described by Wikipedia, but it is an improvement.

[Wikipedia]: https://en.wikipedia.org/wiki/OFF_(file_format)
1 parent cab0c7f · commit e968956
Showing 1 changed file with 51 additions and 33 deletions.
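To make the change concrete, here is a rough, self-contained sketch of the technique the commit message describes (the sample OFF body, counts, and variable names below are invented for illustration, not taken from the repository): slicing only the relevant lines into an in-memory `StringIO` lets `pandas.read_csv` stay on the default C engine, and it also stops the parser before any optional edge records.

```python
from io import StringIO

import pandas as pd

# Hypothetical OFF body after the "OFF" line and the "n_points n_faces n_edges"
# header: 4 vertex lines, 2 face lines, then 1 edge line that must not be
# parsed as a face.
body_lines = [
    "0.0 0.0 0.0\n",
    "1.0 0.0 0.0\n",
    "0.0 1.0 0.0\n",
    "0.0 0.0 1.0\n",
    "3 0 1 2\n",
    "3 0 2 3\n",
    "0 1\n",  # optional edge record; previously this could be read as a face
]
n_points, n_faces = 4, 2

# Slice exactly the face lines into an in-memory buffer and let the C engine parse it.
faces_buf = StringIO("".join(body_lines[n_points:n_points + n_faces]))
mesh = pd.read_csv(
    faces_buf,
    sep=" ",
    header=None,
    engine="c",
    usecols=[1, 2, 3],
    names=["v1", "v2", "v3"],
)
print(mesh)  # two triangles: (0, 1, 2) and (0, 2, 3)
```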
The updated `read_off` implementation after this commit:

```python
from contextlib import contextmanager
from io import StringIO
from itertools import islice

import numpy as np
import pandas as pd


def read_off(filename):
    with open(filename) as f:
        first_line = f.readline()
        if "OFF" not in first_line:
            raise ValueError("The file does not start with the word OFF")
        has_color = "C" in first_line

        num_rows = None
        n_points = None
        n_faces = None

        # Read header.
        for line in f:
            if line.startswith("#"):
                continue
            line = line.strip().split()
            if len(line) <= 1:
                continue
            n_points = int(line[0])
            n_faces = int(line[1])
            num_rows = n_points + n_faces
            break

        if num_rows is None:
            raise ValueError("The file does not contain a valid header")

        # Read remaining lines.
        lines = [next(f) for _ in range(num_rows)]

    if n_points == 0:
        raise ValueError("The file has no points")

    data = {}
    point_names = ["x", "y", "z"]
    point_types = {"x": np.float32, "y": np.float32, "z": np.float32}

    if has_color:
        point_names.extend(["red", "green", "blue"])
        color_point_types = {"red": np.uint8, "green": np.uint8, "blue": np.uint8}
        point_types = {**point_types, **color_point_types}

    with _file_from_lines(lines, 0, n_points) as f:
        data["points"] = pd.read_csv(
            f,
            sep=" ",
            header=None,
            engine="c",
            names=point_names,
            dtype=point_types,
            index_col=False,
            comment="#",
        )

    with _file_from_lines(lines, n_points, n_points + n_faces) as f:
        data["mesh"] = pd.read_csv(
            f,
            sep=" ",
            header=None,
            engine="c",
            usecols=[1, 2, 3],
            names=["v1", "v2", "v3"],
            comment="#",
        )

    return data


@contextmanager
def _file_from_lines(lines, start=None, stop=None):
    with StringIO() as f:
        f.writelines("".join(islice(lines, start, stop)))
        f.seek(0)
        yield f
```
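For orientation, a minimal usage sketch, assuming an ASCII OFF file with no colors and no edge section (the file name and contents are made up; the returned dict keys and column names follow the code above):

```python
# A minimal ASCII OFF file: 4 vertices, 2 triangular faces, 0 edges.
off_text = """OFF
4 2 0
0.0 0.0 0.0
1.0 0.0 0.0
0.0 1.0 0.0
0.0 0.0 1.0
3 0 1 2
3 0 2 3
"""

with open("example.off", "w") as f:  # hypothetical file name
    f.write(off_text)

data = read_off("example.off")
print(data["points"])  # 4 rows, columns x/y/z as float32
print(data["mesh"])    # 2 rows, columns v1/v2/v3 with the face vertex indices
```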