Skip to content

Commit

Permalink
perf: read_off replace pandas.read_csv engine=python with c
Browse files Browse the repository at this point in the history
Big performance improvement by removing the need to use the slow
`engine="python"` by reading the sliced file from an in-memory
StringIO buffer.

Also fixes a bug where, for OFF files containing more lines than
`num_points + num_faces`, the reader tried to parse potential edges as faces!

As [Wikipedia] says, the OFF file may contain:

- points
- faces (optional)
- edges (optional)

Of course, this still does not encompass all possible OFF file variants
described by Wikipedia, but it's an improvement.

[Wikipedia]: https://en.wikipedia.org/wiki/OFF_(file_format)
  • Loading branch information
YodaEmbedding committed Dec 24, 2023
1 parent cab0c7f commit e968956
Showing 1 changed file with 51 additions and 33 deletions.
84 changes: 51 additions & 33 deletions pyntcloud/io/off.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,80 @@
import pandas as pd
from contextlib import contextmanager
from io import StringIO
from itertools import islice

import numpy as np
import pandas as pd


def read_off(filename):
    """Read an OFF (Object File Format) file into pandas DataFrames.

    The first line must contain the word ``OFF``; if it also contains a
    ``C`` the points are read with ``red``/``green``/``blue`` columns.
    The header is the first following non-comment line with at least two
    fields, interpreted as ``n_points n_faces``. Exactly
    ``n_points + n_faces`` data lines are then consumed, so anything after
    them (e.g. edges) is ignored.

    Parameters
    ----------
    filename : str or path-like
        Path of the OFF file to read.

    Returns
    -------
    data : dict
        ``data["points"]`` is a DataFrame with float32 columns ``x``, ``y``,
        ``z`` (plus uint8 ``red``, ``green``, ``blue`` for colored files).
        ``data["mesh"]`` is a DataFrame with columns ``v1``, ``v2``, ``v3``
        taken from fields 1-3 of each face line.

    Raises
    ------
    ValueError
        If the first line lacks ``OFF``, no header line is found, the header
        declares zero points, or the file ends before the declared number of
        data lines has been read.
    """
    with open(filename) as f:
        first_line = f.readline()
        if "OFF" not in first_line:
            raise ValueError("The file does not start with the word OFF")
        has_color = "C" in first_line

        n_points = None
        n_faces = None

        # Read header: the first non-comment line holding at least two fields.
        for line in f:
            if line.startswith("#"):
                continue
            fields = line.split()
            if len(fields) <= 1:
                continue
            n_points = int(fields[0])
            n_faces = int(fields[1])
            break

        if n_points is None:
            raise ValueError("The file does not contain a valid header")

        if n_points == 0:
            raise ValueError("The file has no points")

        num_rows = n_points + n_faces

        # Blank and comment-only lines carry no row, so they must not be
        # counted; otherwise the point/face boundary shifts and rows are lost.
        data_lines = (
            line
            for line in f
            if line.strip() and not line.lstrip().startswith("#")
        )
        # islice stops quietly at EOF instead of leaking StopIteration.
        lines = list(islice(data_lines, num_rows))

    if len(lines) < num_rows:
        raise ValueError(
            f"The file ends after {len(lines)} data lines "
            f"but the header declares {num_rows}"
        )

    data = {}
    point_names = ["x", "y", "z"]
    point_types = {"x": np.float32, "y": np.float32, "z": np.float32}

    if has_color:
        point_names.extend(["red", "green", "blue"])
        color_point_types = {"red": np.uint8, "green": np.uint8, "blue": np.uint8}
        point_types = {**point_types, **color_point_types}

    # Parsing from in-memory buffers lets pandas use the fast C engine.
    with _file_from_lines(lines, 0, n_points) as f:
        data["points"] = pd.read_csv(
            f,
            sep=" ",
            header=None,
            engine="c",
            nrows=n_points,
            names=point_names,
            dtype=point_types,
            index_col=False,
            comment="#",
        )

    with _file_from_lines(lines, n_points, n_points + n_faces) as f:
        data["mesh"] = pd.read_csv(
            f,
            sep=" ",
            header=None,
            engine="c",
            nrows=n_faces,
            # Field 0 of a face line is its vertex count; keep the indices.
            usecols=[1, 2, 3],
            names=["v1", "v2", "v3"],
            comment="#",
        )

    return data


@contextmanager
def _file_from_lines(lines, start=None, stop=None):
with StringIO() as f:
f.writelines("".join(islice(lines, start, stop)))
f.seek(0)
yield f

0 comments on commit e968956

Please sign in to comment.