Skip to content

Commit

Permalink
Merge pull request #137 from treasure-data/chunk-size-cap
Browse files Browse the repository at this point in the history
Cap the number of tempfiles at 200
  • Loading branch information
chezou authored Sep 16, 2024
2 parents b560475 + f597395 commit 0ed76b4
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions pytd/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,13 @@ def write_dataframe(
fps.append(fp)
elif fmt == "msgpack":
_replace_pd_na(dataframe)

num_rows = len(dataframe)
# the number of chunks (tempfiles) should not exceed 200 to avoid OSError
# (too many open files), so enforce a minimum chunk size of num_rows // 200
_chunk_record_size = max(chunk_record_size, num_rows//200)
try:
for start in range(0, len(dataframe), chunk_record_size):
for start in range(0, num_rows, _chunk_record_size):
records = dataframe.iloc[
start : start + chunk_record_size
start : start + _chunk_record_size
].to_dict(orient="records")
fp = tempfile.NamedTemporaryFile(
suffix=".msgpack.gz", delete=False
Expand Down

0 comments on commit 0ed76b4

Please sign in to comment.