From 04216eab4f2a15fc14f78f9c51c14493ae691911 Mon Sep 17 00:00:00 2001 From: Jesper Glintborg Date: Sun, 19 Nov 2023 08:37:24 +0100 Subject: [PATCH 1/2] #407 added option to reduce flushing of streams during write operation. --- src/Parquet/File/DataColumnWriter.cs | 5 ++++- src/Parquet/ParquetOptions.cs | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Parquet/File/DataColumnWriter.cs b/src/Parquet/File/DataColumnWriter.cs index 9f1cf3b8..135af50b 100644 --- a/src/Parquet/File/DataColumnWriter.cs +++ b/src/Parquet/File/DataColumnWriter.cs @@ -84,7 +84,10 @@ private async Task CompressAndWriteAsync( ph.Write(new Meta.Proto.ThriftCompactProtocolWriter(headerMs)); int headerSize = (int)headerMs.Length; headerMs.Position = 0; - _stream.Flush(); + if(_options.ReduceStreamFlushing == false) { + _stream.Flush(); + } + await headerMs.CopyToAsync(_stream); // write data diff --git a/src/Parquet/ParquetOptions.cs b/src/Parquet/ParquetOptions.cs index da186651..b56007e8 100644 --- a/src/Parquet/ParquetOptions.cs +++ b/src/Parquet/ParquetOptions.cs @@ -6,6 +6,12 @@ namespace Parquet { /// Parquet options /// public class ParquetOptions { + + /// + /// Reduce flushing of stream during write, to increase performance of for instance Azure blob + /// + public bool ReduceStreamFlushing { get; set; } + /// /// When true byte arrays will be treated as UTF-8 strings on read /// From 3016ee88a9c49d698c21f4991e17f993f86f7a0b Mon Sep 17 00:00:00 2001 From: IG Date: Fri, 22 Dec 2023 10:02:40 +0000 Subject: [PATCH 2/2] adding my suggestions to include this change in 4.18 release, hope you don't mind ;) --- src/Parquet/File/DataColumnWriter.cs | 4 +--- src/Parquet/ParquetOptions.cs | 7 +------ 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/Parquet/File/DataColumnWriter.cs b/src/Parquet/File/DataColumnWriter.cs index 135af50b..d530393d 100644 --- a/src/Parquet/File/DataColumnWriter.cs +++ b/src/Parquet/File/DataColumnWriter.cs @@ -84,9 +84,7 @@ 
private async Task CompressAndWriteAsync( ph.Write(new Meta.Proto.ThriftCompactProtocolWriter(headerMs)); int headerSize = (int)headerMs.Length; headerMs.Position = 0; - if(_options.ReduceStreamFlushing == false) { - _stream.Flush(); - } + // there used to be a flush here, but removed in #432 due to excessive flushing on expensive streams await headerMs.CopyToAsync(_stream); diff --git a/src/Parquet/ParquetOptions.cs b/src/Parquet/ParquetOptions.cs index b56007e8..9a6bb7f1 100644 --- a/src/Parquet/ParquetOptions.cs +++ b/src/Parquet/ParquetOptions.cs @@ -6,12 +6,7 @@ namespace Parquet { /// Parquet options /// public class ParquetOptions { - - /// - /// Reduce flushing of stream during write, to increase performance of for instance Azure blob - /// - public bool ReduceStreamFlushing { get; set; } - + /// /// When true byte arrays will be treated as UTF-8 strings on read ///