Commit d284254

chore: optimize appending new columns to Pandas DataFrame (#348)

1 parent: 242444d

File tree (3 files changed: +36, -7 lines)

  CHANGELOG.md
  influxdb_client/client/flux_csv_parser.py
  tests/test_FluxCSVParser.py

CHANGELOG.md (+3)

@@ -5,6 +5,9 @@
 1. [#335](https://github.com/influxdata/influxdb-client-python/pull/335): Add support for custom precision for index specified as number [DataFrame]
 1. [#341](https://github.com/influxdata/influxdb-client-python/pull/341): Add support for handling batch events
 
+### Bug Fixes
+1. [#348](https://github.com/influxdata/influxdb-client-python/pull/348): Optimize appending new columns to Pandas DataFrame [DataFrame]
+
 ### Documentation
 1. [#331](https://github.com/influxdata/influxdb-client-python/pull/331): Add [Migration Guide](MIGRATION_GUIDE.rst)
 1. [#341](https://github.com/influxdata/influxdb-client-python/pull/341): How to handle client errors

influxdb_client/client/flux_csv_parser.py (+2, -3)

@@ -133,9 +133,8 @@ def _parse_flux_response(self):
                 # Create DataFrame with default values
                 if self._serialization_mode is FluxSerializationMode.dataFrame:
                     from ..extras import pd
-                    self._data_frame = pd.DataFrame(data=[], columns=[], index=None)
-                    for column in table.columns:
-                        self._data_frame[column.label] = column.default_value
+                    labels = list(map(lambda it: it.label, table.columns))
+                    self._data_frame = pd.DataFrame(data=[], columns=labels, index=None)
                     pass
                 continue

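For context, the change replaces per-column assignment into an empty DataFrame with a single constructor call that receives all column labels up front. The stand-alone sketch below is not part of the commit and only contrasts the two patterns; the 200-column count mirrors the new test, while the timeit harness is an illustrative assumption.

    # Illustrative micro-benchmark (assumption, not from the commit): contrasts the
    # removed column-by-column assignment with the new single-constructor pattern.
    import timeit

    import pandas as pd

    labels = [f"column_{i}" for i in range(200)]

    def column_by_column():
        # old pattern: start with an empty frame, then assign each column separately
        df = pd.DataFrame(data=[], columns=[], index=None)
        for label in labels:
            df[label] = None
        return df

    def all_at_once():
        # new pattern: hand the full label list to the constructor in one call
        return pd.DataFrame(data=[], columns=labels, index=None)

    print("column_by_column:", timeit.timeit(column_by_column, number=10))
    print("all_at_once:     ", timeit.timeit(all_at_once, number=10))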
tests/test_FluxCSVParser.py (+31, -4)

@@ -212,11 +212,38 @@ def test_to_json(self):
         import json
         self.assertEqual(query_output, json.dumps(tables, cls=FluxStructureEncoder, indent=2))
 
+    def test_pandas_lot_of_columns(self):
+        data_types = ""
+        groups = ""
+        defaults = ""
+        columns = ""
+        values = ""
+        for i in range(0, 200):
+            data_types += f",long"
+            groups += f",false"
+            defaults += f","
+            columns += f",column_{i}"
+            values += f",{i}"
+
+        data = f"#datatype,string,long,string,string,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,double,string{data_types}\n" \
+               f"#group,false,false,true,true,true,true,false,false,true{groups}\n" \
+               f"#default,_result,,,,,,,,{defaults}\n" \
+               f",result,table,_field,_measurement,_start,_stop,_time,_value,tag{columns}\n" \
+               f",,0,value,python_client_test,2010-02-27T04:48:32.752600083Z,2020-02-27T16:48:32.752600083Z,2020-02-27T16:20:00Z,2,test1{values}\n" \
+
+        parser = self._parse(data=data, serialization_mode=FluxSerializationMode.dataFrame)
+        _dataFrames = list(parser.generator())
+        self.assertEqual(1, _dataFrames.__len__())
+
     @staticmethod
-    def _parse_to_tables(data: str):
-        fp = BytesIO(str.encode(data))
-        _parser = FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
-                                serialization_mode=FluxSerializationMode.tables)
+    def _parse_to_tables(data: str, serialization_mode=FluxSerializationMode.tables):
+        _parser = FluxCsvParserTest._parse(data, serialization_mode)
         list(_parser.generator())
         tables = _parser.tables
         return tables
+
+    @staticmethod
+    def _parse(data, serialization_mode):
+        fp = BytesIO(str.encode(data))
+        return FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
+                             serialization_mode=serialization_mode)

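The new test builds an annotated CSV response with 200 extra long columns and checks that the dataFrame serialization mode still yields a single DataFrame. As a smaller, stand-alone illustration of the same path, here is a hedged sketch that reuses only the calls visible in the diff; the import locations (FluxCsvParser and FluxSerializationMode from influxdb_client.client.flux_csv_parser, HTTPResponse from urllib3) are assumptions about what the test module already imports.

    # Minimal sketch of the dataFrame serialization path the new test exercises.
    # Assumed imports: the parser classes from influxdb_client.client.flux_csv_parser
    # and HTTPResponse from urllib3, mirroring the calls shown in the diff above.
    from io import BytesIO

    from urllib3 import HTTPResponse

    from influxdb_client.client.flux_csv_parser import FluxCsvParser, FluxSerializationMode

    data = "#datatype,string,long,string,string,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,double,string\n" \
           "#group,false,false,true,true,true,true,false,false,true\n" \
           "#default,_result,,,,,,,,\n" \
           ",result,table,_field,_measurement,_start,_stop,_time,_value,tag\n" \
           ",,0,value,python_client_test,2010-02-27T04:48:32.752600083Z,2020-02-27T16:48:32.752600083Z,2020-02-27T16:20:00Z,2,test1\n"

    fp = BytesIO(str.encode(data))
    parser = FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
                           serialization_mode=FluxSerializationMode.dataFrame)

    data_frames = list(parser.generator())
    print(len(data_frames))  # the new test asserts exactly one DataFrame for its payload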