@@ -26,6 +26,7 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
26
26
>>> with GMTTempFile(suffix=".txt") as tmpfile:
27
27
... # Prepare the sample data file
28
28
... with Path(tmpfile.name).open(mode="w") as fp:
29
+ ... print("# x y z name", file=fp)
29
30
... print(">", file=fp)
30
31
... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
31
32
... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
@@ -42,7 +43,8 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
42
43
... print(ds.min[: ds.n_columns], ds.max[: ds.n_columns])
43
44
... # The table
44
45
... tbl = ds.table[0].contents
45
- ... print(tbl.n_columns, tbl.n_segments, tbl.n_records)
46
+ ... print(tbl.n_columns, tbl.n_segments, tbl.n_records, tbl.n_headers)
47
+ ... print(tbl.header[: tbl.n_headers])
46
48
... print(tbl.min[: tbl.n_columns], tbl.max[: tbl.n_columns])
47
49
... for i in range(tbl.n_segments):
48
50
... seg = tbl.segment[i].contents
@@ -51,7 +53,8 @@ class _GMT_DATASET(ctp.Structure): # noqa: N801
51
53
... print(seg.text[: seg.n_rows])
52
54
1 3 2
53
55
[1.0, 2.0, 3.0] [10.0, 11.0, 12.0]
54
- 3 2 4
56
+ 3 2 4 1
57
+ [b'x y z name']
55
58
[1.0, 2.0, 3.0] [10.0, 11.0, 12.0]
56
59
[1.0, 4.0]
57
60
[2.0, 5.0]
@@ -144,8 +147,9 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801
144
147
("hidden" , ctp .c_void_p ),
145
148
]
146
149
147
- def to_dataframe (
150
+ def to_dataframe ( # noqa: PLR0912
148
151
self ,
152
+ header : int | None = None ,
149
153
column_names : pd .Index | None = None ,
150
154
dtype : type | Mapping [Any , type ] | None = None ,
151
155
index_col : str | int | None = None ,
@@ -164,6 +168,9 @@ def to_dataframe(
164
168
----------
165
169
column_names
166
170
A list of column names.
171
+ header
172
+ Row number containing column names. ``header=None`` means not to parse the
173
+ column names from data header.
167
174
dtype
168
175
Data type. Can be a single type for all columns or a dictionary mapping
169
176
column names to types.
@@ -184,6 +191,7 @@ def to_dataframe(
184
191
>>> with GMTTempFile(suffix=".txt") as tmpfile:
185
192
... # prepare the sample data file
186
193
... with Path(tmpfile.name).open(mode="w") as fp:
194
+ ... print("# col1 col2 col3 colstr", file=fp)
187
195
... print(">", file=fp)
188
196
... print("1.0 2.0 3.0 TEXT1 TEXT23", file=fp)
189
197
... print("4.0 5.0 6.0 TEXT4 TEXT567", file=fp)
@@ -194,9 +202,9 @@ def to_dataframe(
194
202
... with lib.virtualfile_out(kind="dataset") as vouttbl:
195
203
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
196
204
... ds = lib.read_virtualfile(vouttbl, kind="dataset")
197
- ... df = ds.contents.to_dataframe()
205
+ ... df = ds.contents.to_dataframe(header=0 )
198
206
>>> df
199
- 0 1 2 3
207
+ col1 col2 col3 colstr
200
208
0 1.0 2.0 3.0 TEXT1 TEXT23
201
209
1 4.0 5.0 6.0 TEXT4 TEXT567
202
210
2 7.0 8.0 9.0 TEXT8 TEXT90
@@ -230,14 +238,19 @@ def to_dataframe(
230
238
pd .Series (data = np .char .decode (textvector ), dtype = pd .StringDtype ())
231
239
)
232
240
241
+ if header is not None :
242
+ tbl = self .table [0 ].contents # Use the first table!
243
+ if header < tbl .n_headers :
244
+ column_names = tbl .header [header ].decode ().split ()
245
+
233
246
if len (vectors ) == 0 :
234
247
# Return an empty DataFrame if no columns are found.
235
248
df = pd .DataFrame (columns = column_names )
236
249
else :
237
250
# Create a DataFrame object by concatenating multiple columns
238
251
df = pd .concat (objs = vectors , axis = "columns" )
239
252
if column_names is not None : # Assign column names
240
- df .columns = column_names
253
+ df .columns = column_names [: df . shape [ 1 ]]
241
254
if dtype is not None : # Set dtype for the whole dataset or individual columns
242
255
df = df .astype (dtype )
243
256
if index_col is not None : # Use a specific column as index
0 commit comments