20
20
from copy import copy
21
21
from dataclasses import dataclass
22
22
from enum import Enum
23
- from typing import TYPE_CHECKING , Dict , List , Optional , Set , Tuple , Union
23
+ from typing import TYPE_CHECKING , Any , Dict , List , Optional , Set , Tuple , Union
24
24
25
25
from pyiceberg .exceptions import ResolveError , ValidationError
26
+ from pyiceberg .expressions import literal # type: ignore
26
27
from pyiceberg .schema import (
27
28
PartnerAccessor ,
28
29
Schema ,
@@ -153,7 +154,12 @@ def union_by_name(self, new_schema: Union[Schema, "pa.Schema"]) -> UpdateSchema:
153
154
return self
154
155
155
156
def add_column (
156
- self , path : Union [str , Tuple [str , ...]], field_type : IcebergType , doc : Optional [str ] = None , required : bool = False
157
+ self ,
158
+ path : Union [str , Tuple [str , ...]],
159
+ field_type : IcebergType ,
160
+ doc : Optional [str ] = None ,
161
+ required : bool = False ,
162
+ default_value : Optional [Any ] = None ,
157
163
) -> UpdateSchema :
158
164
"""Add a new column to a nested struct or Add a new top-level column.
159
165
@@ -168,6 +174,7 @@ def add_column(
168
174
field_type: Type for the new column.
169
175
doc: Documentation string for the new column.
170
176
required: Whether the new column is required.
177
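+ default_value: Default value for the new column; it is validated against the column type and used as both the initial default and the write default.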
171
178
172
179
Returns:
173
180
This for method chaining.
@@ -177,10 +184,6 @@ def add_column(
177
184
raise ValueError (f"Cannot add column with ambiguous name: { path } , provide a tuple instead" )
178
185
path = (path ,)
179
186
180
- if required and not self ._allow_incompatible_changes :
181
- # Table format version 1 and 2 cannot add required column because there is no initial value
182
- raise ValueError (f"Incompatible change: cannot add required column: { '.' .join (path )} " )
183
-
184
187
name = path [- 1 ]
185
188
parent = path [:- 1 ]
186
189
@@ -212,13 +215,35 @@ def add_column(
212
215
213
216
# assign new IDs in order
214
217
new_id = self .assign_new_column_id ()
218
+ new_type = assign_fresh_schema_ids (field_type , self .assign_new_column_id )
219
+
220
+ if default_value is not None :
221
+ try :
222
+ # To make sure that the value is valid for the type
223
+ initial_default = literal (default_value ).to (new_type ).value
224
+ except ValueError as e :
225
+ raise ValueError (f"Invalid default value: { e } " ) from e
226
+ else :
227
+ initial_default = default_value
228
+
229
+ if (required and initial_default is None ) and not self ._allow_incompatible_changes :
230
+ # Table format versions 1 and 2 cannot add a required column because there is no initial value
231
+ raise ValueError (f"Incompatible change: cannot add required column: { '.' .join (path )} " )
232
+
215
233
216
234
# update tracking for moves
217
235
self ._added_name_to_id [full_name ] = new_id
218
236
self ._id_to_parent [new_id ] = parent_full_path
219
237
220
- new_type = assign_fresh_schema_ids (field_type , self .assign_new_column_id )
221
- field = NestedField (field_id = new_id , name = name , field_type = new_type , required = required , doc = doc )
238
+ field = NestedField (
239
+ field_id = new_id ,
240
+ name = name ,
241
+ field_type = new_type ,
242
+ required = required ,
243
+ doc = doc ,
244
+ initial_default = initial_default ,
245
+ write_default = initial_default ,
246
+ )
222
247
223
248
if parent_id in self ._adds :
224
249
self ._adds [parent_id ].append (field )
@@ -330,6 +355,7 @@ def _set_column_requirement(self, path: Union[str, Tuple[str, ...]], required: b
330
355
field_type = updated .field_type ,
331
356
doc = updated .doc ,
332
357
required = required ,
358
+ initial_default = updated .initial_default ,
333
359
)
334
360
else :
335
361
self ._updates [field .field_id ] = NestedField (
@@ -338,6 +364,7 @@ def _set_column_requirement(self, path: Union[str, Tuple[str, ...]], required: b
338
364
field_type = field .field_type ,
339
365
doc = field .doc ,
340
366
required = required ,
367
+ initial_default = field .initial_default ,
341
368
)
342
369
343
370
def update_column (
@@ -387,6 +414,7 @@ def update_column(
387
414
field_type = field_type or updated .field_type ,
388
415
doc = doc if doc is not None else updated .doc ,
389
416
required = updated .required ,
417
+ initial_default = updated .initial_default ,
390
418
)
391
419
else :
392
420
self ._updates [field .field_id ] = NestedField (
@@ -395,6 +423,7 @@ def update_column(
395
423
field_type = field_type or field .field_type ,
396
424
doc = doc if doc is not None else field .doc ,
397
425
required = field .required ,
426
+ initial_default = field .initial_default ,
398
427
)
399
428
400
429
if required is not None :
0 commit comments