@@ -1211,3 +1211,58 @@ def test_athena_to_iceberg_alter_schema(
1211
1211
)
1212
1212
1213
1213
assert_pandas_equals (df , df_actual )
1214
+
1215
+
1216
@pytest.mark.parametrize("partition_cols", [None, ["name"]])
def test_athena_to_iceberg_append_schema_evolution(
    path: str,
    path2: str,
    path3: str,
    glue_database: str,
    glue_table: str,
    partition_cols: list[str] | None,
) -> None:
    """Append a frame carrying an extra column to an existing Iceberg table.

    The table is first written from a slice holding only the ``id`` and
    ``name`` columns. A second write then appends the remaining row with all
    three columns (``age`` included) and ``schema_evolution=True``. The table
    is read back ordered by ``id`` and must equal the full source frame, in
    which ``age`` is null for every row of the first write.

    Runs once without partitioning and once partitioned by ``name``.

    Args:
        path: Passed as ``table_location`` to both writes.
        path2: Passed as ``temp_path`` to both writes.
        path3: Passed as ``s3_output`` to the second write only.
        glue_database: Database used for both writes and for the read-back.
        glue_table: Table written to and then queried.
        partition_cols: Partition columns forwarded to both writes, or None.
    """
    df = pd.DataFrame(
        {
            "id": [1, 2, 3, 4, 5],
            "name": ["a", "b", "c", "a", "c"],
            "age": [None, None, None, None, 50],
        }
    )
    df["id"] = df["id"].astype("Int64")  # Cast as nullable int64 type
    df["name"] = df["name"].astype("string")
    df["age"] = df["age"].astype("Int64")  # Cast as nullable int64 type

    # Rows before this index go into the first write, the rest into the second.
    split_index_rows = 4
    # Columns before this index ("id", "name") make up the initial schema.
    split_index_columns = 2

    # Initial write: first four rows, without the "age" column.
    wr.athena.to_iceberg(
        df=df.iloc[:split_index_rows, :split_index_columns],
        database=glue_database,
        table=glue_table,
        table_location=path,
        temp_path=path2,
        partition_cols=partition_cols,
        keep_files=False,
    )

    # Append the remaining row with every column; "age" is new to the table.
    wr.athena.to_iceberg(
        df=df.iloc[split_index_rows:, :],
        database=glue_database,
        table=glue_table,
        table_location=path,
        temp_path=path2,
        partition_cols=partition_cols,
        schema_evolution=True,
        keep_files=False,
        mode="append",
        s3_output=path3,
    )

    # The quoted identifier must match the table name exactly: no padding
    # inside the double quotes, otherwise a different table name is queried.
    df_actual = wr.athena.read_sql_query(
        sql=f'SELECT * FROM "{glue_table}" ORDER BY id',
        database=glue_database,
        ctas_approach=False,
        unload_approach=False,
    )

    assert_pandas_equals(df, df_actual)
0 commit comments