@@ -322,7 +322,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
322
322
and args ["y" ]
323
323
and len (trace_data [[args ["x" ], args ["y" ]]].dropna ()) > 1
324
324
):
325
-
326
325
# sorting is bad but trace_specs with "trendline" have no other attrs
327
326
sorted_trace_data = trace_data .sort_values (by = args ["x" ])
328
327
y = sorted_trace_data [args ["y" ]].values
@@ -563,7 +562,6 @@ def set_cartesian_axis_opts(args, axis, letter, orders):
563
562
564
563
565
564
def configure_cartesian_marginal_axes (args , fig , orders ):
566
-
567
565
if "histogram" in [args ["marginal_x" ], args ["marginal_y" ]]:
568
566
fig .layout ["barmode" ] = "overlay"
569
567
@@ -1065,14 +1063,14 @@ def _escape_col_name(columns, col_name, extra):
1065
1063
return col_name
1066
1064
1067
1065
1068
- def to_unindexed_series (x ):
1066
+ def to_unindexed_series (x , name = None ):
1069
1067
"""
1070
1068
assuming x is list-like or even an existing pd.Series, return a new pd.Series with
1071
1069
a fresh default index, without extracting the data from an existing Series via numpy, which
1072
1070
seems to mangle datetime columns. Stripping the index from existing pd.Series is
1073
1071
required to get things to match up right in the new DataFrame we're building
1074
1072
"""
1075
- return pd .Series (x ).reset_index (drop = True )
1073
+ return pd .Series (x , name = name ).reset_index (drop = True )
1076
1074
1077
1075
1078
1076
def process_args_into_dataframe (args , wide_mode , var_name , value_name ):
@@ -1087,9 +1085,12 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
1087
1085
df_input = args ["data_frame" ]
1088
1086
df_provided = df_input is not None
1089
1087
1090
- df_output = pd .DataFrame ()
1091
- constants = dict ()
1092
- ranges = list ()
1088
+ # we use a dict instead of a dataframe directly so that it doesn't cause
1089
+ # a pandas PerformanceWarning when columns are set repeatedly.
1090
+ # a dict is used instead of a list as the columns need to be overwritten.
1091
+ df_output = {}
1092
+ constants = {}
1093
+ ranges = []
1093
1094
wide_id_vars = set ()
1094
1095
reserved_names = _get_reserved_col_names (args ) if df_provided else set ()
1095
1096
@@ -1100,7 +1101,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
1100
1101
"No data were provided. Please provide data either with the `data_frame` or with the `dimensions` argument."
1101
1102
)
1102
1103
else :
1103
- df_output [ df_input . columns ] = df_input [ df_input . columns ]
1104
+ df_output = { col : series for col , series in df_input . items ()}
1104
1105
1105
1106
# hover_data is a dict
1106
1107
hover_data_is_dict = (
@@ -1141,7 +1142,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
1141
1142
# argument_list and field_list ready, iterate over them
1142
1143
# Core of the loop starts here
1143
1144
for i , (argument , field ) in enumerate (zip (argument_list , field_list )):
1144
- length = len (df_output )
1145
+ length = len (df_output [ next ( iter ( df_output ))]) if len ( df_output ) else 0
1145
1146
if argument is None :
1146
1147
continue
1147
1148
col_name = None
@@ -1182,11 +1183,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
1182
1183
% (
1183
1184
argument ,
1184
1185
len (real_argument ),
1185
- str (list (df_output .columns )),
1186
+ str (list (df_output .keys () )),
1186
1187
length ,
1187
1188
)
1188
1189
)
1189
- df_output [col_name ] = to_unindexed_series (real_argument )
1190
+ df_output [col_name ] = to_unindexed_series (real_argument , col_name )
1190
1191
elif not df_provided :
1191
1192
raise ValueError (
1192
1193
"String or int arguments are only possible when a "
@@ -1215,13 +1216,15 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
1215
1216
% (
1216
1217
field ,
1217
1218
len (df_input [argument ]),
1218
- str (list (df_output .columns )),
1219
+ str (list (df_output .keys () )),
1219
1220
length ,
1220
1221
)
1221
1222
)
1222
1223
else :
1223
1224
col_name = str (argument )
1224
- df_output [col_name ] = to_unindexed_series (df_input [argument ])
1225
+ df_output [col_name ] = to_unindexed_series (
1226
+ df_input [argument ], col_name
1227
+ )
1225
1228
# ----------------- argument is likely a column / array / list.... -------
1226
1229
else :
1227
1230
if df_provided and hasattr (argument , "name" ):
@@ -1248,9 +1251,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
1248
1251
"All arguments should have the same length. "
1249
1252
"The length of argument `%s` is %d, whereas the "
1250
1253
"length of previously-processed arguments %s is %d"
1251
- % (field , len (argument ), str (list (df_output .columns )), length )
1254
+ % (field , len (argument ), str (list (df_output .keys () )), length )
1252
1255
)
1253
- df_output [str (col_name )] = to_unindexed_series (argument )
1256
+ df_output [str (col_name )] = to_unindexed_series (argument , str ( col_name ) )
1254
1257
1255
1258
# Finally, update argument with column name now that column exists
1256
1259
assert col_name is not None , (
@@ -1268,12 +1271,19 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
1268
1271
if field_name != "wide_variable" :
1269
1272
wide_id_vars .add (str (col_name ))
1270
1273
1271
- for col_name in ranges :
1272
- df_output [col_name ] = range (len (df_output ))
1273
-
1274
- for col_name in constants :
1275
- df_output [col_name ] = constants [col_name ]
1274
+ length = len (df_output [next (iter (df_output ))]) if len (df_output ) else 0
1275
+ df_output .update (
1276
+ {col_name : to_unindexed_series (range (length ), col_name ) for col_name in ranges }
1277
+ )
1278
+ df_output .update (
1279
+ {
1280
+ # each constant is a single value; repeat it `length` times to avoid creating NaN on concatenation
1281
+ col_name : to_unindexed_series ([constants [col_name ]] * length , col_name )
1282
+ for col_name in constants
1283
+ }
1284
+ )
1276
1285
1286
+ df_output = pd .DataFrame (df_output )
1277
1287
return df_output , wide_id_vars
1278
1288
1279
1289
0 commit comments