Skip to content

Commit 7095e75

Browse files
geekalaa and SunsetWolf authored
fix: replace deprecated pandas fillna(method=) with ffill()/bfill() (#1987)
* fix: replace deprecated pandas fillna(method=) with ffill()/bfill()

  Replace deprecated fillna(method="ffill"/"bfill") calls with modern pandas ffill() and bfill() methods to fix FutureWarnings in pandas 2.x. Also includes black formatting fixes for compliance.

  This addresses the pandas deprecation warnings portion of issue #1981. Other issues (date parsing, type conversion, timezone handling) will be addressed in separate commits.

  Fixes:
  - Yahoo collector: 2 instances in calc_change() and adjusted_price()
  - BaoStock collector: 1 instance in calc_change()
  - Core utils: resam.py fillna operations
  - Backtest: profit_attribution.py stock data processing
  - High-freq ops: FFillNan and BFillNan operators
  - Position analysis: parse_position.py weight processing

  Partially addresses GitHub issue #1981

* lint with black
* lint with black
* limit minimum version of pandas
* limit minimum version of pandas

---------

Co-authored-by: Linlang <[email protected]>
1 parent 2d05a70 commit 7095e75

File tree

9 files changed

+13
-12
lines changed

9 files changed

+13
-12
lines changed

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ license = { text = "MIT" }
2727
dependencies = [
2828
"pyyaml",
2929
"numpy",
30-
"pandas>=0.24",
30+
# Since version 1.1.0, pandas supports the ffill and bfill methods.
31+
# Since version 2.1.0, pandas has deprecated the method parameter of the fillna method.
32+
# qlib has updated the fillna method in PR 1987 and limited the minimum version of pandas.
33+
"pandas>=1.1",
3134
# I encountered an error where set_uri does not work when downloading artifacts in mlflow 3.1.1;
3235
# But earlier versions of mlflow do not have this problem.
3336
# But when I switch to 2.*.* version, another error occurs, which is even more strange...

qlib/backtest/profit_attribution.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,13 +281,13 @@ def brinson_pa(
281281

282282
stock_group_field = stock_df[group_field].unstack().T
283283
# FIXME: some attributes of some suspended stocks are NaN.
284-
stock_group_field = stock_group_field.fillna(method="ffill")
284+
stock_group_field = stock_group_field.ffill()
285285
stock_group_field = stock_group_field.loc[start_date:end_date]
286286

287287
stock_group = get_stock_group(stock_group_field, bench_stock_weight, group_method, group_n)
288288

289289
deal_price_df = stock_df["deal_price"].unstack().T
290-
deal_price_df = deal_price_df.fillna(method="ffill")
290+
deal_price_df = deal_price_df.ffill()
291291

292292
# NOTE:
293293
# The return will be slightly different from the return in the report.

qlib/contrib/ops/high_freq.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ class FFillNan(ElemOperator):
135135

136136
def _load_internal(self, instrument, start_index, end_index, freq):
137137
series = self.feature.load(instrument, start_index, end_index, freq)
138-
return series.fillna(method="ffill")
138+
return series.ffill()
139139

140140

141141
class BFillNan(ElemOperator):
@@ -154,7 +154,7 @@ class BFillNan(ElemOperator):
154154

155155
def _load_internal(self, instrument, start_index, end_index, freq):
156156
series = self.feature.load(instrument, start_index, end_index, freq)
157-
return series.fillna(method="bfill")
157+
return series.bfill()
158158

159159

160160
class Date(ElemOperator):

qlib/contrib/report/analysis_position/parse_position.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def parse_position(position: dict = None) -> pd.DataFrame:
3333

3434
position_weight_df = get_stock_weight_df(position)
3535
# If the day does not exist, use the last weight
36-
position_weight_df.fillna(method="ffill", inplace=True)
36+
position_weight_df.ffill(inplace=True)
3737

3838
previous_data = {"date": None, "code_list": []}
3939

qlib/data/dataset/storage.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ def fetch(
6767
col_set: Union[str, List[str]] = DataHandler.CS_ALL,
6868
fetch_orig: bool = True,
6969
) -> pd.DataFrame:
70-
7170
# Following conflicts may occur
7271
# - Does ["20200101", "20210101"] mean selecting this slice or these two days?
7372
# To solve this issue

qlib/utils/mod.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,6 @@ def init_instance_by_config(
161161
# path like 'file:///<path to pickle file>/obj.pkl'
162162
pr = urlparse(config)
163163
if pr.scheme == "file":
164-
165164
# To enable relative paths like file://data/a/b/c.pkl, in which case pr.netloc will be "data"
166165
path = pr.path
167166
if pr.netloc != "":

qlib/utils/resam.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ def get_valid_value(series, last=True):
222222
Nan | float
223223
the first/last valid value
224224
"""
225-
return series.fillna(method="ffill").iloc[-1] if last else series.fillna(method="bfill").iloc[0]
225+
return series.ffill().iloc[-1] if last else series.bfill().iloc[0]
226226

227227

228228
def _ts_data_valid(ts_feature, last=False):

scripts/data_collector/baostock_5min/collector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def __init__(
172172
@staticmethod
173173
def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series:
174174
df = df.copy()
175-
_tmp_series = df["close"].fillna(method="ffill")
175+
_tmp_series = df["close"].ffill()
176176
_tmp_shift_series = _tmp_series.shift(1)
177177
if last_close is not None:
178178
_tmp_shift_series.iloc[0] = float(last_close)

scripts/data_collector/yahoo/collector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ class YahooNormalize(BaseNormalize):
371371
@staticmethod
372372
def calc_change(df: pd.DataFrame, last_close: float) -> pd.Series:
373373
df = df.copy()
374-
_tmp_series = df["close"].fillna(method="ffill")
374+
_tmp_series = df["close"].ffill()
375375
_tmp_shift_series = _tmp_series.shift(1)
376376
if last_close is not None:
377377
_tmp_shift_series.iloc[0] = float(last_close)
@@ -459,7 +459,7 @@ def adjusted_price(self, df: pd.DataFrame) -> pd.DataFrame:
459459
df.set_index(self._date_field_name, inplace=True)
460460
if "adjclose" in df:
461461
df["factor"] = df["adjclose"] / df["close"]
462-
df["factor"] = df["factor"].fillna(method="ffill")
462+
df["factor"] = df["factor"].ffill()
463463
else:
464464
df["factor"] = 1
465465
for _col in self.COLUMNS:

0 commit comments

Comments
 (0)