Skip to content

Commit c97a963

Browse files
zhupryou-n-g
authored and committed
Add a check if change is mutated to YahooNormalize1d
1 parent 2023f71 commit c97a963

File tree

1 file changed

+16
-3
lines changed

1 file changed

+16
-3
lines changed

scripts/data_collector/yahoo/collector.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -325,9 +325,22 @@ def normalize_yahoo(
325325
# NOTE: The data obtained by Yahoo finance sometimes has exceptions
326326
# WARNING: If it is normal for a `symbol(exchange)` to differ by a factor of *89* to *111* for consecutive trading days,
327327
# WARNING: the logic in the following line needs to be modified
328-
_mask = (change_series >= 89) & (change_series <= 111)
329-
_tmp_cols = ["high", "close", "low", "open", "adjclose"]
330-
df.loc[_mask, _tmp_cols] = df.loc[_mask, _tmp_cols] / 100
328+
_count = 0
329+
while True:
330+
# NOTE: may appear unusual for many days in a row
331+
change_series = YahooNormalize.calc_change(df, last_close)
332+
_mask = (change_series >= 89) & (change_series <= 111)
333+
if not _mask.any():
334+
break
335+
_tmp_cols = ["high", "close", "low", "open", "adjclose"]
336+
df.loc[_mask, _tmp_cols] = df.loc[_mask, _tmp_cols] / 100
337+
_count += 1
338+
if _count >= 10:
339+
_symbol = df.loc[df[symbol_field_name].first_valid_index()]["symbol"]
340+
logger.warning(
341+
f"{_symbol} `change` is abnormal for {_count} consecutive days, please check the specific data file carefully"
342+
)
343+
331344
df["change"] = YahooNormalize.calc_change(df, last_close)
332345

333346
columns += ["change"]

0 commit comments

Comments
 (0)