Skip to content

Commit 4b66304

Browse files
committed
Fix us_index collector
1 parent 253378a commit 4b66304

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

scripts/data_collector/index.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ def save_new_companies(self):
114114
$ python collector.py save_new_companies --index_name CSI300 --qlib_dir ~/.qlib/qlib_data/cn_data
115115
"""
116116
df = self.get_new_companies()
117+
if df is None or df.empty:
118+
raise ValueError(f"get new companies error: {self.index_name}")
117119
df = df.drop_duplicates([self.SYMBOL_FIELD_NAME])
118120
df.loc[:, self.INSTRUMENTS_COLUMNS].to_csv(
119121
self.instruments_dir.joinpath(f"{self.index_name.lower()}_only_new.txt"), sep="\t", index=False, header=None
@@ -184,7 +186,10 @@ def parse_instruments(self):
184186
logger.info(f"start parse {self.index_name.lower()} companies.....")
185187
instruments_columns = [self.SYMBOL_FIELD_NAME, self.START_DATE_FIELD, self.END_DATE_FIELD]
186188
changers_df = self.get_changes()
187-
new_df = self.get_new_companies().copy()
189+
new_df = self.get_new_companies()
190+
if new_df is None or new_df.empty:
191+
raise ValueError(f"get new companies error: {self.index_name}")
192+
new_df = new_df.copy()
188193
logger.info("parse history companies by changes......")
189194
for _row in tqdm(changers_df.sort_values(self.DATE_FIELD_NAME, ascending=False).itertuples(index=False)):
190195
if _row.type == self.ADD:

scripts/data_collector/us_index/collector.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
class WIKIIndex(IndexBase):
3636
# NOTE: Some US stock codes (e.g. "PRN") are reserved names on Windows, so a directory with that name cannot be created there; the "_" prefix is used to work around this
3737
# https://superuser.com/questions/613313/why-cant-we-make-con-prn-null-folder-in-windows
38-
INST_PREFIX = "_"
38+
INST_PREFIX = ""
3939

4040
def __init__(self, index_name: str, qlib_dir: [str, Path] = None, request_retry: int = 5, retry_sleep: int = 3):
4141
super(WIKIIndex, self).__init__(
@@ -123,7 +123,7 @@ class NASDAQ100Index(WIKIIndex):
123123
MAX_WORKERS = 16
124124

125125
def filter_df(self, df: pd.DataFrame) -> pd.DataFrame:
126-
if not (set(df.columns) - {"Company", "Ticker"}):
126+
if len(df) >= 100 and "Ticker" in df.columns:
127127
return df.loc[:, ["Ticker"]].copy()
128128

129129
@property

0 commit comments

Comments
 (0)