Skip to content

Commit

Permalink
[improve] Change the dataset download source from GitLab to HuggingFace.
Browse files Browse the repository at this point in the history
  • Loading branch information
kervias committed Mar 5, 2024
1 parent e935469 commit d4814e5
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 13 deletions.
12 changes: 6 additions & 6 deletions edustudio/assets/datasets.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
ASSIST_0910:
middata_url: https://gitlab.com/hfut-lec/edudatafiles/-/raw/main/ASSIST_0910/ASSIST_0910-middata.zip
middata_url: https://huggingface.co/datasets/lmcRS/edustudio-datasets/resolve/main/ASSIST_0910/ASSIST_0910-middata.zip
FrcSub:
middata_url: https://gitlab.com/hfut-lec/edudatafiles/-/raw/main/FrcSub/FrcSub-middata.zip
middata_url: https://huggingface.co/datasets/lmcRS/edustudio-datasets/resolve/main/FrcSub/FrcSub-middata.zip
Math1:
middata_url: https://gitlab.com/hfut-lec/edudatafiles/-/raw/main/Math1/Math1-middata.zip
middata_url: https://huggingface.co/datasets/lmcRS/edustudio-datasets/resolve/main/Math1/Math1-middata.zip
Math2:
middata_url: https://gitlab.com/hfut-lec/edudatafiles/-/raw/main/Math2/Math2-middata.zip
middata_url: https://huggingface.co/datasets/lmcRS/edustudio-datasets/resolve/main/Math2/Math2-middata.zip
AAAI_2023:
middata_url: https://gitlab.com/hfut-lec/edudatafiles/-/raw/main/AAAI_2023/AAAI_2023-middata.zip
middata_url: https://huggingface.co/datasets/lmcRS/edustudio-datasets/resolve/main/AAAI_2023/AAAI_2023-middata.zip
PISA_2015_ECD:
middata_url: https://gitlab.com/hfut-lec/edudatafiles/-/raw/main/PISA_2015_ECD/PISA_2015_ECD-middata.zip
middata_url: https://huggingface.co/datasets/lmcRS/edustudio-datasets/resolve/main/PISA_2015_ECD/PISA_2015_ECD-middata.zip
8 changes: 5 additions & 3 deletions edustudio/datatpl/common/base_datatpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,17 @@ def download_dataset(cls, cfg):
cfg (UnifyConfig): the global config object
"""
dt_name = cfg.dataset
cfg.logger.warning(f"Can't find dataset files of {dt_name} in local environment!")
cfg.logger.info(f"Prepare to download {dt_name} from Internet.")
cfg.logger.warning(f"Can't find dataset files of {dt_name} in local disk!")

fph = cfg.frame_cfg['DT_INFO_FILE_PATH']
dataset_info = cls.read_yml_file(fph)
dataset_info_from_cfg: dict = cfg['frame_cfg']['DT_INFO_DICT']
dataset_info.update(dataset_info_from_cfg)

cfg.logger.info(f"Prepare to download {dt_name} dataset from online")
cfg.logger.info(f"Download_url: {dataset_info[dt_name]['middata_url']}")
if dt_name not in dataset_info:
raise Exception("Can't find dataset files from Local and Internet!")
raise Exception("Can't find dataset files from local disk and online")

if not os.path.exists(cfg.frame_cfg.data_folder_path):
os.makedirs(cfg.frame_cfg.data_folder_path)
Expand Down
3 changes: 1 addition & 2 deletions edustudio/datatpl/common/general_datatpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,7 @@ def from_cfg(cls, cfg):
Returns:
BaseDataTPL
"""
if not os.path.exists(f'{cfg.frame_cfg.data_folder_path}'):
print(cfg.frame_cfg.data_folder_path)
if not os.path.exists(cfg.frame_cfg.data_folder_path) or len(os.listdir(cfg.frame_cfg.data_folder_path)) == 0:
cls.download_dataset(cfg)

load_data_from = cfg.datatpl_cfg['load_data_from']
Expand Down
2 changes: 1 addition & 1 deletion edustudio/datatpl/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
class BigfileDownloader(object):
@staticmethod
def download(url, title, filepath, chunk_size=10240):
with closing(requests.get(url, stream=True)) as resp:
with closing(requests.get(url, stream=True, allow_redirects=True)) as resp:
if resp.status_code != 200:
raise Exception("[ERROR]: {} - {} -{}".format(str(resp.status_code), title, url))
chunk_size = chunk_size
Expand Down
2 changes: 1 addition & 1 deletion examples/single_model/run_ncdm_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@
'cls': 'NCDM',
},
evaltpl_cfg_dict={
'clses': ['PredictionEvalTPL'],
'clses': ['PredictionEvalTPL', 'InterpretabilityEvalTPL'],
}
)

0 comments on commit d4814e5

Please sign in to comment.