Skip to content

Commit

Permalink
Create sample folder after unzipping Kaggle data
Browse files (browse the repository at this point in the history)
  • Loading branch information
qew21 committed Dec 26, 2024
1 parent a4e3ced commit e009fd7
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
8 changes: 6 additions & 2 deletions rdagent/scenarios/data_science/debug/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def create_debug_data(
dataset_path = KAGGLE_IMPLEMENT_SETTING.local_data_path

if sample_path is None:
# Create a sample folder under the dataset folder, which should be available in the Docker container
sample_path = Path(dataset_path) / "sample"

data_folder = Path(dataset_path) / competition
Expand Down Expand Up @@ -159,8 +160,11 @@ def create_debug_data(
df_sampled = data_reducer.reduce(df)

# Dump the sampled data
data_handler.dump(df_sampled, sampled_file_path)

try:
data_handler.dump(df_sampled, sampled_file_path)
except Exception as e:
print(f"Error processing {file_path}: {e}")
continue

# Script entry point: when run directly, hand create_debug_data to python-fire,
# which builds a command-line interface from the function's parameters.
if __name__ == "__main__":
fire.Fire(create_debug_data)
4 changes: 4 additions & 0 deletions rdagent/scenarios/kaggle/kaggle_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.env import MLEBDockerEnv
from rdagent.scenarios.data_science.debug.data import create_debug_data

# %%
options = webdriver.ChromeOptions()
Expand Down Expand Up @@ -161,6 +162,9 @@ def download_data(competition: str, settings: ExtendedBaseSettings = KAGGLE_IMPL
unzip_data(unzip_file_path=f"{zipfile_path}/{competition}.zip", unzip_target_path=unzip_path)
for sub_zip_file in Path(unzip_path).rglob("*.zip"):
unzip_data(sub_zip_file, unzip_target_path=unzip_path)

# sample data
create_debug_data(competition, dataset_path=local_path)


def unzip_data(unzip_file_path: str, unzip_target_path: str) -> None:
Expand Down

0 comments on commit e009fd7

Please sign in to comment.