Skip to content

Commit

Permalink
fix data sample for bms
Browse files — browse the repository at this point in the history
  • Loading branch information
qew21 committed Jan 7, 2025
1 parent 29e7149 commit 7e3d774
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions rdagent/scenarios/data_science/debug/data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -128,7 +128,7 @@ def create_debug_data(
dataset_path = KAGGLE_IMPLEMENT_SETTING.local_data_path # FIXME: don't hardcode this KAGGLE_IMPLEMENT_SETTING

if sample_path is None:
- sample_path = Path(dataset_path) / "sample"
+ sample_path = Path(dataset_path) / "sample1"

data_folder = Path(dataset_path) / competition
sample_folder = Path(sample_path) / competition
Expand Down Expand Up @@ -186,16 +186,17 @@ def create_debug_data(
subfolder_dict.setdefault(rel_dir, []).append(file_path)

# For each subfolder, decide which files to copy
cnt =0
for rel_dir, file_list in subfolder_dict.items():
used_files = []
not_used_files = []

# Check if each file is in the "used" list
for fp in file_list:
# If your logic is only about the file's name:
# if fp.name in sample_used_file_names:
- if str(fp.name) in sample_used_file_names or str(fp) in sample_used_file_names:
+ if str(fp.name) in sample_used_file_names or str(fp.stem) in sample_used_file_names:
used_files.append(fp)
print(f"{cnt} Copying {fp} to used_files")
cnt += 1
else:
not_used_files.append(fp)

Expand Down

0 comments on commit 7e3d774

Please sign in to comment.