diff --git a/src/datasets/packaged_modules/webdataset/webdataset.py b/src/datasets/packaged_modules/webdataset/webdataset.py index 0768437b36a..ed0bf6428b1 100644 --- a/src/datasets/packaged_modules/webdataset/webdataset.py +++ b/src/datasets/packaged_modules/webdataset/webdataset.py @@ -34,6 +34,9 @@ def _get_pipeline_from_tar(cls, tar_path, tar_iterator): if example_key is None: continue if current_example and current_example["__key__"] != example_key: + # reposition some keys in last position + current_example["__key__"] = current_example.pop("__key__") + current_example["__url__"] = current_example.pop("__url__") yield current_example current_example = {} current_example["__key__"] = example_key