Skip to content

Commit

Permalink
retry dask drop
Browse files Browse the repository at this point in the history
  • Loading branch information
davedavemckay committed Jan 10, 2025
1 parent 7db26e8 commit 6c7418e
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions csd3-side/scripts/lsst-backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1198,17 +1198,17 @@ def process_files(s3, bucket_name, api, current_objects, exclude, local_dir, des
del zip_batch_files, zip_batch_object_names, zip_batch_sizes

else:
npart = len(client.scheduler_info()['workers'])*10
to_collate = dd.from_pandas(pd.read_csv(collate_list_file).drop('upload', axis=1), npartitions=npart)
to_collate.object_names = dd.from_pandas(to_collate.object_names.compute().apply(literal_eval),npartitions=npart)
to_collate.file_paths = dd.from_pandas(to_collate.file_paths.compute().apply(literal_eval),npartitions=npart)
to_collate = dd.from_pandas(pd.read_csv(collate_list_file)
p = to_collate.npartitions
to_collate.object_names = dd.from_pandas(to_collate.object_names.compute().apply(literal_eval),npartitions=p)
to_collate.file_paths = dd.from_pandas(to_collate.file_paths.compute().apply(literal_eval),npartitions=p)
print(f'Loaded collate list from {collate_list_file}.', flush=True)
if not current_objects.empty:
# now using pandas for both current_objects and to_collate - this could be re-written to using vectorised operations
# client.scatter([current_objects,to_collate])
# to_collate = dd.from_pandas(to_collate, npartitions=len(client.scheduler_info()['workers'])*10)
print('Created Dask dataframe for to_collate.', flush=True)
# to_collate = to_collate.drop(columns=['upload'])
to_collate = to_collate.drop(columns='upload')
print(to_collate.index)
print(to_collate.columns)
print(to_collate.dtypes)
Expand All @@ -1217,7 +1217,6 @@ def process_files(s3, bucket_name, api, current_objects, exclude, local_dir, des
print('Comparison setup.', flush=True)
to_collate = to_collate.compute()
print('Comparison complete.', flush=True)
print(to_collate)
exit()

if save_collate_file:
Expand Down

0 comments on commit 6c7418e

Please sign in to comment.