From e229fc4b3839cfbb424ad040a469b3bb43df45cf Mon Sep 17 00:00:00 2001 From: Pallab Pain Date: Thu, 7 Nov 2024 17:04:16 +0530 Subject: [PATCH] fix(paramserver): file is too large error --- rapyuta_io/clients/paramserver.py | 44 +++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/rapyuta_io/clients/paramserver.py b/rapyuta_io/clients/paramserver.py index 7b7bf6be..c317750c 100644 --- a/rapyuta_io/clients/paramserver.py +++ b/rapyuta_io/clients/paramserver.py @@ -3,6 +3,7 @@ import enum import errno import hashlib +import json import mimetypes import os import tempfile @@ -147,7 +148,7 @@ def process_dir(self, executor, rootdir, tree_path, level, dir_futures, file_fut elif not in_attribute_dir: # ignore files in attribute directories file_stat = os.stat(full_path) file_name = os.path.basename(full_path) - if file_stat.st_size > self.max_non_binary_size: + if self.should_upload_as_binary(file_stat): future = executor.submit(self.create_binary_file, new_tree_path, full_path) if file_name.endswith('.yaml'): data = parse_yaml(full_path) @@ -164,25 +165,40 @@ def process_folder(self, executor, rootdir, tree_path, level, dir_futures, file_ for name in listdir(join(rootdir, tree_path)): full_path = join(rootdir, tree_path, name) new_tree_path = join(tree_path, name) + if isdir(full_path): future = executor.submit(self.create_folder, new_tree_path) dir_futures[future] = (new_tree_path, level + 1) + continue + + file_stat = os.stat(full_path) + file_name = os.path.basename(full_path) + + if self.should_upload_as_binary(file_stat): + future = executor.submit(self.create_binary_file, new_tree_path, full_path) + elif file_name.endswith('.yaml'): + data = parse_yaml(full_path) + future = executor.submit(self.create_file, new_tree_path, data) + elif file_name.endswith('.json'): + data = parse_json(full_path) + future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type) else: - file_stat = os.stat(full_path) - file_name = os.path.basename(full_path) - if file_stat.st_size > self.max_non_binary_size: - future = executor.submit(self.create_binary_file, new_tree_path, full_path) - elif file_name.endswith('.yaml'): - data = parse_yaml(full_path) - future = executor.submit(self.create_file, new_tree_path, data) - elif file_name.endswith('.json'): - data = parse_json(full_path) - future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type) - else: - future = executor.submit(self.create_binary_file, new_tree_path, full_path) - file_futures[future] = new_tree_path + future = executor.submit(self.create_binary_file, new_tree_path, full_path) + file_futures[future] = new_tree_path + return dir_futures, file_futures + def should_upload_as_binary(self, file_stat: os.stat_result): + """Determines if the file should be uploaded as binary based on the file size + + While the file data may be less than the supported limit, the combined size of + the API payload is what is stored by paramserver. This method calculates the + size of the payload and determines if it exceeds the supported limit. If it does, + the file is uploaded as a binary to the blob store. + """ + metadata_size_buffer = 200 # In bytes + return file_stat.st_size + metadata_size_buffer > self.max_non_binary_size + def upload_configurations(self, rootdir, tree_names, delete_existing_trees, as_folder=False): self.validate_args(rootdir, tree_names, delete_existing_trees, as_folder) with futures.ThreadPoolExecutor(max_workers=15) as executor: