Skip to content

Commit

Permalink
X
Browse files Browse the repository at this point in the history
  • Loading branch information
John Major committed Nov 25, 2024
1 parent 435949f commit 7a01bf8
Showing 1 changed file with 44 additions and 9 deletions.
53 changes: 44 additions & 9 deletions bloom_lims/bobjs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2993,7 +2993,8 @@ def add_file_data(
f"A file with EUID {euid} already exists in bucket {s3_bucket_name} {s3_key_path}."
)

if import_or_remote in ["Remote", "remote"] and s3_uri is not None:
if s3_uri:
# This check used to run only for remote S3 URIs; it now runs whenever an s3_uri is given, and the files themselves are left where they are for now.
# Check whether a file with the same current_s3_uri is already registered in the database

search_criteria = {"properties": {"current_s3_uri": s3_uri}}
Expand All @@ -3002,20 +3003,48 @@ def add_file_data(
if len(existing_euids) > 0:
raise Exception(f"Remote file with URI {s3_uri} already exists in the database as {existing_euids}.")

s3uri_bucket=s3_uri.split("/")[2]
s3uri_key="/".join(s3_uri.split("/")[3:])
# Store metadata for the remote file
file_properties = {
"remote_s3_uri": s3_uri,
"original_file_name": file_name,
"name": file_name,
"original_file_size_bytes": None, # Size is unknown for remote files
"original_file_suffix": file_suffix,
"original_file_data_type": "remote",
"original_file_data_type": "s3uri",
"file_type": file_suffix,
"current_s3_uri": s3_uri,
"original_s3_uri": s3_uri,
"current_s3_key": "/".join(s3_uri.split("/")[3:]),
"current_s3_bucket_name": s3_uri.split("/")[2],
"current_s3_key": s3uri_key,
"current_s3_bucket_name": s3uri_bucket,
"import_or_remote": import_or_remote,
}

# Construct the tags
tagging = {
'TagSet': [
{'Key': 'creating_service', 'Value': 'dewey'},
{'Key': 'original_file_name', 'Value': self.sanitize_tag(file_name)},
{'Key': 'original_file_path', 'Value': 'N/A'},
{'Key': 'original_file_size_bytes', 'Value': self.sanitize_tag(str(file_properties.get("original_file_size_bytes", "unknown")))},
{'Key': 'original_file_suffix', 'Value': self.sanitize_tag(file_suffix)},
{'Key': 'euid', 'Value': self.sanitize_tag(euid)},
{'Key': 'import_or_remote', 'Value': self.sanitize_tag(import_or_remote)}
]
}

# Apply the tags to the existing object
try:
self.s3_client.put_object_tagging(
Bucket=s3uri_bucket,
Key=s3uri_key,
Tagging=tagging
)
self.logger.info(f"Tags successfully applied to S3 object {s3_uri}")
except Exception as e:
self.logger.exception(f"Error tagging existing S3 object {s3_uri}: {e}\n\n{tagging}")
raise Exception(f"Failed to tag S3 object: {e}\n{tagging}")

_update_recursive(file_instance.json_addl["properties"], file_properties)
flag_modified(file_instance, "json_addl")
Expand All @@ -3033,7 +3062,7 @@ def add_file_data(
Bucket=s3_bucket_name,
Key=s3_key,
Body=file_data,
Tagging=f"creating_service=dewey&original_file_name={self.sanitize_tag(file_name)}&original_file_path=N/A&original_file_size_bytes={self.sanitize_tag(str(file_size))}&original_file_suffix={self.sanitize_tag(file_suffix)}&euid={self.sanitize_tag(euid)}{addl_tag_string}"
Tagging=f"import_or_remote={import_or_remote}&creating_service=dewey&original_file_name={self.sanitize_tag(file_name)}&original_file_path=N/A&original_file_size_bytes={self.sanitize_tag(str(file_size))}&original_file_suffix={self.sanitize_tag(file_suffix)}&euid={self.sanitize_tag(euid)}{addl_tag_string}"
)

except Exception as e:
Expand All @@ -3051,6 +3080,7 @@ def add_file_data(
"original_file_data_type": "raw data",
"file_type": file_suffix,
"current_s3_uri": f"s3://{s3_bucket_name}/{s3_key}",
"import_or_remote": import_or_remote,
}

elif url:
Expand All @@ -3062,7 +3092,7 @@ def add_file_data(
Bucket=s3_bucket_name,
Key=s3_key,
Body=response.content,
Tagging=f"creating_service=dewey&original_file_name={self.sanitize_tag(url_info)}&original_url={self.sanitize_tag(url)}&original_file_size_bytes={self.sanitize_tag(str(file_size))}&original_file_suffix={self.sanitize_tag(file_suffix)}&euid={self.sanitize_tag(euid)}{addl_tag_string}",
Tagging=f"import_or_remote={import_or_remote}&creating_service=dewey&original_file_name={self.sanitize_tag(url_info)}&original_url={self.sanitize_tag(url)}&original_file_size_bytes={self.sanitize_tag(str(file_size))}&original_file_suffix={self.sanitize_tag(file_suffix)}&euid={self.sanitize_tag(euid)}{addl_tag_string}",
)
file_properties = {
"current_s3_key": s3_key,
Expand All @@ -3074,6 +3104,7 @@ def add_file_data(
"original_file_data_type": "url",
"file_type": file_suffix,
"current_s3_uri": f"s3://{s3_bucket_name}/{s3_key}",
"import_or_remote": import_or_remote,
}

elif full_path_to_file:
Expand All @@ -3091,7 +3122,7 @@ def add_file_data(
Bucket=s3_bucket_name,
Key=s3_key,
Body=file_data,
Tagging=f"creating_service=dewey&original_file_name={self.sanitize_tag(local_path_info.name)}&original_file_path={self.sanitize_tag(full_path_to_file)}&original_file_size_bytes={self.sanitize_tag(str(file_size))}&original_file_suffix={self.sanitize_tag(file_suffix)}&euid={self.sanitize_tag(euid)}{addl_tag_string}",
Tagging=f"import_or_remote={import_or_remote}&creating_service=dewey&original_file_name={self.sanitize_tag(local_path_info.name)}&original_file_path={self.sanitize_tag(full_path_to_file)}&original_file_size_bytes={self.sanitize_tag(str(file_size))}&original_file_suffix={self.sanitize_tag(file_suffix)}&euid={self.sanitize_tag(euid)}{addl_tag_string}",
)
file_properties = {
"current_s3_key": s3_key,
Expand All @@ -3105,9 +3136,12 @@ def add_file_data(
"original_file_data_type": "local file",
"file_type": file_suffix,
"current_s3_uri": f"s3://{s3_bucket_name}/{s3_key}",
"import_or_remote": import_or_remote,
}

elif s3_uri:
elif s3_uri in ["deprecate this"]:
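# Branch intentionally disabled (the condition above only matches the literal "deprecate this"):
# I do not want to be in the business of moving files around here.
# Previous condition was: elif s3_uri: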
# Validate and move the file from the provided s3_uri
s3_parsed_uri = re.match(r"s3://([^/]+)/(.+)", s3_uri)
if not s3_parsed_uri:
Expand Down Expand Up @@ -3139,6 +3173,7 @@ def add_file_data(
"original_file_data_type": "s3_uri",
"file_type": file_suffix,
"current_s3_uri": f"s3://{s3_bucket_name}/{s3_key}",
"import_or_remote": import_or_remote,
}

# Delete the old file and create a marker file
Expand All @@ -3149,7 +3184,7 @@ def add_file_data(
Bucket=source_bucket,
Key=marker_key,
Body=b"",
Tagging=f"euid={euid}&original_s3_uri={self.sanitize_tag(s3_uri)}{addl_tag_string}",
Tagging=f"import_or_remote={import_or_remote}&euid={euid}&original_s3_uri={self.sanitize_tag(s3_uri)}{addl_tag_string}",
)
self.s3_client.delete_object(Bucket=source_bucket, Key=source_key)

Expand Down

0 comments on commit 7a01bf8

Please sign in to comment.