Skip to content

Commit

Permalink
Merge pull request #27 from CBIIT/crdcdh-1588-0010pgu
Browse files Browse the repository at this point in the history
Crdcdh 1588 0010pgu
  • Loading branch information
n2iw authored Sep 12, 2024
2 parents f8c66be + 9f322ad commit 38b467f
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
FILE_ID_FIELD = "id-field"
FILE_ID_DEFAULT = "fileID"
OMIT_DCF_PREFIX = "omit-DCF-prefix"
DCF_PREFIX = "dg.4DFC/"
MD5_DEFAULT = "md5sum" #match data model md5 name
TOKEN = "token"
API_URL = "api-url"
Expand Down
3 changes: 2 additions & 1 deletion src/copier.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ def copy_file(self, file_info, overwrite, dryrun):
self.files_exist_at_dest += 1
file_info[SKIPPED] = True
return succeed

else:
file_info[SKIPPED] = False
#self.log.info(f'Copying from {org_url} to s3://{self.bucket_name}/{key.strip("/")} ...')
self.log.info(f'Copying from {org_url} to destination folder in S3 bucket ...')
dest_size = self._upload_obj(org_url, key, org_size)
Expand Down
10 changes: 5 additions & 5 deletions src/process_manifest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import csv, os, io
from common.constants import FILE_ID_DEFAULT, FILE_NAME_FIELD, BATCH_BUCKET, S3_BUCKET, FILE_PREFIX, BATCH_ID, BATCH, BATCH_CREATED,\
FILE_ID_FIELD, UPLOAD_TYPE, FILE_NAME_DEFAULT, FILE_PATH, FILE_SIZE_DEFAULT, BATCH_STATUS, PRE_MANIFEST
from common.constants import FILE_ID_DEFAULT, FILE_NAME_FIELD, BATCH_BUCKET, S3_BUCKET, FILE_PREFIX, BATCH_ID, DCF_PREFIX, BATCH_CREATED,\
FILE_ID_FIELD, UPLOAD_TYPE, FILE_NAME_DEFAULT, FILE_PATH, FILE_SIZE_DEFAULT, BATCH_STATUS, PRE_MANIFEST, OMIT_DCF_PREFIX
from common.graphql_client import APIInvoker
from copier import Copier

Expand Down Expand Up @@ -37,7 +37,7 @@ def process_manifest_file(configs, has_file_id, file_infos, manifest_rows, manif
manifest_file_info = None
try:
if not has_file_id:
result = add_file_id(file_id_name, file_name_name, final_manifest_path , file_infos, manifest_rows, manifest_columns)
result = add_file_id(file_id_name, file_name_name, final_manifest_path , file_infos, manifest_rows, manifest_columns, configs.get(OMIT_DCF_PREFIX))
if not result:
print(f"Failed to add file id to the pre-manifest, {final_manifest_path }.")
return False
Expand Down Expand Up @@ -79,15 +79,15 @@ def process_manifest_file(configs, has_file_id, file_infos, manifest_rows, manif
return True

# This method will create a new manifest file with the file id column added to the pre-manifest.
def add_file_id(file_id_name, file_name_name, final_manifest_path, file_infos, manifest_rows, manifest_columns):
def add_file_id(file_id_name, file_name_name, final_manifest_path, file_infos, manifest_rows, manifest_columns, omit_prefix):
    """Write the final manifest: the pre-manifest rows with a file-ID column filled in.

    For every entry in ``file_infos`` the manifest row whose ``file_name_name``
    column equals the entry's ``"fileName"`` is looked up, the entry's file ID is
    stored in that row under ``file_id_name``, and the row is queued for output.
    The queued rows are then written to ``final_manifest_path`` as a
    tab-separated file whose header is ``manifest_columns``.

    Side effects: each ``file_infos`` entry has its ``FILE_ID_DEFAULT`` value
    rewritten (possibly with the DCF prefix removed), and matched
    ``manifest_rows`` dicts gain/overwrite the ``file_id_name`` key.

    Args:
        file_id_name: Column/key under which the file ID is stored in each row.
        file_name_name: Column/key in each manifest row holding the file name.
        final_manifest_path: Path of the TSV file to create or overwrite.
        file_infos: Iterable of dicts with at least ``"fileName"`` and
            ``FILE_ID_DEFAULT`` keys.
        manifest_rows: Iterable of dict rows read from the pre-manifest.
        manifest_columns: Header row written as the first line of the output.
        omit_prefix: When exactly ``False`` the file ID is kept as-is; any
            other value strips a leading ``DCF_PREFIX`` from the ID.

    Returns:
        True once the file has been written.

    Raises:
        IndexError: If a file has no matching row in ``manifest_rows``.
        KeyError: If a row or file entry lacks an expected key.
    """
    # Index the rows by file name once instead of rescanning the whole manifest
    # for every file. setdefault keeps the first row seen for a name, which is
    # the row the previous "first match" lookup selected.
    rows_by_name = {}
    for row in manifest_rows:
        rows_by_name.setdefault(row[file_name_name], row)

    # NOTE(review): any value other than False -- including None when the
    # option is absent from the configuration -- strips the prefix. Confirm
    # that this default is intended.
    strip_prefix = omit_prefix != False  # noqa: E712 - equality semantics kept on purpose

    output = []
    for file in file_infos:
        name = file["fileName"]
        if name not in rows_by_name:
            # Same exception type as the former `[...][0]` lookup, but with a
            # message that identifies the offending file.
            raise IndexError(f'No manifest row found for file "{name}".')
        row = rows_by_name[name]

        file_id = file[FILE_ID_DEFAULT]
        # Strip the DCF marker only when it really is a prefix; a blanket
        # str.replace would also delete occurrences inside the ID.
        if strip_prefix and file_id.startswith(DCF_PREFIX):
            file_id = file_id[len(DCF_PREFIX):]
        file[FILE_ID_DEFAULT] = file_id
        row[file_id_name] = file_id

        # Snapshot the values: dict.values() is a live view, so a later update
        # of the same row would otherwise rewrite rows already queued.
        output.append(list(row.values()))

    with open(final_manifest_path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerow(manifest_columns)
        writer.writerows(output)

    return True

0 comments on commit 38b467f

Please sign in to comment.