diff --git a/src/library/lakify.py b/src/library/lakify.py index 95c9bee..65651c1 100644 --- a/src/library/lakify.py +++ b/src/library/lakify.py @@ -91,14 +91,18 @@ def process_hash_list(document_datasets): if identifiers: id_hash = utils.get_hash_for_identifier( clean_identifier(identifiers[0])) + + # XML activity_xml = etree.tostring(activity, encoding='utf-8') - activity_json = recursive_json_nest(activity, {}) act_blob_client = blob_service_client.get_blob_client( - container=config['ACTIVITIES_LAKE_CONTAINER_NAME'], blob='{}.xml'.format(id_hash)) + container=config['ACTIVITIES_LAKE_CONTAINER_NAME'], blob='{}/{}.xml'.format(doc_id, id_hash)) act_blob_client.upload_blob(activity_xml, overwrite=True) act_blob_client.set_blob_tags({"dataset_hash": file_hash}) + + # JSON + activity_json = recursive_json_nest(activity, {}) act_blob_json_client = blob_service_client.get_blob_client( - container=config['ACTIVITIES_LAKE_CONTAINER_NAME'], blob='{}.json'.format(id_hash)) + container=config['ACTIVITIES_LAKE_CONTAINER_NAME'], blob='{}/{}.json'.format(doc_id, id_hash)) act_blob_json_client.upload_blob( json.dumps(activity_json, ensure_ascii=False).replace( '{http://www.w3.org/XML/1998/namespace}', 'xml:').encode('utf-8'), diff --git a/src/library/solrize.py b/src/library/solrize.py index 35ec8eb..caa2d42 100644 --- a/src/library/solrize.py +++ b/src/library/solrize.py @@ -143,7 +143,7 @@ def process_hash_list(document_datasets): for fa in flattened_activities[0]: hashed_identifier = utils.get_hash_for_identifier( fa['iati_identifier']) - blob_name = '{}.xml'.format(hashed_identifier) + blob_name = '{}/{}.xml'.format(file_id, hashed_identifier) try: blob_client = blob_service_client.get_blob_client( @@ -165,7 +165,7 @@ def process_hash_list(document_datasets): raise SolrizeSourceError('Could not identify charset for blob: ' + blob_name + ', file hash: ' + file_hash + ', iati-identifier: ' + fa['iati_identifier']) - json_blob_name = '{}.json'.format(hashed_identifier) + json_blob_name = '{}/{}.json'.format(file_id, hashed_identifier) try: json_blob_client = blob_service_client.get_blob_client(