Skip to content

Commit

Permalink
Bug Fix: Correct JSON attached to correct Activity when Activity is in more than 1 document
Browse files Browse the repository at this point in the history

#300

Also re-sorted and regrouped a small amount of code so it's easier to read
  • Loading branch information
jarofgreen committed Dec 6, 2023
1 parent 69768ee commit 6d30c36
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
10 changes: 7 additions & 3 deletions src/library/lakify.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,18 @@ def process_hash_list(document_datasets):
if identifiers:
id_hash = utils.get_hash_for_identifier(
clean_identifier(identifiers[0]))

# XML
activity_xml = etree.tostring(activity, encoding='utf-8')
activity_json = recursive_json_nest(activity, {})
act_blob_client = blob_service_client.get_blob_client(
container=config['ACTIVITIES_LAKE_CONTAINER_NAME'], blob='{}.xml'.format(id_hash))
container=config['ACTIVITIES_LAKE_CONTAINER_NAME'], blob='{}/{}.xml'.format(doc_id, id_hash))
act_blob_client.upload_blob(activity_xml, overwrite=True)
act_blob_client.set_blob_tags({"dataset_hash": file_hash})

# JSON
activity_json = recursive_json_nest(activity, {})
act_blob_json_client = blob_service_client.get_blob_client(
container=config['ACTIVITIES_LAKE_CONTAINER_NAME'], blob='{}.json'.format(id_hash))
container=config['ACTIVITIES_LAKE_CONTAINER_NAME'], blob='{}/{}.json'.format(doc_id, id_hash))
act_blob_json_client.upload_blob(
json.dumps(activity_json, ensure_ascii=False).replace(
'{http://www.w3.org/XML/1998/namespace}', 'xml:').encode('utf-8'),
Expand Down
4 changes: 2 additions & 2 deletions src/library/solrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def process_hash_list(document_datasets):
for fa in flattened_activities[0]:
hashed_identifier = utils.get_hash_for_identifier(
fa['iati_identifier'])
blob_name = '{}.xml'.format(hashed_identifier)
blob_name = '{}/{}.xml'.format(file_id, hashed_identifier)

try:
blob_client = blob_service_client.get_blob_client(
Expand All @@ -165,7 +165,7 @@ def process_hash_list(document_datasets):
raise SolrizeSourceError('Could not identify charset for blob: ' + blob_name +
', file hash: ' + file_hash + ', iati-identifier: ' + fa['iati_identifier'])

json_blob_name = '{}.json'.format(hashed_identifier)
json_blob_name = '{}/{}.json'.format(file_id, hashed_identifier)

try:
json_blob_client = blob_service_client.get_blob_client(
Expand Down

0 comments on commit 6d30c36

Please sign in to comment.