-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from AAFC-BICoE/External-Resource-Upload
External resource upload
- Loading branch information
Showing
4 changed files
with
178 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# dina-py | ||
|
||
## External Resource URL importer | ||
|
||
Reads a .txt file to create External Resource URLs to be linked to Material Samples as Attachments | ||
|
||
### Usage | ||
|
||
Before using the script, make sure to go through the installation instructions in the repository's README | ||
|
||
Once done, in the terminal, change current working directory: | ||
```bash | ||
cd examples/external-resource-import-demo | ||
``` | ||
|
||
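Copy the .txt file containing the External URLs to be uploaded into the current folder. The script reads a file named **dataset.txt** by default; rename your file or change the `file` variable in the script to match. | ||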
|
||
Then modify (as needed) and run the **external_resource_importer.py** script. | ||
|
||
Once the script finishes, an output file called **external_url_uuids.txt** will contain the generated UUID of each External Resource (new UUIDs are appended if the file already exists). | ||
A similar file called **material_sample_uuids.txt** is written for the created Material Samples. |
100 changes: 100 additions & 0 deletions
100
examples/external-resource-import-demo/external_resource_importer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
from dinapy.apis.objectstoreapi.objectstore_api import ObjectStoreApi | ||
from dinapy.entities.Metadata import MetadataAttributesDTOBuilder, MetadataDTOBuilder | ||
from dinapy.schemas.metadata_schema import MetadataSchema | ||
from dinapy.apis.collectionapi.materialsampleapi import MaterialSampleAPI | ||
from dinapy.entities.MaterialSample import MaterialSampleDTOBuilder, MaterialSampleAttributesDTOBuilder | ||
from dinapy.schemas.materialsampleschema import MaterialSampleSchema | ||
from dinapy.entities.Relationships import RelationshipDTO | ||
|
||
import traceback | ||
import os | ||
import sys | ||
|
||
# Put a project directory at the front of the Python module search path.
# NOTE(review): two dirname() calls starting from this file's path resolve to
# the parent of the folder that contains this script (presumably `examples/`,
# not the repository root), and this statement runs after the dinapy imports
# above, so it cannot influence them -- confirm the intent.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
sys.path.insert(0, project_root) | ||
|
||
def main():
    """Import external resource URLs and create a Material Sample for each.

    Reads ``dataset.txt`` from the folder that contains this script, one
    external file path per line. For every non-blank line an External
    Resource metadata record and a Material Sample that attaches it are
    created, and the generated UUIDs are appended to
    ``external_url_uuids.txt`` and ``material_sample_uuids.txt``.

    A failure on one line is written to ``error_log.txt`` (together with its
    traceback) and the import continues with the next line, so a single bad
    entry does not discard the rest of the dataset.

    Returns:
        ``'Duplicate File, File not Uploaded'`` if any logged failure
        mentioned ``'Duplicate File Exists'``; otherwise ``None``.
    """
    # Resolve every path against this script's own folder so the script
    # behaves the same whether it is launched from the repository root or,
    # as the README instructs, from inside the demo folder.
    demo_dir = os.path.dirname(os.path.abspath(__file__))

    # File to be uploaded
    # Change as needed
    file = os.path.join(demo_dir, "dataset.txt")
    external_uuid_path = os.path.join(demo_dir, "external_url_uuids.txt")
    sample_uuid_path = os.path.join(demo_dir, "material_sample_uuids.txt")
    error_log_path = os.path.join(demo_dir, "error_log.txt")

    try:
        with open(file, 'r') as f:
            # Strip the trailing newline (it must not end up inside the
            # external URL) and skip blank lines.
            entries = [line.strip() for line in f if line.strip()]
    except Exception:
        _log_failure(error_log_path, file)
        return None

    duplicate_found = False
    for entry in entries:
        try:
            _import_entry(entry, external_uuid_path, sample_uuid_path)
        except Exception:
            # Script-level boundary: record the failure and keep going.
            details = _log_failure(error_log_path, entry)
            if 'Duplicate File Exists' in details:
                duplicate_found = True

    if duplicate_found:
        return 'Duplicate File, File not Uploaded'
    return None


def _import_entry(entry, external_uuid_path, sample_uuid_path):
    """Create the External Resource and Material Sample for one dataset line.

    Args:
        entry: One stripped, non-empty line of the dataset file.
        external_uuid_path: File the new External Resource UUID is appended to.
        sample_uuid_path: File the new Material Sample UUID is appended to.
    """
    # Take the last path segment as the filename
    file_name = entry.split("/")[-1]

    # Set metadata attributes
    # Change as needed
    metadata_attributes = (
        MetadataAttributesDTOBuilder()
        .set_dcType("DATASET")
        .set_acCaption(file_name)
        .set_acSubtype("SEQUENCE FILE")
        .set_dcFormat("application/gzip")
        .set_resourceExternalURL(f"file:/{entry}")
        .set_fileExtension(".fastq.gz")
        .set_bucket("aafc")
        .set_originalFilename(file_name)
        .build()
    )
    file_metadata = MetadataDTOBuilder().set_attributes(metadata_attributes).build()

    file_metadata_api = ObjectStoreApi()
    file_metadata_schema = MetadataSchema()

    # Build Metadata JSON object
    metadata_payload = file_metadata_schema.dump(file_metadata)
    # Upload to DINA instance
    extern_res_response = file_metadata_api.create_entity(metadata_payload, 'metadata')

    # Write UUIDs to file
    extern_res_uuid = extern_res_response.json()['data']['id']
    with open(external_uuid_path, 'a') as uuid_file:
        print(extern_res_uuid, file=uuid_file)
    print(f"External Resource URL {extern_res_uuid} Created")

    # Build relationship to External Resource
    link_to_url = (
        RelationshipDTO.Builder()
        .add_relationship(
            "attachment",     # Makes an attachment relationship
            "metadata",       # The type of object to be attached
            extern_res_uuid   # Object UUID
        )
        .build()
    )

    # Get Material Sample info from file name
    # Parsing done for a sample .txt file
    # Change as needed
    sample_name = file_name.split(".")[0]

    # Create Material Samples
    material_sample_api = MaterialSampleAPI()
    material_sample_schema = MaterialSampleSchema()
    # Define Material Sample Attributes based on variables
    # Change as needed
    material_sample_attributes = (
        MaterialSampleAttributesDTOBuilder()
        .createdBy("dina-admin")
        .group("aafc")
        .materialSampleName(sample_name)
        .materialSampleRemarks(f"File Name: {file_name}")
        .build()
    )
    # Build Material Sample
    material_sample = (
        MaterialSampleDTOBuilder()
        .attributes(material_sample_attributes)
        .relationships(link_to_url)
        .build()
    )

    serialized_material_sample = material_sample_schema.dump(material_sample)

    # Create DINA object
    mat_sample_response = material_sample_api.create_entity(serialized_material_sample)
    mat_sample_id = mat_sample_response.json()['data']['id']
    print(f"Material Sample {mat_sample_id} Created")
    print()

    # Write Material Sample UUIDs to file
    with open(sample_uuid_path, 'a') as writer:
        print(mat_sample_id, file=writer)


def _log_failure(log_path, subject):
    """Append the active exception's traceback to the error log.

    Must be called from inside an ``except`` block.

    Args:
        log_path: Error log file, opened in append mode.
        subject: What failed to upload; written on the first line of the entry.

    Returns:
        The formatted traceback text, so the caller can inspect it.
    """
    details = traceback.format_exc()
    with open(log_path, 'a') as log:
        print(f'File Not Uploaded: {subject}\n{details}', file=log)
    return details
|
||
# Run the import only when this file is executed as a script.
if __name__ == '__main__': | ||
main() |
55 changes: 55 additions & 0 deletions
55
examples/external-resource-import-demo/link_samples_to_project.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
from dinapy.apis.collectionapi.materialsampleapi import MaterialSampleAPI | ||
from dinapy.entities.MaterialSample import MaterialSampleDTOBuilder | ||
from dinapy.schemas.materialsampleschema import MaterialSampleSchema | ||
from dinapy.entities.Relationships import RelationshipDTO | ||
|
||
import traceback | ||
import os | ||
import sys | ||
|
||
# Put a project directory at the front of the Python module search path.
# NOTE(review): two dirname() calls starting from this file's path resolve to
# the parent of the folder that contains this script (presumably `examples/`,
# not the repository root), and this statement runs after the dinapy imports
# above, so it cannot influence them -- confirm the intent.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
sys.path.insert(0, project_root) | ||
|
||
def main():
    """Link every previously created Material Sample to a Project.

    Reads Material Sample UUIDs (one per line) from
    ``material_sample_uuids.txt`` in the folder that contains this script and
    sends the same update payload, a ``projects`` relationship to
    ``project_uuid``, for each of them.

    A failure on one UUID is written to ``error_log.txt`` (together with its
    traceback) and the remaining UUIDs are still processed.
    """
    # Resolve every path against this script's own folder so the script
    # behaves the same whether it is launched from the repository root or
    # from inside the demo folder.
    demo_dir = os.path.dirname(os.path.abspath(__file__))
    error_log_path = os.path.join(demo_dir, "error_log.txt")

    # Pre-created Project Object UUID
    # Change as needed
    project_uuid = "01939841-002e-7597-86f2-368144d714b9"

    # Build relationship to Project
    link_to_project = (
        RelationshipDTO.Builder()
        .add_relationship(
            "projects",    # Makes a projects relationship
            "project",     # The type of object to be attached
            project_uuid   # Project UUID
        )
        .build()
    )

    # Build Material Sample JSON Data
    material_sample_api = MaterialSampleAPI()
    material_sample_schema = MaterialSampleSchema()
    material_sample = MaterialSampleDTOBuilder().relationships(link_to_project).build()
    # JSON data to be passed
    serialized_material_sample = material_sample_schema.dump(material_sample)

    # File that stores Material Sample UUIDs
    file = os.path.join(demo_dir, "material_sample_uuids.txt")
    try:
        with open(file, 'r') as f:
            # Strip newlines and skip blank lines so no empty UUID is sent.
            sample_uuids = [line.strip() for line in f if line.strip()]
    except Exception:
        _append_error(error_log_path, f'Could Not Read UUID File: {file}')
        return

    for mat_sample_uuid in sample_uuids:
        try:
            # Update DINA Material Sample Object
            response = material_sample_api.update_entity(mat_sample_uuid, serialized_material_sample)
            mat_sample_id = response.json()['data']['id']
            print(f"Material Sample {mat_sample_id} Updated")
        except Exception:
            # Script-level boundary: record the failure and keep going.
            _append_error(error_log_path, f'Material Sample Not Updated: {mat_sample_uuid}')


def _append_error(log_path, headline):
    """Append *headline* and the active exception's traceback to *log_path*.

    Must be called from inside an ``except`` block.
    """
    with open(log_path, 'a') as log:
        print(f'{headline}\n{traceback.format_exc()}', file=log)
|
||
# Run the linking step only when this file is executed as a script.
if __name__ == '__main__': | ||
main() |