Skip to content

Commit

Permalink
Merge pull request #18 from AAFC-BICoE/External-Resource-Upload
Browse files Browse the repository at this point in the history
External resource upload
  • Loading branch information
brandonandre authored Dec 5, 2024
2 parents 84b1573 + ad31ff3 commit df56ffa
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dinapy/schemas/materialsampleschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ def object_deserialization(self, data, **kwargs):
preparationMethod = create_relationship("material-sample","preparationMethod")
parentMaterialSample = create_relationship("material-sample","parentMaterialSample")
preparedBy = create_relationship("material-sample","preparedBy")
attachment = create_relationship("material-sample","attachment")
attachment = create_relationship("material-sample","metadata","attachment")
preparationProtocol = create_relationship("material-sample","preparationProtocol")
projects = create_relationship("material-sample","projects")
projects = create_relationship("material-sample","project","projects")
assemblages = create_relationship("material-sample","assemblages")
organism = create_relationship("material-sample","organism")
storageUnit = create_relationship("material-sample","storageUnit")
Expand Down
21 changes: 21 additions & 0 deletions examples/external-resource-import-demo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# dina-py

## External Resource URL importer

Reads a .txt file and creates External Resource URLs that are linked to Material Samples as Attachments.

### Usage

Before using the script, make sure to go through the installation instructions in the repository's README.

Once done, change the current working directory in the terminal:
```bash
cd examples/external-resource-import-demo
```

Copy the .txt file containing the External URLs to be uploaded into the current folder.

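Then modify (as needed) and run the **external_resource_importer.py** script. Note that the file paths hard-coded in the script begin with `examples/external-resource-import-demo/`, so adjust them to match the directory you run the script from.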

An output file called **external_url_uuids.txt** will be created, containing the generated UUID of each External Resource.
A similar file called **material_sample_uuids.txt** will also be created, containing the UUIDs of the created Material Samples.
100 changes: 100 additions & 0 deletions examples/external-resource-import-demo/external_resource_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from dinapy.apis.objectstoreapi.objectstore_api import ObjectStoreApi
from dinapy.entities.Metadata import MetadataAttributesDTOBuilder, MetadataDTOBuilder
from dinapy.schemas.metadata_schema import MetadataSchema
from dinapy.apis.collectionapi.materialsampleapi import MaterialSampleAPI
from dinapy.entities.MaterialSample import MaterialSampleDTOBuilder, MaterialSampleAttributesDTOBuilder
from dinapy.schemas.materialsampleschema import MaterialSampleSchema
from dinapy.entities.Relationships import RelationshipDTO

import traceback
import os
import sys

# Add the root directory of the project to the Python path
# project_root is the directory two levels above this file's absolute path,
# i.e. the parent of the folder that contains this script.
# NOTE(review): this runs after the dinapy imports at the top of the file, so
# it cannot influence how those imports were resolved -- confirm whether it
# should be moved above them.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, project_root)

def main():
    """Create an External Resource and a linked Material Sample per dataset line.

    Each non-blank line of the dataset file is treated as the path of an
    external file. For every such line the script:

    1. builds a metadata object whose ``resourceExternalURL`` points at the
       path and submits it through ``ObjectStoreApi.create_entity``;
    2. appends the returned UUID to ``external_url_uuids.txt``;
    3. builds a Material Sample carrying an ``attachment`` relationship to
       that metadata object and submits it through
       ``MaterialSampleAPI.create_entity``;
    4. appends the returned Material Sample UUID to
       ``material_sample_uuids.txt``.

    Processing stops at the first failure; the traceback is appended to
    ``error_log.txt``.

    Returns:
        ``'Duplicate File, File not Uploaded'`` when the logged traceback
        contains ``'Duplicate File Exists'``; otherwise ``None``.
    """
    # File to be uploaded
    # Change as needed
    file = "examples/external-resource-import-demo/dataset.txt"

    try:
        with open(file, 'r') as f:
            for line in f:
                # Drop the line terminator (and surrounding whitespace) so it
                # does not leak into the external URL or the file name.
                resource_path = line.strip()
                if not resource_path:
                    # Blank lines carry no resource; skip them instead of
                    # creating entities with empty names.
                    continue

                # Take the last path segment as the filename
                file_name = resource_path.split("/")[-1]

                # Set metadata attributes
                # Change as needed
                metadata_attributes = (
                    MetadataAttributesDTOBuilder()
                    .set_dcType("DATASET")
                    .set_acCaption(file_name)
                    .set_acSubtype("SEQUENCE FILE")
                    .set_dcFormat("application/gzip")
                    .set_resourceExternalURL(f"file:/{resource_path}")
                    .set_fileExtension(".fastq.gz")
                    .set_bucket("aafc")
                    .set_originalFilename(file_name)
                    .build()
                )
                file_metadata = MetadataDTOBuilder().set_attributes(metadata_attributes).build()

                file_metadata_api = ObjectStoreApi()
                file_metadata_schema = MetadataSchema()

                # Build Metadata JSON object
                metadata_payload = file_metadata_schema.dump(file_metadata)
                # Upload to DINA instance
                extern_res_response = file_metadata_api.create_entity(metadata_payload, 'metadata')

                # Write UUIDs to file
                extern_res_uuid = extern_res_response.json()['data']['id']
                with open('examples/external-resource-import-demo/external_url_uuids.txt', 'a') as ff:
                    print(extern_res_uuid, file=ff)
                print(f"External Resource URL {extern_res_uuid} Created")

                # Build relationship to External Resource
                link_to_url = (
                    RelationshipDTO.Builder()
                    .add_relationship(
                        "attachment",  # Makes an attachment relationship
                        "metadata",  # The type of object to be attached
                        extern_res_uuid  # Object UUID
                    )
                    .build()
                )

                # Get Material Sample info from file name
                # Parsing done for a sample .txt file
                # Change as needed
                sample_name = file_name.split(".")[0]

                # Create Material Samples
                material_sample_api = MaterialSampleAPI()
                material_sample_schema = MaterialSampleSchema()
                # Define Material Sample Attributes based on variables
                # Change as needed
                material_sample_attributes = (
                    MaterialSampleAttributesDTOBuilder()
                    .createdBy("dina-admin")
                    .group("aafc")
                    .materialSampleName(sample_name)
                    .materialSampleRemarks(f"File Name: {file_name}")
                    .build()
                )
                # Build Material Sample
                material_sample = (
                    MaterialSampleDTOBuilder()
                    .attributes(material_sample_attributes)
                    .relationships(link_to_url)
                    .build()
                )

                serialized_material_sample = material_sample_schema.dump(material_sample)

                # Create DINA object
                mat_sample_response = material_sample_api.create_entity(serialized_material_sample)
                mat_sample_id = mat_sample_response.json()['data']['id']
                print(f"Material Sample {mat_sample_id} Created")
                print()

                # Write Material Sample UUIDs to file
                with open("examples/external-resource-import-demo/material_sample_uuids.txt", 'a') as writer:
                    print(mat_sample_id, file=writer)

    # Log failures. ``Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit still terminate the script.
    except Exception:
        error_details = traceback.format_exc()
        with open('examples/external-resource-import-demo/error_log.txt', 'a') as f:
            print(f'File Not Uploaded: {file}\n{error_details}', file=f)
        if 'Duplicate File Exists' in error_details:
            return 'Duplicate File, File not Uploaded'


if __name__ == '__main__':
    main()
55 changes: 55 additions & 0 deletions examples/external-resource-import-demo/link_samples_to_project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from dinapy.apis.collectionapi.materialsampleapi import MaterialSampleAPI
from dinapy.entities.MaterialSample import MaterialSampleDTOBuilder
from dinapy.schemas.materialsampleschema import MaterialSampleSchema
from dinapy.entities.Relationships import RelationshipDTO

import traceback
import os
import sys

# Add the root directory of the project to the Python path
# project_root is the directory two levels above this file's absolute path,
# i.e. the parent of the folder that contains this script.
# NOTE(review): this runs after the dinapy imports at the top of the file, so
# it cannot influence how those imports were resolved -- confirm whether it
# should be moved above them.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, project_root)

def main():
    """Attach a pre-created Project to every Material Sample listed in a file.

    A single payload containing only a ``projects`` relationship to
    ``project_uuid`` is built once, then sent through
    ``MaterialSampleAPI.update_entity`` for each non-blank UUID read from
    ``material_sample_uuids.txt``.

    Processing stops at the first failure; the traceback is appended to
    ``error_log.txt``.
    """
    # Pre-created Project Object UUID
    # Change as needed
    project_uuid = "01939841-002e-7597-86f2-368144d714b9"

    # Build relationship to Project
    link_to_project = (
        RelationshipDTO.Builder()
        .add_relationship(
            "projects",  # Makes a projects relationship
            "project",  # The type of object to be attached
            project_uuid  # Project UUID
        )
        .build()
    )

    # Build Material Sample JSON Data
    material_sample_api = MaterialSampleAPI()
    material_sample_schema = MaterialSampleSchema()
    material_sample = MaterialSampleDTOBuilder().relationships(link_to_project).build()
    # JSON data to be passed
    serialized_material_sample = material_sample_schema.dump(material_sample)

    # File that stores Material Sample UUIDs
    file = "examples/external-resource-import-demo/material_sample_uuids.txt"
    try:
        with open(file, 'r') as f:
            for line in f:
                mat_sample_uuid = line.strip()
                if not mat_sample_uuid:
                    # A blank line would otherwise trigger an update request
                    # with an empty identifier.
                    continue

                # Update DINA Material Sample Object
                response = material_sample_api.update_entity(mat_sample_uuid, serialized_material_sample)
                mat_sample_id = response.json()['data']['id']
                print(f"Material Sample {mat_sample_id} Updated")
    # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt /
    # SystemExit still terminate the script.
    except Exception:
        with open('examples/external-resource-import-demo/error_log.txt', 'a') as f:
            print(f'File Not Uploaded: {file}\n{traceback.format_exc()}', file=f)


if __name__ == '__main__':
    main()

0 comments on commit df56ffa

Please sign in to comment.