diff --git a/README.md b/README.md index 51417de..706c3d9 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,23 @@ Tools for working with the Amazon Omics Service. ## Using the Omics Transfer Manager +### Set-up +When using the Omics Transfer Manager, it's important to install the required dependencies and service models. This includes installing the latest Omics service model if the installed model is not up to date. + +To install Omics Transfer Manager dependencies, use the pip command. If using Python 3, use the pip3 command. + +The Omics Transfer Manager has dependencies that require Python 3.7 or later. + +Omics Transfer Manager uses the Poetry library for dependency management and packaging. + +``` +pip install botocore3 +pip install poetry +poetry install +``` + +After running `poetry install`, the Omics Transfer Manager should be ready for use. + ### Basic Usage The `TransferManager` class makes it easy to download files for an Omics reference or read set. By default the files are saved to the current directory, or you can specify a custom location with the `directory` parameter. @@ -57,7 +74,7 @@ For paired end reads, you can define `fileobjs` as a list of files. 
read_set_id = manager.upload_read_set( "my-sequence-data/read-set-file.bam", SEQUENCE_STORE_ID, - ReadSetFileType.BAM, + "BAM", "name", "subject-id", "sample-id", @@ -68,7 +85,7 @@ read_set_id = manager.upload_read_set( read_set_id = manager.upload_read_set( ["my-sequence-data/read-set-file_1.fastq.gz", "my-sequence-data/read-set-file_2.fastq.gz"], SEQUENCE_STORE_ID, - ReadSetFileType.FASTQ, + "FASTQ", "name", "subject-id", "sample-id", diff --git a/omics/common/omics_file_types.py b/omics/common/omics_file_types.py index 941f5ac..4296486 100644 --- a/omics/common/omics_file_types.py +++ b/omics/common/omics_file_types.py @@ -73,3 +73,4 @@ class ReadSetFileType(ExtendedEnum): FASTQ = "FASTQ" BAM = "BAM" CRAM = "CRAM" + UBAM = "UBAM" diff --git a/omics/transfer/__init__.py b/omics/transfer/__init__.py index f0b2479..1876364 100644 --- a/omics/transfer/__init__.py +++ b/omics/transfer/__init__.py @@ -3,7 +3,7 @@ from s3transfer.futures import TransferFuture from s3transfer.subscribers import BaseSubscriber -from omics.common.omics_file_types import OmicsFileType, ReadSetFileType +from omics.common.omics_file_types import OmicsFileType class OmicsTransferSubscriber(BaseSubscriber): @@ -58,18 +58,18 @@ class ReadSetUpload: """Details of an Omics read set upload.""" def __init__( - self, - store_id: str, - file_type: ReadSetFileType, - name: str, - subject_id: str, - sample_id: str, - reference_arn: str, - fileobj: Union[IO[Any], str], - generated_from: Optional[str] = None, - description: Optional[str] = None, - tags: Optional[Dict[str, str]] = None, - subscribers: Optional[List[BaseSubscriber]] = None, + self, + store_id: str, + file_type: str, + name: str, + subject_id: str, + sample_id: str, + fileobj: Union[IO[Any], str], + reference_arn: Optional[str] = None, + generated_from: Optional[str] = None, + description: Optional[str] = None, + tags: Optional[Dict[str, str]] = None, + subscribers: Optional[List[BaseSubscriber]] = None, ): """Details of a read set upload. 
diff --git a/omics/transfer/manager.py b/omics/transfer/manager.py index 5fbcb34..8acd508 100644 --- a/omics/transfer/manager.py +++ b/omics/transfer/manager.py @@ -25,7 +25,6 @@ from omics.common.omics_file_types import ( OmicsFileType, ReadSetFileName, - ReadSetFileType, ReferenceFileName, ) from omics.transfer import ( @@ -52,6 +51,7 @@ "FASTQ": "fastq", "BAM": "bam", "CRAM": "cram", + "UBAM": "bam", } # Map of file type to index file extension. @@ -353,11 +353,11 @@ def upload_read_set( self, fileobjs: Union[IO[Any], str, List[Union[IO[Any], str]]], sequence_store_id: str, - file_type: ReadSetFileType, + file_type: str, name: str, subject_id: str, sample_id: str, - reference_arn: str, + reference_arn: Optional[str] = None, generated_from: Optional[str] = None, description: Optional[str] = None, tags: Optional[Dict[str, str]] = None, @@ -387,9 +387,13 @@ def upload_read_set( if len(fileobjs) > 2: raise AttributeError("at most two files can be uploaded to a read set") - if len(fileobjs) > 1 and file_type is not ReadSetFileType.FASTQ: + if len(fileobjs) > 1 and file_type != "FASTQ": raise AttributeError("paired end read files only supported for FASTQ") + if not reference_arn and file_type not in ("FASTQ", "UBAM"): + raise AttributeError("Unlinked readset file types must specify a referenceArn") + + transfer_coordinator = self._get_future_coordinator() transfer_futures = [] for fileobj in fileobjs: diff --git a/omics/transfer/read_set_upload.py b/omics/transfer/read_set_upload.py index eeafe67..e0a2ce6 100644 --- a/omics/transfer/read_set_upload.py +++ b/omics/transfer/read_set_upload.py @@ -38,7 +38,7 @@ def _main( """ args = { "sequenceStoreId": create_args.store_id, - "sourceFileType": create_args.file_type.value, + "sourceFileType": create_args.file_type, "subjectId": create_args.subject_id, "sampleId": create_args.sample_id, "generatedFrom": create_args.generated_from, @@ -53,6 +53,9 @@ def _main( ) upload_id = response["uploadId"] + if 
(not args.get("referenceArn") and args["sourceFileType"] not in ("FASTQ", "UBAM")): + raise AttributeError("Unlinked readset file types must specify a referenceArn") + # Add a cleanup if the multipart upload fails at any point. self._transfer_coordinator.add_failure_cleanup( client.abort_multipart_read_set_upload, diff --git a/tests/transfer/functional/test_manager.py b/tests/transfer/functional/test_manager.py index af0796b..fdf283c 100644 --- a/tests/transfer/functional/test_manager.py +++ b/tests/transfer/functional/test_manager.py @@ -9,7 +9,6 @@ from omics.common.omics_file_types import ( OmicsFileType, ReadSetFileName, - ReadSetFileType, ReferenceFileName, ) from omics.transfer.config import TransferConfig @@ -214,7 +213,7 @@ def test_upload_read_set(self): read_set_id = self.manager.upload_read_set( io.BytesIO(os.urandom(MIB_BYTES * 250)), TEST_CONSTANTS["sequence_store_id"], - ReadSetFileType.CRAM, + "CRAM", "name", "subjectId", "sampleId", @@ -359,7 +358,7 @@ def test_upload_read_set(self): read_set_id = self.manager.upload_read_set( io.BytesIO(os.urandom(MIB_BYTES)), TEST_CONSTANTS["sequence_store_id"], - ReadSetFileType.CRAM, + "CRAM", "name", "subjectId", "sampleId", diff --git a/tests/transfer/unit/test_manager.py b/tests/transfer/unit/test_manager.py index f6ff577..8efe3ef 100644 --- a/tests/transfer/unit/test_manager.py +++ b/tests/transfer/unit/test_manager.py @@ -7,7 +7,6 @@ from omics.common.omics_file_types import ( ReadSetFileName, - ReadSetFileType, ReferenceFileName, ) from omics.transfer.manager import TransferManager, _format_local_filename @@ -129,6 +128,11 @@ def test_format_fastq_index_local_filename(self): filename = _format_local_filename("test-filename", ReadSetFileName.INDEX, "FASTQ") self.assertEqual(filename, "test-filename.index") + # UBAM should not have an .index file but we include this for consistency. 
+ def test_format_ubam_index_local_filename(self): + filename = _format_local_filename("test-filename", ReadSetFileName.INDEX, "UBAM") + self.assertEqual(filename, "test-filename.index") + def test_format_gz_local_filename(self): filename = _format_local_filename("test-filename", ReadSetFileName.SOURCE1, "FASTQ", True) self.assertEqual(filename, "test-filename_1.fastq") @@ -193,12 +197,37 @@ def test_upload_too_many_files_throws_exception(self): def test_upload_paired_with_wrong_file_type_throws_exception(self): with self.assertRaises(AttributeError): self.run_simple_upload( - [io.BytesIO(b"content1"), io.BytesIO(b"content2")], ReadSetFileType.BAM + [io.BytesIO(b"content1"), io.BytesIO(b"content2")], "BAM" ).result() self.stubber.assert_no_pending_responses() + def test_upload_no_reference_with_BAM_file_type_exception(self): + with self.assertRaises(AttributeError): + self.transfer_manager.upload_read_set( + io.BytesIO(b"some file content1"), + TEST_CONSTANTS["sequence_store_id"], + "BAM", + "name", + "subjectId", + "sampleId", + ).result() + + self.stubber.assert_no_pending_responses() + + def test_upload_no_reference_with_CRAM_file_type_exception(self): + with self.assertRaises(AttributeError): + self.transfer_manager.upload_read_set( + io.BytesIO(b"some file content1"), + TEST_CONSTANTS["sequence_store_id"], + "CRAM", + "name", + "subjectId", + "sampleId", + ).result() + + self.stubber.assert_no_pending_responses() def run_simple_upload( - self, files: any, file_type: ReadSetFileType = ReadSetFileType.FASTQ + self, files: any, file_type: str = "FASTQ" ) -> TransferFuture: return self.transfer_manager.upload_read_set( files,