Skip to content

Commit

Permalink
Added doc strings to interactive session class and other respective m…
Browse files Browse the repository at this point in the history
…ethods.

Removed README.md to align with main branch.
  • Loading branch information
parth-kulkarni1 committed Aug 22, 2024
1 parent 0bf9926 commit 78baedf
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 169 deletions.
167 changes: 0 additions & 167 deletions src/README.md

This file was deleted.

131 changes: 129 additions & 2 deletions src/provenaclient/modules/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
HISTORY:
Date By Comments
---------- --- ---------------------------------------------------------
22-08-2024 | Parth Kulkarni | Completed Interactive Dataset class + Doc Strings.
15-08-2024 | Parth Kulkarni | Added a prototype/draft of the Interactive Dataset Class.
'''

from distutils.version import Version
from provenaclient.auth.manager import AuthManager
from provenaclient.utils.config import Config
from provenaclient.clients import DatastoreClient, SearchClient
Expand All @@ -23,7 +23,7 @@
from provenaclient.models import HealthCheckResponse, LoadedSearchResponse, LoadedSearchItem, UnauthorisedSearchItem, FailedSearchItem, RevertMetadata
from provenaclient.utils.exceptions import *
from provenaclient.modules.module_helpers import *
from ProvenaInterfaces.RegistryAPI import NoFilterSubtypeListRequest, VersionRequest, VersionResponse, SortOptions, SortType, DatasetListResponse
from ProvenaInterfaces.RegistryAPI import NoFilterSubtypeListRequest, VersionRequest, VersionResponse, SortOptions, DatasetListResponse
from provenaclient.modules.submodules import IOSubModule

from typing import AsyncGenerator, List
Expand Down Expand Up @@ -124,37 +124,135 @@ async def action_approval_request(self, action_approval_request: ActionApprovalR

class InteractiveDataset(ModuleService):

dataset_id: str
auth: AuthManager
datastore_client: DatastoreClient
io: IOSubModule

def __init__(
    self,
    dataset_id: str,
    auth: AuthManager,
    datastore_client: DatastoreClient,
    io: IOSubModule,
) -> None:
    """Initialise an interactive session bound to a single dataset.

    Parameters
    ----------
    dataset_id : str
        The unique identifier of the dataset this session operates on.
    auth : AuthManager
        An abstract interface containing the user's requested auth flow method.
    datastore_client : DatastoreClient
        The client responsible for interacting with the datastore API.
    io : IOSubModule
        The input/output submodule for handling dataset IO operations.
    """
    # NOTE(review): the class-level annotations name `auth` and
    # `datastore_client`, but the values are stored under the private names
    # `_auth` and `_datastore_client` - confirm which spelling is intended.
    self._auth = auth
    self._datastore_client = datastore_client

    # Publicly accessible session state.
    self.dataset_id = dataset_id
    self.io = io

async def fetch_dataset(self) -> RegistryFetchResponse:
    """Fetch the current dataset from the datastore.

    The lookup is delegated to the datastore client using the dataset id
    this session was created with.

    Returns
    -------
    RegistryFetchResponse
        The datastore client's fetch response for this session's dataset.
    """
    fetched = await self._datastore_client.fetch_dataset(id=self.dataset_id)
    return fetched

async def download_all_files(self, destination_directory: str) -> None:
    """Download all files of the current dataset into a local directory.

    Delegates to the IO submodule, supplying this session's dataset id.
    The underlying operation:

    - Fetches info
    - Fetches creds
    - Uses s3 cloud path lib to download all files to specified location

    Parameters
    ----------
    destination_directory : str
        The destination path to save files to - use a directory.
    """
    outcome = await self.io.download_all_files(
        destination_directory=destination_directory,
        dataset_id=self.dataset_id,
    )
    # Hand back whatever the IO submodule returned, as the original did.
    return outcome

async def upload_all_files(self, source_directory: str) -> None:
    """Upload all files in a local directory to the current dataset's storage location.

    Delegates to the IO submodule, supplying this session's dataset id.
    The underlying operation:

    - Fetches info
    - Fetches creds
    - Uses s3 cloud path lib to upload all files to specified location

    Parameters
    ----------
    source_directory : str
        The source path to upload files from - use a directory.
    """
    outcome = await self.io.upload_all_files(
        source_directory=source_directory,
        dataset_id=self.dataset_id,
    )
    # Hand back whatever the IO submodule returned, as the original did.
    return outcome

async def version(self, reason: str) -> VersionResponse:
    """Create a new version from the current dataset.

    Parameters
    ----------
    reason : str
        The reason for versioning this dataset.

    Returns
    -------
    VersionResponse
        Response of the versioning of the dataset, containing new version ID
        and job session ID.
    """
    # The request targets the dataset this session is bound to.
    payload = VersionRequest(id=self.dataset_id, reason=reason)
    return await self._datastore_client.version_dataset(
        version_dataset_payload=payload,
    )

async def revert_dataset_metadata(self, history_id: int, reason: str) -> StatusResponse:
    """Revert the current dataset's metadata to a previous historical version.

    Parameters
    ----------
    history_id : int
        The identifier of the historical version to revert to.
    reason : str
        The reason for reverting the dataset's metadata.

    Returns
    -------
    StatusResponse
        Response indicating whether your dataset metadata revert request
        was successful.
    """
    # The request targets the dataset this session is bound to.
    payload = RevertMetadata(
        id=self.dataset_id,
        history_id=history_id,
        reason=reason,
    )
    return await self._datastore_client.revert_metadata(metadata_payload=payload)

async def generate_read_access_credentials(self, console_session_required: bool) -> CredentialResponse:
"""Given an S3 location, will attempt to generate programmatic access keys for
the storage bucket at this particular subdirectory.
Parameters
----------
console_session_required : bool
Specifies whether a console session URL is required.
Returns
-------
CredentialResponse
The AWS credentials creating read level access into the subset of the bucket requested in the S3 location object.
"""

credentials_request = CredentialsRequest(
dataset_id=self.dataset_id,
Expand All @@ -164,6 +262,19 @@ async def generate_read_access_credentials(self, console_session_required: bool)
return await self._datastore_client.generate_read_access_credentials(read_access_credentials=credentials_request)

async def generate_write_access_credentials(self, console_session_required: bool) -> CredentialResponse:
"""Given an S3 location, will attempt to generate programmatic access keys for
the storage bucket at this particular subdirectory.
Parameters
----------
console_session_required : bool
Specifies whether a console session URL is required.
Returns
-------
CredentialResponse
The AWS credentials creating write level access into the subset of the bucket requested in the S3 location object.
"""

credentials_request = CredentialsRequest(
dataset_id=self.dataset_id,
Expand Down Expand Up @@ -550,6 +661,22 @@ async def search_datasets(self, query: str, limit: int = DEFAULT_SEARCH_LIMIT) -
)

async def interactive_dataset(self, dataset_id: str) -> InteractiveDataset:
"""Creates an interactive "session" with a dataset that allows you
to perform further operations without re-supplying dataset id and
creating objects required for other methods.
Parameters
----------
dataset_id : str
The unique identifier of the dataset to be retrieved.
For example: "10378.1/1451860"
Returns
-------
InteractiveDataset
An instance that allows you to perform various operations on the provided dataset.
"""

return InteractiveDataset(
dataset_id=dataset_id,
datastore_client=self._datastore_client,
Expand Down

0 comments on commit 78baedf

Please sign in to comment.