diff --git a/.wordlist.txt b/.wordlist.txt index 80246ef9..6fad9de6 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -31,7 +31,7 @@ FileParam MultiFileParam FileTag txt -AgentModelsAndMethods +AgentSDK SDKSidebar NodeChip FileWatcherResult @@ -304,3 +304,27 @@ upsert FileTags programmatically interpretable +QuickstartBuildAgent +QuickstartAgent +subdirectory +Allotrope +AgentTemplates +SDKs +GanymedeSDKOverview +bq +templating +DataFrameSchema +Pandera +SchemaValidation +nullable +assaysch +ent +src +assaysch +FolderID +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +xxxx +xxxxxxxx +xxxxxxxxxxxx +BenchlingClass +DataValidation diff --git a/docs/app/agents/Agent.mdx b/docs/app/agents/Agent.mdx index 3aca3b97..3bbba8a7 100644 --- a/docs/app/agents/Agent.mdx +++ b/docs/app/agents/Agent.mdx @@ -284,7 +284,7 @@ For example, if your instrument writes out files in a directory like: You would use parameters like `*/configuration.xml` and `*/results.csv` to upload the files and submit them to a flow. -Documentation on the objects used in the user-defined code and additional examples can be found on the [Agent Data Models](../../sdk/markdowns/AgentModelsAndMethods.mdx) page. +Documentation on the objects used in the user-defined code and additional examples can be found on the [Agent Templates](./AgentTemplates.mdx#watch-for-files-locally-and-run-flow) page. 
#### Example use case @@ -319,7 +319,7 @@ def execute(**kwargs) -> UploadFileParams | None: return UploadFileParams(files=[new_file_param]) ``` -Additional examples can be found on the [Agent Models and Methods page](../../sdk/markdowns/AgentModelsAndMethods#cron-agent) +Additional examples can be found on the [Agent Templates page](./AgentTemplates.mdx#cron-agent) #### Input Parameters @@ -391,6 +391,8 @@ The _output_path_ variable, configured for each Connection [at installation](./A   +Additional examples can be found on the [Agent Templates page](./AgentTemplates.mdx#watch-for-flow-outputs-then-save-locally) + #### Example use case Instructions for lab execution are generated in Ganymede Cloud and downloaded to the instrument PC for execution. diff --git a/docs/app/agents/AgentMonitoring.mdx b/docs/app/agents/AgentMonitoring.mdx index c06a091f..cc986804 100644 --- a/docs/app/agents/AgentMonitoring.mdx +++ b/docs/app/agents/AgentMonitoring.mdx @@ -18,7 +18,7 @@ import { DiffOutlined, EditOutlined, PlusOutlined, SelectOutlined } from "@ant-d ### Viewing Logs -Logs can be found on the Logs tab for each Agent and Connection, and filtered by both log severity and date. For Agents v4.9+, user-defined log messages can be added by referencing the corresponding [`agent_sdk` method for logging](../../sdk/markdowns/AgentModelsAndMethods#logging-methods). +Logs can be found on the Logs tab for each Agent and Connection, and filtered by both log severity and date. For Agents v4.9+, user-defined log messages can be added by referencing the corresponding [`agent_sdk` method for logging](../../sdk/markdowns/AgentSDK#logging-methods). Descriptions of log messages can be found on the [Agent Logs page](./AgentLogs). 
diff --git a/docs/sdk/markdowns/AgentModelsAndMethods.mdx b/docs/app/agents/AgentTemplates.mdx similarity index 50% rename from docs/sdk/markdowns/AgentModelsAndMethods.mdx rename to docs/app/agents/AgentTemplates.mdx index 16549ad8..1b520570 100644 --- a/docs/sdk/markdowns/AgentModelsAndMethods.mdx +++ b/docs/app/agents/AgentTemplates.mdx @@ -1,17 +1,15 @@ --- -id: AgentModelsAndMethods -title: Agent Models and Methods +id: AgentTemplates +title: Agent Templates displayed_sidebar: SDKSidebar -toc_max_heading_level: 4 +sidebar_label: Agent Templates --- import NodeChip from '@site/src/components/NodeChip.js' -## User-defined code examples by Agent type - This section contains a cookbook of user-defined code examples, which can be helpful for expediting Agent definition. -### Watch for files locally and run Flow +## Watch for files locally and run Flow In the user-defined function for Agents executing Flows, the _get_param_mapping_ function is run whenever a file is added or modified in the directory that the Agent is watching. The _execute_ function is run whenever all files are observed. The _get_param_mapping_ function @@ -68,7 +66,7 @@ def execute(flow_params_fw: FileWatcherResult) -> TriggerFlowParams: pass ``` -#### Delivering files to a Flow with single file input Nodes +### Delivering files to a Flow with single file input Nodes This example shows an Agent that delivers a csv file to the [Bioreactor_File Node](../../nodes/File/CSV_Read), an excel file containing the word 'medium' to the [Medium_Composition Node](../../nodes/File/Excel_Read.md), and an excel file containing the word 'eventlog' to the [Event_Log Node](../../nodes/File/Excel_Read.md). 
@@ -79,7 +77,7 @@ import re from typing import Callable from urllib import parse -from ganymede_sdk.agent.models import ( +from agent_sdk import ( FileParam, FileWatcherResult, MultiFileParam, @@ -122,7 +120,7 @@ def execute(flow_params_fw: FileWatcherResult, **kwargs) -> TriggerFlowParams: ) ``` -#### Deliver files to a Flow with a multi-input Node +### Deliver files to a Flow with a multi-input Node This example shows an Agent configured to work with an flow with a Node taking multiple inputs, picking up filenames starting with 'Yeast_B1', 'Yeast_B2', 'Yeast_C1', 'Yeast_C2' and delivering the observed files to the Read_FCS_Files node. The Agent also delivers an input parameter of "exp234" to the [Experiment_ID Node](../../nodes/File/Input_File_Multi.md), which is an [Input_Param node](../../nodes/Tag/Input_Param). @@ -131,12 +129,12 @@ import glob import os from typing import Callable -from agent_sdk import info - -from ganymede_sdk.agent.models import ( +from agent_sdk import ( FileWatcherResult, MultiFileParam, TriggerFlowParams, + info, + file_params_list_to_multi, ) @@ -177,7 +175,7 @@ def execute(flow_params_fw: FileWatcherResult, **kwargs) -> TriggerFlowParams: file_param.param = fcs_param file_param_list.append(file_param) - m = MultiFileParam.from_file_param(file_param_list) + m = file_params_list_to_multi(file_param_list) m.param = fcs_param @@ -189,14 +187,13 @@ def execute(flow_params_fw: FileWatcherResult, **kwargs) -> TriggerFlowParams: ) ``` -### Watch for flow outputs then save locally +## Watch for flow outputs then save locally -#### Deliver worklist to a liquid handler PC +### Deliver worklist to a liquid handler PC ```python -from agent_sdk import info, error -from ganymede_sdk.agent.models import FileParam +from agent_sdk import FileParam, info, error from pathlib import Path import os @@ -262,18 +259,17 @@ def execute(new_file: FileParam, **kwargs) -> None: return None ``` -### Cron Agent +## Cron Agent -#### Upload file if modified in the 
last day +### Upload file if modified in the last day This Agent would require _watch_dir_ to be configured. Recency of last modified date can be configured if desired; if it isn't configured, then the watched file would be uploaded if it was modified in the last hour. ```python -from ganymede_sdk.agent.models import FileParam, UploadFileParams from pathlib import Path import os import time -from agent_sdk import info, error +from agent_sdk import info, error, FileParam, UploadFileParams # Required Function def execute(**kwargs) -> UploadFileParams | None: @@ -327,183 +323,3 @@ Variables can be configured during the installation by [passing additional varia Post-installation, for Agents v4.8+, the parameters for Windows Connections can be [updated in the Connection UI](../../app/agents/AgentMonitoring#monitoring-agent-connections). -## Classes for Agent-triggered flows - -Objects for triggering a Flow from an [Agent](../../app/agents/Agent) can generally be found in `ganymede_sdk.agent.models`. - -### FileWatcherResult Class - -FileWatcherResult is a dictionary of FileParam objects indexed by `node name`.`param name`. - -- _param_ **files**: dict[str, fileParam] - Dictionary of FileParam objects indexed by `node name`.`param name` -- _param_ **tags**: Optional[List[FileTag]] - List of tags to be applied to all files - -### TriggerFlowParams Class - -TriggerFlowParams specifies the inputs for the Flow executed when all files are observed. It includes the following parameters: - -- _param_ **single_file_params**: Optional[dict[str, FileParam]] - Dict of FileParam objects indexed by `node name`.`param name`. These parameters are used for Nodes that accept a single file as input. -- _param_ **multi_file_params**: Optional[dict[str, MultiFileParam]] - Dict of MultiFileParam objects indexed by `node name`.`param name`. These parameters are used for Nodes that accept multiple files as input. 
-- _param_ **benchling_tag**: Optional[Tag] - Additional parameters to be passed to flow. This parameter is used for inputs to the Input_Benchling node. -- _param_ **additional_params**: Optional[dict[str, str]] - Additional parameters to be passed to flow. This parameter is used for inputs to the [Input_Param node](../../nodes/Tag/Input_Param.md); the key is the name if the Node name for the input parameter, and the value is the string to pass into the Node. - -### FileParam Class - -FileParam specifies files to be uploaded to Ganymede Cloud and their corresponding Flow parameters. These parameters are provided to the _execute_ function once all files are detected. - -- _param_ **filename**: str - Name of the file, e.g. "my_file.txt" -- _param_ **content_type**: str - Content type of the file, e.g. "text/plain". If not specified, the content type of the first file in the files dict will be used. -- _param_ **body**: bytes - File contents in bytes -- _param_ **param**: str - Name of parameter to be used in Flow, e.g. `node_name`.`parameter_field_name` -- _param_ **parent_dir**: str - Path within the Agent watch directory containing the file. For example. if C:/Users/username/watch_dir/ is being watched and C:/Users/username/watch_dir/abc/def/my_file.txt is found, then parent_dir would be "abc/def" -- _param_ **upload_ts**: str - Timestamp string in ISO format of when file was uploaded to the Agent watch directory, e.g. "2021-01-01T00:00:00Z" -- _param_ **upload_path**: Optional[str] - Path in Ganymede storage where file will be uploaded -- _param_ **tags**: Optional[List[FileTag]] - List of tags to be applied to the file -- _param_ **bucket_name**: str - Bucket associated with file -- _param_ **files**: str - Alternative method for specifying file contents, where the key is the filename and the value is the file body. - -### MultiFileParam Class - -MultiFileParam is used for submitting multiple files to a single node. 
It includes the following parameters: - -- _param_ **files**: str - Alternative method for specifying file contents, where the key is the filename and the value is the file body. -- _param_ **content_type**: str - Content type of file, e.g. "text/plain". If not specified, the content type of the first file in the files dict will be used. -- _param_ **param**: str - Name of parameter to be used in flow, e.g. `node_name`.`parameter_field_name` -- _param_ **parent_dir**: str - Path within Agent watch directory that contains file. For example. if C:/Users/username/watch_dir/ is being watched and C:/Users/username/watch_dir/abc/def/my_file.txt is found, then parent_dir would be "abc/def" -- _param_ **upload_ts**: str - Timestamp string in ISO format of when file was uploaded to Agent watch directory, e.g. "2021-01-01T00:00:00Z" -- _param_ **upload_paths**: Optional[List[str]] - Path in Ganymede storage where file will be uploaded -- _param_ **tags**: Optional[List[FileTag]] - List of tags to be applied to file -- _param_ **bucket_name**: str - Bucket associated with file - -The MultiFileParam object contains a method for initiation from a list of FileParam objects as shown below. The content type of the object is assumed to take on the content type of the first item in the list. - -```python -# assume fp1 and fp2 are FileParam objects -m = MultiFileParam.from_file_param([fp1, fp2]) -``` - -## Utility functions - -Agent utility functions are provided in `ganymede_sdk.agent.utils` for validating data integrity and interacting with file systems. - -### Computing file checksums - -Ganymede provides functions to validate file integrity, accessible via `ganymede_sdk.agent.utils`. 
These values can be used to verify the integrity of a file uploaded to cloud storage: - -```python -from ganymede_sdk.agent.utils import calculate_md5, calculate_crc32c - -file_path = "path/to/local/file" - -# either md5 or crc32c can be used to validate the integrity of a file -md5 = calculate_md5(file_path) -crc32c = calculate_crc32c(file_path) -``` - -You can also calculate the checksum of a file uploaded to Ganymede Cloud by creating a a tempfile.TemporaryFile object, writing the file contents to it, and then calculating the checksum: - -```python -from ganymede_sdk.agent.utils import calculate_md5, calculate_crc32c -import os -import tempfile - -data = b"Example data to calculate checksum" - -with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - tmp_file.write(data) - tmp_file_name = tmp_file.name - -md5 = calculate_md5(tmp_file_name) -crc32c = calculate_crc32c(tmp_file_name) - -os.remove(tmp_file_name) -``` - -### File system utilities - -`ganymede_sdk.agent.utils` provides a number of convenience functions, which can be helpful to use with cron Agents that involve more complex logic prior to invoking a flow. Some examples of this are when a file is written to multiple times before being processed, or if there is a variable number of files being processed, such that the trigger for invoking a flow requires more than just the presence of a file. - -#### ScanResult Dataclass - -ScanResult stores file paths for files of interest. It includes: - -- _param_ **file_path**: str - Path to file -- _param_ **modified_time**: datetime - Datetime of when file was last modified - -#### Functions - -`list_files_recursive` returns a list of all files in a directory and its subdirectories. - -- _param_ **file_path**: str - Path to directory to list files from - -`matches_pattern` returns True if a file path matches at least one of the specified regex patterns specified and False otherwise. 
- -- _param_ **filename**: str - Name of file -- _param_ **pattern**: re.Pattern | list[re.Pattern] - Regex pattern or list of regex patterns to match against - -`is_file_ready` returns True if a file has the modified time is within the last **interval_in_seconds** seconds, or if the size of the file has changed in that same timespan. - -- _param_ **file_path**: str - Path to file to watch -- _param_ **threshold_seconds**: int - Number of seconds to wait between checks, by default 0.1 - -`get_most_recent_access_result` returns a ScanResult object referencing the most recently accessed file in a directory. Access time is updated when a file is read from or written to. - -- _param_ **directory**: str - Path to directory to watch - -`filter_by_age` returns a list of files that have not been modified within the last **age_in_minutes** minutes. - -- _param_ **scan_results**: List[ScanResult] - List of ScanResult objects -- _param_ **age_in_minutes**: int - Minimum age in minutes - -`zip_directory` creates a zip file of a directory and its contents. - -- _param_ **directory**: str - Path to directory to zip -- _param_ **zip_file**: str - Path to zip file to create - -`scan_for_finished_files` scans a directory, returning paths to files with a modified date older than the specified number of minutes - -- _param_ **directory**: str - Path to directory to scan -- _param_ **age_in_minutes**: int - Minimum age in minutes for files to be included in the results -- _param_ **pattern**: re.Pattern | list[re.Pattern] - Regex pattern to match files against; only files that match against at least one of the specified patterns will be included in results - -#### Example Use Case - -You can use `scan_for_finished_files` to continuously scan a directory for files, uploading them to Ganymede Cloud for processing when they are older than a specified number of minutes. 
The Flow could query previously uploaded files using the [list_files](../GanymedeClass.mdx#method-list_files) method to avoid uploading the same file multiple times. - -## Agent SDK - -The Agent SDK offers access to query the Ganymede database and add logging messages to the web app (for Agents v4.8+). - -### Querying Ganymede from Agent Code - -```python -from agent_sdk.query import read_sql_query - -df = read_sql_query('SELECT * FROM instrument_methods') -``` - -### Logging Methods - -Ganymede Agents (v4.9+) support user-defined logging messages in the `agent_sdk`, aligning with [logging level for Agent messages](../../app/agents/AgentLogs#logging-level). Each level corresponds with a separate method in agent_sdk. - -```python -from agent_sdk import internal, debug, info, activity, error - -# log internal -internal('Display internal message') - -# log debug -debug('Display debug message') - -# log info -info('Display info message') - -# log activity -activity('Display activity message') - -# log error -error('Display error message') -``` - -In the UI, these log messages are [viewable and filterable on the corresponding Connections page](../../app/agents/AgentMonitoring#viewing-logs). 
- diff --git a/docs/app/agents/QuickstartBuildAgent.mdx b/docs/app/agents/QuickstartBuildAgent.mdx index 58be5436..1bf5bef8 100644 --- a/docs/app/agents/QuickstartBuildAgent.mdx +++ b/docs/app/agents/QuickstartBuildAgent.mdx @@ -151,6 +151,6 @@ Now that you have successfully created and installed an Agent, you can explore m - [How to modify the agent code](./Agent#watch-for-files-locally-then-run-flow) to add custom functionality, such as - [Adding tags to files](../files/Tags.mdx) to make captured files easier to find in Ganymede - Parsing metadata from file contents to determine how files are processed - - Delivering [multiple files into a single Node](../../sdk/markdowns/AgentModelsAndMethods#classes-for-agent-triggered-flows) -- Incorporating [Agent utility functions](../../sdk/markdowns/AgentModelsAndMethods) from the Ganymede SDK and Agent SDK + - Delivering [multiple files into a single Node](../../sdk/markdowns/AgentSDK#classes-for-agent-triggered-flows) +- Incorporating [Agent utility functions](../../sdk/markdowns/AgentSDK) from the Ganymede SDK and Agent SDK - Interpreting [Agent log messages](./AgentLogs) diff --git a/docs/app/files/Tags.mdx b/docs/app/files/Tags.mdx index ae5c6921..55472e20 100644 --- a/docs/app/files/Tags.mdx +++ b/docs/app/files/Tags.mdx @@ -43,7 +43,7 @@ The strict mode setting, if disabled, allows admins to delete or modify tags. T ### Tagging Files -Files can be tagged in user-defined code within flows and Agents, though the methods differ slightly. In flows, files are tagged by passing the file path to the `add_file_tag` function. Within Agents, files are tagged by passing the [FileParam](../../sdk/markdowns/AgentModelsAndMethods#classes-for-agent-triggered-flows) object into the `add_file_tag_to_fileparam` function. The FileParam object contains the file that the Agent submits to Ganymede storage (for initiating a flow if the Agent is configured to do so). 
+Files can be tagged in user-defined code within flows and Agents, though the methods differ slightly. In flows, files are tagged by passing the file path to the `add_file_tag` function. Within Agents, files are tagged by passing the [FileParam](../../sdk/markdowns/AgentSDK#classes-for-agent-triggered-flows) object into the `add_file_tag_to_fileparam` function. The FileParam object contains the file that the Agent submits to Ganymede storage (for initiating a flow if the Agent is configured to do so). The full set of methods available for interacting with tags can be found on the [File Tag](../../sdk/FileTags.mdx) module in the SDK documentation. diff --git a/docs/sdk/Benchling.mdx b/docs/sdk/Benchling.mdx index e2e9ea89..d7c53e9b 100644 --- a/docs/sdk/Benchling.mdx +++ b/docs/sdk/Benchling.mdx @@ -9,7 +9,7 @@ import NodeChip from '@site/src/components/NodeChip.js' ## Overview -The Benchling object can be used within editor notebooks and operators to +The **Benchling** object can be used within editor notebooks and operators to - Upload files to Benchling - Upload assay results to Benchling @@ -58,7 +58,7 @@ Custom entities can be created and updated with - **create_or_update_custom_entity**(self, entity_name: str, folder_id: str, schema_id: str, registry_id: str, author_id=None, custom_entity_fields=None, if_exists="fail") Here is an example of creating a custom entity from the columns of a dataframe. If the entity -already exsits, the method will update it if if\_exists = "update". +already exists, the method will update it if if\_exists = "update". ```python from ganymede_sdk.api.benchling import Benchling @@ -210,7 +210,7 @@ file_ids = b.create_benchling_ids_from_files( ### Uploading Assay Results - create_assay_results_from_dataframe This method can be used to upload assay results to Benchling. You can use create\_ids\_from\_files to -associate rows in your dataframe with uplaoded benchling files. 
+associate rows in your dataframe with uploaded benchling files. ```python from ganymede_sdk.api.benchling import Benchling diff --git a/docs/sdk/DataValidation.mdx b/docs/sdk/DataValidation.mdx index 84398cf3..067f3cb4 100644 --- a/docs/sdk/DataValidation.mdx +++ b/docs/sdk/DataValidation.mdx @@ -27,11 +27,11 @@ validated_df = SchemaValidation(df) ``` ### Constructor Parameters -**prefix_cols**: `Optional[list[str]]`: List of column prefixes to treat as a group. For example, if the DataFrame has columns ['a_1', 'a_2', 'b_1', 'b_2'], and ['a_', 'b_'] was specified as prefix_cols, then all columns that start with 'a_' or 'b_' would receive the same schema as the first element in the group. +**prefix_cols**: `list[str] | None`: List of column prefixes to treat as a group. For example, if the DataFrame has columns ['a_1', 'a_2', 'b_1', 'b_2'], and ['a_', 'b_'] was specified as prefix_cols, then all columns that start with 'a_' or 'b_' would receive the same schema as the first element in the group. -**nullable_cols**: `Optional[list[str]]`: List of columns that are nullable in the schema. If not specified, all columns are considered nullable. +**nullable_cols**: `list[str] | None`: List of columns that are nullable in the schema. If not specified, all columns are considered nullable. -**required_cols**: `Optional[list[str]]`: List of columns that are required in the schema. If not specified, all columns are considered optional. +**required_cols**: `list[str] | None`: List of columns that are required in the schema. If not specified, all columns are considered optional. ### Attributes **property** df: `pd.DataFrame`: Pandas DataFrame to validate diff --git a/docs/sdk/FileTags.mdx b/docs/sdk/FileTags.mdx index 41fc5e41..a57e83e3 100644 --- a/docs/sdk/FileTags.mdx +++ b/docs/sdk/FileTags.mdx @@ -16,16 +16,16 @@ Adds a file tag to a file. - _param_ **input_file_path**: str - The name of the file to tag. 
The file path can be obtained from the keys in the dictionary returned by the [retrieve_files](../sdk/GanymedeClass#method-retrieve_files) for Ganymede class or by calling the [get_gcs_uri method](#get_gcs_uri). - _param_ **tag_type_id**: str - The name of the tag type to apply to the file. This corresponds with the tag type created on the Manage Tag Types tab of the Files page in the Ganymede UI. - _param_ **display_value**: str - Value to apply to the tag. -- _param_ **tag_id**: Optional[str] - The tag id that acts as a unique identifier for the tag. -- _param_ **url**: Optional[str] - URL associated with the file Tag, which can be used to link the tag to external sources -- _param_ **bucket**: Optional[str] - The bucket to use for the file uri; either "input" or "output" +- _param_ **tag_id**: str | None - The tag id that acts as a unique identifier for the tag. +- _param_ **url**: str | None - URL associated with the file Tag, which can be used to link the tag to external sources +- _param_ **bucket**: str | None - The bucket to use for the file uri; either "input" or "output" ### get_file_tags Gets all tags for a file. - _param_ **input_file_path**: Path to the file for listing tags. This path is the GCS URI, which can be obtained by passing the file path to the [get_gcs_uri function](#get_gcs_uri). -- _param_ **bucket**: Optional[str] - The bucket to use for the file uri; either "input" or "output" +- _param_ **bucket**: str | None - The bucket to use for the file uri; either "input" or "output" ### delete_file_tag @@ -33,7 +33,7 @@ Deletes all tags of type tag_type_id from the specified file. - _param_ **tag_type_id**: str - Tag type to delete - _param_ **input_file_path**: Path to the file for listing tags. This path is the GCS URI, which can be obtained by passing the file path to the [get_gcs_uri function](#get_gcs_uri). 
-- _param_ **bucket**: Optional[str] - The bucket to use for the file uri; either "input" or "output" +- _param_ **bucket**: str | None - The bucket to use for the file uri; either "input" or "output" ### delete_specific_file_tag @@ -42,7 +42,7 @@ Deletes a specific tag from a File. - _param_ **tag_type_id**: str - Tag type to delete - _param_ **tag_value**: str - Value of the specific tag to delete - _param_ **file_uri**: Path to the file for listing tags. This path is the GCS URI, which can be obtained by passing the file path to the [get_gcs_uri function](#get_gcs_uri). -- _param_ **bucket**: Optional[str] - The bucket to use for the file uri; either "input" or "output" +- _param_ **bucket**: str | None - The bucket to use for the file uri; either "input" or "output" ### upsert_file_tag @@ -51,9 +51,9 @@ Add or replace a file tag on a file. - _param_ **input_file_path**: str - The name of the file to tag. The file path can be obtained from the keys in the dictionary returned by the [retrieve_files](../sdk/GanymedeClass#method-retrieve_files) for Ganymede class. - _param_ **tag_type_id**: str - The name of the tag type to apply to the file. This corresponds with the tag type created on the Manage Tag Types tab of the Files page in the Ganymede UI. - _param_ **display_value**: str - Value to apply to the tag. -- _param_ **tag_id**: Optional[str] - The tag id that acts as a unique identifier for the tag. -- _param_ **url**: Optional[str] - URL associated with the file Tag, which can be used to link the tag to external sources -- _param_ **bucket**: Optional[str] - The bucket to use for the file uri; either "input" or "output" +- _param_ **tag_id**: str | None - The tag id that acts as a unique identifier for the tag. 
+- _param_ **url**: str | None - URL associated with the file Tag, which can be used to link the tag to external sources +- _param_ **bucket**: str | None - The bucket to use for the file uri; either "input" or "output" ## File Utilities diff --git a/docs/sdk/GanymedeClass.mdx b/docs/sdk/GanymedeClass.mdx index 661933d4..ad014422 100644 --- a/docs/sdk/GanymedeClass.mdx +++ b/docs/sdk/GanymedeClass.mdx @@ -15,7 +15,7 @@ The **Ganymede** object is a powerful tool used within editor notebooks to acces - **initiator_type**: str - Type of the user who initiated the flow run (AGENT, USER, EVENT, FLOW) - **ganymede_context**: GanymedeContext - Run context information, detailed in the [GanymedeContext section of this page](#class-ganymedecontext) -As an example, you can create a Ganymede object associated with the most recent run to mirror the prior execution in user-defined code: +As an example, you can create a **Ganymede** object associated with the most recent run to mirror the prior execution in user-defined code: ```python import pandas as pd @@ -254,12 +254,12 @@ email_alert.send_email( _send_email_ sends an email notification to the specified recipient(s). The method returns the HTML object of the email sent. -- _param_ **to** : Union[str, Iterable[str]] - The recipient(s) of the email. This can be a single email address (str) or a list of email addresses (Iterable). +- _param_ **to** : str | Iterable[str] - The recipient(s) of the email. This can be a single email address (str) or a list of email addresses (Iterable). - _param_ **subject** : str - The subject of the email. - _param_ **message** : str - The plain text message content. -- _param_ **cc** : Optional[Union[str, Iterable[str]]] - The recipient(s) to be copied on the email (CC), by default None. This can be a single email address or a list of email addresses. -- _param_ **bcc** : Optional[Union[str, Iterable[str]]] - The recipient(s) to be blindly copied on the email (BCC), by default None. 
This can be a single email address or a list of email addresses. -- _param_ **custom_headers** : Optional[Dict[str, Any]] - A dictionary of custom email headers, by default None. +- _param_ **cc** : str | Iterable[str] | None - The recipient(s) to be copied on the email (CC), by default None. This can be a single email address or a list of email addresses. +- _param_ **bcc** : str | Iterable[str] | None - The recipient(s) to be blindly copied on the email (BCC), by default None. This can be a single email address or a list of email addresses. +- _param_ **custom_headers** : dict[str, Any] | None - A dictionary of custom email headers, by default None. ## Other methods diff --git a/docs/sdk/GanymedeSDKOverview.mdx b/docs/sdk/GanymedeSDKOverview.mdx index d6cbe6c3..83de4513 100644 --- a/docs/sdk/GanymedeSDKOverview.mdx +++ b/docs/sdk/GanymedeSDKOverview.mdx @@ -1,12 +1,17 @@ --- id: GanymedeSDKOverview -title: Ganymede SDK +title: SDK Overview displayed_sidebar: SDKSidebar sidebar_label: SDK Overview --- -The Ganymede SDK offers a suite of methods for interacting with Ganymede directly from editor and analysis notebooks. +The Ganymede platform contains 2 SDKs: + +- [`ganymede_sdk`](#ganymede-sdk): for interacting with Ganymede directly from editor and analysis notebooks +- [`agent_sdk`](#agent-sdk): for Agents v5.0+, interacting with Ganymede from [Agent notebooks](../app/agents/Agent.mdx) + +### Ganymede SDK ```python from ganymede_sdk import Ganymede @@ -18,6 +23,15 @@ from ganymede_sdk import Ganymede - [**AI Integration**](./AI): Learn how to interact with Ganymede using natural language. - [**Allotrope Schemas**](./markdowns/allotrope_schema.md): Access DataFrame schemas for validating DataFrames against Allotrope standards. +### Agent SDK +```python +import agent_sdk +``` + +The [Agent SDK](./markdowns/AgentSDK.mdx) contains methods for displaying log messages, sending data to Ganymede, and interacting with the file system that a Connection is running on. 
The separation between Agent SDK and Ganymede SDK allows for a more lightweight Agent. + +### Tips and Tricks + :::tip You can introspect functions directly within a notebook by using `?` or `??` before the function name in a notebook cell. For example: diff --git a/docs/sdk/markdowns/AgentSDK.mdx b/docs/sdk/markdowns/AgentSDK.mdx new file mode 100644 index 00000000..0d2a4d5f --- /dev/null +++ b/docs/sdk/markdowns/AgentSDK.mdx @@ -0,0 +1,199 @@ +--- +id: AgentSDK +title: Agent SDK +displayed_sidebar: SDKSidebar +sidebar_label: Agent SDK +toc_max_heading_level: 4 +--- + +import NodeChip from '@site/src/components/NodeChip.js' + +## Classes for Agent-triggered flows + +Objects for triggering a Flow from an [Agent](../../app/agents/Agent) can be found in `agent_sdk` for Agents v5.0+. + +### FileWatcherResult Class + +FileWatcherResult is a dictionary of FileParam objects indexed by `node name`.`param name`. + +- _param_ **files**: dict[str, FileParam] - Dictionary of FileParam objects indexed by `node name`.`param name` +- _param_ **tags**: list[FileTag] | None - List of tags to be applied to all files + +### TriggerFlowParams Class + +TriggerFlowParams specifies the inputs for the Flow executed when all files are observed. It includes the following parameters: + +- _param_ **single_file_params**: dict[str, FileParam] | None - Dict of FileParam objects indexed by `node name`.`param name`. These parameters are used for Nodes that accept a single file as input. +- _param_ **multi_file_params**: dict[str, MultiFileParam] | None - Dict of MultiFileParam objects indexed by `node name`.`param name`. These parameters are used for Nodes that accept multiple files as input. +- _param_ **benchling_tag**: Tag | None - Additional parameters to be passed to flow. This parameter is used for inputs to the Input_Benchling node. +- _param_ **additional_params**: dict[str, str] | None - Additional parameters to be passed to flow.
This parameter is used for inputs to the [Input_Param node](../../nodes/Tag/Input_Param.md); the key is the Node name for the input parameter, and the value is the string to pass into the Node. + +### FileParam Class + +FileParam specifies files to be uploaded to Ganymede Cloud and their corresponding Flow parameters. These parameters are provided to the _execute_ function once all files are detected. + +- _param_ **filename**: str - Name of the file, e.g. "my_file.txt" +- _param_ **content_type**: str - Content type of the file, e.g. "text/plain". If not specified, the content type of the first file in the files dict will be used. +- _param_ **body**: bytes - File contents in bytes +- _param_ **param**: str - Name of parameter to be used in Flow, e.g. `node_name`.`parameter_field_name` +- _param_ **parent_dir**: str - Path within the Agent watch directory containing the file. For example, if C:/Users/username/watch_dir/ is being watched and C:/Users/username/watch_dir/abc/def/my_file.txt is found, then parent_dir would be "abc/def" +- _param_ **upload_ts**: str - Timestamp string in ISO format of when file was uploaded to the Agent watch directory, e.g. "2021-01-01T00:00:00Z" +- _param_ **upload_path**: str | None - Path in Ganymede storage where file will be uploaded +- _param_ **tags**: list[FileTag] | None - List of tags to be applied to the file +- _param_ **bucket_name**: str - Bucket associated with file +- _param_ **files**: str - Alternative method for specifying file contents, where the key is the filename and the value is the file body. + +### MultiFileParam Class + +MultiFileParam is used for submitting multiple files to a single node. It includes the following parameters: + +- _param_ **files**: str - Alternative method for specifying file contents, where the key is the filename and the value is the file body. +- _param_ **content_type**: str - Content type of file, e.g. "text/plain".
If not specified, the content type of the first file in the files dict will be used. +- _param_ **param**: str - Name of parameter to be used in flow, e.g. `node_name`.`parameter_field_name` +- _param_ **parent_dir**: str - Path within Agent watch directory that contains file. For example, if C:/Users/username/watch_dir/ is being watched and C:/Users/username/watch_dir/abc/def/my_file.txt is found, then parent_dir would be "abc/def" +- _param_ **upload_ts**: str - Timestamp string in ISO format of when file was uploaded to Agent watch directory, e.g. "2021-01-01T00:00:00Z" +- _param_ **upload_paths**: list[str] | None - Path in Ganymede storage where file will be uploaded +- _param_ **tags**: list[FileTag] | None - List of tags to be applied to file +- _param_ **bucket_name**: str - Bucket associated with file + +The MultiFileParam object contains a method for initialization from a list of FileParam objects as shown below. The content type of the object is assumed to take on the content type of the first item in the list. + +```python +# assume fp1 and fp2 are FileParam objects +m = agent_sdk.file_params_list_to_multi([fp1, fp2]) +``` + +## Utility functions + +Agent utility functions are provided in `agent_sdk` for validating data integrity and interacting with file systems. + +:::note + +The `agent_sdk` is only available for Agents v5.0+. Prior to v5.0, these functions were included in `ganymede_sdk.agent`.
+ +::: + +### Computing file checksums + +Ganymede provides functions to validate file integrity; these values can be used to verify the integrity of a file uploaded to cloud storage: + +```python +# Before Agent v5.0 +# from ganymede_sdk.agent.utils import calculate_md5, calculate_crc32c + +from agent_sdk import calculate_md5, calculate_crc32c + +file_path = "path/to/local/file" + +# either md5 or crc32c can be used to validate the integrity of a file +md5 = calculate_md5(file_path) +crc32c = calculate_crc32c(file_path) +``` + +You can also calculate the checksum of a file uploaded to Ganymede Cloud by creating a tempfile.NamedTemporaryFile object, writing the file contents to it, and then calculating the checksum: + +```python +from agent_sdk import calculate_md5, calculate_crc32c +import os +import tempfile + +data = b"Example data to calculate checksum" + +with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + tmp_file.write(data) + tmp_file_name = tmp_file.name + +md5 = calculate_md5(tmp_file_name) +crc32c = calculate_crc32c(tmp_file_name) + +os.remove(tmp_file_name) +``` + +### File system utilities + +`agent_sdk` provides a number of convenience functions, which can be helpful to use with cron Agents that involve more complex logic prior to invoking a flow. Some examples of this are when a file is written to multiple times before being processed, or if there is a variable number of files being processed, such that the trigger for invoking a flow requires more than just the presence of a file. + +#### ScanResult Dataclass + +ScanResult stores file paths for files of interest. It includes: + +- _param_ **file_path**: str - Path to file +- _param_ **modified_time**: datetime - Datetime of when file was last modified + +#### Functions + +`list_files_recursive` returns a list of all files in a directory and its subdirectories.
+ +- _param_ **file_path**: str - Path to directory to list files from + +`matches_pattern` returns True if a file path matches at least one of the specified regex patterns and False otherwise. + +- _param_ **filename**: str - Name of file +- _param_ **pattern**: str | re.Pattern - Regex pattern or list of regex patterns to match against + +`is_file_ready` returns True if a file's modified time is within the last **interval_in_seconds** seconds, or if the size of the file has changed in that same timespan. + +- _param_ **file_path**: str - Path to file to watch +- _param_ **threshold_seconds**: int - Number of seconds to wait between checks, by default 0.1 + +`get_most_recent_access_result` returns a ScanResult object referencing the most recently accessed file in a directory. Access time is updated when a file is read from or written to. + +- _param_ **directory**: str - Path to directory to watch + +`filter_by_age` returns a list of files that have not been modified within the last **age_in_minutes** minutes. + +- _param_ **scan_results**: list[ScanResult] - List of ScanResult objects +- _param_ **age_in_minutes**: int - Minimum age in minutes + +`zip_directory` creates a zip file of a directory and its contents.
+ +- _param_ **directory**: str - Path to directory to zip +- _param_ **zip_file**: str - Path to zip file to create + +`scan_for_finished_files` scans a directory, returning paths to files with a modified date older than the specified number of minutes + +- _param_ **directory**: str - Path to directory to scan +- _param_ **age_in_minutes**: int - Minimum age in minutes for files to be included in the results +- _param_ **pattern**: re.Pattern | list[re.Pattern] - Regex pattern to match files against; only files that match against at least one of the specified patterns will be included in results + +#### Example Use Case + +You can use `scan_for_finished_files` to continuously scan a directory for files, uploading them to Ganymede Cloud for processing when they are older than a specified number of minutes. The Flow could query previously uploaded files using the [list_files](../GanymedeClass.mdx#method-list_files) method to avoid uploading the same file multiple times. + +## Agent SDK + +The Agent SDK offers access to query the Ganymede database and add logging messages to the web app (for Agents v4.8+). + +### Querying Ganymede from Agent Code + +```python +from agent_sdk.query import read_sql_query + +df = read_sql_query('SELECT * FROM instrument_methods') +``` + +### Logging Methods + +Ganymede Agents (v4.9+) support user-defined logging messages in the `agent_sdk`, aligning with [logging level for Agent messages](../../app/agents/AgentLogs#logging-level). Each level corresponds with a separate method in agent_sdk. 
+ +```python +from agent_sdk import internal, debug, info, activity, error + +# log internal +internal('Display internal message') + +# log debug +debug('Display debug message') + +# log info +info('Display info message') + +# log activity +activity('Display activity message') + +# log error +error('Display error message') +``` + +In the UI, these log messages are [viewable and filterable on the corresponding Connections page](../../app/agents/AgentMonitoring#viewing-logs). + diff --git a/sidebars.js b/sidebars.js index 9b6c30f1..f1a233ed 100644 --- a/sidebars.js +++ b/sidebars.js @@ -135,6 +135,11 @@ module.exports = { id: 'app/agents/AgentMonitoring', label: 'Monitoring Agents' }, + { + type: 'doc', + id: 'app/agents/AgentTemplates', + label: 'Example Agent Templates' + }, { type: 'doc', id: 'app/agents/DebuggingAgents', @@ -316,11 +321,11 @@ module.exports = { { type: 'category', label: 'SDK', - collapsed: true, + collapsed: false, items: [ { type: 'doc', - id: 'sdk/markdowns/AgentModelsAndMethods', + id: 'sdk/markdowns/AgentSDK', }, { type: 'doc',