From 6ea63d7e945ff72fbc1f6a955280c0a4c7eac69b Mon Sep 17 00:00:00 2001
From: smvgarcia <111767892+smvgarcia@users.noreply.github.com>
Date: Thu, 12 Dec 2024 08:00:08 -0600
Subject: [PATCH 1/4] continuing update of Helm docs (#291)
---
.secrets.baseline | 88 +-----
.../{ => helm-config}/helm-config-auth.md | 108 +++----
.../helm-config-data-svcs.md | 176 ++++++------
.../{ => helm-config}/helm-config-frontend.md | 0
.../{ => helm-config}/helm-config-revproxy.md | 0
.../helm-config-workspaces.md | 17 ++
.../operator-guide/helm/helm-config/index.md | 269 +++++++++++++++++
.../helm-deploy-production-example.md} | 37 ++-
.../operator-guide/helm/index.md | 67 ++++-
.../operator-guide/prerequisites.md | 271 ------------------
gen3/mkdocs.yml | 24 +-
11 files changed, 531 insertions(+), 526 deletions(-)
rename gen3/docs/gen3-resources/operator-guide/helm/{ => helm-config}/helm-config-auth.md (87%)
rename gen3/docs/gen3-resources/operator-guide/helm/{ => helm-config}/helm-config-data-svcs.md (99%)
rename gen3/docs/gen3-resources/operator-guide/helm/{ => helm-config}/helm-config-frontend.md (100%)
rename gen3/docs/gen3-resources/operator-guide/helm/{ => helm-config}/helm-config-revproxy.md (100%)
rename gen3/docs/gen3-resources/operator-guide/helm/{ => helm-config}/helm-config-workspaces.md (84%)
create mode 100644 gen3/docs/gen3-resources/operator-guide/helm/helm-config/index.md
rename gen3/docs/gen3-resources/operator-guide/{tutorial_production-setup.md => helm/helm-deploy-production-example.md} (90%)
diff --git a/.secrets.baseline b/.secrets.baseline
index ff8d14ac..cb09e10e 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -314,13 +314,13 @@
"line_number": 96
}
],
- "gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md": [
+ "gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-auth.md": [
{
"type": "Secret Keyword",
- "filename": "gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md",
+ "filename": "gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-auth.md",
"hashed_secret": "64ab0c1d3edc1c8c166351207b840ac7b2a90523",
"is_verified": false,
- "line_number": 82
+ "line_number": 38
}
],
"gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-example.md": [
@@ -509,87 +509,7 @@
"is_verified": false,
"line_number": 162
}
- ],
- "gen3/docs/resources/user-guide/analyze-data.md": [
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/analyze-data.md",
- "hashed_secret": "98fd178574fc77b708149e559bf75052cc3c81a6",
- "is_verified": false,
- "line_number": 228
- }
- ],
- "gen3/docs/resources/user-guide/img/analyze-data.md": [
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/img/analyze-data.md",
- "hashed_secret": "98fd178574fc77b708149e559bf75052cc3c81a6",
- "is_verified": false,
- "line_number": 255
- }
- ],
- "gen3/docs/resources/user-guide/img/gen3-client.md": [
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/img/gen3-client.md",
- "hashed_secret": "98cf839d65cf07eebc6c05bb7195e0266afc01db",
- "is_verified": false,
- "line_number": 398
- },
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/img/gen3-client.md",
- "hashed_secret": "e70fbe21d0e764d51e4d70021616e46f13661efe",
- "is_verified": false,
- "line_number": 399
- },
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/img/gen3-client.md",
- "hashed_secret": "9a85ba1c1deb9374d089b5e0ec2b29ebb82b0b5f",
- "is_verified": false,
- "line_number": 400
- },
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/img/gen3-client.md",
- "hashed_secret": "df6c17e3e41c4a2971823e110a7ee702288f574c",
- "is_verified": false,
- "line_number": 401
- }
- ],
- "gen3/docs/resources/user-guide/img/query-data.md": [
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/img/query-data.md",
- "hashed_secret": "13333fbf17a0b71d02dda8e6e5c195263bfe03d6",
- "is_verified": false,
- "line_number": 87
- },
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/img/query-data.md",
- "hashed_secret": "8595e69444146fbf187e8c512b6fdc66f8da1d23",
- "is_verified": false,
- "line_number": 111
- }
- ],
- "gen3/docs/resources/user-guide/query-data.md": [
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/query-data.md",
- "hashed_secret": "13333fbf17a0b71d02dda8e6e5c195263bfe03d6",
- "is_verified": false,
- "line_number": 75
- },
- {
- "type": "Hex High Entropy String",
- "filename": "gen3/docs/resources/user-guide/query-data.md",
- "hashed_secret": "8595e69444146fbf187e8c512b6fdc66f8da1d23",
- "is_verified": false,
- "line_number": 99
- }
]
},
- "generated_at": "2024-11-18T20:09:19Z"
+ "generated_at": "2024-12-12T11:53:58Z"
}
diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-auth.md
similarity index 87%
rename from gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md
rename to gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-auth.md
index 18fe39ad..38e6cfae 100644
--- a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md
+++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-auth.md
@@ -2,65 +2,21 @@
Authentication (AuthN) and authorization (AuthZ) work together as part of identity and access management (IAM). AuthN is controlled by Fence - it relates to confirming the identity of the user (often through signle sign-on). AuthZ is controlled by Arborist - it determines what an authenticated user can see and do.
-## Arborist (AuthN)
-
-### What Does it Do
-
-Arborist is the authorization service. It works with Fence to assign authorizations to a user based on their authentication information. Information around user authorizations are set within a useryaml, or telemetry file for dbgap authorized users, and put into the arborist db during usersync.
-
-### Default settings
-
-If you deploy Helm without customizing any configuration, you can see the [default Arborist values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/arborist/values.yaml).
-
-### How to configure it
-
-For the full set of configuration options, see the [Helm README.md for Arborist](https://github.com/uc-cdis/gen3-helm/tree/master/helm/arborist)
-
-Some common configuration options include:
-
-**Postgres configuration**
-
-```
-# -- (map) To configure postgresql subchart
-# Persistence is disabled by default
-postgresql:
- primary:
- persistence:
- # -- (bool) Option to persist the dbs data.
- enabled: true
-```
-
-You can see examples of this configuration in context in the following [example Gen3 values.yamls](https://github.com/uc-cdis/gen3-helm/tree/master/examples):
-
-* [aws_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/aws_dev_values.yaml)
-* [gke_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_dev_values.yaml)
-* [gke_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_values.yaml)
-
-**Image repo/ tag**
-
-```
-arborist:
- enabled: true
-
- # What image/ tag to pull
- image:
- tag:
- repository:
-```
-
-Common Arborist database SQL queries can be [found here](https://github.com/uc-cdis/cdis-wiki/blob/master/dev/gen3-sql-queries.md#arborist-database). *Note: this link is only visible to CTDS employees*
-
## Fence (AuthZ)
### What Does it Do
-Fence is a core service for a Gen3 datacommons which handles authentication. It is necessary for a commons to run at all, and will handle authentication on the `/login` endpoint as well as creating presigned url's in the presigned-url-fence pods.
+Fence handles authentication, and is a core service for Gen3 data commons and any other type of Gen3 deployment. It is a required service for a commons to run at all, and will handle authentication on the `/login` endpoint as well as creating presigned URLs in the presigned-url-fence pods.
+
+For full functionality in a Gen3 instance, Fence depends on a [configured user.yaml](#how-to-configure-the-useryaml) unless you [enable mock authorization](#mock-authorization-for-development-only).
-### Default settings
+### Default settings for Fence and user.yaml
If you deploy Helm without customizing any configuration, you can see the [default Fence values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/fence/values.yaml).
-### How to configure it
+A [default user.yaml is provided in the Fence values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/fence/values.yaml#L517-L726). However, it is configured with stand-in information to demonstrate where to add your real email and project name values, so it cannot work as a user.yaml without further configuration because some information is fake. For development work, you can [configure Fence to use mock authorization](#mock-authorization-for-development-only) to bypass the need for configuring the user.yaml.
+
+### How to configure Fence
For the full set of configuration options, see the [Helm README.md for Fence](https://github.com/uc-cdis/gen3-helm/tree/master/helm/fence)
@@ -114,7 +70,7 @@ You can see examples of Fence configuration overriding defaults in context in th
* [gke_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_dev_values.yaml)
* [gke_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_values.yaml)
-### User.yaml
+### How to configure the user.yaml
A user.yaml will control access to your data commons. To see how to construct a user.yaml properly:
@@ -172,6 +128,54 @@ OIDC (OpenID Connect) clients allow applications to authenticate with Fence. Thi
Once the client is created, share the client_id and client_secret with the application owner so they can configure their application to authenticate with Fence. To create these clients you will need to exec into a fence container and [run the following commands](https://github.com/uc-cdis/fence/blob/master/docs/additional_documentation/setup.md#register-oauth-client).
+## Arborist (AuthZ)
+
+### What Does Arborist Do
+
+Arborist is the authorization service. It works with Fence to assign authorizations to a user based on their authentication information. Information about user authorizations is set within a user.yaml (or a telemetry file for dbGaP-authorized users) and is put into the Arborist database during usersync.
+
+### Default settings for Arborist
+
+If you deploy Helm without customizing any configuration, you can see the [default Arborist values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/arborist/values.yaml).
+
+### How to configure Arborist
+
+For the full set of configuration options, see the [Helm README.md for Arborist](https://github.com/uc-cdis/gen3-helm/tree/master/helm/arborist)
+
+Some common configuration options include:
+
+**Postgres configuration**
+
+```
+# -- (map) To configure postgresql subchart
+# Persistence is disabled by default
+postgresql:
+ primary:
+ persistence:
+ # -- (bool) Option to persist the dbs data.
+ enabled: true
+```
+
+You can see examples of this configuration in context in the following [example Gen3 values.yamls](https://github.com/uc-cdis/gen3-helm/tree/master/examples):
+
+* [aws_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/aws_dev_values.yaml)
+* [gke_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_dev_values.yaml)
+* [gke_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_values.yaml)
+
+**Image repo/ tag**
+
+```
+arborist:
+ enabled: true
+
+ # What image/ tag to pull
+ image:
+ tag:
+ repository:
+```
+
+Common Arborist database SQL queries can be [found here](https://github.com/uc-cdis/cdis-wiki/blob/master/dev/gen3-sql-queries.md#arborist-database). *Note: this link is only visible to CTDS employees*
+
## Relevant AuthN/AuthZ Tutorials
See the following tutorials for additional information relevant to AuthN/AuthZ.
diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-data-svcs.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-data-svcs.md
similarity index 99%
rename from gen3/docs/gen3-resources/operator-guide/helm/helm-config-data-svcs.md
rename to gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-data-svcs.md
index 3199e8a4..71d420ad 100644
--- a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-data-svcs.md
+++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-data-svcs.md
@@ -2,54 +2,95 @@
---
-## aws-es-proxy
+## Indexd
### What does it do
-aws-es-proxy is a small web server application sitting between Gen3 services and Amazon Elasticsearch service.
+Indexd is a core service of the commons. It is used to index files within the commons, to be used by Fence to download data.
Note:
-* This service is only needed when you deploy Gen3 on AWS and use the AWS OpenSearch Service.
-* This pod can also be used to make direct queries to ElasticSearch. If you know you want to make a manual query to ElasticSearch, you can exec into the aws-es-proxy pod and run the following, filling in the appropriate endpoint you want to hit to query elasticsearch:
-
-```
-kubectl exec -it bash
-curl http://localhost:9200/_cluster/status
-```
+Indexd is used to hold information regarding files in the commons. We can index any files we want, but should ensure that the buckets referenced in Indexd are configured within Fence, so that downloading the files will work. To index files, we have a variety of tools. First, data upload will automatically create Indexd records for files uploaded. If we want to index files from external buckets, we can also use [indexd-utils](https://github.com/uc-cdis/indexd_utils), or if the commons has dirm set up, create a manifest and upload it to the `/indexing` endpoint of a commons. From there, GUIDs will be created and/or assigned to objects. You can view the information about the records by hitting the `(commons url)/index/(GUID)` endpoint. To test that the download works for these files, you will want to hit the `(commons url)/user/data/download/(GUID)` endpoint, while ensuring your user has the proper access to the ACL/AuthZ assigned to the Indexd record.
### Default settings
-If you deploy Helm without customizing any configuration, you can see the [default aws-es-proxy values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/values.yaml).
+If you deploy Helm without customizing any configuration, you can see the [default Indexd values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/values.yaml).
### How to configure it
-For a full set of configuration see the [Helm README.md for aws-es-proxy](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/README.md) or read the [aws-es-proxy values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/values.yaml) directly.
-
-Some important configuration items for aws-es-proxy in helm:
+For a full set of configuration see the [Helm README.md for Indexd](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/README.md) or read the [Indexd values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/values.yaml) directly.
```
-# -- AWS user to use to connect to ES
-aws-es-proxy:
- # Whether or not to deploy the service or not
+indexd:
enabled: true
- # What image/ tag to pull
image:
repository:
tag:
- # AWS secrets
- secrets:
- awsAccessKeyId: ""
- awsSecretAccessKey: ""
+ # default prefix that gets added to all indexd records.
+ defaultPrefix: "TEST/"
- # Elasticsearch endpoint in AWS
- esEndpoint: test.us-east-1.es.amazonaws.com
+ # Secrets for fence and sheepdog to use to authenticate with indexd.
+ # If left blank, will be autogenerated.
+ secrets:
+ userdb:
+ fence:
+ sheepdog:
```
---
-## ETL
+## Sower
+
+### What does it do
+
+Sower is a job dispatching service. Jobs are configured within the manifest, and sower handles dispatching the jobs.
+
+### Default settings
+
+If you deploy Helm without customizing any configuration, you can see the [default Sower values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/values.yaml).
+
+### How to configure it
+
+For a full set of configuration see the [Helm README.md for Sower](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/README.md) or read the [Sower values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/values.yaml) directly.
+
+---
+
+## Sheepdog
+
+### What does it do
+
+Sheepdog is a core service that handles data submission. Data gets submitted to the commons, using the dictionary as a schema, which is reflected within the sheepdog database.
+
+### Default settings
+
+If you deploy Helm without customizing any configuration, you can see the [default Sheepdog values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/values.yaml).
+
+### How to configure it
+
+For a full set of configuration see the [Helm README.md for Sheepdog](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/README.md) or read the [Sheepdog values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/values.yaml) directly.
+
+---
+
+## Peregrine
+
+### What does it do
+
+The Peregrine service is used to query data in Postgres. It works similarly to Guppy, but relies on querying Postgres directly. It will create the charts on the front page of the commons, as well as the `/query` endpoint of a commons.
+
+### Default settings
+
+If you deploy Helm without customizing any configuration, you can see the [default Peregrine values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/values.yaml).
+
+### How to configure it
+
+For a full set of configuration see the [Helm README.md for Peregrine](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/README.md) or read the [Peregrine values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/values.yaml) directly.
+
+To configure Peregrine, you must have an entry in the versions block. It also requires a dictionary in the global block.
+
+---
+
+## ETL (Tube)
### What does it do
@@ -148,40 +189,49 @@ Last, Guppy works closely with Portal to render the Explorer page. You will need
---
-## Indexd
+## aws-es-proxy
### What does it do
-Indexd is a core service of the commons. It is used to index files within the commons, to be used by Fence to download data.
+aws-es-proxy is a small web server application sitting between Gen3 services and Amazon Elasticsearch service.
Note:
-Indexd is used to hold information regarding files in the commons. We can index any files we want, but should ensure that bucket in Indexd are configured within Fence, so that downloading the files will work. To index files, we have a variety of tools. First, data upload will automatically create indexd records for files uploaded. If we want to index files from external buckets, we can also use [indexd-utils](https://github.com/uc-cdis/indexd_utils), or if the commons has dirm setup, create a manifest and upload it to the `/indexing` endpoint of a commons. From there, GUID's will be created and/or assigned to objects. You can view the information about the records by hitting the `(commons url)/index/(GUID)` endpoint. To test that the download works for these files, you will want to hit the `(commons url)/user/data/download/(GUID)` endpoint, while ensuring your user has the proper access to the ACL/AuthZ assigned to the Indexd record.
+* This service is only needed when you deploy Gen3 on AWS and use the AWS OpenSearch Service.
+* This pod can also be used to make direct queries to ElasticSearch. If you know you want to make a manual query to ElasticSearch, you can exec into the aws-es-proxy pod and run the following, filling in the appropriate endpoint you want to hit to query elasticsearch:
+
+```
+kubectl exec -it <aws-es-proxy-pod-name> -- bash
+curl http://localhost:9200/_cluster/health
+```
### Default settings
-If you deploy Helm without customizing any configuration, you can see the [default Indexd values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/values.yaml).
+If you deploy Helm without customizing any configuration, you can see the [default aws-es-proxy values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/values.yaml).
### How to configure it
-For a full set of configuration see the [Helm README.md for Indexd](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/README.md) or read the [Indexd values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/values.yaml) directly.
+For a full set of configuration see the [Helm README.md for aws-es-proxy](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/README.md) or read the [aws-es-proxy values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/values.yaml) directly.
+
+Some important configuration items for aws-es-proxy in helm:
```
-indexd:
+# -- AWS user to use to connect to ES
+aws-es-proxy:
+ # Whether or not to deploy the service or not
enabled: true
+ # What image/ tag to pull
image:
repository:
tag:
- # default prefix that gets added to all indexd records.
- defaultPrefix: "TEST/"
-
- # Secrets for fence and sheepdog to use to authenticate with indexd.
- # If left blank, will be autogenerated.
+ # AWS secrets
secrets:
- userdb:
- fence:
- sheepdog:
+ awsAccessKeyId: ""
+ awsSecretAccessKey: ""
+
+ # Elasticsearch endpoint in AWS
+ esEndpoint: test.us-east-1.es.amazonaws.com
```
---
@@ -201,53 +251,3 @@ If you deploy Helm without customizing any configuration, you can see the [defau
### How to configure it
For a full set of configuration see the [Helm README.md for Metadata](https://github.com/uc-cdis/gen3-helm/blob/master/helm/metadata/README.md) or read the [Metadata values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/metadata/values.yaml) directly.
-
----
-
-## Peregrine
-
-### What does it do
-
-The Peregrine service is used to query data in Postgres. It works similar to Guppy, but relies on querying Postgres directly. It will create the charts on the front page of the commons, as well as the `/query` endpoint of a commons.
-
-### Default settings
-
-If you deploy Helm without customizing any configuration, you can see the [default Peregrine values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/values.yaml).
-
-### How to configure it
-
-For a full set of configuration see the [Helm README.md for Peregrine](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/README.md) or read the [Peregrine values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/values.yaml) directly.
-
-To configure Peregrine, you must have an entry in the versions block. It also requires a dictionary in the global block.
-
----
-
-## Sheepdog
-
-### What does it do
-
-Sheepdog is a core service that handles data submission. Data gets submitted to the commons, using the dictionary as a schema, which is reflected within the sheepdog database.
-
-### Default settings
-
-If you deploy Helm without customizing any configuration, you can see the [default Sheepdog values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/values.yaml).
-
-### How to configure it
-
-For a full set of configuration see the [Helm README.md for Sheepdog](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/README.md) or read the [Sheepdog values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/values.yaml) directly.
-
----
-
-## Sower
-
-### What does it do
-
-Sower is a job dispatching service. Jobs are configured within the manifest, and sower handles dispatching the jobs.
-
-### Default settings
-
-If you deploy Helm without customizing any configuration, you can see the [default Sower values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/values.yaml).
-
-### How to configure it
-
-For a full set of configuration see the [Helm README.md for Sower](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/README.md) or read the [Sower values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/values.yaml) directly.
diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-frontend.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-frontend.md
similarity index 100%
rename from gen3/docs/gen3-resources/operator-guide/helm/helm-config-frontend.md
rename to gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-frontend.md
diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-revproxy.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-revproxy.md
similarity index 100%
rename from gen3/docs/gen3-resources/operator-guide/helm/helm-config-revproxy.md
rename to gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-revproxy.md
diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-workspaces.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-workspaces.md
similarity index 84%
rename from gen3/docs/gen3-resources/operator-guide/helm/helm-config-workspaces.md
rename to gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-workspaces.md
index 4c758323..865ba333 100644
--- a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-workspaces.md
+++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/helm-config-workspaces.md
@@ -114,6 +114,23 @@ hatchery:
gen3-volume-location: "/home/jovyan/.gen3"
```
+## Workspace Token Service (wts)
+
+### What Does it Do
+
+WTS acts as an OIDC client which acts on behalf of users to request refresh tokens from Fence. This happens when a user logs into a workspace from the browser. WTS then stores the refresh token for that user, and manages access tokens and refresh tokens for workers that belong to specific users in the workspace.
+
+### Default settings
+
+If you deploy Helm without customizing any configuration, you can see the [default WTS values here](https://github.com/uc-cdis/gen3-helm/tree/master/helm/wts/values.yaml).
+
+### How to configure it
+
+Check out the [quick-start guide for WTS with Helm](https://github.com/uc-cdis/workspace-token-service/blob/master/docs/img/quickstart_helm.md).
+
+For a full set of configuration see the [Helm README.md for WTS](https://github.com/uc-cdis/gen3-helm/tree/master/helm/wts/README.md) or read the [WTS values.yaml](https://github.com/uc-cdis/gen3-helm/tree/master/helm/wts/values.yaml) directly.
+
+
## Manifestservice
### What Does it Do
diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config/index.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/index.md
new file mode 100644
index 00000000..963bf67f
--- /dev/null
+++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config/index.md
@@ -0,0 +1,269 @@
+# Configuring Gen3 Microservices for Helm Deployment
+
+## Gen3 is Modular
+
+Gen3 is a modular system that offers many microservices that allow you to customize your Gen3 instance. Many services are not required for a functional Gen3 instance; the services you need depend on how you plan to use your Gen3 instance.
+
+**Regardless of use case, all Gen3 instances must have these "framework services":**
+
+* Service to **authorize users** to see data in the instance (Fence and Arborist)
+* Service to **index data files** in the instance (Indexd)
+* Service to **manage metadata** for finding data files (Sheepdog and/or Metadata Service, or other external services you may choose to use to extend Gen3)
+
+The typical use case is that a Gen3 operator wants to set up a Gen3 data commons to share their data. However, there are many other use cases possible because Gen3 is modular and highly configurable (e.g., Gen3 can be used as a data mesh connecting to external data commons).
+
+## How to Approach Configuration in Helm
+
+A big advantage of deploying Gen3 with Helm is that it comes preconfigured with many reasonable default settings. That means that you can deploy a minimally-configured "fully default" Gen3 instance locally and ensure that all the foundational steps for deployment are working properly, or minimize the places you need to troubleshoot if there are any problems.
+
+However, once you have the "default Gen3" deployment working, you will want to add your data and configure it so that it reflects the features and appearance you envision.
+
+We recommend configuring your Gen3 instance step by step to minimize the places you may need to troubleshoot if there are problems. Below, we describe such an approach.
+
+## First, deploy the minimally-configured "fully default" Gen3
+
+For instructions to do this, see our [example Helm deployment](../helm-deploy-example.md). Once you deploy this, you will have a basic default Gen3 portal that will help make sure that your foundational Helm installation and Kubernetes tools are working as expected. Having a functional portal will also help you see whether your configurations are working as you continue to configure other services.
+
+## Global block in Gen3 values.yaml
+
+The global block of the Gen3 values.yaml is not for a specific service. Instead, this is a place to declare variables that are relevant for many different services. This saves you from needing to repeatedly define these variables for each service where they are required.
+
+Some variables that are often configured in the global block are:
+
+* `hostname`
+* `revproxyArn`
+* `dictionaryUrl`
+* `externalSecrets`
+* `aws`
+
+You can see the [default values for the global block here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/gen3/values.yaml#L5).
+
+## Configure first: AuthN/AuthZ
+
+**Fence is the authentication (AuthN)** service. It authenticates on the `/login` endpoint, and also creates presigned URLs in the presigned-url-fence pods. You can find information about [configuring Fence here](./helm-config-auth.md#fence-authz).
+
+**Fence depends on having a functional user.yaml**. A [default user.yaml is provided in the values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/fence/values.yaml#L517-L726) for the Fence chart. However, it is configured with stand-in information to demonstrate where to add your real email/login and project name values. Therefore, the base user.yaml cannot work as a user.yaml without further configuration in the Fence block of your Gen3 values.yaml, because some of the default information is fake. The base user.yaml has blocks that allow you to grant yourself admin privileges so that you can properly use various Gen3 services.
+
+If the `.Values.usersync.userYamlS3Path` string is set to "none" (which is what it should be if your user.yaml is not in an S3 bucket), the user.yaml file specified in the Fence values.yaml [HERE](https://github.com/uc-cdis/gen3-helm/blob/078c7ae094efa5c56f8fd732c75e5e939fbcfc24/helm/fence/values.yaml#L516) will be used. Anything you add to the `USER_YAML: |` section in the Fence block in the Gen3 umbrella values.yaml will override the `USER_YAML: |` in the Fence values.yaml.
+
+We have a [guide to more extensive configuration of the user.yaml](https://github.com/uc-cdis/fence/blob/master/docs/additional_documentation/user.yaml_guide.md).
+
+**Arborist is the authorization (AuthZ) service**. It works with Fence to assign authorizations to a user based on their authentication information. Information about user authorizations is set within a user.yaml (or a telemetry file for dbGaP-authorized users) and put into the Arborist database during [the usersync job](../../tutorial_fence_usersync_job.md). You can find information about [configuring Arborist here](./helm-config-auth.md#arborist-authn).
+
+Arborist depends on Fence, so a problem with Fence will cause problems for Arborist.
+
+## Data-related services
+
+### Configure Indexd
+
+Indexd will index files in the commons to be used by Fence to download data. Indexd will assign a GUID (Globally Unique IDentifier) to each file so it can be managed in the data commons. Many data-relevant services depend on Indexd, so it should be functioning before you proceed. The default configuration may be functional for a local deployment for development. You can find information about [configuring Indexd here](./helm-config-data-svcs.md#indexd).
+
+### Data Dictionary and Data
+
+Before proceeding with other service configurations, you will need to have a data dictionary and data to upload.
+
+* See our [documentation for guidance creating your data dictionary](../../create-data-dictionary.md).
+* If you do not yet have data that correlates to the data dictionary you are using, you can create synthetic data based on your data dictionary using our [data-simulator tool](https://github.com/uc-cdis/data-simulator?tab=readme-ov-file#data-simulator).
+
+In the Gen3 values.yaml, the path or link to the dictionary file should be the value for the `dictionaryUrl` field in the `global` block ([see here for an example of the dictionaryUrl field](https://github.com/uc-cdis/gen3-helm/blob/master/helm/gen3/values.yaml#L54) in the default Gen3 values.yaml).
+
+*Note: if you are creating a data lake for your Gen3 instance, you are not required to have a data dictionary.*
+
+### Query page: Graph Model tab
+
+On the Graph Model tab of the Query page, data that has been ingested with Sheepdog can be queried with Peregrine, as long as Guppy is enabled (although Guppy does not need to be configured yet).
+
+#### Sheepdog (data submission)
+
+Sheepdog handles data submission. When data files are submitted to a Gen3 Data Commons using Sheepdog, it uses the data dictionary as a schema, and the files are automatically indexed into Indexd. You can find information about [configuring Sheepdog here](./helm-config-data-svcs.md#sheepdog).
+
+Sheepdog depends on:
+
+* Indexd
+* Fence
+* Sower and ssjdispatcher
+* poetry and Postgres
+* data and a data dictionary
+
+#### Peregrine (query Postgres)
+
+Peregrine directly queries data in Postgres. You can find information about [configuring Peregrine here](./helm-config-data-svcs.md#peregrine).
+
+Peregrine depends on:
+
+* Sheepdog
+* Fence
+* data in Postgres
+
+#### Enable Guppy
+
+In the Gen3 values.yaml, by default, Guppy is not enabled. For the Graph Model tab of the Query page to work, Guppy must be enabled (although it does not need to be configured beyond the default values yet).
+
+To enable Guppy, you can set `enabled: true` in the [guppy block of the Gen3 values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/gen3/values.yaml#L152).
+
+#### Portal configuration
+
+The portal gitops.json should also be configured for things to render properly on the Query page. You can see more about [generally configuring the portal here](https://github.com/uc-cdis/data-portal/blob/master/docs/portal_config.md), and you can see the relevant [`gitops: | json` block in the Portal values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/portal/values.yaml#L211).
+
+### Query page: Flat Model tab
+
+The Flat Model tab of the Query page enables faster search of the data based on pre-selected search fields (these become indices). To get the flattened data, you must identify what fields you want your users to be able to query on (these will also be the fields available on the Explorer page). Along with the data dictionary, these field selections will guide development of the etlMapping.yaml file, which describes which tables and fields to "ETL" to ElasticSearch. Tube, the Gen3 ETL service, will use the etlMapping.yaml to run an Extract, Transform, Load (ETL) process on the data in Postgres. Tube will populate ElasticSearch indices to create flattened tables in ElasticSearch (ES). Then, Guppy makes available the ElasticSearch indices created by Tube to quickly traverse the flat data model and find data in ES.
+
+#### etlMapping.yaml
+
+The etlMapping.yaml file describes which tables and fields to ETL to ElasticSearch. These are the fields you want to be searchable on the Explorer page or the Flat Model tab of the Query page. You must create an etlMapping.yaml to be able to use Tube or Guppy.
+
+Configuring the etlMapping.yaml depends on what users want to search by on the Query page and/or display on the Explorer page. You can read more about [configuring an etlMapping.yaml here](https://github.com/uc-cdis/tube/blob/master/docs/configuration_file.md).
+
+The etlMapping.yaml must match the Data Dictionary. It can be [validated against the Data Dictionary as described here](https://github.com/uc-cdis/gen3utils#etlmappingyaml-validation).
+
+You can see the [default etlMapping block in the ETL values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/etl/values.yaml#L47).
+
+The etlMapping.yaml depends on the data dictionary.
+
+#### Tube (ETL, ElasticSearch)
+
+The Gen3 Tube ETL is designed to translate data from a graph data model, stored in a PostgreSQL database, to indexed documents in ElasticSearch (ES), which supports efficient ways to query data from the front-end. The purpose of the Gen3 Tube ETL is to create indexed documents to reduce the response time of requests to query data. It is configured through an etlMapping.yaml configuration file, which describes which tables and fields to ETL to ElasticSearch. You can find information about [configuring Tube/ETL here](./helm-config-data-svcs.md#etl-tube).
+
+Tube depends on:
+
+* Indexd
+* etlMapping.yaml and data dictionary
+* data ingested into Postgres database using the data dictionary as a schema
+
+#### Guppy
+
+Guppy is used to render the Explorer page and to permit function of the Flat Model tab of the Query page. Guppy makes available the ElasticSearch indices created by Tube to quickly traverse the flat data model and find data in ES. So, after running the ETL, copy the indices into the `indices` block, as seen [here in the guppy values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/guppy/values.yaml#L186). You can find information about [configuring Guppy here](./helm-config-data-svcs.md#guppy).
+
+Guppy depends on:
+
+* Tube/ETL - Guppy relies on indices being created by Tube to run. If there are no indices created, Guppy will fail to start up.
+* aws-es-proxy (if deploying Gen3 on AWS) ([see configuration info here](./helm-config-data-svcs.md#aws-es-proxy))
+
+#### Portal configuration
+
+The portal gitops.json should also be configured for things to render properly on the Query page. You can see more about [generally configuring the portal here](https://github.com/uc-cdis/data-portal/blob/master/docs/portal_config.md). You must define the guppyConfig; see the [example default guppyConfig values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/portal/values.yaml#L401).
+
+### Explorer page
+
+The Explorer page is powered by Guppy, so it is already mostly configured once you have finished configuring the [Flat Model tab of the Query page](#query-page-flat-model-tab). However, there is still some Portal configuration required for it to render properly.
+
+#### Portal configuration
+
+The portal gitops.json must have the `dataExplorerConfig` field configured for the Explorer page to render properly. You can see more about [generally configuring the portal here](https://github.com/uc-cdis/data-portal/blob/master/docs/portal_config.md). See the [example default dataExplorerConfig values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/portal/values.yaml#L364).
+
+### Discovery page
+
+The Discovery page is powered by the Metadata Service. By default, the Discovery page feature is turned off in the portal gitops.json block. To configure the Discovery page, it must be enabled in the portal values.yaml, and the Metadata Service (also called MDS) must be configured and populated.
+
+#### Portal: Enable the Discovery page and add the `discoveryConfig`
+
+In the `portal` block of the Gen3 values.yaml, [the field gitops.json.featureFlags.discovery](https://github.com/uc-cdis/gen3-helm/blob/master/helm/portal/values.yaml#L364) should have the value `true` to enable the Discovery page.
+
+To configure the Discovery page, you need to add a `discoveryConfig` to the gitops.json. Here is an [example of `discoveryConfig` config](https://github.com/uc-cdis/cdis-manifest/blob/551f0963e60f6000ae8b9987592495406a031c81/gen3.datacommons.io/portal/gitops.json#L175-L297) from a non-Helm deployment. This config is how to edit the table items, advanced search fields, tags, and study page fields (i.e., page that opens up upon clicking on a row). You can see more about [generally configuring the portal, including the `discoveryConfig`, here](https://github.com/uc-cdis/data-portal/blob/master/docs/portal_config.md).
+
+#### Configure Metadata Service
+
+The Metadata Service (also called MDS) provides an API for retrieving JSON metadata of GUIDs. It is a flexible option for "semi-structured" data (key:value mappings). You can find information about [configuring Metadata Service here](./helm-config-data-svcs.md#metadata).
+
+* [Discovery page for Gen3 Data Hub](https://gen3.datacommons.io/discovery)
+* [Example MDS powering Gen3 Data Hub Discovery page](https://gen3.datacommons.io/mds/metadata?data=True)
+
+## Workspaces
+
+Gen3 workspaces use the Ambassador, Hatchery, Workspace Token Service (WTS), and Manifestservice services to create and run the workspace in a Gen3 data commons. You can find information about [configuring all of these Workspace services here](./helm-config-workspaces.md).
+
+Workspace services depend on:
+
+* Fence
+* Arborist
+* user.yaml (assign the `workspace` policy to each user who should have workspace access)
+
+#### Ambassador
+
+Ambassador is an envoy proxy. We use this service to proxy traffic toward our workspaces, Hatchery and Jupyter containers.
+
+#### Hatchery
+
+Hatchery is used to create workspaces. It contains information about the workspace images and the resources configured for running those images.
+
+#### Workspace Token Service
+
+The Gen3 workspace token service acts as an OIDC client which acts on behalf of users to request refresh tokens from Fence. This happens when a user logs into a workspace from the browser. WTS then stores the refresh token for that user, and manages access tokens and refresh tokens for workers that belong to specific users in the workspace.
+
+#### Manifestservice
+
+The manifestservice is used by the workspaces to mount files to a workspace. Workspace pods are set up with a sidecar container which will mount files to the data directory. This is used so that users can access files directly on the workspace container. The files pulled are defined by manifests, created through the export to workspace button on the Explorer page. These manifests live in an S3 bucket which the manifestservice can query.
+
+## Data Commons Notebook Browser
+
+This is an option if you want to make HTML versions of Jupyter notebooks viewable by commons users. Customize the [Notebook Browser page](https://chicagoland.pandemicresponsecommons.org/resource-browser) to preview Jupyter Notebooks by adding images, titles, descriptions, and links to the Jupyter Notebook.
+
+* [Review the code to edit the title (top; notebooks), description (top; notebooks), link, and imageURL (preview image)](https://github.com/uc-cdis/cdis-manifest/blob/0e5a08eed8b417a721a6324f820abe8ea4ef4e17/chicagoland.pandemicresponsecommons.org/portal/gitops.json#L1097-L1175)
+
+## Front End (Portal) Configuration examples
+
+You can see more about [generally configuring the portal here](https://github.com/uc-cdis/data-portal/blob/master/docs/portal_config.md). Below, there are different code examples for aspects of the portal.
+
+### Login Page
+
+#### Helix Image
+
+Customize the image that appears on the [Login Page](https://gen3.datacommons.io/login) with a vector graphic (e.g., `*.svg`) of your choice.
+
+* [Review the code to save the image](https://github.com/uc-cdis/cdis-manifest/blob/456e1a3b5b3cc5dc23b83e1f96c0770a2007162a/gen3.datacommons.io/portal/gitops-sponsors/gene_bgy.svg)
+* [Review code to include the path-to-image in gitops.json](https://github.com/uc-cdis/cdis-manifest/blob/456e1a3b5b3cc5dc23b83e1f96c0770a2007162a/gen3.datacommons.io/portal/gitops.json#L130)
+
+#### Information on Login and Commons
+
+Customize the text that appears on the [Login Page](https://gen3.datacommons.io/login) by specifying title, description, subtitle, contact, or email.
+
+* [Review the code to edit title, subtitle, text, contact, and email](https://github.com/uc-cdis/cdis-manifest/blob/456e1a3b5b3cc5dc23b83e1f96c0770a2007162a/gen3.datacommons.io/portal/gitops.json#L124-L129)
+
+### Landing Page
+
+#### Information on Commons
+
+Customize the name of the Data Commons, the info text, and the button below that appear on the top left side of the Landing Page after logging in.
+
+* [Review the code to edit heading, text, and link](https://github.com/uc-cdis/cdis-manifest/blob/456e1a3b5b3cc5dc23b83e1f96c0770a2007162a/gen3.datacommons.io/portal/gitops.json#L39-L44)
+
+#### Summary Statistics
+
+Customize the summary statistics that appear on the top right side of the Landing Page after logging in. The attributes are GraphQL fields, which must be in the dictionary, configured in the etlMapping.yaml, and populated with data on the backend.
+
+* [Review the code to edit GraphQL queries](https://github.com/uc-cdis/cdis-manifest/blob/456e1a3b5b3cc5dc23b83e1f96c0770a2007162a/gen3.datacommons.io/portal/gitops.json#L3-L36)
+* [Review the code to edit the GraphQL queries after being logged in](https://github.com/uc-cdis/cdis-manifest/blob/4a922a04456423fea5d1e59c5431cedb460280d0/data.midrc.org/portal/gitops.json#L98-L113)
+
+#### Cards
+
+Customize the cards that appear on the bottom of the Landing Page after logging in.
+
+* [Review the code to edit name, icons, body, link, and label of the cards](https://github.com/uc-cdis/cdis-manifest/blob/551f0963e60f6000ae8b9987592495406a031c81/gen3.datacommons.io/portal/gitops.json#L84-L134)
+* Adding a new icon requires saving the icon in [this repository](https://github.com/uc-cdis/data-portal/tree/master/src/img/icons) and in [this file](https://github.com/uc-cdis/data-portal/blob/67f2b83227b9c3b48143bd2938cad160fc225394/src/img/icons/index.jsx).
+
+### Data Commons
+
+#### Title
+
+Customize the title that appears in the top left corner.
+
+* [Review the code to edit the title of the Data Commons](https://github.com/uc-cdis/cdis-manifest/blob/a68f8df12173e4b9d06dcdf3fad2cc1643a73f89/gen3.theanvil.io/portal/gitops.json#L71-L72)
+
+#### Top Bar
+
+Customize the top bar that appears in the top right corner.
+
+* [Review the code to edit the top bar (link, name, icon, dropdown) of the Data Commons](https://github.com/uc-cdis/cdis-manifest/blob/4a922a04456423fea5d1e59c5431cedb460280d0/data.midrc.org/portal/gitops.json#L146-L171)
+
+#### Color Theme
+
+Customize the color theme for buttons, the top navigation bar, and any types of charts on the Exploration and Landing pages.
+
+* [Review the code to edit the 9 colors of a Data Commons](https://github.com/uc-cdis/cdis-manifest/blob/4a922a04456423fea5d1e59c5431cedb460280d0/data.midrc.org/portal/gitops.json#L146-L171)
+
+#### Footer Logo
+
+Customize the logos in the Footer.
+
+* [Review the code to edit the source, link, and name of logos in the footer of a Data Commons](https://github.com/uc-cdis/cdis-manifest/blob/551f0963e60f6000ae8b9987592495406a031c81/gen3.datacommons.io/portal/gitops.json#L156-L168)
diff --git a/gen3/docs/gen3-resources/operator-guide/tutorial_production-setup.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-production-example.md
similarity index 90%
rename from gen3/docs/gen3-resources/operator-guide/tutorial_production-setup.md
rename to gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-production-example.md
index 1939cf2b..cefddb17 100644
--- a/gen3/docs/gen3-resources/operator-guide/tutorial_production-setup.md
+++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-production-example.md
@@ -1,11 +1,9 @@
+# Tutorial for Production Deployment
-# Production Deployment of Gen3 in AWS
-
-## Introduction
This guide walks you through deploying Gen3 in a production environment on AWS using Infrastructure as Code (IaC), Kubernetes, and GitOps best practices. This approach ensures a robust, scalable, and repeatable deployment process.
-### Key Benefits
+## Key Benefits
- **Infrastructure as Code (IaC)**
- **Efficiency:** Streamlines and automates infrastructure provisioning, eliminating error-prone manual steps.
@@ -21,7 +19,7 @@ This guide walks you through deploying Gen3 in a production environment on AWS u
- **Security:** Securely store and manage sensitive credentials (e.g., database passwords, API keys) using AWS Secrets Manager.
- **Seamless Integration:** Easily retrieve secrets from within your applications, adhering to security best practices.
-# Tutorial
+
## 1. Admin VM (Jump Box) Setup
@@ -62,13 +60,13 @@ The Admin VM, also known as a jump box or bastion host, serves as your secure en
Install the following tools on your Admin VM:
-1. **AWS CLI:** Pre-installed on most Amazon Linux and Ubuntu instances. If not, follow [this guide](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) to install and configure it.
-2. **Terraform:** Install Terraform using [this guide](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli).
-3. **kubectl:** Install kubectl following the instructions [here](https://kubernetes.io/docs/tasks/tools/).
-4. **Helm:** Install Helm using [this guide](https://helm.sh/docs/helm/helm_install/).
-5. **k9s (Optional):** Install k9s [here](https://k9scli.io/topics/install/) for a terminal-based UI for your Kubernetes cluster.
+1. **AWS CLI:** Pre-installed on most Amazon Linux and Ubuntu instances. If not, follow [this guide][aws cli user guide] to install and configure it.
+2. **Terraform:** Install Terraform using [this guide][terraform user guide].
+3. **kubectl:** Install kubectl following the instructions [here][kubectl].
+4. **Helm:** Install Helm using [this guide][helm].
+5. **k9s (Optional):** Install k9s [here][k9s] for a terminal-based UI for your Kubernetes cluster.
-##3. Infrastructure Deployment with Terraform
+## 3. Infrastructure Deployment with Terraform
Use the provided Terraform module to create your infrastructure:
@@ -152,7 +150,7 @@ gitops-repo/
touch common1/values/guppy.yaml
etc...
```
-3. Create a templates folder to house the Argocd application file:
+6. Create a templates folder to house the Argocd application file:
```
mkdir common1/templates
touch common1/templates/app.yaml
@@ -194,13 +192,13 @@ gitops-repo/
selfHeal: true
```
-4. Commit and Push to Repository
+7. Commit and Push to Repository
PLEASE NOTE!:
It is crucial to ensure that sensitive information, such as secret access keys, database passwords, and any other confidential data, is never uploaded to GitHub. This helps prevent unauthorized access and potential security breaches.
To securely manage sensitive data, we have incorporated external secrets into our Helm charts. Users can utilize this feature to safely handle and store their sensitive information.
- For more details on managing sensitive data using external secrets, please refer to our External Secrets Operator documentation [HERE](tutorial_secrets-mgr.md).
+ For more details on managing sensitive data using external secrets, please refer to our External Secrets Operator documentation [HERE][secrets manager].
Add and commit your changes:
```
@@ -263,3 +261,14 @@ spec:
automated:
selfHeal: true
``` -->
+
+
+
+
+[argo wrapper]: https://github.com/uc-cdis/argo-wrapper
+[aws cli user guide]: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
+[terraform user guide]: https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli
+[kubectl]: https://kubernetes.io/docs/tasks/tools/
+[helm]: https://helm.sh/docs/helm/helm_install/
+[k9s]: https://k9scli.io/topics/install/
+[secrets manager]: ../tutorial_secrets-mgr.md
diff --git a/gen3/docs/gen3-resources/operator-guide/helm/index.md b/gen3/docs/gen3-resources/operator-guide/helm/index.md
index afe9a1e3..87e90318 100644
--- a/gen3/docs/gen3-resources/operator-guide/helm/index.md
+++ b/gen3/docs/gen3-resources/operator-guide/helm/index.md
@@ -1,9 +1,66 @@
# Helm to Configure and Deploy Gen3
-https://docs.gen3.org/docs/Deployment/Helm%20Overview
+[**Helm**](https://helm.sh) plays a crucial role in simplifying the deployment and management of Gen3 components within your environment. It is a Kubernetes package manager that allows you to define, install, and upgrade even complex applications with ease.
-https://docs.gen3.org/docs/Deployment/Deployment%20Architecture/
+If you haven't already installed Helm for your Gen3 deployment:
-* Note: There’s some sensible defaults that ship with helm, and if you don’t provide overrides it will fall back to the defaults.
-* This is something we need to document more as Gen3 is HIGHLY configurable. Probably need a whole section, or a small book about this
-* This includes fence configuration, user yaml, data dictionary url, workspaces, etc etc. There’s some sensible defaults that ship with helm, and if you don’t provide overrides it will fall back to the defaults.
+ > **Installation**: Install Helm by following the [official Helm installation guide](https://helm.sh/docs/intro/install/) for your specific platform.
+
+## Why is Helm a good choice for Gen3 deployment?
+
+Gen3 Helm includes many sensible defaults for configuration. If you are just beginning to look into Gen3 and are uncertain about configuration decisions or options - you can do minimal configuration and the defaults will help you end up with a functional generic Gen3 instance to play with.
+
+But -- Gen3 is also highly configurable. If you are an experienced Gen3 user looking to make sophisticated configurations, Helm will facilitate that, as well.
+
+## Role of Helm in Gen3 Deployment
+
+In a Gen3 deployment, Helm serves as the primary tool for:
+
+- **Defining Deployments**: Helm uses configuration files called charts to define how Gen3 components should be deployed. These charts encapsulate the necessary configuration, dependencies, and deployment logic.
+
+- **Installation**: Helm streamlines the process of installing Gen3 components into your Kubernetes cluster. With Helm, you can easily deploy Gen3 services, databases, and other essential components.
+
+- **Configuration Management**: Helm simplifies the management of configuration settings for Gen3 services. You can customize settings, such as database connection details, service replicas, and more, through Helm values.
+
+- **Upgrades and Rollbacks**: As Gen3 evolves, Helm enables you to effortlessly upgrade your deployment to the latest versions. In case of issues, it also provides the ability to roll back to previous configurations.
+
+## Helm Charts, values.yamls, templates, and more
+
+In the Gen3 Helm repo, in the `/helm` directory, there are directories for each of the Gen3 microservices, as well as a directory called `/gen3`. Each of these folders is called a Helm "chart". Helm charts are packages of pre-configured Kubernetes resources that define and deploy microservices. A chart contains all of the resource definitions necessary to run the service inside of a Kubernetes cluster. In any Helm chart, there are several items:
+
+* **values.yaml:** The values.yaml in a Helm chart is what defines the variables relevant for the chart
+* **Chart.yaml:** The Chart.yaml is what makes the directory a chart; it defines the dependencies for the chart
+* **templates:** The templates have incomplete fields that are completed with values from the values.yaml. Templates generate manifest files that Kubernetes can understand.
+
+### Umbrella Chart for Coordinated Deployment (Gen3 Chart)
+
+To coordinate the deployment of our microservices and additional development-related resources, we use the `/gen3` Helm chart as the Gen3 "umbrella" chart. This "gen3" chart serves as the central point for deploying and managing our application as a whole.
+
+Values in the Gen3 umbrella values.yaml (also called the Gen3 values.yaml) can override default values in the microservice charts.
+
+>For example: in the Guppy microservice chart, the default is that Guppy is not enabled (`enabled: false`). We can turn on Guppy from the Gen3 values.yaml by adding a `guppy` block and including `enabled: true`, without making any changes to the Guppy values.yaml.
+
+**Most users should only be making configuration changes in the Gen3 values.yaml, using blocks specific for the service to be configured. Users generally should NOT be editing anything in the charts for the microservices.**
+
+The `gen3` chart includes the following components:
+
+* **Microservices Helm Charts:** All individual microservice Helm charts are incorporated into the `gen3` chart, ensuring a coordinated and cohesive deployment of the entire application stack.
+* **Development Resources:** Development tools and resources, such as PostgreSQL and Elasticsearch, are included in the `gen3` chart to streamline the development and testing process. These resources are essential for replicating the production environment in a development context.
+
+### Common Chart for Shared Components
+
+To streamline the deployment of shared components and utilities across our microservices, we have a dedicated **`common` Helm chart**. The common chart includes various components and configurations that are shared among multiple microservices. These components typically include:
+
+- **Database Setup Jobs**: Common database setup tasks, such as schema initialization and data migration jobs, are defined within the common chart. This ensures consistent database management across microservices.
+
+- **Secrets Management**: Shared secrets and credentials required by multiple microservices are securely managed within the common chart. This centralization enhances security and simplifies secrets management.
+
+- **Shared Components**: Other components and utilities that are reused across microservices. This promotes code reuse and maintainability.
+
+By centralizing these common features in a dedicated chart, we reduce redundancy, ensure consistency, and simplify the maintenance and updates of shared components.
+
+## The benefits of charts
+
+- **Isolation**: Each microservice operates independently within its own Helm chart, allowing for isolation and decoupling of services. This isolation enhances fault tolerance and simplifies updates and maintenance.
+
+- **Customization**: Microservices can have their specific configurations and dependencies defined within their Helm charts, making it easier to tailor each service to its unique requirements.
diff --git a/gen3/docs/gen3-resources/operator-guide/prerequisites.md b/gen3/docs/gen3-resources/operator-guide/prerequisites.md
index f62d3783..330683d2 100644
--- a/gen3/docs/gen3-resources/operator-guide/prerequisites.md
+++ b/gen3/docs/gen3-resources/operator-guide/prerequisites.md
@@ -133,277 +133,6 @@ For detailed information on setting up Argo Workflows and configuring the `argo-
-## Tutorial for Production Deployment
-
-
-This guide walks you through deploying Gen3 in a production environment on AWS using Infrastructure as Code (IaC), Kubernetes, and GitOps best practices. This approach ensures a robust, scalable, and repeatable deployment process.
-
-### Key Benefits
-
-- **Infrastructure as Code (IaC)**
- - **Efficiency:** Streamlines and automates infrastructure provisioning, eliminating error-prone manual steps.
- - **Reproducibility:** Easily create consistent environments, reducing deployment inconsistencies.
- - **Scalability:** Quickly scale up or down to meet demand, adapting to changing requirements.
-- **Kubernetes**
- - **Robust Platform:** Leverages Kubernetes for container orchestration, providing scalability and resilience for your Gen3 applications.
- - **Extensive Ecosystem:** Tap into a vast array of tools and resources to customize and extend your Kubernetes deployment.
-- **GitOps**
- - **Version Control:** Manage your infrastructure and application configurations in Git for better tracking and rollback capabilities.
- - **Automation:** Automate deployments and updates, reducing manual intervention and risk of errors.
-- **Secrets Management**
- - **Security:** Securely store and manage sensitive credentials (e.g., database passwords, API keys) using AWS Secrets Manager.
- - **Seamless Integration:** Easily retrieve secrets from within your applications, adhering to security best practices.
-
-
-
-### 1. Admin VM (Jump Box) Setup
-
-The Admin VM, also known as a jump box or bastion host, serves as your secure entry point into your production environment. You'll control your Gen3 deployment from this machine, so it's crucial to follow best practices to protect it:
-
-1. **Create a Dedicated EC2 Instance:**
- - Launch a new EC2 instance specifically for your Admin VM. Avoid using existing instances or shared resources.
- - Choose an instance type with appropriate resources for your workload (e.g., t3.medium or similar).
-
-2. **Security Groups (Firewall):**
- - Restrict inbound traffic to the Admin VM:
- - Allow SSH access (port 22) only from your trusted IP addresses or a specific bastion host security group.
- - Limit other incoming traffic (e.g., RDP) as needed.
- - Outbound traffic can be more permissive, allowing access to your Gen3 infrastructure components and other necessary AWS services.
-
-3. **SSH Key Pair:**
- - Create a new SSH key pair specifically for the Admin VM. Avoid using default or shared key pairs.
- - Securely store the private key on your local machine and never share it.
-
-4. **OS Hardening:**
- - Choose a minimal base operating system (e.g., Ubuntu Server, Amazon Linux 2) and apply updates regularly.
- - Disable unnecessary services and protocols.
- - Enforce strong password policies or, ideally, use SSH key-based authentication exclusively.
-
-5. **Additional Security Measures (Recommended):**
- - Enable multi-factor authentication (MFA) for SSH access.
- - Use a centralized logging solution to monitor access and activity on the Admin VM.
- - Regularly review and update security groups as needed.
- - Consider implementing intrusion detection or prevention systems (IDS/IPS).
-
-6. **Connect to Admin VM:**
- - Use SSH from your local machine with the private key you created:
- ```bash
- ssh -i your_private_key.pem ec2-user@
- ```
-
-### 2. Admin VM Software Installation
-
-Install the following tools on your Admin VM:
-
-1. **AWS CLI:** Pre-installed on most Amazon Linux and Ubuntu instances. If not, follow [this guide][aws cli user guide] to install and configure it.
-2. **Terraform:** Install Terraform using [this guide][terraform user guide].
-3. **kubectl:** Install kubectl following the instructions [here][kubectl].
-4. **Helm:** Install Helm using [this guide][helm].
-5. **k9s (Optional):** Install k9s [here][k9s]for a terminal-based UI for your Kubernetes cluster.
-
-### 3. Infrastructure Deployment with Terraform
-
-Use the provided Terraform module to create your infrastructure:
-
-```terraform
-# ... (Your Terraform configuration)
-```
-
-Customize: Adjust the Terraform variables to match your desired configuration.
-Plan & Apply: Run terraform plan to preview the changes and terraform apply to create the infrastructure.
-
-### 4. GitOps Repository Structure for Helm Charts and Terraform Outputs
-
-This section outlines the steps to create a GitOps repository structure where you can upload a values.yaml file generated by Terraform and manage multiple Helm charts for different environments (referred to as "commons").
-
-Please see an example of a gen3 gitops repository here: https://github.com/uc-cdis/gitops-example/tree/master
-
-#### Repository Structure:
-
-The repository will have the following structure:
-
-```
-gitops-repo/
-├── commons1/
-│ ├── Chart.yaml
-│ ├── values/
-│ │ │
-│ │ └── values.yaml (Terraform output)
-│ ├── templates/
-│ │ └── app.yaml
-├── commons2/
-│ ├── Chart.yaml
-│ ├── values/
-│ │ │
-│ │ └── values.yaml (Terraform output)
-│ ├── templates/
-│ │ └── app.yaml
-```
-
-#### Step-by-Step Instructions:
-1. Create the GitOps Repository
-2. Initialize a new Git repository:
- ```
- git init gitops-gen3
- cd gitops-gen3
- ```
-3. Create a directory for each "common" (environment):
- ```
- mkdir common1 common2
- ```
-4. Setting Up the Common Directory
- For each common directory (e.g., common1), create the following structure:
- ```
- touch common1/Chart.yaml
- ```
- Example `Chart.yaml` content:
- ```
- apiVersion: v2
- name: common1.org
- description: common1.org argo application
-
- type: application
-
- # This is the chart version. This version number should be incremented each time you make changes
- # to the chart and its templates, including the app version.
- # Versions are expected to follow Semantic Versioning (https://semver.org/)
- version: 0.1.0
-
- # This is the version number of the application being deployed. This version number should be
- # incremented each time you make changes to the application. Versions are not expected to
- # follow Semantic Versioning. They should reflect the version the application is using.
- appVersion: "1.0"
- ```
-5. Create a values folder to organize values files:
- ```
- mkdir common1/values
- ```
- Place the main Terraform output file and additional values files in this folder:
- ```
- touch common1/values/values.yaml
- touch common1/values/fence.yaml
- touch common1/values/guppy.yaml
- etc...
- ```
-6. Create a templates folder to house the Argocd application file:
- ```
- mkdir common1/templates
- touch common1/templates/app.yaml
- ```
- Example app.yaml content:
- ```
- apiVersion: argoproj.io/v1alpha1
- kind: Application
- metadata:
- name: gen3-commons1
- namespace: argocd
- finalizers:
- - resources-finalizer.argocd.argoproj.io
- spec:
- project: default
- sources:
- - path: helm/gen3
- repoURL: https://github.com/uc-cdis/gen3-helm
- targetRevision: master
- helm:
- releaseName: commons1
- valueFiles:
- - $values/commons1.org/values/values.yaml
- - $values/commons1.org/values/fence.yaml
- - $values/commons1.org/values/portal.yaml
- - $values/commons1.org/values/guppy.yaml
- - $values/commons1.org/values/hatchery.yaml
- - $values/commons1.org/values/etl.yaml
- - repoURL: 'https://github.com/uc-cdis/gen3-gitops.git'
- targetRevision: master
- ref: values
- destination:
- server: "https://kubernetes.default.svc"
- namespace: default
- syncPolicy:
- syncOptions:
- - CreateNamespace=true
- automated:
- selfHeal: true
- ```
-
-7. Commit and Push to Repository
- PLEASE NOTE!:
- It is crucial to ensure that sensitive information, such as secret access keys, database passwords, and any other confidential data, is never uploaded to GitHub. This helps prevent unauthorized access and potential security breaches.
-
- To securely manage sensitive data, we have incorporated external secrets into our Helm charts. Users can utilize this feature to safely handle and store their sensitive information.
-
- For more details on managing sensitive data using external secrets, please refer to our External Secrets Operator documentation [HERE][secrets manager].
-
- Add and commit your changes:
- ```
- git add .
- git commit -m "Initial commit with common1 structure and Terraform output"
- ```
- Push to your remote repository:
- ```
- git remote add origin
- git push -u origin main
- ```
-
-
-
-
[argo wrapper]: https://github.com/uc-cdis/argo-wrapper
-[aws cli user guide]: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html
-[terraform user guide]: https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli
-[kubectl]: https://kubernetes.io/docs/tasks/tools/
-[helm]: https://helm.sh/docs/helm/helm_install/
-[k9s]: https://k9scli.io/topics/install/
-[secrets manager]: tutorial_secrets-mgr.md
diff --git a/gen3/mkdocs.yml b/gen3/mkdocs.yml
index ab39929b..a4cff528 100644
--- a/gen3/mkdocs.yml
+++ b/gen3/mkdocs.yml
@@ -20,7 +20,7 @@ nav:
- Gen3 Operator Guide - Deploy Gen3:
- gen3-resources/operator-guide/index.md
# homepage for operator guide, describes contents and links to different sections
- - Overview for Deploying Gen3: gen3-resources/operator-guide/deploy-gen3.md
+ #- Overview for Deploying Gen3: gen3-resources/operator-guide/deploy-gen3.md
# reviews all major steps for deploying Gen3 for production
- Prerequisites for Deploying Gen3: gen3-resources/operator-guide/prerequisites.md
- Pre-Deployment:
@@ -31,18 +31,19 @@ nav:
- Atlantis: gen3-resources/operator-guide/iac-atlantis.md
- Prepare SSL Certificate: gen3-resources/operator-guide/ssl-cert.md
- Authentication Methods: gen3-resources/operator-guide/gen3-authn-methods.md
- - Setup Gen3 with Helm:
+ - Set up Gen3 (Configure and Deploy) with Helm:
- gen3-resources/operator-guide/helm/index.md
+ - Deployment Overview: gen3-resources/operator-guide/helm/helm-deploy-overview.md
- Example - Minimal Local Deployment: gen3-resources/operator-guide/helm/helm-deploy-example.md
+ - Example - Production Deployment: gen3-resources/operator-guide/helm/helm-deploy-production-example.md
+ - Databases in Gen3 Helm charts: gen3-resources/operator-guide/helm/helm-deploy-databases.md
- Configure services with Helm:
- - AuthN/AuthZ: gen3-resources/operator-guide/helm/helm-config-auth.md
- - Data-related Services: gen3-resources/operator-guide/helm/helm-config-data-svcs.md
- - Front End: gen3-resources/operator-guide/helm/helm-config-frontend.md
- - Workspaces: gen3-resources/operator-guide/helm/helm-config-workspaces.md
- - Revproxy: gen3-resources/operator-guide/helm/helm-config-revproxy.md
- - Deployment with Helm:
- - Deployment Overview: gen3-resources/operator-guide/helm/helm-deploy-overview.md
- - Databases in Gen3 Helm charts: gen3-resources/operator-guide/helm/helm-deploy-databases.md
+ - gen3-resources/operator-guide/helm/helm-config/index.md
+ - AuthN/AuthZ: gen3-resources/operator-guide/helm/helm-config/helm-config-auth.md
+ - Data-related Services: gen3-resources/operator-guide/helm/helm-config/helm-config-data-svcs.md
+ - Front End: gen3-resources/operator-guide/helm/helm-config/helm-config-frontend.md
+ - Workspaces: gen3-resources/operator-guide/helm/helm-config/helm-config-workspaces.md
+ - Revproxy: gen3-resources/operator-guide/helm/helm-config/helm-config-revproxy.md
- Post-Deployment:
- Create Data Dictionary: gen3-resources/operator-guide/create-data-dictionary.md
# contains info about creating (and maintaining?) a data dictionary
@@ -77,8 +78,6 @@ theme:
favicon: img/Gen3-3.ico
logo: img/gen3_new_logo_white.svg
name: material
- features:
- - navigation.footer
custom_dir: overrides
palette:
- primary: black
@@ -98,6 +97,7 @@ theme:
icon: material/eye-outline
name: Switch to light mode
features:
+ - navigation.footer
- navigation.indexes
- navigation.tracking
- navigation.path
From df04533414e071a301b2b02bef776b5a01324596 Mon Sep 17 00:00:00 2001
From: Alexander VanTol
Date: Fri, 13 Dec 2024 09:21:07 -0600
Subject: [PATCH 2/4] =?UTF-8?q?docs(updates):=20update=20precommit=20and?=
=?UTF-8?q?=20fix=20whitespace,=20comments/updates/=E2=80=A6=20(#289)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* docs(updates): update precommit and fix whitespace, comments/updates/suggestions throughout
* Update architecture.md
formatting update
* response to review
* Update using-api.md (#292)
* Update using-api.md
change the code so users are not told to paste their API key into their code, but instead reference the file
* Update using-api.md
update environment reference to https://gen3.datacommons.io
* remove comment
* Update gen3/docs/gen3-resources/user-guide/search.md
Co-authored-by: Alexander VanTol
* Update gen3/docs/gen3-resources/user-guide/search.md
Co-authored-by: Alexander VanTol
* Update using-api.md
Fix Typo
* removing AggMDS repo from docs
---------
Co-authored-by: michaelfitzo <116322184+michaelfitzo@users.noreply.github.com>
Co-authored-by: michaelfitzo
Co-authored-by: smvgarcia <111767892+smvgarcia@users.noreply.github.com>
---
.gitignore | 2 +-
.pre-commit-config.yaml | 2 +-
.secrets.baseline | 8 +-
archetypes/default.md | 1 -
config.yaml | 4 +-
content/community/events.md | 2 +-
content/community/medium.md | 1 -
content/community/webinars/coming_soon.svg | 2 +-
content/figs/BioTeam-Logo_2021.svg | 2 +-
content/figs/crdc_logo_color.svg | 2 +-
content/figs/dcf_gears.svg | 2 +-
content/figs/features/api.svg | 2 +-
content/figs/features/auth.svg | 2 +-
content/figs/features/build-apps.svg | 2 +-
content/figs/features/index.svg | 2 +-
content/figs/features/search.svg | 2 +-
content/figs/features/set-up.svg | 2 +-
content/figs/features/submit-data.svg | 2 +-
content/figs/kv.svg | 2 +-
content/figs/productkv.svg | 2 +-
content/figs/roles/icon-bio.svg | 2 +-
content/figs/roles/icon-devs.svg | 2 +-
content/figs/roles/icon-research.svg | 2 +-
content/figs/webinar.svg | 2 +-
content/ga4gh.md | 4 +-
content/gen3-tools.md | 12 +--
content/gen3refs.md | 4 +-
content/powered-by-gen3.md | 2 +-
content/resources/developer/build-app.md | 2 +-
content/resources/developer/tech-intro.md | 1 -
content/resources/developer/ui-library.md | 1 -
.../resources/faq/img/cloud-automation.svg | 2 +-
.../resources/faq/img/compose-services.svg | 2 +-
.../operator/img/cloud-automation.svg | 2 +-
.../operator/img/compose-services.svg | 2 +-
content/resources/user/analyze-data.md | 6 +-
...gen3_core_metadata_collection_template.tsv | 2 +-
...gen3_core_metadata_collection_template.tsv | 2 +-
.../resources/user/template-tsvs/aliquot.tsv | 2 +-
.../resources/user/template-tsvs/analyte.tsv | 2 +-
.../user/template-tsvs/biospecimen.tsv | 2 +-
.../user/template-tsvs/cell_image.tsv | 2 +-
.../template-tsvs/contrived_expectations.tsv | 2 +-
.../user/template-tsvs/demographic.tsv | 2 +-
.../user/template-tsvs/diagnosis.tsv | 2 +-
.../resources/user/template-tsvs/exposure.tsv | 2 +-
.../user/template-tsvs/family_history.tsv | 2 +-
.../resources/user/template-tsvs/followup.tsv | 2 +-
.../user/template-tsvs/immunoassay.tsv | 2 +-
.../template-tsvs/mass_cytometry_assay.tsv | 2 +-
.../template-tsvs/mass_cytometry_image.tsv | 2 +-
.../user/template-tsvs/pcr_assay.tsv | 2 +-
.../resources/user/template-tsvs/protocol.tsv | 2 +-
.../template-tsvs/quantification_assay.tsv | 2 +-
.../user/template-tsvs/read_group.tsv | 2 +-
.../user/template-tsvs/read_group_qc.tsv | 2 +-
.../resources/user/template-tsvs/sample.tsv | 2 +-
.../user/template-tsvs/sequencing_assay.tsv | 2 +-
.../user/template-tsvs/slide_image.tsv | 2 +-
.../user/template-tsvs/study copy.tsv | 2 +-
.../template-tsvs/submitted_aligned_reads.tsv | 2 +-
.../template-tsvs/submitted_copy_number.tsv | 2 +-
.../template-tsvs/submitted_methylation.tsv | 2 +-
.../submitted_somatic_mutation.tsv | 2 +-
.../submitted_unaligned_reads.tsv | 2 +-
.../user/template-tsvs/treatment.tsv | 2 +-
.../developer-guide/architecture.md | 100 ++++++++++++++----
.../developer-guide/contribute.md | 10 +-
.../gen3-resources/developer-guide/index.md | 2 +-
.../developer-guide/microservices.md | 40 ++++---
gen3/docs/gen3-resources/glossary.md | 65 +++++-------
gen3/docs/gen3-resources/index.md | 25 ++---
.../operator-guide/authorization.md | 4 +-
.../operator-guide/customize-search.md | 2 +-
.../operator-guide/img/cloud-automation.svg | 2 +-
.../operator-guide/img/compose-services.svg | 2 +-
...gen3_core_metadata_collection_template.tsv | 2 +-
.../submit-unstructured-data.md | 2 +-
.../operator-guide/tutorial_alloy.md | 2 +-
.../operator-guide/tutorial_faro.md | 8 +-
.../tutorial_global_IAM_helm_user.md | 12 +--
.../gen3-resources/user-guide/access-data.md | 19 ++--
.../gen3-resources/user-guide/analyze-data.md | 30 ++++--
gen3/docs/gen3-resources/user-guide/portal.md | 17 ++-
gen3/docs/gen3-resources/user-guide/search.md | 84 +++++++++++----
.../gen3-resources/user-guide/using-api.md | 27 ++---
gen3/docs/index.md | 8 +-
layouts/_default/baseof.html | 2 +-
layouts/_default/list.html | 2 +-
layouts/_default/single.html | 2 +-
layouts/_default/withtoc.html | 2 +-
layouts/partials/footer.html | 2 +-
layouts/partials/ga.html | 4 +-
layouts/partials/header.html | 10 +-
layouts/shortcodes/makelist.html | 2 +-
layouts/shortcodes/markdownwrapper.html | 2 +-
layouts/shortcodes/param.html | 2 +-
static/css/footer.css | 2 +-
static/css/markdown.css | 2 +-
static/img/icons/menu.svg | 2 +-
static/img/icons/x.svg | 2 +-
101 files changed, 374 insertions(+), 281 deletions(-)
diff --git a/.gitignore b/.gitignore
index b5017121..db64ef4f 100755
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,4 @@ themes_used
*.DS_Store
./resources/
*/_gen/
-.idea
\ No newline at end of file
+.idea
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9f681f62..7ab78219 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ repos:
- id: detect-secrets
args: ['--baseline', '.secrets.baseline']
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v2.5.0
+ rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
diff --git a/.secrets.baseline b/.secrets.baseline
index cb09e10e..c65c4071 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -368,7 +368,7 @@
"filename": "gen3/docs/gen3-resources/user-guide/analyze-data.md",
"hashed_secret": "98fd178574fc77b708149e559bf75052cc3c81a6",
"is_verified": false,
- "line_number": 215
+ "line_number": 227
}
],
"gen3/docs/gen3-resources/user-guide/notebooks/notebook2_canine.ipynb": [
@@ -500,16 +500,16 @@
"filename": "gen3/docs/gen3-resources/user-guide/search.md",
"hashed_secret": "13333fbf17a0b71d02dda8e6e5c195263bfe03d6",
"is_verified": false,
- "line_number": 138
+ "line_number": 146
},
{
"type": "Hex High Entropy String",
"filename": "gen3/docs/gen3-resources/user-guide/search.md",
"hashed_secret": "8595e69444146fbf187e8c512b6fdc66f8da1d23",
"is_verified": false,
- "line_number": 162
+ "line_number": 170
}
]
},
- "generated_at": "2024-12-12T11:53:58Z"
+ "generated_at": "2024-12-09T16:54:41Z"
}
diff --git a/archetypes/default.md b/archetypes/default.md
index 00e77bd7..26f317f3 100644
--- a/archetypes/default.md
+++ b/archetypes/default.md
@@ -3,4 +3,3 @@ title: "{{ replace .Name "-" " " | title }}"
date: {{ .Date }}
draft: true
---
-
diff --git a/config.yaml b/config.yaml
index e7758f54..619c5620 100644
--- a/config.yaml
+++ b/config.yaml
@@ -334,8 +334,8 @@ params:
link: /resources/user/analyze-data/#1-launch-workspace
- name: Getting Files into the Gen3 Workspace
link: /resources/user/analyze-data/#2-getting-files-into-the-gen3-workspace
- - name: Working with the proxy and whitelists
- link: /resources/user/analyze-data/#3-working-with-the-proxy-and-whitelists
+ - name: Working with the proxy and allow lists
+ link: /resources/user/analyze-data/#3-working-with-the-proxy-and-allow-lists
- name: Using the Gen3 Python SDK
link: /resources/user/analyze-data/#4-using-the-gen3-python-sdk
- name: Jupyter Notebook Demos
diff --git a/content/community/events.md b/content/community/events.md
index baf5eff3..ef41815a 100644
--- a/content/community/events.md
+++ b/content/community/events.md
@@ -56,7 +56,7 @@ g3past:
Slides: Gen3 Forum September 6 2023 - Security and Compliance.pdf
- Title: Data Modeling in Gen3 Data Commons
Date: July 6, 2023, 3:30-5:00 pm CDT; July 7, 2023, 6:30-8:00 am AEST
- Description: Gen3 supports a flexible graph-based data model, which can be customized for a wide variety of projects and use cases. At this community event we will hear from several data commons operators on how they have created their dictionaries and about any tools or processes they use for updating and configuring them.
+ Description: Gen3 supports a flexible graph-based data model, which can be customized for a wide variety of projects and use cases. At this community event we will hear from several data commons operators on how they have created their dictionaries and about any tools or processes they use for updating and configuring them.
Agenda2: /community/events/gen3forum_20230706
Youtube: xjFUYa4C_hw
Slides: Gen3 Forum July 6 2023 - Data Models.pdf
diff --git a/content/community/medium.md b/content/community/medium.md
index 15ee129e..36a4d0d8 100644
--- a/content/community/medium.md
+++ b/content/community/medium.md
@@ -5,4 +5,3 @@ linktitle: /community/medium
date: 2018-09-04T22:16:21-05:00
draft: true
---
-
diff --git a/content/community/webinars/coming_soon.svg b/content/community/webinars/coming_soon.svg
index 77255825..96b05875 100755
--- a/content/community/webinars/coming_soon.svg
+++ b/content/community/webinars/coming_soon.svg
@@ -116,4 +116,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/BioTeam-Logo_2021.svg b/content/figs/BioTeam-Logo_2021.svg
index c6633041..1397d9f2 100644
--- a/content/figs/BioTeam-Logo_2021.svg
+++ b/content/figs/BioTeam-Logo_2021.svg
@@ -310,4 +310,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/crdc_logo_color.svg b/content/figs/crdc_logo_color.svg
index d8540d89..c4b53b56 100644
--- a/content/figs/crdc_logo_color.svg
+++ b/content/figs/crdc_logo_color.svg
@@ -1 +1 @@
-
\ No newline at end of file
+
diff --git a/content/figs/dcf_gears.svg b/content/figs/dcf_gears.svg
index ad447399..e4cf9d9c 100644
--- a/content/figs/dcf_gears.svg
+++ b/content/figs/dcf_gears.svg
@@ -2,7 +2,7 @@
\ No newline at end of file
+
diff --git a/content/figs/features/auth.svg b/content/figs/features/auth.svg
index b636cf69..386854e9 100755
--- a/content/figs/features/auth.svg
+++ b/content/figs/features/auth.svg
@@ -30,4 +30,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/features/build-apps.svg b/content/figs/features/build-apps.svg
index 9bb1d0d4..bbb98d4f 100755
--- a/content/figs/features/build-apps.svg
+++ b/content/figs/features/build-apps.svg
@@ -46,4 +46,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/features/index.svg b/content/figs/features/index.svg
index 39195379..2e059265 100755
--- a/content/figs/features/index.svg
+++ b/content/figs/features/index.svg
@@ -33,4 +33,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/features/search.svg b/content/figs/features/search.svg
index e96d86aa..ebc3c986 100755
--- a/content/figs/features/search.svg
+++ b/content/figs/features/search.svg
@@ -88,4 +88,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/features/set-up.svg b/content/figs/features/set-up.svg
index 506b21b6..512afddf 100755
--- a/content/figs/features/set-up.svg
+++ b/content/figs/features/set-up.svg
@@ -59,4 +59,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/features/submit-data.svg b/content/figs/features/submit-data.svg
index 5977547e..6782f6d5 100755
--- a/content/figs/features/submit-data.svg
+++ b/content/figs/features/submit-data.svg
@@ -47,4 +47,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/kv.svg b/content/figs/kv.svg
index 3429450b..48ef70db 100755
--- a/content/figs/kv.svg
+++ b/content/figs/kv.svg
@@ -242,4 +242,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/productkv.svg b/content/figs/productkv.svg
index 851de92b..2b17cbe1 100755
--- a/content/figs/productkv.svg
+++ b/content/figs/productkv.svg
@@ -509,4 +509,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/roles/icon-bio.svg b/content/figs/roles/icon-bio.svg
index 9f43888e..c4d7664e 100755
--- a/content/figs/roles/icon-bio.svg
+++ b/content/figs/roles/icon-bio.svg
@@ -21,4 +21,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/roles/icon-devs.svg b/content/figs/roles/icon-devs.svg
index f73982d9..610d804a 100755
--- a/content/figs/roles/icon-devs.svg
+++ b/content/figs/roles/icon-devs.svg
@@ -26,4 +26,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/roles/icon-research.svg b/content/figs/roles/icon-research.svg
index 65614603..0696824e 100755
--- a/content/figs/roles/icon-research.svg
+++ b/content/figs/roles/icon-research.svg
@@ -24,4 +24,4 @@
-
\ No newline at end of file
+
diff --git a/content/figs/webinar.svg b/content/figs/webinar.svg
index e7cac1d2..5aeb5c7e 100644
--- a/content/figs/webinar.svg
+++ b/content/figs/webinar.svg
@@ -199,4 +199,4 @@
-
\ No newline at end of file
+
diff --git a/content/ga4gh.md b/content/ga4gh.md
index 03fc8788..8dea4a5f 100644
--- a/content/ga4gh.md
+++ b/content/ga4gh.md
@@ -49,9 +49,9 @@ g3Feature:
Driver Projects
- Gen3 technology is used by four different driver projects:
+ Gen3 technology is used by four different driver projects:
-
The Biomedical Research Hub, which as a data fabric, enables access to multiple independent data commons containing biomedical data.
+
The Biomedical Research Hub, which as a data fabric, enables access to multiple independent data commons containing biomedical data.
The Data Commons Framework Services, which underpins interoperability within the Cancer Research Data Commons.
The NHLBI BioData Catalyst (BDC) is a cloud-based ecosystem that offers researchers data, analytic tools, applications, and workflows in secure workspaces. It is a community where researchers can find, access, share, store, and analyze heart, lung, blood, and sleep data resources.
The NIH Cloud Platform Interoperability (NCPI) program seeks to create a federated genomic data ecosystem and is a collaborative project between NIH and external partners comprising three working groups.
diff --git a/content/gen3-tools.md b/content/gen3-tools.md
index cadf7f26..f214c03d 100644
--- a/content/gen3-tools.md
+++ b/content/gen3-tools.md
@@ -18,21 +18,21 @@ g3Starts:
s2:
name: gen3utils
fig: /figs/UChicago_CTDS_Vertical_Color RGB.svg
- detail: Utilities for Gen3 management including validation tools. Created by the Center for Translational Data Science at the University of Chicago.
+ detail: Utilities for Gen3 management including validation tools. Created by the Center for Translational Data Science at the University of Chicago.
button:
caption: GitHub
link: https://github.com/uc-cdis/gen3utils
s3:
name: g3t
fig: /figs/OHSU-logo.png
- detail: Gen3 Tracker (g3t) includes utilities to manage Gen3 schemas, projects and submissions. Tools include those to create and manage metadata, manipulate file manifests, status checks, metadata validation, user access, and others.
+ detail: Gen3 Tracker (g3t) includes utilities to manage Gen3 schemas, projects and submissions. Tools include those to create and manage metadata, manipulate file manifests, status checks, metadata validation, user access, and others.
button:
caption: GitHub
link: https://github.com/ACED-IDP/gen3_util
s4:
name: g3t_etl
fig: /figs/OHSU-logo.png
- detail: g3t_etl is a command-line tool and library designed to manage Gen3 metadata including transformations to and from FHIR formatted data. The tool provides various commands, each serving a specific purpose.
+ detail: g3t_etl is a command-line tool and library designed to manage Gen3 metadata including transformations to and from FHIR formatted data. The tool provides various commands, each serving a specific purpose.
button:
caption: GitHub
link: https://github.com/ACED-IDP/g3t_etl
@@ -99,7 +99,7 @@ g3Starts:
{{< param "g3Starts.s2.name" >}}
- Kim, Erika, et al. "NCI cancer research data commons: lessons learned and future state." Cancer Research 84.9 (2024): 1404-1409.
+ Kim, Erika, et al. "NCI cancer research data commons: lessons learned and future state." Cancer Research 84.9 (2024): 1404-1409.
View Article
@@ -92,7 +92,7 @@ The Gen3 data platform is used as the basis for many data repositories and enabl
Wyatt, Kirk D., et al. "An open-source platform for pediatric cancer data exploration: a report from Data for the Common Good." JAMIA open 7.1 (2024): ooae004.
View Article
{{< param "g3Teaser.detail" >}}
diff --git a/content/resources/developer/build-app.md b/content/resources/developer/build-app.md
index e9eff12f..9178144f 100644
--- a/content/resources/developer/build-app.md
+++ b/content/resources/developer/build-app.md
@@ -35,7 +35,7 @@ The standard GraphQL endpoint for a Gen3 commons is available via POST at `/api/
{"query":"query { project(first:0) {code, project_id, availability_type}}"}
```
-* First: 0 means return all, otherwise it defaults to the first 10.
+* First: 0 means return all, otherwise it defaults to the first 10.
* Project_id = - used to reference any projects
* Code = project name in the project node
* Availability_type = is this project open access to everyone or not
diff --git a/content/resources/developer/tech-intro.md b/content/resources/developer/tech-intro.md
index c03a76ca..69a42ded 100644
--- a/content/resources/developer/tech-intro.md
+++ b/content/resources/developer/tech-intro.md
@@ -6,4 +6,3 @@ layout: withtoc
menuname: developerMenu
draft: true
---
-
diff --git a/content/resources/developer/ui-library.md b/content/resources/developer/ui-library.md
index 28203863..c9558154 100644
--- a/content/resources/developer/ui-library.md
+++ b/content/resources/developer/ui-library.md
@@ -6,4 +6,3 @@ draft: true
layout: withtoc
menuname: developerMenu
---
-
diff --git a/content/resources/faq/img/cloud-automation.svg b/content/resources/faq/img/cloud-automation.svg
index d7a44b1a..8d2b6ead 100644
--- a/content/resources/faq/img/cloud-automation.svg
+++ b/content/resources/faq/img/cloud-automation.svg
@@ -49,4 +49,4 @@
-
\ No newline at end of file
+
diff --git a/content/resources/faq/img/compose-services.svg b/content/resources/faq/img/compose-services.svg
index 9c847594..9d4cbbb2 100644
--- a/content/resources/faq/img/compose-services.svg
+++ b/content/resources/faq/img/compose-services.svg
@@ -33,4 +33,4 @@
-
\ No newline at end of file
+
diff --git a/content/resources/operator/img/cloud-automation.svg b/content/resources/operator/img/cloud-automation.svg
index d7a44b1a..8d2b6ead 100644
--- a/content/resources/operator/img/cloud-automation.svg
+++ b/content/resources/operator/img/cloud-automation.svg
@@ -49,4 +49,4 @@
-
\ No newline at end of file
+
diff --git a/content/resources/operator/img/compose-services.svg b/content/resources/operator/img/compose-services.svg
index 9c847594..9d4cbbb2 100644
--- a/content/resources/operator/img/compose-services.svg
+++ b/content/resources/operator/img/compose-services.svg
@@ -33,4 +33,4 @@
-
\ No newline at end of file
+
diff --git a/content/resources/user/analyze-data.md b/content/resources/user/analyze-data.md
index 80e9c693..32fe3c3c 100644
--- a/content/resources/user/analyze-data.md
+++ b/content/resources/user/analyze-data.md
@@ -15,7 +15,7 @@ The Gen3 platform for creating data commons co-locates data management with anal
* [1. Launch Workspace](#1-launch-workspace)
* [2. Getting Files into the Gen3 Workspace](#2-getting-files-into-the-gen3-workspace)
-* [3. Working with the proxy and whitelists](#3-working-with-the-proxy-and-whitelists)
+* [3. Working with the proxy and allow lists](#3-working-with-the-proxy-and-allow-lists)
* [4. Using the Gen3 Python SDK](#4-using-the-gen3-python-sdk)
* [5. Jupyter Notebook Demos](#5-jupyter-notebook-demos)
@@ -133,7 +133,7 @@ GSM1558854_Sample40_3.CEL.gz 4.20 MiB / 4.20 MiB [====================....
jovyan@jupyter-user:~$ mv *.gz files
```
-## 3. Working with the proxy and whitelists
+## 3. Working with the proxy and allow lists
* * *
Working with the Proxy
@@ -151,7 +151,7 @@ Alternatively, if you have a different service or a tool that needs to call out,
https_proxy=https://cloud-proxy.internal.io:3128 aws s3 ls s3://gen3-data/ --profile
```
-
Whitelists
+
Allow lists
Additionally, to aid Gen3 Commons security, the installation of tools from outside resources is managed through a whitelist. If you have problems installing a tool you need for your work, contact and with a list of any sites you might wish to install tools from. After passing a security review, these can be added to the whitelist to facilitate access.
diff --git a/content/resources/user/img/gen3_core_metadata_collection_template.tsv b/content/resources/user/img/gen3_core_metadata_collection_template.tsv
index 12d72853..a696cb83 100644
--- a/content/resources/user/img/gen3_core_metadata_collection_template.tsv
+++ b/content/resources/user/img/gen3_core_metadata_collection_template.tsv
@@ -1 +1 @@
-type project_id submitter_id projects.code contributor coverage creator data_type date description format language publisher relation rights source subject title
core_metadata_collection example-training collection-01 training
\ No newline at end of file
+type project_id submitter_id projects.code contributor coverage creator data_type date description format language publisher relation rights source subject title
core_metadata_collection example-training collection-01 training
diff --git a/content/resources/user/submit-data/gen3_core_metadata_collection_template.tsv b/content/resources/user/submit-data/gen3_core_metadata_collection_template.tsv
index 12d72853..a696cb83 100644
--- a/content/resources/user/submit-data/gen3_core_metadata_collection_template.tsv
+++ b/content/resources/user/submit-data/gen3_core_metadata_collection_template.tsv
@@ -1 +1 @@
-type project_id submitter_id projects.code contributor coverage creator data_type date description format language publisher relation rights source subject title
core_metadata_collection example-training collection-01 training
\ No newline at end of file
+type project_id submitter_id projects.code contributor coverage creator data_type date description format language publisher relation rights source subject title
core_metadata_collection example-training collection-01 training
diff --git a/content/resources/user/template-tsvs/aliquot.tsv b/content/resources/user/template-tsvs/aliquot.tsv
index 694adbd2..d779b0ef 100755
--- a/content/resources/user/template-tsvs/aliquot.tsv
+++ b/content/resources/user/template-tsvs/aliquot.tsv
@@ -1,2 +1,2 @@
type submitter_id samples.submitter_id aliquot_container clinical_or_contrived hours_to_freezer_lower hours_to_freezer_upper preservation_method storage_temperature aliquot_quantity aliquot_volume contrivance_method methanol_added project_id
-aliquot
\ No newline at end of file
+aliquot
diff --git a/content/resources/user/template-tsvs/analyte.tsv b/content/resources/user/template-tsvs/analyte.tsv
index 551f0cdd..95e667d9 100755
--- a/content/resources/user/template-tsvs/analyte.tsv
+++ b/content/resources/user/template-tsvs/analyte.tsv
@@ -1,2 +1,2 @@
type submitter_id aliquots.submitter_id analyte_isolation_method analyte_type cell_identifier cell_type days_to_assay frame_identifier llod project_id run_identifier sensitivity specificity
-analyte
\ No newline at end of file
+analyte
diff --git a/content/resources/user/template-tsvs/biospecimen.tsv b/content/resources/user/template-tsvs/biospecimen.tsv
index 6e746aff..47a0a43e 100755
--- a/content/resources/user/template-tsvs/biospecimen.tsv
+++ b/content/resources/user/template-tsvs/biospecimen.tsv
@@ -1,2 +1,2 @@
type submitter_id cases.submitter_id biospecimen_anatomic_site biospecimen_type blood_tube_type days_to_collection days_to_procurement method_of_procurement procured_or_purchased tissue_type biospecimen_volume biospecimen_weight blood_draw_method clinical_site collaboration_id disease_type metastatic_bone metastatic_lymph_node metastatic_visceral primary_site procurement_temperature project_id shipping_temperature tumor_code tumor_descriptor
-biospecimen
\ No newline at end of file
+biospecimen
diff --git a/content/resources/user/template-tsvs/cell_image.tsv b/content/resources/user/template-tsvs/cell_image.tsv
index c8b71b99..fd1ea3a8 100755
--- a/content/resources/user/template-tsvs/cell_image.tsv
+++ b/content/resources/user/template-tsvs/cell_image.tsv
@@ -1,2 +1,2 @@
type submitter_id analytes.submitter_id data_category data_format data_type file_name file_size md5sum project_id
-cell_image
\ No newline at end of file
+cell_image
diff --git a/content/resources/user/template-tsvs/contrived_expectations.tsv b/content/resources/user/template-tsvs/contrived_expectations.tsv
index c69114b9..4f5fece2 100755
--- a/content/resources/user/template-tsvs/contrived_expectations.tsv
+++ b/content/resources/user/template-tsvs/contrived_expectations.tsv
@@ -1,2 +1,2 @@
type submitter_id aliquots.submitter_id expected_allelic_fraction expected_copy_number expected_mutation_alt expected_mutation_chromosome expected_mutation_gene expected_mutation_position expected_mutation_reference expected_mutation_type project_id
-contrived_expectations
\ No newline at end of file
+contrived_expectations
diff --git a/content/resources/user/template-tsvs/demographic.tsv b/content/resources/user/template-tsvs/demographic.tsv
index ca1bc995..3e1764bd 100755
--- a/content/resources/user/template-tsvs/demographic.tsv
+++ b/content/resources/user/template-tsvs/demographic.tsv
@@ -1,2 +1,2 @@
type submitter_id cases.submitter_id cause_of_death days_to_birth days_to_death ethnicity gender project_id race vital_status
-demographic
\ No newline at end of file
+demographic
diff --git a/content/resources/user/template-tsvs/diagnosis.tsv b/content/resources/user/template-tsvs/diagnosis.tsv
index 669018e1..00bf92fd 100755
--- a/content/resources/user/template-tsvs/diagnosis.tsv
+++ b/content/resources/user/template-tsvs/diagnosis.tsv
@@ -1,2 +1,2 @@
type submitter_id cases.submitter_id classification_of_tumor days_to_diagnosis morphology primary_diagnosis tissue_or_organ_of_origin tumor_grade ajcc_clinical_m ajcc_clinical_n ajcc_clinical_stage ajcc_clinical_t ajcc_pathologic_m ajcc_pathologic_n ajcc_pathologic_stage ajcc_pathologic_t best_overall_response days_to_best_overall_response method_of_diagnosis overall_survival prior_malignancy prior_treatment progression_free_survival progression_free_survival_event project_id residual_disease
-diagnosis
\ No newline at end of file
+diagnosis
diff --git a/content/resources/user/template-tsvs/exposure.tsv b/content/resources/user/template-tsvs/exposure.tsv
index 2572664c..dbc0a868 100755
--- a/content/resources/user/template-tsvs/exposure.tsv
+++ b/content/resources/user/template-tsvs/exposure.tsv
@@ -1,2 +1,2 @@
type submitter_id cases.submitter_id alcohol_history alcohol_intensity cigarettes_per_day project_id tobacco_smoking_status years_smoked
-exposure
\ No newline at end of file
+exposure
diff --git a/content/resources/user/template-tsvs/family_history.tsv b/content/resources/user/template-tsvs/family_history.tsv
index 4a37c32c..6eae946c 100755
--- a/content/resources/user/template-tsvs/family_history.tsv
+++ b/content/resources/user/template-tsvs/family_history.tsv
@@ -1,2 +1,2 @@
type submitter_id cases.submitter_id project_id relationship_age_at_diagnosis relationship_gender relationship_primary_diagnosis relationship_type relative_with_cancer_history
-family_history
\ No newline at end of file
+family_history
diff --git a/content/resources/user/template-tsvs/followup.tsv b/content/resources/user/template-tsvs/followup.tsv
index 8d774571..ef2cb386 100755
--- a/content/resources/user/template-tsvs/followup.tsv
+++ b/content/resources/user/template-tsvs/followup.tsv
@@ -1,2 +1,2 @@
type submitter_id cases.submitter_id days_to_followup adverse_event bmi cause_of_response comorbidity days_to_adverse_event days_to_comorbidity days_to_progression days_to_recurrence disease_response ecog_performance_status height progression_or_recurrence project_id weight
-followup
\ No newline at end of file
+followup
diff --git a/content/resources/user/template-tsvs/immunoassay.tsv b/content/resources/user/template-tsvs/immunoassay.tsv
index 38b45a81..c8472ee1 100755
--- a/content/resources/user/template-tsvs/immunoassay.tsv
+++ b/content/resources/user/template-tsvs/immunoassay.tsv
@@ -1,2 +1,2 @@
type submitter_id read_groups.submitter_id assay_instrument assay_instrument_model assay_method assay_target target_measurement target_measurement_units assay_kit_name assay_kit_vendor assay_kit_version project_id target_localization
-immunoassay
\ No newline at end of file
+immunoassay
diff --git a/content/resources/user/template-tsvs/mass_cytometry_assay.tsv b/content/resources/user/template-tsvs/mass_cytometry_assay.tsv
index a15aed80..b0fb6e90 100755
--- a/content/resources/user/template-tsvs/mass_cytometry_assay.tsv
+++ b/content/resources/user/template-tsvs/mass_cytometry_assay.tsv
@@ -1,2 +1,2 @@
type submitter_id analytes.submitter_id assay_instrument assay_instrument_model assay_method data_category data_format data_type file_name file_size md5sum panel_used project_id protocol_used
-mass_cytometry_assay
\ No newline at end of file
+mass_cytometry_assay
diff --git a/content/resources/user/template-tsvs/mass_cytometry_image.tsv b/content/resources/user/template-tsvs/mass_cytometry_image.tsv
index f7afb52d..74a59db5 100755
--- a/content/resources/user/template-tsvs/mass_cytometry_image.tsv
+++ b/content/resources/user/template-tsvs/mass_cytometry_image.tsv
@@ -1,2 +1,2 @@
type submitter_id mass_cytometry_assays.submitter_id assay_target data_category data_format data_type file_name file_size md5sum project_id
-mass_cytometry_image
\ No newline at end of file
+mass_cytometry_image
diff --git a/content/resources/user/template-tsvs/pcr_assay.tsv b/content/resources/user/template-tsvs/pcr_assay.tsv
index b16346d1..14c9e4d1 100755
--- a/content/resources/user/template-tsvs/pcr_assay.tsv
+++ b/content/resources/user/template-tsvs/pcr_assay.tsv
@@ -1,2 +1,2 @@
type submitter_id analytes.submitter_id assay_instrument assay_instrument_model assay_method assay_kit_name assay_kit_vendor assay_kit_version assay_target mutant_copies mutant_fraction_percent mutation_result project_id target_alt target_chromosome target_position target_ref
-pcr_assay
\ No newline at end of file
+pcr_assay
diff --git a/content/resources/user/template-tsvs/protocol.tsv b/content/resources/user/template-tsvs/protocol.tsv
index c9e6cffa..8b6437a3 100755
--- a/content/resources/user/template-tsvs/protocol.tsv
+++ b/content/resources/user/template-tsvs/protocol.tsv
@@ -1,2 +1,2 @@
type submitter_id studies.submitter_id data_category data_format data_type file_name file_size md5sum project_id
-protocol
\ No newline at end of file
+protocol
diff --git a/content/resources/user/template-tsvs/quantification_assay.tsv b/content/resources/user/template-tsvs/quantification_assay.tsv
index 5a12a97f..057b69c3 100755
--- a/content/resources/user/template-tsvs/quantification_assay.tsv
+++ b/content/resources/user/template-tsvs/quantification_assay.tsv
@@ -1,2 +1,2 @@
type submitter_id analytes.submitter_id assay_instrument assay_instrument_model assay_method assay_kit_name assay_kit_vendor assay_kit_version cell_count molecular_concentration project_id
-quantification_assay
\ No newline at end of file
+quantification_assay
diff --git a/content/resources/user/template-tsvs/read_group.tsv b/content/resources/user/template-tsvs/read_group.tsv
index f88134a1..7bd8ea77 100755
--- a/content/resources/user/template-tsvs/read_group.tsv
+++ b/content/resources/user/template-tsvs/read_group.tsv
@@ -1,2 +1,2 @@
type submitter_id analytes.submitter_id instrument_model is_paired_end library_strategy platform read_length_lower read_length_upper RIN adapter_sequence barcoding_applied flow_cell_barcode library_name library_preparation_kit_catalog_number library_preparation_kit_name library_preparation_kit_vendor library_preparation_kit_version library_selection library_strand project_id sequencing_center target_capture_kit_catalog_number target_capture_kit_name target_capture_kit_target_region target_capture_kit_vendor target_capture_kit_version
-read_group
\ No newline at end of file
+read_group
diff --git a/content/resources/user/template-tsvs/read_group_qc.tsv b/content/resources/user/template-tsvs/read_group_qc.tsv
index bf28ddeb..b36fafc8 100755
--- a/content/resources/user/template-tsvs/read_group_qc.tsv
+++ b/content/resources/user/template-tsvs/read_group_qc.tsv
@@ -1,2 +1,2 @@
type submitter_id read_groups.submitter_id submitted_aligned_reads_files.submitter_id submitted_unaligned_reads_files.submitter_id adapter_content basic_statistics created_datetime encoding kmer_content overrepresented_sequences per_base_n_content per_base_sequence_content per_base_sequence_quality per_sequence_gc_content per_sequence_quality_score per_tile_sequence_quality percent_aligned percent_gc_content project_id sequence_duplication_levels sequence_length_distribution total_aligned_reads total_sequences
-read_group_qc
\ No newline at end of file
+read_group_qc
diff --git a/content/resources/user/template-tsvs/sample.tsv b/content/resources/user/template-tsvs/sample.tsv
index 7778eda5..7ec76c5d 100755
--- a/content/resources/user/template-tsvs/sample.tsv
+++ b/content/resources/user/template-tsvs/sample.tsv
@@ -1,2 +1,2 @@
type submitter_id biospecimens.submitter_id composition blood_fractionation_method hours_to_fractionation_lower hours_to_fractionation_upper project_id sample_quantity sample_volume
-sample
\ No newline at end of file
+sample
diff --git a/content/resources/user/template-tsvs/sequencing_assay.tsv b/content/resources/user/template-tsvs/sequencing_assay.tsv
index 5b02edda..74ac185a 100755
--- a/content/resources/user/template-tsvs/sequencing_assay.tsv
+++ b/content/resources/user/template-tsvs/sequencing_assay.tsv
@@ -1,2 +1,2 @@
type submitter_id read_groups.submitter_id WT_copies assay_method assay_target copies_unit created_datetime mutant_copies mutant_fraction_ percent mutation_result project_id read_depth target_alt target_chromosome target_position target_ref
-sequencing_assay
\ No newline at end of file
+sequencing_assay
diff --git a/content/resources/user/template-tsvs/slide_image.tsv b/content/resources/user/template-tsvs/slide_image.tsv
index 9598fb1b..c525d723 100755
--- a/content/resources/user/template-tsvs/slide_image.tsv
+++ b/content/resources/user/template-tsvs/slide_image.tsv
@@ -1,2 +1,2 @@
type submitter_id aliquots.submitter_id data_category data_format data_type file_name file_size md5sum project_id
-slide_image
\ No newline at end of file
+slide_image
diff --git a/content/resources/user/template-tsvs/study copy.tsv b/content/resources/user/template-tsvs/study copy.tsv
index f18d2283..98e22f01 100755
--- a/content/resources/user/template-tsvs/study copy.tsv
+++ b/content/resources/user/template-tsvs/study copy.tsv
@@ -1,2 +1,2 @@
type submitter_id projects.code data_description study_description study_design study_objective study_setup associated_study project_id
-study
\ No newline at end of file
+study
diff --git a/content/resources/user/template-tsvs/submitted_aligned_reads.tsv b/content/resources/user/template-tsvs/submitted_aligned_reads.tsv
index f3e77648..b990eda4 100755
--- a/content/resources/user/template-tsvs/submitted_aligned_reads.tsv
+++ b/content/resources/user/template-tsvs/submitted_aligned_reads.tsv
@@ -1,2 +1,2 @@
type submitter_id read_groups.submitter_id data_category data_format data_type experimental_strategy file_name file_size md5sum
-submitted_aligned_reads
\ No newline at end of file
+submitted_aligned_reads
diff --git a/content/resources/user/template-tsvs/submitted_copy_number.tsv b/content/resources/user/template-tsvs/submitted_copy_number.tsv
index 945e379e..ed97982f 100755
--- a/content/resources/user/template-tsvs/submitted_copy_number.tsv
+++ b/content/resources/user/template-tsvs/submitted_copy_number.tsv
@@ -1,2 +1,2 @@
type submitter_id read_groups.submitter_id data_category data_format data_type experimental_strategy file_name file_size md5sum project_id
-submitted_copy_number
\ No newline at end of file
+submitted_copy_number
diff --git a/content/resources/user/template-tsvs/submitted_methylation.tsv b/content/resources/user/template-tsvs/submitted_methylation.tsv
index 0553f064..5155bc7c 100755
--- a/content/resources/user/template-tsvs/submitted_methylation.tsv
+++ b/content/resources/user/template-tsvs/submitted_methylation.tsv
@@ -1,2 +1,2 @@
type submitter_id analytes.submitter_id assay_instrument assay_instrument_model assay_method data_category data_format data_type file_name file_size md5sum project_id
-submitted_methylation
\ No newline at end of file
+submitted_methylation
diff --git a/content/resources/user/template-tsvs/submitted_somatic_mutation.tsv b/content/resources/user/template-tsvs/submitted_somatic_mutation.tsv
index d8ee5aa8..0ef2f4d7 100755
--- a/content/resources/user/template-tsvs/submitted_somatic_mutation.tsv
+++ b/content/resources/user/template-tsvs/submitted_somatic_mutation.tsv
@@ -1,2 +1,2 @@
type submitter_id read_groups.submitter_id data_category data_format data_type experimental_strategy file_name file_size md5sum project_id total_variants
-submitted_somatic_mutation
\ No newline at end of file
+submitted_somatic_mutation
diff --git a/content/resources/user/template-tsvs/submitted_unaligned_reads.tsv b/content/resources/user/template-tsvs/submitted_unaligned_reads.tsv
index e86e6bad..ffc558d4 100755
--- a/content/resources/user/template-tsvs/submitted_unaligned_reads.tsv
+++ b/content/resources/user/template-tsvs/submitted_unaligned_reads.tsv
@@ -1,2 +1,2 @@
type submitter_id read_groups.submitter_id data_category data_format data_type experimental_strategy file_name file_size md5sum
-submitted_unaligned_reads
\ No newline at end of file
+submitted_unaligned_reads
diff --git a/content/resources/user/template-tsvs/treatment.tsv b/content/resources/user/template-tsvs/treatment.tsv
index 6fb02257..b1d23352 100755
--- a/content/resources/user/template-tsvs/treatment.tsv
+++ b/content/resources/user/template-tsvs/treatment.tsv
@@ -1,2 +1,2 @@
type submitter_id diagnoses.submitter_id days_to_treatment_end days_to_treatment_start project_id regimen_or_line_of_therapy therapeutic_agents treatment_anatomic_site treatment_intent_type treatment_outcome treatment_type
-treatment
\ No newline at end of file
+treatment
diff --git a/gen3/docs/gen3-resources/developer-guide/architecture.md b/gen3/docs/gen3-resources/developer-guide/architecture.md
index 742f0e07..82c37048 100644
--- a/gen3/docs/gen3-resources/developer-guide/architecture.md
+++ b/gen3/docs/gen3-resources/developer-guide/architecture.md
@@ -3,42 +3,56 @@ This documentation is intended for developers who want to understand the design
## Overview
-Gen3 is a modular, open-source software platform that exposes a standard set of application programming interfaces (APIs) and user interfaces (UIs). It is capable of managing various types of cloud-based data and enabling cloud-based compute over those data. The underlying software and APIs are designed from the ground up to be interoperable, standards-based, and configurable. The Gen3 software is cloud agnostic and its deployment is facilitated by containerization and orchestration frameworks such as Kubernetes. While cloud-agnostic, we use AWS internally and therefore some documentation may include AWS specific instructions.
+Gen3 is a modular, open-source software platform that exposes a standard set of application programming interfaces (APIs) and user interfaces (UIs). It is capable of managing various types of cloud-based data and enabling cloud-based compute over those data.
-Gen3 can handle various categories of data based on their structure. Unstructured data represents files on a hard drive or cloud storage with no consistent schema. These data tend to represent either bulk clinical and phenotypic data in spreadsheet format or patient level data such as images or genomic sequencing files.
+The underlying software and APIs are designed from the ground up to be interoperable, standards-based, and configurable. The Gen3 software is intended to be cloud-agnostic and its deployment is facilitated by containerization and orchestration frameworks such as Kubernetes.
-Structured data are data that adhere to a specific and strict schema with a graphical representation of nodes with properties and relationships to other nodes. This schema can be referred to as a data model or data dictionary and tends to represent clinical and phenotypic data, linking subjects and their clinical and phenotypic data to studies and their samples. This is typically used to represent a harmonized version of the data.
+> NOTE: While Gen3 is cloud-agnostic, we (the Center for Translational Data Science, maintainers of Gen3) use AWS internally for most instances of Gen3 we manage, and therefore some documentation may include AWS-specific instructions.
-Semi-structured data is organized as unique identifiers with flexible key/value pairs (including nesting). The key/value pairs may be consistent between records, but are not required to be. This is typically used for storing publicly available metadata about available datasets or additional public metadata about samples.
+Gen3 can handle various categories of data based on their structure.
+
+**Unstructured** data represents files on a hard drive or cloud storage with no consistent schema. These data tend to represent either bulk clinical and phenotypic data in spreadsheet format or patient-level data such as images or genomic sequencing files.
+
+**Structured** data are data that adhere to a specific and strict schema with a graphical representation of nodes with properties and relationships to other nodes. This schema can be referred to as a data model or data dictionary and tends to represent clinical and phenotypic data, linking subjects and their clinical and phenotypic data to studies and their samples. This is typically used to represent a harmonized version of the data.
+
+**Semi-structured** data is organized as unique identifiers with flexible key/value pairs (including nesting). The key/value pairs may be consistent between records, but are not required to be. This is typically used for storing publicly available metadata about available datasets or additional public metadata about samples.
The overall architecture is designed to support FAIR data access[[1]](architecture.md#references), which includes features such as permanent digital IDs, open APIs, rich clinical and experimental data, and services modeled from external standards such as from GA4GH. Gen3 data meshes go one step further and support Secure and Authorized FAIR Environment (SAFE) environments[[2]](architecture.md#references).
## Products
-The distinct Gen3 products include Data Commons, Data Meshes, Framework Services, and Analytic Workspaces. Each product represents common applications and use cases with a subset of Gen3 UIs and APIs made available.
+The distinct Gen3 products include Gen3 Data Commons, Gen3 Data Meshes, Gen3 Framework Services, and Gen3 Analytic Workspaces. Each product represents common applications and use cases with a subset of Gen3 UIs and APIs made available.
-### Data Commons
+### Gen3 Data Commons
-Gen3 Data Commons co-locate exploration and visualization tools with data management services for import and export of structured information like clinical, phenotypic, or biospecimen data, along with data objects, like genomics data files or medical images. Gen3 Data Commons are capable of interoperation with other resources in a data mesh (aka fabric or ecosystem) by utilizing the Gen3 Frameworwork Services.
+Gen3 Data Commons co-locate exploration and visualization tools with data management services for import and export of structured information like clinical, phenotypic, or biospecimen data, along with data objects, like genomics data files or medical images. Gen3 Data Commons are capable of interoperation with other resources in a data mesh (AKA fabric or ecosystem) by utilizing the Gen3 Framework Services.
-### Data Meshes
+### Gen3 Data Meshes
A Gen3 Data Mesh connects independent data resources into a single interoperable data ecosystem. The APIs provided as part of this product include support for indexing data objects, associating metadata with the data objects, associating metadata with each data resource, controlling user access to data via a flexible access control policy engine, a graphical user interface, and workspaces that run over the open APIs.
-The level of integration in a data mesh can vary greatly. In a very integrated scenario, all data has a single data model and governance structure and is in fact just a data commons. In a less integrated scenario, data may live in separate data commons with separate data models and access control policies, but where Gen3 indexes and associates metadata with files, provides GUIDs (Globally Unique Identifiers) for all files, provides access control, and exposes open APIs. On the least integrated side of the spectrum, Gen3 may provide only dataset-level metadata to make datasets discoverable and enable workspaces.
+The level of integration in a data mesh can vary greatly. In a very integrated scenario, all data has a single data model and governance structure and is in fact just a data commons.
+
+In a less integrated scenario, data may live in separate data commons with separate data models and access control policies, but where Gen3 indexes and associates metadata with files, provides GUIDs (Globally Unique Identifiers) for all files, provides access control, and exposes open APIs.
+
+On the least integrated side of the spectrum, Gen3 may provide only dataset-level metadata to make datasets discoverable and enable workspaces.
+
+### Gen3 Framework Services
-### Framework Services
+Gen3 Framework Services are a minimal set of software services that provide open APIs and UIs that form the foundation, or “framework”, for building systems. Such foundational support includes indexing data objects, associating metadata with the data objects, controlling user access to data via a policy engine, and providing a data discovery UI.
-Gen3 Framework Services are a minimal set of software that provide open APIs and UIs that form the foundation, or “framework”, for building systems. Such foundational support includes indexing data objects, associating metadata with the data objects, controlling user access to data via a policy engine, and providing a data discovery UI. Gen3 framework services can be deployed alone and other desired functionality can be built off the extensive open APIs. It is more common that they are deployed as part of a broader product like Gen3 Data Commons or a Gen3 Data Mesh.
+Gen3 Framework Services can be deployed alone, and other desired functionality can be built on top of the extensive open APIs. More commonly, they are deployed as part of a broader product like Gen3 Data Commons or a Gen3 Data Mesh.
-Gen3 Framework Services aim to provide mechanisms to enable data to be fully Findable, Accessible, Interoperable, and Reusable (FAIR). One of the guiding principles of Gen3 as a whole, but most importantly the Gen3 Framework, is to use existing standards and solutions to common problems. This is why standards like OpenID Connect, OAuth 2.0, and GA4GH DRS have been adopted. We aim to adopt community standards and solutions when they provide an interoperable solution.
+Gen3 Framework Services aim to provide mechanisms to enable data to be fully Findable, Accessible, Interoperable, and Reusable (FAIR). One of the guiding principles of Gen3 as a whole, but most importantly the Gen3 Framework, is to use existing standards and solutions to common problems. This is why standards like OpenID Connect, OAuth 2.0, and GA4GH DRS have been adopted. We aim to adopt community standards and solutions when they provide an interoperable solution in an efficient way.
-### Analytic Workspaces
+### Gen3 Analytic Workspaces
-Gen3 analytic workspaces support secure data analysis environments in the cloud that can access data from one or more data resources, including Gen3 Data Commons. Gen3 workspaces are often fully integrated with a specific data commons, and coming soon are workspaces as stand-alone analysis environments with a user pay model. Workspaces use the Gen3 Framework for user authentication and authorization and for retrieving data objects and metadata from data resources, like Gen3 Data Commons.
+Gen3 Analytic Workspaces support secure data analysis environments in the cloud that can access data from one or more data resources, including Gen3 Data Commons.
-By default, Gen3 workspaces include Jupyter notebooks and RStudio but can be configured to host virtually any application, including analysis workflows such as Nextflow, data processing pipelines, or data visualization apps. In the future, the Gen3 Workflow Execution API will enable asynchronous and long-running workflows and pipelines to be executed.
+Gen3 workspaces are often fully integrated with a specific data commons; workspaces as stand-alone analysis environments with a user-pay model are coming soon. Workspaces use the Gen3 Framework Services for user authentication and authorization and for retrieving data objects and metadata from data resources, like Gen3 Data Commons.
+
+By default, Gen3 workspaces include Jupyter notebooks and RStudio but can be configured to host virtually any application, including analysis workflows such as Nextflow, data processing pipelines, or data visualization apps. In the future, the Gen3 Workflow Execution API will enable asynchronous and long-running workflows and pipelines to be executed.
## Security
@@ -49,7 +63,41 @@ Gen3 is designed to enable organizations to support secure and compliant data sh
* The Gen3 operator or technical team maintaining the Gen3 deployment
* The project sponsors who commission the Gen3 instance around a scientific research question
-Each party’s responsibilities can be summarized as follows: the Gen3 team at the University of Chicago (1) is responsible for providing stable, secure Gen3 releases and appropriate communications with the open-source community around those releases. It is highly recommended that Gen3 operators (3) keep their deployment on the latest monthly release of Gen3. The physical security controls and measures are inherited from the infrastructure provider (2). It is possible this service provider (2) is the same as the sponsor (4) or technical operator (3) if the instance is deployed on institutional on-premises infrastructure. The Gen3 operator (3) is responsible for understanding all applicable laws and regulations for compliance, Gen3 configurations, CI/CD flow, platform operation and monitoring architecture, Continuous Monitoring (ConMon), policies and procedures related to the specific deployment, and any tools deployed outside of Gen3. The technical Gen3 operator team (3) needs a good understanding of security best practices and laws governing their specific deployment, use case, and data so they can design a secure and compliant environment for with the appropriate configurations. If applicable, the sponsor (4) may define specific use cases or requirements that the technical operator (3) will configure in Gen3, like an OIDC Identity Provider/System or data use restrictions and policies.
+### Gen3 Development Team at the University of Chicago
+
+* Provides stable, secure Gen3 releases and appropriate communications with the open-source community.
+
+Note: The Gen3 team is responsible for providing a solid foundation for the Gen3 platform, but external factors like infrastructure providers and operators will also impact the overall security and compliance posture.
+
+### Infrastructure or Platform Provider (e.g. AWS, Azure, Private Cloud)
+
+* Responsible for physical security controls and measures.
+* May be the same as the sponsor or technical operator if deployed on institutional on-premises infrastructure.
+
+Interaction Note: The Gen3 team relies on the infrastructure provider to ensure the underlying security measures are in place. However, the Gen3 operator is still responsible for understanding applicable laws and regulations.
+
+### Gen3 Operator or Technical Team
+
+* Responsible for:
+ + Understanding all applicable laws and regulations for compliance
+ + Gen3 configurations
+ + CI/CD flow
+ + Platform operation and monitoring architecture
+ + Continuous Monitoring (ConMon)
+ + Policies and procedures related to the specific deployment
+ + Tools deployed outside of Gen3
+* Needs a good understanding of security best practices and laws governing their specific deployment, use case, and data.
+
+> NOTE: It is highly recommended that Gen3 operators keep their deployment on the latest monthly release of Gen3.
+
+Interaction Note: The Gen3 operator is responsible for ensuring compliance with applicable laws and regulations, but relies on the infrastructure provider to ensure the underlying security measures are in place. The project sponsor may also define specific requirements that the technical operator will configure in Gen3.
+
+### Project Sponsors
+
+* May define specific use cases or requirements that the technical operator will configure in Gen3.
+* Responsibilities include commissioning the Gen3 instance around a scientific research question.
+
+Interaction Note: The sponsor commissions the Gen3 instance, but does not directly manage its operation. The technical operator and Gen3 team must work together to ensure compliance with applicable laws and regulations while meeting the sponsor's requirements.
Many of the CTDS-run commons operate at a FedRAMP (Federal Risk and Authorization Management Program) Moderate level as Gen3 Data Ecosystems Platform and, in collaboration with the Open Commons Consortium, LI-SaaS (Low Impact Software as a Service) as Gen3 Data Commons Service. However, other operators can choose to run a Gen3 system at a higher or lower security standard.
@@ -59,22 +107,28 @@ Many of the CTDS-run commons operate at a FedRAMP (Federal Risk and Authorizatio
Gen3 can be used as a tool to manage small to very large projects. This inherent flexibility is embedded in several Gen3 features:
-* Gen3 is cloud agnostic and can be run on any commercial cloud as well as on-prem infrastructure. This will allow a range of groups to take advantage of the software given the requirements of different organizations.
-* It takes advantage of kubernetes, which is an open-source system for automating deployment, scaling, and management of containerized applications.
-* Our installation uses Karpenter, which is an open-source Kubernetes cluster autoscaler. This allows a project to easily transition as it gains more or fewer users over a long or short period of time.
-* Helm charts, which is a kubernetes package manager, allows operators to quickly deploy a new system.
+* Gen3 is cloud-agnostic and can be run on any commercial cloud as well as on-prem infrastructure. This will allow a range of groups to take advantage of the software given the requirements of different organizations.
+* It takes advantage of Kubernetes, which is an open-source system for automating deployment, scaling, and management of containerized applications.
+* Our installation uses Karpenter, which is an open-source Kubernetes cluster autoscaler. This allows a project to easily transition as it gains more or fewer users over a long or short period of time.
+* Helm, which is a Kubernetes package manager, provides charts that allow operators to quickly deploy a new system.
* The use of terraform scripts allow operators to efficiently manage both Gen3 and infrastructure resources.
### Data Model
-The Gen3 data model is a graph-like relational model with nodes and edges; it specifies how different files, patients, experiments, and clinical visits are all related to one another. It is central to a Gen3 commons, which is autogenerated after the data dictionary and data model are specified. To be more specific, each node represents an entity, which consists of a related group of attributes or data elements. Clinical variables like a cancer diagnosis or a subject’s gender might go into the diagnosis or demographic nodes, respectively. Variables related to how a biological sample was collected or processed may be found in a biospecimen node. Data files, such as medical images or genomic files, can also be nodes and have associated metadata variables like file size, format, and file name.
+The Gen3 data model is a graph-like relational model with nodes and edges; it specifies how different files, patients, experiments, and clinical visits are all related to one another. It is central to a Gen3 Data Commons, which is autogenerated after the data dictionary and data model are specified.
+
+To be more specific, each node represents an entity, which consists of a related group of attributes or data elements. Clinical variables like a cancer diagnosis or a subject’s gender might go into the diagnosis or demographic nodes, respectively. Variables related to how a biological sample was collected or processed may be found in a biospecimen node. Data files, such as medical images or genomic files, can also be nodes and have associated metadata variables like file size, format, and file name.
-Edges or links between nodes indicate relationships between them. For example, a Sample node may derive or connect to a specific patient or a genomic data file may also be associated with a specific sample. Properties are assigned different types including: string, boolean, floating point number, integer, or enumeration. Properties can also be defined as arrays of any of those types. The acceptable values for properties can be further restrained by defining regex patterns that strings must match or minimum or maximum values for numeric data. Nodes, properties, and permissible values are specified in a series of YAML files.
+Edges or links between nodes indicate relationships between them. For example, a Sample node may derive or connect to a specific patient or a genomic data file may also be associated with a specific sample.
+
+Properties are assigned different types including: string, boolean, floating point number, integer, or enumeration. Properties can also be defined as arrays of any of those types. The acceptable values for properties can be further constrained by defining regex patterns that strings must match or minimum or maximum values for numeric data. Nodes, properties, and permissible values are specified in a series of YAML files.
### Data Portal
-Since the launch of Gen3, the user interface has been powered by the monolithic Gen3 Data Portal. Its design presents challenges in data movement between pages and has become increasingly difficult to extend. We are developing a new service, the Gen3 Frontend Framework (Gen3.2), to overcome these limitations. It offers enhanced custom content development, application performance, deployment, and maintenance. As we transition to the new framework, we will focus on the Gen3 Frontend Framework rather than the soon to be replaced Gen3 Data Portal.
+Since the launch of Gen3, the user interface has been powered by the monolithic Gen3 Data Portal. Its design presents challenges in data movement between pages and has become increasingly difficult to extend.
+
+We are developing a new service, the Gen3 Frontend Framework (Gen3.2), to overcome these limitations. It offers enhanced custom content development, application performance, deployment, and maintenance. As we transition to the new framework, we will focus on the Gen3 Frontend Framework rather than the soon to be replaced Gen3 Data Portal.
The Gen3 Frontend Framework consists of two primary modules—core and frontend—combined with a NextJS web application to create the Gen3 data commons UI. This architecture reduces code complexity, abstracts UI interactions with Gen3 services, supports customization, and simplifies deployment and cost.
diff --git a/gen3/docs/gen3-resources/developer-guide/contribute.md b/gen3/docs/gen3-resources/developer-guide/contribute.md
index d3591fed..a19ee0e7 100644
--- a/gen3/docs/gen3-resources/developer-guide/contribute.md
+++ b/gen3/docs/gen3-resources/developer-guide/contribute.md
@@ -20,13 +20,13 @@ Gen3 documentation is constantly evolving. If you see an error or omission we w
#### Tutorials
-Beyond the basic documentation, having an end to end tutorial on a particular topic can extraordinarily helpful. Please feel free to suggest such a tutorial.
+Beyond the basic documentation, having an end-to-end tutorial on a particular topic can be extraordinarily helpful. Please feel free to suggest such a tutorial.
### Participate in community events or on Slack
Gen3 has a [community forum](https://gen3.org/community/events/) every other month where CTDS or other Gen3 users/operators present on topics of mutual interest to the community. We are always looking for contributions so please recommend topics or volunteer to present if a call is made to the community. Your experiences are valuable for the rest of the community to learn from!
-Your participation in the community slack channel is also very much appreciated. If you know the answer to a question or have something to contribute please speak up!
+Your participation in the community Slack channel is also very much appreciated. If you know the answer to a question or have something to contribute please speak up!
### Create an Issue
@@ -49,8 +49,8 @@ You may submit updates to code or documentation by creating a pull request.
1. Note that all code contributions are subject to our Apache 2.0 license attached to each of our repositories (https://www.apache.org/licenses/LICENSE-2.0).
2. Background information
- * Should you create an issue first? No, just use the description of the pull request to provide context and motivation, as you would for an issue.
- * If your code update is at all complicated you will likely be asked to fill out a Community Feature Document to help the Gen3 team review your PR. You can find the template [here](https://docs.google.com/document/d/1P2dfqnSH-e7OX1Hw62sDL8zcR7gZp4d152TlDBlomDc/edit?tab=t.0#heading=h.5e0lej9k5tiv). Please make a copy, and provide with your PR either as an attachment or provide a shareable link to a completed version. Sharing as a google doc may be helpful to allow for comments and discussion between you and the Gen3 team.
+ * Should you create an issue first? No, just use the description of the pull request to provide context and motivation, as you would for an issue.
+ * If your code update is at all complicated you will likely be asked to fill out a Community Feature Document to help the Gen3 team review your PR. You can find the template [here](https://docs.google.com/document/d/1P2dfqnSH-e7OX1Hw62sDL8zcR7gZp4d152TlDBlomDc/edit?tab=t.0#heading=h.5e0lej9k5tiv). Please make a copy and provide the completed version with your PR, either as an attachment or as a shareable link. Sharing as a Google doc may be helpful to allow for comments and discussion between you and the Gen3 team.
3. Always check out the `main` or `master` branch and submit pull requests against it.
@@ -61,7 +61,7 @@ You may submit updates to code or documentation by creating a pull request.
5. If there is a prior issue, reference the GitHub issue number in the description of the pull request.
6. Ensure your PR description is populated and following the [required template](https://github.com/uc-cdis/.github/blob/master/.github/pull_request_template.md)
- * Our Gen3 Release Notes are parsed from PR descriptions. Each bullet/line under the PR template headings get into our release notes as individual bullets, so the language should be succint and high-level
+ * Our Gen3 Release Notes are parsed from PR descriptions. Each bullet/line under the PR template headings goes into our release notes as an individual bullet, so the language should be succinct and high-level.
* Context, motivation, overview, and all other info about the change should go _above_ the required headings (which won’t get parsed and pulled into the Gen3 Release Notes)
7. Once your PR is made, a CTDS staff member may reach out with additional questions and/or comment directly on your PR in GitHub.
diff --git a/gen3/docs/gen3-resources/developer-guide/index.md b/gen3/docs/gen3-resources/developer-guide/index.md
index 41ed55c0..0ff9d60c 100644
--- a/gen3/docs/gen3-resources/developer-guide/index.md
+++ b/gen3/docs/gen3-resources/developer-guide/index.md
@@ -1,6 +1,6 @@
# Gen3 Developer Guide - Extend Gen3
-Welcome to the Gen3 Developer Guide. This guide is primarily for software engineers that are looking to attain a deeper understanding of Gen3 services in order to interact with or modify them.
+Welcome to the Gen3 Developer Guide. This guide is primarily for software engineers that are looking to attain a deeper understanding of Gen3 services in order to interact with or modify them.
If you are a researcher or data scientist looking to access a Gen3 Data Commons or mesh to locate, access, or analyze data please read the [Gen3 User Guide][Gen3 User Guide]. If you are looking to deploy, maintain, configure, or submit data please take a look at the [Gen3 Operator Guide][Gen3 Operator Guide].
diff --git a/gen3/docs/gen3-resources/developer-guide/microservices.md b/gen3/docs/gen3-resources/developer-guide/microservices.md
index fa624ad4..3b6ca654 100644
--- a/gen3/docs/gen3-resources/developer-guide/microservices.md
+++ b/gen3/docs/gen3-resources/developer-guide/microservices.md
@@ -4,38 +4,51 @@ Gen3 features and functionality are enabled by independent and modular microserv
While the average user does not need to know the details and names of each microservice, if you are interested in adding new features or modifying your Gen3 system in some way it may be helpful to have a deeper understanding of a specific microservice. We have included brief descriptions below along with a link to their documentation in GitHub.
-## [Aggregated Metadata Service (AggMDS)][aggmds github]
-The aggregated MDS is a service which caches metadata from commons metadata services and becomes a centralize API for browsing Metadata with clients such as the Ecosystem browser. The AggMDS holds the content viewable in a Data Portal Discovery page for a Data Mesh.
## [Arborist][arborist github]
-Arborist is an attribute-based access control (ABAC) policy engine, designed for use with the Gen3 stack. Arborist tracks resources requiring access control, along with actions which users may perform to operate on these resources, and roles, which aggregate permissions to perform one or more actions
+Arborist acts as the Gen3 Policy Engine. It is an attribute-based access control (ABAC) policy engine, designed for use with the Gen3 stack. Arborist tracks resources requiring access control, along with actions which users may perform to operate on these resources, and roles, which aggregate permissions to perform one or more actions.
+
+It is utilized by any service to make authorization decisions, whether that be API-level access, or the permission to read or delete a specific indexed record.
+
+Services should offload _all_ authorization related logic to Arborist as much as possible.
## [Data Portal][data portal github]
The data portal service is an interactive website that allows users to explore, submit, and download data. The Windmill service utilizes the APIs offered by the data commons just as any other externally built app could.
## [Fence][fence github]
-The Fence service controls access to the metadata, submission, indexing, and data itself. Fence is an authentication (AuthN) and authorization (AuthZ) service which utilizes OpenID Connect flow (an extension of OAuth2) to generate tokens for clients. It can also provide tokens directly to a user. Clients and users may then use those tokens (JWT) with other Gen3 Data Commons services to access protected endpoints that require specific permissions. Fence can be configured to support different Identity Providers (IDPs) for AuthN. At the moment, supported IDPs include Google, and Shibboleth supporting providers such as NIH iTrust.
+Fence is an authentication (AuthN) and authorization (AuthZ) service which utilizes OpenID Connect flow (an extension of OAuth2) to generate tokens for clients. It can also provide tokens directly to a user. Clients and users may then use those tokens (JWT) with other Gen3 Data Commons services to access protected endpoints that require specific permissions. Fence can be configured to support different Identity Providers (IDPs) for AuthN. At the moment, supported IDPs include Google, and Shibboleth supporting providers such as NIH iTrust.
+
+Fence also handles the authorization syncing (though the results end up in Arborist), the management of necessary AWS/Google IAM credentials for administering signed URLs to end-users, and the exposure of the APIs for signed URL generation (both Gen3's and GA4GH DRS).
+
+> **NOTE**: The Fence images serving signed URLs are typically deployed as a separate Kubernetes service `presigned-url-fence` with much greater scaling capabilities (to separate traffic related to authN/Z from data access traffic). The Indexd service still handles the management of the indexed records themselves (with their GUIDs and DRS URIs).
+>
+> This separation of concerns (records vs data access) is intentional, but the fact that the Fence codebase holds both authN/Z and data access logic (requiring 2 Kubernetes services of the same image) is known technical debt. It is likely that in the future a new service or mechanism for retrieving file-based data will replace `presigned-url-fence`.
## [Guppy][guppy github]
-Server that support GraphQL queries on data from elasticsearch.
+Guppy is a server that supports GraphQL queries on data from Elasticsearch. Guppy integrates with the Gen3 Policy Engine, Arborist, to filter out results that a user should not see. This is done by comparing a list of resources the user has access to (from Arborist), with the specified authorization resources of the records they are querying.
## [Hatchery][hatchery github]
Hatchery creates Kubernetes Pods for workspace services. Workspace services must expose HTTP servers. Ambassador is used to proxy user traffic through to their container workspace once it is launched by Hatchery.
## [Helm][helm github]
-Gen3 relies upon Helm to manage installation and management of Kubernetes applications. Helm is used to build ”charts”, which are packages of Kubernetes resources that are used to deploy apps to a cluster. Helm is the recommended way to deploy Gen3.
+Gen3 relies upon Helm to manage installation and management of Kubernetes applications. Helm is used to build "charts", which are packages of Kubernetes resources that are used to deploy apps to a cluster. Helm is the recommended way to deploy Gen3.
## [Indexd][indexd github]
-The Indexd service provides permanent digital IDs for data objects. These IDs can be used to retrieve the data, or query the metadata associated with the object. The Indexd service tracks the locations and hash of every asset (file) in the data commons object store. It exports RESTful APIs for registering a new asset, and retrieving data for an existing asset.
+The Indexd service provides permanent, digital, globally unique IDs (GUIDs) for data objects. These IDs can be used to retrieve the data, or query the metadata associated with the object. The Indexd service tracks the locations and hash of every asset (file) in the data commons object store. It exports RESTful APIs for registering a new asset, and retrieving data for an existing asset.
## [Manifest Service][manifest service github]
This service handles reading from and writing to a user's S3 folder containing their manifests. A manifest is a JSON file that lists records a researcher may be interested in analyzing. This service stores a manifest to a user folder in an s3 bucket and delivers it for later use, such as when the researcher wants to mount the manifest in their workspace. If the "prefix" config variable is set, user folders will be stored in a directory of that name within the s3 bucket.
+> **NOTE**: We are developing a comprehensive replacement of this service, which will include real cohort management and better integration of selected data across UI pages.
+
## [Metadata Service (MDS)][MDS github]
-The Metadata Service provides an API for retrieving JSON metadata of GUIDs. It is a flexible option for "semi-structured" data (key:value mappings). The content of the MDS powers the Data Portal Discovery Page for a Data Commons. The Gen3 SDK can be used to upload and edit the metadata.
+The Metadata Service provides an API for retrieving JSON metadata of GUIDs. It is a flexible option for "semi-structured" data (key:value mappings). The content of the MDS powers the Data Portal Discovery Page for a Data Commons. The Gen3 SDK can be used to upload and edit the metadata. This service includes a feature known as the aggregated metadata service (AggMDS), which caches metadata from the metadata services of multiple data commons. The AggMDS holds the content viewable in a Data Portal Discovery page for a Data Mesh.
## [Peregrine][peregrine github]
-Peregrine is the high speed metadata seeking service which responds to GraphQL search queries. The GraphQL service allows Commons operators and users to precisely query only the information they are most interested in from the metadata collections. The service translates the GraphQL search into the appropriate statements which are run against the PostgreSQL backend before being returned as friendly JSON.
+Peregrine is the metadata-seeking service which responds to GraphQL search queries by translating them into queries over our graph-like, source-of-truth PostgreSQL database for structured data. The GraphQL service allows Commons operators and users to precisely query only the information they are most interested in from the metadata collections. The translated statements are run against the PostgreSQL backend, and the results are returned as friendly JSON.
+
+## [Requestor][requestor github]
+Requestor exposes an API to manage access requests.
## [Sheepdog][sheepdog github]
The Sheepdog service is responsible for herding user submissions of metadata into the graph database. The submissions are quality controlled against the data dictionary to ensure all required fields are present and have appropriate data values. The Sheepdog service is also responsible for supporting bulk export of the metadata into TSV or JSON formats.
@@ -54,6 +67,8 @@ The Gen3 workspace token service acts as an OIDC client which acts on behalf of
This table is helpful for debugging errors in front-end applications like [Windmill: data portal](https://github.com/uc-cdis/data-portal) or other Gen3 clients. You can easily identify the running service that is returning an error, based on its absolute HTTP request path. [Source](https://github.com/uc-cdis/cloud-automation/tree/master/kube/services/revproxy/gen3.nginx.conf).
+> **NOTE**: We intend to eventually have a more centralized API definition and potentially an API Gateway, while shifting towards an API First development strategy.
+
| Microservice | URL Path Prefix (NGINX Location) | GitHub Repository |
|------------------------------|----------------------------------|--------------------------------------------------------------------------------------------|
| ambassador-service | /lw-workspace/proxy/ | https://github.com/uc-cdis/cloud-automation/tree/master/kube/services/ambassador |
@@ -87,17 +102,18 @@ This table is helpful for debugging errors in front-end applications like [Windm
-[aggmds github]: https://github.com/uc-cdis/agg-metadata
+
[arborist github]: https://github.com/uc-cdis/arborist
[data portal github]: https://github.com/uc-cdis/data-portal
-[fence github]: https://github.com/uc-cdis/fence
+[fence github]: https://github.com/uc-cdis/fence
[guppy github]: https://github.com/uc-cdis/guppy
[hatchery github]: https://github.com/uc-cdis/hatchery
[helm github]: https://github.com/uc-cdis/gen3-helm
[indexd github]: https://github.com/uc-cdis/indexd
[manifest service github]: https://github.com/uc-cdis/manifestservice
[MDS github]: https://github.com/uc-cdis/metadata-service
-[peregrine github]: https://github.com/uc-cdis/peregrine
+[peregrine github]: https://github.com/uc-cdis/peregrine
+[requestor github]: https://github.com/uc-cdis/requestor
[sheepdog github]: https://github.com/uc-cdis/sheepdog
[sower github]: https://github.com/uc-cdis/sower
[tube github]: https://github.com/uc-cdis/tube
diff --git a/gen3/docs/gen3-resources/glossary.md b/gen3/docs/gen3-resources/glossary.md
index 97228ba3..17fbf73a 100644
--- a/gen3/docs/gen3-resources/glossary.md
+++ b/gen3/docs/gen3-resources/glossary.md
@@ -5,7 +5,7 @@ A Common Services Operations Center is an operations center operated by a common
## Crosswalk
Linking patients from across data commons where some patient data exists in commons A and additional data exists in commons B. This linkage is recorded in the metadata service. An example of how to set this up is found [here][crosswalk setup].
## Data Commons
-A data commons co-locates data with cloud computing infrastructure and com- monly used software services, tools, and applications for managing, integrating, analyzing and shar- ing data that are exposed through web portals and APIs to create an interoperable resource for a research community. A data commons provides services so that the data is findable, accessible, interoperable and reusable (FAIR)
+A data commons co-locates data with cloud computing infrastructure and commonly used software services, tools, and applications for managing, integrating, analyzing and sharing data that are exposed through web portals and APIs to create an interoperable resource for a research community. A data commons provides services so that the data is findable, accessible, interoperable and reusable (FAIR).
## Data Dictionary
Every Gen3 data commons employs a data model, which serves to describe, organize, and harmonize data sets submitted by different users. Data harmonization facilitates cross-project analyses and is thus one of the pillars of the data commons paradigm. The data model organizes experimental metadata variables, “properties”, into linked categories, “nodes”, through the use of a data dictionary. The data dictionary lists and describes all nodes in the data model, as well as defines and describes the properties in each node. A Gen3 Data Dictionary is specified by a YAML file per node. Additional details on Gen3 data dictionaries can be found [here][data dictionary overview].
## Data Hub
@@ -24,7 +24,7 @@ See an in-depth description of each page [here][Data Portal User Guide].
### Data Dictionary Page
Interactive page that shows the data dictionary in both a graph and table format. It is powered by the data dictionary YAML files and allows users to see the controlled vocabulary across all data model nodes.
### Discovery Page
-Powered by the metadata service. Typically includes public metadata at the project level including search fields, tags, and study page fields.
+Powered by the metadata service. Typically includes public metadata at the project level including search fields, tags, and study page fields.
### Explorer Page
Primary page for exploring data within a Gen3 data commons. It is powered by flattened data within the Guppy microservice. It is highly configurable based on operator requirements for building cohorts.
### Landing Page
@@ -36,58 +36,43 @@ Edge nodes may be created at clinics, labs, hospitals, or academic institutions
## ETL
Structured data submitted to commons are stored in PostgreSQL. Querying data from PostgreSQL with multiple join is painful and inefficient. So, we use ElasticSearch as a place to store materialized dataset. Extract-transform-load (ETL) is a process that creates the materialized data from PostgreSQL and store them in ElasticSearch. This is accomplished via the Tube microservice. More details of running an ETL can be found [here](operator-guide/customize-search.md#etl).
## FAIR Data
-FAIR data are data which meet the principles of findability, accessibility, interoper- ability, and reusability [12]. There is now an extensive literature on FAIR data.
+FAIR data are data which meet the principles of findability, accessibility, interoperability, and reusability [12]. There is now an extensive literature on FAIR data.
## Framework Services
-Framework Services or Data Commons Framework (DCF) Services is the term used by Gen3 to refer to data mesh services in the narrow middle architecture, for data meshes, such as the NCI Cancer Research Data Commons. These are set of standards-based services with open APIs for authentication, authorization, creating and accessing FAIR data objects, and for working with bulk structured data in machine readable, self-contained format.
+Framework Services or Data Commons Framework (DCF) Services is the term used by Gen3 to refer to data mesh services in the narrow middle architecture, for data meshes, such as the NCI Cancer Research Data Commons. These are a set of standards-based services with open APIs for authentication, authorization, creating and accessing FAIR data objects, and for working with bulk structured data in a machine-readable, self-contained format.
## Globally Unique Identifier (GUID)
-A GUID is an essentially unique identifier that is generated by an algorithm so that no central authority is needed, but rather different programs running in different locations can generate GUID with a low probability that they will collide. A common format for a GUID is the hexadecimal representation of a 128 bit binary number.
+A GUID is an essentially unique identifier that is generated by an algorithm so that no central authority is needed, but rather different programs running in different locations can generate GUIDs with a low probability that they will collide. A common format for a GUID is the hexadecimal representation of a 128-bit binary number.
## Kubernetes
An open-source system for automating deployment, scaling, and management of containerized applications, which Gen3 is built from.
## Microservice
-Microservices are a software architecture that organizes software into small, independent services that communicate over well defined APIs. These services can be developed, set up, and scaled independently. A more traditional architecture is to put all the APIs and other required functionality into a single application. This is sometimes called a monolithic architecture. Microservices provide important advantages for large-scale systems that require scalability and must continue to evolve even as their code base grows very large, but increases the complexity of operating small-scale systems.
+Microservices are a software architecture that organizes software into small, independent services that communicate over well-defined APIs. These services can be developed, set up, and scaled independently. A more traditional architecture is to put all the APIs and other required functionality into a single application. This is sometimes called a monolithic architecture. Microservices provide important advantages for large-scale systems that require scalability and must continue to evolve even as their code base grows very large, but increase the complexity of operating small-scale systems.
## Flattened Data
-Structured data that has been processed via Tube and stored in elasticsearch to accelerate searchability.
+Structured data that has been processed via Tube and stored in Elasticsearch to accelerate searchability.
## Gen3 Client
The Gen3 Client is a command-line tool for downloading, uploading, and submitting data files to and from a Gen3 data commons. Some of the same functionality can be found in the Gen3 SDK. You can find installation and use instructions [here][Gen3 client docs].
## Gen3 SDK
The Gen3 Software Development Kit (SDK) for Python provides classes and functions for handling common tasks when interacting with a Gen3 commons. It also exposes a Command Line Interface (CLI). The API for a commons can be overwhelming, so this SDK/CLI aims to simplify communication with various microservices. It can also download and upload files like the Gen3 Client. You can find installation and use instructions [here][SDK docs].
## Gen3 Microservices
-Brief descriptions of the most relevant microservices are included below. For more detail and links to their respective repositories please visit the [Developer's Guide][Microservices].
-### Aggregated Metadata Service (AggMDS)
-The aggregated MDS is a service which caches metadata from commons metadata services and becomes a centralize API for browsing Metadata with clients such as the Ecosystem browser. The AggMDS holds the content viewable in a Data Portal Discovery page for a Data Mesh.
-### Arborist
-Arborist is an attribute-based access control (ABAC) policy engine, designed for use with the Gen3 stack. Arborist tracks resources requiring access control, along with actions which users may perform to operate on these resources, and roles, which aggregate permissions to perform one or more actions
-### Data Portal
-A generic data portal that supports some basic interaction with Gen3 services like peregrine, sheepdog and fence
-### Indexd
-The Indexd service provides permanent digital IDs for data objects. These IDs can be used to retrieve the data, or query the metadata associated with the object. The Indexd service tracks the locations and hash of every asset (file) in the data commons object store. It exports RESTful APIs for registering a new asset, and retrieving data for an existing asset.
-### Fence
-The Fence service controls access to the metadata, submission, indexing, and data itself. Fence is an authentication (AuthN) and authorization (AuthZ) service which utilizes OpenID Connect flow (an extension of OAuth2) to generate tokens for clients. It can also provide tokens directly
-to a user. Clients and users may then use those tokens (JWT) with other Gen3 Data Commons services to access protected endpoints that require specific permissions. Fence can be configured to support different Identity Providers (IDPs) for AuthN. At the moment, supported IDPs include Google, and Shibboleth supporting providers such as NIH iTrust.
-### Guppy
-Server that support GraphQL queries on data from elasticsearch.
-### Hatchery
-Hatchery creates Kubernetes Pods for workspace services. Workspace services must expose HTTP servers. Ambassador is used to proxy user traffic through to their container workspace once it is launched by Hatchery.
-### Helm
-Gen3 relies upon Helm to manage installation and management of Kubernetes applications. Helm is used to build ”charts”, which are packages of Kubernetes resources that are used to deploy apps to a cluster. Helm is the recommended way to deploy Gen3.
-### Metadata Service (MDS)
-The Metadata Service provides an API for retrieving JSON metadata of GUIDs. It is a flexible option for "semi-structured" data (key:value mappings). The content of the MDS powers the Data Portal Discovery Page for a Data Commons. The Gen3 SDK can be used to upload and edit the metadata.
-### Pelican
-Provides Docker images with Sower jobs to export and import PFB in Gen3.
-### Peregrine
-Peregrine is the high speed metadata seeking service which responds to GraphQL search queries. The GraphQL service allows Commons operators and users to precisely query only the information they are most interested in from the metadata collections. The service translates the GraphQL search into the appropriate statements which are run against the PostgreSQL backend before being returned as friendly JSON.
-### Sheepdog
-The Sheepdog service is responsible for herding user submissions of metadata into the graph database. The submissions are quality controlled against the data dictionary to ensure all required fields are present and have appropriate data values. The Sheepdog service is also responsible for supporting bulk export of the metadata into TSV or JSON formats.
-### Sower
-Sower dispatches Kubernetes jobs.
-### Tube
-Microservice that controls the ETL process of structured data.
-### Workspace Token Service
-The Gen3 workspace token service acts as an OIDC client which acts on behalf of users to request refresh tokens from Fence. This happens when a user logs into a workspace from the browser. WTS then stores the refresh token for that user, and manages access tokens and refresh tokens for workers that belong to specific users in the workspace.
+A simple list of the most relevant microservices is included below. For a description of each service and links to their respective repositories, please visit the [Developer's Guide][Microservices].
+
+* Data Portal
+* Indexd
+* Fence
+* Guppy
+* Hatchery
+* Helm
+* Metadata Service (MDS)
+* Pelican
+* Peregrine
+* Requestor
+* Sheepdog
+* Sower
+* Tube
+* Workspace Token Service
+
## Portable Format for Biomedical data (PFB)
PFB is a serialization file format designed to store bio-medical data and metadata. The format is built on top Avro to make it fast, extensible and interoperable between different systems. You can find the GitHub repo [here][PFB GitHub] and the publication [here][PFB Pub].
## Workspace
-Gen3 workspaces are secure data analysis environments in the cloud that can access data from one or more data resources, including Gen3 Data Commons. Gen3 Workspaces use the Gen3 Framework Services for user authentication and authorization, data object indexing, and metadata services. Gen3 Workspaces support Jupyter notebooks, RStudio notebooks, and other custom applications that can access data through Gen3 open APIs. For instructions on the use of a workspace see [here][workspace use].
+Gen3 workspaces are secure data analysis environments in the cloud that can access data from one or more data resources, including Gen3 Data Commons. Gen3 Workspaces use the Gen3 Framework Services for user authentication and authorization, data object indexing, and metadata services. Gen3 Workspaces support Jupyter notebooks, RStudio notebooks, and other custom applications that can access data through Gen3 open APIs. For instructions on the use of a workspace see [here][workspace use].
diff --git a/gen3/docs/gen3-resources/index.md b/gen3/docs/gen3-resources/index.md
index 1d3cca1e..60aee1bb 100644
--- a/gen3/docs/gen3-resources/index.md
+++ b/gen3/docs/gen3-resources/index.md
@@ -1,20 +1,9 @@
-# Gen3 Resources - the home for Gen3 Documentation
+# Gen3 Resources
-this is the "home page" for the New Resources page
-contains an outline for these 5 separate sections, and what they cover.
-also some sort of figure
+This section contains the bulk of the Gen3 technical documentation. It is broken up into the following sections:
-- Configure and Deploy Gen3: gen3-resources/deploy-gen3.md
- # contains content from the operator guide and also Helm docs
-- Create Data Dictionary: gen3-resources/create-data-dictionary.md
- # contains info about creating (and maintaining?) a data dictionary
-- Submit Data: gen3-resources/submit-data.md
- # contains info about submitting data (both for the first time and for updating content)
-- Customize Gen3 Search Interface: gen3-resources/customize-search.md
- # info from the current operator guide about configuring ETL and data explorer
-- Find and Use Data: gen3-resources/consume-data.md
- # info from the current User Guide - except not the stuff about data submission/
-- Extend your Gen3 Instance: gen3-resources/developing-in-gen3.md
- # contains info from developer guide and more, with the goal to provide information
- # about how to create code to extend your Gen3 deployment, and also
- # about how to share your code (contributor guidelines)
+
+* **Gen3 User Guide** - This is for the data scientist, researcher, or analyst who needs to explore, download, or analyze data found within an existing instance of Gen3.
+* **Gen3 Operator Guide** - This is for those organizations who operate their own Gen3 instances. It will include content on how to deploy, configure, and maintain a Gen3 instance; configuring a data dictionary and uploading data; and customizing the frontend.
+* **Gen3 Developer Guide** - This is for a software engineer who wants to extend Gen3 either by contributing to the source code or by integrating Gen3 services into a larger system. This section will cover the Gen3 architecture including the individual microservices and how they interact with each other.
+* **Glossary** - This section can be used as a reference for terminology found within Gen3 technical documentation.
diff --git a/gen3/docs/gen3-resources/operator-guide/authorization.md b/gen3/docs/gen3-resources/operator-guide/authorization.md
index ce43f059..5e5d7b9e 100644
--- a/gen3/docs/gen3-resources/operator-guide/authorization.md
+++ b/gen3/docs/gen3-resources/operator-guide/authorization.md
@@ -2,13 +2,13 @@
# Controlling authorization of data access
## Unstructured data
-Files can be either open access or controlled access within a Gen3 Data Commons. Access to controlled files is managed either through dbGaP or via a white list.
+Files can be either open access or controlled access within a Gen3 Data Commons. Access to controlled files is managed either through dbGaP or via an allow list.
### Authentication
-Authentication is a way of telling a Gen3 system who you are. This requires you configure an Identity Provider (IdP), which is configured through the [Fence service][Fence]. At the moment, the supported IDPs include:
+Authentication is a way of telling a Gen3 system who you are. This requires you to configure an Identity Provider (IdP), which is configured through the [Fence service][Fence]. At the moment, the supported IdPs include:
* Google
* Shibboleth
diff --git a/gen3/docs/gen3-resources/operator-guide/customize-search.md b/gen3/docs/gen3-resources/operator-guide/customize-search.md
index e2fd4c39..e3370a04 100644
--- a/gen3/docs/gen3-resources/operator-guide/customize-search.md
+++ b/gen3/docs/gen3-resources/operator-guide/customize-search.md
@@ -67,7 +67,7 @@ For more information on using the Discovery Page please see the User Guide [Disc
### Metadata Service
To view data in the discovery page you must have a populated [metadata service][mds github] or alternatively an [Aggregated metadata service (aggMDS)][aggmds github], which caches the metadata from two or more metadata source to provide a unified view of the commons on the discovery page.
-Instructions for the creation and modification of an MDS record can be found here as part of the [Gen3 SDK][sdk for discovery page]. Every data commons is different as there is no standardization of MDS and therefore any example we provide may not apply to your particular system.
+Instructions for the creation and modification of an MDS record can be found here as part of the [Gen3 SDK][sdk for discovery page]. Every data commons is different as there is no standardization of MDS and therefore any example we provide may not apply to your particular system.
To view the MDS for the Gen3 Data Hub you can go [here][gen3 data hub mds]. You can see in the snippet below some summary metadata for the 1000 Genomes project with is part of the Gen3 Data Hub:
diff --git a/gen3/docs/gen3-resources/operator-guide/img/cloud-automation.svg b/gen3/docs/gen3-resources/operator-guide/img/cloud-automation.svg
index d7a44b1a..8d2b6ead 100644
--- a/gen3/docs/gen3-resources/operator-guide/img/cloud-automation.svg
+++ b/gen3/docs/gen3-resources/operator-guide/img/cloud-automation.svg
@@ -49,4 +49,4 @@
-
\ No newline at end of file
+
diff --git a/gen3/docs/gen3-resources/operator-guide/img/compose-services.svg b/gen3/docs/gen3-resources/operator-guide/img/compose-services.svg
index 9c847594..9d4cbbb2 100644
--- a/gen3/docs/gen3-resources/operator-guide/img/compose-services.svg
+++ b/gen3/docs/gen3-resources/operator-guide/img/compose-services.svg
@@ -33,4 +33,4 @@
-
\ No newline at end of file
+
diff --git a/gen3/docs/gen3-resources/operator-guide/img/gen3_core_metadata_collection_template.tsv b/gen3/docs/gen3-resources/operator-guide/img/gen3_core_metadata_collection_template.tsv
index 12d72853..a696cb83 100644
--- a/gen3/docs/gen3-resources/operator-guide/img/gen3_core_metadata_collection_template.tsv
+++ b/gen3/docs/gen3-resources/operator-guide/img/gen3_core_metadata_collection_template.tsv
@@ -1 +1 @@
-type project_id submitter_id projects.code contributor coverage creator data_type date description format language publisher relation rights source subject title
core_metadata_collection example-training collection-01 training
\ No newline at end of file
+type project_id submitter_id projects.code contributor coverage creator data_type date description format language publisher relation rights source subject title
core_metadata_collection example-training collection-01 training
diff --git a/gen3/docs/gen3-resources/operator-guide/submit-unstructured-data.md b/gen3/docs/gen3-resources/operator-guide/submit-unstructured-data.md
index f9d726a9..4046bdfe 100644
--- a/gen3/docs/gen3-resources/operator-guide/submit-unstructured-data.md
+++ b/gen3/docs/gen3-resources/operator-guide/submit-unstructured-data.md
@@ -11,7 +11,7 @@ Unstructured data are simply data files that have do not necessarily conform to
In order to upload data files you must at minimum have a `program`, `project`, and at least one record in the `core_metadata_collection` node or other data containing node. To review how to submit the program and project nodes see [here][Node Order].
-This documentation will utilize the core_metadata_collection node but other nodes can be used depending on your unique data model. If your project already has at least one record in a node of this type, you can skip to [step 2](#2-upload-data-files-to-object-storage).
+This documentation will utilize the core_metadata_collection node but other nodes can be used depending on your unique data model. If your project already has at least one record in a node of this type, you can skip to [step 2](#2-upload-data-files-to-object-storage).
Do the following to create your first `core_metadata_collection` record:
diff --git a/gen3/docs/gen3-resources/operator-guide/tutorial_alloy.md b/gen3/docs/gen3-resources/operator-guide/tutorial_alloy.md
index d9046348..eb387a2b 100644
--- a/gen3/docs/gen3-resources/operator-guide/tutorial_alloy.md
+++ b/gen3/docs/gen3-resources/operator-guide/tutorial_alloy.md
@@ -54,4 +54,4 @@ The link below will take you to the Grafana Alloy chart, providing a comprehensi
---
-By following this guide, you'll successfully configure Alloy to send logs and metrics to Grafana Loki and Mimir. The setup will ensure that Alloy collects the necessary observability data from your environment and forwards logs to Loki and metrics to Mimir for analysis and storage. This configuration will allow you to monitor your system's logs and metrics efficiently through Grafana.
\ No newline at end of file
+By following this guide, you'll successfully configure Alloy to send logs and metrics to Grafana Loki and Mimir. The setup will ensure that Alloy collects the necessary observability data from your environment and forwards logs to Loki and metrics to Mimir for analysis and storage. This configuration will allow you to monitor your system's logs and metrics efficiently through Grafana.
diff --git a/gen3/docs/gen3-resources/operator-guide/tutorial_faro.md b/gen3/docs/gen3-resources/operator-guide/tutorial_faro.md
index cbf81c20..4aee1db9 100644
--- a/gen3/docs/gen3-resources/operator-guide/tutorial_faro.md
+++ b/gen3/docs/gen3-resources/operator-guide/tutorial_faro.md
@@ -68,13 +68,13 @@ alloy-configmap-data: |
}
}
}
-
+
loki.write "endpoint" {
endpoint {
url = "http://grafana-loki-gateway.monitoring:80/loki/api/v1/push"
}
}
-
+
faro.receiver "default" {
server {
listen_address = "0.0.0.0"
@@ -151,6 +151,6 @@ portal:
"grafanaFaroSampleRate": 1, // optional; numeric; the Grafana Faro option specifying the percentage of sessions to track: 1 for all, 0 for none. Default to 1 if omitted
},
```
----
+---
-By following this guide, you'll have successfully set up Alloy to receive Grafana Faro logs and metrics while exposing the service over the internet using Kubernetes ingress. You’ll also be able to monitor Faro metrics through Fence and make necessary configurations in Gen3 Portal for seamless Faro integration.
\ No newline at end of file
+By following this guide, you'll have successfully set up Alloy to receive Grafana Faro logs and metrics while exposing the service over the internet using Kubernetes ingress. You’ll also be able to monitor Faro metrics through Fence and make necessary configurations in Gen3 Portal for seamless Faro integration.
diff --git a/gen3/docs/gen3-resources/operator-guide/tutorial_global_IAM_helm_user.md b/gen3/docs/gen3-resources/operator-guide/tutorial_global_IAM_helm_user.md
index e2a35d9c..efbb8b20 100644
--- a/gen3/docs/gen3-resources/operator-guide/tutorial_global_IAM_helm_user.md
+++ b/gen3/docs/gen3-resources/operator-guide/tutorial_global_IAM_helm_user.md
@@ -22,11 +22,11 @@ Example policy containing all the proper permissions:
"Effect": "Allow",
"Action": "s3:GetObject",
"Resource": [
- "arn:aws:s3:::$BUCKET/$ENVIRONMENT/*",
+ "arn:aws:s3:::$BUCKET/$ENVIRONMENT/*",
# Fence Usersync Job: Name of the userYamlS3Path containing the user.yaml file
- "arn:aws:s3:::$BUCKET/$ENVIRONMENT/$VERSION/elasticsearch/*",
+ "arn:aws:s3:::$BUCKET/$ENVIRONMENT/$VERSION/elasticsearch/*",
# ES Index Restore Job: Name of the dbRestoreBucket with the proper path to the ES dump files.
- "arn:aws:s3:::$BUCKET/$ENVIRONMENT/$VERSION/pgdumps/*"
+ "arn:aws:s3:::$BUCKET/$ENVIRONMENT/$VERSION/pgdumps/*"
# DB PG Dump Restore Job: Name of the dbRestoreBucket with the proper path to the SQL dump files.
]
},
@@ -37,14 +37,14 @@ Example policy containing all the proper permissions:
"secretsmanager:GetSecretValue"
],
"Resource": [
- "*"
+ "*"
# External Secrets: Leave as is to allow External Secrets access to your secrets in Secrets Manager.
]
},
{
"Effect": "Allow",
"Action": "es:*",
- "Resource": "arn:aws:es:REGION:ACCOUNT_ID:domain/CLUSTER_NAME/*"
+ "Resource": "arn:aws:es:REGION:ACCOUNT_ID:domain/CLUSTER_NAME/*"
# AWS ES Proxy Service: Arn of your Elasticsearch Cluster in AWS.
}
]
@@ -52,4 +52,4 @@ Example policy containing all the proper permissions:
```
## After Creating the User
-In order to integrate the user in Helm, paste in the values of your Access and Secret Access key in `.Values.global.aws.awsAccessKeyId` and `.Values.global.aws.awsSecretAccessKey`
\ No newline at end of file
+In order to integrate the user in Helm, paste the values of your Access Key and Secret Access Key into `.Values.global.aws.awsAccessKeyId` and `.Values.global.aws.awsSecretAccessKey`.
diff --git a/gen3/docs/gen3-resources/user-guide/access-data.md b/gen3/docs/gen3-resources/user-guide/access-data.md
index 46c9e4dc..a3cc87ef 100644
--- a/gen3/docs/gen3-resources/user-guide/access-data.md
+++ b/gen3/docs/gen3-resources/user-guide/access-data.md
@@ -1,25 +1,27 @@
# Access Data Files
-
-
## Authentication and Authorization
### Authentication
-Authentication refers to how a user identifies themself to the Gen3 system. The method of user authentication varies from system to system. This could include eRA Commons, Google, Microsoft Office 365, InCommons, eduGAIN, ORCID, or generally anything following an OIDC standard. This is configured by your system operator and you can find more details in the [Gen3 Operator's Guide][configure auth].
+Authentication refers to how a user identifies themselves to the Gen3 system. The method of user authentication varies from system to system. This could include eRA Commons, Google, Microsoft Office 365, InCommon, eduGAIN, ORCID, or generally anything following an OIDC standard. This is configured by your system operator, and you can find more details in the [Gen3 Operator's Guide][configure auth].
### Authorization
-Authorization indicates to which data a particular user has access. Governance practices vary from Gen3 system to system and this can take a variety of forms, but typically falls into two buckets: white list and dbGaP. You can find more in-depth details on how this is configured within the [Gen3 Operator's Guide][configure auth].
+Authorization indicates to which data a particular user has access. Governance practices vary from Gen3 system to system and this can take a variety of forms, but typically falls into two buckets: allow list and dbGaP. You can find more in-depth details on how this is configured within the [Gen3 Operator's Guide][configure auth].
-#### White list
-A white list is simply a list of users (identified based on your method of authentication) that controls which users have access to which data. It is in the form of a user.yaml file that is maintained by a the operator of your Gen3 system. You should contact the operator of your system or follow whatever process they have in place to request access. Gaining access may require you to sign a Data Use Agreement. Data access is granted at the program or project level.
+#### Allow list
+An allow list is simply a list of users (identified based on your method of authentication) that controls which users have access to which data. It is in the form of a user.yaml file that is maintained by the operator of your Gen3 system. You should contact the operator of your system or follow whatever process they have in place to request access. Gaining access may require you to sign a Data Use Agreement. Data access is granted at the program or project level.
#### dbGaP
Another common authorization mechanism is dbGaP. In order to obtain access to controlled-access data via dbGaP, PIs must first obtain an [NIH eRA Commons account][era_commons] and then obtain authorization to access the data through the [NIH database of Genotypes and Phenotypes (dbGaP)][dbgap].
To obtain dbGaP access, navigate to the [dbGaP Authorized Access site][dbgap auth] and follow the instructions. This process includes working with your institutional research office, reviewing the consent agreement for the particular project, and writing a Research Use Statement and thus can take a significant amount of time.
+#### Bulk allow list
+Another option is to use a bulk allow list from an SFTP server in the same format as dbGaP, but not actually controlled by dbGaP.
+#### Requestor Service
+Operators can also take advantage of the Requestor Service for dynamic authorization. In this case Gen3 interacts with another system where authorization requests are reviewed, approved, denied, or revoked.
## Download Files Using the Gen3-client
@@ -27,9 +29,8 @@ The gen3-client provides an easy-to-use, command-line interface for uploading an
This guide has the following sections:
-
### Installation Instructions
-A binary executable of the latest version of the gen3-client should be [downloaded from Github][Gen3 Client]. Choose the file that matches your operating system (Windows, Linux, or Mac OS).
+A binary executable of the latest version of the gen3-client should be [downloaded from GitHub][Gen3 Client]. Choose the file that matches your operating system (Windows, Linux, or macOS).
No installation is necessary. Simply download the correct version for your operating system and unzip the archive. The program is then executed from the command-line by running the command `gen3-client `. For more detailed instructions, see the section below for your operating system.
@@ -129,7 +130,7 @@ Before using the gen3-client to upload or download data, the gen3-client needs t
### Download a Single Data File Using a GUID
-Files with a valid storage location in the file index database (AKA *indexd*) can downloaded using the `gen3-client download-single` command by providing the file's object_id (AKA *GUID* or *did*).
+Files with a valid storage location in the file index database (AKA *indexd*) can be downloaded using the `gen3-client download-single` command by providing the file's object_id (AKA *GUID* or *did*).
For example, the indexd record for object_id ["00149bcf-e057-4ecc-b22d-53648ae0b35f"](https://gen3.datacommons.io/index/00149bcf-e057-4ecc-b22d-53648ae0b35f) points to a [location in the GDC](https://api.gdc.cancer.gov/data/47b982b3-c7ce-4ca7-8c86-c71c15979620).
diff --git a/gen3/docs/gen3-resources/user-guide/analyze-data.md b/gen3/docs/gen3-resources/user-guide/analyze-data.md
index cf54a097..befb74f2 100644
--- a/gen3/docs/gen3-resources/user-guide/analyze-data.md
+++ b/gen3/docs/gen3-resources/user-guide/analyze-data.md
@@ -1,5 +1,9 @@
# Data Analysis in a Gen3 Data Commons
-The Gen3 platform for creating data commons co-locates data management with analysis workspaces, apps and tools. Workspaces are highly customizable by the operators of a Gen3 data commons and offer a variety of VM images (virtual machines) pre-configured with tools for specific analysis tasks. Custom applications for executing bioinformatics workflows or exploratory analyses may be integrated in the navigation bar as well. The following documentation primarily covers exploratory data analysis in the standard Gen3 Workspace, which can be accessed by clicking the “Workspace” icon in the top navigation bar or navigating to the [/workspace][workspace] endpoint.
+The Gen3 platform for creating data commons co-locates data management with analysis workspaces, apps and tools.
+
+Workspaces are highly customizable by the operators of a Gen3 data commons and offer a variety of VM images (virtual machines) pre-configured with tools for specific analysis tasks. Custom applications for executing bioinformatics workflows or exploratory analyses may be integrated in the navigation bar as well.
+
+The following documentation primarily covers exploratory data analysis in the standard Gen3 Workspace, which can be accessed by clicking the “Workspace” icon in the top navigation bar or navigating to the [/workspace][workspace] endpoint.
## Launch Workspace
@@ -41,7 +45,15 @@ Bringing in files into the Gen3 Workspace can be achieved via the UI (directly f
> Note: Not every PlanX Data Commons has the function in the UI enabled; users are advised to follow available commons-specific documentation.
### Exporting Files from the Exploration Tab to the Workspace
-The Exploration page allows to search through data and create cohorts, which can be exported to the Workspace. After a cohort has been selected, the data can be exported to a Workspace by clicking “Export to Workspace”. Do not navigate away from the browser after clicking the button. Allow up to 5 minutes to export your files. A popup window will appear confirming that exporting a “manifest” to the workspace has been successful. Find the data or data files in the folder “data” on your persistent drive “/pd”. Please note, that the workspace mounts up to 5 different manifests while the workspace is running, but shows only the latest exported manifest in a newly launched workspace.
+The Exploration page allows users to search through data and create cohorts, which can be exported to the Workspace.
+
+- After a cohort has been selected, the data can be exported to a Workspace by clicking “Export to Workspace”.
+- Do not navigate away from the browser after clicking the button.
+- Allow up to 5 minutes to export your files.
+- A popup window will appear confirming that exporting a “manifest” to the workspace has been successful.
+- Find the data or data files in the folder “data” on your persistent drive “/pd”.
+
+> Please note: the workspace mounts up to 5 different manifests while the workspace is running, but shows only the latest exported manifest in a newly launched workspace.
![Screenshot of Exploration page showing how to export data to a Workspace with button][img Export Data to Workspace]
@@ -52,7 +64,7 @@ In order to download data files directly and programmatically from a Gen3 data c
From the command line, download the latest [Linux version of the gen3-client][linux gen3 client] using the `wget` command. Next, unzip the archive and add it to your path:
Example:
-```
+```bash
wget https://github.com/uc-cdis/cdis-data-client/releases/download/2020.11/dataclient_linux.zip
unzip dataclient_linux.zip
PATH=$PATH:~/
@@ -109,24 +121,24 @@ GSM1558854_Sample40_3.CEL.gz 4.20 MiB / 4.20 MiB [====================....
jovyan@jupyter-user:~$ mv *.gz files
```
-## Working with the proxy and whitelists
+## Working with the proxy and allow lists
### Working with the Proxy
To prevent unauthorized traffic, the Gen3 VPC utilizes a proxy. If you are using one of the custom VMs setup, there is already a line in your .bashrc file to handle traffic requests.
-```
+```bash
export http_proxy=http://cloud-proxy.internal.io:3128
export https_proxy=$http_proxy
```
Alternatively, if you have a different service or a tool that needs to call out, you can set the proxy with each command.
-```
+```bash
https_proxy=https://cloud-proxy.internal.io:3128 aws s3 ls s3://gen3-data/ --profile
```
-### Whitelists
+### Allow lists
-Additionally, to aid Gen3 Commons security, the installation of tools from outside resources is managed through a whitelist. If you have problems installing a tool you need for your work, contact [support@gen3.org](mailto:support@gen3.org) and with a list of any sites from which you might wish to install tools. After passing a security review, these can be added to the whitelist to facilitate access.
+Additionally, to aid Gen3 Commons security, the installation of tools from outside resources is managed through an allow list. If you have problems installing a tool you need for your work, contact [support@gen3.org](mailto:support@gen3.org) with a list of any sites from which you might wish to install tools. After passing a security review, these can be added to the allow list to facilitate access.
## Using the Gen3 Python SDK
To make programmatic interaction with Gen3 data commons easier, the bioinformatics team at the Center for Translational Data Science (CTDS) at University of Chicago has developed the Gen3 Python SDK, which is a Python library containing functions for sending standard requests to the Gen3 APIs. The code is open-source and available on [GitHub][Gen3 Python SDK Github] along with [documentation for using it][Gen3 Python SDK doc].
@@ -256,4 +268,4 @@ When finished, please, shut down the workspace server by clicking the “Termina
[#2 .ipynb]: notebooks/notebook2_canine.ipynb
[#3 notebook]: notebooks/notebook3_gen3datacommonsio.html
[Gen3 Data Hub]: https://gen3.datacommons.io/
-[#3 .ipynb]: notebooks/notebook3_gen3datacommonsio.ipynb
+[#3 .ipynb]: notebooks/notebook3_gen3datacommonsio.ipynb
diff --git a/gen3/docs/gen3-resources/user-guide/portal.md b/gen3/docs/gen3-resources/user-guide/portal.md
index 210bb44d..f2247e4f 100644
--- a/gen3/docs/gen3-resources/user-guide/portal.md
+++ b/gen3/docs/gen3-resources/user-guide/portal.md
@@ -1,7 +1,7 @@
# Accessing and Exploring Metadata from the Gen3 Data Portal
The data in a Gen3 data commons can be browsed and downloaded using several different methods. The following general documentation will cover some standard methods of data access in a Gen3 data commons. Ultimately, however, the methods of data access offered in a Gen3 data commons is determined by agreements made between the data commons’ sponsors and data contributors.
-Various levels of data access can be configured in a Gen3 data commons using the Fence and Arborist services. If open access data is hosted, a data commons can be configured to allow anonymous access to data, which means users can explore data without logging in. This is the case for the [Gen3 Data Hub][Gen3 Data Hub].
+Various levels of data access can be configured in a Gen3 data commons using the Gen3 Framework Services. If open access data is hosted, a data commons can be configured to allow anonymous access to data, which means users can explore data without logging in. This is the case for the [Gen3 Data Hub][Gen3 Data Hub].
In cases where data is controlled access, typically external users will receive instructions on how to access data and may be required to sign a DUA (Data Use Agreement) legal document.
@@ -10,8 +10,6 @@ The following sections provide details on how to explore and access data from wi
## Access Data from the Data Portal
The Gen3 software stack offers a data portal service that creates a website with graphical tools for performing the basic functionality of a data commons, like browsing data in projects, building patient cohorts across projects, downloading metadata or data files for cohorts, and building database queries.
-
-
### Profile Page
@@ -37,8 +35,16 @@ If the table is a list of files, there should be a button for downloading a JSON
![GIF showing how to use the Gen3 Explorer][img Explorer GIF]
+
>Note: Some data commons have security measures in place that limit what environments users can access data files. For example, users may be required to download and analyze data files in a protected environment, such as a virtual machines (VM) in a virtual private cloud (VPC) or in the built-in Gen3 Workspace, which is accessed by clicking on “Workspace” in the top navigation bar of the data commons website. For more information on the Workspace, see the [documentation on how to access and use the Gen3 Workspace][Gen3 Workspace].
+#### Export to external analysis workspaces
+
+The explorer page can also be configured to allow export of a PFB (Portable Format for Biomedical data) file to external systems such as Terra or Seven Bridges. A PFB file contains structured clinical data, the data dictionary, and pointers to associated files.
+
+
+
+
### Query Page
The structured data in a Gen3 data commons can be queried by using the [graphQL query language][GraphQL language] within the GraphiQL interface for building queries, which can be accessed by clicking the “Query” button in the top navigation bar or by navigating to the /query endpoint, for example, [the Gen3 Data Hub Query Page][Gen3 Query pg].
@@ -120,7 +126,7 @@ NOTE: For these user guides, https://gen3.datacommons.io is an example URL and c
### Discovery Page
-The Gen3 Discovery Page allows the visualization of metadata from within the metadata service (MDS). This typically includes public metadata about a project to make it discoverable. It can be used by both data commons and meshes although it can play a more central role in a data mesh. Users should be able to search based on free text or filter based on tags.
+The Gen3 Discovery Page allows the visualization of metadata from within the metadata service (MDS). This typically includes public metadata about projects to make them discoverable. The Discovery Page can also be used to store publication information, DOI metadata, or FHIR metadata. It can be used by both data commons and meshes, although it can play a more central role in a data mesh. Users should be able to search based on free text or filter based on tags.
![Animation showing how to navigate around the Discovery page][Discovery Page]
@@ -177,6 +183,7 @@ All the functionality of the data commons data portal is available by sending re
[Gen3 client]: access-data.md#download-files-using-the-gen3-client
[Gen3 bulk download]: access-data.md#multiple-file-download-with-manifest
[img Explorer GIF]: img/explorer_gif_2020.gif
+[Gen3 Workspace]: analyze-data.md#launch-workspace
@@ -210,4 +217,4 @@ All the functionality of the data commons data portal is available by sending re
-[API documentation]: using-api.md
+[API documentation]: using-api.md
diff --git a/gen3/docs/gen3-resources/user-guide/search.md b/gen3/docs/gen3-resources/user-guide/search.md
index 1a9ff6a9..21a3ccdd 100644
--- a/gen3/docs/gen3-resources/user-guide/search.md
+++ b/gen3/docs/gen3-resources/user-guide/search.md
@@ -2,14 +2,44 @@
# Searching and Exploring Structured Data
The data in a Gen3 data commons can be browsed and downloaded using several different methods. The following general documentation will cover some standard methods of data access in a Gen3 data commons. Ultimately, however, the methods of data access offered in a Gen3 data commons is determined by agreements made between the data commons’ sponsors and data contributors.
-Various levels of data access can be configured in a Gen3 data commons using the Fence and Arborist services. If open access data is hosted, a data commons can be configured to allow anonymous access to data, which means users can explore data without logging in. This is the case for the [Gen3 Data Hub][Gen3 Data Hub].
+Various levels of data access can be configured in a Gen3 data commons using the Gen3 Framework Services. If open access data is hosted, a data commons can be configured to allow anonymous access to data, which means users can explore data without logging in. This is the case for the [Gen3 Data Hub][Gen3 Data Hub].
In cases where data is controlled access, typically external users will receive instructions on how to access data and may be required to sign a DUA (Data Use Agreement) legal document.
The following sections provide details on how to explore and access data from within the data commons website and from the command-line by sending requests to the Gen3 open APIs.
+[//]: # (Alex: I would like to see this ordered like this Data Discovery -> Data Access Approval - if controlled data -> Data Exploration. I would like to promote using the Gen3 Discovery page as the initial place to find data as that's its intended purpose.)
+
+[//]: # (Alex: I started thinking through user "questions" and relating them to particular groups of endpoints in our API and thinking through how users would expect to move between parts of the product. It's old and maybe needs some dust removed, but it was done while thinking through the idea of the Data Lake. The relevant section of the feature doc is here: https://docs.google.com/document/d/1UjQjvUuasmfe_5iaEfqjFzLTHsC-8EXXNYCSyP0-k_s/edit?tab=t.0#heading=h.xp6x9p5szrmw)
+
## Searching for Data from the Data Portal
-The Gen3 software stack offers a data portal service that creates a website with graphical tools for performing the basic functionality of a data commons, like browsing data in projects, building patient cohorts across projects, downloading metadata or data files for cohorts, and building database queries. The data portal relies on the same [API queries][API instructions] that you can explore directly if you prefer.
+Gen3 offers a data portal service that creates a website with graphical tools for performing the basic functionality of a data commons, like browsing data in projects, building patient cohorts across projects, downloading metadata or data files for cohorts, and building database queries.
+
+While this may vary from system to system, to find data in a data commons you can follow a general workflow of:
+
+1. Discover data in a mesh or commons using the Discovery Page
+2. Request access to data using a system-specific solution
+3. Explore files in the Exploration Page
+4. Export data to workspace or download locally depending on the requirements of your system.
+
+Instead of using the Discovery and Exploration pages, you could use the [API][API instructions] to locate data of interest.
+
+
+### Discovery Page
+
+In many data commons or meshes, the first place to explore your data will be on a Discovery Page. This typically includes public metadata about projects to make them discoverable. The Discovery Page can also be used to store publication information, DOI metadata, and/or FHIR metadata.
+
+Users should be able to search based on free text or filter based on tags and can determine whether they have sufficient authorization to access files for a given project.
+
+
+![Animation showing how to navigate around the Discovery page][Discovery Page]
+
+
+The [Discovery Page][BRH Discovery] provides users a venue to search and find studies and datasets displayed on the Biomedical Research Hub. Users can **browse through the publicly accessible study-level metadata** without requiring authorization.
+
+> Use text-based search, faceted search, and tags to rapidly and efficiently find relevant studies, discover new datasets across multiple resources, and easily export selected data files to the analysis workspace. Browse through datasets and study-level metadata and find studies using tags, advanced search, or the free text search field.
+
+![The Discovery Page of the Biomedical Research Hub.][img Discover grid]
### Exploration Page
The primary tool for exploring data within a Gen3 data commons is the Exploration Page, which offers faceted search of data across projects, for example, https://gen3.datacommons.io/explorer. This page can be accessed from the /explorer endpoint or the top navigation bar, by clicking on the “Exploration” icon.
@@ -40,7 +70,7 @@ Users can submit queries to the Gen3 APIs to access structured data across the p
Users with “read” access to a project can export entire structured metadata records by sending requests to the API. Single records can be exported or all records in a specific node of a project can be retrieved. For more information, see the [documentation on using the API][API documentation].
#### The Gen3 SDK
-To make sending requests to the Gen3 APIs easier, the bioinformatics team at the Center for Translational Data Science (CTDS) at University of Chicago has put together a basic Python SDK (software development kit) to help users interact with the Gen3 APIs.
+To make sending requests to the Gen3 APIs easier, the bioinformatics team at the Center for Translational Data Science (CTDS) at University of Chicago has put together a basic Python SDK (software development kit) to help users interact with the Gen3 APIs. It also exposes a Command Line Interface (CLI), which covers a lot of data ingestion support and doesn't require the user to write Python.
The SDK is essentially a collection of Python wrapper functions for sending requests to the API. It is open source and can be found on [Github][Gen3 SDK GitHub pg]. Thorough documentation for the SDK can be found in the GitHub repository [documentation page][SDK doc pg].
@@ -53,27 +83,12 @@ Entire structured data records can be exported as a JSON or TSV file using the [
More SDK examples and how to get started with the SDK can be also found in the [analyze-data section][Using Gen3 SDK].
-
-### Submission Page
-
-#### Browsing the List of Projects
-A graphical model of the structured data in individual data projects of a data commons can be browsed by navigating to the /submission endpoint of a data commons website or by clicking on the “Browse Data” or “Submit Data” buttons in the top navigation bar, for example, [https://gen3.datacommons.io/submission][Gen3 Data Submission pg]. This page lists all the data projects within a commons a user has authorization to view. Clicking the “Browse” or “Submit Data” button by a project ID will open a view of that individual project’s structured metadata graph, which can be further inspected by clicking on a node in the graph model and then viewing individual records by clicking “View” by the submitter_id or downloading all the records in that node by clicking the “Download All” button.
-
-> Note: Users who are authorized to submit data may see a “Submit Data” button instead of “Browse Data”, and will also be able to upload or create structured data in the project on this page.
-
-##### Example: Browse Data in Individual Projects
-![Image showing options for browsing nodes in individual projects using either the dropdown list or project graph model][img Browse Nodes in Projects]
-
-In the graphical model of a data project, the number you see underneath the node name, for example ‘subject’, reflects the number of records in that node of the project. The “Toggle View” button is used to show or hide nodes in the data model that the project has no records for.
-
-![GIF showing how to view the graphical model of a project, toggling to show or hide nodes that have no records][img Graphing a project]
-
-
-
### Query Page
-The easiest way to query structured data in a Gen3 data commons is done by using the [graphQL query language][GraphQL] with the GraphiQL interface, which can be accessed by clicking “Query” in the top navigation bar or by navigating to the URL: [https://gen3.datacommons.io/query][Query page]. The URL https://gen3.datacommons.io can be replaced with the URL of other Gen3 data commons.
+While you may call the API directly as described above, Gen3 also includes an interactive interface for creating [graphQL query language][GraphQL] calls on the Query Page. This can be accessed by clicking “Query” in the top navigation bar or by navigating to the URL: [https://gen3.datacommons.io/query][Query page]. The URL https://gen3.datacommons.io can be replaced with the URL of other Gen3 data commons.
+
+This query portal has been optimized to autocomplete fields based on content, increase speed and responsiveness, pass variables, and generally make it easier for users to find information. The “Docs” button will display documentation of the queryable nodes and properties.
-This query portal has been optimized to autocomplete fields based on content, increase speed and responsiveness, pass variables, and generally make it easier for users to find information. The “Docs” button will display documentation of the queryable nodes and properties. From the GraphiQL interface of the data portal, you can switch between *Graph Model* or *Flat Model* – each using endpoints that query different databases (Postgres and ElasticSearch, respectively). Notably, the same queries can be sent to both the flat and graph model API endpoints from the command-line.
+From the GraphiQL interface of the data portal, you can switch between *Graph Model* or *Flat Model* – each using endpoints that query different databases (Postgres and ElasticSearch, respectively). Notably, the same queries can be sent to both the flat and graph model API endpoints from the command-line.
#### Graph Model
@@ -269,9 +284,32 @@ query ($filter: JSON) {
```
+### Submission Page
+
+#### Browsing the List of Projects
+A graphical model of the structured data in individual data projects of a data commons can be browsed by navigating to the /submission endpoint of a data commons website or by clicking on the “Browse Data” or “Submit Data” buttons in the top navigation bar, for example, [https://gen3.datacommons.io/submission][Gen3 Data Submission pg]. This page lists all the data projects within a commons a user has authorization to view. Clicking the “Browse” or “Submit Data” button by a project ID will open a view of that individual project’s structured metadata graph, which can be further inspected by clicking on a node in the graph model and then viewing individual records by clicking “View” by the submitter_id or downloading all the records in that node by clicking the “Download All” button.
+
+> Note: Users who are authorized to submit data may see a “Submit Data” button instead of “Browse Data”, and will also be able to upload or create structured data in the project on this page.
+
+##### Example: Browse Data in Individual Projects
+![Image showing options for browsing nodes in individual projects using either the dropdown list or project graph model][img Browse Nodes in Projects]
+
+In the graphical model of a data project, the number you see underneath the node name, for example ‘subject’, reflects the number of records in that node of the project. The “Toggle View” button is used to show or hide nodes in the data model that the project has no records for.
+
+![GIF showing how to view the graphical model of a project, toggling to show or hide nodes that have no records][img Graphing a project]
+
+
+
+
+
-
+
+
+
+[Discovery Page]: img/Discovery_page3.gif
+[BRH Discovery]: https://brh.data-commons.org/discovery
+
[img Gen3 Toolbar Exploration]: img/Gen3_Toolbar_exploration.png
[img Explorer GIF]: img/explorer_gif_2020.gif
diff --git a/gen3/docs/gen3-resources/user-guide/using-api.md b/gen3/docs/gen3-resources/user-guide/using-api.md
index 4f8cb908..e52978dd 100644
--- a/gen3/docs/gen3-resources/user-guide/using-api.md
+++ b/gen3/docs/gen3-resources/user-guide/using-api.md
@@ -8,7 +8,7 @@ The beauty of a Gen3 data commons is that all the functionality of the data comm
**Note:** The Gen3 commons uses GraphQL as the language for querying metadata across Gen3 Data Commons. To learn the basics of writing queries in GraphQL, please visit: [http://graphql.org/learn][learn GraphQL].
-Gen3 features a variety of API endpoints such as `/submission`, `/index`, or `/graphql`, which differ in how they access the resource and contain each a subset of REST (Representational State Transfer) APIs for networked applications. REST APIs are restricted in their interactions via HTTP request methods such as GET, POST, PATCH, PUT, or DELETE. The GET request retrieves data in read-only mode, POST sends data and creates a new resource, PATCH updates/modifies a resource, PUT updates/replaces a resource, and DELETE deletes a resource. At Gen3, the GET endpoint
+Gen3 features a variety of API endpoints such as `/submission`, `/index`, or `/graphql`, which differ in how they access the resource and contain each a subset of REST (Representational State Transfer) APIs for networked applications. REST APIs are restricted in their interactions via HTTP request methods such as GET, POST, PATCH, PUT, or DELETE. The GET request retrieves data in read-only mode, POST typically sends data and creates a new resource, PATCH typically updates/modifies a resource, PUT typically updates/replaces a resource, and DELETE deletes a resource. At Gen3, the GET endpoint
```
/api/v0/submission///_dictionary
```
@@ -37,24 +37,27 @@ While displayed, click “copy” to copy the API key to the clipboard or “dow
![Screenshot of the API Key window showing the option to copy the API key or download the .json file with credentials][img API copy keys]
In Python, the following command is sent, using the module “requests”, to receive the access token:
-```
-# Save the copied credentials.json from the website and paste the api_key and key_id into a variable "key":
-key = {
- "api_key": "",
- "key_id": ""
- }
+```python
+# Save the credentials.json file from the website. Copy the file path to the credentials file with the key.
+# Then, paste the file path as the value for the `key_file` variable
+
+import json, requests
-# Import the "requests" Python module:
-import requests
+api = "https://gen3.datacommons.io"
+key_file = "/put_path_to/credentials.json"
+
+# Read the key from the file
+with open(key_file) as json_file:
+ key = json.load(json_file)
# Pass the API key to the Gen3 API using "requests.post" to receive the access token:
-token = requests.post('https://gen3.datacommons.io/user/credentials/cdis/access_token', json=key).json()
+token = requests.post('{}/user/credentials/cdis/access_token'.format(api), json=key).json()
# Now the access_token should be displayed when the following line is entered:
token
```
When submitting a graphQL query to the Gen3 API, or requesting data download/upload, include the access token in the request header:
-```
+```python
headers = {'Authorization': 'bearer '+ token['access_token']}
# A GraphQL Endpoint Query Using the "key" JSON:
@@ -82,7 +85,7 @@ u = requests.put('https://gen3.datacommons.io/api/v0/submission/{}/{}'.format(pr
u.text # should display the API response
```
-If an an error such as “You don’t have access… " occurs, then either you do not have access, or the API key is out of date and a new access token will need to be made. Further errors could occur if the uploaded file is not correctly formatted for the Gen3 data model.
+If an error such as “You don’t have access…” occurs, then either you do not have access, or the API key is out of date and a new access token will need to be made. Further errors could occur if the uploaded file is not correctly formatted for the Gen3 data model.
## Querying and Downloading Metadata using the API
diff --git a/gen3/docs/index.md b/gen3/docs/index.md
index b16a347b..4a72db26 100644
--- a/gen3/docs/index.md
+++ b/gen3/docs/index.md
@@ -1,16 +1,16 @@
# Gen3 Documentation
-This is your home for all technical documentation related to the design, deployment, use, or maintenance of a Gen3 data commons or mesh.
+This is your home for all technical documentation related to the design, deployment, use, and maintenance of a Gen3 data commons or mesh.
-Please visit [Gen3.org](https://gen3.org) if you would like a high-level overview of Gen3 as well as details about the Gen3 philosophy, community events, and governance.
+Please visit [Gen3.org](https://gen3.org) if you would like a high-level overview of Gen3 as well as details about the Gen3 philosophy, community events, and governance.
Gen3 documentation is organized by the category of person interacting with Gen3:
-* **Gen3 User** - This is a data scientist, researcher, or analyst who needs to explore, download, or analyze data found within an existing data commons or mesh.
+* **Gen3 User** - This is a data scientist, researcher, or analyst who needs to explore, download, or analyze data found within an existing instance of Gen3.
* **Gen3 Developer** - This is a software engineer who wants to extend Gen3 either by contributing to the source code or by integrating Gen3 services into a larger system. This section will cover the Gen3 architecture including the individual microservices and how they interact with each other.
-* **Gen3 Operator** - This is for those organizations who operate their own Gen3 commons. It will include content on how to Deploy or Spin up a Gen3 commons, configuring and data dictionary and uploading data, and customizing the frontend.
+* **Gen3 Operator** - This is for those organizations who operate their own Gen3 instances. It will include content on how to deploy or spin up a Gen3 instance, configure a data dictionary, upload data, and customize the frontend.
diff --git a/layouts/_default/baseof.html b/layouts/_default/baseof.html
index 80917ca0..03897f88 100644
--- a/layouts/_default/baseof.html
+++ b/layouts/_default/baseof.html
@@ -13,4 +13,4 @@
{{ end }}
{{ partial "footer.html" . }}