diff --git a/.secrets.baseline b/.secrets.baseline index 7b5d70c1..ff8d14ac 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -314,13 +314,22 @@ "line_number": 96 } ], + "gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md": [ + { + "type": "Secret Keyword", + "filename": "gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md", + "hashed_secret": "64ab0c1d3edc1c8c166351207b840ac7b2a90523", + "is_verified": false, + "line_number": 82 + } + ], "gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-example.md": [ { "type": "Private Key", "filename": "gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-example.md", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 238 + "line_number": 265 } ], "gen3/docs/gen3-resources/operator-guide/submit-unstructured-data.md": [ @@ -582,5 +591,5 @@ } ] }, - "generated_at": "2024-10-30T19:31:32Z" + "generated_at": "2024-11-18T20:09:19Z" } diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md index fde8433b..a13bd076 100644 --- a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md +++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-auth.md @@ -1,5 +1,180 @@ # Configure AuthN/AuthZ for Helm Deployment -https://docs.gen3.org/docs/Deployment/Configurations/Auth/arborist +Authentication (AuthN) and authorization (AuthZ) work together as part of identity and access management (IAM). AuthN is controlled by Fence - it relates to confirming the identity of the user (often through single sign-on). AuthZ is controlled by Arborist - it determines what an authenticated user can see and do. -https://docs.gen3.org/docs/Deployment/Configurations/Auth/fence +## Arborist (AuthZ) + +### What Does it Do + +Arborist is the authorization service. It works with Fence to assign authorizations to a user based on their authentication information.
Information about user authorizations is set within a useryaml, or telemetry file for dbgap authorized users, and put into the arborist db during usersync. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Arborist values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/arborist/values.yaml). + +### How to configure it + +For the full set of configuration options, see the [Helm README.md for Arborist](https://github.com/uc-cdis/gen3-helm/tree/master/helm/arborist) + +Some common configuration options include: + +**Postgres configuration** + +``` +# -- (map) To configure postgresql subchart +# Persistence is disabled by default +postgresql: + primary: + persistence: + # -- (bool) Option to persist the dbs data. + enabled: true +``` + +You can see examples of this configuration in context in the following [example Gen3 values.yamls](https://github.com/uc-cdis/gen3-helm/tree/master/examples): + +* [aws_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/aws_dev_values.yaml) +* [gke_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_dev_values.yaml) +* [gke_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_values.yaml) + +**Image repo/ tag** + +``` +arborist: + enabled: true + + # What image/ tag to pull + image: + tag: + repository: +``` + +Common Arborist database SQL queries can be [found here](https://github.com/uc-cdis/cdis-wiki/blob/master/dev/gen3-sql-queries.md#arborist-database). *Note: this link is only visible to CTDS employees* + +## Fence (AuthN) + +### What Does it Do + +Fence is a core service for a Gen3 datacommons which handles authentication. It is necessary for a commons to run at all, and will handle authentication on the `/login` endpoint as well as creating presigned URLs in the presigned-url-fence pods.
+ +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Fence values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/fence/values.yaml). + +### How to configure it + +For the full set of configuration options, see the [Helm README.md for Fence](https://github.com/uc-cdis/gen3-helm/tree/master/helm/fence) + +``` +fence: + # Whether or not to deploy the service or not + enabled: true + + # What image/ tag to pull + image: + tag: + repository: + + # FENCE_CONFIG + FENCE_CONFIG: + OPENID_CONNECT: + google: + client_id: "insert.google.client_id.here" + client_secret: "insert.google.client_secret.here" + + # -- (string) USER YAML. Passed in as a multiline string. + USER_YAML: | + +``` + +You need to ensure a proper working fence-config file. Fence is highly configurable and a lot of configuration is commons specific, but some important fields to configure are described in the next section. + +#### Important Fence Config fields + +**`BASE_URL`:** This should be (the url of the commons)/user. + +**`DB`:** This should contain the psql connection string, which should contain the correct database, user, password and hostname. + +**`OPENID_CONNECT`:** This is where different IdPs can be configured. To be able to leverage an IdP as a login option you need to add the client IDs/secrets and any other necessary config to the predefined blocks. + +**`ENABLED_IDENTITY_PROVIDERS/LOGIN_OPTIONS`:** Use one of these blocks to enable/configure buttons for logging into the IdPs defined in the `OPENID_CONNECT` block. + +**`DEFAULT_LOGIN_IDP`/`DEFAULT_LOGIN_URL`:** These blocks will define the default login option, which will be used by most external oidc clients. + +**`dbGaP`:** This will be used to connect to an sftp server which will contain telemetry files for usersync. It is necessary for setting up authorizations outside of the useryaml.
+ +**`AWS_CREDENTIALS`/`S3_BUCKETS`/`DATA_UPLOAD_BUCKET`:** The AWS_CREDENTIALS block will define credentials for service accounts used to access s3 buckets. The s3 buckets are defined in the S3_BUCKETS block, which will reference a credential in the `AWS_CREDENTIALS` block. The `DATA_UPLOAD_BUCKET` block defines the data upload bucket, which is the bucket used in the data upload flow, to upload files to a commons. + +**`CIRRUS_CFG`:** If Google buckets are used, you must configure this block. It is used to set up the Google bucket workflow, which essentially creates Google users and Google bucket access groups, which get filled with users and added to bucket policies to allow implicit access to users. + +For more information about Fence config options, see [the config-default.yaml in the Fence repo](https://github.com/uc-cdis/fence/blob/master/fence/config-default.yaml). + +You can see examples of Fence configuration overriding defaults in context in the following [example Gen3 values.yamls](https://github.com/uc-cdis/gen3-helm/tree/master/examples): + +* [aws_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/aws_dev_values.yaml) +* [gke_dev_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_dev_values.yaml) +* [gke_values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/examples/gke_values.yaml) + +### User.yaml + +A user.yaml will control access to your data commons. To see how to construct a user.yaml properly: + +[https://github.com/uc-cdis/fence/blob/master/docs/additional_documentation/user.yaml_guide.md](https://github.com/uc-cdis/fence/blob/master/docs/additional_documentation/user.yaml_guide.md) + +### Fence Pods + +Fence is split into 2 deployments. + +* There is the regular fence deployment which handles commons authentication. +* We also split the presigned url feature of fence into a separate deployment, the presigned-url-fence deployment.
+ +They will both get set up and deployed with a Gen3 installation. + +### Troubleshooting Fence + +There are some common SQL queries that can be [found here](https://github.com/uc-cdis/cdis-wiki/blob/master/dev/gen3-sql-queries.md#fence-database). *Note: this link is only visible to CTDS employees* + +## Mock authorization (for development only) + +Mock authorization will bypass OIDC login and log in a user with username "test". To deploy an instance that allows a mock authorization, add these Arborist and Fence config sections to the Gen3 values.yaml. + +!!! warning + + Mock authorization should only be configured for development or testing purposes - do not use this in production. + +``` +global: + hostname: [your hostname here] + + tls: + [key and cert info] + + # Deploy postgres/elasticsearch in same deployment for development purposes. + dev: true + +arborist: + enabled: true + +fence: + FENCE_CONFIG: + # if true, will bypass OIDC login, and login a user with username "test" + # WARNING: DO NOT ENABLE IN PRODUCTION (for testing purposes only) + MOCK_AUTH: true +``` + +## Example: Google login + +To deploy an instance that will allow you to log in with Google, see here: +[https://github.com/uc-cdis/gen3-helm?tab=readme-ov-file#google-login-generation](https://github.com/uc-cdis/gen3-helm?tab=readme-ov-file#google-login-generation) + +## Setting up OIDC clients + +OIDC (OpenID Connect) clients allow applications to authenticate with Fence. This setup is often necessary for external users who want to integrate their applications with Gen3. For each application, you'll need to create a unique OIDC client, which will provide a client_id and client_secret for the application to use. + +Once the client is created, share the client_id and client_secret with the application owner so they can configure their application to authenticate with Fence.
To create these clients you will need to exec into a fence container and [run the following commands](https://github.com/uc-cdis/fence/blob/master/docs/additional_documentation/setup.md#register-oauth-client). + +## Relevant AuthN/AuthZ Tutorials + +See the following tutorials for additional information relevant to AuthN/AuthZ. + +* [Fence Usersync CronJob](../fence_usersync_job.md) +* [AWS IAM Global User](../global_IAM_helm_user.md) diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-data-svcs.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-data-svcs.md index 8c566775..3199e8a4 100644 --- a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-data-svcs.md +++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-data-svcs.md @@ -1,15 +1,253 @@ # Configure Data-Related Services for Helm Deployment -https://docs.gen3.org/docs/Deployment/Configurations/Data/aws-es-proxy +--- -https://docs.gen3.org/docs/Deployment/Configurations/Data/etl +## aws-es-proxy -https://docs.gen3.org/docs/Deployment/Configurations/Data/guppy +### What does it do -https://docs.gen3.org/docs/Deployment/Configurations/Data/indexd +aws-es-proxy is a small web server application sitting between Gen3 services and Amazon Elasticsearch service. -https://docs.gen3.org/docs/Deployment/Configurations/Data/metadata +Note: +* This service is only needed when you deploy Gen3 on AWS and use the AWS OpenSearch Service. +* This pod can also be used to make direct queries to ElasticSearch. 
If you know you want to make a manual query to ElasticSearch, you can exec into the aws-es-proxy pod and run the following, filling in the appropriate endpoint you want to hit to query elasticsearch: -https://docs.gen3.org/docs/Deployment/Configurations/Data/peregrine +``` +kubectl exec -it (aws-es-proxy pod name) -- bash +curl http://localhost:9200/_cluster/health +``` -https://docs.gen3.org/docs/Deployment/Configurations/Data/sheepdog +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default aws-es-proxy values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for aws-es-proxy](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/README.md) or read the [aws-es-proxy values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/aws-es-proxy/values.yaml) directly. + +Some important configuration items for aws-es-proxy in helm: + +``` +# -- AWS user to use to connect to ES +aws-es-proxy: + # Whether or not to deploy the service or not + enabled: true + + # What image/ tag to pull + image: + repository: + tag: + + # AWS secrets + secrets: + awsAccessKeyId: "" + awsSecretAccessKey: "" + + # Elasticsearch endpoint in AWS + esEndpoint: test.us-east-1.es.amazonaws.com +``` + +--- + +## ETL + +### What does it do + +The Gen3 Tube ETL is designed to translate data from a graph data model, stored in a PostgreSQL database, to indexed documents in ElasticSearch (ES), which supports efficient ways to query data from the front-end. The purpose of the Gen3 Tube ETL is to create indexed documents to reduce the response time of requests to query data. It is configured through an etlMapping.yaml configuration file, which describes which tables and fields to ETL to ElasticSearch.
+ +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default ETL values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/etl/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for ETL](https://github.com/uc-cdis/gen3-helm/blob/master/helm/etl/README.md) or read the [ETL values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/etl/values.yaml) directly. + +You can configure the ETL like this: + +``` +etl: + enabled: true + esEndpoint: "" + etlMapping: + +``` + +To kick off the ETL job, run this command: + +``` +kubectl create job --from=cronjob/etl-cronjob etl +``` + +If you already have a job called etl, run the following. This will delete the old job and create a new instance. + +``` +kubectl delete job etl +kubectl create job --from=cronjob/etl-cronjob etl +``` + +For more information about our ETL, [read more in our Tube repo](https://github.com/uc-cdis/tube). + +--- + +## Guppy + +### What does it do + +Guppy is used to render the Explorer page. It uses Elasticsearch indices to render the page, so it depends on ETL. + +Note: +Guppy relies on indices being created to run; if there are no indices created, Guppy will fail to start up. + +To create these indices, you can run ETL; however a valid ETL mapping file must be created, and data must be submitted to the commons before you can run ETL. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Guppy values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/guppy/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Guppy](https://github.com/uc-cdis/gen3-helm/blob/master/helm/guppy/README.md) or read the [Guppy values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/guppy/values.yaml) directly. 
+ +There is also config that needs to be set within the global block around the tier access level, defining how the explorer page should handle displaying unauthorized files, and the limit to how far unauthorized users can filter down files. Lastly, there is a guppy block that needs to be configured with the Elasticsearch indices Guppy will use to render the explorer page. + +``` +global: + tierAccessLevel: "(libre|regular|private)" + +guppy: + # -- (int) Only relevant if tierAccessLevel is set to "regular". + # The minimum amount of files unauthorized users can filter down to + tierAccessLimit: 1000 + + # -- (list) Elasticsearch index configurations + indices: + - index: dev_case + type: case + - index: dev_file + type: file + + # -- (string) The Elasticsearch configuration index + configIndex: dev_case-array-config + # -- (string) The field used for access control and authorization filters + authFilterField: auth_resource_path + # -- (bool) Whether or not to enable encryption for specified fields + enableEncryptWhitelist: true + # -- (string) A comma-separated list of fields to encrypt + encryptWhitelist: test1 + + + # -- (string) Elasticsearch endpoint. + # defaults to "elasticsearch:9200" + esEndpoint: "" +``` + +You will also need a mapping file to map the fields you want to pull from postgres into the elasticsearch indices. There are too many fields to describe here, but an [example BDC mapping file can be found here](https://github.com/uc-cdis/cdis-manifest/blob/master/gen3.biodatacatalyst.nhlbi.nih.gov/etlMapping.yaml). + +Last, Guppy works closely with Portal to render the Explorer page. You will need to ensure a proper dataExplorer block ([see this BDC example](https://github.com/uc-cdis/cdis-manifest/blob/master/gen3.biodatacatalyst.nhlbi.nih.gov/portal/gitops.json#L212)) is set up within the gitops.json file, referencing fields that have been pulled from Postgres into the Elasticsearch indices.
+ +--- + +## Indexd + +### What does it do + +Indexd is a core service of the commons. It is used to index files within the commons, to be used by Fence to download data. + +Note: +Indexd is used to hold information regarding files in the commons. We can index any files we want, but should ensure that the buckets in Indexd are configured within Fence, so that downloading the files will work. To index files, we have a variety of tools. First, data upload will automatically create indexd records for files uploaded. If we want to index files from external buckets, we can also use [indexd-utils](https://github.com/uc-cdis/indexd_utils), or if the commons has dirm set up, create a manifest and upload it to the `/indexing` endpoint of a commons. From there, GUIDs will be created and/or assigned to objects. You can view the information about the records by hitting the `(commons url)/index/(GUID)` endpoint. To test that the download works for these files, you will want to hit the `(commons url)/user/data/download/(GUID)` endpoint, while ensuring your user has the proper access to the ACL/AuthZ assigned to the Indexd record. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Indexd values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Indexd](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/README.md) or read the [Indexd values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/indexd/values.yaml) directly. + +``` +indexd: + enabled: true + + image: + repository: + tag: + + # default prefix that gets added to all indexd records. + defaultPrefix: "TEST/" + + # Secrets for fence and sheepdog to use to authenticate with indexd. + # If left blank, will be autogenerated.
+ secrets: + userdb: + fence: + sheepdog: +``` + +--- + +## Metadata + +### What does it do + +The Metadata Service (also called MDS) provides an API for retrieving JSON metadata of GUIDs. It is a flexible option for "semi-structured" data (key:value mappings). + +The GUID (the key) can be any string that is unique within the instance. The value is the metadata associated with the GUID; it is a JSON blob whose structure is not enforced on the server side. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Metadata values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/metadata/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Metadata](https://github.com/uc-cdis/gen3-helm/blob/master/helm/metadata/README.md) or read the [Metadata values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/metadata/values.yaml) directly. + +--- + +## Peregrine + +### What does it do + +The Peregrine service is used to query data in Postgres. It works similar to Guppy, but relies on querying Postgres directly. It will create the charts on the front page of the commons, as well as the `/query` endpoint of a commons. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Peregrine values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Peregrine](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/README.md) or read the [Peregrine values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/peregrine/values.yaml) directly. + +To configure Peregrine, you must have an entry in the versions block. It also requires a dictionary in the global block. 
+ +--- + +## Sheepdog + +### What does it do + +Sheepdog is a core service that handles data submission. Data gets submitted to the commons, using the dictionary as a schema, which is reflected within the sheepdog database. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Sheepdog values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Sheepdog](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/README.md) or read the [Sheepdog values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sheepdog/values.yaml) directly. + +--- + +## Sower + +### What does it do + +Sower is a job dispatching service. Jobs are configured within the manifest, and sower handles dispatching the jobs. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Sower values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Sower](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/README.md) or read the [Sower values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/sower/values.yaml) directly. diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-frontend.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-frontend.md index fb9b02bb..0f51ad92 100644 --- a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-frontend.md +++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-frontend.md @@ -1,3 +1,55 @@ # Configure Frontend for Helm Deployment -https://docs.gen3.org/docs/Deployment/Configurations/Frontend/portal +Portal is the front-end service Gen3 currently uses to render the commons webpage. 
However, we expect to soon offer the Frontend Framework service (not yet available) as the preferred front-end service. + +## Portal + +### What Does it Do +Portal is a core service that renders the complete commons webpage; it is the front-end service. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Portal values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/portal/values.yaml). + +### How to Configure it + +To configure portal, there must be an entry in the versions block. The portal_app also needs to be defined in the global block. The gitops setting uses the files in the ~/cdis-manifest/(commons url)/portal directory; dev is the common setup for development environments, and there are [default gitops.json files for most commons in the data-portal repo](https://github.com/uc-cdis/data-portal/tree/master/data/config) that the portal app can be set to. + +``` +portal: + enabled: true + + gitops: + # -- (string) multiline string - gitops.json + json: | + {} + # -- (string) - favicon in base64 + favicon: "" + # -- (string) - multiline string - gitops.css + css: | + /* gitops default css */ + # -- (string) - logo in base64 + logo: "" + # -- (string) - createdby.png - base64 + createdby: "" + sponsors: +``` + +You can find more information about [portal configuration options here](https://github.com/uc-cdis/data-portal/blob/master/docs/portal_config.md) + +You can also configure Portal with different images and icons by updating the values; an [example from the BDC data commons is here](https://github.com/uc-cdis/cdis-manifest/tree/master/gen3.biodatacatalyst.nhlbi.nih.gov/portal). + +## Frontend Framework + +Note: the Frontend Framework is not yet fully released. You can learn more about it from [this Gen3 Community Forum from May 2024](https://gen3.org/community/events/#gen3.2---how-to-build-a-gen3-data-portal-using-the-new-frontend-framework).
+ +### What Does it Do +Frontend Framework will be a core service that renders the complete commons webpage; it will replace the Portal as the frontend service. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Frontend Framework values here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/frontend-framework/values.yaml). + +### How to Configure it + +More information about this will be provided when the Frontend Framework is released. diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-revproxy.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-revproxy.md index c65c5439..d31bb164 100644 --- a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-revproxy.md +++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-revproxy.md @@ -1,3 +1,18 @@ # Configure Revproxy for Helm Deployment -https://docs.gen3.org/docs/Deployment/Configurations/revproxy +## Revproxy + +### What Does it Do + +Revproxy is a core service to a commons which handles networking within the Kubernetes cluster. + +Note: +Revproxy is essentially an nginx container, which contains information about the endpoints within the cluster. There must be an endpoint set up for Revproxy to be able to send traffic to it and start normally. Because we have many services that may or may not be set up, we only configure Revproxy with the services that are deployed to a commons. The `kube-setup-revproxy` script will look at current deployments and add configuration files from [here (in the cloud-automation repo)](https://github.com/uc-cdis/cloud-automation/tree/master/kube/services/revproxy/gen3.nginx.conf) to the pod. So, if a new service is added, you will need to run `kube-setup-revproxy` to set up the endpoint.
+ +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Revproxy values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/revproxy/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Revproxy](https://github.com/uc-cdis/gen3-helm/blob/master/helm/revproxy/README.md) or read the [Revproxy values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/revproxy/values.yaml) directly. diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-workspaces.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-workspaces.md index 5d47b43b..4c758323 100644 --- a/gen3/docs/gen3-resources/operator-guide/helm/helm-config-workspaces.md +++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-config-workspaces.md @@ -1,7 +1,145 @@ # Configure Workspaces for Helm Deployment -https://docs.gen3.org/docs/Deployment/Configurations/Workspaces/ambassador +Gen3 workspaces are secure data analysis environments in the cloud that can access data objects and metadata from data resources like the data commons or other external resources. -https://docs.gen3.org/docs/Deployment/Configurations/Workspaces/hatchery +By default, Gen3 Workspaces include Jupyter notebooks and RStudio, but can be configured to host many other applications, including analysis workflows, data processing pipelines, or data visualization apps. -https://docs.gen3.org/docs/Deployment/Configurations/Workspaces/manifestservice +Gen3 workspaces use the Ambassador, Hatchery, and Manifestservice services to create and run the workspace in a Gen3 data commons. + +## Ambassador + +### What Does it Do + +Ambassador is an envoy proxy. We use this service to proxy traffic toward our workspaces, Hatchery and Jupyter containers. + +*Note: Currently, Ambassador is only necessary if there is a Hatchery deployment, as this is used as an envoy proxy primarily for workspaces. 
This may change in the future.* + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Ambassador values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/tree/master/helm/ambassador/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Ambassador](https://github.com/uc-cdis/gen3-helm/tree/master/helm/ambassador) or read the [Ambassador values.yaml](https://github.com/uc-cdis/gen3-helm/tree/master/helm/ambassador/values.yaml) directly. + +Example configuration using gen3 umbrella chart: + +``` +ambassador: + # Whether or not to deploy the service or not + enabled: true + + # What image/ tag to pull + image: + repository: quay.io/datawire/ambassador + tag: "1.4.2" + pullPolicy: Always +``` + +## Hatchery + +### What Does it Do + +Hatchery is used to create workspaces. It contains information about workspace images and the resources set around those images to run. + +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Hatchery values here](https://github.com/uc-cdis/gen3-helm/tree/master/helm/hatchery/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Hatchery](https://github.com/uc-cdis/gen3-helm/tree/master/helm/hatchery/README.md) or read the [Hatchery values.yaml](https://github.com/uc-cdis/gen3-helm/tree/master/helm/hatchery/values.yaml) directly. + +``` +hatchery: + enabled: true + image: + repository: + tag: + + + # -- (map) Hatchery sidecar container configuration.
+ hatchery: + sidecarContainer: + cpu-limit: '0.1' + memory-limit: 256Mi + image: quay.io/cdis/ecs-ws-sidecar:master + + env: + NAMESPACE: "{{ .Release.Namespace }}" + HOSTNAME: "{{ .Values.global.hostname }}" + + args: [] + + command: + - "/bin/bash" + - "./sidecar.sh" + + lifecycle-pre-stop: + - su + - "-c" + - echo test + - "-s" + - "/bin/sh" + - root + + containers: + - target-port: 8888 + cpu-limit: '1.0' + memory-limit: 2Gi + name: "(Tutorials) Example Analysis Jupyter Lab Notebooks" + image: quay.io/cdis/heal-notebooks:combined_tutorials__latest + env: + FRAME_ANCESTORS: https://{{ .Values.global.hostname }} + args: + - "--NotebookApp.base_url=/lw-workspace/proxy/" + - "--NotebookApp.default_url=/lab" + - "--NotebookApp.password=''" + - "--NotebookApp.token=''" + - "--NotebookApp.shutdown_no_activity_timeout=5400" + - "--NotebookApp.quit_button=False" + command: + - start-notebook.sh + path-rewrite: "/lw-workspace/proxy/" + use-tls: 'false' + ready-probe: "/lw-workspace/proxy/" + lifecycle-post-start: + - "/bin/sh" + - "-c" + - export IAM=`whoami`; rm -rf /home/$IAM/pd/dockerHome; rm -rf /home/$IAM/pd/lost+found; + ln -s /data /home/$IAM/pd/; true + user-uid: 1000 + fs-gid: 100 + user-volume-location: "/home/jovyan/pd" + gen3-volume-location: "/home/jovyan/.gen3" +``` + +## Manifestservice + +### What Does it Do + +The manifestservice is used by the workspaces to mount files to a workspace. Workspace pods get setup with a sidecar container which will mount files to the data directory. This is used so that users can access files directly on the workspace container. The files pulled are defined by manifests, created through the export to workspace button in the explorer page. These manifests live in an s3 bucket which the manifestservice can query. 
+ +### Default settings + +If you deploy Helm without customizing any configuration, you can see the [default Manifestservice values in the values.yaml here](https://github.com/uc-cdis/gen3-helm/blob/master/helm/manifestservice/values.yaml). + +### How to configure it + +For a full set of configuration see the [Helm README.md for Manifestservice](https://github.com/uc-cdis/gen3-helm/blob/master/helm/manifestservice/README.md) or read the [Manifestservice values.yaml](https://github.com/uc-cdis/gen3-helm/blob/master/helm/manifestservice/values.yaml) directly. + +``` +manifestservice: + enabled: true + + manifestserviceG3auto: + hostname: testinstall + # -- (string) Bucket for the manifestservice to read and write to. + bucketName: testbucket + # -- (string) Directory name to use within the s3 bucket. + prefix: test + # -- (string) AWS access key. + awsaccesskey: "" + # -- (string) AWS secret access key. + awssecretkey: "" +``` diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-databases.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-databases.md index d6368305..fc64260b 100644 --- a/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-databases.md +++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-databases.md @@ -1,4 +1,113 @@ # Databases in Gen3 Helm charts +This document describes how databases are provisioned and used in Gen3 when deploying with Helm charts. -https://docs.gen3.org/docs/Deployment/Databases +We highly recommend using a managed PostgreSQL service like AWS RDS/Aurora, or managing PostgreSQL outside of Helm, for production Gen3 deployments. + +The bundled PostgreSQL used for development is deployed with the Bitnami chart: [https://bitnami.com/stack/postgresql/helm](https://bitnami.com/stack/postgresql/helm) + +## Database credentials + +Every service requiring a PostgreSQL database has credentials stored in a Kubernetes secret. 
+ +Example (secret values base64 decoded): + +```yaml + +apiVersion: v1 +kind: Secret +data: + database: fence_gen3 + dbcreated: true + host: gen3-postgresql + password: example_pass + port: 5432 + username: fence_gen3 +``` + +Each service consumes this secret and mounts the values as environment variables to access the database. + +For production, provide PostgreSQL credentials via these values: + +``` +global: + postgres: + dbCreate: true + master: + host: insert.postgres.hostname.here + username: postgres + password: + port: "5432" + +fence: + postgres: + host: postgres.example.com + username: fence + port: 5432 + +peregrine: + postgres: + host: postgres.example.com + username: peregrine + port: 5432 + +sheepdog: + postgres: + host: postgres.example.com + username: sheepdog + port: 5432 +``` + +These values can provision and configure databases for Gen3. + + +## Auto-generated passwords + +If a per-service password is not specified, Helm will auto-generate a random password. For example: + +```yaml + +global: + postgres: + master: + password: example_master_password + +fence: + postgres: + host: postgres.example.com + username: fence + +peregrine: + postgres: + host: postgres.example.com + username: peregrine +``` + +Helm will create a random password for Fence and Peregrine. + +**Note:** The lookup function used to generate passwords does not work in ArgoCD. When using ArgoCD, you must explicitly define a password for each service. + +## Automatic database creation through jobs + +The `dbCreate` flag can be set globally or per service. + +Setting `global.postgres.dbCreate: true` or `<service>.postgres.dbCreate: true` kicks off the database setup job for that service. + +The setup job: + +- Checks if the database exists +- Creates it if needed +- Grants privileges +- Updates `dbcreated: true` in the secret +- Services wait for this dbcreated flag before starting. - This ensures the database is ready before the service is started.
+ +If the database setup job fails, it will not set `dbcreated: true` in the secret. + +When a service starts up, it will look for this value in the secret. If dbcreated is not present or false, the service will fail to start and log an error that it cannot find dbcreated in the database secret. + +This prevents services starting up before the database is ready and properly configured. The root cause of the failure can be investigated by checking the logs and status of the setup job. + + + diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-example.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-example.md index dac13db3..f63cc1fa 100644 --- a/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-example.md +++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-example.md @@ -4,6 +4,18 @@ An important strength of Gen3 deployment with Helm is that the Gen3 Helm repo pr Therefore, **new Gen3 operators should consider first deploying the minimal, most generic Gen3 deployment with Helm.** It will allow them to establish that all of the necessary software is installed and functional, and lets them walk through basic Gen3 deployment on their computer in 30-60 minutes (assuming pre-installation of all necessary software). Once a Gen3 operator has the minimal Gen3 deployed locally, they can begin customizing in a systematic way that simplifies troubleshooting by limiting the changes made at each step. +The simplest minimal deployment is deploying locally to your localhost, without any SSL certificate or host domain. Although you can deploy a Gen3 instance with all the default values in this way, full configuration and some Gen3 tools depend on the site having an SSL certificate. + +We provide instructions for minimal deployment with or without an SSL certificate and host domain in the example below. The bulk of the instructions are identical, regardless. 
In the few places where instructions are different, we provide distinct instructions for these using the following visual callouts: + +!!! local "Local deployment with no SSL certificate" + + The information specific to local deployment with no SSL will be here. Note that, although you can deploy Gen3 with no SSL, some tools or features may require that the Gen3 site has an SSL for full functionality. + +!!! served "Local deployment to a host domain with SSL" + + The information specific to deployment to a host domain with an SSL will be here. + ## Prerequisites to locally deploy generic Gen3 * Install Helm (https://helm.sh/docs/intro/install/) - there are options for installation using a package installer, or downloading directly, for various OS. Note that we will need the Helm CLI, which may not come with all packages. @@ -139,7 +151,14 @@ It’s ready when the output indicates the condition is met. ## Obtain certificate and create K8s secret -*If you already have an SSL certificate, you can skip to [Create a minimal values.yaml](#create-a-minimal-valuesyaml)* +!!! local "Local deployment with no SSL certificate" + + You can deploy locally with no SSL or host domain, although some features and tools may have limited functionality without an SSL. If this is how you will deploy, you do not need to create a certificate. You can skip to [Create a minimal values.yaml](#create-a-minimal-valuesyaml). + +!!! served "Local deployment to a host domain with SSL" + + If you choose to deploy with an SSL certificate and you do not yet have one, follow the instructions in this section. If you already have an SSL certificate, you can skip to [Create a minimal values.yaml](#create-a-minimal-valuesyaml) + You will need to have a host site that you own to proceed. Here, our host site is `sara.dev.planx-pla.net`. @@ -227,6 +246,14 @@ NEXT STEPS: Once you have a certificate, you can use this to create a minimal Gen3 values.yaml for Helm deployment. 
This provides any values for the Gen3 umbrella chart that are different than the default configuration. You can create this file in your preferred IDE or text editor. Save it as “values.yaml” +!!! local "Local deployment with no SSL certificate" + + If you are deploying locally and you are deploying with all the default values configured in the Helm Gen3 repo, you do not need to have a Gen3 values.yaml at this stage. When you are ready to override any default values, you can create a Gen3 values.yaml at that point. + +!!! served "Local deployment to a host domain with SSL" + + You will need a Gen3 values.yaml to provide your SSL certificate and point to your host site. Use the instructions in this section to create one. + A minimal values.yaml will have a `global` section with nested `hostname` and `tls` sections. (I have elided most of the key and certificate body, but left the structure visible so you can see how it was organized.) Note that there is one private key, but two certificates in my example. You may have more or fewer certificates; be sure you include all certificates from your output. (These should have all the certificates in the SSL certificate chain.) ``` @@ -261,7 +288,7 @@ The output from these `cat` commands will be the key and certificate values that ### Adding to the Gen3 values.yaml later -Deploying initially with a minimal values.yaml will allow you to limit the number of places you need to troubleshoot if your Gen3 deployment has problems. It is much easier to troubleshoot problems if there are fewer variables that could be the problem. Proceed with your first deployment using only the minimal values.yaml. +Deploying initially with a minimal values.yaml will allow you to limit the number of places you need to troubleshoot if your Gen3 deployment has problems. It is much easier to troubleshoot problems if there are fewer variables that could be the problem.
Proceed with your first deployment using only the minimal values.yaml (or no Gen3 values.yaml at all, if deploying without SSL). However, once you know you have your instance up, you will want to expand the values.yaml to customize your instance. There is information at the end of this tutorial that will help you customize your values.yaml. @@ -280,9 +307,10 @@ helm repo add gen3 https://helm.gen3.org Deploy Gen3 using your values.yaml with the command below. * Where `dev` is what we’re calling our release -* `gen3/gen3` means the chart we’re deploying is `gen3` (after slash) from the helm repo called `gen3` (before slash). -* If the values.yaml is named differently, update the name. -* If the values.yaml is located somewhere other than the directory you are in for your terminal, change directories to where the values.yaml is before running the command, or else include the path to the values.yaml from the current directory. +* `gen3/gen3` means the chart we’re deploying is `gen3` (after slash) from the helm repo called `gen3` (before slash) +* If you have no Gen3 values.yaml, remove the `-f values.yaml` part +* If the values.yaml is named differently, update the name +* If the values.yaml is located somewhere other than the directory you are in for your terminal, change directories to where the values.yaml is before running the command, or else include the path to the values.yaml from the current directory ``` helm upgrade --install dev gen3/gen3 -f values.yaml @@ -347,6 +375,14 @@ Here, if you look again in k9s, you will see another portal pod coming up (the l ## Point your domain to localhost +!!! local "Local deployment with no SSL certificate" + + If you are deploying directly to localhost with no SSL, you can skip to [View Your New Deployment](#view-your-new-deployment). + +!!! served "Local deployment to a host domain with SSL" + + Follow the instructions below to point your host site to the localhost. 
+ In your terminal, run the following command: ``` @@ -376,7 +412,7 @@ Save your file changes and return to your terminal. ## View your new deployment! -Type your host name in your browser, and view your newly-deployed Gen3! +Type your host site name (`localhost` if you deployed without an SSL certificate) in your browser, and view your newly-deployed Gen3! ![Landing page for generic Gen3 deployment](../img/generic-Gen3-landing.png) diff --git a/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-overview.md b/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-overview.md index 0ac3d5a7..66e02534 100644 --- a/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-overview.md +++ b/gen3/docs/gen3-resources/operator-guide/helm/helm-deploy-overview.md @@ -1,3 +1,49 @@ # Deployment Overview -https://docs.gen3.org/docs/Deployment/Deployment%20Overview +1. **Install Helm**: Ensure you have Helm installed on your local machine. You can install Helm by following the instructions provided on the Helm website: [Helm Installation Guide](https://helm.sh/docs/intro/install/). + +2. **Prepare a Kubernetes Cluster**: Make sure you have a Kubernetes cluster up and running with an Ingress Controller configured. You can use a cloud provider's managed Kubernetes service or set up your own Kubernetes cluster using tools like Kubernetes in Docker (KIND) or Minikube. + + > + Note: We do not recommend Rancher Desktop for deploying Helm; we have sometimes seen issues with PostgreSQL while using Rancher. Instead, we recommend KIND or Minikube, which are lightweight and appropriate for dev setup. + > + +3. **Database Services Configuration**: + + - Determine where you want to run the database services (Elasticsearch and PostgreSQL). + - For a development environment, you can set `global.dev` to `true` in your configuration. In this mode, Gen3 will deploy these services with minimal persistence and resource consumption.
+ - For non-development environments (such as production), it is recommended to run these services externally from the Gen3 Helm charts. You will need to configure these services separately. + +4. **Prepare a `values.yaml` Configuration File**: + + - Create a `values.yaml` file to customize the Gen3 deployment. This file will contain various configuration settings for your Gen3 deployment. + - Refer to the [Configuration](./Configurations/) section for a full list of configurations for each Gen3 service. Customize the configuration according to your requirements. + +5. **Prepare SSL Certificate**: + + - Obtain an SSL/TLS certificate for securing your Gen3 deployment. You can use a certificate authority (CA) or use Let's Encrypt with Certbot for free certificates. + - Ensure you have a valid domain name for your Gen3 deployment. + + A certificate can be created using [certbot](https://certbot.eff.org). It will ask you to create a DNS TXT record to verify domain ownership. + + ``` + sudo certbot certonly --manual --preferred-challenges=dns -d fairtox.com + ``` + + Complete the DNS challenge, wait for DNS (1-5 min) and then click continue. + + Once you have the certificate, create a Kubernetes secret with it. + + ``` + kubectl create secret tls <secret-name> --cert=<path/to/fullchain.pem> --key=<path/to/privkey.pem> + ``` + + We will use this secret later on in our deployment. + +6. **Deployment with Helm**: + + - Deploy Gen3 using Helm.
Use the following command, replacing `[RELEASE_NAME]` with your desired Helm release name and `[VALUES_FILE]` with the path to your `values.yaml` file: + + ```bash + helm install [RELEASE_NAME] gen3/gen3 -f [VALUES_FILE] + ``` diff --git a/gen3/docs/stylesheets/extra.css b/gen3/docs/stylesheets/extra.css new file mode 100644 index 00000000..cbcaa463 --- /dev/null +++ b/gen3/docs/stylesheets/extra.css @@ -0,0 +1,35 @@ +:root { + --md-admonition-icon--local: url('data:image/svg+xml;charset=utf-8,') + } + .md-typeset .admonition.local, + .md-typeset details.local { + border-color: rgb(43, 155, 70); + } + .md-typeset .local > .admonition-title, + .md-typeset .local > summary { + background-color: rgba(43, 155, 70, 0.1); + } + .md-typeset .local > .admonition-title::before, + .md-typeset .local > summary::before { + background-color: rgb(43, 155, 70); + -webkit-mask-image: var(--md-admonition-icon--local); + mask-image: var(--md-admonition-icon--local); + } + + :root { + --md-admonition-icon--served: url('data:image/svg+xml;charset=utf-8,') + } + .md-typeset .admonition.served, + .md-typeset details.served { + border-color: rgb(43, 155, 70); + } + .md-typeset .served > .admonition-title, + .md-typeset .served > summary { + background-color: rgba(43, 155, 70, 0.1); + } + .md-typeset .served > .admonition-title::before, + .md-typeset .served > summary::before { + background-color: rgb(43, 155, 70); + -webkit-mask-image: var(--md-admonition-icon--served); + mask-image: var(--md-admonition-icon--served); + }