diff --git a/.github/workflows/license.yaml b/.github/workflows/license.yaml index 9b8baf9..ca55d94 100644 --- a/.github/workflows/license.yaml +++ b/.github/workflows/license.yaml @@ -15,6 +15,8 @@ jobs: uses: actions/checkout@v4 with: token: ${{ secrets.GITHUB_TOKEN }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.ref }} - name: Check License Header uses: apache/skywalking-eyes/header@v0.6.0 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..163046e --- /dev/null +++ b/.gitignore @@ -0,0 +1,30 @@ +/.local/ +# Mac OS +.DS_Store +# IDEs and editors +.idea +.project +.classpath +.c9/ +*.launch +.settings/ +*.sublime-workspace +# ignore vscode debug binary +__debug_bin + +# ignore vscode workspaces of individual users +*.code-workspace + +# IDE - VSCode +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +# editor and IDE paraphernalia +*.swp +*.swo +*~ +*.bkp +*.dtmp diff --git a/.log4brains.yml b/.log4brains.yml new file mode 100644 index 0000000..7491e92 --- /dev/null +++ b/.log4brains.yml @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors +# SPDX-License-Identifier: Apache-2.0 + +project: + name: Greenhouse + tz: Europe/Berlin + adrFolder: architecture-decision-records + packages: [] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6001242 --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors +# SPDX-License-Identifier: Apache-2.0 + +PREFIX ?=greenhouse +TITLE ?= +docsFolder=architecture-decision-records +LC_PREFIX=$(shell echo $(PREFIX) | tr '[:upper:]' '[:lower:]') +LC_TITLE=$(shell echo $(TITLE) | tr '[:upper:]' '[:lower:]') + +.PHONY: check +check: + @if [ -z "$(LC_TITLE)" ]; then \ + echo "TITLE is required. Please provide a title using make init TITLE=Your Title Here"; \ + exit 1; \ + fi + @echo "$(LC_TITLE)" | grep -qE "^[a-zA-Z0-9-]+$$" || { \ + echo "TITLE contains invalid characters. Only alphanumeric characters and hyphens are allowed."; \ + exit 1; \ + } + +.PHONY: init +init: check + @echo "Checking for Node.js..." + @command -v node >/dev/null 2>&1 || { echo >&2 "Node.js is not installed. Please install Node.js."; exit 1; } + @echo "Checking for log4brains..." + @command -v log4brains >/dev/null 2>&1 || { echo >&2 "log4brains is not installed globally. 
Please install it by running 'npm install -g log4brains'."; exit 1; } + $(eval MAX_INDEX=$(shell find ${docsFolder} -name '[0-9][0-9][0-9]-*.md' | sed 's/.*\/\([0-9][0-9][0-9]\)-.*/\1/' | sort -n | tail -1)) + $(eval NEW_INDEX=$(shell printf "%03d" $$((10#$(MAX_INDEX) + 1)))) + @echo "Next ADR index: $(NEW_INDEX)" + @echo "Creating new ADR with title prefix $(NEW_INDEX)-$(LC_PREFIX)-$(LC_TITLE).md" + $(eval ADR_TITLE=$(shell echo "$(NEW_INDEX)-$(LC_PREFIX)-$(LC_TITLE)")) + $(eval GENERATED_FILE=$(shell log4brains adr new --quiet $(ADR_TITLE))) + @mv "${docsFolder}/${GENERATED_FILE}.md" "${docsFolder}/$(ADR_TITLE).md" diff --git a/architecture-decision-records/001-greenhouse-logical-authorization-concept-for-greenhouse-plugins.md b/architecture-decision-records/001-greenhouse-logical-authorization-concept-for-greenhouse-plugins.md new file mode 100644 index 0000000..7244593 --- /dev/null +++ b/architecture-decision-records/001-greenhouse-logical-authorization-concept-for-greenhouse-plugins.md @@ -0,0 +1,193 @@ +# 001 Logical Authorization Concept for Greenhouse Plugins + +- Status: accepted +- Deciders: Fabian Ruff, Esther Schmitz, Arno Uhlig, Uwe Mayer, David Rochow +- Date: 2023-03-09 +- Tags: greenhouse + +## Context and Problem Statement + +Greenhouse is a Platform that aims to aggregate a variety of Applications into a single Platform using a Plugin Concept +that allows Applications to be integrated into Greenhouse while being maintained and developed in a distributed manner. + +Furthermore, it intentionally does not support multi-tenancy across Plugin Instances to enable segregation between +tenants and make the Platform usable by totally unrelated LoB's. + +While the Authorization concept for the Platform itself is clear, and rather simple, with Plugin Architecture of the +Platform comes a challenge in respect to the Authorization Concept for those and the underlying architecture and +constraints. + +This Decision record is about the Authorization Concept on a logical level to decide how permissions are mapped within +Greenhouse. + +## Decision Drivers + +- Transparency: + * Clear visibility about who has access to which Plugins within a Greenhouse Organization: + - without the need to know internals about the Plugin + - within Greenhouse --> no need to view Source Code, Deployed Configuration, Pipeline etc. 
+- Open for Adoption: + * It should not block applications to get ported to Greenhouse Plugins + * It should follow industry standards as good as possible + * It should enable support of other OIDC Providers then CCloud to be used +- Open to Automation: + * Potential capability of automation and simplification in regards to CAM Profile Management + * Potential capability for automated integration of Plugin Instance Backends to central Greenhouse configuration +- Standardization: + * Supports standardization and harmonization of Role Concepts +- Ease of Use: + * It should be as easy as possible for end users to manage the access to Plugins + +- Avoidance of compliance implications: + * Do not re-introduce already fullfilled compliance requirements (by CAM) such as: + - Approval Processes + - Role Ownerships + - Automatic Access Revocation + - Time constrained Role assignements + +## Considered Options + +- Permission Mapping via Teams with Plugin provided Access levels +- Permission Mapping via Teams with a pre-defined set of Access Levels +- Permission Mapping via Org Wide Roles +- Permission Mapping via Greenhouse Roles +- No permission mapping + +## Decision Outcome + +Chosen option: **"No permission mapping"** + +We decided to approach the situation with a phased approach. In the first Phase of Greenhouse we go with: + +- No permission mapping + +The Main drivers for this decision have been: + +- The first plugins that are about to get integrated do not require any permission model/authorization concept for the + Plugins itself. +- The capability of auto-creating roles & access levels through Greenhouse on CAM is considered an advanced feature not + planned for the initial phase. + +This is not considered a final decision and rather as an intermediate step. The final decision about one of the +described approaches is going to happen in the future and documented in a separate ADR. + +### Positive Consequences + +- Smaller implementation effort +- We do not shoot for a solution of a problem that we right now do not have +- We can concentrate on core features that are required for the MVP + +### Negative Consequences + +- Likely unsuitable as a permanent solution +- Receives the lowest ratings in terms of decision drivers +- Future developments need to consider approaches described in this document to avoid creating blockers for any of the + potential final solutions + +## Pros and Cons of the Options + +### Permission Mapping via Teams with Plugin provided Access levels + +![](./assets/df757ef6-b316-4dec-b6de-a0ed0e5716a5.png) + +In this solution, we have four factors that grant access to a Plugin: + +- The user is a member of an OIDC Group +- The respective Group is mapped to a Team within the Team Configuration +- The Plugin is mapped to the Team +- The permission which the Team gives to the Plugin is set within the Plugin to Team Map, where the Plugin does provide + the permission levels. 
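To make the mapping chain concrete, a rough Go sketch of this option follows; the type and field names (`TeamPluginGrant`, `AccessLevelFor`, the map of OIDC groups to Teams) are invented purely for illustration and are not part of the Greenhouse API.

```go
package authz

// Illustrative sketch only: the types and helper below are assumptions made
// for this example, not definitions from the ADR.

// TeamPluginGrant maps a Team to a Plugin together with an access level that
// the Plugin itself advertises (e.g. "operator", "auditor").
type TeamPluginGrant struct {
	Team        string // Greenhouse Team, backed by an OIDC group
	Plugin      string // Plugin instance within the Organization
	AccessLevel string // one of the levels the Plugin provides
}

// AccessLevelFor walks the four factors above:
// OIDC group membership -> Team -> Plugin mapping -> Plugin-provided access level.
func AccessLevelFor(userGroups []string, groupToTeam map[string]string, grants []TeamPluginGrant, plugin string) (string, bool) {
	for _, group := range userGroups {
		team, ok := groupToTeam[group]
		if !ok {
			continue
		}
		for _, grant := range grants {
			if grant.Team == team && grant.Plugin == plugin {
				return grant.AccessLevel, true
			}
		}
	}
	return "", false
}
```

The defining trait of this variant is that the set of valid access levels is declared by each Plugin rather than by Greenhouse.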
+ +| Decision Driver | Rating | Reason | +|--------------------------------------|--------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Transparency | +++ | Good, because it gives full overview of who has access to a Plugin within Greenhouse, including access levels | +| Open for Adoption | +++ | Good, because it allows Plugins to use as many roles as they want, and as they are on a existing app already | +| Open for Automation | +++ | Good, because we could integrate in future CAM profile and Access Level creation | +| Avoidance of compliance implications | +++ | Good, because the actual access provisioning is still done via CAM including all the Compliance related processes | +| Standardization | --- | Bad, because by giving Plugins the full flexibility of choosing any amount of Roles and name them how they want discourages standardization and harmonization | +| Ease of Use | -- | Bad, because if a plugin wants to use multiple Roles then the Org Admin has to be aware of those and they are not ensured to be standardized, therefore an assignment would be probably not intuitive and would require the Org Admin to read the Permission Level descriptions. | + +### Permission Mapping via Teams with a pre-defined set of Access Levels + +![](./assets/9d6e6c65-1229-4aba-ab8f-a732f3e68e68.png) + +In this solution, we have four factors that grant access to a Plugin: + +- The user is a member of an OIDC Group +- The respective Group is mapped to a Team within the Team Configuration +- The Plugin is mapped to the Team +- The permission which the Team gives to the Plugin is set within the Plugin to Team Map, where you can select from a + default set which are (open to change): + * Admin + * User + * Viewer + +| Decision Driver | Rating | Reason | +|--------------------------------------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Transparency | +++ | Gives full overview who has access to a Plugin within Greenhouse | +| Open for Adoption | ++ | Predefined set of (e.g. 3 Access Levels) can catch 99% of use cases for role segregation | +| Open for Automation | +++ | We could integrate in future CAM profile and Access Level creation | +| Standardization | +++ | We can enforce a clear set of Roles to be used on Plugins | +| Ease of Use | o | Whole Permission Configuration is configured on a single place (within Greenhouse), the ability to select different access levels comes with additional configurations steps though | +| Avoidance of compliance implications | +++ | The actual access provisioning is still done via CAM including all the Compliance related processes | + +### Permission Mapping via Org Wide Roles + +![](./assets/0b91d753-b862-4e0c-b43e-e87f6fe852ee.png) + +In this Solution, we have two factors that grant access to a Plugin: + +- The user is a member of an OIDC Group +- The User is mapped to one of the three pre-defined Organization Roles + +The main difference here is that we separate Teams from Role assignments. 
+ +| Decision Driver | Rating | Reason | +|--------------------------------------|--------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Transparency | +++ | Gives full overview who has access to a Plugin within Greenhouse | +| Open for Adoption | + | Predefined set of (e.g. 3 Access Levels) can catch 99% of use cases for role segregation. Additionally we can not segregate access to individual plugins which could potentially be a problem | +| Open for Automation | +++ | We could integrate in future CAM profile and Access Level creation | +| Standardization | +++ | We can enforce a clear set of Roles to be used on Plugins | +| Ease of Use | + | Decoupling of Teams and Roles could lead to double effort in Management. The difference between teams and roles could be unintuitive and misleading. | +| Avoidance of compliance implications | +++ | The actual access provisioning is still done via CAM including all the Compliance related processes | + +### Permission Mapping via Greenhouse Roles + +![](./assets/e23bbf83-02b3-44f2-9f5d-e1cd2f9b9dea.png) + +In this Solution, we have two factors that grant access to a Plugin: + +- The user is a member of an OIDC Group and holds any Greenhouse Role (Greenhouse Admin, Organization Admin, + Organization Member) +- The Plugin is configured for the Organization the user is part of. + +| Decision Driver | Rating | Reason | +|--------------------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Transparency | + | We would implicitly know who has access by looking who has which role on Greenhouse. As greenhouse membership is defined through teams the member derivation is automtically mapped to multiple teams | +| Open for Adoption | - | Only allows 2 Roles to be used by Plugins. We can not segregate access to individual plugins which could potentially be a problem. We could integrate in future CAM profile and Access Level creation. It would potentially lead to a lot of profiles on CAM with the same access level. | +| Open for Automation | ++ | We could integrate in future CAM profile and Access Level creation. Would lead potentially to a lot of profiles on CAM with the same access levels | +| Standardization | ++ | We would encourage that we have only 2 Roles on a organization to be applied to all Plugins | +| Ease of Use | ++ | No additional burden of permission management except for Org Admins | +| Avoidance of compliance implications | +++ | The actual access provisioning is still done via CAM including all the compliance related processes | + +### No permission mapping + +![](./assets/b36ac896-60c0-4074-9f20-7e6c554f4ace.png) + +In this Solution we do not have any Authorization check for Plugins. 
+ +This means, if you are a member of an Organization, you are able to Access all Plugins and there will be no official +support to provide fine-grained-access to Plugins + +| Decision Driver | Rating | Reason | +|--------------------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Transparency | - | We would implicitly know who has access by looking who has which role on Greenhouse. As greenhouse membership is defined through teams the member derivation is actually automatically mapped to multiple teams. This could lead to roles being used in the background | +| Open for Adoption | -- | Encourages Plugins to use only 1 Role or manage permissions in addition outside of Greenhouse | +| Open for Automation | o | Automation of Role Provisioning would be probably unnecessary from Greenhouse perspective | +| Standardization | o | We would basically encourage to use no roles in Plugins | +| Ease of Use | +++ | No additional burden of permission management except for Org Admins | +| Avoidance of compliance implications | +++ | The actual access provisioning is still done via CAM including all the Compliance related processes | + +## Related Decision Records + +**002 Technical Implementation of access Authorization for Greenhouse Plugins** diff --git a/architecture-decision-records/002-greenhouse-technical-implementation-of-access-authorization-for-plugins.md b/architecture-decision-records/002-greenhouse-technical-implementation-of-access-authorization-for-plugins.md new file mode 100644 index 0000000..13c189b --- /dev/null +++ b/architecture-decision-records/002-greenhouse-technical-implementation-of-access-authorization-for-plugins.md @@ -0,0 +1,120 @@ +# 002 Technical Implementation of access Authorization for Greenhouse Plugins + +- Status: draft +- Deciders: Fabian Ruff, Esther Schmitz, Arno Uhlig, Uwe Mayer, David Rochow +- Date: 2023-03-09 +- Tags: greenhouse + +## Context and Problem Statement + +Greenhouse is a Platform that aims to aggregate a variety of Applications into a single Platform using a Plugin Concept +that allows Applications to be integrated into Greenhouse while being maintained and developed in a distributed manner. + +Furthermore, it intentionally does not support multi-tenancy across Plugin Instances to enable segregation between +tenants and make the Platform usable by totally unrelated LoB's. + +This Decision record is about the technical solution how we do Authorizations for Plugins. 
+ +## Decision Drivers + +- Enables support of multiple Identity Providers + * To allow Organizations to use their own IdP +- Open for adoption + * allows also Plugin Backends to be used that are not developed internally +- Support of Role Mapping within Greenhouse + * Supports the usage for the considered solutions of ADR-1 +- Supports running Greenhouse components in a single Kubernetes Cluster + * On kubernetes you can only configure one OIDC Provider +- Implementation effort +- Maintenance effort + +## Considered Options + +- Team Sync during Deployment +- Team Sync by Plugin during runtime +- Usage of internal IdP for Group Ownership Rewriting based on Greenhouse mappings + +## Decision Outcome + +Chosen option: **"Usage of internal IdP for Group Ownership Rewriting based on Greenhouse mappings"** + +### Positive Consequences + +- Overall best decision driver ratings +- Most flexible solution +- Does not require additional syncing of mappings between Greenhouse and Plugins +- We are in control of the OIDC Provider that is used for Authorization of Requests on Plugins +- The authentication is still happening on the external Identity Provider +- Only of the Solutions that solves the Kubernetes problem(just supports one OIDC Provider) by design + +### Negative Consequences + +- Introduction of an additional Open Source Project +- In case we need to support Plugin Backends outside Converged Cloud, we would need to expose the internal OIDC + Provider (somehow) or build an additional proxy solution. +- This solution is expected to require the most implementation and maintenance effort + +## Pros and Cons of the Options + +### Team Sync during Deployment + +![](./assets/a0b55e95-8ded-47bb-96ce-67729b3f123c.png) + +This Solution is using an external OIDC Provider. +Within Greenhouse, mappings from OIDC Groups to Plugin Permissions are done, +and the respective mappings are distributed to Plugins during the deployment of the Plugins. + +This means any change in the mapping of a Team/Role would require a re-deployment of the Plugins to happen. + +| Decision Driver | Rating | Reason | +|-----------------------------------------------------------------------|--------|-----------------------------------------------------------------------------------------------------------------------------| +| Enables support of multiple Identity Providers | + | possible | +| Open for adoption | + | Would use 100% standard OIDC for Authorization on Plugin Side. Organizations would be forced to use a OIDC Provider though. | +| Support of Role Mapping within Greenhouse | + | supports with variations in the details all options | +| Supports running Greenhouse components in a single Kubernetes Cluster | - | unclear, how this would be solved | +| Implementation effort | o | | +| Maintenance effort | - | The required redeployment of components | + +### Team Sync by Plugin during runtime + +![](./assets/c652bfd8-2552-4eea-9e1a-89ee1a078e69.png) + +In this Solution we use a external OIDC provider as well. +The mapping of Access Levels for Plugins is also done within Greenhouse. +The difference is that the mapping of OIDC Groups to permissions is fetched from the Plugin at runtime from +Greenhouse using API endpoint implemented for this purpose. 
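A hedged sketch of what such a runtime sync could look like from the plugin backend's side; the endpoint path and the JSON payload shape are assumptions made only for this example, since the ADR merely states that a dedicated Greenhouse API endpoint would have to be implemented.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// groupPermissions maps an OIDC group name to the permission level the plugin
// should grant, e.g. {"greenhouse-org-a-admins": "admin"}.
type groupPermissions map[string]string

// fetchMappings pulls the current group-to-permission map from a hypothetical
// Greenhouse endpoint; URL scheme and response format are illustrative.
func fetchMappings(greenhouseURL, org, plugin string) (groupPermissions, error) {
	url := fmt.Sprintf("%s/api/v1/orgs/%s/plugins/%s/permissions", greenhouseURL, org, plugin)
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %d", resp.StatusCode)
	}
	var m groupPermissions
	if err := json.NewDecoder(resp.Body).Decode(&m); err != nil {
		return nil, err
	}
	return m, nil
}

func main() {
	// Poll periodically so mapping changes take effect without redeploying the plugin.
	for range time.Tick(5 * time.Minute) {
		m, err := fetchMappings("https://greenhouse.example.com", "org-a", "my-plugin")
		if err != nil {
			fmt.Println("sync failed:", err)
			continue
		}
		fmt.Println("synced", len(m), "group mappings")
	}
}
```

Compared to the deployment-time sync, a mapping change takes effect on the next poll without redeploying the plugin.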
+ +| Decision Driver | Rating | Reason | +|-----------------------------------------------------------------------|--------|----------------------------------------------------------------------------------------| +| Enables support of multiple Identity Providers | + | possible | +| Open for adoption | - | Would use for the Authorization a custom implementation through retrieving the mapping | +| Support of Role Mapping within Greenhouse | + | supports with variations in the implementation details all options | +| Supports running Greenhouse components in a single Kubernetes Cluster | - | unclear how this would be solved | +| Implementation effort | - | We would need to create an additional API Endpoint | +| Maintenance effort | o | Neutral | + +### Usage of internal IdP for Group Ownership Rewriting based on Greenhouse mappings + +![](./assets/7f365a58-5c96-4648-8c15-d53b32e5b3f7.png) + +This Solution does use a federated IdP that handles the authorization. +The Idea here is to us any external Authentication Provider (which could also be something else than an OIDC provider) +and use an internal OIDC Provider that is used for the Plugins and Kubernetes. +Within the internal OIDC Provider, we can then create the Group to Role mappings for plugins before issuing a token. +This way, the token would include all custom Permission mappings that we configure in Greenhouse using a standardized +approach. +This also means that Plugins can either advertise their expected naming schema with their Plugin Schema or +use a default pre-defined schema that all Greenhouse Plugins are using. + +| Decision Driver | Rating | Reason | +|-----------------------------------------------------------------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Enables support of multiple Identity Providers | +++ | Even allows usage of other Protocols than OIDC | +| Open for adoption | +++ | Openness for different Identity providers enables Organizations to have a very flexible choice | +| Support of Role Mapping within Greenhouse | + | Supports all the variants | +| Supports running Greenhouse components in a single Kubernetes Cluster | +++ | We would internally use only our internal OIDC Provider for issuing tokens which would solve the problem that Kubernetes Clusters only support one OIDC Provider | +| Implementation effort | - | Probably more effort to implement than other solutions | +| Maintenance effort | - | Probably more maintenance effort than the other solutions especially due to the additional open source dependencies introduced | + +## Related Decision Records + +**001 Logical Authorization Concept for Greenhouse Plugins** diff --git a/architecture-decision-records/003-greenhouse-location-of-plugins.md b/architecture-decision-records/003-greenhouse-location-of-plugins.md new file mode 100644 index 0000000..2e176cc --- /dev/null +++ b/architecture-decision-records/003-greenhouse-location-of-plugins.md @@ -0,0 +1,274 @@ +# 003 Location of Greenhouse Backend Plugins in overarching Architecture + +- Status: draft +- Deciders: Fabian Ruff, Esther Schmitz, Arturo Reuschenbach, Arno Uhlig, Uwe Mayer, David Rochow, David Gogl +- Date: 2023-09-14 +- Tags: greenhouse + +## Context and Problem Statement + +During development, a question was raised if it is a good decision/architecture to run the Plugin Backends on the +Greenhouse Platform Cluster +or if they should run in the "Customer 
Clusters". +Furthermore, it was unclear how communication from potential agents running on the customer cluster would communicate +back to the Backends. +> (Arno) +> One key objective of Greenhouse is to continuously manage Plugins in customer clusters. +> +> Ideally, the customer cluster is directly reachable from the operator from our Greenhouse cluster. +> However, this cannot be guaranteed. In such a case, a reverse tunnel needs to be established from the customer cluster +> to our infrastructure. +> [Headscale](https://github.com/juanfont/headscale) is currently being evaluated for this purpose. +> +> A cluster exists within an organization and is thus namespaced. +> Onboarding of clusters is restricted to organization administrators. +> +> User story: Customer onboards a newly created cluster and requires an Ingress to expose applications. +> Via Greenhouse the Ingress Plugin can be configured which results in a deployment of the ingress controller within the +> customer cluster. +> The PluginConfig, dashboard reflects the current status of relevant underlying resources. + +## Decision Drivers + +- Should work with/ focus on the for MVP in scope Applications + * Do not put to much value in hypotethical assumptions of what may be (YAGNI) +- Architectural complexity + * try to favor the "easiest" Solution from architectual complexity perspective (KISS) +- Operative support complexity + * The complexity to provide operative support for Greenhouse Plugins should be as low as possible +- Plugin complexity + * The solution should add no unnecessary complexity to the plugin configuration / setup for End Users +- Security + * Consider security aspects in term of Isolation of Components and Blast Radius + +## Considered Options + +- Plugin Backends and Agents within Greenhouse Platform +- Plugin Backends within Greenhouse Platform with Agents running on Customer Cluster +- Plugin Backends within Greenhouse Platform using separate `Headscale` client per Organization +- Plugin Backends within separate Clusters +- Plugin Backends and Agents running on Customer Cluster + +## Decision Outcome + +Chosen option: "TBD", + +### Positive Consequences + +- TBD + +### Negative Consequences + +- TBD + +## Pros and Cons of the Options + +### [1] Plugin Backends and Agents within Greenhouse Platform + +![](./assets/5562b186-8744-4dc0-8d85-8d5edaf2150c.png) + +In this option, the Plugin Backends as well as potential Agents are running on the Greenhouse Platform Cluster within +the +namespace of the respective Organization. +Potential "Special Purpose Backends" such as e.g., Monitoring Infrastructure can still run on the Customer Backend but +would need to be exposed to be reachable for corresponding Plugin Frontends. + +**Good** + +- We are in full control of the Backends and Agents, and can similar to other Platform Core Components take full + responsibility for those Components. +- As long as no Special Purpose Plugins are added it comes with the lowest Architectural complexity +- UI Components can directly speak to the Backends within our cluster +- Relatively easy from our side to operate and support as we have direct access to the resources / plugin backends + +**Bad** + +- Compromise of a Plugin Backend has a high potential Blast Radius as from there the customer clusters are reachable. 
+- Hard to delegate costs to customers +- IF we add "Special Purpose Plugins" in addition that run on the Customer Cluster this increases the complexity +- Increases the error surface for the customer, we could have: + * Their cluster is down / has a problem + * Our cluster is down / has a problem + * The connection between the cluster is down/has a problem +- No possibility to add Agents that need to run on the Cluster / Cluster Nodes and feed data to the Backends running in + platform cluster + +| Decision Driver | Rating | Reason | +|--------------------------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------| +| Should work with/ focus on the for MVP in scope Applications | + + + | With the current MVP applications this will work without harnessing other drivers to much | +| Architectural complexity | + + + / - - | Easiest complexity / only as long as no special purpose Plugins involved | +| Operative support complexity | + + + / - - | Easiest operative support / as long as no Special purpose Plugins involved | +| Plugin complexity | + + + / - - - | Low plugin complexity for general Plugins / Special purpose Plugins need to include exposure (ingress etc.) on the customer cluster | +| Security | - - | Relative high Blast Radius of Plugin Backends and higher attack surface on the Platform | + +### [2] Plugin Backends within Greenhouse Platform with Agents running on Customer Cluster + +![](./assets/f8d6f6fd-2fe8-4ce1-8fc2-52bcc3d2ebd6.png) + +In this option, the Plugin Backends are running on the Greenhouse Platform Cluster within the namespace of the +respective +Organization. Agents can be deployed on the customer cluster, and they feed data to a "Plugin Cache" which can then be +consumed by Plugin Backends on the Greenhouse Platform in a polling manner. +Potential "Special Purpose Backends" such as e.g., Monitoring Infrastructure can still run on the Customer Backend but +would need to be exposed to be reachable for corresponding Plugin Frontends. + +**Good** + +- We are in control of the Backends, and can similar to other Platform Core Components take full responsibility for + those Components. +- As long as no Special Purpose Plugins are added, it comes with medium Architectural complexity (more in comparison + to [1] due to agents & agent cache) +- UI Components can directly speak to the Backends within our cluster +- Relatively easy from our side to operate and support the backends as we have direct access to them. +- We won't loose any data if the Platform is down as agents would still collect data on the customer clusters + +**Bad** + +- In comparison to [1] there is a lot more complexity involved due to the Agents running on the Customer Cluster +- Compromise of a Plugin Backend has a high potential Blast Radius as from there the customer clusters are reachable. 
+- Hard to delegate costs to customers +- As Soon as we add Agents to the customer Clusters we adding a lot of architectural and operatrional complexity +- IF we add "Special Purpose Plugins" in addition that run on the Customer Cluster this increases the complexity +- Increases the error surface for the customer, we could have: + * Their cluster is down / has a problem + * Our cluster is down / has a problem + * The connection between the cluster is down/has a problem + +| Decision Driver | Rating | Reason | +|--------------------------------------------------------------|---------------|---------------------------------------------------------------------------------------------------------------------------------------------------| +| Should work with/ focus on the for MVP in scope Applications | + | Considering that `Heureka` will have Agents there will be already added complexity in MVP to support all MVP apps in comparison to [1] | +| Architectural complexity | - / - - - | Plugin Agents on the Customer Cluster increasing complexity / Special Purpose Plugin Backends would increase furthermore | +| Operative support complexity | - / - - - | Plugin Agents on the Customer Cluster increasing complexity / Special Purpose Plugin Backends would increase furthermore | +| Plugin complexity | + + + / - - - | Low plugin complexity for general Plugins / Special purpose Plugins need to include exposure (ingress etc.) on the customer cluster | +| Security | - | Relative high Blast Radius of Plugin Backends and higher attack surface on the Platform (a little lower due to Agents not running on our Cluster) | + +### [3] Plugin Backends within Greenhouse Platform using separate Headscale client per Organization + +![](./assets/d315c458-b3b1-4ede-99cc-0d427dfe9d83.png) + +In this option, the Plugin Backends are running on the Greenhouse Platform Cluster within the namespace of the +respective Organization. +Agents can be deployed on the customer cluster, and they feed data to a "Plugin Cache" which can then be consumed by +Plugin Backends on the Greenhouse Platform in a polling manner. +Potential "Special Purpose Backends" such as e.g., Monitoring Infrastructure can still run on the Customer Backend but +would need to be exposed to be reachable for corresponding Plugin Frontends. + +**Good** + +- We are in control of the Backends, and can similar to other Platform Core Components take full responsibility for + those Components. +- As long as no Special Purpose Plugins are added, it comes with medium Architectural complexity (more in comparison + to [1] due to agents & agent cache) +- UI Components can directly speak to the Backends within our cluster +- Relatively easy from our side to operate and support the backends as we have direct access to them. 
+ +**Bad** + +- In comparison to [1] there is a lot more complexity involved due to the Agents running on the Customer Cluster +- In comparison to [1] and [2] there is more complexity due to the separate `headscale` client +- Hard to delegate costs to customers +- We have a channel back from the customer cluster to us +- If we add "Special Purpose Plugins" in addition that run on the Customer Cluster, this increases the complexity +- Increases the error surface for the customer, we could have: + * Their cluster is down / has a problem + * Our cluster is down / has a problem + * The connection between the cluster is down/has a problem + +| Decision Driver | Rating | Reason | +|--------------------------------------------------------------|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Should work with/ focus on the for MVP in scope Applications | + | Considering that `Heureka` will have Agents there will be already added complexity in MVP to support all MVP apps in comparison to [1] | +| Architectural complexity | - - / - - - | Plugin Agents on the Customer Cluster as well as the dedicated `Headscale` Clients per Organization on our Cluster increasing complexity / Special Purpose Plugin Backends would increase furthermore | +| Operative support complexity | - - / - - - | Plugin Agents on the Customer Cluster increasing complexity , as well as the dedicated `Headscale` Clients per Organization on our Cluster/ Special Purpose Plugin Backends would increase furthermore | +| Plugin complexity | + + + / - - - | Low plugin complexity for general Plugins / Special purpose Plugins need to include exposure (ingress etc.) on the customer cluster | +| Security | o | We still have a higher attack surface on Platform Cluster but the Plugin Backends are stricter isolated and the dedicated `Headscale` Client ensures the organization boundaries which is reducing the blast radius, on the other side we open a channel back into our cluster from the customer cluster | + +### [4] Plugin Backends within seperate Clusters + +![](./assets/5f1b0a46-15f2-414b-99d5-8f102b67dcd8.png) + +This option is similar to option [3] with the difference that for stronger encapsulation we run separate clusters from +Organizations instead of using Namespaces for segregation + +**Good** + +- We are in control of the Backends, and can similar to other Platform Core Components take full responsibility for + those Components. 
+- Increased Security due to higher separation + +**Bad** + +* In comparison to [3] there is a lot more complexity involved on all layers due to the separation by clusters +* IF we add "Special Purpose Plugins" in addition that run on the Customer Cluster this increases the complexity further +* Increases the error surface for the customer, we could have: + * Their cluster is down / has a problem + * Our cluster is down / has a problem + * The backend cluster is down / has a problem + * The connection between the cluster is down/has a problem + +| Decision Driver | Rating | Reason | +|--------------------------------------------------------------|--------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Should work with/ focus on the for MVP in scope Applications | + | Considering that `Heureka` will have Agents there will be already added complexity in MVP to support all MVP apps in comparison to [1] | +| Architectural complexity | - - - | Plugin Agents on the Customer Cluster as well as the dedicated `Headscale` Clients per Organization on our Cluster increasing complexity / Special Purpose Plugin Backends would increase furthermore | +| Operative support complexity | - - - | Plugin Agents on the Customer Cluster increasing complexity , as well as the dedicated `Headscale` Clients per Organization on our Cluster/ Special Purpose Plugin Backends would increase furthermore | +| Plugin complexity | - - - | Low plugin complexity for general Plugins / Special purpose Plugins need to include exposure (ingress etc.) on the customer cluster | +| Security | + | We still have a higher attack surface on Platform Cluster but the Plugin Backends are stricter isolated and the dedicated `Headscale` Client ensures the organization boundaries which is reducing the blast radius, on the other side we open a channel back into our cluster from the customer cluster | + +### [5] Plugin Backends and Agents running on Customer Cluster + +![](./assets/d5689168-6b71-4468-ac9e-ada131ec46c2.png) + +In this option all Plugin Backends run on the customer Clusters and the communication from the UI to the Backends is +going through a "Plugin Proxy" that is facilitating the `Headscale` client to talk with the Plugin Backends to keep +the "Plugin configuration complexity" reasonable and avoid that all Plugin Backends need to include a ingress +configuration +to be reachable from outside. + +**Good** + +- We do not have to carry costs for backends +- No difference between special purpose and "normal" plugins +- Customer clusters are fully operational when Greenhouse Platform would be down +- Best security posture of options +- Avoid conflicting cluster-scoped resources, foremost CRD + +**Bad** + +- Plugin Proxy and `Headscale` are critical components to reach the Plugin Backends +- The operational complexity is among the highest of all the options + > (Arno) Complexity will increase for sure, since for the POC we limited ourselves to a single cluster. +- As customers can mess with the Plugin Backends on their cluster, they could potentially make the Plugin Backends + non-functional and then ask for support to fix the mistakes they made + > (Arno) The PluginConfig defines the desired state. 
Underlying resources are being watched and reconciled until they + match the desired state (Helm diff + drift detection) ensuring functionality. + +* In the case of multi-cluster scenarios, the plugin complexity increases dramatically as you need to define a "main" + cluster + and need to have a way that enables Agents from non-main clusters to talk to the main cluster. + > (Arno) Ensuring connectivity between registered customer cluster is not part of the Greenhouse scope but a customer + responsibility. + Example: Prometheus federation. A large set of metrics being pulled from various Prometheis to and would be persisted + in the central cluster. We want that in the customer cluster. + +| Decision Driver | Rating | Reason | +|--------------------------------------------------------------|--------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Should work with/ focus on the for MVP in scope Applications | + | Works with all MVP products but for MVP more complex than ][1] | +| Architectural complexity | + | relatively clean Architectural but more complex than option [1] if no special purpose plugins are involved | +| Operative support complexity | - - - | as the Plugin Backends are running on the customer cluster its hard for us to monitor them and we are required to go through the `Headscale` client to provide operative support | +| Plugin complexity | o | More complex then option [1] as the customer would need to configure in multi cluster scenarios where the Backend is actually living in and how agents can talk back to it. | +| Security | + + | We have a uni directional connection to the customer clusters and the most minimal attack surface on the Platform cluster | + +### General comments (Arno) + +- We go for option 5 though `headscale` is not required if the cluster is directly accessible. +- One core goal of Greenhouse is to manage Plugins and related resources in all registered Kubernetes clusters. + Thus, no difference between "special" and "normal" Plugins. + Rather distinguish between Plugins with/without backend. + During a transition phase (MVP) we allow plugin backends in the central cluster. + Future: Plugins with backend require a customer cluster for the workload. +- `Headscale` coordination server downtime + * Not critical until key expiration (1d). Registered clients continue to work. + * New client cannot be added w/o coordination server though. +- Open topics: + * Dashboard - customer cluster connectivity required? + * Currently, we assume all relevant information is made transparent via the available CRDs (PluginConfig) in the + central cluster. Thus the dashboard only needs to act within the Greenhouse central cluster. 
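As a rough illustration of the "Helm diff + drift detection" behaviour referred to above, the loop below keeps a Plugin release aligned with its desired state. `PluginConfig` is reduced to a few fields and the `helmDriver` interface is a placeholder invented for this sketch, not the actual Greenhouse implementation.

```go
package plugins

import (
	"context"
	"fmt"
	"time"
)

// PluginConfig is a simplified stand-in for the Greenhouse resource of the
// same name; only the fields needed for the sketch are shown.
type PluginConfig struct {
	Name    string
	Cluster string         // registered customer cluster
	Values  map[string]any // desired Helm values
}

// helmDriver abstracts the Helm operations the operator would need.
// The method set is an assumption for illustration only.
type helmDriver interface {
	Render(pc PluginConfig) (string, error)                             // template the chart with the desired values
	LiveManifests(ctx context.Context, pc PluginConfig) (string, error) // read what is currently deployed
	Apply(ctx context.Context, pc PluginConfig, manifests string) error // install/upgrade the release
}

// reconcile drives the release in the customer cluster towards the desired
// state and keeps re-applying until no drift is detected anymore.
func reconcile(ctx context.Context, h helmDriver, pc PluginConfig) error {
	desired, err := h.Render(pc)
	if err != nil {
		return err
	}
	for {
		live, err := h.LiveManifests(ctx, pc)
		if err != nil {
			return err
		}
		if live == desired { // stand-in for a proper Helm diff
			fmt.Printf("plugin %q matches desired state\n", pc.Name)
			return nil
		}
		if err := h.Apply(ctx, pc, desired); err != nil {
			return err
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(30 * time.Second): // re-check for drift
		}
	}
}
```

In practice the comparison would be a real Helm diff against the deployed release rather than a plain string equality check.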
diff --git a/architecture-decision-records/Greenhouse-ADR-004-resource_status_reporting.md b/architecture-decision-records/004-greenhouse-resource-status-reporting.md similarity index 58% rename from architecture-decision-records/Greenhouse-ADR-004-resource_status_reporting.md rename to architecture-decision-records/004-greenhouse-resource-status-reporting.md index 5f8d7a3..4bfb6c5 100644 --- a/architecture-decision-records/Greenhouse-ADR-004-resource_status_reporting.md +++ b/architecture-decision-records/004-greenhouse-resource-status-reporting.md @@ -1,13 +1,9 @@ -# ADR-4 Greenhouse Resource States +# 004 Greenhouse Resource States -## Decision Contributors - -- Ivo Gosemann -- Uwe Mayer - -## Status - -- Proposed +- Status: proposed +- Deciders: Ivo Gosemann, Uwe Mayer +- Date: 2023-12-18 +- Tags: greenhouse ## Context and Problem Statement @@ -15,33 +11,40 @@ Greenhouse contains controllers for several custom resources such as `Cluster`, These objects need a unified approach for reporting and maintaining their states. -Therefore this ADR adresses two concerns: +Therefore this ADR addresses two concerns: -1. provide a single source of truth for maintaining resource states -2. provide a common guideline with best practices on reporting states during reconciliation +- provide a single source of truth for maintaining resource states +- provide a common guideline with best practices on reporting states during reconciliation ## Decision Drivers - Uniformity: - - All resource states should be accessible the same way. + * All resource states should be accessible the same way. - Expandability: - - New resources with respective requirements should be easily integrated into the existing structure + * New resources with respective requirements should be easily integrated into the existing structure - Ease of use: - - Interaction with the provided structure should be clear and as easy as possible -## Decision + * Interaction with the provided structure should be clear and as easy as possible + +## Decision Outcome -We will follow [kubernetes SIG architecture advice](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties) and many other renowned projects introducing a greenhouse condition. +We will +follow [kubernetes SIG architecture advice](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties) +and many other popular projects by introducing a greenhouse condition. -For more diffentiating states than `Ready=true/false/unkown` we optionally maintain a `.Status.State` property with typed States on the respective resource. +For more differentiating states than `Ready=true/false/unkown` we optionally maintain a `.Status.State` property with +typed States on the respective resource. -We will not use upstream conditions. Independent structs allow us to specifically design clear conditions for our use-case. -We can maintain the structs, provide documentation, custom validation, etc. and don't risk potential breaking changes when upgrading the upstream library. -Lastly, decoupling our APIs from the upstream kubernetes libraries (as much as possible) wouldn't force others to use the same versions of the libraries we do. This makes it easier to consume the Greenhouse API. +We will not use upstream conditions. Independent structs allow us to specifically design clear conditions for our +use-case. 
+We can maintain the structs, provide documentation, custom validation, etc. and don't risk potential breaking changes +when upgrading the upstream library. +Lastly, decoupling our APIs from the upstream kubernetes libraries (as much as possible) wouldn't force others to use +the same versions of the libraries we do. This makes it easier to consume the Greenhouse API. ### Conditions @@ -54,21 +57,27 @@ The Greenhouse `Condition` has the following properties: If it becomes necessary the condition can be expanded by a typed `Reason` for programmatic interaction. -Every reconcile step that needs to report success or failure back to the resource should return a custom condition instead of an error. All conditions are collected within the `StatusConditions`. -This struct provides a couple of convenience methods, so no direct interaction with the conditions array becomes necessary as it bears the risk to be error prone. +Every reconcile step that needs to report success or failure back to the resource should return a custom condition +instead of an error. All conditions are collected within the `StatusConditions`. +This struct provides a couple of convenience methods, so no direct interaction with the conditions array becomes +necessary as it bears the risk to be error prone. -Every resource will maintain a greenhouse condition of the Type `Ready`. This will be the API endpoint to the resource overall "ready state" with respective message. -This `ReadyCondition` should only be computed by combining the other conditions. No other factors should be taken into consideration. +Every resource will maintain a greenhouse condition of the Type `Ready`. This will be the API endpoint to the resource +overall "ready state" with respective message. +This `ReadyCondition` should only be computed by combining the other conditions. No other factors should be taken into +consideration. Each condition may only be manipulated and written by one controller. It may be read by various. -**Note**: `Condition.Status == false` does explicitely **not** mean failure of a reconciliation step. E.g. the `PodUnschedulable` Condition in k8s core. +**Note**: `Condition.Status == false` does explicitely **not** mean failure of a reconciliation step. E.g. +the `PodUnschedulable` Condition in k8s core. Methods on the `StatusConditions`: - `SetConditions(conditionsToSet ...Condition)` - Updates an existing condition of matching `Type` with `LastTransitionTime` set to now if `Status` or `Message` differ, creates the condition otherwise. + Updates an existing condition of matching `Type` with `LastTransitionTime` set to now if `Status` or `Message` differ, + creates the condition otherwise. - `GetConditionByType(conditionType ConditionType) *Condition` @@ -92,13 +101,15 @@ We aim to provide helper methods and libraries for other clients to ease develop ### Resource States -If we need to provide more differentiated resources states than only `Ready == true/false/unkown` we will introduce a typed `State` within the resource `Status`. This `State` should also be computed only by combining condition status. +If we need to provide more differentiated resources states than only `Ready == true/false/unkown` we will introduce a +typed `State` within the resource `Status`. This `State` should also be computed only by combining condition status. Refer to the [plugin.Status.State](./../../pkg/apis/greenhouse/v1alpha1/pluginconfig_types.go#64) as a reference. 
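For illustration, the condition handling described in this ADR could be shaped roughly as follows. The field types are simplified for the sketch (the real API types will likely use a Kubernetes timestamp type), and only the two documented methods are shown.

```go
package v1alpha1

import "time"

// Illustrative shapes only; the actual Greenhouse API package may differ.

type ConditionType string

type ConditionStatus string

const (
	ConditionTrue    ConditionStatus = "True"
	ConditionFalse   ConditionStatus = "False"
	ConditionUnknown ConditionStatus = "Unknown"

	ReadyCondition ConditionType = "Ready"
)

// Condition is the independent Greenhouse condition, deliberately decoupled
// from the upstream Kubernetes condition type.
type Condition struct {
	Type               ConditionType
	Status             ConditionStatus
	LastTransitionTime time.Time
	Message            string
}

// StatusConditions collects all conditions of a resource and hides direct
// access to the underlying slice behind convenience methods.
type StatusConditions struct {
	Conditions []Condition
}

// SetConditions updates an existing condition of matching Type, bumping
// LastTransitionTime only when Status or Message differ, and creates the
// condition otherwise.
func (sc *StatusConditions) SetConditions(conditionsToSet ...Condition) {
	for _, c := range conditionsToSet {
		if existing := sc.GetConditionByType(c.Type); existing != nil {
			if existing.Status != c.Status || existing.Message != c.Message {
				existing.Status = c.Status
				existing.Message = c.Message
				existing.LastTransitionTime = time.Now()
			}
			continue
		}
		c.LastTransitionTime = time.Now()
		sc.Conditions = append(sc.Conditions, c)
	}
}

// GetConditionByType returns the condition of the given type, or nil if absent.
func (sc *StatusConditions) GetConditionByType(conditionType ConditionType) *Condition {
	for i := range sc.Conditions {
		if sc.Conditions[i].Type == conditionType {
			return &sc.Conditions[i]
		}
	}
	return nil
}
```

The `Ready` condition would then be computed purely by combining the other conditions collected here, as described above.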
### HowTos and best practices -When reconciling an object we recommend to defer the reconciliation of the status within the `Reconcile()` method. Note how we pass the reference to the resource into the defer func: +When reconciling an object we recommend to defer the reconciliation of the status within the `Reconcile()` method. Note +how we pass the reference to the resource into the defer func: ```go var myResource = new(MyResource) @@ -116,7 +127,8 @@ When reconciling an object we recommend to defer the reconciliation of the statu ... ``` -A reconciliation step in the `Reconcile()` method that should report back to the resource status is expected to return a condition (instead of an error), e.g.: +A reconciliation step in the `Reconcile()` method that should report back to the resource status is expected to return a +condition (instead of an error), e.g.: ```go ... @@ -128,9 +140,12 @@ A reconciliation step in the `Reconcile()` method that should report back to the Following [SIG Architecture docs](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties) -> Controllers should apply their conditions to a resource the first time they visit the resource, even if the status is Unknown. +> Controllers should apply their conditions to a resource the first time they visit the resource, even if the status is +> Unknown. -The `ReconcileStatus()` method should at least persist the all manipulated `conditions` back to the resource. Maybe it also computes the `ReadyCondition` or optionally a resource `State`. Note that not all controllers do, as they might reconcile different aspects of a resource. +The `ReconcileStatus()` method should at least persist the all manipulated `conditions` back to the resource. +Maybe it also computes the `ReadyCondition` or optionally a resource `State`. +Note that not all controllers do, as they might reconcile different aspects of a resource. ```go func (r *YourReconciler) reconcileStatus(ctx context.Context, myResource *MyResource) error { diff --git a/architecture-decision-records/005-greenhouse-supported-kubernetes-versions.md b/architecture-decision-records/005-greenhouse-supported-kubernetes-versions.md new file mode 100644 index 0000000..b8f5c90 --- /dev/null +++ b/architecture-decision-records/005-greenhouse-supported-kubernetes-versions.md @@ -0,0 +1,75 @@ +# 005 Supported Kubernetes Versions + +- Status: proposed +- Deciders: - Arno Uhlig, David Gogl, Ivo Gosemann, Uwe Mayer +- Date: 2024-03-01 +- Tags: greenhouse + +## Context and Problem Statement + +Greenhouse is interacting with remote Kubernetes clusters +via [kubernetes/client-go](https://github.com/kubernetes/client-go) and [helm/helm](https://github.com/helm/helm). +This includes creating Kubernetes resources and interacting with Helm releases. +To ensure a consistent and transparent behavior it is important to define versions of Kubernetes +officially supported by Greenhouse. + +This ADR addresses the following concerns: + +1. guideline which Kubernetes versions are officially supported +2. how to communicate/ inform Organisation Admins about EOL Kubernetes versions + +## Decision Drivers + +- Stability: + * Greenhouse should be stable and reliable for any of the officially supported Kubernetes Versions. + +- Transparency: + * Consumers should be informed about the officially supported Kubernetes Versions. + * Consumers should be informed about the EOL Date of their Kubernetes Version. 
+ +- Compatibility: + * Greenhouse should be compatible with the Kubernetes Versions officially supported by Helm and the Kubernetes + client. + +## Decision Outcome + +The Kubernetes project [supports](https://kubernetes.io/releases/version-skew-policy/#supported-versions) the most +recent three minor releases. The [Kubernetes Release Cycle](https://kubernetes.io/releases/release/#the-release-cycle) +for a new minor version is roughly every 4 months. + +Kubernetes is backward compatible with clients. This means that client-go will work +with [many different Kubernetes versions](https://github.com/kubernetes/client-go?tab=readme-ov-file#compatibility-client-go---kubernetes-clusters). + +The Helm project officially supports the most recent n-3 Kubernetes minor releases. This means that Helm 3.14.x is +supporting Kubernetes 1.29.x, 1.28.x, 1.27.x and 1.26.x. The Helm project follows the release cadence of Kubernetes, +with a [minor release every 4 months](https://helm.sh/docs/topics/release_policy/#minor-releases). The Helm minor +version release does not follow directly with the Kubernetes version release but with a small offset. + +The official release date for Kubernetes 1.30 is 17.04.2024. The corresponding Helm minor release 3.15 has been +announced for 08.05.2024. + +Greenhouse should support the latest Kubernetes Version soon after the Helm project has released the corresponding minor +version release. Since Helm supports the latest n-3 Kubernetes Versions, this allows for a grace period of roughly 4 +months for Organisation Admins to upgrade their Clusters to an officially supported Kubernetes version. + +With this decision, Greenhouse will follow the version skew policy of the Helm project to support the most recent n-3 +Kubernetes minor releases. This both ensures that Organisations can use the latest Kubernetes version soon after it has +been released. Also this gives Organisation Admins time to upgrade their Clusters to an officially supported Kubernetes +version, if they are running Clusters on a Kubernetes version that has reached EOL. + +It must also be clear how Greenhouse interacts with Clusters running on a Kubernetes Version not yet supported, or on a +Kubernetes Version that is no longer supported. + +Greenhouse should not reconcile Clusters that are running on a newer Kubernetes Version than currently supported by the +pulled in dependencies for Kubernetes and Helm. It should be made clear in the Status of the Cluster CRD and in the +Plugin CRD, that the Cluster is running an unsupported version. The Greenhouse UI should also visualise that a Cluster +is running on a version that is not yet supported. Organisation Admins should also be informed about this situation. + +The other case is when a Cluster is running on a Kubernetes Version that is no longer supported by the Helm +dependencies. In this case the reconciliation of this Cluster should not be stopped. The UI should however visualise +that the Cluster is running on an EOL Kubernetes release. Prior to the EOL Date, Organisation Admins should be informed +about the upcoming EOL Date and the implications for their Clusters. + +The documentation will show the currently supported Kubernetes releases. The documentation should also describe the +reconciliation behavior for clusters running on Kubernetes releases that are not yet supported and those no longer +supported. 
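A minimal sketch of the resulting support-window check; the function names, the simplified version parsing and the way the "too new" and EOL cases are signalled are illustrative only and not the Greenhouse implementation.

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// supportedWindow reports whether clusterVersion (e.g. "1.27") falls within the
// n-3 minor-release window ending at latestSupported (e.g. "1.29" while Helm
// 3.14 is the pulled-in dependency). tooNew marks clusters ahead of the window.
func supportedWindow(clusterVersion, latestSupported string) (supported bool, tooNew bool, err error) {
	cMaj, cMin, err := parseMinor(clusterVersion)
	if err != nil {
		return false, false, err
	}
	lMaj, lMin, err := parseMinor(latestSupported)
	if err != nil {
		return false, false, err
	}
	if cMaj != lMaj {
		return false, cMaj > lMaj, nil
	}
	switch {
	case cMin > lMin:
		return false, true, nil // newer than supported: do not reconcile, surface in status
	case lMin-cMin > 3:
		return false, false, nil // EOL: keep reconciling, but flag cluster and plugins
	default:
		return true, false, nil
	}
}

// parseMinor extracts major and minor from a "1.28" or "v1.28.3" style version.
func parseMinor(v string) (major, minor int, err error) {
	parts := strings.SplitN(strings.TrimPrefix(v, "v"), ".", 3)
	if len(parts) < 2 {
		return 0, 0, fmt.Errorf("unexpected version %q", v)
	}
	if major, err = strconv.Atoi(parts[0]); err != nil {
		return 0, 0, err
	}
	if minor, err = strconv.Atoi(parts[1]); err != nil {
		return 0, 0, err
	}
	return major, minor, nil
}

func main() {
	ok, tooNew, _ := supportedWindow("1.25", "1.29")
	fmt.Println("supported:", ok, "too new:", tooNew) // supported: false too new: false -> EOL handling
}
```

A cluster reported as too new would not be reconciled, while an EOL cluster keeps being reconciled but is flagged in the Cluster status and the UI.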
diff --git a/architecture-decision-records/0_template.md b/architecture-decision-records/0_template.md deleted file mode 100644 index 38fb0ef..0000000 --- a/architecture-decision-records/0_template.md +++ /dev/null @@ -1,40 +0,0 @@ -# ADR-- - -## Decision Contributors - -- Name1 -- ... - -## Status - -- Proposed | Accepted - -## Context and Problem Statement - -High-level context and problem description. - -## Related Decision Records - -Reference related or superseded ADRs here if any. - -## Decision Drivers - -Outline main motivations for a solution in bullet points. - -Example: -* Increase transparency - A main driver is to ensure clear visibility of ... - - -* Ease of use - The solution should be easy to use .. - -## Decision - -Describe the decision and potential implications. - ---- - -## Evaluated options, technical details, etc. - -If relevant, outline the considered options, their pros- and cons, technical details, etc. hereinafter. diff --git a/architecture-decision-records/Greenhouse-ADR-001-logical_authorization_concept_for_plugins.md b/architecture-decision-records/Greenhouse-ADR-001-logical_authorization_concept_for_plugins.md deleted file mode 100644 index b87e935..0000000 --- a/architecture-decision-records/Greenhouse-ADR-001-logical_authorization_concept_for_plugins.md +++ /dev/null @@ -1,187 +0,0 @@ -# ADR-1 Logical Authorization Concept for Greenhouse Plugins - -## Decision Contributors - -* Fabian Ruff -* Esther Schmitz -* Arno Uhlig -* Uwe Mayer -* David Rochow - -## Status - -- Preliminary decision taken - -## Context and Problem Statement - -Greenhouse is a Platform that aims to aggregate a vaierity of Applications into a single Platform using a Plugin Concept that allows Applications to be integrated into Greenhouse while beeing maintained and developed in a distributed manner. - -Furthermore it intentionally does not support multi tenancy across Plugin Instances to enable segregation between tennants and make the Platform usable by totally unrelated LoB's. - -While the Authorization concept for the Platform itself is clear, and rather simple, with Plugin Architecture of the Platform comes a challange in respect to the Authorization Concept for those and the unerlying architecture and constraints. - -This Decision record is about the Authorization Concept on a logical level to decide how permissions are mapped within Greenhouse. - -## Decision Drivers - -* Transparency: - * Clear visibility about who has access to which Plugins within a Greenhouse Organization: - * without the need to know internals about the Plugin - * within Greenhouse --> no need to view Source Code, Deployed Configuration, Pipeline etc. 
-* Open for Adoption: - * It should not block applications to get ported to Greenhouse Plugins - * It should follow industry standards as good as possible - * It should enable support of other OIDC Providers then CCloud to be used -* Open to Automation: - * Potential capability of automation and simplification in regards to CAM Profile Management - * Potential capability for automated integration of Plugin Instance Backends to central Greenhouse configuration -* Standardization: - * Supports standardization and harmonization of Role Concepts -* Ease of Use: - * It should be as easy as possible for end users to manage the access to Plugins - -* Avoidance of compliance implications: - * Do not re-introduce already fullfilled compliance requirements (by CAM) such as: - * Approval Processes - * Role Ownerships - * Automatic Access Revocation - * Time constrained Role assignements - - -## Considered Options - -* Permission Mapping via Teams with Plugin provided Access levels -* Permission Mapping via Teams with predefined set of Access Levels -* Permission Mapping via Org Wide Roles -* Permission Mapping via Greenhouse Roles -* No permission mapping - - -## Decision Outcome - -We decided to approach the situation with a phased approach. And in the first Phase of Greenhouse we go with: - -* No permission mapping - -Main drivers for this decision have been: -* The first plugins that are about to get integrated do not require any permission model/authorization concept for the Plugins itself. -* The capability of auto-creating roles & access leves through Greenhouse on CAM is considered an advanced feature not planned for the initial phase. - -This is not considered a final decision and rather as an intermediate step. The final decision about one of the described approaches is going to happen in future and documented in a seperate ADR. - -### Consequences - -**Good** -* Smaller implementation effort -* We do not shoot for a solution of a problem that we right now do not have -* We can concetrate on core features that are required for the MVP - -**Bad** -* Does most likely not work as a final solution -* Worst overall ratings in terms of decision drivers -* Everything we build need to consider the potential approaches described in this document, to not create blockers for any of the potential final solutions - -## Evaluation of Options - -### Permission Mapping via Teams with Plugin provided Access levels - -![](./assets/df757ef6-b316-4dec-b6de-a0ed0e5716a5.png) - -In this solution the Access to a Plugin is granted based on 4 Factors: - -* The user is member of a OIDC Group -* The respective Group is mapped to a Team within the Team Configuration -* The Plugin is mapped to the Team -* The permission which the Team gives to the Plugin is set within the Plugin to Team Map, where the Plugin does provide the permission levels. 
- - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Transparency | +++ | * Gives full overview who has access to a Plugin within Greenhouse, including access levels | -| Open for Adoption | +++ | Allows Plugins to use as many roles as they want, and as they are on a existing app already | -| Open for Automation | +++ | We could integrate in future CAM profile and Access Level creation | -| Standardization | --- | Giving Plugins the full flexibility of choosing any amount of Roles they want and name them how they want discourages standardization and harmonization | -| Ease of Use | -- | If a plugin wants to use multiple Roles the Org Admin has to be aware of those and they are not ensured to be standardized therefore a assignment would be probably not intuitive and would require the Org Admin to read the Permission Level descriptions. | -| Avoidance of compliance implications | +++ | The actuall access provisioning is still done via CAM inlcuding all the Compliance related processes | - - - - -### Permission Mapping via Teams with predefined set of Access Levels - -![](./assets/9d6e6c65-1229-4aba-ab8f-a732f3e68e68.png) - -In this solution the Access to a Plugin is granted based on 4 Factors: - -* The user is member of a OIDC Group -* The respective Group is mapped to a Team within the Team Configuration -* The Plugin is mapped to the Team -* The permission which the Team gives to the Plugin is set within the Plugin to Team Map, where you can select from a default set which are (open to change): - * Admin - * User - * Viewer - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Transparency | +++ | Gives full overview who has access to a Plugin within Greenhouse | -| Open for Adoption | ++ | Predefined set of (e.g. 3 Access Levels) can catch 99% of use cases for role segregation | -| Open for Automation | +++ | We could integrate in future CAM profile and Access Level creation | -| Standardization | +++ | We can enforce a clear set of Roles to be used on Plugins | -| Ease of Use | o | Whole Permission Configuration is configured on a single place (within Greenhouse), the ability to select different access levels comes with additional configurations steps though | -| Avoidance of compliance implications | +++ | The actuall access provisioning is still done via CAM inlcuding all the Compliance related processes | - -### Permission Mapping via Org Wide Roles - -![](./assets/0b91d753-b862-4e0c-b43e-e87f6fe852ee.png) - -This Solution would grant access to a Plugin based on 2 Factors: -- User is Member of OIDC Group -- The User is mapped to one of the 3 pre-defined Organization Roles - -The main difference here is that we seperate Teams from Role assignments. - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Transparency | +++ | Gives full overview who has access to a Plugin within Greenhouse | -| Open for Adoption | + | Predefined set of (e.g. 3 Access Levels) can catch 99% of use cases for role segregation. Additionally we can not segregate access to individual plugins which could potentially be a problem | -| Open for Automation | +++ | We could integrate in future CAM profile and Access Level creation | -| Standardization | +++ | We can enforce a clear set of Roles to be used on Plugins | -| Ease of Use | + | Decoupling of Teams and Roles could lead to double effort in Management. The difference between teams and roles could be unintuitive and misleading. 
| -| Avoidance of compliance implications | +++ | The actuall access provisioning is still done via CAM inlcuding all the Compliance related processes | - -### Permission Mapping via Greenhouse Roles - -![](./assets/e23bbf83-02b3-44f2-9f5d-e1cd2f9b9dea.png) - -In this Solution we have 2 Factors that grant access to a Plugin: -- The user is member holds any Greenhouse Role (Greenhouse Admin, Organization Admin, Organization Member) -- The Plugin is configured for the Organization the user is part of. - - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Transparency | + | We would implicitly know who has access by looking who has which role on Greenhouse. As greenhouse membership is defined through teams the member derivation  is actually automtically mapped to multiple teams | -| Open for Adoption | - | Only allows 2 Roles to be used by Plugins. We can not segregate access to individual plugins which could potentially be a problem.| We could integrate in future CAM profile and Access Level creation. Would lead potentially to a lot of profiles on CAM with the same access level. | -| Open for Automation | ++ | We could integrate in future CAM profile and Access Level creation. Would lead potentially to a lot of profiles on CAM with the same access levels | -| Standardization | ++ | We would encourage that we have only 2 Roles on a organization to be applied to all Plugins | -| Ease of Use | ++ | No additional burden of permission management except for Org Admins | -| Avoidance of compliance implications | +++ | The actuall access provisioning is still done via CAM inlcuding all the Compliance related processes | - -### No permission mapping - -![](./assets/b36ac896-60c0-4074-9f20-7e6c554f4ace.png) - -In this Solution we do not have any Authorization check for Plugins. Meaning if you are member of a Organization you are able to Access all Plugins and the Plugins have no officially supported possibility to regulate fine grained access - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Transparency | - | We would implicitly know who has access by looking who has which role on Greenhouse. As greenhouse membership is defined through teams the member derivation is actually automtically mapped to multiple teams. 
This could lead to roles beeing used in the background | -| Open for Adoption | -- | Encourages Plugins to use only 1 Role or manage permissions in addition outside of Greenhouse | -| Open for Automation | o | Automation of Role Provisioning would be probably unnecessarry from Greenhouse perspective | -| Standardization | o | We would basically encourage to use no roles in Plugins | -| Ease of Use | +++ | No additional burden of permission management except for Org Admins | -| Avoidance of compliance implications | +++ | The actuall access provisioning is still done via CAM inlcuding all the Compliance related processes | - -## Related Decision Records - -ADR-2 Technical Implementation of Authorization Concept for Greenhouse Plugins diff --git a/architecture-decision-records/Greenhouse-ADR-002-technical_implementation_of_access_authorization_for_plugins.md b/architecture-decision-records/Greenhouse-ADR-002-technical_implementation_of_access_authorization_for_plugins.md deleted file mode 100644 index ea31e22..0000000 --- a/architecture-decision-records/Greenhouse-ADR-002-technical_implementation_of_access_authorization_for_plugins.md +++ /dev/null @@ -1,120 +0,0 @@ -# ADR-2 Technical Implementation of access Authorization for Greenhouse Plugins - -## Decision Contributors - -* Fabian Ruff -* Esther Schmitz -* Arno Uhlig -* Uwe Mayer -* David Rochow - -## Status - -- PoC for selected solution running - -## Context and Problem Statement - -Greenhouse is a Platform that aims to aggregate a vaierity of Applications into a single Platform using a Plugin Concept that allows Applications to be integrated into Greenhouse while beeing maintained and developed in a distributed manner. - -Furthermore, it intentionally does not support multi tenancy across Plugin Instances to enable segregation between tennants and make the Platform usable by totally unrelated LoB's. - -This Decision record is about the technical solution how we do Authorizations for Plugins. - -## Decision Drivers - -* Enables support of multiple Identitiy Providers - * To allow Organizations to use their own IdP -* Open for adoption - * allows also Plugin Backends to be used that are not developed internally -* Support of Role Mapping within Greenhouse - * Supports the usage of any of the considered solutions of ADR-1 -* Supports running Greenhouse components in a single Kubernetes Cluster - * On kubernetes you can only configure one OIDC Provider -* Implementation effort -* Maintenance effort - - -## Considered Options - -* Team Sync during Deployment -* Team Sync by Plugin during runtime -* Usage of internal IdP for Group Ownership Rewriting based on Greenhouse mappings - - -## Decision Outcome - -We decided to go with: -* Usage of internal IdP for Group Ownership Rewriting based on Greenhouse mappings - - -### Consequences - -**Good** -* Overall best decision driver ratings -* Most flexible solution -* Does not require additional syncing of mappings between Greenhouse and Plugins -* We are in control of the OIDC Provider that is used for Authorization of Requests on Plugins -* The authentication is still happening on the external Identitiy Provider -* Only of the Solutions that solves the Kubernetes problem(just supports one OIDC Procider) by design - -**Bad** -* Introduction of a additional Open Source Project -* In case we need to support Plugin Backends outside of the Converged Cloud, we would need to somewhow expose the internal OIDC Provider or build an additional proxy solution. 
-* This solution is expected to require the most implementation and maintenance effort - -## Evaluation of Options - -### Team Sync during Deployment - -![](./assets/a0b55e95-8ded-47bb-96ce-67729b3f123c.png) - -This Solution is using an external OIDC Provider. Within Greenhouse, mappings from OIDC Groups to Plugin Permissions are done and the respective mappings are distributed to Plugins during the deployment of the Plugins. - -This means any change in the mapping of a Team/Role would require a re-deployment of the Plugins to happen. - - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Enables support of multiple Identitiy Providers | + | possible | -| Open for adoption | + | Would use 100% standard OIDC for Authorization on Plugin Side. Organizations would be forced to use a OIDC Provider though. | -| Support of Role Mapping within Greenhouse | + | supports with variations in the details all options | -| Supports running Greenhouse components in a single Kubernetes Cluster | - | unclear, how this would be solved | -| Implementation effort | o | | -| Maintenenace effort | - | The required redeployment of components| - -### Team Sync by Plugin during runtime - -![](./assets/c652bfd8-2552-4eea-9e1a-89ee1a078e69.png) - -In this Solution we use a external OIDC provider as well. The mapping of Access Levels for Plugins is also done within Greenhouse. The difference is that the mapping of OIDC Groups to permissions is fetched from the Plugin at runtime from Greenhouse using a API endpoint implemented for this purpose. - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Enables support of multiple Identitiy Providers | + | possible | -| Open for adoption | - | Would use for the Authorization a custom implmentation through retrieving the mapping | -| Support of Role Mapping within Greenhouse | + | supports with variations in the implementation details all options | -| Supports running Greenhouse components in a single Kubernetes Cluster | - | unclear how this would be solved | -| Implementation effort | - | We would need to create an additional API Endpoint | -| Maintenenace effort | o | Neutral | - -### Usage of internal IdP for Group Ownership Rewriting based on Greenhouse mappings - -![](./assets/7f365a58-5c96-4648-8c15-d53b32e5b3f7.png) - -This Solution does use a federated IdP that handles the authorization. The Idea here is to us any external Authentication Provider (which could also be something else then a OIDC provider) and use an internal OIDC Provider that is used for the Plugins and Kubernetes. Within the internal OIDC Provider we can then create the Group to Role mappings for plugins before issuing the a Token. -This way the Token would include all custom Permission mappings that we configure in Greenhouse using a standardized approach. This also means that Plugins can either advertise theire expected naming schema with theire Plugin Schema or use a default pre-defined schema that all Greenhouse Plugins are using. 
- -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Enables support of multiple Identitiy Providers | +++ | Even allows usage of other Protocols than OIDC | -| Open for adoption | +++ | Openness for different Identity providers enables Organizations to have a very flexible choice | -| Support of Role Mapping within Greenhouse | + | Supports all the variants | -| Supports running Greenhouse components in a single Kubernetes Cluster | +++ | We would internally use only our internal OIDC Provider for issuing tokens which would solve the problem that Kubernetes Clusters only support one OIDC Procider | -| Implementation effort | - | Probably more effort to implement than other solutions| -| Maintenenace effort | - | Probably more maintenance effort than the other solutions especially due to the additional open source dependencies introduced | - -## Related Decision Records - -ADR-1 Logical Auhtorization Concept for Greenhouse Plugins - - diff --git a/architecture-decision-records/Greenhouse-ADR-003-location_of_plugins.md b/architecture-decision-records/Greenhouse-ADR-003-location_of_plugins.md deleted file mode 100644 index a3b3338..0000000 --- a/architecture-decision-records/Greenhouse-ADR-003-location_of_plugins.md +++ /dev/null @@ -1,278 +0,0 @@ ---- -parent: docs/adr -nav_order: 3 ---- - -# ADR-3 Location of Greenhouse Backend Plugins in overarching Architecture - -## Decision Contributors - -* Fabian Ruff -* Esther Schmitz -* Arturo Reuschenbach -* Arno Uhlig -* Uwe Mayer -* David Rochow -* David Gogl - -## Status - -- In discussion - -## Context and Problem Statement - -During developement the question got raised whatever it is a good decision/architecture to run the Plugin Backends on the Greenhouse Platform Cluster or if they rather should run in the "Customer Clusters". Furthermore it was unclear how communication from potential agents running on the customer cluster would communicate back to the Backends. -> (Arno) -> One key objective of Greenhouse is to continously manage Plugins in customer clusters. -> -> Ideally, the customer cluster is directly reachable from the operator from our Greenhouse cluster. -> However, this can not be guaranteed. In which case a reverse tunnel needs to be established from the customer cluster to our infrastructure. -> [Headscale](https://github.com/juanfont/headscale) is currently being evaluated for this purpose. -> -> A cluster exists within an organization and is thus namespaced. -> Onboarding of clusters is restricted to organization administrators. -> -> User story: Customer onboards a newly created cluster and requires an Ingress to expose applications. Via Greenhouse the Ingress Plugin can be configured which results in a deployment of the ingress controller within the customer cluster. -> The PluginConfig, dashboard reflects the current status of relevant underlying resources. 
- -## Decision Drivers - -* Should work with/ focus on the for MVP in scope Applications - * Do not put to much value in hypotethical assumptions of what may be (YAGNI) -* Architectural complexity - * try to favor the "easiest" Solution from architectual complexity perspective (KISS) -* Operative support complexity - * The complexity to provide operative support for Greenhouse Plugins should be as low as possible -* Plugin complexity - * The solution should add no unnecessary complexity to the plugin configuration / setup for End Users -* Security - * Consider security aspects in term of Isolation of Components and Blast Radius - - - -## Considered Options - -* Plugin Backends and Agents within Greenhouse Platform - -* Plugin Backends within Greenhouse Platform with Agents running on Customer Cluster - -* Plugin Backends within Greenhouse Platform using seperate Headscale client per Organization - -* Plugin Backends within seperate Clusters - -* Plugin Backends and Agents running on Customer Cluster - - -## Decision Outcome - -We decided to go with: TO BE DEFINED - - -### Consequences - -**Good** -TO BE DEFINED - -**Bad** -TO BE DEFINED - -## Evaluation of Options - -### [1] Plugin Backends and Agents within Greenhouse Platform - -![](./assets/5562b186-8744-4dc0-8d85-8d5edaf2150c.png) - -In this option the Plugin Backends as well as potential Agents are running on the Greenhouse Platform Cluster within the namespace of the respective Organization. -Potential "Special Purpose Backends" such as e.g. Monitoring Infrastructure can still run on the Customer Backend but would need to be exposed to be reachable for corresponding Plugin Frontends. - -**Good** - -* We are in full control of the Backends and Agents, and can similar to other Platform Core Components take full responsibility for those Components. -* As long as no Special Purpose Plugins are added it comes with the lowest Architectural complexity -* UI Components can directly speak to the Backends within our cluster -* Realatively easy from our side to operate and support as we have direct access to the reosurces / plugin backends - -**Bad** - -* Compromise of a Plugin Backend has a high potential Blast Radius as from there the customer clusters are reachable. -* Hard to delegate costs to customers -* IF we add "Special Purpose Plugins" in addition that run on the Customer Cluster this increases the complexity -* Increases the error surface for the customer, we could have: - * Their cluster is down / has a problem - * Our cluster is down / has a problem - * The connection between the cluster is down/has a problem -* No possibility to add Agents that need to run on the Cluster / Cluster Nodes and feed data to the Backends runnin in our cluster - - - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Should work with/ focus on the for MVP in scope Applications | + + + | With the current MVP applications this will work without harnessing other drivers to much | -| Architectural complexity | + + + / - - | Easiest complexity / only as long as no special purpose Plugins involved | -| Operative support complexity | + + + / - - | Easiest operative support / as long as no Special purpose Plugins involved | -| Plugin complexity | + + + / - - - | Low plugin complexity for general Plugins / Special purpose Plugins need to include exposure (ingress etc.) 
on the customer cluster | -| Security | - - | Relative high Blast Radius of Plugin Backends and higher attack surface on the Platform | - - -### [2] Plugin Backends within Greenhouse Platform with Agents running on Customer Cluster - -![](./assets/f8d6f6fd-2fe8-4ce1-8fc2-52bcc3d2ebd6.png) - - -In this option the Plugin Backends are running on the Greenhouse Platform Cluster within the namespace of the respective Organization. Agents can be deployed on the customer cluster and they feed data to a "Plugin Cache" which can then be consumed by Plugin Backends on the Greenhouse Platform in a polling manner. -Potential "Special Purpose Backends" such as e.g. Monitoring Infrastructure can still run on the Customer Backend but would need to be exposed to be reachable for corresponding Plugin Frontends. - -**Good** - -* We are in control of the Backends, and can similar to other Platform Core Components take full responsibility for those Components. -* As long as no Special Purpose Plugins are added it comes with medium Architectural complexity (more in comparison to [1] due to agents & agent cache) -* UI Components can directly speak to the Backends within our cluster -* Realatively easy from our side to operate and support the backends as we have direct access to them. -* We won't loose any data if the Platform is down as agents would still collect data on the customer clusters - -**Bad** - -* In comparison to [1] there is a lot more complexity involved due to the Agents running on the Customer Cluster -* Compromise of a Plugin Backend has a high potential Blast Radius as from there the customer clusters are reachable. -* Hard to delegate costs to customers -* As Soon as we addd Agents to the customer Clusters we adding a lot of architectural and operatrional complexity -* IF we add "Special Purpose Plugins" in addition that run on the Customer Cluster this increases the complexity -* Increases the error surface for the customer, we could have: - * Their cluster is down / has a problem - * Our cluster is down / has a problem - * The connection between the cluster is down/has a problem - - - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Should work with/ focus on the for MVP in scope Applications | + | Considering that Heureka will have Agents there will be already added complexity in MVP to support all MVP apps in comparison to [1] | -| Architectural complexity | - / - - - | Plugin Agents on the Customer Cluster increasoing complexity / Special Purpose Plugin Backends would increase furthermore | -| Operative support complexity | - / - - - | Plugin Agents on the Customer Cluster increasoing complexity / Special Purpose Plugin Backends would increase furthermore | -| Plugin complexity | + + + / - - - | Low plugin complexity for general Plugins / Special purpose Plugins need to include exposure (ingress etc.) on the customer cluster | -| Security | - | Relative high Blast Radius of Plugin Backends and higher attack surface on the Platform (a little lower due to Agents not running on our Cluster) | - -### [3] Plugin Backends within Greenhouse Platform using seperate Headscale client per Organization - -![](./assets/d315c458-b3b1-4ede-99cc-0d427dfe9d83.png) - - - -In this option the Plugin Backends are running on the Greenhouse Platform Cluster within the namespace of the respective Organization. 
Agents can be deployed on the customer cluster and they feed data to a "Plugin Cache" which can then be consumed by Plugin Backends on the Greenhouse Platform in a polling manner. -Potential "Special Purpose Backends" such as e.g. Monitoring Infrastructure can still run on the Customer Backend but would need to be exposed to be reachable for corresponding Plugin Frontends. - -**Good** - -* We are in control of the Backends, and can similar to other Platform Core Components take full responsibility for those Components. -* As long as no Special Purpose Plugins are added it comes with medium Architectural complexity (more in comparison to [1] due to agents & agent cache) -* UI Components can directly speak to the Backends within our cluster -* Realatively easy from our side to operate and support the backends as we have direct access to them. - - -**Bad** - -* In comparison to [1] there is a lot more complexity involved due to the Agents running on the Customer Cluster -* In comparison to [1] and [2] there is more complexity due to the seperate headscale client -* Hard to delegate costs to customers -* We have a channel back from the customer cluster to us -* IF we add "Special Purpose Plugins" in addition that run on the Customer Cluster this increases the complexity -* Increases the error surface for the customer, we could have: - * Their cluster is down / has a problem - * Our cluster is down / has a problem - * The connection between the cluster is down/has a problem - - - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Should work with/ focus on the for MVP in scope Applications | + | Considering that Heureka will have Agents there will be already added complexity in MVP to support all MVP apps in comparison to [1] | -| Architectural complexity | - - / - - - | Plugin Agents on the Customer Cluster as well as the dedicated Headscale Clients per Organization on our Cluster increasing complexity / Special Purpose Plugin Backends would increase furthermore | -| Operative support complexity | - - / - - - | Plugin Agents on the Customer Cluster increasing complexity , as well as the dedicated Headscale Clients per Organization on our Cluster/ Special Purpose Plugin Backends would increase furthermore | -| Plugin complexity | + + + / - - - | Low plugin complexity for general Plugins / Special purpose Plugins need to include exposure (ingress etc.) on the customer cluster | -| Security | o | We still have a higher attack surface on Platform Cluster but the Plugin Backends are stricter isolated and the dedicated Headscale Client ensures the organization boundaries wich is reducing the blast radius, on the other side we open a channel back into our cluster from the customer cluster | - -### [4] Plugin Backends within seperate Clusters - -![](./assets/5f1b0a46-15f2-414b-99d5-8f102b67dcd8.png) - - -This option is similar to option [3] with the difference that for stronger encapsulation we run seperate clusters fro Organizations instead of utilizing Namespaces for segregation - -**Good** - -* We are in control of the Backends, and can similar to other Platform Core Components take full responsibility for those Components. 
-* Increased Security due to higher seperation - - -**Bad** - -* In comparison to [3] there is a lot more complexity involved on all layers due to the seperation by clusters -* IF we add "Special Purpose Plugins" in addition that run on the Customer Cluster this increases the complexity further -* Increases the error surface for the customer, we could have: - * Their cluster is down / has a problem - * Our cluster is down / has a problem - * The backend cluster is down / has a problem - * The connection between the cluster is down/has a problem - - - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Should work with/ focus on the for MVP in scope Applications | + | Considering that Heureka will have Agents there will be already added complexity in MVP to support all MVP apps in comparison to [1] | -| Architectural complexity |- - - | Plugin Agents on the Customer Cluster as well as the dedicated Headscale Clients per Organization on our Cluster increasing complexity / Special Purpose Plugin Backends would increase furthermore | -| Operative support complexity | - - - | Plugin Agents on the Customer Cluster increasing complexity , as well as the dedicated Headscale Clients per Organization on our Cluster/ Special Purpose Plugin Backends would increase furthermore | -| Plugin complexity | - - - | Low plugin complexity for general Plugins / Special purpose Plugins need to include exposure (ingress etc.) on the customer cluster | -| Security | + | We still have a higher attack surface on Platform Cluster but the Plugin Backends are stricter isolated and the dedicated Headscale Client ensures the organization boundaries wich is reducing the blast radius, on the other side we open a channel back into our cluster from the customer cluster | - -### [5] Plugin Backends and Agents running on Customer Cluster - -![](./assets/d5689168-6b71-4468-ac9e-ada131ec46c2.png) - -In this option all Plugin Backends run on the customer Clusters and the communication from the UI to the Backends is going through a "Plugin Proxy" that is facilitating the Headsclae client to talk with the Plugin Backends to keep the "Plugin configuration complexity" reasonbable and avoid that all Plugin Backends need to include a ingress configuration to be reachable from outside. - -**Good** - -* We do not have to carry costs for backends -* No difference between special purpose and "normal" plugins -* Customer clusters fully operational when Greenhouse Platform would be down -* Best security posture of otpions -* Avoids conflicting cluster-scoped resources, foremost CRD - -**Bad** - -* Plugin Proxy and Headscale are critical components to reach the Plugin Backends -* The operational complexity is among the highest of all the options - > (Arno) Complexity will increase for sure, since for the POC we limited ourselves to a single cluster. -* As customers can mess with the Plugin Backends on their cluster they could potentially make the Plugin Backends un-functional and then ask us for support for things they have messed up - > (Arno) The PluginConfig defines the desired state. Underlying resources are being watched and reconciled until they match the desired state (Helm diff + drift detection) ensuring funcitionality. -* In case of multi cluster scenarios the plugin complexity increases dramatical as you need to define a "main" cluster and need to have a way that enables Agents from non-main clusters to talk to the main cluster. 
- > (Arno) Ensuring connectivity between registered customer cluster is not part of the Greenhouse scope but a customer responsibility. - Example: Prometheus federation. A large set of metrics being pulled from various Prometheis to and would be persisted in the central cluster. We want that in the customer cluster. - -| Decision Driver | Rating | Reason | -| -------- | -------- | -------- | -| Should work with/ focus on the for MVP in scope Applications | + | Works with all MVP products but for MVP more complex than ][1] | -| Architectural complexity | + | relatively clean Architectural but more complex than option [1] if no special purpose plugins are involved | -| Operative support complexity | - - - | as the Plugin Backends are running on the customer cluster its hard for us to monitor them and we are required to go through the Headscale client to provide operative support | -| Plugin complexity | o | More complex then option [1] as the customer would need to configure in multi cluster scenarios where the Backend is actually living in and how agents can talk back to it. | -| Security | + + | We have a uni directional connection to the customer clusters and the most minimal attack surface on the Platform cluster | - - -## Related Decision Records - -### General comments (Arno) - -* We go for option 5 though headscale is not required if the cluster is directly accessible. -* One core goal of Greenhouse is to manage Plugins and related resources in all registered Kubernetes clusters. Thus no difference btwn. "special" and "normal" Plugins. - Rather distinguish between Plugins with/without backend. - During a transition phase (MVP) we allow plugin backends in the central cluster. - Future: Plugins with backend require a customer cluster for the workload. -* Headscale coordination server downtime - * Not critical until key expiration (1d). Registered clients continue to work. - * New client cannot be added w/o coordination server though. -* Open topics: - * Dashboard - customer cluster connectivity required? - * Currently, we assume all relevant information is made transparent via the available CRDs (PluginConfig) in the central cluster. Thus the dashboard only needs to act within the Greenhouse central cluster. - -n/A - diff --git a/architecture-decision-records/Greenhouse-ADR-005-supported_kubernetes_versions.md b/architecture-decision-records/Greenhouse-ADR-005-supported_kubernetes_versions.md deleted file mode 100644 index 8fee053..0000000 --- a/architecture-decision-records/Greenhouse-ADR-005-supported_kubernetes_versions.md +++ /dev/null @@ -1,55 +0,0 @@ -# ADR-5 Supported Kubernetes Versions - -## Decision Contributors - -- Arno Uhlig -- David Gogl -- Ivo Gosemann -- Uwe Mayer - -## Status - -- Proposed - -## Context and Problem Statement - -Greenhouse is interacting with remote Kubernetes clusters via [kubernetes/client-go](https://github.com/kubernetes/client-go) and [helm/helm](https://github.com/helm/helm). This includes creating Kubernetes resources and interacting with Helm releases. To ensure a consistent and transparent behavior it is important to define which Kubernetes versions are officially supported by Greenhouse. - -This ADR addresses the following concerns: - -1. guideline which Kubernetes versions are officially supported -2. how to communicate/ inform Organisation Admins about EOL Kubernetes versions - -## Decision Drivers - -- Stability: - - Greenhouse should be stable and reliable for any of the officially supported Kubernetes Versions. 
- -- Transparency: - - Consumers should be informed about the officially supported Kubernetes Versions. - - Consumers should be informed about the EOL Date of their Kubernetes Version. - -- Compatibility: - - Greenhouse should be compatible with the Kubernetes Versions officially supported by Helm and the Kubernetes client. - -## Decision - -The Kubernetes project [supports](https://kubernetes.io/releases/version-skew-policy/#supported-versions) the most recent three minor releases. The [Kubernetes Release Cycle](https://kubernetes.io/releases/release/#the-release-cycle) for a new minor version is roughly every 4 months. - -Kubernetes is backward compatible with clients. This means that client-go will work with [many different Kubernetes versions](https://github.com/kubernetes/client-go?tab=readme-ov-file#compatibility-client-go---kubernetes-clusters). - -The Helm project officially supports the most recent n-3 Kubernetes minor releases. This means that Helm 3.14.x is supporting Kubernetes 1.29.x, 1.28.x, 1.27.x and 1.26.x. The Helm project follows the release cadence of Kubernetes, with a [minor release every 4 months](https://helm.sh/docs/topics/release_policy/#minor-releases). The Helm minor version release does not follow directly with the Kubernetes version release but with a small offset. - -The official release date for Kubernetes 1.30 is 17.04.2024. The corresponding Helm minor release 3.15 has been announced for 08.05.2024. - -Greenhouse should support the latest Kubernetes Version soon after the Helm project has released the corresponding minor version release. Since Helm supports the latest n-3 Kubernetes Versions, this allows for a grace period of roughly 4 months for Organisation Admins to upgrade their Clusters to an officially supported Kubernetes version. - -With this decision, Greenhouse will follow the version skew policy of the Helm project to support the most recent n-3 Kubernetes minor releases. This both ensures that Organisations can use the latest Kubernetes version soon after it has been released. Also this gives Organisation Admins time to upgrade their Clusters to an officially supported Kubernetes version, if they are running Clusters on a Kubernetes version that has reached EOL. - -It must also be clear how Greenhouse interacts with Clusters running on a Kubernetes Version not yet supported, or on a Kubernetes Version that is no longer supported. - -Greenhouse should not reconcile Clusters that are running on a newer Kubernetes Version than currently supported by the pulled in dependencies for Kubernetes and Helm. It should be made clear in the Status of the Cluster CRD and in the Plugin CRD, that the Cluster is running an unsupported version. The Greenhouse UI should also visualise that a Cluster is running on a version that is not yet supported. Organisation Admins should also be informed about this situation. - -The other case is when a Cluster is running on a Kubernetes Version that is no longer supported by the Helm dependencies. In this case the reconciliation of this Cluster should not be stopped. The UI should however visualise that the Cluster is running on an EOL Kubernetes release. Prior to the EOL Date, Organisation Admins should be informed about the upcoming EOL Date and the implications for their Clusters. - -The documentation will show the currently supported Kubernetes releases. The documentation should also describe the reconciliation behavior for clusters running on Kubernetes releases that are not yet supported and those no longer supported. 
diff --git a/architecture-decision-records/README.md b/architecture-decision-records/README.md index 794352f..3f5ff9f 100644 --- a/architecture-decision-records/README.md +++ b/architecture-decision-records/README.md @@ -1,30 +1,74 @@ -# Architecture decision record (ADR) +# Architecture decision records (ADRs) This folder contains Architecture decision records (ADRs) for the Operations Platform. The purpose is to provide transparency and enable a broader team to contribute to various aspects. -## Conventions +## Illustrations -### Template +Use [mermaid](https://www.mermaidchart.com/app/dashboard) within the markdown files for technical illustrations instead +of external assets. -Use the [ADR template](0_template.md). +## Development -### Illustrations +Pre-requisites: -Use [mermaid](https://www.mermaidchart.com/app/dashboard) within the markdown files for technical illustrations instead of external assets. +- Node.js [LTS](https://nodejs.org/en/download/) -### Naming +**If not already done, install `Log4brains`** -The files should be named using this conventions `-ADR--.md`. -The `` consists of 3 digits. Use leading zeros if necessary. -The `CloudOperators` component is used for ADRs of general nature. +```bash +npm install -g log4brains +``` + +**To create a new ADR interactively, run**: + +```bash +log4brains adr new +``` + +- When prompted for `Title of the solved problem and its solution?` Provide a title in the + format ` ` + +- This will generate a new ADR file in the `architecture-decision-records` folder with a filename in the format + `-.md` + +### Rename generated ADR filename + +`log4brains` will generate the file with current date as prefix followed by the title provided in the +command `log4brains adr new` separated by `-` + +e.g. `20240101-.md` + +- Please rename the generated filename to `--.md`. +- The `` should consists of three digits. Use leading zeros if necessary. +- Component can be one of `greenhouse` | `cloudoperators`. + +> The `cloudoperators` component is used for ADRs of general nature. Example: -* `Greenhouse-ADR-001-logical_authorization_concept_for_plugins.md` -* `CloudOperators-ADR--.md` -### Contributing +- `001-greenhouse-logical-authorization-concept-for-plugins.md` +- `002-cloudoperators-.md` + +**Alternatively** you can use the `Makefile` command to create a new ADR without any manual renaming + +example: + +```bash +make init TITLE=new-adr-title +``` +This will automatically append the next index and `greenhouse` prefix for the ADR and create a new ADR file with the provided title. + +> NOTE: You can also override the prefix by passing PREFIX= + +## Contributing Each ADR should live on a dedicated branch and be proposed through a pull request (PR). Decision contributors are to be assigned as `reviewers` to the PR. -An ADR is accepted, once all reviewers approved the PR. \ No newline at end of file +An ADR is accepted, once all reviewers approved the PR. 
+ +## More information + +- [Log4brains documentation](https://github.com/thomvaill/log4brains/tree/master#readme) +- [What is an ADR and why should you use them](https://github.com/thomvaill/log4brains/tree/master#-what-is-an-adr-and-why-should-you-use-them) +- [ADR GitHub organization](https://adr.github.io/) diff --git a/architecture-decision-records/index.md b/architecture-decision-records/index.md new file mode 100644 index 0000000..5806519 --- /dev/null +++ b/architecture-decision-records/index.md @@ -0,0 +1,20 @@ + + +# Architecture knowledge base + +Welcome 👋 to the architecture knowledge base of Greenhouse. +You will find here all the Architecture Decision Records (ADR) of the project. + +## Definition and purpose + +> An Architectural Decision (AD) is a software design choice that addresses a functional or non-functional requirement that is architecturally significant. +> An Architectural Decision Record (ADR) captures a single AD, such as often done when writing personal notes or meeting minutes; the collection of ADRs created and maintained in a project constitutes its decision log. + +An ADR is immutable: only its status can change (i.e., become deprecated or superseded). That way, you can become familiar with the whole project history just by reading its decision log in chronological order. +Moreover, maintaining this documentation aims at: + +- 🚀 Improving and speeding up the onboarding of a new team member +- 🔭 Avoiding blind acceptance/reversal of a past decision (Read [Michael Nygard's famous article on ADRs](https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions.html)) +- 🤝 Formalizing the decision process of the team + +The decision process is entirely collaborative and backed by pull requests. diff --git a/architecture-decision-records/template.md b/architecture-decision-records/template.md new file mode 100644 index 0000000..ec7eb22 --- /dev/null +++ b/architecture-decision-records/template.md @@ -0,0 +1,83 @@ +# [short title of solved problem and solution] + +- Status: [draft | proposed | rejected | accepted | deprecated | … | superseded by [xxx](yyyymmdd-xxx.md)] +- Deciders: [list everyone involved in the decision] +- Date: [YYYY-MM-DD when the decision was last updated] +- Tags: [greenhouse / cloudoperators] +- Technical Story: [description | ticket/issue URL] + +## Context and Problem Statement + +[Describe the context and problem statement, e.g., in free form using two to three sentences. You may want to articulate the problem in form of a question.] + +## Decision Drivers + +- [driver 1, e.g., a force, facing concern, …] +- [driver 2, e.g., a force, facing concern, …] +- … + +## Considered Options + +- [option 1] +- [option 2] +- [option 3] +- … + +## Decision Outcome + +Chosen option: "[option 1]", +because [justification. e.g., only option, which meets k.o. criterion decision driver | which resolves force force | … | comes out best (see below)]. 
+
+### Positive Consequences
+
+- [e.g., improvement of quality attribute satisfaction, follow-up decisions required, …]
+- …
+
+### Negative Consequences
+
+- [e.g., compromising quality attribute, follow-up decisions required, …]
+- …
+
+## Pros and Cons of the Options | Evaluation of options
+
+### [option 1]
+
+[example | description | pointer to more information | …]
+
+| Decision Driver     | Rating | Reason                        |
+|---------------------|--------|-------------------------------|
+| [decision driver a] | +++    | Good, because [argument a]    |
+| [decision driver b] | ---    | Bad, because [argument b]     |
+| [decision driver c] | --     | Bad, because [argument c]     |
+| [decision driver d] | o      | Neutral, because [argument d] |
+
+### [option 2]
+
+[example | description | pointer to more information | …]
+
+| Decision Driver     | Rating | Reason                        |
+|---------------------|--------|-------------------------------|
+| [decision driver a] | +++    | Good, because [argument a]    |
+| [decision driver b] | ---    | Bad, because [argument b]     |
+| [decision driver c] | --     | Bad, because [argument c]     |
+| [decision driver d] | o      | Neutral, because [argument d] |
+
+### [option 3]
+
+[example | description | pointer to more information | …]
+
+| Decision Driver     | Rating | Reason                        |
+|---------------------|--------|-------------------------------|
+| [decision driver a] | +++    | Good, because [argument a]    |
+| [decision driver b] | ---    | Bad, because [argument b]     |
+| [decision driver c] | --     | Bad, because [argument c]     |
+| [decision driver d] | o      | Neutral, because [argument d] |
+
+## Related Decision Records
+
+[previous decision record, e.g., an ADR, which is solved by this one | next decision record, e.g., an ADR, which solves this one | … | pointer to more information]
+
+## Links
+
+- [Link type](link to adr)
+- …