From a0b12a1b6e18c3e616d0df2e5884c039b532a625 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Wed, 7 May 2025 12:30:30 +0100 Subject: [PATCH 01/24] PUDN, static ips: kick-off enhancement Signed-off-by: Miguel Duarte Barroso --- ...rimary-udn-attached-vms-with-static-ips.md | 498 ++++++++++++++++++ 1 file changed, 498 insertions(+) create mode 100644 enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md new file mode 100644 index 0000000000..638f0109c9 --- /dev/null +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -0,0 +1,498 @@ +--- +title: neat-enhancement-idea +authors: + - TBD +reviewers: # Include a comment about what domain expertise a reviewer is expected to bring and what area of the enhancement you expect them to focus on. For example: - "@networkguru, for networking aspects, please look at IP bootstrapping aspect" + - TBD +approvers: # A single approver is preferred, the role of the approver is to raise important questions, help ensure the enhancement receives reviews from all applicable areas/SMEs, and determine when consensus is achieved such that the EP can move forward to implementation. Having multiple approvers makes it difficult to determine who is responsible for the actual approval. + - TBD +api-approvers: # In case of new or modified APIs or API extensions (CRDs, aggregated apiservers, webhooks, finalizers). If there is no API change, use "None" + - TBD +creation-date: yyyy-mm-dd +last-updated: yyyy-mm-dd +tracking-link: # link to the tracking ticket (for example: Jira Feature or Epic ticket) that corresponds to this enhancement + - TBD +see-also: + - "/enhancements/this-other-neat-thing.md" +replaces: + - "/enhancements/that-less-than-great-idea.md" +superseded-by: + - "/enhancements/our-past-effort.md" +--- + +To get started with this template: +1. **Pick a domain.** Find the appropriate domain to discuss your enhancement. +1. **Make a copy of this template.** Copy this template into the directory for + the domain. +1. **Fill out the metadata at the top.** The embedded YAML document is + checked by the linter. +1. **Fill out the "overview" sections.** This includes the Summary and + Motivation sections. These should be easy and explain why the community + should desire this enhancement. +1. **Create a PR.** Assign it to folks with expertise in that domain to help + sponsor the process. +1. **Merge after reaching consensus.** Merge when there is consensus + that the design is complete and all reviewer questions have been + answered so that work can begin. Come back and update the document + if important details (API field names, workflow, etc.) change + during code review. +1. **Keep all required headers.** If a section does not apply to an + enhancement, explain why but do not remove the section. This part + of the process is enforced by the linter CI job. + +See ../README.md for background behind these instructions. + +Start by filling out the header with the metadata for this enhancement. + +# Neat Enhancement Idea + +This is the title of the enhancement. Keep it simple and descriptive. A good +title can help communicate what the enhancement is and should be considered as +part of any review. 
+ +The YAML `title` should be lowercased and spaces/punctuation should be +replaced with `-`. + +The `Metadata` section above is intended to support the creation of tooling +around the enhancement process. + +## Summary + +The `Summary` section is important for producing high quality +user-focused documentation such as release notes or a development roadmap. It +should be possible to collect this information before implementation begins in +order to avoid requiring implementors to split their attention between writing +release notes and implementing the feature itself. + +Your summary should be one paragraph long. More detail +should go into the following sections. + +## Motivation + +This section is for explicitly listing the motivation, goals and non-goals of +this proposal. Describe why the change is important and the benefits to users. + +### User Stories + +Detail the things that people will be able to do if this is implemented and +what goal that allows them to achieve. In each story, explain who the actor +is based on their role, explain what they want to do with the system, +and explain the underlying goal they have, what it is they are going to +achieve with this new feature. + +Use the standard three part formula: + +> "As a _role_, I want to _take some action_ so that I can _accomplish a +goal_." + +Make the change feel real for users, without getting bogged down in +implementation details. + +Here are some example user stories to show what they might look like: + +* As an OpenShift engineer, I want to write an enhancement, so that I + can get feedback on my design and build consensus about the approach + to take before starting the implementation. +* As an OpenShift engineer, I want to understand the rationale behind + a particular feature's design and alternatives considered, so I can + work on a new enhancement in that problem space knowing the history + of the current design better. +* As a product manager, I want to review this enhancement proposal, so + that I can make sure the customer requirements are met by the + design. +* As an administrator, I want a one-click OpenShift installer, so that + I can easily set up a new cluster without having to follow a long + set of operations. + +In each example, the persona's goal is clear, and the goal is clearly provided +by the capability being described. +The engineer wants feedback on their enhancement from their peers, and writing +an enhancement allows for that feedback. +The product manager wants to make sure that their customer requirements are fulfilled, +reviewing the enhancement allows them to check that. +The administrator wants to set up his OpenShift cluster as easily as possible, and +reducing the install to a single click simplifies that process. 
+ +Here are some real examples from previous enhancements: +* [As a member of OpenShift concerned with the release process (TRT, dev, staff engineer, maybe even PM), +I want to opt in to pre-release features so that I can run periodic testing in CI and obtain a signal of +feature quality.](https://github.com/openshift/enhancements/blob/master/enhancements/installer/feature-sets.md#user-stories) +* [As a cloud-provider affiliated engineer / platform integrator / RH partner +I want to have a mechanism to signal OpenShift's built-in operators about additional +cloud-provider specific components so that I can inject my own platform-specific controllers into OpenShift +to improve the integration between OpenShift and my cloud provider.](https://github.com/openshift/enhancements/blob/master/enhancements/cloud-integration/infrastructure-external-platform-type.md#user-stories) +* [As an OpenShift cluster administrator, I want to add worker nodes to my +existing single control-plane node cluster, so that it'll be able to meet +growing computation demands.](https://github.com/openshift/enhancements/blob/master/enhancements/single-node/single-node-openshift-with-workers.md#user-stories) + +Include a story on how this proposal will be operationalized: +life-cycled, monitored and remediated at scale. + +### Goals + +Summarize the specific goals of the proposal. How will we know that +this has succeeded? A good goal describes something a user wants from +their perspective, and does not include the implementation details +from the proposal. + +### Non-Goals + +What is out of scope for this proposal? Listing non-goals helps to +focus discussion and make progress. Highlight anything that is being +deferred to a later phase of implementation that may call for its own +enhancement. + +## Proposal + +This section should explain what the proposal actually is. Enumerate +*all* of the proposed changes at a *high level*, including all of the +components that need to be modified and how they will be +different. Include the reason for each choice in the design and +implementation that is proposed here. + +To keep this section succinct, document the details like API field +changes, new images, and other implementation details in the +**Implementation Details** section and record the reasons for not +choosing alternatives in the **Alternatives** section at the end of +the document. + +### Workflow Description + +Explain how the user will use the feature. Be detailed and explicit. +Describe all of the actors, their roles, and the APIs or interfaces +involved. Define a starting state and then list the steps that the +user would need to go through to trigger the feature described in the +enhancement. Optionally add a +[mermaid](https://github.com/mermaid-js/mermaid#readme) sequence +diagram. + +Use sub-sections to explain variations, such as for error handling, +failure recovery, or alternative outcomes. + +For example: + +**cluster creator** is a human user responsible for deploying a +cluster. + +**application administrator** is a human user responsible for +deploying an application in a cluster. + +1. The cluster creator sits down at their keyboard... +2. ... +3. The cluster creator sees that their cluster is ready to receive + applications, and gives the application administrator their + credentials. 
+ +See +https://github.com/openshift/enhancements/blob/master/enhancements/workload-partitioning/management-workload-partitioning.md#high-level-end-to-end-workflow +and https://github.com/openshift/enhancements/blob/master/enhancements/agent-installer/automated-workflow-for-agent-based-installer.md for more detailed examples. + +### API Extensions + +API Extensions are CRDs, admission and conversion webhooks, aggregated API servers, +and finalizers, i.e. those mechanisms that change the OCP API surface and behaviour. + +- Name the API extensions this enhancement adds or modifies. +- Does this enhancement modify the behaviour of existing resources, especially those owned + by other parties than the authoring team (including upstream resources), and, if yes, how? + Please add those other parties as reviewers to the enhancement. + + Examples: + - Adds a finalizer to namespaces. Namespace cannot be deleted without our controller running. + - Restricts the label format for objects to X. + - Defaults field Y on object kind Z. + +Fill in the operational impact of these API Extensions in the "Operational Aspects +of API Extensions" section. + +### Topology Considerations + +#### Hypershift / Hosted Control Planes + +Are there any unique considerations for making this change work with +Hypershift? + +See https://github.com/openshift/enhancements/blob/e044f84e9b2bafa600e6c24e35d226463c2308a5/enhancements/multi-arch/heterogeneous-architecture-clusters.md?plain=1#L282 + +How does it affect any of the components running in the +management cluster? How does it affect any components running split +between the management cluster and guest cluster? + +#### Standalone Clusters + +Is the change relevant for standalone clusters? + +#### Single-node Deployments or MicroShift + +How does this proposal affect the resource consumption of a +single-node OpenShift deployment (SNO), CPU and memory? + +How does this proposal affect MicroShift? For example, if the proposal +adds configuration options through API resources, should any of those +behaviors also be exposed to MicroShift admins through the +configuration file for MicroShift? + +### Implementation Details/Notes/Constraints + +What are some important details that didn't come across above in the +**Proposal**? Go in to as much detail as necessary here. This might be +a good place to talk about core concepts and how they relate. While it is useful +to go into the details of the code changes required, it is not necessary to show +how the code will be rewritten in the enhancement. + +### Risks and Mitigations + +What are the risks of this proposal and how do we mitigate. Think broadly. For +example, consider both security and how this will impact the larger OKD +ecosystem. + +How will security be reviewed and by whom? + +How will UX be reviewed and by whom? + +Consider including folks that also work outside your immediate sub-project. + +### Drawbacks + +The idea is to find the best form of an argument why this enhancement should +_not_ be implemented. + +What trade-offs (technical/efficiency cost, user experience, flexibility, +supportability, etc) must be made in order to implement this? What are the reasons +we might not want to undertake this proposal, and how do we overcome them? + +Does this proposal implement a behavior that's new/unique/novel? Is it poorly +aligned with existing user expectations? Will it be a significant maintenance +burden? Is it likely to be superceded by something else in the near future? 
+ +## Alternatives (Not Implemented) + +Similar to the `Drawbacks` section the `Alternatives` section is used +to highlight and record other possible approaches to delivering the +value proposed by an enhancement, including especially information +about why the alternative was not selected. + +## Open Questions [optional] + +This is where to call out areas of the design that require closure before deciding +to implement the design. For instance, + > 1. This requires exposing previously private resources which contain sensitive + information. Can we do this? + +## Test Plan + +**Note:** *Section not required until targeted at a release.* + +Consider the following in developing a test plan for this enhancement: +- Will there be e2e and integration tests, in addition to unit tests? +- How will it be tested in isolation vs with other components? +- What additional testing is necessary to support managed OpenShift service-based offerings? + +No need to outline all of the test cases, just the general strategy. Anything +that would count as tricky in the implementation and anything particularly +challenging to test should be called out. + +All code is expected to have adequate tests (eventually with coverage +expectations). + +## Graduation Criteria + +**Note:** *Section not required until targeted at a release.* + +Define graduation milestones. + +These may be defined in terms of API maturity, or as something else. Initial proposal +should keep this high-level with a focus on what signals will be looked at to +determine graduation. + +Consider the following in developing the graduation criteria for this +enhancement: + +- Maturity levels + - [`alpha`, `beta`, `stable` in upstream Kubernetes][maturity-levels] + - `Dev Preview`, `Tech Preview`, `GA` in OpenShift +- [Deprecation policy][deprecation-policy] + +Clearly define what graduation means by either linking to the [API doc definition](https://kubernetes.io/docs/concepts/overview/kubernetes-api/#api-versioning), +or by redefining what graduation means. + +In general, we try to use the same stages (alpha, beta, GA), regardless how the functionality is accessed. + +[maturity-levels]: https://git.k8s.io/community/contributors/devel/sig-architecture/api_changes.md#alpha-beta-and-stable-versions +[deprecation-policy]: https://kubernetes.io/docs/reference/using-api/deprecation-policy/ + +**If this is a user facing change requiring new or updated documentation in [openshift-docs](https://github.com/openshift/openshift-docs/), +please be sure to include in the graduation criteria.** + +**Examples**: These are generalized examples to consider, in addition +to the aforementioned [maturity levels][maturity-levels]. 
+ +### Dev Preview -> Tech Preview + +- Ability to utilize the enhancement end to end +- End user documentation, relative API stability +- Sufficient test coverage +- Gather feedback from users rather than just developers +- Enumerate service level indicators (SLIs), expose SLIs as metrics +- Write symptoms-based alerts for the component(s) + +### Tech Preview -> GA + +- More testing (upgrade, downgrade, scale) +- Sufficient time for feedback +- Available by default +- Backhaul SLI telemetry +- Document SLOs for the component +- Conduct load testing +- User facing documentation created in [openshift-docs](https://github.com/openshift/openshift-docs/) + +**For non-optional features moving to GA, the graduation criteria must include +end to end tests.** + +### Removing a deprecated feature + +- Announce deprecation and support policy of the existing feature +- Deprecate the feature + +## Upgrade / Downgrade Strategy + +If applicable, how will the component be upgraded and downgraded? Make sure this +is in the test plan. + +Consider the following in developing an upgrade/downgrade strategy for this +enhancement: +- What changes (in invocations, configurations, API use, etc.) is an existing + cluster required to make on upgrade in order to keep previous behavior? +- What changes (in invocations, configurations, API use, etc.) is an existing + cluster required to make on upgrade in order to make use of the enhancement? + +Upgrade expectations: +- Each component should remain available for user requests and + workloads during upgrades. Ensure the components leverage best practices in handling [voluntary + disruption](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/). Any exception to + this should be identified and discussed here. +- Micro version upgrades - users should be able to skip forward versions within a + minor release stream without being required to pass through intermediate + versions - i.e. `x.y.N->x.y.N+2` should work without requiring `x.y.N->x.y.N+1` + as an intermediate step. +- Minor version upgrades - you only need to support `x.N->x.N+1` upgrade + steps. So, for example, it is acceptable to require a user running 4.3 to + upgrade to 4.5 with a `4.3->4.4` step followed by a `4.4->4.5` step. +- While an upgrade is in progress, new component versions should + continue to operate correctly in concert with older component + versions (aka "version skew"). For example, if a node is down, and + an operator is rolling out a daemonset, the old and new daemonset + pods must continue to work correctly even while the cluster remains + in this partially upgraded state for some time. + +Downgrade expectations: +- If an `N->N+1` upgrade fails mid-way through, or if the `N+1` cluster is + misbehaving, it should be possible for the user to rollback to `N`. It is + acceptable to require some documented manual steps in order to fully restore + the downgraded cluster to its previous state. Examples of acceptable steps + include: + - Deleting any CVO-managed resources added by the new version. The + CVO does not currently delete resources that no longer exist in + the target version. + +## Version Skew Strategy + +How will the component handle version skew with other components? +What are the guarantees? Make sure this is in the test plan. + +Consider the following in developing a version skew strategy for this +enhancement: +- During an upgrade, we will always have skew among components, how will this impact your work? 
+- Does this enhancement involve coordinating behavior in the control plane and + in the kubelet? How does an n-2 kubelet without this feature available behave + when this feature is used? +- Will any other components on the node change? For example, changes to CSI, CRI + or CNI may require updating that component before the kubelet. + +## Operational Aspects of API Extensions + +Describe the impact of API extensions (mentioned in the proposal section, i.e. CRDs, +admission and conversion webhooks, aggregated API servers, finalizers) here in detail, +especially how they impact the OCP system architecture and operational aspects. + +- For conversion/admission webhooks and aggregated apiservers: what are the SLIs (Service Level + Indicators) an administrator or support can use to determine the health of the API extensions + + Examples (metrics, alerts, operator conditions) + - authentication-operator condition `APIServerDegraded=False` + - authentication-operator condition `APIServerAvailable=True` + - openshift-authentication/oauth-apiserver deployment and pods health + +- What impact do these API extensions have on existing SLIs (e.g. scalability, API throughput, + API availability) + + Examples: + - Adds 1s to every pod update in the system, slowing down pod scheduling by 5s on average. + - Fails creation of ConfigMap in the system when the webhook is not available. + - Adds a dependency on the SDN service network for all resources, risking API availability in case + of SDN issues. + - Expected use-cases require less than 1000 instances of the CRD, not impacting + general API throughput. + +- How is the impact on existing SLIs to be measured and when (e.g. every release by QE, or + automatically in CI) and by whom (e.g. perf team; name the responsible person and let them review + this enhancement) + +- Describe the possible failure modes of the API extensions. +- Describe how a failure or behaviour of the extension will impact the overall cluster health + (e.g. which kube-controller-manager functionality will stop working), especially regarding + stability, availability, performance and security. +- Describe which OCP teams are likely to be called upon in case of escalation with one of the failure modes + and add them as reviewers to this enhancement. + +## Support Procedures + +Describe how to +- detect the failure modes in a support situation, describe possible symptoms (events, metrics, + alerts, which log output in which component) + + Examples: + - If the webhook is not running, kube-apiserver logs will show errors like "failed to call admission webhook xyz". + - Operator X will degrade with message "Failed to launch webhook server" and reason "WehhookServerFailed". + - The metric `webhook_admission_duration_seconds("openpolicyagent-admission", "mutating", "put", "false")` + will show >1s latency and alert `WebhookAdmissionLatencyHigh` will fire. + +- disable the API extension (e.g. remove MutatingWebhookConfiguration `xyz`, remove APIService `foo`) + + - What consequences does it have on the cluster health? + + Examples: + - Garbage collection in kube-controller-manager will stop working. + - Quota will be wrongly computed. + - Disabling/removing the CRD is not possible without removing the CR instances. Customer will lose data. + Disabling the conversion webhook will break garbage collection. + + - What consequences does it have on existing, running workloads? + + Examples: + - New namespaces won't get the finalizer "xyz" and hence might leak resource X + when deleted. 
+ - SDN pod-to-pod routing will stop updating, potentially breaking pod-to-pod + communication after some minutes. + + - What consequences does it have for newly created workloads? + + Examples: + - New pods in namespace with Istio support will not get sidecars injected, breaking + their networking. + +- Does functionality fail gracefully and will work resume when re-enabled without risking + consistency? + + Examples: + - The mutating admission webhook "xyz" has FailPolicy=Ignore and hence + will not block the creation or updates on objects when it fails. When the + webhook comes back online, there is a controller reconciling all objects, applying + labels that were not applied during admission webhook downtime. + - Namespaces deletion will not delete all objects in etcd, leading to zombie + objects when another namespace with the same name is created. + +## Infrastructure Needed [optional] + +Use this section if you need things from the project. Examples include a new +subproject, repos requested, github details, and/or testing infrastructure. From c31d140cbd7926c75ac1d206559b664f4c25acae Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 8 May 2025 07:53:37 +0100 Subject: [PATCH 02/24] PUDN, static ips: provide the introduction chapters This commits fills out the following sections: - summary - motivation - user stories - goals - non-goals Signed-off-by: Miguel Duarte Barroso --- ...rimary-udn-attached-vms-with-static-ips.md | 133 +++++++----------- 1 file changed, 50 insertions(+), 83 deletions(-) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index 638f0109c9..cf7cdb563b 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -1,19 +1,20 @@ --- -title: neat-enhancement-idea +title: routed-ingress-support-for-primary-udn-attached-vms-with-static-ips authors: - - TBD + - @maiqueb reviewers: # Include a comment about what domain expertise a reviewer is expected to bring and what area of the enhancement you expect them to focus on. For example: - "@networkguru, for networking aspects, please look at IP bootstrapping aspect" - TBD approvers: # A single approver is preferred, the role of the approver is to raise important questions, help ensure the enhancement receives reviews from all applicable areas/SMEs, and determine when consensus is achieved such that the EP can move forward to implementation. Having multiple approvers makes it difficult to determine who is responsible for the actual approval. - TBD api-approvers: # In case of new or modified APIs or API extensions (CRDs, aggregated apiservers, webhooks, finalizers). If there is no API change, use "None" - TBD -creation-date: yyyy-mm-dd +creation-date: 2025-05-07 last-updated: yyyy-mm-dd tracking-link: # link to the tracking ticket (for example: Jira Feature or Epic ticket) that corresponds to this enhancement - TBD see-also: - - "/enhancements/this-other-neat-thing.md" + - "/enhancements/network/user-defined-network-segmentation.md" + - "/enhancements/network/bgp-ovn-kubernetes.md" replaces: - "/enhancements/that-less-than-great-idea.md" superseded-by: @@ -44,103 +45,69 @@ See ../README.md for background behind these instructions. Start by filling out the header with the metadata for this enhancement. 
-# Neat Enhancement Idea
+# Routed ingress support for primary UDN attached VMs with static IPs
 
-This is the title of the enhancement. Keep it simple and descriptive. A good
-title can help communicate what the enhancement is and should be considered as
-part of any review.
+## Summary
 
-The YAML `title` should be lowercased and spaces/punctuation should be
-replaced with `-`.
+There's an ongoing interest in migrating Virtual Machines (VMs) from other
+virtualization platforms into OpenShift. These users want both a managed IP
+experience (which they have on their existing platform), and the ability to
+consume existing Kubernetes features, like network policies, and sometimes
+services.
 
-The `Metadata` section above is intended to support the creation of tooling
-around the enhancement process.
+Traditional virtualization users have some expectations of what a
+virtualization platform should provide. Live migration and IP address
+persistence across reboots are paramount features for any virtualization
+solution, and OpenShift Virtualization currently supports these features on its
+primary UserDefinedNetworks (UDNs).
 
-## Summary
+These users have additional requirements, like routed ingress into their VMs,
+and the ability to import the VMs with the MAC, IPs, and gateway configuration
+they had on the previous platform.
 
-The `Summary` section is important for producing high quality
-user-focused documentation such as release notes or a development roadmap. It
-should be possible to collect this information before implementation begins in
-order to avoid requiring implementors to split their attention between writing
-release notes and implementing the feature itself.
+Routed ingress into the VMs is already being added to OpenShift, and is tracked
+by [this enhancement](https://github.com/openshift/enhancements/pull/1636/files),
+which adds BGP support to OpenShift.
 
-Your summary should be one paragraph long. More detail
-should go into the following sections.
+Given the aggressive time-frame we're attempting, adding support for importing
+a VM into OpenShift with its MAC, IPs, and gateway on top of a primary UDN is
+the missing piece we should pursue.
 
 ## Motivation
 
-This section is for explicitly listing the motivation, goals and non-goals of
-this proposal. Describe why the change is important and the benefits to users.
+Some users are running VMs in virtualization platforms that provide a managed
+IP configuration.
+
+For these users, we need a way to enable their existing VMs to run properly
+after being migrated into OpenShift, without any guest configuration changes.
+For that, we need to import the VMs from their existing platforms, preserving
+their existing MACs, IPs, and gateway configuration.
 
 ### User Stories
 
-Detail the things that people will be able to do if this is implemented and
-what goal that allows them to achieve. In each story, explain who the actor
-is based on their role, explain what they want to do with the system,
-and explain the underlying goal they have, what it is they are going to
-achieve with this new feature.
-
-Use the standard three part formula:
-
-> "As a _role_, I want to _take some action_ so that I can _accomplish a
-goal_."
-
-Make the change feel real for users, without getting bogged down in
-implementation details.
- -Here are some example user stories to show what they might look like: - -* As an OpenShift engineer, I want to write an enhancement, so that I - can get feedback on my design and build consensus about the approach - to take before starting the implementation. -* As an OpenShift engineer, I want to understand the rationale behind - a particular feature's design and alternatives considered, so I can - work on a new enhancement in that problem space knowing the history - of the current design better. -* As a product manager, I want to review this enhancement proposal, so - that I can make sure the customer requirements are met by the - design. -* As an administrator, I want a one-click OpenShift installer, so that - I can easily set up a new cluster without having to follow a long - set of operations. - -In each example, the persona's goal is clear, and the goal is clearly provided -by the capability being described. -The engineer wants feedback on their enhancement from their peers, and writing -an enhancement allows for that feedback. -The product manager wants to make sure that their customer requirements are fulfilled, -reviewing the enhancement allows them to check that. -The administrator wants to set up his OpenShift cluster as easily as possible, and -reducing the install to a single click simplifies that process. - -Here are some real examples from previous enhancements: -* [As a member of OpenShift concerned with the release process (TRT, dev, staff engineer, maybe even PM), -I want to opt in to pre-release features so that I can run periodic testing in CI and obtain a signal of -feature quality.](https://github.com/openshift/enhancements/blob/master/enhancements/installer/feature-sets.md#user-stories) -* [As a cloud-provider affiliated engineer / platform integrator / RH partner -I want to have a mechanism to signal OpenShift's built-in operators about additional -cloud-provider specific components so that I can inject my own platform-specific controllers into OpenShift -to improve the integration between OpenShift and my cloud provider.](https://github.com/openshift/enhancements/blob/master/enhancements/cloud-integration/infrastructure-external-platform-type.md#user-stories) -* [As an OpenShift cluster administrator, I want to add worker nodes to my -existing single control-plane node cluster, so that it'll be able to meet -growing computation demands.](https://github.com/openshift/enhancements/blob/master/enhancements/single-node/single-node-openshift-with-workers.md#user-stories) - -Include a story on how this proposal will be operationalized: -life-cycled, monitored and remediated at scale. +- As the owner of a VM running in a traditional virtualization platform, I want +to import said VM - whose IPs were statically configured - into Kubernetes, +attaching it to an overlay network. I want to ingress/egress using the same IP +address (the one assigned to the VM). +- As the owner of a VM running in a traditional virtualization platform, I want +to import said VM into Kubernetes, attaching it to an overlay network. I want to +be able to consume Kubernetes features like network policies, and services, to +benefit from the Kubernetes experience. ### Goals -Summarize the specific goals of the proposal. How will we know that -this has succeeded? A good goal describes something a user wants from -their perspective, and does not include the implementation details -from the proposal. 
+- Preserve the original VM's MAC address
+- Preserve the original VM's IP address
+- Specify the gateway IP address of the imported VM so it can keep the same
+default route
+- Allow excludeSubnets to be used with L2 UDNs to ensure OVNK does not use an IP
+address from the range that VMs have already been assigned outside the
+cluster (or for secondary IP addresses assigned to the VM's interfaces)
 
 ### Non-Goals
 
-What is out of scope for this proposal? Listing non-goals helps to
-focus discussion and make progress. Highlight anything that is being
-deferred to a later phase of implementation that may call for its own
-enhancement.
+Handle importing VMs without a managed IP experience - i.e. IPs were defined
+statically in the guest.
 
 ## Proposal
 
From 20b76f72040e416ec0c179921ab381914a885345 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Thu, 8 May 2025 11:19:58 +0100
Subject: [PATCH 03/24] PUDN, static ips: add the proposal section

Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 107 ++++++++++++++++--
 1 file changed, 96 insertions(+), 11 deletions(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index cf7cdb563b..cf956b202e 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -111,17 +111,102 @@ statically in the guest.
 
 ## Proposal
 
-This section should explain what the proposal actually is. Enumerate
-*all* of the proposed changes at a *high level*, including all of the
-components that need to be modified and how they will be
-different. Include the reason for each choice in the design and
-implementation that is proposed here.
-
-To keep this section succinct, document the details like API field
-changes, new images, and other implementation details in the
-**Implementation Details** section and record the reasons for not
-choosing alternatives in the **Alternatives** section at the end of
-the document.
+This proposal to import VMs from other virtualization platforms while
+preserving their networking configuration will impact three different OpenShift
+components:
+- MTV
+- KubeVirt's ipam-extensions
+- OVN-Kubernetes
+
+We will elaborate the overall flow first, before digging into details in each
+of the components.
+
+MTV currently introspects the VM to learn the MAC addresses of the interfaces,
+and templates the VM which will be created in OpenShift Virtualization with the
+required MAC address. We would need MTV to somehow also figure out what IP
+addresses are on the aforementioned interfaces.
+
+**NOTE:** a separate component can introspect the VMs in their source cluster
+and identify their IPs. It really does **not** need to be MTV to do it.
+
+To do this, we need a MAC to IP addresses association stored in the
+cluster (*at least* for the VMs which will be migrated into OpenShift Virt).
+There are two alternatives for persisting this information: a
+[centralized approach](#centralized-ip-management),
+which has the benefit of being a single, centralized place an admin user
+can provision and read information from, or a
+[de-centralized approach](#de-centralized-ip-management), similar
+to the existing `IPAMClaim` CRD, where the information for each VM's
+connection to a primary UDN would be recorded.
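+
+For illustration only - the concrete CRDs are discussed later in this
+document, and every name and shape below is an assumption rather than a
+committed API - this is roughly the data each option would persist:
+```yaml
+# centralized: one object per UDN, holding every MAC <=> IPs entry
+network: isolated-net
+entries:
+  "02:03:04:05:06:07":
+    - "192.0.2.20/24"
+    - "fd90:1234::14/64"
+---
+# de-centralized: one object per VM attachment, IPAMClaim-style
+network: isolated-net
+mac: "02:03:04:05:06:07"
+ips:
+  - "192.0.2.20/24"
+  - "fd90:1234::14/64"
+```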
+
+Independently of which option we take, the API between OpenShift Virt (CNV) and
+OVN-Kubernetes will be the same, and is roughly described in
+[CNV to OVN-Kubernetes API](#cnv-ovnk-api).
+
+A new CRD - named `IPPool`, or `DHCPLeaseConfig` (or the like) - will be
+created, and is associated to a UDN (both UDN, and C-UDN). This CRD holds the
+association of MAC address to IPs for a UDN. When importing the VM into
+OpenShift Virt, MTV (or a separate, dedicated component) will provision /
+update this object with the required information (MAC to IPs association).
+This object provides the admin user with a single place to check the
+MAC to IPs mapping. In a first implementation phase, we can have the
+admin provision these CRs manually. Later on, MTV (or any other cluster
+introspection tool) can provision these on behalf of the admin.
+
+The `ipam-extensions` mutating webhook will kick in whenever a virt launcher
+pod is created - it will identify when the VM has a primary UDN attachment
+(already happens today), and will also identify when the pod network attachment
+has a MAC address configuration request.
+It will then access the `IPPool` (or `DHCPLeaseConfig` for the UDN) to extract
+which IP addresses are assigned to said MAC address.
+Finally, the `ipam-extensions` mutating webhook will mutate the launcher pod to
+customize the primary UDN attachment using the multus default network
+annotation. An example of this annotation would look like:
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: pod-example
+  annotations:
+    v1.multus-cni.io/default-network: '{
+      "name": "isolated-net",
+      "namespace": "myisolatedns",
+      "mac": "02:03:04:05:06:07",
+      "ips": [
+        "192.0.2.20/24",
+        "fd90:1234::14/64"
+      ]
+}'
+```
+
+OVN-Kubernetes will then act upon this information by configuring the
+requested MAC and IPs in the pod. If the allocation of the IP is successful,
+said IPs will be persisted in the corresponding `IPAMClaim` CR (which already
+happens today). If it fails (e.g. that IP address is already in use in the
+subnet), the CNI will fail, crash-looping the pod. The error condition will be
+reported in the associated `IPAMClaim` CR, and an event logged in the pod.
+
+This flow is described in the following sequence diagram:
+```mermaid
+sequenceDiagram
+actor Admin
+actor VM Owner
+
+participant MTV
+participant CNV
+participant o as OVN-Kubernetes
+
+Admin ->> CNV: provision IPPool
+CNV -->> Admin: OK
+
+VM Owner ->> MTV: import VM
+MTV ->> CNV: create VM(name=<...>, primaryUDNMac=origMAC)
+CNV ->> CNV: ips = getIPsForMAC(mac=origMAC)
+CNV ->> o: create pod(mac=origMAC, IPs=ips)
+o -->> CNV: OK
+CNV -->> MTV: OK
+MTV -->> VM Owner: OK
+```

From 9e345e29f45ddbb4accea79aa7dba7b11682c8fa Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Fri, 9 May 2025 12:03:37 +0100
Subject: [PATCH 04/24] PUDN, static ips: preserve gateway on VM import

This commit describes how an imported VM's gateway will be preserved,
which is required for the network configuration on the guests to be
identical on both the source/destination clusters.
Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index cf956b202e..2ca2931eec 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -119,7 +119,14 @@ components:
 - OVN-Kubernetes
 
 We will elaborate the overall flow first, before digging into details in each
-of the components.
+of the components. There will be two sub-sections: one about preserving the
+[IP addresses](#preserving-the-original-vm-ip-address),
+and another about preserving the
+[gateway configuration](#preserving-the-vm-gateway).
+MTV already introspects the VM on the source cluster, and templates the VM with
+its original MAC addresses.
+
+### Preserving the original VM IP address
 
 MTV currently introspects the VM to learn the MAC addresses of the interfaces,
 and templates the VM which will be created in OpenShift Virtualization with the
@@ -208,6 +215,24 @@ CNV -->> MTV: OK
 MTV -->> VM Owner: OK
 ```
 
+### Preserving the VM gateway
+Preserving the gateway will require changes to the OVN-Kubernetes API. Both the
+UDN and C-UDN CRDs should be updated - adding a gateway definition - and the
+OVN-Kubernetes
+[NetConf](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/2643dabe165bcb2d4564866ee1476a891c316fe3/go-controller/pkg/cni/types/types.go#L10)
+CNI structure should also be updated with a `gateway` attribute.
+
+The primary UDN gateway code will need to be updated to parametrize the gateway
+IP address, since today it uses the first IP address of the UDN subnet.
+To keep the feature backwards compatible, that would remain the default if the
+UDN does **not** specify a gateway configuration. This will ensure the
+DHCP/RA flows on the logical switch advertise the correct gateway
+information to the guests on the network, and also that the gateway is properly
+configured to allow features like Kubernetes `Services`.
+
+This flow is described in more detail (and presents alternatives to it) in the
+[OVN-Kubernetes enhancement proposal](https://github.com/ovn-kubernetes/ovn-kubernetes/pull/5238).
+
 ### Workflow Description
 
 Explain how the user will use the feature. Be detailed and explicit.
From 2d82e96cfd72056f132be5e6eea427b59591a66 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Fri, 9 May 2025 14:22:48 +0100
Subject: [PATCH 05/24] PUDN, static ips: add ipam-claim-ref to the multus
 default net annotation

Signed-off-by: Miguel Duarte Barroso
---
 ...gress-support-for-primary-udn-attached-vms-with-static-ips.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 2ca2931eec..158c04f7b9 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -179,6 +179,7 @@ metadata:
       "name": "isolated-net",
       "namespace": "myisolatedns",
       "mac": "02:03:04:05:06:07",
+      "ipam-claim-reference": "myvm.isolated-net",
       "ips": [
         "192.0.2.20/24",
         "fd90:1234::14/64"

From 9a1a7114daaaca42e135eea497b9a1c0e1b7142c Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Fri, 9 May 2025 16:59:29 +0100
Subject: [PATCH 06/24] PUDN, static ips: alternatives to the MAC <=> IPs
 association

Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 73 +++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 158c04f7b9..ea7bd83831 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -150,6 +150,8 @@ Independently of which option we take, the API between OpenShift Virt (CNV) and
 OVN-Kubernetes will be the same, and is roughly described in
 [CNV to OVN-Kubernetes API](#cnv-ovnk-api).
 
+#### Centralized IP management
+
 A new CRD - named `IPPool`, or `DHCPLeaseConfig` (or the like) - will be
 created, and is associated to a UDN (both UDN, and C-UDN). This CRD holds the
 association of MAC address to IPs for a UDN. When importing the VM into
@@ -160,6 +162,32 @@ MAC to IPs mapping. In a first implementation phase, we can have the
 admin provision these CRs manually. Later on, MTV (or any other cluster
 introspection tool) can provision these on behalf of the admin.
 
+This approach has the following advantages:
+- a single place for the admin to manage per UDN
+- similar to what is done on VMware
+- simple association between the `IPPool` and the logical network
+
+This approach requires the `IPAMClaim` CRD to be updated, specifically its
+status sub-resource - we need to introduce `Conditions` so we can report errors
+when allocating IPs which were requested by the user - what if the address is
+already in use within the UDN?
+
+#### De-centralized IP management
+
+This approach requires having N CRs with a 1:1 association between a primary
+UDN attachment and the MAC and IPs it had on the original platform.
+
+We could either introduce a new CRD with IPs and MAC association (which would
+deprecate the IPAMClaim CRD), or, embracing clunkiness in all its glory, we
+could change the IPAMClaim CRD to have the MAC and IP addresses requested in
+the spec stanza as well.
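+
+A minimal sketch of the spec-based variant could look like this (the
+`macRequest` field and the exact field names are assumptions, to be settled
+during API review):
+```yaml
+apiVersion: k8s.cni.cncf.io/v1alpha1
+kind: IPAMClaim
+metadata:
+  name: myvm.isolated-net
+  namespace: myisolatedns
+spec:
+  network: isolated-net
+  interface: eth0
+  macRequest: "02:03:04:05:06:07"  # hypothetical field
+  ipRequests:
+    - "192.0.2.20/24"
+    - "fd90:1234::14/64"
+```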
+
+OVN-Kubernetes would read the CR, attempt to reserve the requested MAC and IP,
+and then persist that information there - reporting a
+successful sync in the `IPAMClaim` status - or a failure otherwise.
+
+#### CNV OVNK API
+
 The `ipam-extensions` mutating webhook will kick in whenever a virt launcher
 pod is created - it will identify when the VM has a primary UDN attachment
 (already happens today), and will also identify when the pod network attachment
@@ -216,6 +244,51 @@ CNV -->> MTV: OK
 MTV -->> VM Owner: OK
 ```
 
+Hence, the required changes would be:
+- ipam-extensions (CNV component) and OVN-Kubernetes API will need to be
+  changed, to work with the multus default network annotation. Depending on the
+  VM's specification, it will request specific MAC and IP addresses for the
+  primary UDN attachment. The `ipam-claim-reference` will also be requested via
+  this annotation.
+- the `IPAMClaim` CRD will need to be updated, adding a `Conditions` array to
+  its status. This way we will be able to report errors back to the user (e.g.
+  the desired IP allocation is already in use)
+- new CRD to be added, where the MAC <-> IP addresses association will be
+  persisted. Only required for the
+  [centralized IP management](#centralized-ip-management) option
+- ipam-extensions (CNV component) will now also read the `IPPool` CRs for VMs
+  that have primary UDNs in their namespaces and request a specific MAC address
+  in their specs. These CRs will be used to generate the multus default network
+  annotation, which will be set in the pods by the mutating webhook.
+
+In a second stage, MTV (or other source cluster introspection tool) will
+provision the `IPPool` CR for the UDN on behalf of the admin user, thus
+simplifying the operation of the solution, making it more robust and less
+error-prone.
+
+In the following sub-sections we will detail each of these changes.
+
+#### IPPool CRD
+This IPPool CRD has a 1:1 association to a UDN (or C-UDN). For now, it'll only
+apply to a primary UDN though. In the future, nothing prevents these CRs from
+being used for secondary UDNs.
+
+The IPPool CRD is a non-namespaced object associated to a UDN via the NAD name,
+since we want to have this feature upstream in the k8snetworkplumbingwg, rather
+than in OVN-Kubernetes.
+
+The `IPPool` spec will have an attribute via which the admin can point to a
+UDN - by the logical network name. The admin (which is the only actor able to
+create the `IPPool`) has read access to all NADs in all namespaces, hence they
+can inspect the NAD object to extract the network name.
+
+An alternative would be to reference the UDN by the NAD name - with that
+information, whatever controller reconciling the `IPPool` CRs can access the
+NAD, and extract the network name. This approach would require a validating
+webhook to ensure that you can't have multiple `IPPool`s referencing the same
+logical network (since multiple NADs with the same logical network name - in
+**different** namespaces can exist).
+
 ### Preserving the VM gateway
 Preserving the gateway will require changes to the OVN-Kubernetes API. Both the
 UDN and C-UDN CRDs should be updated - adding a gateway definition - and the

From 32b9764210d2be62c0478622b21e073e6465da5d Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Mon, 12 May 2025 15:22:54 +0100
Subject: [PATCH 07/24] PUDN, static ips: fill out the API section

Indicate the changes to the `IPAMClaim` CRD, as well as the definition
of the proposed new `IPPool` CRD.
Signed-off-by: Miguel Duarte Barroso --- ...rimary-udn-attached-vms-with-static-ips.md | 94 ++++++++++++++++--- 1 file changed, 82 insertions(+), 12 deletions(-) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index ea7bd83831..8fd84e2390 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -340,21 +340,91 @@ and https://github.com/openshift/enhancements/blob/master/enhancements/agent-ins ### API Extensions -API Extensions are CRDs, admission and conversion webhooks, aggregated API servers, -and finalizers, i.e. those mechanisms that change the OCP API surface and behaviour. +#### IPAMClaim CRD + +The IPAMClaim CRD status sub-resource will need to be updated, adding +conditions. + +For traceability, we also suggest adding to the `IPAMClaim` spec an attribute +to indicate which pod is holding the claim at any given time. On a VM live +migration, the OVN-Kubernetes control plane would update the `OwnerPod` after +the claim has been consumed by a different pod. Same for VM start/stop +scenarios. + +If we choose the [de-centralized IP management](#de-centralized-ip-management) +alternative, we will also need to change the IPAMClaim CRD spec, adding it an +attribute to request specific IPs for the workload which will consume the +claim. + +Below you'll see the changes requested: +```go +type IPAMClaimSpec struct { + // The network name for which this persistent allocation was created + Network string `json:"network"` + // The pod interface name for which this allocation was created + Interface string `json:"interface"` ++ // The IPs requested by the user ++ // +optional ++ IPRequests []CIDR `json:"ipRequests,omitempty"` +} + +// IPAMClaimStatus contains the observed status of the IPAMClaim. +type IPAMClaimStatus struct { + // The list of IP addresses (v4, v6) that were allocated for the pod interface +- IPs []string `json:"ips"` ++ IPs []CIDR `json:"ips"` ++ // The name of the pod holding the IPAMClaim ++ OwnerPod string `json:"ownerPod"` ++ // Conditions contains details for one aspect of the current state of this API Resource ++ Conditions []metav1.Condition `json:"conditions,omitempty"` +} +``` -- Name the API extensions this enhancement adds or modifies. -- Does this enhancement modify the behaviour of existing resources, especially those owned - by other parties than the authoring team (including upstream resources), and, if yes, how? - Please add those other parties as reviewers to the enhancement. +The `IPAMClaim` status will have (at least) the following conditions: +- SuccessfulAllocation: reports the IP address was successfully allocated for + the workload +- AllocationConflict: reports the requested allocation was not successful - i.e. + the requested IP address is already present in the network - Examples: - - Adds a finalizer to namespaces. Namespace cannot be deleted without our controller running. - - Restricts the label format for objects to X. - - Defaults field Y on object kind Z. +#### New IPPool CRD + +The IPPool CRD will operate as a place to store the MAC to IP addresses +association for a logical network. 
+ +```go +type IPPool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec IPPoolSpec `json:"spec,omitempty"` + Status IPPoolStatus `json:"status,omitempty"` +} + +type IPPoolSpec struct { + NetworkName string `json:"network-name"` + Entries map[net.HardwareAddr][]net.IP `json:"entries"` +} + +type IPPoolStatus struct { + Conditions []Condition + AssociatedNADs []NADInfo +} + +type NADInfo struct { + Name string `json:"name"` +} +``` + +The `IPPool` CRD will have at least the following conditions: +- DuplicateMACAddresses: will indicate to the admin that a MAC address appears + multiple times in the `Entries` list +- DuplicateIPAddresses: will indicate to the admin that an IP address appears + multiple times associated to different MAC addresses in the `Entries` list +- Success: the data present in the spec is valid (no duplicate MACs or IPs) -Fill in the operational impact of these API Extensions in the "Operational Aspects -of API Extensions" section. +We plan on reporting in the `IPPool` the name of the NADs which are holding the +configuration for the network which this pool stores the MAC <=> IPs +associations. ### Topology Considerations From 50ef16de378860c775e3e217a35dcdc33365f25c Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Tue, 13 May 2025 12:23:08 +0100 Subject: [PATCH 08/24] PUDN, static ips: react to qinqon's review Signed-off-by: Miguel Duarte Barroso --- ...rimary-udn-attached-vms-with-static-ips.md | 136 +++++++++++++++--- 1 file changed, 115 insertions(+), 21 deletions(-) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index 8fd84e2390..0b7c75f675 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -93,6 +93,9 @@ address (the one assigned to the VM). to import said VM into Kubernetes, attaching it to an overlay network. I want to be able to consume Kubernetes features like network policies, and services, to benefit from the Kubernetes experience. +- As the owner of a VM running in a traditional virtualization platform, I want +to import said VM into Kubernetes, attaching it to an overlay network, without +having to reconfigure the VM's networking configuration (MAC, IPs, gateway). ### Goals @@ -103,11 +106,23 @@ default route - Allow excludeSubnets to be used with L2 UDNs to ensure OVNK does not use an IP address from the range that VMs have already been assigned outside the cluster (or for secondary IP addresses assigned to the VM's interfaces) +- Ensure it is possible to enable non-NATed traffic for pods with the static +network configuration by exposing the network through BGP + +**NOTE:** all the goals mentioned above will be fulfilled **only** for the +**cluster** UDN type. ### Non-Goals -Handle importing VMs without a managed IP experience - i.e. IPs were defined +- Handle importing VMs without a managed IP experience - i.e. IPs were defined statically in the guest. +- Importing a VM whose gateway is outside the subnet of the network. +- Adding non default routes to the VM when importing it into OpenShift Virt. +- Modifying the default gateway and management IPs of a primary UDN after it was created. +- Modifying a pod's network configuration after the pod was created. 
+ +**NOTE:** implementing support on UDNs (achieving the namespace isolation +use-case) is outside the scope for this feature. ## Proposal @@ -153,10 +168,10 @@ OVN-Kubernetes will be the same, and is roughly described in #### Centralized IP management A new CRD - named `IPPool`, or `DHCPLeaseConfig` (or the like) - will be -created, and is associated to a UDN (both UDN, and C-UDN). This CRD holds the -association of MAC address to IPs for a UDN. When importing the VM into -OpenShift Virt, MTV (or a separate, dedicated component) will provision / -update this object with the required information (MAC to IPs association). +created, and is associated to a cluster UDN. This CRD holds the association of +MAC address to IPs for a UDN. When importing the VM into OpenShift Virt, MTV +(or a separate, dedicated component) will provision / update this object with +the required information (MAC to IPs association). This object is providing to the admin user a single place to check the IP address MAC to IPs mapping. On an first implementation phase, we can have the admin provision these CRs manually. Later on, MTV (or any other cluster @@ -172,6 +187,9 @@ status sub-resource - we need to introduce `Conditions` so we can report errors when allocating IPs which were requested by the user - what if the address is already in use within the UDN ? +The flow is described in more detail in the +[implementation details](#implementation-detailsnotesconstraints) section. + #### De-centralized IP management This approach requires having N CRs with a 1:1 association between a primary @@ -191,7 +209,9 @@ successful sync in the `IPAMClaim` status - or a failure otherwise. The `ipam-extensions` mutating webhook will kick in whenever a virt launcher pod is created - it will identify when the VM has a primary UDN attachment (already happens today), and will also identify when the pod network attachment -has a MAC address configuration request. +has a MAC address configuration request (defined in the KubeVirt +`VMI.Spec.Domain.Devices.Interfaces` attribute). + It will then access the `IPPool` (or `DHCPLeaseConfig` for the UDN) to extract which IP addresses are assigned to said MAC address. Finally, the `ipam-extensions` mutating webhook will mutate the launcher pod to @@ -273,14 +293,16 @@ This IPPool CRD has a 1:1 association to a UDN (or. For now, it'll only apply to a primary UDN though. In the future, nothing prevents these CRs from being used for secondary UDNs. -The IPPool CRD is a non-namespaced object associated to a UDN via the NAD name, -since we want to have this feature upstream in the k8snetworkplumbingwg, rather -than in OVN-Kubernetes. +The IPPool CRD is a cluster-scoped object associated to a UDN via the logical +network name (`NAD.Spec.Config.Name` attribute), since we want to have this +feature upstream in the k8snetworkplumbingwg, rather than in OVN-Kubernetes. The `IPPool` spec will have an attribute via which the admin can point to a -UDN - by the logical network name. The admin (which is the only actor able to -create the `IPPool`) has read access to all NADs in all namespaces, hence they -can inspect the NAD object to extract the network name. +cluster UDN - by the logical network name. The admin (which is the only actor +able to create the `IPPool`) has read access to all NADs in all namespaces, +hence they can inspect the NAD object to extract the network name. 
We could
+even update the cluster UDN type to expose the generated network name in its
+status sub-resource, to simplify the admin user's UX.
 
 An alternative would be to reference the UDN by the NAD name - with that
 information, whatever controller reconciling the `IPPool` CRs can access the
 NAD, and extract the network name. This approach would require a validating
 webhook to ensure that you can't have multiple `IPPool`s referencing the same
 logical network (since multiple NADs with the same logical network name - in
 **different** namespaces can exist).
 
 ### Preserving the VM gateway
-Preserving the gateway will require changes to the OVN-Kubernetes API. Both the
-UDN and C-UDN CRDs should be updated - adding a gateway definition - and the
+Preserving the gateway will require changes to the OVN-Kubernetes API. The
+cluster UDN CRD should be updated - adding a gateway definition - and the
 OVN-Kubernetes
 [NetConf](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/2643dabe165bcb2d4564866ee1476a891c316fe3/go-controller/pkg/cni/types/types.go#L10)
 CNI structure should also be updated with a `gateway` attribute.
@@ -386,8 +408,29 @@ The `IPAMClaim` status will have (at least) the following conditions:
 - AllocationConflict: reports the requested allocation was not successful -
   i.e. the requested IP address is already present in the network
 
+A **successful** allocation condition will look like:
+```
+type: SuccessfulAllocation
+status: "True"
+reason: IPAllocated
+message: "IP 192.168.1.5 allocated successfully"
+lastTransitionTime: "2025-05-13T11:56:00Z"
+```
+
+A **failed** allocation condition will look like:
+```
+type: AllocationConflict
+status: "True"
+reason: IPAlreadyExists
+message: "Requested IP 192.168.1.5 is already assigned in the network"
+lastTransitionTime: "2025-05-13T11:56:00Z"
+```
+
 #### New IPPool CRD
 
+**NOTE:** this CRD is only required if we go with a
+[centralized IP allocation](#centralized-ip-management) design.
+
 The IPPool CRD will operate as a place to store the MAC to IP addresses
 association for a logical network.
 
@@ -401,13 +444,14 @@ type IPPool struct {
 }
 
 type IPPoolSpec struct {
-	NetworkName string `json:"network-name"`
-	Entries map[string][]net.IP `json:"entries"`
+	NetworkName string `json:"network-name"`
+	Entries map[string][]net.IPNet `json:"entries"`
 }
 
 type IPPoolStatus struct {
 	Conditions []Condition
-	AssociatedNADs []NADInfo
+	AssociatedNADs []NADInfo // this is an optional improvement for
+	                         // improving the UX. It is not **required**.
 }
 
 type NADInfo struct {
@@ -426,6 +470,59 @@ We plan on reporting in the `IPPool` status the names of the NADs holding the
 configuration for the network whose MAC <=> IPs associations this pool stores.
+
+You can find below the conditions for when the `IPPool` information is correct
+(no conflicts):
+
+```
+apiVersion: ippool.k8s.cni.cncf.io/v1alpha1
+kind: IPPool
+metadata:
+  name: example-ip-pool
+spec:
+  network-name: "prod-network"
+  entries:
+    "00:1a:4b:12:34:56": ["10.0.0.0/24", "10.0.1.0/24"]
+status:
+  conditions:
+  - type: Success
+    status: "True"
+    reason: NoDuplicates
+    lastTransitionTime: "2025-05-13T12:05:00Z"
+```
+
+You can find below the conditions for when the `IPPool` information has
+conflicts:
+
+```
+apiVersion: ippool.k8s.cni.cncf.io/v1alpha1
+kind: IPPool
+metadata:
+  name: example-ip-pool
+spec:
+  network-name: "prod-network"
+  entries:
+    "00:1a:4b:12:34:56": ["10.0.0.2/24", "10.0.1.10/24"]
+    # NOTE: the duplicate key below is shown on purpose to illustrate the
+    # error scenario - a YAML parser would reject or merge these entries
+    "00:1a:4b:12:34:56": ["10.0.0.14/24", "10.0.1.110/24"]
+status:
+  conditions:
+  - type: Success
+    status: "False"
+    reason: DuplicatesExist
+    message: "Duplicate MAC 00:1a:4b:12:34:56 found in entries"
+    lastTransitionTime: "2025-05-13T12:05:00Z"
+
+  - type: DuplicateMACAddresses
+    status: "True"
+    reason: DuplicateMACFound
+    message: "MAC address 00:1a:4b:12:34:56 appears 2 times"
+    lastTransitionTime: "2025-05-13T12:05:00Z"
+
+  - type: DuplicateIPAddresses
+    status: "False"
+    reason: NoIPDuplicates
+    message: "All IP addresses are unique per MAC"
+    lastTransitionTime: "2025-05-13T12:05:00Z"
+```
+
 ### Topology Considerations
 
 #### Hypershift / Hosted Control Planes
@@ -488,10 +585,7 @@ burden? Is it likely to be superceded by something else in the near future?
 
 ## Alternatives (Not Implemented)
 
-Similar to the `Drawbacks` section the `Alternatives` section is used
-to highlight and record other possible approaches to delivering the
-value proposed by an enhancement, including especially information
-about why the alternative was not selected.
+
 
 ## Open Questions [optional]

From b80c584b52c68c735d457aed37fcb071a50c4687 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Tue, 13 May 2025 12:29:53 +0100
Subject: [PATCH 09/24] PUDN, static ips: add user flows for the de-centralized IP mgmt approach

Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 32 ++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 0b7c75f675..1dbffc83c1 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -242,7 +242,8 @@ happens today). If it fails (e.g. that IP address is already in use in the
 subnet), the CNI will fail, crash-looping the pod. The error condition will be
 reported in the associated `IPAMClaim` CR, and an event logged in the pod.
 
-This flow is described in the following sequence diagram:
+The [centralized IP management](#centralized-ip-management) flow is described
+in the following sequence diagram:
 ```mermaid
 sequenceDiagram
 actor Admin
@@ -286,6 +287,35 @@ provision the `IPPool` CR for the UDN on behalf of the admin user, thus
 simplifying the operation of the solution, making it more robust and less
 error-prone.
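+
+As a concrete sketch of what the admin would provision for this flow, the
+cluster UDN could look roughly like the example below. This is illustrative
+only: the names, namespace selector and subnet are made up, and the exact
+`ClusterUserDefinedNetwork` field names should be double-checked against the
+OVN-Kubernetes API:
+
+```
+apiVersion: k8s.ovn.org/v1
+kind: ClusterUserDefinedNetwork
+metadata:
+  name: vm-import-net
+spec:
+  namespaceSelector:
+    matchLabels:
+      kubernetes.io/metadata.name: blue-vms
+  network:
+    topology: Layer2
+    layer2:
+      role: Primary
+      subnets:
+        - "10.0.0.0/16"
+      ipam:
+        # Persistent IPAM keeps the IP allocation stable across VM restarts
+        lifecycle: Persistent
+```
+
+The `IPPool` the admin (or MTV) provisions afterwards would then point at this
+network through its `network-name` attribute, as in the `IPPool` examples
+above.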
+
+The [de-centralized IP management](#de-centralized-ip-management) flow is
+described in the following sequence diagram:
+
+```mermaid
+sequenceDiagram
+actor Admin
+actor VM Owner
+
+participant MTV
+participant CNV
+participant o as OVN-Kubernetes
+
+Admin ->> CNV: provision IPAMClaim w/ MAC and IP requests
+CNV -->> Admin: OK
+
+VM Owner ->> MTV: import VM
+MTV ->> CNV: create VM(name=<...>, primaryUDNMac=origMAC)
+CNV ->> CNV: ips = getIPsForMAC(mac=origMAC)
+CNV ->> o: create pod(mac=origMAC, IPs=ips)
+o -->> CNV: OK
+CNV -->> MTV: OK
+MTV -->> VM Owner: OK
+```
+
+This option has the network admin create the IPAMClaim, and request a MAC and
+IP addresses for the VM; in a second stage of the implementation, MTV (or any
+other component able to introspect the source cluster for the VM's MAC and IPs)
+will create the IPAMClaim on behalf of the admin user.
+
 In the following sub-sections we will detail each of these changes.

From da4bd0be1932248911ef0f2ae25f7e92b4e5af38 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Tue, 13 May 2025 12:54:42 +0100
Subject: [PATCH 10/24] PUDN, static ips: provide workflow for both alternatives

Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 151 +++++++++---------
 1 file changed, 75 insertions(+), 76 deletions(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 1dbffc83c1..1ffddcf324 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -212,8 +212,12 @@ pod is created - it will identify when the VM has a primary UDN attachment
 (already happens today), and will also identify when the pod network attachment
 has a MAC address configuration request (defined in the KubeVirt
 `VMI.Spec.Domain.Devices.Interfaces` attribute).
 
-It will then access the `IPPool` (or `DHCPLeaseConfig` for the UDN) to extract
-which IP addresses are assigned to said MAC address.
+It will then access the `IPPool` (in the
+[centralized IP management option](#centralized-ip-management-workflow)) **or**
+the `IPAMClaim` (in the
+[de-centralized IP management option](#de-centralized-ip-management-workflow))
+to extract which IP addresses are assigned to said MAC address.
+
 Finally, the `ipam-extensions` mutating webhook will mutate the launcher pod to
 customize the primary UDN attachment using the multus default network
 annotation. This annotation (with an associated example) would look like:
@@ -242,6 +246,61 @@ happens today). If it fails (e.g. that IP address is already in use in the
 subnet), the CNI will fail, crash-looping the pod. The error condition will be
 reported in the associated `IPAMClaim` CR, and an event logged in the pod.
 
+In the following sub-sections we will detail each of these changes.
+
+#### IPPool CRD
+This IPPool CRD has a 1:1 association to a UDN. For now, it'll only apply
+to a primary UDN though. In the future, nothing prevents these CRs from being
+used for secondary UDNs.
+
+The IPPool CRD is a cluster-scoped object associated to a UDN via the logical
+network name (`NAD.Spec.Config.Name` attribute), since we want to have this
+feature upstream in the k8snetworkplumbingwg, rather than in OVN-Kubernetes.
+
+The `IPPool` spec will have an attribute via which the admin can point to a
+cluster UDN - by the logical network name.
The admin (which is the only actor
+able to create the `IPPool`) has read access to all NADs in all namespaces,
+hence they can inspect the NAD object to extract the network name. We could
+even update the cluster UDN type to expose the generated network name in its
+status sub-resource, to simplify the admin user's UX.
+
+An alternative would be to reference the UDN by the NAD name - with that
+information, whatever controller reconciling the `IPPool` CRs can access the
+NAD, and extract the network name. This approach would require a validating
+webhook to ensure that you can't have multiple `IPPool`s referencing the same
+logical network (since multiple NADs with the same logical network name - in
+**different** namespaces can exist).
+
+### Preserving the VM gateway
+Preserving the gateway will require changes to the OVN-Kubernetes API. The
+cluster UDN CRD should be updated - adding a gateway definition - and the
+OVN-Kubernetes
+[NetConf](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/2643dabe165bcb2d4564866ee1476a891c316fe3/go-controller/pkg/cni/types/types.go#L10)
+CNI structure should also be updated with a `gateway` attribute.
+
+The primary UDN gateway code will need to be updated to parametrize the gateway
+IP address, since today it is using the UDN subnet first IP address.
+To keep the feature backwards compatible, that would be the default if the UDN
+does **not** feature the gateway configuration. This will ensure the
+DHCP/RA flows on the logical switch are advertising the correct gateway
+information to the guests on the network, and also that the gateway is properly
+configured to allow features like Kubernetes `Services`.
+
+This flow is described in more detail (and presents alternatives to it) in the
+[OVN-Kubernetes enhancement proposal](https://github.com/ovn-kubernetes/ovn-kubernetes/pull/5238).
+
+### Workflow Description
+
+We'll list here the workflow for both the
+[centralized IP management](#centralized-ip-management) option, and the
+[de-centralized IP management](#de-centralized-ip-management) option.
+For both alternatives, we plan on initially having the admin provision the
+required CR(s) with the relevant data (MAC and IP addresses) - in the future,
+MTV or another source cluster introspection mechanism can provision these with
+the required data.
+
+#### Centralized IP management workflow
+
 The [centralized IP management](#centralized-ip-management) flow is described
 in the following sequence diagram:
 ```mermaid
@@ -287,6 +346,8 @@ provision the `IPPool` CR for the UDN on behalf of the admin user, thus
 simplifying the operation of the solution, making it more robust and less
 error-prone.
 
+#### De-centralized IP management workflow
+
 The [de-centralized IP management](#de-centralized-ip-management) flow is
 described in the following sequence diagram:
 
@@ -311,85 +372,23 @@ CNV -->> MTV: OK
 MTV -->> VM Owner: OK
 ```
 
+Hence, the required changes would be:
+- the `IPAMClaim` CRD status sub-resource will need to be updated, adding a
+  `Conditions` array. This way we will be able to report errors back to the
+  user (e.g. the desired IP allocation is already in use).
+- the `IPAMClaim` CRD spec will need to be updated, adding `MAC` and `IPs`
+  requests.
+- OVN-Kubernetes will need to try to apply the MAC and IP addresses requested
+  in its spec. If any conflicts are found for either MAC or IPs, the error will
+  be persisted in the `IPAMClaim` status (in the conditions array).
+- OVN-Kubernetes will record the requested IPs in the status sub-resource + (already does so today), along with a `Success` condition. + This option has the network admin create the IPAMClaim, and request a MAC and IP addresses for the VM; in a second stage of the implementation, MTV (or any other component able to introspect the source cluster for the VM's MAC and IPs) will create the IPAMClaim on behalf of the admin user. -In the following sub-sections we will detail each of these changes. - -#### IPPool CRD -This IPPool CRD has a 1:1 association to a UDN (or. For now, it'll only apply -to a primary UDN though. In the future, nothing prevents these CRs from being -used for secondary UDNs. - -The IPPool CRD is a cluster-scoped object associated to a UDN via the logical -network name (`NAD.Spec.Config.Name` attribute), since we want to have this -feature upstream in the k8snetworkplumbingwg, rather than in OVN-Kubernetes. - -The `IPPool` spec will have an attribute via which the admin can point to a -cluster UDN - by the logical network name. The admin (which is the only actor -able to create the `IPPool`) has read access to all NADs in all namespaces, -hence they can inspect the NAD object to extract the network name. We could -even update the cluster UDN type to feature the generated network name in its -status sub-resource, to simplify the UX of the admin user. - -An alternative would be to reference the UDN by the NAD name - with that -information, whatever controller reconciling the `IPPool` CRs can access the -NAD, and extract the network name. This approach would require a validating -webhook to ensure that you can't have multiple `IPPool`s referencing the same -logical network (since multiple NADs with the same logical network name - in -**different** namespaces can exist). - -### Preserving the VM gateway -Preserving the gateway will require changes to the OVN-Kubernetes API. The -cluster UDN CRD should be updated - adding a gateway definition - and the -OVN-Kubernetes -[NetConf](https://github.com/ovn-kubernetes/ovn-kubernetes/blob/2643dabe165bcb2d4564866ee1476a891c316fe3/go-controller/pkg/cni/types/types.go#L10) -CNI structure should also be updated with a `gateway` attribute. - -The primary UDN gateway code will need to be updated to parametrize the gateway -IP address, since today it is using the UDN subnet first IP address. -To keep the feature backwards compatible, that would be the default if the UDN -does **not** feature the gateway configuration. This will ensure the -DHCP/RA flows on the logical switch are advertising the correct gateway -information to the guests on the network, and also that the gateway is properly -configured to allow features like Kubernetes `Services`. - -This flow is described in more detail (and presents alternatives to it) in the -[OVN-Kubernetes enhancement proposal](https://github.com/ovn-kubernetes/ovn-kubernetes/pull/5238). - -### Workflow Description - -Explain how the user will use the feature. Be detailed and explicit. -Describe all of the actors, their roles, and the APIs or interfaces -involved. Define a starting state and then list the steps that the -user would need to go through to trigger the feature described in the -enhancement. Optionally add a -[mermaid](https://github.com/mermaid-js/mermaid#readme) sequence -diagram. - -Use sub-sections to explain variations, such as for error handling, -failure recovery, or alternative outcomes. - -For example: - -**cluster creator** is a human user responsible for deploying a -cluster. 
- -**application administrator** is a human user responsible for -deploying an application in a cluster. - -1. The cluster creator sits down at their keyboard... -2. ... -3. The cluster creator sees that their cluster is ready to receive - applications, and gives the application administrator their - credentials. - -See -https://github.com/openshift/enhancements/blob/master/enhancements/workload-partitioning/management-workload-partitioning.md#high-level-end-to-end-workflow -and https://github.com/openshift/enhancements/blob/master/enhancements/agent-installer/automated-workflow-for-agent-based-installer.md for more detailed examples. - ### API Extensions #### IPAMClaim CRD From 829e63a5b324dee0c4827981e2b8e2e8a1e9bc92 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Tue, 13 May 2025 12:58:36 +0100 Subject: [PATCH 11/24] PUDN, static ips: fill the YAML metadata on top Signed-off-by: Miguel Duarte Barroso --- ...rimary-udn-attached-vms-with-static-ips.md | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index 1ffddcf324..828e8dfe13 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -1,24 +1,26 @@ --- title: routed-ingress-support-for-primary-udn-attached-vms-with-static-ips authors: - - @maiqueb + - "@maiqueb" reviewers: # Include a comment about what domain expertise a reviewer is expected to bring and what area of the enhancement you expect them to focus on. For example: - "@networkguru, for networking aspects, please look at IP bootstrapping aspect" - - TBD + - "@qinqon" + - "@ormergi" + - "@eddev" + - "@tssurya" + - "@kyrtapz" approvers: # A single approver is preferred, the role of the approver is to raise important questions, help ensure the enhancement receives reviews from all applicable areas/SMEs, and determine when consensus is achieved such that the EP can move forward to implementation. Having multiple approvers makes it difficult to determine who is responsible for the actual approval. - - TBD + - "@trozet" api-approvers: # In case of new or modified APIs or API extensions (CRDs, aggregated apiservers, webhooks, finalizers). 
If there is no API change, use "None" - - TBD + - "@joelspeed" creation-date: 2025-05-07 -last-updated: yyyy-mm-dd +last-updated: 2025-05-13 tracking-link: # link to the tracking ticket (for example: Jira Feature or Epic ticket) that corresponds to this enhancement - - TBD + - https://issues.redhat.com/browse/CORENET-5999 see-also: - "/enhancements/network/user-defined-network-segmentation.md" - "/enhancements/network/bgp-ovn-kubernetes.md" -replaces: - - "/enhancements/that-less-than-great-idea.md" -superseded-by: - - "/enhancements/our-past-effort.md" +replaces: [] +superseded-by: [] --- To get started with this template: From 40f5bb5448d587661521dfcdf6c4286146287d11 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Tue, 13 May 2025 12:59:51 +0100 Subject: [PATCH 12/24] PUDN, static ips: cleanup the instructions from the doc Signed-off-by: Miguel Duarte Barroso --- ...rimary-udn-attached-vms-with-static-ips.md | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index 828e8dfe13..c2338c7013 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -23,30 +23,6 @@ replaces: [] superseded-by: [] --- -To get started with this template: -1. **Pick a domain.** Find the appropriate domain to discuss your enhancement. -1. **Make a copy of this template.** Copy this template into the directory for - the domain. -1. **Fill out the metadata at the top.** The embedded YAML document is - checked by the linter. -1. **Fill out the "overview" sections.** This includes the Summary and - Motivation sections. These should be easy and explain why the community - should desire this enhancement. -1. **Create a PR.** Assign it to folks with expertise in that domain to help - sponsor the process. -1. **Merge after reaching consensus.** Merge when there is consensus - that the design is complete and all reviewer questions have been - answered so that work can begin. Come back and update the document - if important details (API field names, workflow, etc.) change - during code review. -1. **Keep all required headers.** If a section does not apply to an - enhancement, explain why but do not remove the section. This part - of the process is enforced by the linter CI job. - -See ../README.md for background behind these instructions. - -Start by filling out the header with the metadata for this enhancement. 
-
 
 # Routed ingress support for primary UDN attached VMs with static IPs
 
 ## Summary

From 3015e73bfe160eaf387feaf7338ab85b9c527794 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Tue, 13 May 2025 15:50:53 +0100
Subject: [PATCH 13/24] PUDN, static ips: adding IPRequests to KubeVirt API as a discarded alternative

Signed-off-by: Miguel Duarte Barroso
---
 ...-for-primary-udn-attached-vms-with-static-ips.md | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index c2338c7013..9ed689e7a6 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -592,7 +592,18 @@ burden? Is it likely to be superceded by something else in the near future?
 
 ## Alternatives (Not Implemented)
 
-
+Another option which would provide a simpler implementation would be to modify
+the VMI specification, adding a way for the user (or the tool templating the
+VM - e.g. MTV) to specify which IP address they want the VM's interface to take.
+
+It would update the
+[v1.Interface](https://kubevirt.io/api-reference/main/definitions.html#_v1_interface)
+type, adding an `ipAddresses` array. This attribute is defined in
+`VMI.Spec.Domain.Devices.Interfaces`.
+
+This option (despite being the simplest to implement) is not taken into
+consideration since we're already past the KubeVirt 1.5 API freeze - as a
+result, we wouldn't be able to get this in OpenShift 4.20.
 
 ## Open Questions [optional]

From c8c1da6383c6a0838caa3906a62bbe4d29192f66 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Tue, 13 May 2025 16:08:48 +0100
Subject: [PATCH 14/24] PUDN, static ips: provide admin and vm owner flows for both alternatives

Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 9ed689e7a6..045768bc52 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -269,6 +269,12 @@ This flow is described in more detail (and presents alternatives to it) in the
 
 ### Workflow Description
 
+#### Roles
+- admin: a user with cluster admin rights. They can list NADs in all
+  namespaces, create cluster UDNs, and create / update `IPPool`s.
+- VM owner: a user without cluster admin rights. They own their namespaces.
+  They will either import - or create - a VM in the namespace they own.
+
 We'll list here the workflow for both the
 [centralized IP management](#centralized-ip-management) option, and the
@@ -324,6 +332,28 @@ provision the `IPPool` CR for the UDN on behalf of the admin user, thus
 simplifying the operation of the solution, making it more robust and less
 error-prone.
 
+##### Admin user flows for centralized approach
+
+The admin user will have to perform the following operations:
+- provision the cluster UDN CR having the overlay definition.
+  - layer2 topology and IPAM lifecycle `Persistent` are a **must**
+- provision the `IPPool` object, pointing at the logical network created in the
+  previous step via the `IPPool.Spec.NetworkName` attribute. This is only
+  required until another tool automatically provisions the `IPPool` on behalf of
+  the admin
+- fill out the `IPPool` with the MAC to IPs association of the cluster from
+  which the VMs are being imported. This is only required until another tool
+  (able to introspect the source virtualization cluster) can perform this
+  operation on behalf of the admin
+
+As indicated above, when a tool exists that can introspect the source cluster
+to learn the VM's IP addresses, the admin user will only need to provision
+the cluster UDN CR.
+
+##### VM owner flows for centralized approach
+
+The VM owner just has to use MTV to import the VM into CNV.
+
 #### De-centralized IP management workflow
 
 The [de-centralized IP management](#de-centralized-ip-management) flow is
@@ -367,6 +397,24 @@ IP addresses for the VM; in a second stage of the implementation, MTV (or any
 other component able to introspect the source cluster for the VM's MAC and IPs)
 will create the IPAMClaim on behalf of the admin user.
 
+##### Admin user flows for de-centralized approach
+
+The admin user will have to perform the following operations:
+- provision the cluster UDN CR having the overlay definition.
+  - layer2 topology and IPAM lifecycle `Persistent` are a **must**
+- provision the `IPAMClaim` object, whose name **must** be created like
+  `.`. The `IPAMClaim` must have both the IP and
+  MAC addresses for the VM attachment. This is only required until another tool
+  automatically provisions the `IPAMClaim`s on behalf of the admin
+
+As indicated above, when a tool exists that can introspect the source cluster
+to learn the VM's IP addresses, the admin user will only need to provision
+the cluster UDN CR.
+
+##### VM owner flows for de-centralized approach
+
+The VM owner just has to use MTV to import the VM into CNV.
+
 ### API Extensions
 
 #### IPAMClaim CRD

From fe58366614580a0ca047fe4e0a646bff7fd98106 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Tue, 13 May 2025 16:22:46 +0100
Subject: [PATCH 15/24] PUDN, static ips: add topology consideration sections

Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 24 ++++++++-----------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 045768bc52..7b47c509b8 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -582,28 +582,24 @@ status:
 
 #### Hypershift / Hosted Control Planes
 
-Are there any unique considerations for making this change work with
-Hypershift?
+The management cluster should have no reason to import an existing VM from
+another virtualization platform into OpenShift Virtualization.
 
-See https://github.com/openshift/enhancements/blob/e044f84e9b2bafa600e6c24e35d226463c2308a5/enhancements/multi-arch/heterogeneous-architecture-clusters.md?plain=1#L282
+It makes more sense to import VMs into the hosted clusters, in order to provide
+tenants with routed ingress into their hosted cluster workloads.
-How does it affect any of the components running in the -management cluster? How does it affect any components running split -between the management cluster and guest cluster? +There should be no hypershift platform-specific considerations with this +feature. #### Standalone Clusters -Is the change relevant for standalone clusters? +Full support. #### Single-node Deployments or MicroShift -How does this proposal affect the resource consumption of a -single-node OpenShift deployment (SNO), CPU and memory? - -How does this proposal affect MicroShift? For example, if the proposal -adds configuration options through API resources, should any of those -behaviors also be exposed to MicroShift admins through the -configuration file for MicroShift? +Full support; there might be some increased resource usage when scaling up +number of VMs requesting IP and MAC address, since it will mean more processing +in OVN-Kubernetes, and more CRs to reconcile. ### Implementation Details/Notes/Constraints From 2ffe8ba64f1b098e8f2844bab531db2c8c254987 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Wed, 14 May 2025 10:42:33 +0100 Subject: [PATCH 16/24] PUDN, static ips: make the IPPool "associated NADs" attr optional Signed-off-by: Miguel Duarte Barroso --- ...-support-for-primary-udn-attached-vms-with-static-ips.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index 7b47c509b8..c911781fa9 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -504,9 +504,9 @@ type IPPoolSpec struct { } type IPPoolStatus struct { - Conditions []Condition - AssociatedNADs []NADInfo // this is an optional improvement for - // improving the UX. It is not **required**. + Conditions []Condition `json:"conditions"` + // this is an optional improvement for improving the UX + AssociatedNADs []NADInfo `json:"associated-nads,omitempty"` } type NADInfo struct { From 9ffd0e398094eafae46e85d4bdc76e83afe7e66f Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Wed, 14 May 2025 11:16:31 +0100 Subject: [PATCH 17/24] PUDN, static ips: add gateway MAC considerations Signed-off-by: Miguel Duarte Barroso --- ...-support-for-primary-udn-attached-vms-with-static-ips.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index c911781fa9..696d905538 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -264,6 +264,12 @@ DHCP/RA flows on the logical switch are advertising the correct gateway information to the guests on the network, and also that the gateway is properly configured to allow features like Kubernetes `Services`. +Another aspect to take into consideration is the gateway MAC address; when we +import a VM into OpenShift Virtualization, the VM is started from scratch on +the destination cluster. Hence, the neighbor caches (ARP, NDP) will be updated. 
+This means there's nothing special we need to do in this regard - other than
+ensuring the gateway MAC address is not already in use in the logical network.
+
 This flow is described in more detail (and presents alternatives to it) in the
 [OVN-Kubernetes enhancement proposal](https://github.com/ovn-kubernetes/ovn-kubernetes/pull/5238).

From 9cfa87e5172757a511fdd3674b8b35cbc0668dff Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Wed, 14 May 2025 12:03:21 +0100
Subject: [PATCH 18/24] PUDN, static ips: add test plan, and graduation criteria sections

Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 89 ++++---------------
 1 file changed, 17 insertions(+), 72 deletions(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 696d905538..67c703d9c0 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -664,78 +664,33 @@
 
 ## Test Plan
 
-**Note:** *Section not required until targeted at a release.*
-
-Consider the following in developing a test plan for this enhancement:
-- Will there be e2e and integration tests, in addition to unit tests?
-- How will it be tested in isolation vs with other components?
-- What additional testing is necessary to support managed OpenShift service-based offerings?
-
-No need to outline all of the test cases, just the general strategy. Anything
-that would count as tricky in the implementation and anything particularly
-challenging to test should be called out.
-
-All code is expected to have adequate tests (eventually with coverage
-expectations).
+* E2E upstream and downstream jobs covering VM creation with requested
+  IP/MAC/gateway configuration for both IPv4 and dual-stack configurations
+* E2E downstream jobs covering that importing a VM (whose network configuration
+  must be kept) via MTV succeeds for both IPv4 and dual-stack configurations
+* E2E tests covering that the existing features (network policies, IP spoof
+  protection, services) work as expected for an imported VM, for both IPv4 and
+  dual-stack configurations
 
 ## Graduation Criteria
 
-**Note:** *Section not required until targeted at a release.*
-
-Define graduation milestones.
-
-These may be defined in terms of API maturity, or as something else. Initial proposal
-should keep this high-level with a focus on what signals will be looked at to
-determine graduation.
-
-Consider the following in developing the graduation criteria for this
-enhancement:
-
-- Maturity levels
-  - [`alpha`, `beta`, `stable` in upstream Kubernetes][maturity-levels]
-  - `Dev Preview`, `Tech Preview`, `GA` in OpenShift
-- [Deprecation policy][deprecation-policy]
-
-Clearly define what graduation means by either linking to the [API doc definition](https://kubernetes.io/docs/concepts/overview/kubernetes-api/#api-versioning),
-or by redefining what graduation means.
-
-In general, we try to use the same stages (alpha, beta, GA), regardless how the functionality is accessed.
- -[maturity-levels]: https://git.k8s.io/community/contributors/devel/sig-architecture/api_changes.md#alpha-beta-and-stable-versions -[deprecation-policy]: https://kubernetes.io/docs/reference/using-api/deprecation-policy/ - -**If this is a user facing change requiring new or updated documentation in [openshift-docs](https://github.com/openshift/openshift-docs/), -please be sure to include in the graduation criteria.** - -**Examples**: These are generalized examples to consider, in addition -to the aforementioned [maturity levels][maturity-levels]. - ### Dev Preview -> Tech Preview -- Ability to utilize the enhancement end to end -- End user documentation, relative API stability -- Sufficient test coverage -- Gather feedback from users rather than just developers -- Enumerate service level indicators (SLIs), expose SLIs as metrics -- Write symptoms-based alerts for the component(s) +There will be no Dev Preview for this feature. ### Tech Preview -> GA -- More testing (upgrade, downgrade, scale) -- Sufficient time for feedback -- Available by default -- Backhaul SLI telemetry -- Document SLOs for the component -- Conduct load testing -- User facing documentation created in [openshift-docs](https://github.com/openshift/openshift-docs/) - -**For non-optional features moving to GA, the graduation criteria must include -end to end tests.** +Targeting GA in OCP version 4.20. ### Removing a deprecated feature -- Announce deprecation and support policy of the existing feature -- Deprecate the feature +The annotation currently being used for indicating to OVN-Kubernetes what is +the `IPAMClaim` to use for the primary UDN attachment will be deprecated, and +we plan on using the [multus default annotation](#cnv-ovnk-api) for it. + +The deprecation strategy is described in the OVN-Kubernetes +[OKEP](https://github.com/ovn-kubernetes/ovn-kubernetes/pull/5238). ## Upgrade / Downgrade Strategy @@ -780,17 +735,7 @@ Downgrade expectations: ## Version Skew Strategy -How will the component handle version skew with other components? -What are the guarantees? Make sure this is in the test plan. - -Consider the following in developing a version skew strategy for this -enhancement: -- During an upgrade, we will always have skew among components, how will this impact your work? -- Does this enhancement involve coordinating behavior in the control plane and - in the kubelet? How does an n-2 kubelet without this feature available behave - when this feature is used? -- Will any other components on the node change? For example, changes to CSI, CRI - or CNI may require updating that component before the kubelet. 
+N/A ## Operational Aspects of API Extensions From cd30ef2742693734b1ac108cf0fae38f4c742fb5 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Wed, 14 May 2025 12:08:13 +0100 Subject: [PATCH 19/24] PUDN, static ips: add operational aspects, and upgrade strategy sections Signed-off-by: Miguel Duarte Barroso --- ...rimary-udn-attached-vms-with-static-ips.md | 126 ++---------------- 1 file changed, 11 insertions(+), 115 deletions(-) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index 67c703d9c0..53be63c005 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -694,44 +694,7 @@ The deprecation strategy is described in the OVN-Kubernetes ## Upgrade / Downgrade Strategy -If applicable, how will the component be upgraded and downgraded? Make sure this -is in the test plan. - -Consider the following in developing an upgrade/downgrade strategy for this -enhancement: -- What changes (in invocations, configurations, API use, etc.) is an existing - cluster required to make on upgrade in order to keep previous behavior? -- What changes (in invocations, configurations, API use, etc.) is an existing - cluster required to make on upgrade in order to make use of the enhancement? - -Upgrade expectations: -- Each component should remain available for user requests and - workloads during upgrades. Ensure the components leverage best practices in handling [voluntary - disruption](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/). Any exception to - this should be identified and discussed here. -- Micro version upgrades - users should be able to skip forward versions within a - minor release stream without being required to pass through intermediate - versions - i.e. `x.y.N->x.y.N+2` should work without requiring `x.y.N->x.y.N+1` - as an intermediate step. -- Minor version upgrades - you only need to support `x.N->x.N+1` upgrade - steps. So, for example, it is acceptable to require a user running 4.3 to - upgrade to 4.5 with a `4.3->4.4` step followed by a `4.4->4.5` step. -- While an upgrade is in progress, new component versions should - continue to operate correctly in concert with older component - versions (aka "version skew"). For example, if a node is down, and - an operator is rolling out a daemonset, the old and new daemonset - pods must continue to work correctly even while the cluster remains - in this partially upgraded state for some time. - -Downgrade expectations: -- If an `N->N+1` upgrade fails mid-way through, or if the `N+1` cluster is - misbehaving, it should be possible for the user to rollback to `N`. It is - acceptable to require some documented manual steps in order to fully restore - the downgraded cluster to its previous state. Examples of acceptable steps - include: - - Deleting any CVO-managed resources added by the new version. The - CVO does not currently delete resources that no longer exist in - the target version. +N/A ## Version Skew Strategy @@ -739,88 +702,21 @@ N/A ## Operational Aspects of API Extensions -Describe the impact of API extensions (mentioned in the proposal section, i.e. 
CRDs, -admission and conversion webhooks, aggregated API servers, finalizers) here in detail, -especially how they impact the OCP system architecture and operational aspects. - -- For conversion/admission webhooks and aggregated apiservers: what are the SLIs (Service Level - Indicators) an administrator or support can use to determine the health of the API extensions - - Examples (metrics, alerts, operator conditions) - - authentication-operator condition `APIServerDegraded=False` - - authentication-operator condition `APIServerAvailable=True` - - openshift-authentication/oauth-apiserver deployment and pods health - -- What impact do these API extensions have on existing SLIs (e.g. scalability, API throughput, - API availability) +The proposed `IPPool` CRD must be provisioned by the admin (or the source +cluster introspection tool) before the VMs are migrated into OpenShift virt, +otherwise, they will lose the IP addresses they had on the source cluster. - Examples: - - Adds 1s to every pod update in the system, slowing down pod scheduling by 5s on average. - - Fails creation of ConfigMap in the system when the webhook is not available. - - Adds a dependency on the SDN service network for all resources, risking API availability in case - of SDN issues. - - Expected use-cases require less than 1000 instances of the CRD, not impacting - general API throughput. +The gateway for the network must be configured in the cluster UDN CR at +creation time, as any other cluster UDN parameter. -- How is the impact on existing SLIs to be measured and when (e.g. every release by QE, or - automatically in CI) and by whom (e.g. perf team; name the responsible person and let them review - this enhancement) - -- Describe the possible failure modes of the API extensions. -- Describe how a failure or behaviour of the extension will impact the overall cluster health - (e.g. which kube-controller-manager functionality will stop working), especially regarding - stability, availability, performance and security. -- Describe which OCP teams are likely to be called upon in case of escalation with one of the failure modes - and add them as reviewers to this enhancement. +Hence, some planning and preparation are required from the admin before the +VM owner starts importing VMs into the OpenShift Virt cluster via MTV. ## Support Procedures -Describe how to -- detect the failure modes in a support situation, describe possible symptoms (events, metrics, - alerts, which log output in which component) - - Examples: - - If the webhook is not running, kube-apiserver logs will show errors like "failed to call admission webhook xyz". - - Operator X will degrade with message "Failed to launch webhook server" and reason "WehhookServerFailed". - - The metric `webhook_admission_duration_seconds("openpolicyagent-admission", "mutating", "put", "false")` - will show >1s latency and alert `WebhookAdmissionLatencyHigh` will fire. - -- disable the API extension (e.g. remove MutatingWebhookConfiguration `xyz`, remove APIService `foo`) - - - What consequences does it have on the cluster health? - - Examples: - - Garbage collection in kube-controller-manager will stop working. - - Quota will be wrongly computed. - - Disabling/removing the CRD is not possible without removing the CR instances. Customer will lose data. - Disabling the conversion webhook will break garbage collection. - - - What consequences does it have on existing, running workloads? 
-  - Examples:
-    - New namespaces won't get the finalizer "xyz" and hence might leak resource X
-      when deleted.
-    - SDN pod-to-pod routing will stop updating, potentially breaking pod-to-pod
-      communication after some minutes.
-
-  - What consequences does it have for newly created workloads?
-    - Examples:
-      - New pods in namespace with Istio support will not get sidecars injected, breaking
-        their networking.
-
-- Does functionality fail gracefully and will work resume when re-enabled without risking
-  consistency?
-  - Examples:
-    - The mutating admission webhook "xyz" has FailPolicy=Ignore and hence
-      will not block the creation or updates on objects when it fails. When the
-      webhook comes back online, there is a controller reconciling all objects, applying
-      labels that were not applied during admission webhook downtime.
-    - Namespaces deletion will not delete all objects in etcd, leading to zombie
-      objects when another namespace with the same name is created.
+TODO
 
 ## Infrastructure Needed [optional]
 
-Use this section if you need things from the project. Examples include a new
-subproject, repos requested, github details, and/or testing infrastructure.
+We'll need a virt-aware lane with CNV (and MTV) installed so we can e2e test
+the features.

From cc4bfd71a557c7d156cbab5ad037b38ebf7bd0af Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Wed, 14 May 2025 12:12:36 +0100
Subject: [PATCH 20/24] PUDN, static ips: fill out the drawbacks section

Signed-off-by: Miguel Duarte Barroso
---
 ...rimary-udn-attached-vms-with-static-ips.md | 34 ++++++------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 53be63c005..ad36db4841 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -609,36 +609,24 @@ in OVN-Kubernetes, and more CRs to reconcile.
 
 ### Implementation Details/Notes/Constraints
 
-What are some important details that didn't come across above in the
-**Proposal**? Go in to as much detail as necessary here. This might be
-a good place to talk about core concepts and how they relate. While it is useful
-to go into the details of the code changes required, it is not necessary to show
-how the code will be rewritten in the enhancement.
+TODO
 
 ### Risks and Mitigations
 
-What are the risks of this proposal and how do we mitigate. Think broadly. For
-example, consider both security and how this will impact the larger OKD
-ecosystem.
-
-How will security be reviewed and by whom?
-
-How will UX be reviewed and by whom?
-
-Consider including folks that also work outside your immediate sub-project.
+TODO
 
 ### Drawbacks
 
-The idea is to find the best form of an argument why this enhancement should
-_not_ be implemented.
+The biggest drawback we have to implementing this feature is the lack of clear
+asks from customers - we're pretty much guessing what they would want to use.
 
-What trade-offs (technical/efficiency cost, user experience, flexibility,
-supportability, etc) must be made in order to implement this? What are the reasons
-we might not want to undertake this proposal, and how do we overcome them?
+We do not know (for instance):
+- is this feature only for VMs?
+- is this feature only about importing VMs? Should we allow creating new VMs
+  with dedicated MAC / IP / gateway requests?
+- are the end users (admins) after a centralized IP management alternative?
+  Do they want to have a single place to check their network address
+  assignments?
 
-Does this proposal implement a behavior that's new/unique/novel? Is it poorly
-aligned with existing user expectations? Will it be a significant maintenance
-burden? Is it likely to be superceded by something else in the near future?
 
 ## Alternatives (Not Implemented)

From d3e4be0dbf05f3b95cd42d0288496eeb4a03f488 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Wed, 14 May 2025 12:49:11 +0100
Subject: [PATCH 21/24] PUDN, static ips: add a follow-up feature for the IPPool

Signed-off-by: Miguel Duarte Barroso
---
 ...r-primary-udn-attached-vms-with-static-ips.md | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index ad36db4841..6c80ef0d06 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -584,6 +584,13 @@ status:
   lastTransitionTime: "2025-05-13T12:05:00Z"
 ```
 
+On a follow-up, we could reconcile the `IPPool` CR with the MAC <-> IPs
+association of all workloads attached to that network - i.e. also for the
+workloads being created without MAC / IP requests. That would be useful to the
+network admin / service engineer to get the full view of what's connected
+to a network at any given time. This would require a separate enhancement to
+get a better understanding of the use-cases, and how to implement said feature.
+
 ### Topology Considerations
 
 #### Hypershift / Hosted Control Planes

## Support Procedures

-TODO
+Issues with an imported VM should be logged in the launcher pod events, and
+persisted in the corresponding `IPAMClaim` so the support team can check what
+failed in the address allocation requests.
+
+The `IPPool` CRD would help the support engineer get a grasp of the MAC <-> IPs
+being used for a network at any given time. Having this notion will simplify
+the support procedures on the cluster. This is an optional step, described in
+the [centralized IP management](#centralized-ip-management) section.
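+
+As a sketch of what the support engineer would look for, a failed allocation
+could surface in the corresponding `IPAMClaim` status roughly as below
+(illustrative values; the condition type and reason are the ones proposed in
+the [API Extensions](#api-extensions) section):
+
+```
+status:
+  conditions:
+  - type: AllocationConflict
+    status: "True"
+    reason: IPAlreadyExists
+    message: "Requested IP 192.168.1.5 is already assigned in the network"
+    lastTransitionTime: "2025-05-13T11:56:00Z"
+```
+
+Checking these conditions, together with the events on the crash-looping
+launcher pod, should be the first step when debugging a failed VM import.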
## Infrastructure Needed [optional]
 
 We'll need a virt-aware lane with CNV (and MTV) installed so we can e2e test
 the features.

From 2c4f3a3eea72b9c744a4317309ee2995c7e09ff3 Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Wed, 14 May 2025 13:11:12 +0100
Subject: [PATCH 22/24] PUDN, static ips: drop the open questions optional section

Signed-off-by: Miguel Duarte Barroso
---
 ...support-for-primary-udn-attached-vms-with-static-ips.md | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 6c80ef0d06..5620f5c8f4 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -650,13 +650,6 @@ consideration since we're already past the KubeVirt 1.5 API freeze - as a
 result, we wouldn't be able to get this in OpenShift 4.20.
 
-## Open Questions [optional]
-
-This is where to call out areas of the design that require closure before deciding
-to implement the design. For instance,
- > 1. This requires exposing previously private resources which contain sensitive
-   information. Can we do this?
-
 ## Test Plan

From c11701322972065f0c7275dff05cc32707277b9d Mon Sep 17 00:00:00 2001
From: Miguel Duarte Barroso
Date: Wed, 14 May 2025 16:14:30 +0100
Subject: [PATCH 23/24] PUDN, static ips: add IPv6 considerations to "preserving the VM gateway" section

Signed-off-by: Miguel Duarte Barroso
---
 ...upport-for-primary-udn-attached-vms-with-static-ips.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
index 5620f5c8f4..1de42a4d06 100644
--- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
+++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md
@@ -260,10 +260,16 @@ The primary UDN gateway code will need to be updated to parametrize the gateway
 IP address, since today it is using the UDN subnet first IP address.
 To keep the feature backwards compatible, that would be the default if the UDN
 does **not** feature the gateway configuration. This will ensure the
-DHCP/RA flows on the logical switch are advertising the correct gateway
+DHCP flows on the logical switch are advertising the correct gateway
 information to the guests on the network, and also that the gateway is properly
 configured to allow features like Kubernetes `Services`.
 
+For IPv6 the flow will be different; IPv6 gateways are dynamic! Hence, when a
+VM is imported into OpenShift Virtualization, the OVN control plane must send a
+Router Advertisement to the VM instructing it to forget about the old gateway -
+i.e. send an RA with `lifetime = 0` with the source address of the gateway
+defined for the VM in the old cluster.
+
 Another aspect to take into consideration is the gateway MAC address; when we
 import a VM into OpenShift Virtualization, the VM is started from scratch on
 the destination cluster. Hence, the neighbor caches (ARP, NDP) will be updated.
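To make the proposed CNI change concrete, a NAD rendered for such a network
could carry the new `gateway` attribute in its OVN-Kubernetes NetConf. The
sketch below is illustrative only - the names and addresses are made up, and
the exact attribute name and placement are being settled in the OVN-Kubernetes
OKEP referenced above:

```
apiVersion: k8s.cni.cncf.io/v1
kind: NetworkAttachmentDefinition
metadata:
  name: vm-import-net
  namespace: blue-vms
spec:
  config: |
    {
      "cniVersion": "1.0.0",
      "type": "ovn-k8s-cni-overlay",
      "name": "vm-import-net",
      "netAttachDefName": "blue-vms/vm-import-net",
      "topology": "layer2",
      "role": "primary",
      "subnets": "10.0.0.0/16",
      "gateway": "10.0.0.254"
    }
```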
From a43431365a0b50665e47713ab02c895b4d9eea4f Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Mon, 19 May 2025 12:13:06 +0100 Subject: [PATCH 24/24] PUDN, static ips: react to patryks comments Signed-off-by: Miguel Duarte Barroso --- ...rimary-udn-attached-vms-with-static-ips.md | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md index 1de42a4d06..24b4ffc832 100644 --- a/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md +++ b/enhancements/network/routed-ingress-support-for-primary-udn-attached-vms-with-static-ips.md @@ -80,10 +80,7 @@ having to reconfigure the VM's networking configuration (MAC, IPs, gateway). - Preserve the original VM's MAC address - Preserve the original VM's IP address - Specify the gateway IP address of the imported VM so it can keep the same -default route -- Allow excludeSubnets to be used with L2 UDNs to ensure OVNK does not use an IP -address from the range that VMs have already been assigned outside the -cluster (or for secondary IP addresses assigned to the VM's interfaces) +default route. This gateway is common to the entire logical network. - Ensure it is possible to enable non-NATed traffic for pods with the static network configuration by exposing the network through BGP @@ -98,6 +95,11 @@ statically in the guest. - Adding non default routes to the VM when importing it into OpenShift Virt. - Modifying the default gateway and management IPs of a primary UDN after it was created. - Modifying a pod's network configuration after the pod was created. +- Support importing a "live" VM into the OpenShift virtualization cluster (i.e. + without requiring a VM restart). +- Allow excludeSubnets to be used with L2 UDNs to ensure OVNK does not use an IP + address from the range that VMs have already been assigned outside the + cluster (or for secondary IP addresses assigned to the VM's interfaces). **NOTE:** implementing support on UDNs (achieving the namespace isolation use-case) is outside the scope for this feature. @@ -182,6 +184,11 @@ OVN-Kubernetes would read the CR, attempt to reserve the requested MAC and IP, and then persist that information in the IPAMClaim status - reporting a successful sync in the `IPAMClaim` status - or a failure otherwise. +In this option, the [CNV to OVN-Kubernetes API](#cnv-ovnk-api) **remains** as +defined today - i.e. OVN-Kubernetes will be pointed to an `IPAMClaim` via a +particular annotation on the pod. This means there is no need to implement the +changes described in the aforementioned section. 
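+
+As a sketch, an admin-provisioned `IPAMClaim` for this option could look
+roughly as below - illustrative only: the claim name and addresses are made
+up, and the `ipRequests` / `macRequest` fields are the spec additions proposed
+in the [API Extensions](#api-extensions) section:
+
+```
+apiVersion: k8s.cni.cncf.io/v1alpha1
+kind: IPAMClaim
+metadata:
+  name: vm-a.podnet
+  namespace: blue-vms
+spec:
+  network: "prod-network"
+  interface: "pod"
+  ipRequests:
+    - "10.0.0.5/16"
+  macRequest: "00:1a:4b:12:34:56"
+```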
+
 
 #### CNV OVNK API
@@ -310,6 +317,9 @@ participant MTV
 participant CNV
 participant o as OVN-Kubernetes
 
+Admin ->> o: cluster UDN
+o -->> Admin: OK
+
 Admin ->> CNV: provision IPPool
 CNV -->> Admin: OK
 
@@ -380,6 +390,9 @@ participant MTV
 participant CNV
 participant o as OVN-Kubernetes
 
+Admin ->> o: cluster UDN
+o -->> Admin: OK
+
 Admin ->> CNV: provision IPAMClaim w/ MAC and IP requests
 CNV -->> Admin: OK
 
@@ -454,7 +467,8 @@ type IPAMClaimSpec struct {
 	Interface string `json:"interface"`
+	// The IPs requested by the user
+	// +optional
-+	IPRequests []CIDR `json:"ipRequests,omitempty"`
++	IPRequests []CIDR           `json:"ipRequests,omitempty"`
++	MACRequest net.HardwareAddr `json:"macRequest,omitempty"`
 }
 
 // IPAMClaimStatus contains the observed status of the IPAMClaim.
@@ -503,15 +517,15 @@ association for a logical network.
 
 ```go
 type IPPool struct {
-	metav1.TypeMeta `json:",inline"`
-	metav1.ObjectMeta `json:"metadata,omitempty"`
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
 
 	Spec   IPPoolSpec   `json:"spec,omitempty"`
 	Status IPPoolStatus `json:"status,omitempty"`
 }
 
 type IPPoolSpec struct {
-	NetworkName string `json:"network-name"`
+	NetworkName string             `json:"network-name"`
 	Entries map[string][]net.IPNet `json:"entries"`
 }
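For reference, the multus default network annotation mentioned in the
[CNV OVNK API](#cnv-ovnk-api) section could end up looking roughly like the
fragment below. This is a sketch only - the values are made up, and the exact
shape of the network selection element should be confirmed against the multus
documentation:

```
metadata:
  annotations:
    v1.multus-cni.io/default-network: '[{
      "name": "vm-import-net",
      "namespace": "blue-vms",
      "ips": ["10.0.0.5/16"],
      "mac": "00:1a:4b:12:34:56"
    }]'
```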