diff --git a/go.mod b/go.mod index a767be27e..0f734459d 100644 --- a/go.mod +++ b/go.mod @@ -21,6 +21,7 @@ require ( github.com/safchain/ethtool v0.4.1 github.com/vishvananda/netlink v1.3.0 golang.org/x/sys v0.23.0 + sigs.k8s.io/knftables v0.0.17 ) require ( diff --git a/go.sum b/go.sum index 2d62938a8..ab1b696dc 100644 --- a/go.sum +++ b/go.sum @@ -68,6 +68,8 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k= github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/lithammer/dedent v1.1.0 h1:VNzHMVCBNG1j0fh3OrsFRkVUwStdDArbgBWoPAffktY= +github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk= github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y= github.com/networkplumbing/go-nft v0.4.0 h1:kExVMwXW48DOAukkBwyI16h4uhE5lN9iMvQd52lpTyU= @@ -194,3 +196,5 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +sigs.k8s.io/knftables v0.0.17 h1:wGchTyRF/iGTIjd+vRaR1m676HM7jB8soFtyr/148ic= +sigs.k8s.io/knftables v0.0.17/go.mod h1:f/5ZLKYEUPUhVjUCg6l80ACdL7CIIyeL0DxfgojGRTk= diff --git a/pkg/ip/ipmasq_iptables_linux.go b/pkg/ip/ipmasq_iptables_linux.go new file mode 100644 index 000000000..5c1fcfa80 --- /dev/null +++ b/pkg/ip/ipmasq_iptables_linux.go @@ -0,0 +1,161 @@ +// Copyright 2015 CNI authors +// +// Licensed 
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ip + +import ( + "fmt" + "net" + + "github.com/coreos/go-iptables/iptables" + + "github.com/containernetworking/cni/pkg/types" + "github.com/containernetworking/plugins/pkg/utils" +) + +// setupIPMasqIPTables is the iptables-based implementation of SetupIPMasqForNetwork +func setupIPMasqIPTables(ipn *net.IPNet, network, _, containerID string) error { + // Note: for historical reasons, the iptables implementation ignores ifname. + chain := utils.FormatChainName(network, containerID) + comment := utils.FormatComment(network, containerID) + return SetupIPMasq(ipn, chain, comment) +} + +// SetupIPMasq installs iptables rules to masquerade traffic +// coming from ip of ipn and going outside of ipn. +// Deprecated: This function only supports iptables. Use SetupIPMasqForNetwork, which +// supports both iptables and nftables. 
+func SetupIPMasq(ipn *net.IPNet, chain string, comment string) error { + isV6 := ipn.IP.To4() == nil + + var ipt *iptables.IPTables + var err error + var multicastNet string + + if isV6 { + ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv6) + multicastNet = "ff00::/8" + } else { + ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv4) + multicastNet = "224.0.0.0/4" + } + if err != nil { + return fmt.Errorf("failed to locate iptables: %v", err) + } + + // Create the chain if it doesn't exist + exists := false + chains, err := ipt.ListChains("nat") + if err != nil { + return fmt.Errorf("failed to list chains: %v", err) + } + for _, ch := range chains { + if ch == chain { + exists = true + break + } + } + if !exists { + if err = ipt.NewChain("nat", chain); err != nil { + return err + } + } + + // Packets to this network should not be touched + if err := ipt.AppendUnique("nat", chain, "-d", ipn.String(), "-j", "ACCEPT", "-m", "comment", "--comment", comment); err != nil { + return err + } + + // Don't masquerade multicast - pods should be able to talk to other pods + // on the local network via multicast. + if err := ipt.AppendUnique("nat", chain, "!", "-d", multicastNet, "-j", "MASQUERADE", "-m", "comment", "--comment", comment); err != nil { + return err + } + + // Packets from the specific IP of this network will hit the chain + return ipt.AppendUnique("nat", "POSTROUTING", "-s", ipn.IP.String(), "-j", chain, "-m", "comment", "--comment", comment) +} + +// teardownIPMasqIPTables is the iptables-based implementation of TeardownIPMasqForNetwork +func teardownIPMasqIPTables(ipn *net.IPNet, network, _, containerID string) error { + // Note: for historical reasons, the iptables implementation ignores ifname. + chain := utils.FormatChainName(network, containerID) + comment := utils.FormatComment(network, containerID) + return TeardownIPMasq(ipn, chain, comment) +} + +// TeardownIPMasq undoes the effects of SetupIPMasq. 
+// Deprecated: This function only supports iptables. Use TeardownIPMasqForNetwork, which +// supports both iptables and nftables. +func TeardownIPMasq(ipn *net.IPNet, chain string, comment string) error { + isV6 := ipn.IP.To4() == nil + + var ipt *iptables.IPTables + var err error + + if isV6 { + ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv6) + } else { + ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv4) + } + if err != nil { + return fmt.Errorf("failed to locate iptables: %v", err) + } + + err = ipt.Delete("nat", "POSTROUTING", "-s", ipn.IP.String(), "-j", chain, "-m", "comment", "--comment", comment) + if err != nil && !isNotExist(err) { + return err + } + + // for downward compatibility + err = ipt.Delete("nat", "POSTROUTING", "-s", ipn.String(), "-j", chain, "-m", "comment", "--comment", comment) + if err != nil && !isNotExist(err) { + return err + } + + err = ipt.ClearChain("nat", chain) + if err != nil && !isNotExist(err) { + return err + } + + err = ipt.DeleteChain("nat", chain) + if err != nil && !isNotExist(err) { + return err + } + + return nil +} + +// gcIPMasqIPTables is the iptables-based implementation of GCIPMasqForNetwork +func gcIPMasqIPTables(_ string, _ []types.GCAttachment) error { + // FIXME: The iptables implementation does not support GC. + // + // (In theory, it _could_ backward-compatibly support it, by adding a no-op rule + // with a comment indicating the network to each chain it creates, so that it + // could later figure out which chains corresponded to which networks; older + // implementations would ignore the extra rule but would still correctly delete + // the chain on teardown (because they ClearChain() before doing DeleteChain()).) + + return nil +} + +// isNotExist returns true if the error is from iptables indicating +// that the target does not exist. 
+func isNotExist(err error) bool { + e, ok := err.(*iptables.Error) + if !ok { + return false + } + return e.IsNotExist() +} diff --git a/pkg/ip/ipmasq_linux.go b/pkg/ip/ipmasq_linux.go index aa59a8db5..bad83541d 100644 --- a/pkg/ip/ipmasq_linux.go +++ b/pkg/ip/ipmasq_linux.go @@ -15,111 +15,78 @@ package ip import ( + "errors" "fmt" "net" + "strings" - "github.com/coreos/go-iptables/iptables" + "github.com/containernetworking/cni/pkg/types" + "github.com/containernetworking/plugins/pkg/utils" ) -// SetupIPMasq installs iptables rules to masquerade traffic -// coming from ip of ipn and going outside of ipn -func SetupIPMasq(ipn *net.IPNet, chain string, comment string) error { - isV6 := ipn.IP.To4() == nil - - var ipt *iptables.IPTables - var err error - var multicastNet string - - if isV6 { - ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv6) - multicastNet = "ff00::/8" - } else { - ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv4) - multicastNet = "224.0.0.0/4" - } - if err != nil { - return fmt.Errorf("failed to locate iptables: %v", err) - } - - // Create chain if doesn't exist - exists := false - chains, err := ipt.ListChains("nat") - if err != nil { - return fmt.Errorf("failed to list chains: %v", err) - } - for _, ch := range chains { - if ch == chain { - exists = true - break - } - } - if !exists { - if err = ipt.NewChain("nat", chain); err != nil { - return err +// SetupIPMasqForNetwork installs rules to masquerade traffic coming from ip of ipn and +// going outside of ipn, using a chain name based on network, ifname, and containerID. The +// backend can be either "iptables" or "nftables"; if it is nil, then a suitable default +// implementation will be used. 
+func SetupIPMasqForNetwork(backend *string, ipn *net.IPNet, network, ifname, containerID string) error { + if backend == nil { + // Prefer iptables, unless only nftables is available + defaultBackend := "iptables" + if !utils.SupportsIPTables() && utils.SupportsNFTables() { + defaultBackend = "nftables" } + backend = &defaultBackend } - // Packets to this network should not be touched - if err := ipt.AppendUnique("nat", chain, "-d", ipn.String(), "-j", "ACCEPT", "-m", "comment", "--comment", comment); err != nil { - return err + switch *backend { + case "iptables": + return setupIPMasqIPTables(ipn, network, ifname, containerID) + case "nftables": + return setupIPMasqNFTables(ipn, network, ifname, containerID) + default: + return fmt.Errorf("unknown ipmasq backend %q", *backend) } - - // Don't masquerade multicast - pods should be able to talk to other pods - // on the local network via multicast. - if err := ipt.AppendUnique("nat", chain, "!", "-d", multicastNet, "-j", "MASQUERADE", "-m", "comment", "--comment", comment); err != nil { - return err - } - - // Packets from the specific IP of this network will hit the chain - return ipt.AppendUnique("nat", "POSTROUTING", "-s", ipn.IP.String(), "-j", chain, "-m", "comment", "--comment", comment) } -// TeardownIPMasq undoes the effects of SetupIPMasq -func TeardownIPMasq(ipn *net.IPNet, chain string, comment string) error { - isV6 := ipn.IP.To4() == nil +// TeardownIPMasqForNetwork undoes the effects of SetupIPMasqForNetwork +func TeardownIPMasqForNetwork(ipn *net.IPNet, network, ifname, containerID string) error { + var errs []string - var ipt *iptables.IPTables - var err error + // Do both the iptables and the nftables cleanup, since the pod may have been + // created with a different version of this plugin or a different configuration. 
- if isV6 { - ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv6) - } else { - ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv4) - } - if err != nil { - return fmt.Errorf("failed to locate iptables: %v", err) + err := teardownIPMasqIPTables(ipn, network, ifname, containerID) + if err != nil && utils.SupportsIPTables() { + errs = append(errs, err.Error()) } - err = ipt.Delete("nat", "POSTROUTING", "-s", ipn.IP.String(), "-j", chain, "-m", "comment", "--comment", comment) - if err != nil && !isNotExist(err) { - return err + err = teardownIPMasqNFTables(ipn, network, ifname, containerID) + if err != nil && utils.SupportsNFTables() { + errs = append(errs, err.Error()) } - // for downward compatibility - err = ipt.Delete("nat", "POSTROUTING", "-s", ipn.String(), "-j", chain, "-m", "comment", "--comment", comment) - if err != nil && !isNotExist(err) { - return err + if errs == nil { + return nil } + return errors.New(strings.Join(errs, "\n")) +} - err = ipt.ClearChain("nat", chain) - if err != nil && !isNotExist(err) { - return err - } +// GCIPMasqForNetwork garbage collects stale IPMasq entries for network +func GCIPMasqForNetwork(network string, attachments []types.GCAttachment) error { + var errs []string - err = ipt.DeleteChain("nat", chain) - if err != nil && !isNotExist(err) { - return err + err := gcIPMasqIPTables(network, attachments) + if err != nil && utils.SupportsIPTables() { + errs = append(errs, err.Error()) } - return nil -} + err = gcIPMasqNFTables(network, attachments) + if err != nil && utils.SupportsNFTables() { + errs = append(errs, err.Error()) + } -// isNotExist returnst true if the error is from iptables indicating -// that the target does not exist. 
-func isNotExist(err error) bool { - e, ok := err.(*iptables.Error) - if !ok { - return false + if errs == nil { + return nil } - return e.IsNotExist() + return errors.New(strings.Join(errs, "\n")) } diff --git a/pkg/ip/ipmasq_nftables_linux.go b/pkg/ip/ipmasq_nftables_linux.go new file mode 100644 index 000000000..5c7458c99 --- /dev/null +++ b/pkg/ip/ipmasq_nftables_linux.go @@ -0,0 +1,229 @@ +// Copyright 2023 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ip + +import ( + "context" + "fmt" + "net" + "strings" + + "sigs.k8s.io/knftables" + + "github.com/containernetworking/cni/pkg/types" + "github.com/containernetworking/plugins/pkg/utils" +) + +const ( + ipMasqTableName = "cni_plugins_masquerade" + ipMasqChainName = "masq_checks" +) + +// The nftables ipmasq implementation is mostly like the iptables implementation, with +// minor updates to fix a bug (adding `ifname`) and to allow future GC support. +// +// We add a rule for each mapping, with a comment containing a hash of its identifiers, +// so that we can later reliably delete the rules we want. (This is important because in +// edge cases, it's possible the plugin might see "ADD container A with IP 192.168.1.3", +// followed by "ADD container B with IP 192.168.1.3" followed by "DEL container A with IP +// 192.168.1.3", and we need to make sure that the DEL causes us to delete the rule for +// container A, and not the rule for container B.) 
+// +// It would be more nftables-y to have a chain with a single rule doing a lookup against a +// set with an element per mapping, rather than having a chain with a rule per mapping. +// But there's no easy, non-racy way to say "delete the element 192.168.1.3 from the set, +// but only if it was added for container A, not if it was added for container B". + +// hashForNetwork returns a unique hash for this network +func hashForNetwork(network string) string { + return utils.MustFormatHashWithPrefix(16, "", network) +} + +// hashForInstance returns a unique hash identifying the rules for this +// network/ifname/containerID +func hashForInstance(network, ifname, containerID string) string { + return hashForNetwork(network) + "-" + utils.MustFormatHashWithPrefix(16, "", ifname+":"+containerID) +} + +// commentForInstance returns a comment string that begins with a unique hash and +// ends with a (possibly-truncated) human-readable description. +func commentForInstance(network, ifname, containerID string) string { + comment := fmt.Sprintf("%s, net: %s, if: %s, id: %s", + hashForInstance(network, ifname, containerID), + strings.ReplaceAll(network, `"`, ``), + strings.ReplaceAll(ifname, `"`, ``), + strings.ReplaceAll(containerID, `"`, ``), + ) + if len(comment) > knftables.CommentLengthMax { + comment = comment[:knftables.CommentLengthMax] + } + return comment +} + +// setupIPMasqNFTables is the nftables-based implementation of SetupIPMasqForNetwork +func setupIPMasqNFTables(ipn *net.IPNet, network, ifname, containerID string) error { + nft, err := knftables.New(knftables.InetFamily, ipMasqTableName) + if err != nil { + return err + } + return setupIPMasqNFTablesWithInterface(nft, ipn, network, ifname, containerID) +} + +func setupIPMasqNFTablesWithInterface(nft knftables.Interface, ipn *net.IPNet, network, ifname, containerID string) error { + staleRules, err := findRules(nft, hashForInstance(network, ifname, containerID)) + if err != nil { + return err + } + + tx := 
nft.NewTransaction() + + // Ensure that our table and chains exist. + tx.Add(&knftables.Table{ + Comment: knftables.PtrTo("Masquerading for plugins from github.com/containernetworking/plugins"), + }) + tx.Add(&knftables.Chain{ + Name: ipMasqChainName, + Comment: knftables.PtrTo("Masquerade traffic from certain IPs to any (non-multicast) IP outside their subnet"), + }) + + // Ensure that the postrouting chain exists and has the correct rules. (Has to be + // done after creating ipMasqChainName, so we can jump to it.) + tx.Add(&knftables.Chain{ + Name: "postrouting", + Type: knftables.PtrTo(knftables.NATType), + Hook: knftables.PtrTo(knftables.PostroutingHook), + Priority: knftables.PtrTo(knftables.SNATPriority), + }) + tx.Flush(&knftables.Chain{ + Name: "postrouting", + }) + tx.Add(&knftables.Rule{ + Chain: "postrouting", + Rule: "ip daddr == 224.0.0.0/4 return", + }) + tx.Add(&knftables.Rule{ + Chain: "postrouting", + Rule: "ip6 daddr == ff00::/8 return", + }) + tx.Add(&knftables.Rule{ + Chain: "postrouting", + Rule: knftables.Concat( + "goto", ipMasqChainName, + ), + }) + + // Delete stale rules, add new rules to masquerade chain + for _, rule := range staleRules { + tx.Delete(rule) + } + ip := "ip" + if ipn.IP.To4() == nil { + ip = "ip6" + } + + // e.g. 
if ipn is "192.168.1.4/24", then dstNet is "192.168.1.0/24" + dstNet := &net.IPNet{IP: ipn.IP.Mask(ipn.Mask), Mask: ipn.Mask} + + tx.Add(&knftables.Rule{ + Chain: ipMasqChainName, + Rule: knftables.Concat( + ip, "saddr", "==", ipn.IP, + ip, "daddr", "!=", dstNet, + "masquerade", + ), + Comment: knftables.PtrTo(commentForInstance(network, ifname, containerID)), + }) + + return nft.Run(context.TODO(), tx) +} + +// teardownIPMasqNFTables is the nftables-based implementation of TeardownIPMasqForNetwork +func teardownIPMasqNFTables(ipn *net.IPNet, network, ifname, containerID string) error { + nft, err := knftables.New(knftables.InetFamily, ipMasqTableName) + if err != nil { + return err + } + return teardownIPMasqNFTablesWithInterface(nft, ipn, network, ifname, containerID) +} + +func teardownIPMasqNFTablesWithInterface(nft knftables.Interface, _ *net.IPNet, network, ifname, containerID string) error { + rules, err := findRules(nft, hashForInstance(network, ifname, containerID)) + if err != nil { + return err + } else if len(rules) == 0 { + return nil + } + + tx := nft.NewTransaction() + for _, rule := range rules { + tx.Delete(rule) + } + return nft.Run(context.TODO(), tx) +} + +// gcIPMasqNFTables is the nftables-based implementation of GCIPMasqForNetwork +func gcIPMasqNFTables(network string, attachments []types.GCAttachment) error { + nft, err := knftables.New(knftables.InetFamily, ipMasqTableName) + if err != nil { + return err + } + return gcIPMasqNFTablesWithInterface(nft, network, attachments) +} + +func gcIPMasqNFTablesWithInterface(nft knftables.Interface, network string, attachments []types.GCAttachment) error { + // Find all rules for the network + rules, err := findRules(nft, hashForNetwork(network)) + if err != nil { + return err + } else if len(rules) == 0 { + return nil + } + + // Compute the comments for all elements of attachments + validAttachments := map[string]bool{} + for _, attachment := range attachments { + 
validAttachments[commentForInstance(network, attachment.IfName, attachment.ContainerID)] = true + } + + // Delete anything in rules that isn't in validAttachments + tx := nft.NewTransaction() + for _, rule := range rules { + if !validAttachments[*rule.Comment] { + tx.Delete(rule) + } + } + return nft.Run(context.TODO(), tx) +} + +// findRules finds rules with comments that start with commentPrefix. +func findRules(nft knftables.Interface, commentPrefix string) ([]*knftables.Rule, error) { + rules, err := nft.ListRules(context.TODO(), ipMasqChainName) + if err != nil { + if knftables.IsNotFound(err) { + // If ipMasqChainName doesn't exist yet, that's fine + return nil, nil + } + return nil, err + } + + matchingRules := make([]*knftables.Rule, 0, 1) + for _, rule := range rules { + if rule.Comment != nil && strings.HasPrefix(*rule.Comment, commentPrefix) { + matchingRules = append(matchingRules, rule) + } + } + + return matchingRules, nil +} diff --git a/pkg/ip/ipmasq_nftables_linux_test.go b/pkg/ip/ipmasq_nftables_linux_test.go new file mode 100644 index 000000000..08b8bbe5d --- /dev/null +++ b/pkg/ip/ipmasq_nftables_linux_test.go @@ -0,0 +1,186 @@ +// Copyright 2023 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ip + +import ( + "strings" + "testing" + + "github.com/vishvananda/netlink" + "sigs.k8s.io/knftables" + + "github.com/containernetworking/cni/pkg/types" +) + +func Test_setupIPMasqNFTables(t *testing.T) { + nft := knftables.NewFake(knftables.InetFamily, ipMasqTableName) + + containers := []struct { + network string + ifname string + containerID string + addr string + }{ + { + network: "unit-test", + ifname: "eth0", + containerID: "one", + addr: "192.168.1.1/24", + }, + { + network: "unit-test", + ifname: "eth0", + containerID: "two", + addr: "192.168.1.2/24", + }, + { + network: "unit-test", + ifname: "eth0", + containerID: "three", + addr: "192.168.99.5/24", + }, + { + network: "alternate", + ifname: "net1", + containerID: "three", + addr: "10.0.0.5/24", + }, + } + + for _, c := range containers { + addr, err := netlink.ParseAddr(c.addr) + if err != nil { + t.Fatalf("failed to parse test addr: %v", err) + } + err = setupIPMasqNFTablesWithInterface(nft, addr.IPNet, c.network, c.ifname, c.containerID) + if err != nil { + t.Fatalf("error from setupIPMasqNFTables: %v", err) + } + } + + expected := strings.TrimSpace(` +add table inet cni_plugins_masquerade { comment "Masquerading for plugins from github.com/containernetworking/plugins" ; } +add chain inet cni_plugins_masquerade masq_checks { comment "Masquerade traffic from certain IPs to any (non-multicast) IP outside their subnet" ; } +add chain inet cni_plugins_masquerade postrouting { type nat hook postrouting priority 100 ; } +add rule inet cni_plugins_masquerade masq_checks ip saddr == 192.168.1.1 ip daddr != 192.168.1.0/24 masquerade comment "6fd94d501e58f0aa-287fc69eff0574a2, net: unit-test, if: eth0, id: one" +add rule inet cni_plugins_masquerade masq_checks ip saddr == 192.168.1.2 ip daddr != 192.168.1.0/24 masquerade comment "6fd94d501e58f0aa-d750b2c8f0f25d5f, net: unit-test, if: eth0, id: two" +add rule inet cni_plugins_masquerade masq_checks ip saddr == 192.168.99.5 ip daddr != 192.168.99.0/24 
masquerade comment "6fd94d501e58f0aa-a4d4adb82b669cfe, net: unit-test, if: eth0, id: three" +add rule inet cni_plugins_masquerade masq_checks ip saddr == 10.0.0.5 ip daddr != 10.0.0.0/24 masquerade comment "82783ef24bdc7036-acb19d111858e348, net: alternate, if: net1, id: three" +add rule inet cni_plugins_masquerade postrouting ip daddr == 224.0.0.0/4 return +add rule inet cni_plugins_masquerade postrouting ip6 daddr == ff00::/8 return +add rule inet cni_plugins_masquerade postrouting goto masq_checks +`) + dump := strings.TrimSpace(nft.Dump()) + if dump != expected { + t.Errorf("expected nftables state:\n%s\n\nactual:\n%s\n\n", expected, dump) + } + + // Add a new container reusing "one"'s address, before deleting "one" + addr, err := netlink.ParseAddr(containers[0].addr) + if err != nil { + t.Fatalf("failed to parse test addr: %v", err) + } + err = setupIPMasqNFTablesWithInterface(nft, addr.IPNet, "unit-test", "eth0", "four") + if err != nil { + t.Fatalf("error from setupIPMasqNFTables: %v", err) + } + + // Remove "one" + c := containers[0] + addr, err = netlink.ParseAddr(c.addr) + if err != nil { + t.Fatalf("failed to parse test addr: %v", err) + } + err = teardownIPMasqNFTablesWithInterface(nft, addr.IPNet, c.network, c.ifname, c.containerID) + if err != nil { + t.Fatalf("error from teardownIPMasqNFTables: %v", err) + } + + // Check that "one" was deleted (and "four" wasn't) + expected = strings.TrimSpace(` +add table inet cni_plugins_masquerade { comment "Masquerading for plugins from github.com/containernetworking/plugins" ; } +add chain inet cni_plugins_masquerade masq_checks { comment "Masquerade traffic from certain IPs to any (non-multicast) IP outside their subnet" ; } +add chain inet cni_plugins_masquerade postrouting { type nat hook postrouting priority 100 ; } +add rule inet cni_plugins_masquerade masq_checks ip saddr == 192.168.1.2 ip daddr != 192.168.1.0/24 masquerade comment "6fd94d501e58f0aa-d750b2c8f0f25d5f, net: unit-test, if: eth0, id: two" +add 
rule inet cni_plugins_masquerade masq_checks ip saddr == 192.168.99.5 ip daddr != 192.168.99.0/24 masquerade comment "6fd94d501e58f0aa-a4d4adb82b669cfe, net: unit-test, if: eth0, id: three" +add rule inet cni_plugins_masquerade masq_checks ip saddr == 10.0.0.5 ip daddr != 10.0.0.0/24 masquerade comment "82783ef24bdc7036-acb19d111858e348, net: alternate, if: net1, id: three" +add rule inet cni_plugins_masquerade masq_checks ip saddr == 192.168.1.1 ip daddr != 192.168.1.0/24 masquerade comment "6fd94d501e58f0aa-e766de567ef6c543, net: unit-test, if: eth0, id: four" +add rule inet cni_plugins_masquerade postrouting ip daddr == 224.0.0.0/4 return +add rule inet cni_plugins_masquerade postrouting ip6 daddr == ff00::/8 return +add rule inet cni_plugins_masquerade postrouting goto masq_checks +`) + dump = strings.TrimSpace(nft.Dump()) + if dump != expected { + t.Errorf("expected nftables state:\n%s\n\nactual:\n%s\n\n", expected, dump) + } + + // GC "four" from the "unit-test" network + err = gcIPMasqNFTablesWithInterface(nft, "unit-test", []types.GCAttachment{ + {IfName: "eth0", ContainerID: "two"}, + {IfName: "eth0", ContainerID: "three"}, + // (irrelevant extra element) + {IfName: "eth0", ContainerID: "one"}, + }) + if err != nil { + t.Fatalf("error from gcIPMasqNFTables: %v", err) + } + // GC the "alternate" network without removing anything + err = gcIPMasqNFTablesWithInterface(nft, "alternate", []types.GCAttachment{ + {IfName: "net1", ContainerID: "three"}, + }) + if err != nil { + t.Fatalf("error from gcIPMasqNFTables: %v", err) + } + + // Re-dump + expected = strings.TrimSpace(` +add table inet cni_plugins_masquerade { comment "Masquerading for plugins from github.com/containernetworking/plugins" ; } +add chain inet cni_plugins_masquerade masq_checks { comment "Masquerade traffic from certain IPs to any (non-multicast) IP outside their subnet" ; } +add chain inet cni_plugins_masquerade postrouting { type nat hook postrouting priority 100 ; } +add rule inet 
cni_plugins_masquerade masq_checks ip saddr == 192.168.1.2 ip daddr != 192.168.1.0/24 masquerade comment "6fd94d501e58f0aa-d750b2c8f0f25d5f, net: unit-test, if: eth0, id: two" +add rule inet cni_plugins_masquerade masq_checks ip saddr == 192.168.99.5 ip daddr != 192.168.99.0/24 masquerade comment "6fd94d501e58f0aa-a4d4adb82b669cfe, net: unit-test, if: eth0, id: three" +add rule inet cni_plugins_masquerade masq_checks ip saddr == 10.0.0.5 ip daddr != 10.0.0.0/24 masquerade comment "82783ef24bdc7036-acb19d111858e348, net: alternate, if: net1, id: three" +add rule inet cni_plugins_masquerade postrouting ip daddr == 224.0.0.0/4 return +add rule inet cni_plugins_masquerade postrouting ip6 daddr == ff00::/8 return +add rule inet cni_plugins_masquerade postrouting goto masq_checks +`) + dump = strings.TrimSpace(nft.Dump()) + if dump != expected { + t.Errorf("expected nftables state:\n%s\n\nactual:\n%s\n\n", expected, dump) + } + + // GC everything + err = gcIPMasqNFTablesWithInterface(nft, "unit-test", []types.GCAttachment{}) + if err != nil { + t.Fatalf("error from gcIPMasqNFTables: %v", err) + } + err = gcIPMasqNFTablesWithInterface(nft, "alternate", []types.GCAttachment{}) + if err != nil { + t.Fatalf("error from gcIPMasqNFTables: %v", err) + } + + expected = strings.TrimSpace(` +add table inet cni_plugins_masquerade { comment "Masquerading for plugins from github.com/containernetworking/plugins" ; } +add chain inet cni_plugins_masquerade masq_checks { comment "Masquerade traffic from certain IPs to any (non-multicast) IP outside their subnet" ; } +add chain inet cni_plugins_masquerade postrouting { type nat hook postrouting priority 100 ; } +add rule inet cni_plugins_masquerade postrouting ip daddr == 224.0.0.0/4 return +add rule inet cni_plugins_masquerade postrouting ip6 daddr == ff00::/8 return +add rule inet cni_plugins_masquerade postrouting goto masq_checks +`) + dump = strings.TrimSpace(nft.Dump()) + if dump != expected { + t.Errorf("expected nftables 
state:\n%s\n\nactual:\n%s\n\n", expected, dump) + } +} diff --git a/pkg/utils/netfilter.go b/pkg/utils/netfilter.go new file mode 100644 index 000000000..1fa391404 --- /dev/null +++ b/pkg/utils/netfilter.go @@ -0,0 +1,46 @@ +// Copyright 2023 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "github.com/coreos/go-iptables/iptables" + "sigs.k8s.io/knftables" +) + +// SupportsIPTables tests whether the system supports using netfilter via the iptables API +// (whether via "iptables-legacy" or "iptables-nft"). (Note that this returns true if it +// is *possible* to use iptables; it does not test whether any other components on the +// system are *actually* using iptables.) +func SupportsIPTables() bool { + ipt, err := iptables.NewWithProtocol(iptables.ProtocolIPv4) + if err != nil { + return false + } + // We don't care whether the chain actually exists, only whether we can *check* + // whether it exists. + _, err = ipt.ChainExists("filter", "INPUT") + return err == nil +} + +// SupportsNFTables tests whether the system supports using netfilter via the nftables API +// (ie, not via "iptables-nft"). (Note that this returns true if it is *possible* to use +// nftables; it does not test whether any other components on the system are *actually* +// using nftables.) +func SupportsNFTables() bool { + // knftables.New() does sanity checks so we don't need any further test like in + // the iptables case. 
+ _, err := knftables.New(knftables.IPv4Family, "supports_nftables_test") + return err == nil +} diff --git a/pkg/utils/netfilter_test.go b/pkg/utils/netfilter_test.go new file mode 100644 index 000000000..d035ad387 --- /dev/null +++ b/pkg/utils/netfilter_test.go @@ -0,0 +1,52 @@ +// Copyright 2023 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "os" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("netfilter support", func() { + When("it is available", func() { + It("reports that iptables is supported", func() { + Expect(SupportsIPTables()).To(BeTrue(), "This test should only fail if iptables is not available, but the test suite as a whole requires it to be available.") + }) + It("reports that nftables is supported", func() { + Expect(SupportsNFTables()).To(BeTrue(), "This test should only fail if nftables is not available, but the test suite as a whole requires it to be available.") + }) + }) + + // These are Serial because os.Setenv has process-wide effect + When("it is not available", Serial, func() { + var origPath string + BeforeEach(func() { + origPath = os.Getenv("PATH") + os.Setenv("PATH", "/does-not-exist") + }) + AfterEach(func() { + os.Setenv("PATH", origPath) + }) + + It("reports that iptables is not supported", func() { + Expect(SupportsIPTables()).To(BeFalse(), "found iptables outside of PATH??") + }) + It("reports that nftables is not supported", func() { + 
Expect(SupportsNFTables()).To(BeFalse(), "found nftables outside of PATH??") + }) + }) +}) diff --git a/plugins/main/bridge/bridge.go b/plugins/main/bridge/bridge.go index 49d7b04db..1c4fa59a6 100644 --- a/plugins/main/bridge/bridge.go +++ b/plugins/main/bridge/bridge.go @@ -35,7 +35,6 @@ import ( "github.com/containernetworking/plugins/pkg/ipam" "github.com/containernetworking/plugins/pkg/link" "github.com/containernetworking/plugins/pkg/ns" - "github.com/containernetworking/plugins/pkg/utils" bv "github.com/containernetworking/plugins/pkg/utils/buildversion" "github.com/containernetworking/plugins/pkg/utils/sysctl" ) @@ -52,6 +51,7 @@ type NetConf struct { IsDefaultGW bool `json:"isDefaultGateway"` ForceAddress bool `json:"forceAddress"` IPMasq bool `json:"ipMasq"` + IPMasqBackend *string `json:"ipMasqBackend,omitempty"` MTU int `json:"mtu"` HairpinMode bool `json:"hairpinMode"` PromiscMode bool `json:"promiscMode"` @@ -673,10 +673,8 @@ func cmdAdd(args *skel.CmdArgs) error { } if n.IPMasq { - chain := utils.FormatChainName(n.Name, args.ContainerID) - comment := utils.FormatComment(n.Name, args.ContainerID) for _, ipc := range result.IPs { - if err = ip.SetupIPMasq(&ipc.Address, chain, comment); err != nil { + if err = ip.SetupIPMasqForNetwork(n.IPMasqBackend, &ipc.Address, n.Name, args.IfName, args.ContainerID); err != nil { return err } } @@ -814,10 +812,8 @@ func cmdDel(args *skel.CmdArgs) error { } if isLayer3 && n.IPMasq { - chain := utils.FormatChainName(n.Name, args.ContainerID) - comment := utils.FormatComment(n.Name, args.ContainerID) for _, ipn := range ipnets { - if err := ip.TeardownIPMasq(ipn, chain, comment); err != nil { + if err := ip.TeardownIPMasqForNetwork(ipn, n.Name, args.IfName, args.ContainerID); err != nil { return err } } diff --git a/plugins/main/bridge/bridge_test.go b/plugins/main/bridge/bridge_test.go index 15d419118..c625ab098 100644 --- a/plugins/main/bridge/bridge_test.go +++ b/plugins/main/bridge/bridge_test.go @@ -15,6 +15,7 @@ 
package main import ( + "context" "encoding/json" "fmt" "net" @@ -27,6 +28,7 @@ import ( . "github.com/onsi/gomega" "github.com/vishvananda/netlink" "github.com/vishvananda/netlink/nl" + "sigs.k8s.io/knftables" "github.com/containernetworking/cni/pkg/skel" "github.com/containernetworking/cni/pkg/types" @@ -77,6 +79,7 @@ type testCase struct { vlanTrunk []*VlanTrunk removeDefaultVlan bool ipMasq bool + ipMasqBackend string macspoofchk bool disableContIface bool @@ -172,6 +175,9 @@ const ( ipMasqConfStr = `, "ipMasq": %t` + ipMasqBackendConfStr = `, + "ipMasqBackend": "%s"` + // Single subnet configuration (legacy) subnetConfStr = `, "subnet": "%s"` @@ -243,6 +249,9 @@ func (tc testCase) netConfJSON(dataDir string) string { if tc.ipMasq { conf += tc.ipMasqConfig() } + if tc.ipMasqBackend != "" { + conf += tc.ipMasqBackendConfig() + } if tc.args.cni.mac != "" { conf += fmt.Sprintf(argsFormat, tc.args.cni.mac) } @@ -295,6 +304,11 @@ func (tc testCase) ipMasqConfig() string { return conf } +func (tc testCase) ipMasqBackendConfig() string { + conf := fmt.Sprintf(ipMasqBackendConfStr, tc.ipMasqBackend) + return conf +} + func (tc testCase) rangesConfig() string { conf := rangesStartStr for i, tcRange := range tc.ranges { @@ -2390,41 +2404,82 @@ var _ = Describe("bridge Operations", func() { }) if testutils.SpecVersionHasChaining(ver) { - It(fmt.Sprintf("[%s] configures a bridge and ipMasq rules", ver), func() { - err := originalNS.Do(func(ns.NetNS) error { - defer GinkgoRecover() - tc := testCase{ - ranges: []rangeInfo{{ - subnet: "10.1.2.0/24", - }}, - ipMasq: true, - cniVersion: ver, - } - - args := tc.createCmdArgs(originalNS, dataDir) - r, _, err := testutils.CmdAddWithArgs(args, func() error { - return cmdAdd(args) - }) - Expect(err).NotTo(HaveOccurred()) - result, err := types100.GetResult(r) - Expect(err).NotTo(HaveOccurred()) - Expect(result.IPs).Should(HaveLen(1)) - - ipt, err := iptables.NewWithProtocol(iptables.ProtocolIPv4) - Expect(err).NotTo(HaveOccurred()) 
- - rules, err := ipt.List("nat", "POSTROUTING") - Expect(err).NotTo(HaveOccurred()) - Expect(rules).Should(ContainElement(ContainSubstring(result.IPs[0].Address.IP.String()))) + for _, tc := range []testCase{ + { + ranges: []rangeInfo{{ + subnet: "10.1.2.0/24", + }}, + ipMasq: true, + cniVersion: ver, + }, + { + ranges: []rangeInfo{{ + subnet: "10.1.2.0/24", + }}, + ipMasq: true, + ipMasqBackend: "iptables", + cniVersion: ver, + }, + { + ranges: []rangeInfo{{ + subnet: "10.1.2.0/24", + }}, + ipMasq: true, + ipMasqBackend: "nftables", + cniVersion: ver, + }, + } { + tc := tc + It(fmt.Sprintf("[%s] configures a bridge and ipMasq rules with ipMasqBackend %q", ver, tc.ipMasqBackend), func() { + err := originalNS.Do(func(ns.NetNS) error { + defer GinkgoRecover() + + args := tc.createCmdArgs(originalNS, dataDir) + r, _, err := testutils.CmdAddWithArgs(args, func() error { + return cmdAdd(args) + }) + Expect(err).NotTo(HaveOccurred()) + result, err := types100.GetResult(r) + Expect(err).NotTo(HaveOccurred()) + Expect(result.IPs).Should(HaveLen(1)) + + ip := result.IPs[0].Address.IP.String() + + // Update this if the default ipmasq backend changes + switch tc.ipMasqBackend { + case "iptables", "": + ipt, err := iptables.NewWithProtocol(iptables.ProtocolIPv4) + Expect(err).NotTo(HaveOccurred()) + + rules, err := ipt.List("nat", "POSTROUTING") + Expect(err).NotTo(HaveOccurred()) + Expect(rules).Should(ContainElement(ContainSubstring(ip))) + case "nftables": + nft, err := knftables.New(knftables.InetFamily, "cni_plugins_masquerade") + Expect(err).NotTo(HaveOccurred()) + rules, err := nft.ListRules(context.TODO(), "masq_checks") + Expect(err).NotTo(HaveOccurred()) + // FIXME: ListRules() doesn't return the actual rule strings, + // and we can't easily compute the ipmasq plugin's comment. 
+ comments := 0 + for _, r := range rules { + if r.Comment != nil { + comments++ + break + } + } + Expect(comments).To(Equal(1), "expected to find exactly one Rule with a comment") + } - err = testutils.CmdDelWithArgs(args, func() error { - return cmdDel(args) + err = testutils.CmdDelWithArgs(args, func() error { + return cmdDel(args) + }) + Expect(err).NotTo(HaveOccurred()) + return nil }) Expect(err).NotTo(HaveOccurred()) - return nil }) - Expect(err).NotTo(HaveOccurred()) - }) + } for i, tc := range []testCase{ { diff --git a/plugins/main/ptp/ptp.go b/plugins/main/ptp/ptp.go index 7213b9c6b..0ac5e6093 100644 --- a/plugins/main/ptp/ptp.go +++ b/plugins/main/ptp/ptp.go @@ -31,7 +31,6 @@ import ( "github.com/containernetworking/plugins/pkg/ip" "github.com/containernetworking/plugins/pkg/ipam" "github.com/containernetworking/plugins/pkg/ns" - "github.com/containernetworking/plugins/pkg/utils" bv "github.com/containernetworking/plugins/pkg/utils/buildversion" ) @@ -44,8 +43,9 @@ func init() { type NetConf struct { types.NetConf - IPMasq bool `json:"ipMasq"` - MTU int `json:"mtu"` + IPMasq bool `json:"ipMasq"` + IPMasqBackend *string `json:"ipMasqBackend,omitempty"` + MTU int `json:"mtu"` } func setupContainerVeth(netns ns.NetNS, ifName string, mtu int, pr *current.Result) (*current.Interface, *current.Interface, error) { @@ -229,10 +229,8 @@ func cmdAdd(args *skel.CmdArgs) error { } if conf.IPMasq { - chain := utils.FormatChainName(conf.Name, args.ContainerID) - comment := utils.FormatComment(conf.Name, args.ContainerID) for _, ipc := range result.IPs { - if err = ip.SetupIPMasq(&ipc.Address, chain, comment); err != nil { + if err = ip.SetupIPMasqForNetwork(conf.IPMasqBackend, &ipc.Address, conf.Name, args.IfName, args.ContainerID); err != nil { return err } } @@ -293,10 +291,8 @@ func cmdDel(args *skel.CmdArgs) error { } if len(ipnets) != 0 && conf.IPMasq { - chain := utils.FormatChainName(conf.Name, args.ContainerID) - comment := utils.FormatComment(conf.Name, 
args.ContainerID) for _, ipn := range ipnets { - err = ip.TeardownIPMasq(ipn, chain, comment) + err = ip.TeardownIPMasqForNetwork(ipn, conf.Name, args.IfName, args.ContainerID) } } diff --git a/plugins/main/ptp/ptp_test.go b/plugins/main/ptp/ptp_test.go index 0ede4409e..bdc78b13e 100644 --- a/plugins/main/ptp/ptp_test.go +++ b/plugins/main/ptp/ptp_test.go @@ -39,6 +39,7 @@ type Net struct { CNIVersion string `json:"cniVersion"` Type string `json:"type,omitempty"` IPMasq bool `json:"ipMasq"` + IPMasqBackend *string `json:"ipMasqBackend,omitempty"` MTU int `json:"mtu"` IPAM *allocator.IPAMConfig `json:"ipam"` DNS types.DNS `json:"dns"` @@ -368,6 +369,62 @@ var _ = Describe("ptp Operations", func() { doTest(conf, ver, 1, dnsConf, targetNS) }) + It(fmt.Sprintf("[%s] configures and deconfigures a ptp link when specifying ipMasqBackend: iptables", ver), func() { + dnsConf := types.DNS{ + Nameservers: []string{"10.1.2.123"}, + Domain: "some.domain.test", + Search: []string{"search.test"}, + Options: []string{"option1:foo"}, + } + dnsConfBytes, err := json.Marshal(dnsConf) + Expect(err).NotTo(HaveOccurred()) + + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "mynet", + "type": "ptp", + "ipMasq": true, + "ipMasqBackend": "iptables", + "mtu": 5000, + "ipam": { + "type": "host-local", + "subnet": "10.1.2.0/24", + "dataDir": "%s" + }, + "dns": %s + }`, ver, dataDir, string(dnsConfBytes)) + + doTest(conf, ver, 1, dnsConf, targetNS) + }) + + It(fmt.Sprintf("[%s] configures and deconfigures a ptp link when specifying ipMasqBackend: nftables", ver), func() { + dnsConf := types.DNS{ + Nameservers: []string{"10.1.2.123"}, + Domain: "some.domain.test", + Search: []string{"search.test"}, + Options: []string{"option1:foo"}, + } + dnsConfBytes, err := json.Marshal(dnsConf) + Expect(err).NotTo(HaveOccurred()) + + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "mynet", + "type": "ptp", + "ipMasq": true, + "ipMasqBackend": "nftables", + "mtu": 5000, + "ipam": { + "type": 
"host-local", + "subnet": "10.1.2.0/24", + "dataDir": "%s" + }, + "dns": %s + }`, ver, dataDir, string(dnsConfBytes)) + + doTest(conf, ver, 1, dnsConf, targetNS) + }) + It(fmt.Sprintf("[%s] configures and deconfigures a dual-stack ptp link + routes with ADD/DEL", ver), func() { conf := fmt.Sprintf(`{ "cniVersion": "%s", diff --git a/plugins/meta/portmap/main.go b/plugins/meta/portmap/main.go index 108c1f592..02df34f9b 100644 --- a/plugins/meta/portmap/main.go +++ b/plugins/meta/portmap/main.go @@ -37,9 +37,22 @@ import ( "github.com/containernetworking/cni/pkg/types" current "github.com/containernetworking/cni/pkg/types/100" "github.com/containernetworking/cni/pkg/version" + "github.com/containernetworking/plugins/pkg/utils" bv "github.com/containernetworking/plugins/pkg/utils/buildversion" ) +type PortMapper interface { + forwardPorts(config *PortMapConf, containerNet net.IPNet) error + checkPorts(config *PortMapConf, containerNet net.IPNet) error + unforwardPorts(config *PortMapConf) error +} + +// These are vars rather than consts so we can "&" them +var ( + iptablesBackend = "iptables" + nftablesBackend = "nftables" +) + // PortMapEntry corresponds to a single entry in the port_mappings argument, // see CONVENTIONS.md type PortMapEntry struct { @@ -51,16 +64,23 @@ type PortMapEntry struct { type PortMapConf struct { types.NetConf - SNAT *bool `json:"snat,omitempty"` - ConditionsV4 *[]string `json:"conditionsV4"` - ConditionsV6 *[]string `json:"conditionsV6"` - MasqAll bool `json:"masqAll,omitempty"` - MarkMasqBit *int `json:"markMasqBit"` - ExternalSetMarkChain *string `json:"externalSetMarkChain"` - RuntimeConfig struct { + + mapper PortMapper + + // Generic config + Backend *string `json:"backend,omitempty"` + SNAT *bool `json:"snat,omitempty"` + ConditionsV4 *[]string `json:"conditionsV4"` + ConditionsV6 *[]string `json:"conditionsV6"` + MasqAll bool `json:"masqAll,omitempty"` + MarkMasqBit *int `json:"markMasqBit"` + RuntimeConfig struct { PortMaps 
[]PortMapEntry `json:"portMappings,omitempty"` } `json:"runtimeConfig,omitempty"` + // iptables-backend-specific config + ExternalSetMarkChain *string `json:"externalSetMarkChain"` + // These are fields parsed out of the config or the environment; // included here for convenience ContainerID string `json:"-"` @@ -89,7 +109,7 @@ func cmdAdd(args *skel.CmdArgs) error { netConf.ContainerID = args.ContainerID if netConf.ContIPv4.IP != nil { - if err := forwardPorts(netConf, netConf.ContIPv4); err != nil { + if err := netConf.mapper.forwardPorts(netConf, netConf.ContIPv4); err != nil { return err } // Delete conntrack entries for UDP to avoid conntrack blackholing traffic @@ -98,10 +118,21 @@ func cmdAdd(args *skel.CmdArgs) error { if err := deletePortmapStaleConnections(netConf.RuntimeConfig.PortMaps, unix.AF_INET); err != nil { log.Printf("failed to delete stale UDP conntrack entries for %s: %v", netConf.ContIPv4.IP, err) } + + if *netConf.SNAT { + // Set the route_localnet bit on the host interface, so that + // 127/8 can cross a routing boundary. 
+ hostIfName := getRoutableHostIF(netConf.ContIPv4.IP) + if hostIfName != "" { + if err := enableLocalnetRouting(hostIfName); err != nil { + return fmt.Errorf("unable to enable route_localnet: %v", err) + } + } + } } if netConf.ContIPv6.IP != nil { - if err := forwardPorts(netConf, netConf.ContIPv6); err != nil { + if err := netConf.mapper.forwardPorts(netConf, netConf.ContIPv6); err != nil { return err } // Delete conntrack entries for UDP to avoid conntrack blackholing traffic @@ -130,7 +161,7 @@ func cmdDel(args *skel.CmdArgs) error { // We don't need to parse out whether or not we're using v6 or snat, // deletion is idempotent - return unforwardPorts(netConf) + return netConf.mapper.unforwardPorts(netConf) } func main() { @@ -161,13 +192,13 @@ func cmdCheck(args *skel.CmdArgs) error { conf.ContainerID = args.ContainerID if conf.ContIPv4.IP != nil { - if err := checkPorts(conf, conf.ContIPv4); err != nil { + if err := conf.mapper.checkPorts(conf, conf.ContIPv4); err != nil { return err } } if conf.ContIPv6.IP != nil { - if err := checkPorts(conf, conf.ContIPv6); err != nil { + if err := conf.mapper.checkPorts(conf, conf.ContIPv6); err != nil { return err } } @@ -197,6 +228,8 @@ func parseConfig(stdin []byte, ifName string) (*PortMapConf, *current.Result, er } } + conf.mapper = &portMapperIPTables{} + if conf.SNAT == nil { tvar := true conf.SNAT = &tvar @@ -215,6 +248,21 @@ func parseConfig(stdin []byte, ifName string) (*PortMapConf, *current.Result, er return nil, nil, fmt.Errorf("MasqMarkBit must be between 0 and 31") } + err := ensureBackend(&conf) + if err != nil { + return nil, nil, err + } + switch *conf.Backend { + case iptablesBackend: + conf.mapper = &portMapperIPTables{} + + case nftablesBackend: + conf.mapper = &portMapperNFTables{} + + default: + return nil, nil, fmt.Errorf("unrecognized backend %q", *conf.Backend) + } + // Reject invalid port numbers for _, pm := range conf.RuntimeConfig.PortMaps { if pm.ContainerPort <= 0 { @@ -254,3 +302,58 @@ func 
parseConfig(stdin []byte, ifName string) (*PortMapConf, *current.Result, er return &conf, result, nil }
+
+// ensureBackend validates and/or sets conf.Backend.
+//
+// It first records which backend-specific options are present in conf:
+// externalSetMarkChain is iptables-specific, and conditionsV4/conditionsV6 are
+// attributed to whichever backend detectBackendOfConditions reports for them.
+// If conf.Backend is nil it is defaulted (see below). An error is returned if
+// conf.Backend names an unknown backend, or if conf contains options that
+// belong to the backend that was not selected.
+func ensureBackend(conf *PortMapConf) error {
+	// backend name -> names of the config options that only make sense for it
+	backendConfig := make(map[string][]string)
+
+	if conf.ExternalSetMarkChain != nil {
+		backendConfig[iptablesBackend] = append(backendConfig[iptablesBackend], "externalSetMarkChain")
+	}
+	if conditionsBackend := detectBackendOfConditions(conf.ConditionsV4); conditionsBackend != "" {
+		backendConfig[conditionsBackend] = append(backendConfig[conditionsBackend], "conditionsV4")
+	}
+	if conditionsBackend := detectBackendOfConditions(conf.ConditionsV6); conditionsBackend != "" {
+		backendConfig[conditionsBackend] = append(backendConfig[conditionsBackend], "conditionsV6")
+	}
+
+	// If backend wasn't requested explicitly, default to iptables, unless it is not
+	// available (and nftables is). FIXME: flip this default at some point.
+	if conf.Backend == nil {
+		if !utils.SupportsIPTables() && utils.SupportsNFTables() {
+			conf.Backend = &nftablesBackend
+		} else {
+			conf.Backend = &iptablesBackend
+		}
+	}
+
+	// Make sure we don't have config for the wrong backend. An unknown backend
+	// name is rejected here rather than being treated as "not iptables", which
+	// would produce a misleading "contains iptables-specific options" error.
+	var wrongBackend string
+	switch *conf.Backend {
+	case iptablesBackend:
+		wrongBackend = nftablesBackend
+	case nftablesBackend:
+		wrongBackend = iptablesBackend
+	default:
+		return fmt.Errorf("unrecognized backend %q", *conf.Backend)
+	}
+	if len(backendConfig[wrongBackend]) > 0 {
+		return fmt.Errorf("%s backend was requested but configuration contains %s-specific options %v", *conf.Backend, wrongBackend, backendConfig[wrongBackend])
+	}
+
+	// OK
+	return nil
+}
+
+// detectBackendOfConditions returns "iptables" if conditions contains iptables
+// conditions, "nftables" if it contains nftables conditions, and "" if it is empty.
+func detectBackendOfConditions(conditions *[]string) string {
+	if conditions == nil || len(*conditions) == 0 || (*conditions)[0] == "" {
+		return ""
+	}
+
+	// The first token of any iptables condition would start with a hyphen (e.g. "-d",
+	// "--sport", "-m"). 
No nftables condition would start that way. (An nftables + // condition might include a negative number, but not as the first token.) + if (*conditions)[0][0] == '-' { + return iptablesBackend + } + return nftablesBackend +} diff --git a/plugins/meta/portmap/portmap.go b/plugins/meta/portmap/portmap_iptables.go similarity index 93% rename from plugins/meta/portmap/portmap.go rename to plugins/meta/portmap/portmap_iptables.go index e380da932..07d6c4209 100644 --- a/plugins/meta/portmap/portmap.go +++ b/plugins/meta/portmap/portmap_iptables.go @@ -25,7 +25,6 @@ import ( "github.com/vishvananda/netlink" "github.com/containernetworking/plugins/pkg/utils" - "github.com/containernetworking/plugins/pkg/utils/sysctl" ) // This creates the chains to be added to iptables. The basic structure is @@ -52,9 +51,11 @@ const ( OldTopLevelSNATChainName = "CNI-HOSTPORT-SNAT" ) +type portMapperIPTables struct{} + // forwardPorts establishes port forwarding to a given container IP. // containerNet.IP can be either v4 or v6. -func forwardPorts(config *PortMapConf, containerNet net.IPNet) error { +func (*portMapperIPTables) forwardPorts(config *PortMapConf, containerNet net.IPNet) error { isV6 := (containerNet.IP.To4() == nil) var ipt *iptables.IPTables @@ -87,17 +88,6 @@ func forwardPorts(config *PortMapConf, containerNet net.IPNet) error { return fmt.Errorf("unable to create chain %s: %v", setMarkChain.name, err) } } - - if !isV6 { - // Set the route_localnet bit on the host interface, so that - // 127/8 can cross a routing boundary. 
- hostIfName := getRoutableHostIF(containerNet.IP) - if hostIfName != "" { - if err := enableLocalnetRouting(hostIfName); err != nil { - return fmt.Errorf("unable to enable route_localnet: %v", err) - } - } - } } // Generate the DNAT (actual port forwarding) rules @@ -117,7 +107,7 @@ func forwardPorts(config *PortMapConf, containerNet net.IPNet) error { return nil } -func checkPorts(config *PortMapConf, containerNet net.IPNet) error { +func (*portMapperIPTables) checkPorts(config *PortMapConf, containerNet net.IPNet) error { isV6 := (containerNet.IP.To4() == nil) dnatChain := genDnatChain(config.Name, config.ContainerID) fillDnatRules(&dnatChain, config, containerNet) @@ -344,14 +334,6 @@ func genMarkMasqChain(markBit int) chain { return ch } -// enableLocalnetRouting tells the kernel not to treat 127/8 as a martian, -// so that connections with a source ip of 127/8 can cross a routing boundary. -func enableLocalnetRouting(ifName string) error { - routeLocalnetPath := "net/ipv4/conf/" + ifName + "/route_localnet" - _, err := sysctl.Sysctl(routeLocalnetPath, "1") - return err -} - // genOldSnatChain is no longer used, but used to be created. We'll try and // tear it down in case the plugin version changed between ADD and DEL func genOldSnatChain(netName, containerID string) chain { @@ -372,7 +354,7 @@ func genOldSnatChain(netName, containerID string) chain { // don't know which protocols were used. // So, we first check that iptables is "generally OK" by doing a check. If // not, we ignore the error, unless neither v4 nor v6 are OK. 
-func unforwardPorts(config *PortMapConf) error { +func (*portMapperIPTables) unforwardPorts(config *PortMapConf) error { dnatChain := genDnatChain(config.Name, config.ContainerID) // Might be lying around from old versions diff --git a/plugins/meta/portmap/portmap_iptables_test.go b/plugins/meta/portmap/portmap_iptables_test.go new file mode 100644 index 000000000..bc9bbf223 --- /dev/null +++ b/plugins/meta/portmap/portmap_iptables_test.go @@ -0,0 +1,252 @@ +// Copyright 2017 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/containernetworking/cni/pkg/types" +) + +var _ = Describe("portmapping configuration (iptables)", func() { + netName := "testNetName" + containerID := "icee6giejonei6sohng6ahngee7laquohquee9shiGo7fohferakah3Feiyoolu2pei7ciPhoh7shaoX6vai3vuf0ahfaeng8yohb9ceu0daez5hashee8ooYai5wa3y" + + for _, ver := range []string{"0.3.0", "0.3.1", "0.4.0", "1.0.0"} { + // Redefine ver inside for scope so real value is picked up by each dynamically defined It() + // See Gingkgo's "Patterns for dynamically generating tests" documentation. 
+ ver := ver + + Describe("Generating iptables chains", func() { + Context("for DNAT", func() { + It(fmt.Sprintf("[%s] generates a correct standard container chain", ver), func() { + ch := genDnatChain(netName, containerID) + + Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-DN-bfd599665540dd91d5d28", + entryChains: []string{TopLevelDNATChainName}, + })) + configBytes := []byte(fmt.Sprintf(`{ + "name": "test", + "type": "portmap", + "cniVersion": "%s", + "runtimeConfig": { + "portMappings": [ + { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"}, + { "hostPort": 8081, "containerPort": 80, "protocol": "tcp"}, + { "hostPort": 8080, "containerPort": 81, "protocol": "udp"}, + { "hostPort": 8082, "containerPort": 82, "protocol": "udp"}, + { "hostPort": 8083, "containerPort": 83, "protocol": "tcp", "hostIP": "192.168.0.2"}, + { "hostPort": 8084, "containerPort": 84, "protocol": "tcp", "hostIP": "0.0.0.0"}, + { "hostPort": 8085, "containerPort": 85, "protocol": "tcp", "hostIP": "2001:db8:a::1"}, + { "hostPort": 8086, "containerPort": 86, "protocol": "tcp", "hostIP": "::"} + ] + }, + "snat": true, + "conditionsV4": ["-a", "b"], + "conditionsV6": ["-c", "d"] + }`, ver)) + + conf, _, err := parseConfig(configBytes, "foo") + Expect(err).NotTo(HaveOccurred()) + conf.ContainerID = containerID + + ch = genDnatChain(conf.Name, containerID) + Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-DN-67e92b96e692a494b6b85", + entryChains: []string{"CNI-HOSTPORT-DNAT"}, + })) + + n, err := types.ParseCIDR("10.0.0.2/24") + Expect(err).NotTo(HaveOccurred()) + fillDnatRules(&ch, conf, *n) + + Expect(ch.entryRules).To(Equal([][]string{ + { + "-m", "comment", "--comment", + fmt.Sprintf("dnat name: \"test\" id: \"%s\"", containerID), + "-m", "multiport", + "-p", "tcp", + "--destination-ports", "8080,8081,8083,8084,8085,8086", + "-a", "b", + }, + { + "-m", "comment", "--comment", + fmt.Sprintf("dnat name: \"test\" id: \"%s\"", containerID), + "-m", "multiport", + "-p", 
"udp", + "--destination-ports", "8080,8082", + "-a", "b", + }, + })) + + Expect(ch.rules).To(Equal([][]string{ + // tcp rules and not hostIP + {"-p", "tcp", "--dport", "8080", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8080", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + {"-p", "tcp", "--dport", "8081", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8081", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + // udp rules and not hostIP + {"-p", "udp", "--dport", "8080", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8080", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:81"}, + {"-p", "udp", "--dport", "8082", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8082", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "10.0.0.2:82"}, + // tcp rules and hostIP + {"-p", "tcp", "--dport", "8083", "-d", "192.168.0.2", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8083", "-d", "192.168.0.2", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8083", "-d", "192.168.0.2", "-j", "DNAT", "--to-destination", "10.0.0.2:83"}, + // tcp rules and hostIP = "0.0.0.0" + {"-p", "tcp", "--dport", "8084", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8084", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8084", "-j", "DNAT", "--to-destination", "10.0.0.2:84"}, + })) + + ch.rules = nil + ch.entryRules = nil + + n, err = types.ParseCIDR("2001:db8::2/64") + Expect(err).NotTo(HaveOccurred()) + fillDnatRules(&ch, conf, 
*n) + + Expect(ch.rules).To(Equal([][]string{ + // tcp rules and not hostIP + {"-p", "tcp", "--dport", "8080", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "[2001:db8::2]:80"}, + {"-p", "tcp", "--dport", "8081", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "[2001:db8::2]:80"}, + // udp rules and not hostIP + {"-p", "udp", "--dport", "8080", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "[2001:db8::2]:81"}, + {"-p", "udp", "--dport", "8082", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "[2001:db8::2]:82"}, + // tcp rules and hostIP + {"-p", "tcp", "--dport", "8085", "-d", "2001:db8:a::1", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8085", "-d", "2001:db8:a::1", "-j", "DNAT", "--to-destination", "[2001:db8::2]:85"}, + // tcp rules and hostIP = "::" + {"-p", "tcp", "--dport", "8086", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, + {"-p", "tcp", "--dport", "8086", "-j", "DNAT", "--to-destination", "[2001:db8::2]:86"}, + })) + + // Disable snat, generate rules + ch.rules = nil + ch.entryRules = nil + fvar := false + conf.SNAT = &fvar + + n, err = types.ParseCIDR("10.0.0.2/24") + Expect(err).NotTo(HaveOccurred()) + fillDnatRules(&ch, conf, *n) + Expect(ch.rules).To(Equal([][]string{ + {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:81"}, + {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "10.0.0.2:82"}, + {"-p", "tcp", "--dport", "8083", "-d", "192.168.0.2", "-j", "DNAT", "--to-destination", 
"10.0.0.2:83"}, + {"-p", "tcp", "--dport", "8084", "-j", "DNAT", "--to-destination", "10.0.0.2:84"}, + })) + }) + + It(fmt.Sprintf("[%s] generates a correct chain with external mark", ver), func() { + ch := genDnatChain(netName, containerID) + + Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-DN-bfd599665540dd91d5d28", + entryChains: []string{TopLevelDNATChainName}, + })) + configBytes := []byte(fmt.Sprintf(`{ + "name": "test", + "type": "portmap", + "cniVersion": "%s", + "runtimeConfig": { + "portMappings": [ + { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"} + ] + }, + "externalSetMarkChain": "PLZ-SET-MARK", + "conditionsV4": ["-a", "b"], + "conditionsV6": ["-c", "d"] + }`, ver)) + + conf, _, err := parseConfig(configBytes, "foo") + Expect(err).NotTo(HaveOccurred()) + conf.ContainerID = containerID + + ch = genDnatChain(conf.Name, containerID) + n, err := types.ParseCIDR("10.0.0.2/24") + Expect(err).NotTo(HaveOccurred()) + fillDnatRules(&ch, conf, *n) + Expect(ch.rules).To(Equal([][]string{ + {"-p", "tcp", "--dport", "8080", "-s", "10.0.0.2/24", "-j", "PLZ-SET-MARK"}, + {"-p", "tcp", "--dport", "8080", "-s", "127.0.0.1", "-j", "PLZ-SET-MARK"}, + {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, + })) + }) + + It(fmt.Sprintf("[%s] generates a correct top-level chain", ver), func() { + ch := genToplevelDnatChain() + + Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-HOSTPORT-DNAT", + entryChains: []string{"PREROUTING", "OUTPUT"}, + entryRules: [][]string{{"-m", "addrtype", "--dst-type", "LOCAL"}}, + })) + }) + + It(fmt.Sprintf("[%s] generates the correct mark chains", ver), func() { + masqBit := 5 + ch := genSetMarkChain(masqBit) + Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-HOSTPORT-SETMARK", + rules: [][]string{{ + "-m", "comment", + "--comment", "CNI portfwd masquerade mark", + "-j", "MARK", + "--set-xmark", "0x20/0x20", + }}, + })) + + ch = genMarkMasqChain(masqBit) + 
Expect(ch).To(Equal(chain{ + table: "nat", + name: "CNI-HOSTPORT-MASQ", + entryChains: []string{"POSTROUTING"}, + entryRules: [][]string{{ + "-m", "comment", + "--comment", "CNI portfwd requiring masquerade", + }}, + rules: [][]string{{ + "-m", "mark", + "--mark", "0x20/0x20", + "-j", "MASQUERADE", + }}, + prependEntry: true, + })) + }) + }) + }) + } +}) diff --git a/plugins/meta/portmap/portmap_nftables.go b/plugins/meta/portmap/portmap_nftables.go new file mode 100644 index 000000000..fca2fdf82 --- /dev/null +++ b/plugins/meta/portmap/portmap_nftables.go @@ -0,0 +1,341 @@ +// Copyright 2023 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "fmt" + "net" + + "sigs.k8s.io/knftables" +) + +const ( + tableName = "cni_hostport" + + hostIPHostPortsChain = "hostip_hostports" + hostPortsChain = "hostports" + masqueradingChain = "masquerading" +) + +// The nftables portmap implementation is fairly similar to the iptables implementation: +// we add a rule for each mapping, with a comment containing a hash of the container ID, +// so that we can later reliably delete the rules we want. 
(This is important because in
+// edge cases, it's possible the plugin might see "ADD container A with IP 192.168.1.3",
+// followed by "ADD container B with IP 192.168.1.3" followed by "DEL container A with IP
+// 192.168.1.3", and we need to make sure that the DEL causes us to delete the rule for
+// container A, and not the rule for container B.) The iptables implementation actually
+// uses a separate chain per container but there's not really any need for that...
+//
+// As with pkg/ip/ipmasq_nftables_linux.go, it would be more nftables-y to have a chain
+// with a single rule doing a lookup against a map with an element per mapping, rather
+// than having a chain with a rule per mapping. But there's no easy, non-racy way to say
+// "delete the element 192.168.1.3 from the map, but only if it was added for container A,
+// not if it was added for container B".
+
+// portMapperNFTables is the nftables-backed port mapper. It lazily creates and
+// caches one knftables.Interface per IP family (see getPortMapNFT).
+type portMapperNFTables struct {
+	ipv4 knftables.Interface
+	ipv6 knftables.Interface
+}
+
+// getPortMapNFT returns the knftables.Interface used for port mapping in the
+// requested IP family (IPv6 if ipv6 is true, IPv4 otherwise), creating it for
+// tableName on first use and caching it on pmNFT. A field that is already set
+// is returned as-is. If creation fails the error is returned and nothing is
+// cached, so a later call tries again.
+func (pmNFT *portMapperNFTables) getPortMapNFT(ipv6 bool) (knftables.Interface, error) {
+	family, cached := knftables.IPv4Family, &pmNFT.ipv4
+	if ipv6 {
+		family, cached = knftables.IPv6Family, &pmNFT.ipv6
+	}
+
+	if *cached == nil {
+		nft, err := knftables.New(family, tableName)
+		if err != nil {
+			return nil, err
+		}
+		*cached = nft
+	}
+	return *cached, nil
+}
+
+// forwardPorts establishes port forwarding to a given container IP.
+// containerNet.IP can be either v4 or v6.
+//
+// Everything is queued in one transaction: the table and the shared chains are
+// ensured, the "input" and "output" entry chains are flushed and refilled with
+// jumps to the hostip_hostports and hostports chains (prefixed by the
+// configured conditions for this IP family), and then the per-container rules
+// are added with the container ID as their comment.
+func (pmNFT *portMapperNFTables) forwardPorts(config *PortMapConf, containerNet net.IPNet) error {
+	isV6 := (containerNet.IP.To4() == nil)
+	nft, err := pmNFT.getPortMapNFT(isV6)
+	if err != nil {
+		return err
+	}
+
+	// ipX is the nftables keyword for this family's IP header matches.
+	var ipX string
+	var conditions []string
+	if isV6 {
+		ipX = "ip6"
+		if config.ConditionsV6 != nil {
+			conditions = *config.ConditionsV6
+		}
+	} else {
+		ipX = "ip"
+		if config.ConditionsV4 != nil {
+			conditions = *config.ConditionsV4
+		}
+	}
+
+	tx := nft.NewTransaction()
+
+	// Ensure basic rule structure
+	tx.Add(&knftables.Table{
+		Comment: knftables.PtrTo("CNI portmap plugin"),
+	})
+
+	// Use the same constants as the jump rules below so the chain names
+	// cannot drift apart.
+	tx.Add(&knftables.Chain{
+		Name: hostPortsChain,
+	})
+	tx.Add(&knftables.Chain{
+		Name: hostIPHostPortsChain,
+	})
+
+	// NOTE(review): nft(8) documents dnat as valid only from prerouting and
+	// output nat chains, and the chains jumped to below hold dnat rules.
+	// Confirm that hooking at input (rather than prerouting) is intended.
+	tx.Add(&knftables.Chain{
+		Name:     "input",
+		Type:     knftables.PtrTo(knftables.NATType),
+		Hook:     knftables.PtrTo(knftables.InputHook),
+		Priority: knftables.PtrTo(knftables.DNATPriority),
+	})
+	// The entry chains are flushed and rebuilt on every call so the jump
+	// rules are not duplicated.
+	tx.Flush(&knftables.Chain{
+		Name: "input",
+	})
+	tx.Add(&knftables.Rule{
+		Chain: "input",
+		Rule: knftables.Concat(
+			conditions,
+			"jump", hostIPHostPortsChain,
+		),
+	})
+	tx.Add(&knftables.Rule{
+		Chain: "input",
+		Rule: knftables.Concat(
+			conditions,
+			"jump", hostPortsChain,
+		),
+	})
+
+	tx.Add(&knftables.Chain{
+		Name:     "output",
+		Type:     knftables.PtrTo(knftables.NATType),
+		Hook:     knftables.PtrTo(knftables.OutputHook),
+		Priority: knftables.PtrTo(knftables.DNATPriority),
+	})
+	tx.Flush(&knftables.Chain{
+		Name: "output",
+	})
+	tx.Add(&knftables.Rule{
+		Chain: "output",
+		Rule: knftables.Concat(
+			conditions,
+			"jump", hostIPHostPortsChain,
+		),
+	})
+	tx.Add(&knftables.Rule{
+		Chain: "output",
+		Rule: knftables.Concat(
+			conditions,
+			"fib daddr type local",
+			"jump", hostPortsChain,
+		),
+	})
+
+	if *config.SNAT {
+		tx.Add(&knftables.Chain{
+			Name:     masqueradingChain,
+			Type:     knftables.PtrTo(knftables.NATType),
+			Hook:     knftables.PtrTo(knftables.PostroutingHook),
+			Priority: knftables.PtrTo(knftables.SNATPriority),
+		})
+	}
+
+	// Set up 
this container + for _, e := range config.RuntimeConfig.PortMaps { + useHostIP := false + if e.HostIP != "" { + hostIP := net.ParseIP(e.HostIP) + isHostV6 := (hostIP.To4() == nil) + // Ignore wrong-IP-family HostIPs + if isV6 != isHostV6 { + continue + } + + // Unspecified addresses cannot be used as destination + useHostIP = !hostIP.IsUnspecified() + } + + if useHostIP { + tx.Add(&knftables.Rule{ + Chain: hostIPHostPortsChain, + Rule: knftables.Concat( + ipX, "daddr", e.HostIP, + ipX, "protocol", e.Protocol, + "th dport", e.HostPort, + "dnat", ipX, "addr . port", "to", containerNet.IP, ".", e.ContainerPort, + ), + Comment: &config.ContainerID, + }) + } else { + tx.Add(&knftables.Rule{ + Chain: hostPortsChain, + Rule: knftables.Concat( + ipX, "protocol", e.Protocol, + "th dport", e.HostPort, + "dnat", ipX, "addr . port", "to", containerNet.IP, ".", e.ContainerPort, + ), + Comment: &config.ContainerID, + }) + } + } + + if *config.SNAT { + // Add mark-to-masquerade rules for hairpin and localhost + // In theory we should validate that the original dst IP and port are as + // expected, but *any* traffic matching one of these patterns would need + // to be masqueraded to be able to work correctly anyway. 
+ tx.Add(&knftables.Rule{ + Chain: masqueradingChain, + Rule: knftables.Concat( + ipX, "saddr", containerNet.IP, + ipX, "daddr", containerNet.IP, + "masquerade", + ), + Comment: &config.ContainerID, + }) + if !isV6 { + tx.Add(&knftables.Rule{ + Chain: masqueradingChain, + Rule: knftables.Concat( + ipX, "saddr 127.0.0.1", + ipX, "daddr", containerNet.IP, + "masquerade", + ), + Comment: &config.ContainerID, + }) + } + } + + err = nft.Run(context.TODO(), tx) + if err != nil { + return fmt.Errorf("unable to set up nftables rules for port mappings: %v", err) + } + + return nil +} + +func (pmNFT *portMapperNFTables) checkPorts(config *PortMapConf, containerNet net.IPNet) error { + isV6 := (containerNet.IP.To4() == nil) + + var hostPorts, hostIPHostPorts, masqueradings int + for _, e := range config.RuntimeConfig.PortMaps { + if e.HostIP != "" { + hostIPHostPorts++ + } else { + hostPorts++ + } + } + if *config.SNAT { + masqueradings = len(config.RuntimeConfig.PortMaps) + if isV6 { + masqueradings *= 2 + } + } + + nft, err := pmNFT.getPortMapNFT(isV6) + if err != nil { + return err + } + if hostPorts > 0 { + err := checkPortsAgainstRules(nft, hostPortsChain, config.ContainerID, hostPorts) + if err != nil { + return err + } + } + if hostIPHostPorts > 0 { + err := checkPortsAgainstRules(nft, hostIPHostPortsChain, config.ContainerID, hostIPHostPorts) + if err != nil { + return err + } + } + if masqueradings > 0 { + err := checkPortsAgainstRules(nft, masqueradingChain, config.ContainerID, masqueradings) + if err != nil { + return err + } + } + + return nil +} + +func checkPortsAgainstRules(nft knftables.Interface, chain, comment string, nPorts int) error { + rules, err := nft.ListRules(context.TODO(), chain) + if err != nil { + return err + } + + found := 0 + for _, r := range rules { + if r.Comment != nil && *r.Comment == comment { + found++ + } + } + if found < nPorts { + return fmt.Errorf("missing hostport rules in %q chain", chain) + } + + return nil +} + +// 
unforwardPorts deletes any nftables rules created by this plugin. +// It should be idempotent - it will not error if the chain does not exist. +func (pmNFT *portMapperNFTables) unforwardPorts(config *PortMapConf) error { + // Always clear both IPv4 and IPv6, just to be sure + for _, family := range []knftables.Family{knftables.IPv4Family, knftables.IPv6Family} { + nft, err := pmNFT.getPortMapNFT(family == knftables.IPv6Family) + if err != nil { + continue + } + + tx := nft.NewTransaction() + for _, chain := range []string{hostPortsChain, hostIPHostPortsChain, masqueradingChain} { + rules, err := nft.ListRules(context.TODO(), chain) + if err != nil { + if knftables.IsNotFound(err) { + continue + } + return fmt.Errorf("could not list rules in table %s: %w", tableName, err) + } + + for _, r := range rules { + if r.Comment != nil && *r.Comment == config.ContainerID { + tx.Delete(r) + } + } + } + + err = nft.Run(context.TODO(), tx) + if err != nil { + return fmt.Errorf("error deleting nftables rules: %w", err) + } + } + + return nil +} diff --git a/plugins/meta/portmap/portmap_nftables_test.go b/plugins/meta/portmap/portmap_nftables_test.go new file mode 100644 index 000000000..fc8d0fb38 --- /dev/null +++ b/plugins/meta/portmap/portmap_nftables_test.go @@ -0,0 +1,134 @@ +// Copyright 2023 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "strings" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "sigs.k8s.io/knftables" + + "github.com/containernetworking/cni/pkg/types" +) + +var _ = Describe("portmapping configuration (nftables)", func() { + containerID := "icee6giejonei6so" + + for _, ver := range []string{"0.3.0", "0.3.1", "0.4.0", "1.0.0"} { + // Redefine ver inside for scope so real value is picked up by each dynamically defined It() + // See Gingkgo's "Patterns for dynamically generating tests" documentation. + ver := ver + + Describe("nftables rules", func() { + var pmNFT *portMapperNFTables + var ipv4Fake, ipv6Fake *knftables.Fake + BeforeEach(func() { + ipv4Fake = knftables.NewFake(knftables.IPv4Family, tableName) + ipv6Fake = knftables.NewFake(knftables.IPv6Family, tableName) + pmNFT = &portMapperNFTables{ + ipv4: ipv4Fake, + ipv6: ipv6Fake, + } + }) + + It(fmt.Sprintf("[%s] generates correct rules on ADD", ver), func() { + configBytes := []byte(fmt.Sprintf(`{ + "name": "test", + "type": "portmap", + "cniVersion": "%s", + "backend": "nftables", + "runtimeConfig": { + "portMappings": [ + { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"}, + { "hostPort": 8081, "containerPort": 80, "protocol": "tcp"}, + { "hostPort": 8080, "containerPort": 81, "protocol": "udp"}, + { "hostPort": 8082, "containerPort": 82, "protocol": "udp"}, + { "hostPort": 8083, "containerPort": 83, "protocol": "tcp", "hostIP": "192.168.0.2"}, + { "hostPort": 8084, "containerPort": 84, "protocol": "tcp", "hostIP": "0.0.0.0"}, + { "hostPort": 8085, "containerPort": 85, "protocol": "tcp", "hostIP": "2001:db8:a::1"}, + { "hostPort": 8086, "containerPort": 86, "protocol": "tcp", "hostIP": "::"} + ] + }, + "snat": true, + "conditionsV4": ["a", "b"], + "conditionsV6": ["c", "d"] + }`, ver)) + + conf, _, err := parseConfig(configBytes, "foo") + Expect(err).NotTo(HaveOccurred()) + conf.ContainerID = containerID + + containerNet, err := types.ParseCIDR("10.0.0.2/24") + Expect(err).NotTo(HaveOccurred()) + + err = pmNFT.forwardPorts(conf, *containerNet) + 
Expect(err).NotTo(HaveOccurred()) + + expectedRules := strings.TrimSpace(` +add table ip cni_hostport { comment "CNI portmap plugin" ; } +add chain ip cni_hostport hostip_hostports +add chain ip cni_hostport hostports +add chain ip cni_hostport input { type nat hook input priority -100 ; } +add chain ip cni_hostport masquerading { type nat hook postrouting priority 100 ; } +add chain ip cni_hostport output { type nat hook output priority -100 ; } +add rule ip cni_hostport hostip_hostports ip daddr 192.168.0.2 ip protocol tcp th dport 8083 dnat ip addr . port to 10.0.0.2 . 83 comment "icee6giejonei6so" +add rule ip cni_hostport hostports ip protocol tcp th dport 8080 dnat ip addr . port to 10.0.0.2 . 80 comment "icee6giejonei6so" +add rule ip cni_hostport hostports ip protocol tcp th dport 8081 dnat ip addr . port to 10.0.0.2 . 80 comment "icee6giejonei6so" +add rule ip cni_hostport hostports ip protocol udp th dport 8080 dnat ip addr . port to 10.0.0.2 . 81 comment "icee6giejonei6so" +add rule ip cni_hostport hostports ip protocol udp th dport 8082 dnat ip addr . port to 10.0.0.2 . 82 comment "icee6giejonei6so" +add rule ip cni_hostport hostports ip protocol tcp th dport 8084 dnat ip addr . port to 10.0.0.2 . 
84 comment "icee6giejonei6so" +add rule ip cni_hostport input a b jump hostip_hostports +add rule ip cni_hostport input a b jump hostports +add rule ip cni_hostport masquerading ip saddr 10.0.0.2 ip daddr 10.0.0.2 masquerade comment "icee6giejonei6so" +add rule ip cni_hostport masquerading ip saddr 127.0.0.1 ip daddr 10.0.0.2 masquerade comment "icee6giejonei6so" +add rule ip cni_hostport output a b jump hostip_hostports +add rule ip cni_hostport output a b fib daddr type local jump hostports +`) + actualRules := strings.TrimSpace(ipv4Fake.Dump()) + Expect(actualRules).To(Equal(expectedRules)) + + // Disable snat, generate IPv6 rules + *conf.SNAT = false + containerNet, err = types.ParseCIDR("2001:db8::2/64") + Expect(err).NotTo(HaveOccurred()) + + err = pmNFT.forwardPorts(conf, *containerNet) + Expect(err).NotTo(HaveOccurred()) + + expectedRules = strings.TrimSpace(` +add table ip6 cni_hostport { comment "CNI portmap plugin" ; } +add chain ip6 cni_hostport hostip_hostports +add chain ip6 cni_hostport hostports +add chain ip6 cni_hostport input { type nat hook input priority -100 ; } +add chain ip6 cni_hostport output { type nat hook output priority -100 ; } +add rule ip6 cni_hostport hostip_hostports ip6 daddr 2001:db8:a::1 ip6 protocol tcp th dport 8085 dnat ip6 addr . port to 2001:db8::2 . 85 comment "icee6giejonei6so" +add rule ip6 cni_hostport hostports ip6 protocol tcp th dport 8080 dnat ip6 addr . port to 2001:db8::2 . 80 comment "icee6giejonei6so" +add rule ip6 cni_hostport hostports ip6 protocol tcp th dport 8081 dnat ip6 addr . port to 2001:db8::2 . 80 comment "icee6giejonei6so" +add rule ip6 cni_hostport hostports ip6 protocol udp th dport 8080 dnat ip6 addr . port to 2001:db8::2 . 81 comment "icee6giejonei6so" +add rule ip6 cni_hostport hostports ip6 protocol udp th dport 8082 dnat ip6 addr . port to 2001:db8::2 . 82 comment "icee6giejonei6so" +add rule ip6 cni_hostport hostports ip6 protocol tcp th dport 8086 dnat ip6 addr . port to 2001:db8::2 . 
86 comment "icee6giejonei6so" +add rule ip6 cni_hostport input c d jump hostip_hostports +add rule ip6 cni_hostport input c d jump hostports +add rule ip6 cni_hostport output c d jump hostip_hostports +add rule ip6 cni_hostport output c d fib daddr type local jump hostports +`) + actualRules = strings.TrimSpace(ipv6Fake.Dump()) + Expect(actualRules).To(Equal(expectedRules)) + }) + }) + } +}) diff --git a/plugins/meta/portmap/portmap_test.go b/plugins/meta/portmap/portmap_test.go index 4897fe632..7cf10f944 100644 --- a/plugins/meta/portmap/portmap_test.go +++ b/plugins/meta/portmap/portmap_test.go @@ -24,9 +24,6 @@ import ( ) var _ = Describe("portmapping configuration", func() { - netName := "testNetName" - containerID := "icee6giejonei6sohng6ahngee7laquohquee9shiGo7fohferakah3Feiyoolu2pei7ciPhoh7shaoX6vai3vuf0ahfaeng8yohb9ceu0daez5hashee8ooYai5wa3y" - for _, ver := range []string{"0.3.0", "0.3.1", "0.4.0", "1.0.0"} { // Redefine ver inside for scope so real value is picked up by each dynamically defined It() // See Gingkgo's "Patterns for dynamically generating tests" documentation. 
@@ -38,6 +35,7 @@ var _ = Describe("portmapping configuration", func() { "name": "test", "type": "portmap", "cniVersion": "%s", + "backend": "iptables", "runtimeConfig": { "portMappings": [ { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"}, @@ -45,8 +43,8 @@ var _ = Describe("portmapping configuration", func() { ] }, "snat": false, - "conditionsV4": ["a", "b"], - "conditionsV6": ["c", "d"], + "conditionsV4": ["-s", "1.2.3.4"], + "conditionsV6": ["-s", "12::34"], "prevResult": { "interfaces": [ {"name": "host"}, @@ -77,8 +75,8 @@ var _ = Describe("portmapping configuration", func() { c, _, err := parseConfig(configBytes, "container") Expect(err).NotTo(HaveOccurred()) Expect(c.CNIVersion).To(Equal(ver)) - Expect(c.ConditionsV4).To(Equal(&[]string{"a", "b"})) - Expect(c.ConditionsV6).To(Equal(&[]string{"c", "d"})) + Expect(c.ConditionsV4).To(Equal(&[]string{"-s", "1.2.3.4"})) + Expect(c.ConditionsV6).To(Equal(&[]string{"-s", "12::34"})) fvar := false Expect(c.SNAT).To(Equal(&fvar)) Expect(c.Name).To(Equal("test")) @@ -97,15 +95,16 @@ var _ = Describe("portmapping configuration", func() { "name": "test", "type": "portmap", "cniVersion": "%s", + "backend": "iptables", "snat": false, - "conditionsV4": ["a", "b"], - "conditionsV6": ["c", "d"] + "conditionsV4": ["-s", "1.2.3.4"], + "conditionsV6": ["-s", "12::34"] }`, ver)) c, _, err := parseConfig(configBytes, "container") Expect(err).NotTo(HaveOccurred()) Expect(c.CNIVersion).To(Equal(ver)) - Expect(c.ConditionsV4).To(Equal(&[]string{"a", "b"})) - Expect(c.ConditionsV6).To(Equal(&[]string{"c", "d"})) + Expect(c.ConditionsV4).To(Equal(&[]string{"-s", "1.2.3.4"})) + Expect(c.ConditionsV6).To(Equal(&[]string{"-s", "12::34"})) fvar := false Expect(c.SNAT).To(Equal(&fvar)) Expect(c.Name).To(Equal("test")) @@ -116,9 +115,10 @@ var _ = Describe("portmapping configuration", func() { "name": "test", "type": "portmap", "cniVersion": "%s", + "backend": "iptables", "snat": false, - "conditionsV4": ["a", "b"], - 
"conditionsV6": ["c", "d"], + "conditionsV4": ["-s", "1.2.3.4"], + "conditionsV6": ["-s", "12::34"], "runtimeConfig": { "portMappings": [ { "hostPort": 0, "containerPort": 80, "protocol": "tcp"} @@ -129,6 +129,82 @@ var _ = Describe("portmapping configuration", func() { Expect(err).To(MatchError("Invalid host port number: 0")) }) + It(fmt.Sprintf("[%s] defaults to iptables when backend is not specified", ver), func() { + // "defaults to iptables" is only true if iptables is installed + // (or if neither iptables nor nftables is installed), but the + // other unit tests would fail if iptables wasn't installed, so + // we know it must be. + configBytes := []byte(fmt.Sprintf(`{ + "name": "test", + "type": "portmap", + "cniVersion": "%s" + }`, ver)) + c, _, err := parseConfig(configBytes, "container") + Expect(err).NotTo(HaveOccurred()) + Expect(c.CNIVersion).To(Equal(ver)) + Expect(c.Backend).To(Equal(&iptablesBackend)) + Expect(c.Name).To(Equal("test")) + }) + + It(fmt.Sprintf("[%s] uses nftables if requested", ver), func() { + configBytes := []byte(fmt.Sprintf(`{ + "name": "test", + "type": "portmap", + "cniVersion": "%s", + "backend": "nftables" + }`, ver)) + c, _, err := parseConfig(configBytes, "container") + Expect(err).NotTo(HaveOccurred()) + Expect(c.CNIVersion).To(Equal(ver)) + Expect(c.Backend).To(Equal(&nftablesBackend)) + Expect(c.Name).To(Equal("test")) + }) + + It(fmt.Sprintf("[%s] allows nftables conditions if nftables is requested", ver), func() { + configBytes := []byte(fmt.Sprintf(`{ + "name": "test", + "type": "portmap", + "cniVersion": "%s", + "backend": "nftables", + "conditionsV4": ["ip", "saddr", "1.2.3.4"], + "conditionsV6": ["ip6", "saddr", "12::34"] + }`, ver)) + c, _, err := parseConfig(configBytes, "container") + Expect(err).NotTo(HaveOccurred()) + Expect(c.CNIVersion).To(Equal(ver)) + Expect(c.Backend).To(Equal(&nftablesBackend)) + Expect(c.ConditionsV4).To(Equal(&[]string{"ip", "saddr", "1.2.3.4"})) + 
Expect(c.ConditionsV6).To(Equal(&[]string{"ip6", "saddr", "12::34"})) + Expect(c.Name).To(Equal("test")) + }) + + It(fmt.Sprintf("[%s] rejects nftables options with 'backend: iptables'", ver), func() { + configBytes := []byte(fmt.Sprintf(`{ + "name": "test", + "type": "portmap", + "cniVersion": "%s", + "backend": "iptables", + "conditionsV4": ["ip", "saddr", "1.2.3.4"], + "conditionsV6": ["ip6", "saddr", "12::34"] + }`, ver)) + _, _, err := parseConfig(configBytes, "container") + Expect(err).To(MatchError("iptables backend was requested but configuration contains nftables-specific options [conditionsV4 conditionsV6]")) + }) + + It(fmt.Sprintf("[%s] rejects iptables options with 'backend: nftables'", ver), func() { + configBytes := []byte(fmt.Sprintf(`{ + "name": "test", + "type": "portmap", + "cniVersion": "%s", + "backend": "nftables", + "externalSetMarkChain": "KUBE-MARK-MASQ", + "conditionsV4": ["-s", "1.2.3.4"], + "conditionsV6": ["-s", "12::34"] + }`, ver)) + _, _, err := parseConfig(configBytes, "container") + Expect(err).To(MatchError("nftables backend was requested but configuration contains iptables-specific options [externalSetMarkChain conditionsV4 conditionsV6]")) + }) + It(fmt.Sprintf("[%s] does not fail on missing prevResult interface index", ver), func() { configBytes := []byte(fmt.Sprintf(`{ "name": "test", @@ -139,7 +215,7 @@ var _ = Describe("portmapping configuration", func() { { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"} ] }, - "conditionsV4": ["a", "b"], + "conditionsV4": ["-s", "1.2.3.4"], "prevResult": { "interfaces": [ {"name": "host"} @@ -157,222 +233,5 @@ var _ = Describe("portmapping configuration", func() { Expect(err).NotTo(HaveOccurred()) }) }) - - Describe("Generating chains", func() { - Context("for DNAT", func() { - It(fmt.Sprintf("[%s] generates a correct standard container chain", ver), func() { - ch := genDnatChain(netName, containerID) - - Expect(ch).To(Equal(chain{ - table: "nat", - name: 
"CNI-DN-bfd599665540dd91d5d28", - entryChains: []string{TopLevelDNATChainName}, - })) - configBytes := []byte(fmt.Sprintf(`{ - "name": "test", - "type": "portmap", - "cniVersion": "%s", - "runtimeConfig": { - "portMappings": [ - { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"}, - { "hostPort": 8081, "containerPort": 80, "protocol": "tcp"}, - { "hostPort": 8080, "containerPort": 81, "protocol": "udp"}, - { "hostPort": 8082, "containerPort": 82, "protocol": "udp"}, - { "hostPort": 8083, "containerPort": 83, "protocol": "tcp", "hostIP": "192.168.0.2"}, - { "hostPort": 8084, "containerPort": 84, "protocol": "tcp", "hostIP": "0.0.0.0"}, - { "hostPort": 8085, "containerPort": 85, "protocol": "tcp", "hostIP": "2001:db8:a::1"}, - { "hostPort": 8086, "containerPort": 86, "protocol": "tcp", "hostIP": "::"} - ] - }, - "snat": true, - "conditionsV4": ["a", "b"], - "conditionsV6": ["c", "d"] - }`, ver)) - - conf, _, err := parseConfig(configBytes, "foo") - Expect(err).NotTo(HaveOccurred()) - conf.ContainerID = containerID - - ch = genDnatChain(conf.Name, containerID) - Expect(ch).To(Equal(chain{ - table: "nat", - name: "CNI-DN-67e92b96e692a494b6b85", - entryChains: []string{"CNI-HOSTPORT-DNAT"}, - })) - - n, err := types.ParseCIDR("10.0.0.2/24") - Expect(err).NotTo(HaveOccurred()) - fillDnatRules(&ch, conf, *n) - - Expect(ch.entryRules).To(Equal([][]string{ - { - "-m", "comment", "--comment", - fmt.Sprintf("dnat name: \"test\" id: \"%s\"", containerID), - "-m", "multiport", - "-p", "tcp", - "--destination-ports", "8080,8081,8083,8084,8085,8086", - "a", "b", - }, - { - "-m", "comment", "--comment", - fmt.Sprintf("dnat name: \"test\" id: \"%s\"", containerID), - "-m", "multiport", - "-p", "udp", - "--destination-ports", "8080,8082", - "a", "b", - }, - })) - - Expect(ch.rules).To(Equal([][]string{ - // tcp rules and not hostIP - {"-p", "tcp", "--dport", "8080", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8080", "-s", "127.0.0.1", 
"-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, - {"-p", "tcp", "--dport", "8081", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8081", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, - // udp rules and not hostIP - {"-p", "udp", "--dport", "8080", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "udp", "--dport", "8080", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:81"}, - {"-p", "udp", "--dport", "8082", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "udp", "--dport", "8082", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "10.0.0.2:82"}, - // tcp rules and hostIP - {"-p", "tcp", "--dport", "8083", "-d", "192.168.0.2", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8083", "-d", "192.168.0.2", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8083", "-d", "192.168.0.2", "-j", "DNAT", "--to-destination", "10.0.0.2:83"}, - // tcp rules and hostIP = "0.0.0.0" - {"-p", "tcp", "--dport", "8084", "-s", "10.0.0.2/24", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8084", "-s", "127.0.0.1", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8084", "-j", "DNAT", "--to-destination", "10.0.0.2:84"}, - })) - - ch.rules = nil - ch.entryRules = nil - - n, err = types.ParseCIDR("2001:db8::2/64") - Expect(err).NotTo(HaveOccurred()) - fillDnatRules(&ch, conf, *n) - - Expect(ch.rules).To(Equal([][]string{ - // tcp rules and not hostIP - {"-p", "tcp", "--dport", "8080", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "[2001:db8::2]:80"}, - {"-p", "tcp", "--dport", 
"8081", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "[2001:db8::2]:80"}, - // udp rules and not hostIP - {"-p", "udp", "--dport", "8080", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "[2001:db8::2]:81"}, - {"-p", "udp", "--dport", "8082", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "[2001:db8::2]:82"}, - // tcp rules and hostIP - {"-p", "tcp", "--dport", "8085", "-d", "2001:db8:a::1", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8085", "-d", "2001:db8:a::1", "-j", "DNAT", "--to-destination", "[2001:db8::2]:85"}, - // tcp rules and hostIP = "::" - {"-p", "tcp", "--dport", "8086", "-s", "2001:db8::2/64", "-j", "CNI-HOSTPORT-SETMARK"}, - {"-p", "tcp", "--dport", "8086", "-j", "DNAT", "--to-destination", "[2001:db8::2]:86"}, - })) - - // Disable snat, generate rules - ch.rules = nil - ch.entryRules = nil - fvar := false - conf.SNAT = &fvar - - n, err = types.ParseCIDR("10.0.0.2/24") - Expect(err).NotTo(HaveOccurred()) - fillDnatRules(&ch, conf, *n) - Expect(ch.rules).To(Equal([][]string{ - {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, - {"-p", "tcp", "--dport", "8081", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, - {"-p", "udp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:81"}, - {"-p", "udp", "--dport", "8082", "-j", "DNAT", "--to-destination", "10.0.0.2:82"}, - {"-p", "tcp", "--dport", "8083", "-d", "192.168.0.2", "-j", "DNAT", "--to-destination", "10.0.0.2:83"}, - {"-p", "tcp", "--dport", "8084", "-j", "DNAT", "--to-destination", "10.0.0.2:84"}, - })) - }) - - It(fmt.Sprintf("[%s] generates a correct chain with external mark", ver), func() { - ch := genDnatChain(netName, containerID) - - Expect(ch).To(Equal(chain{ - table: "nat", - 
name: "CNI-DN-bfd599665540dd91d5d28", - entryChains: []string{TopLevelDNATChainName}, - })) - configBytes := []byte(fmt.Sprintf(`{ - "name": "test", - "type": "portmap", - "cniVersion": "%s", - "runtimeConfig": { - "portMappings": [ - { "hostPort": 8080, "containerPort": 80, "protocol": "tcp"} - ] - }, - "externalSetMarkChain": "PLZ-SET-MARK", - "conditionsV4": ["a", "b"], - "conditionsV6": ["c", "d"] - }`, ver)) - - conf, _, err := parseConfig(configBytes, "foo") - Expect(err).NotTo(HaveOccurred()) - conf.ContainerID = containerID - - ch = genDnatChain(conf.Name, containerID) - n, err := types.ParseCIDR("10.0.0.2/24") - Expect(err).NotTo(HaveOccurred()) - fillDnatRules(&ch, conf, *n) - Expect(ch.rules).To(Equal([][]string{ - {"-p", "tcp", "--dport", "8080", "-s", "10.0.0.2/24", "-j", "PLZ-SET-MARK"}, - {"-p", "tcp", "--dport", "8080", "-s", "127.0.0.1", "-j", "PLZ-SET-MARK"}, - {"-p", "tcp", "--dport", "8080", "-j", "DNAT", "--to-destination", "10.0.0.2:80"}, - })) - }) - - It(fmt.Sprintf("[%s] generates a correct top-level chain", ver), func() { - ch := genToplevelDnatChain() - - Expect(ch).To(Equal(chain{ - table: "nat", - name: "CNI-HOSTPORT-DNAT", - entryChains: []string{"PREROUTING", "OUTPUT"}, - entryRules: [][]string{{"-m", "addrtype", "--dst-type", "LOCAL"}}, - })) - }) - - It(fmt.Sprintf("[%s] generates the correct mark chains", ver), func() { - masqBit := 5 - ch := genSetMarkChain(masqBit) - Expect(ch).To(Equal(chain{ - table: "nat", - name: "CNI-HOSTPORT-SETMARK", - rules: [][]string{{ - "-m", "comment", - "--comment", "CNI portfwd masquerade mark", - "-j", "MARK", - "--set-xmark", "0x20/0x20", - }}, - })) - - ch = genMarkMasqChain(masqBit) - Expect(ch).To(Equal(chain{ - table: "nat", - name: "CNI-HOSTPORT-MASQ", - entryChains: []string{"POSTROUTING"}, - entryRules: [][]string{{ - "-m", "comment", - "--comment", "CNI portfwd requiring masquerade", - }}, - rules: [][]string{{ - "-m", "mark", - "--mark", "0x20/0x20", - "-j", "MASQUERADE", - }}, - 
prependEntry: true, - })) - }) - }) - }) } }) diff --git a/plugins/meta/portmap/utils.go b/plugins/meta/portmap/utils.go index e6709089d..922705579 100644 --- a/plugins/meta/portmap/utils.go +++ b/plugins/meta/portmap/utils.go @@ -21,6 +21,8 @@ import ( "strings" "github.com/vishvananda/netlink" + + "github.com/containernetworking/plugins/pkg/utils/sysctl" ) // fmtIpPort correctly formats ip:port literals for iptables and ip6tables - @@ -52,6 +54,14 @@ func getRoutableHostIF(containerIP net.IP) string { return "" } +// enableLocalnetRouting tells the kernel not to treat 127/8 as a martian, +// so that connections with a source ip of 127/8 can cross a routing boundary. +func enableLocalnetRouting(ifName string) error { + routeLocalnetPath := "net/ipv4/conf/" + ifName + "/route_localnet" + _, err := sysctl.Sysctl(routeLocalnetPath, "1") + return err +} + // groupByProto groups port numbers by protocol func groupByProto(entries []PortMapEntry) map[string][]int { if len(entries) == 0 { diff --git a/vendor/modules.txt b/vendor/modules.txt index 76928f717..04d65189d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -267,3 +267,6 @@ google.golang.org/protobuf/types/known/anypb # gopkg.in/yaml.v3 v3.0.1 ## explicit gopkg.in/yaml.v3 +# sigs.k8s.io/knftables v0.0.17 +## explicit; go 1.20 +sigs.k8s.io/knftables diff --git a/vendor/sigs.k8s.io/knftables/.gitignore b/vendor/sigs.k8s.io/knftables/.gitignore new file mode 100644 index 000000000..896d5783b --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/.gitignore @@ -0,0 +1,2 @@ +*~ +hack/bin/golangci-lint diff --git a/vendor/sigs.k8s.io/knftables/CHANGELOG.md b/vendor/sigs.k8s.io/knftables/CHANGELOG.md new file mode 100644 index 000000000..4f1dc3a35 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/CHANGELOG.md @@ -0,0 +1,170 @@ +# ChangeLog + +## v0.0.17 + +- `ListRules()` now accepts `""` for the chain name, meaning to list + all rules in the table. 
(`@caseydavenport`) + +- `ListElements()` now handles elements with prefix/CIDR values (e.g., + `"192.168.0.0/16"`; these are represented specially in the JSON + format and the old code didn't handle them). (`@caseydavenport`) + +- Added `NumOperations()` to `Transaction` (which lets you figure out + belatedly whether you added anything to the transaction or not, and + could also be used for metrics). (`@fasaxc`) + +- `knftables.Interface` now reuses the same `bytes.Buffer` for each + call to `nft` rather than constructing a new one each time, saving + time and memory. (`@aroradaman`) + +- Fixed map element deletion in `knftables.Fake` to not mistakenly + require that you fill in the `.Value` of the element. (`@npinaeva`) + +- Added `Fake.LastTransaction`, to retrieve the most-recently-executed + transaction. (`@npinaeva`) + +## v0.0.16 + +- Fixed a bug in `Fake.ParseDump()` when using IPv6. (`@npinaeva`) + +## v0.0.15 + +- knftables now requires the nft binary to be v1.0.1 or later. This is + because earlier versions (a) had bugs that might cause them to crash + when parsing rules created by later versions of nft, and (b) always + parsed the entire ruleset at startup, even if you were only trying + to operate on a single table. The combination of those two factors + means that older versions of nft can't reliably be used from inside + a container. (`@danwinship`) + +- Fixed a bug that meant we were never setting comments on + tables/chains/sets/etc, even if nft and the kernel were both new + enough to support it. (`@tnqn`) + +- Added `Fake.ParseDump()`, to load a `Fake` from a `Fake.Dump()` + output. (`@npinaeva`) + +## v0.0.14 + +- Renamed the package `"sigs.k8s.io/knftables"`, reflecting its new + home at https://github.com/kubernetes-sigs/knftables/ + +- Improvements to `Fake`: + + - `Fake.Run()` is now properly transactional, and will have no + side effects if an error occurs. 
+ + - `Fake.Dump()` now outputs all `add chain`, `add set`, and `add + table` commands before any `add rule` and `add element` + commands, to ensure that the dumped ruleset can be passed to + `nft -f` without errors. + + - Conversely, `Fake.Run()` now does enough parsing of rules and + elements that it will notice rules that do lookups in + non-existent sets/maps, and rules/verdicts that jump to + non-existent chains, so it can error out in those cases. + +- Added `nft.Check()`, which is like `nft.Run()`, but using + `nft --check`. + +- Fixed support for ingress and egress hooks (by adding + `Chain.Device`). + +## v0.0.13 + +- Fixed a bug in `Fake.Run` where it was not properly returning "not + found" / "already exists" errors. + +## v0.0.12 + +- Renamed the package from `"github.com/danwinship/nftables"` to + `"github.com/danwinship/knftables"`, for less ambiguity. + +- Added `NameLengthMax` and `CommentLengthMax` constants. + +- Changed serialization of `Chain` to convert string-valued `Priority` + to numeric form, if possible. + +- (The `v0.0.11` tag exists but is not usable due to a bad `go.mod`) + +## v0.0.10 + +- Dropped `Define`, because nft defines turned out to not work the way + I thought (in particular, you can't do "$IP daddr"), so they end up + not really being useful for our purposes. + +- Made `NewTransaction` a method on `Interface` rather than a + top-level function. + +- Added `Transaction.String()`, for debugging + +- Fixed serialization of set/map elements with timeouts + +- Added special treament for `"@"` to `Concat` + +- Changed `nftables.New()` to return an `error` (doing the work that + used to be done by `nft.Present()`.) + +- Add autodetection for "object comment" support, and have + serialization just ignore comments on `Table`/`Chain`/`Set`/`Map` if + nft or the kernel does not support them. 
+ +- Renamed `Optional()` to `PtrTo()` + +## v0.0.9 + +- Various tweaks to `Element`: + + - Changed `Key` and `Value` from `string` to `[]string` to better + support concatenated types (and dropped the `Join()` and + `Split()` helper functions that were previously used to join and + split concatenated values). + + - Split `Name` into separate `Set` and `Map` fields, which make it + clearer what is being named, and are more consistent with + `Rule.Chain`, and provide more redundancy for distinguishing set + elements from map elements. + + - Fixed serialization of map elements with a comments. + +- Rewrote `ListElements` and `ListRules` to use `nft -j`, for easier / + more reliable parsing. But this meant that `ListRules` no longer + returns the actual text of the rule. + +## v0.0.8 + +- Fixed `Fake.List` / `Fake.ListRules` / `Fake.ListElements` to return + errors that would be properly recognized by + `IsNotFound`/`IsAlreadyExists`. + +## v0.0.7 + +- Implemented `tx.Create`, `tx.Insert`, `tx.Replace` + +- Replaced `tx.AddRule` with the `Concat` function + +## v0.0.6 + +- Added `IsNotFound` and `IsAlreadyExists` error-checking functions + +## v0.0.5 + +- Moved `Define` from `Transaction` to `Interface` + +## v0.0.3, v0.0.4 + +- Improvements to `Fake` to handle `Rule` and `Element` + deletion/overwrite. + +- Added `ListRules` and `ListElements` + +- (The `v0.0.3` and `v0.0.4` tags are identical.) + +## v0.0.2 + +- Made `Interface` be specific to a single family and table. (Before, + that was specified at the `Transaction` level.) + +## v0.0.1 + +- Initial "release" diff --git a/vendor/sigs.k8s.io/knftables/CONTRIBUTING.md b/vendor/sigs.k8s.io/knftables/CONTRIBUTING.md new file mode 100644 index 000000000..50a4c6a37 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/CONTRIBUTING.md @@ -0,0 +1,28 @@ +# Contributing Guidelines + +Welcome to Kubernetes. We are excited about the prospect of you joining our [community](https://git.k8s.io/community)! 
The Kubernetes community abides by the CNCF [code of conduct](code-of-conduct.md). Here is an excerpt: + +_As contributors and maintainers of this project, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities._ + +## Getting Started + +We have full documentation on how to get started contributing here: + + + +- [Contributor License Agreement](https://git.k8s.io/community/CLA.md) - Kubernetes projects require that you sign a Contributor License Agreement (CLA) before we can accept your pull requests +- [Kubernetes Contributor Guide](https://k8s.dev/guide) - Main contributor documentation, or you can just jump directly to the [contributing page](https://k8s.dev/docs/guide/contributing/) +- [Contributor Cheat Sheet](https://k8s.dev/cheatsheet) - Common resources for existing developers + +## Mentorship + +- [Mentoring Initiatives](https://k8s.dev/community/mentoring) - We have a diverse set of mentorship programs available that are always looking for volunteers! + +## Contact Information + +knftables is maintained by [Kubernetes SIG Network](https://github.com/kubernetes/community/tree/master/sig-network). + +- [sig-network slack channel](https://kubernetes.slack.com/messages/sig-network) +- [kubernetes-sig-network mailing list](https://groups.google.com/forum/#!forum/kubernetes-sig-network) diff --git a/vendor/sigs.k8s.io/knftables/LICENSE b/vendor/sigs.k8s.io/knftables/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/sigs.k8s.io/knftables/Makefile b/vendor/sigs.k8s.io/knftables/Makefile new file mode 100644 index 000000000..981e6256a --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/Makefile @@ -0,0 +1,32 @@ +# Copyright 2023 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +all build: + echo "Usage:" + echo "make test - run unit tests" + echo "make update - run gofmt, etc" + echo "make verify - run golangci, etc" + +clean: + +test: + ./hack/test.sh + +update: + ./hack/update.sh + +verify: + ./hack/verify.sh + +.PHONY: all build clean test update verify diff --git a/vendor/sigs.k8s.io/knftables/OWNERS b/vendor/sigs.k8s.io/knftables/OWNERS new file mode 100644 index 000000000..01baa6237 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/OWNERS @@ -0,0 +1,7 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +reviewers: + - aojea + - danwinship +approvers: + - danwinship diff --git a/vendor/sigs.k8s.io/knftables/README.md b/vendor/sigs.k8s.io/knftables/README.md new file mode 100644 index 000000000..794b15bb7 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/README.md @@ -0,0 +1,278 @@ +# knftables: a golang nftables library + +This is a library for using nftables from Go. + +It is not intended to support arbitrary use cases, but instead +specifically focuses on supporting Kubernetes components which are +using nftables in the way that nftables is supposed to be used (as +opposed to using nftables in a naively-translated-from-iptables way, +or using nftables to do totally valid things that aren't the sorts of +things Kubernetes components are likely to need to do; see the +"[iptables porting](./docs/iptables-porting.md)" doc for more thoughts +on porting old iptables-based components to nftables.) + +knftables is still under development and is not yet API stable. (See the +section on "Possible future changes" below.) + +The library is implemented as a wrapper around the `nft` CLI, because +the CLI API is the only well-documented interface to nftables. +Although it would be possible to use netlink directly (and some other +golang-based nftables libraries do this), that would result in an API +that is quite different from all documented examples of nftables usage +(e.g. 
the man pages and the [nftables wiki](http://wiki.nftables.org/)) +because there is no easy way to convert the "standard" representation +of nftables rules into the netlink form. + +(Actually, it's not quite true that there's no other usable API: the +`nft` CLI is just a thin wrapper around `libnftables`, and it would be +possible for knftables to use cgo to invoke that library instead of +using an external binary. However, this would be harder to build and +ship, so I'm not bothering with that for now. But this could be done +in the future without needing to change knftables's API.) + +knftables requires nft version 1.0.1 or later, because earlier +versions would download and process the entire ruleset regardless of +what you were doing, which, besides being pointlessly inefficient, +means that in some cases, other people using new features in _their_ +tables could prevent you from modifying _your_ table. (In particular, +a change in how some rules are generated starting in nft 1.0.3 +triggers a crash in nft 0.9.9 and earlier, _even if you aren't looking +at the table containing that rule_.) + +## Usage + +Create an `Interface` object to manage operations on a single nftables +table: + +```golang +nft, err := knftables.New(knftables.IPv4Family, "my-table") +if err != nil { + return fmt.Errorf("no nftables support: %v", err) +} +``` + +(If you want to operate on multiple tables or multiple nftables +families, you will need separate `Interface` objects for each. If you +need to check whether the system supports an nftables feature as with +`nft --check`, use `nft.Check()`, which works the same as `nft.Run()` +below.) + +You can use the `List`, `ListRules`, and `ListElements` methods on the +`Interface` to check if objects exist. `List` returns the names of +`"chains"`, `"sets"`, or `"maps"` in the table, while `ListElements` +returns `Element` objects and `ListRules` returns *partial* `Rule` +objects. 
+ +```golang +chains, err := nft.List(ctx, "chains") +if err != nil { + return fmt.Errorf("could not list chains: %v", err) +} + +FIXME + +elements, err := nft.ListElements(ctx, "map", "mymap") +if err != nil { + return fmt.Errorf("could not list map elements: %v", err) +} + +FIXME +``` + +To make changes, create a `Transaction`, add the appropriate +operations to the transaction, and then call `nft.Run` on it: + +```golang +tx := nft.NewTransaction() + +tx.Add(&knftables.Chain{ + Name: "mychain", + Comment: knftables.PtrTo("this is my chain"), +}) +tx.Flush(&knftables.Chain{ + Name: "mychain", +}) + +var destIP net.IP +var destPort uint16 +... +tx.Add(&knftables.Rule{ + Chain: "mychain", + Rule: knftables.Concat( + "ip daddr", destIP, + "ip protocol", "tcp", + "th port", destPort, + "jump", destChain, + ), +}) + +err := nft.Run(ctx, tx) +``` + +If any operation in the transaction would fail, then `Run()` will +return an error and the entire transaction will be ignored. You can +use the `knftables.IsNotFound()` and `knftables.IsAlreadyExists()` +functions to check for those well-known error types. In a large +transaction, there is no supported way to determine exactly which +operation failed. + +## `knftables.Transaction` operations + +`knftables.Transaction` operations correspond to the top-level commands +in the `nft` binary. Currently-supported operations are: + +- `tx.Add()`: adds an object, which may already exist, as with `nft add` +- `tx.Create()`: creates an object, which must not already exist, as with `nft create` +- `tx.Flush()`: flushes the contents of a table/chain/set/map, as with `nft flush` +- `tx.Delete()`: deletes an object, as with `nft delete` +- `tx.Insert()`: inserts a rule before another rule, as with `nft insert rule` +- `tx.Replace()`: replaces a rule, as with `nft replace rule` + +## Objects + +The `Transaction` methods take arguments of type `knftables.Object`.
+The currently-supported objects are: + +- `Table` +- `Chain` +- `Rule` +- `Set` +- `Map` +- `Element` + +Optional fields in objects can be filled in with the help of the +`PtrTo()` function, which just returns a pointer to its argument. + +`Concat()` can be used to concatenate a series of strings, `[]string` +arrays, and other arguments (including numbers, `net.IP`s / +`net.IPNet`s, and anything else that can be formatted usefully via +`fmt.Sprintf("%s")`) together into a single string. This is often +useful when constructing `Rule`s. + +## `knftables.Fake` + +There is a fake (in-memory) implementation of `knftables.Interface` +for use in unit tests. Use `knftables.NewFake()` instead of +`knftables.New()` to create it, and then it should work mostly the +same. See `fake.go` for more details of the public APIs for examining +the current state of the fake nftables database. + +## Missing APIs + +Various top-level object types are not yet supported (notably the +"stateful objects" like `counter`). + +Most IPTables libraries have an API for "add this rule only if it +doesn't already exist", but that does not seem as useful in nftables +(or at least "in nftables as used by Kubernetes-ish components that +aren't just blindly copying over old iptables APIs"), because chains +tend to have static rules and dynamic sets/maps, rather than having +dynamic rules. If you aren't sure if a chain has the correct rules, +you can just `Flush` it and recreate all of the rules. + +The "destroy" (delete-without-ENOENT) command that exists in newer +versions of `nft` is not currently supported because it would be +unexpectedly heavyweight to emulate on systems that don't have it, so +it is better (for now) to force callers to implement it by hand. + +`ListRules` returns `Rule` objects without the `Rule` field filled in, +because it uses the JSON API to list the rules, but there is no easy +way to convert the JSON rule representation back into plaintext form. 
+This means that it is only useful when either (a) you know the order +of the rules in the chain, but want to know their handles, or (b) you +can recognize the rules you are looking for by their comments, rather +than the rule bodies. + +## Possible future changes + +### `nft` output parsing + +`nft`'s output is documented and standardized, so it ought to be +possible for us to extract better error messages in the event of a +transaction failure. + +Additionally, if we used the `--echo` (`-e`) and `--handle` (`-a`) +flags, we could learn the handles associated with newly-created +objects in a transaction, and return these to the caller somehow. +(E.g., by setting the `Handle` field in the object that had been +passed to `tx.Add` when the transaction is run.) + +(For now, `ListRules` fills in the handles of the rules it returns, so +it's possible to find out a rule's handle after the fact that way. For +other supported object types, either handles don't exist (`Element`) +or you don't really need to know their handles because it's possible +to delete by name instead (`Table`, `Chain`, `Set`, `Map`).) + +### List APIs + +The fact that `List` works completely differently from `ListRules` and +`ListElements` is a historical artifact. + +I would like to have a single function + +```golang +List[T Object](ctx context.Context, template T) ([]T, error) +``` + +So you could say + +```golang +elements, err := nft.List(ctx, &knftables.Element{Set: "myset"}) +``` + +to list the elements of "myset". But this doesn't actually compile +("`syntax error: method must have no type parameters`") because +allowing that would apparently introduce extremely complicated edge +cases in Go generics. 
+ +### Set/map type representation + +There is currently an annoying asymmetry in the representation of +concatenated types between `Set`/`Map` and `Element`, where the former +uses a string containing `nft` syntax, and the latter uses an array: + +```golang +tx.Add(&knftables.Set{ + Name: "firewall", + Type: "ipv4_addr . inet_proto . inet_service", +}) +tx.Add(&knftables.Element{ + Set: "firewall", + Key: []string{"10.1.2.3", "tcp", "80"}, +}) +``` + +This will probably be fixed at some point, which may result in a +change to how the `type` vs `typeof` distinction is handled as well. + +### Optimization and rule representation + +We will need to optimize the performance of large transactions. One +change that is likely is to avoid pre-concatenating rule elements in +cases like: + +```golang +tx.Add(&knftables.Rule{ + Chain: "mychain", + Rule: knftables.Concat( + "ip daddr", destIP, + "ip protocol", "tcp", + "th port", destPort, + "jump", destChain, + ), +}) +``` + +This will presumably require a change to `knftables.Rule` and/or +`knftables.Concat()` but I'm not sure exactly what it will be. + +## Community, discussion, contribution, and support + +knftables is maintained by [Kubernetes SIG Network](https://github.com/kubernetes/community/tree/master/sig-network). + +- [sig-network slack channel](https://kubernetes.slack.com/messages/sig-network) +- [kubernetes-sig-network mailing list](https://groups.google.com/forum/#!forum/kubernetes-sig-network) + +See [`CONTRIBUTING.md`](CONTRIBUTING.md) for more information about +contributing. Participation in the Kubernetes community is governed by +the [Kubernetes Code of Conduct](code-of-conduct.md). diff --git a/vendor/sigs.k8s.io/knftables/SECURITY_CONTACTS b/vendor/sigs.k8s.io/knftables/SECURITY_CONTACTS new file mode 100644 index 000000000..eb4390a2e --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/SECURITY_CONTACTS @@ -0,0 +1,13 @@ +# Defined below are the security contacts for this repo.
+# +# They are the contact point for the Security Response Committee to reach out +# to for triaging and handling of incoming issues. +# +# The below names agree to abide by the +# [Embargo Policy](https://git.k8s.io/security/private-distributors-list.md#embargo-policy) +# and will be removed and replaced if they violate that agreement. +# +# DO NOT REPORT SECURITY VULNERABILITIES DIRECTLY TO THESE NAMES, FOLLOW THE +# INSTRUCTIONS AT https://kubernetes.io/security/ + +danwinship diff --git a/vendor/sigs.k8s.io/knftables/code-of-conduct.md b/vendor/sigs.k8s.io/knftables/code-of-conduct.md new file mode 100644 index 000000000..0d15c00cf --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/code-of-conduct.md @@ -0,0 +1,3 @@ +# Kubernetes Community Code of Conduct + +Please refer to our [Kubernetes Community Code of Conduct](https://git.k8s.io/community/code-of-conduct.md) diff --git a/vendor/sigs.k8s.io/knftables/error.go b/vendor/sigs.k8s.io/knftables/error.go new file mode 100644 index 000000000..fe57da03b --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/error.go @@ -0,0 +1,94 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package knftables + +import ( + "errors" + "fmt" + "os/exec" + "strings" + "syscall" +) + +type nftablesError struct { + wrapped error + msg string + errno syscall.Errno +} + +// wrapError wraps an error resulting from running nft. If the error is an *exec.ExitError with non-empty Stderr, that stderr becomes the message, and errno is set to ENOENT or EEXIST when the first line of stderr contains the corresponding nft error string. +func wrapError(err error) error { + nerr := &nftablesError{wrapped: err, msg: err.Error()} + ee := &exec.ExitError{} + if errors.As(err, &ee) { + if len(ee.Stderr) > 0 { + nerr.msg = string(ee.Stderr) + eol := strings.Index(nerr.msg, "\n") + // The nft binary does not call setlocale() and so will return + // English error strings regardless of the locale. + enoent := strings.Index(nerr.msg, "No such file or directory") + eexist := strings.Index(nerr.msg, "File exists") + if enoent != -1 && (enoent < eol || eol == -1) { + nerr.errno = syscall.ENOENT + } else if eexist != -1 && (eexist < eol || eol == -1) { + nerr.errno = syscall.EEXIST + } + } + } + return nerr +} + +// notFoundError returns an nftablesError with the given message for which IsNotFound will +// return true. +func notFoundError(format string, args ...interface{}) error { + return &nftablesError{msg: fmt.Sprintf(format, args...), errno: syscall.ENOENT} +} + +// existsError returns an nftablesError with the given message for which IsAlreadyExists +// will return true. +func existsError(format string, args ...interface{}) error { + return &nftablesError{msg: fmt.Sprintf(format, args...), errno: syscall.EEXIST} +} + +func (nerr *nftablesError) Error() string { + return nerr.msg +} + +func (nerr *nftablesError) Unwrap() error { + return nerr.wrapped +} + +// IsNotFound tests if err corresponds to an nftables "not found" error of any sort. +// (e.g., in response to a "delete rule" command, this might indicate that the rule +// doesn't exist, or the chain doesn't exist, or the table doesn't exist.)
+func IsNotFound(err error) bool { + var nerr *nftablesError + if errors.As(err, &nerr) { + return nerr.errno == syscall.ENOENT + } + return false +} + +// IsAlreadyExists tests if err corresponds to an nftables "already exists" error (e.g. +// when doing a "create" rather than an "add"). It returns false for any error that is not, and does not wrap, an *nftablesError. +func IsAlreadyExists(err error) bool { + var nerr *nftablesError + if errors.As(err, &nerr) { + return nerr.errno == syscall.EEXIST + } + return false +} diff --git a/vendor/sigs.k8s.io/knftables/exec.go b/vendor/sigs.k8s.io/knftables/exec.go new file mode 100644 index 000000000..154b5bc45 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/exec.go @@ -0,0 +1,48 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package knftables + +import ( + "os/exec" +) + +// execer is a mockable wrapper around os/exec. +type execer interface { + // LookPath wraps exec.LookPath + LookPath(file string) (string, error) + + // Run runs cmd as with cmd.Output(). If an error occurs, and the process outputs + // stderr, then that output will be returned in the error.
+ Run(cmd *exec.Cmd) (string, error) +} + +// realExec implements execer by actually using os/exec +type realExec struct{} + +// LookPath is part of execer +func (realExec) LookPath(file string) (string, error) { + return exec.LookPath(file) +} + +// Run is part of execer. It returns cmd's stdout as a string; a non-nil error from cmd.Output() is passed through wrapError before being returned. +func (realExec) Run(cmd *exec.Cmd) (string, error) { + out, err := cmd.Output() + if err != nil { + err = wrapError(err) + } + return string(out), err +} diff --git a/vendor/sigs.k8s.io/knftables/fake.go b/vendor/sigs.k8s.io/knftables/fake.go new file mode 100644 index 000000000..584c27a54 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/fake.go @@ -0,0 +1,671 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package knftables + +import ( + "context" + "fmt" + "reflect" + "regexp" + "sort" + "strings" +) + +// Fake is a fake implementation of Interface +type Fake struct { + nftContext + + nextHandle int + + // Table contains the Interface's table. This will be `nil` until you `tx.Add()` + // the table. + Table *FakeTable + + // LastTransaction is the last transaction passed to Run(). It will remain set until the + // next time Run() is called. (It is not affected by Check().)
+ LastTransaction *Transaction +} + +// FakeTable wraps Table for the Fake implementation +type FakeTable struct { + Table + + // Chains contains the table's chains, keyed by name + Chains map[string]*FakeChain + + // Sets contains the table's sets, keyed by name + Sets map[string]*FakeSet + + // Maps contains the table's maps, keyed by name + Maps map[string]*FakeMap +} + +// FakeChain wraps Chain for the Fake implementation +type FakeChain struct { + Chain + + // Rules contains the chain's rules, in order + Rules []*Rule +} + +// FakeSet wraps Set for the Fake implementation +type FakeSet struct { + Set + + // Elements contains the set's elements. You can also use the FakeSet's + // FindElement() method to see if a particular element is present. + Elements []*Element +} + +// FakeMap wraps Map for the Fake implementation +type FakeMap struct { + Map + + // Elements contains the map's elements. You can also use the FakeMap's + // FindElement() method to see if a particular element is present. + Elements []*Element +} + +// NewFake creates a new fake Interface, for unit tests +func NewFake(family Family, table string) *Fake { + return &Fake{ + nftContext: nftContext{ + family: family, + table: table, + }, + } +} + +var _ Interface = &Fake{} + +// List is part of Interface. objectType may be singular or plural (e.g. "chain" or "chains"); names come from map iteration and so are returned in no particular order.
+func (fake *Fake) List(_ context.Context, objectType string) ([]string, error) { + if fake.Table == nil { + return nil, notFoundError("no such table %q", fake.table) + } + + var result []string + + switch objectType { + case "chain", "chains": + for name := range fake.Table.Chains { + result = append(result, name) + } + case "set", "sets": + for name := range fake.Table.Sets { + result = append(result, name) + } + case "map", "maps": + for name := range fake.Table.Maps { + result = append(result, name) + } + + default: + return nil, fmt.Errorf("unsupported object type %q", objectType) + } + + return result, nil +} + +// ListRules is part of Interface +func (fake *Fake) ListRules(_ context.Context, chain string) ([]*Rule, error) { + if fake.Table == nil { + return nil, notFoundError("no such table %q", fake.table) + } + + rules := []*Rule{} + if chain == "" { + // Include all rules across all chains. + for _, ch := range fake.Table.Chains { + rules = append(rules, ch.Rules...) + } + } else { + ch := fake.Table.Chains[chain] + if ch == nil { + return nil, notFoundError("no such chain %q", chain) + } + rules = append(rules, ch.Rules...) 
+ } + return rules, nil +} + +// ListElements is part of Interface +func (fake *Fake) ListElements(_ context.Context, objectType, name string) ([]*Element, error) { + if fake.Table == nil { + return nil, notFoundError("no such %s %q", objectType, name) + } + if objectType == "set" { + s := fake.Table.Sets[name] + if s != nil { + return s.Elements, nil + } + } else if objectType == "map" { + m := fake.Table.Maps[name] + if m != nil { + return m.Elements, nil + } + } + return nil, notFoundError("no such %s %q", objectType, name) +} + +// NewTransaction is part of Interface +func (fake *Fake) NewTransaction() *Transaction { + return &Transaction{nftContext: &fake.nftContext} +} + +// Run is part of Interface +func (fake *Fake) Run(_ context.Context, tx *Transaction) error { + fake.LastTransaction = tx + updatedTable, err := fake.run(tx) + if err == nil { + fake.Table = updatedTable + } + return err +} + +// Check is part of Interface +func (fake *Fake) Check(_ context.Context, tx *Transaction) error { + _, err := fake.run(tx) + return err +} + +func (fake *Fake) run(tx *Transaction) (*FakeTable, error) { + if tx.err != nil { + return nil, tx.err + } + + updatedTable := fake.Table.copy() + for _, op := range tx.operations { + // If the table hasn't been created, and this isn't a Table operation, then fail + if updatedTable == nil { + if _, ok := op.obj.(*Table); !ok { + return nil, notFoundError("no such table \"%s %s\"", fake.family, fake.table) + } + } + + if op.verb == addVerb || op.verb == createVerb || op.verb == insertVerb { + fake.nextHandle++ + } + + switch obj := op.obj.(type) { + case *Table: + err := checkExists(op.verb, "table", fake.table, updatedTable != nil) + if err != nil { + return nil, err + } + switch op.verb { + case flushVerb: + updatedTable = nil + fallthrough + case addVerb, createVerb: + if updatedTable != nil { + continue + } + table := *obj + table.Handle = PtrTo(fake.nextHandle) + updatedTable = &FakeTable{ + Table: table, + Chains: 
make(map[string]*FakeChain), + Sets: make(map[string]*FakeSet), + Maps: make(map[string]*FakeMap), + } + case deleteVerb: + updatedTable = nil + default: + return nil, fmt.Errorf("unhandled operation %q", op.verb) + } + + case *Chain: + existingChain := updatedTable.Chains[obj.Name] + err := checkExists(op.verb, "chain", obj.Name, existingChain != nil) + if err != nil { + return nil, err + } + switch op.verb { + case addVerb, createVerb: + if existingChain != nil { + continue + } + chain := *obj + chain.Handle = PtrTo(fake.nextHandle) + updatedTable.Chains[obj.Name] = &FakeChain{ + Chain: chain, + } + case flushVerb: + existingChain.Rules = nil + case deleteVerb: + // FIXME delete-by-handle + delete(updatedTable.Chains, obj.Name) + default: + return nil, fmt.Errorf("unhandled operation %q", op.verb) + } + + case *Rule: + existingChain := updatedTable.Chains[obj.Chain] + if existingChain == nil { + return nil, notFoundError("no such chain %q", obj.Chain) + } + if op.verb == deleteVerb { + i := findRule(existingChain.Rules, *obj.Handle) + if i == -1 { + return nil, notFoundError("no rule with handle %d", *obj.Handle) + } + existingChain.Rules = append(existingChain.Rules[:i], existingChain.Rules[i+1:]...) + continue + } + + rule := *obj + refRule := -1 + if rule.Handle != nil { + refRule = findRule(existingChain.Rules, *obj.Handle) + if refRule == -1 { + return nil, notFoundError("no rule with handle %d", *obj.Handle) + } + } else if obj.Index != nil { + if *obj.Index >= len(existingChain.Rules) { + return nil, notFoundError("no rule with index %d", *obj.Index) + } + refRule = *obj.Index + } + + if err := checkRuleRefs(obj, updatedTable); err != nil { + return nil, err + } + + switch op.verb { + case addVerb: + if refRule == -1 { + existingChain.Rules = append(existingChain.Rules, &rule) + } else { + existingChain.Rules = append(existingChain.Rules[:refRule+1], append([]*Rule{&rule}, existingChain.Rules[refRule+1:]...)...) 
+ } + rule.Handle = PtrTo(fake.nextHandle) + case insertVerb: + if refRule == -1 { + existingChain.Rules = append([]*Rule{&rule}, existingChain.Rules...) + } else { + existingChain.Rules = append(existingChain.Rules[:refRule], append([]*Rule{&rule}, existingChain.Rules[refRule:]...)...) + } + rule.Handle = PtrTo(fake.nextHandle) + case replaceVerb: + existingChain.Rules[refRule] = &rule + default: + return nil, fmt.Errorf("unhandled operation %q", op.verb) + } + + case *Set: + existingSet := updatedTable.Sets[obj.Name] + err := checkExists(op.verb, "set", obj.Name, existingSet != nil) + if err != nil { + return nil, err + } + switch op.verb { + case addVerb, createVerb: + if existingSet != nil { + continue + } + set := *obj + set.Handle = PtrTo(fake.nextHandle) + updatedTable.Sets[obj.Name] = &FakeSet{ + Set: set, + } + case flushVerb: + existingSet.Elements = nil + case deleteVerb: + // FIXME delete-by-handle + delete(updatedTable.Sets, obj.Name) + default: + return nil, fmt.Errorf("unhandled operation %q", op.verb) + } + case *Map: + existingMap := updatedTable.Maps[obj.Name] + err := checkExists(op.verb, "map", obj.Name, existingMap != nil) + if err != nil { + return nil, err + } + switch op.verb { + case addVerb: + if existingMap != nil { + continue + } + mapObj := *obj + mapObj.Handle = PtrTo(fake.nextHandle) + updatedTable.Maps[obj.Name] = &FakeMap{ + Map: mapObj, + } + case flushVerb: + existingMap.Elements = nil + case deleteVerb: + // FIXME delete-by-handle + delete(updatedTable.Maps, obj.Name) + default: + return nil, fmt.Errorf("unhandled operation %q", op.verb) + } + case *Element: + if obj.Set != "" { + existingSet := updatedTable.Sets[obj.Set] + if existingSet == nil { + return nil, notFoundError("no such set %q", obj.Set) + } + switch op.verb { + case addVerb, createVerb: + element := *obj + if i := findElement(existingSet.Elements, element.Key); i != -1 { + if op.verb == createVerb { + return nil, existsError("element %q already exists", 
strings.Join(element.Key, " . ")) + } + existingSet.Elements[i] = &element + } else { + existingSet.Elements = append(existingSet.Elements, &element) + } + case deleteVerb: + element := *obj + if i := findElement(existingSet.Elements, element.Key); i != -1 { + existingSet.Elements = append(existingSet.Elements[:i], existingSet.Elements[i+1:]...) + } else { + return nil, notFoundError("no such element %q", strings.Join(element.Key, " . ")) + } + default: + return nil, fmt.Errorf("unhandled operation %q", op.verb) + } + } else { + existingMap := updatedTable.Maps[obj.Map] + if existingMap == nil { + return nil, notFoundError("no such map %q", obj.Map) + } + if err := checkElementRefs(obj, updatedTable); err != nil { + return nil, err + } + switch op.verb { + case addVerb, createVerb: + element := *obj + if i := findElement(existingMap.Elements, element.Key); i != -1 { + if op.verb == createVerb { + return nil, existsError("element %q already exists", strings.Join(element.Key, ". ")) + } + existingMap.Elements[i] = &element + } else { + existingMap.Elements = append(existingMap.Elements, &element) + } + case deleteVerb: + element := *obj + if i := findElement(existingMap.Elements, element.Key); i != -1 { + existingMap.Elements = append(existingMap.Elements[:i], existingMap.Elements[i+1:]...) + } else { + return nil, notFoundError("no such element %q", strings.Join(element.Key, " . 
")) + } + default: + return nil, fmt.Errorf("unhandled operation %q", op.verb) + } + } + default: + return nil, fmt.Errorf("unhandled object type %T", op.obj) + } + } + + return updatedTable, nil +} + +func checkExists(verb verb, objectType, name string, exists bool) error { + switch verb { + case addVerb: + // It's fine if the object either exists or doesn't + return nil + case createVerb: + if exists { + return existsError("%s %q already exists", objectType, name) + } + default: + if !exists { + return notFoundError("no such %s %q", objectType, name) + } + } + return nil +} + +// checkRuleRefs checks for chains, sets, and maps referenced by rule in table +func checkRuleRefs(rule *Rule, table *FakeTable) error { + words := strings.Split(rule.Rule, " ") + for i, word := range words { + if strings.HasPrefix(word, "@") { + name := word[1:] + if i > 0 && (words[i] == "map" || words[i] == "vmap") { + if table.Maps[name] == nil { + return notFoundError("no such map %q", name) + } + } else { + // recent nft lets you use a map in a set lookup + if table.Sets[name] == nil && table.Maps[name] == nil { + return notFoundError("no such set %q", name) + } + } + } else if (word == "goto" || word == "jump") && i < len(words)-1 { + name := words[i+1] + if table.Chains[name] == nil { + return notFoundError("no such chain %q", name) + } + } + } + return nil +} + +// checkElementRefs checks for chains referenced by an element +func checkElementRefs(element *Element, table *FakeTable) error { + if len(element.Value) != 1 { + return nil + } + words := strings.Split(element.Value[0], " ") + if len(words) == 2 && (words[0] == "goto" || words[0] == "jump") { + name := words[1] + if table.Chains[name] == nil { + return notFoundError("no such chain %q", name) + } + } + return nil +} + +// Dump dumps the current contents of fake, in a way that looks like an nft transaction. 
+func (fake *Fake) Dump() string { + if fake.Table == nil { + return "" + } + + buf := &strings.Builder{} + + table := fake.Table + chains := sortKeys(table.Chains) + sets := sortKeys(table.Sets) + maps := sortKeys(table.Maps) + + // Write out all of the object adds first. + + table.writeOperation(addVerb, &fake.nftContext, buf) + for _, cname := range chains { + ch := table.Chains[cname] + ch.writeOperation(addVerb, &fake.nftContext, buf) + } + for _, sname := range sets { + s := table.Sets[sname] + s.writeOperation(addVerb, &fake.nftContext, buf) + } + for _, mname := range maps { + m := table.Maps[mname] + m.writeOperation(addVerb, &fake.nftContext, buf) + } + + // Now write their contents. + + for _, cname := range chains { + ch := table.Chains[cname] + for _, rule := range ch.Rules { + // Avoid outputing handles + dumpRule := *rule + dumpRule.Handle = nil + dumpRule.Index = nil + dumpRule.writeOperation(addVerb, &fake.nftContext, buf) + } + } + for _, sname := range sets { + s := table.Sets[sname] + for _, element := range s.Elements { + element.writeOperation(addVerb, &fake.nftContext, buf) + } + } + for _, mname := range maps { + m := table.Maps[mname] + for _, element := range m.Elements { + element.writeOperation(addVerb, &fake.nftContext, buf) + } + } + + return buf.String() +} + +// ParseDump can parse a dump for a given nft instance. +// It expects fake's table name and family in all rules. +// The best way to verify that everything important was properly parsed is to +// compare given data with nft.Dump() output. 
+func (fake *Fake) ParseDump(data string) (err error) { + lines := strings.Split(data, "\n") + var i int + var line string + parsingDone := false + defer func() { + if err != nil && !parsingDone { + err = fmt.Errorf("%w (at line %v: %s", err, i+1, line) + } + }() + tx := fake.NewTransaction() + commonRegexp := regexp.MustCompile(fmt.Sprintf(`add %s %s %s (.*)`, noSpaceGroup, fake.family, fake.table)) + + for i, line = range lines { + line = strings.TrimSpace(line) + if line == "" || line[0] == '#' { + continue + } + match := commonRegexp.FindStringSubmatch(line) + if match == nil { + return fmt.Errorf("could not parse, or wrong table/family") + } + var obj Object + switch match[1] { + case "table": + obj = &Table{} + case "chain": + obj = &Chain{} + case "rule": + obj = &Rule{} + case "map": + obj = &Map{} + case "set": + obj = &Set{} + case "element": + obj = &Element{} + default: + return fmt.Errorf("unknown object %s", match[1]) + } + err = obj.parse(match[2]) + if err != nil { + return err + } + tx.Add(obj) + } + parsingDone = true + return fake.Run(context.Background(), tx) +} + +func sortKeys[K ~string, V any](m map[K]V) []K { + keys := make([]K, 0, len(m)) + for key := range m { + keys = append(keys, key) + } + sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] }) + return keys +} + +func findRule(rules []*Rule, handle int) int { + for i := range rules { + if rules[i].Handle != nil && *rules[i].Handle == handle { + return i + } + } + return -1 +} + +func findElement(elements []*Element, key []string) int { + for i := range elements { + if reflect.DeepEqual(elements[i].Key, key) { + return i + } + } + return -1 +} + +// copy creates a copy of table with new arrays/maps so we can perform a transaction +// on it without changing the original table. 
+func (table *FakeTable) copy() *FakeTable { + if table == nil { + return nil + } + + tcopy := &FakeTable{ + Table: table.Table, + Chains: make(map[string]*FakeChain), + Sets: make(map[string]*FakeSet), + Maps: make(map[string]*FakeMap), + } + for name, chain := range table.Chains { + tcopy.Chains[name] = &FakeChain{ + Chain: chain.Chain, + Rules: append([]*Rule{}, chain.Rules...), + } + } + for name, set := range table.Sets { + tcopy.Sets[name] = &FakeSet{ + Set: set.Set, + Elements: append([]*Element{}, set.Elements...), + } + } + for name, mapObj := range table.Maps { + tcopy.Maps[name] = &FakeMap{ + Map: mapObj.Map, + Elements: append([]*Element{}, mapObj.Elements...), + } + } + + return tcopy +} + +// FindElement finds an element of the set with the given key. If there is no matching +// element, it returns nil. +func (s *FakeSet) FindElement(key ...string) *Element { + index := findElement(s.Elements, key) + if index == -1 { + return nil + } + return s.Elements[index] +} + +// FindElement finds an element of the map with the given key. If there is no matching +// element, it returns nil. +func (m *FakeMap) FindElement(key ...string) *Element { + index := findElement(m.Elements, key) + if index == -1 { + return nil + } + return m.Elements[index] +} diff --git a/vendor/sigs.k8s.io/knftables/nftables.go b/vendor/sigs.k8s.io/knftables/nftables.go new file mode 100644 index 000000000..8cb343806 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/nftables.go @@ -0,0 +1,514 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package knftables + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os/exec" + "strings" + "sync" +) + +// Interface is an interface for running nftables commands against a given family and table. +type Interface interface { + // NewTransaction returns a new (empty) Transaction + NewTransaction() *Transaction + + // Run runs a Transaction and returns the result. The IsNotFound and + // IsAlreadyExists methods can be used to test the result. + Run(ctx context.Context, tx *Transaction) error + + // Check does a dry-run of a Transaction (as with `nft --check`) and returns the + // result. The IsNotFound and IsAlreadyExists methods can be used to test the + // result. + Check(ctx context.Context, tx *Transaction) error + + // List returns a list of the names of the objects of objectType ("chain", "set", + // or "map") in the table. If there are no such objects, this will return an empty + // list and no error. + List(ctx context.Context, objectType string) ([]string, error) + + // ListRules returns a list of the rules in a chain, in order. If no chain name is + // specified, then all rules within the table will be returned. Note that at the + // present time, the Rule objects will have their `Comment` and `Handle` fields + // filled in, but *not* the actual `Rule` field. So this can only be used to find + // the handles of rules if they have unique comments to recognize them by, or if + // you know the order of the rules within the chain. If the chain exists but + // contains no rules, this will return an empty list and no error. + ListRules(ctx context.Context, chain string) ([]*Rule, error) + + // ListElements returns a list of the elements in a set or map. (objectType should + // be "set" or "map".) If the set/map exists but contains no elements, this will + // return an empty list and no error. 
// realNFTables is an implementation of Interface
type realNFTables struct {
	// nftContext carries the family and table this instance operates on, plus
	// the noObjectComments flag that newInternal sets when its probe transaction
	// with a table comment fails.
	nftContext

	// bufferMutex is held by Run and Check for the whole time they reset, fill,
	// and hand buffer to the nft command.
	bufferMutex sync.Mutex
	// buffer receives the serialized transaction and is used as the command's
	// standard input.
	buffer *bytes.Buffer

	// exec is used to locate and run the nft binary (a test seam; see
	// newInternal).
	exec execer
	// path is the result of looking up "nft" via exec.LookPath in newInternal.
	path string
}
+ tx := nft.NewTransaction() + tx.Add(&Table{}) + if err := nft.Check(context.TODO(), tx); err != nil { + return nil, fmt.Errorf("could not run nftables command: %w", err) + } + + nft.noObjectComments = true + } + + return nft, nil +} + +// New creates a new nftables.Interface for interacting with the given table. If nftables +// is not available/usable on the current host, it will return an error. +func New(family Family, table string) (Interface, error) { + return newInternal(family, table, realExec{}) +} + +// NewTransaction is part of Interface +func (nft *realNFTables) NewTransaction() *Transaction { + return &Transaction{nftContext: &nft.nftContext} +} + +// Run is part of Interface +func (nft *realNFTables) Run(ctx context.Context, tx *Transaction) error { + nft.bufferMutex.Lock() + defer nft.bufferMutex.Unlock() + + if tx.err != nil { + return tx.err + } + + nft.buffer.Reset() + err := tx.populateCommandBuf(nft.buffer) + if err != nil { + return err + } + + cmd := exec.CommandContext(ctx, nft.path, "-f", "-") + cmd.Stdin = nft.buffer + _, err = nft.exec.Run(cmd) + return err +} + +// Check is part of Interface +func (nft *realNFTables) Check(ctx context.Context, tx *Transaction) error { + nft.bufferMutex.Lock() + defer nft.bufferMutex.Unlock() + + if tx.err != nil { + return tx.err + } + + nft.buffer.Reset() + err := tx.populateCommandBuf(nft.buffer) + if err != nil { + return err + } + + cmd := exec.CommandContext(ctx, nft.path, "--check", "-f", "-") + cmd.Stdin = nft.buffer + _, err = nft.exec.Run(cmd) + return err +} + +// jsonVal looks up key in json; if it exists and is of type T, it returns (json[key], true). +// Otherwise it returns (_, false). 
// jsonVal fetches json[key] as a T. It returns (value, true) when the key is
// present and its value has dynamic type T; in every other case it returns the
// zero value of T and false.
func jsonVal[T any](json map[string]interface{}, key string) (T, bool) {
	raw, found := json[key]
	if !found {
		var none T
		return none, false
	}
	typed, isT := raw.(T)
	return typed, isT
}
+ if version, ok := jsonVal[float64](metainfo, "json_schema_version"); !ok || version != 1.0 { + return nil, fmt.Errorf("could not find supported json_schema_version in nft output %q", listOutput) + } + + var objects []map[string]interface{} + for _, objContainer := range nftablesResult { + obj := objContainer[objectType] + if obj != nil { + objects = append(objects, obj) + } + } + return objects, nil +} + +// List is part of Interface. +func (nft *realNFTables) List(ctx context.Context, objectType string) ([]string, error) { + // All currently-existing nftables object types have plural forms that are just + // the singular form plus 's'. + var typeSingular, typePlural string + if objectType[len(objectType)-1] == 's' { + typeSingular = objectType[:len(objectType)-1] + typePlural = objectType + } else { + typeSingular = objectType + typePlural = objectType + "s" + } + + cmd := exec.CommandContext(ctx, nft.path, "--json", "list", typePlural, string(nft.family)) + out, err := nft.exec.Run(cmd) + if err != nil { + return nil, fmt.Errorf("failed to run nft: %w", err) + } + + objects, err := getJSONObjects(out, typeSingular) + if err != nil { + return nil, err + } + + var result []string + for _, obj := range objects { + objTable, _ := jsonVal[string](obj, "table") + if objTable != nft.table { + continue + } + + if name, ok := jsonVal[string](obj, "name"); ok { + result = append(result, name) + } + } + return result, nil +} + +// ListRules is part of Interface +func (nft *realNFTables) ListRules(ctx context.Context, chain string) ([]*Rule, error) { + // If no chain is given, return all rules from within the table. 
+ var cmd *exec.Cmd + if chain == "" { + cmd = exec.CommandContext(ctx, nft.path, "--json", "list", "table", string(nft.family), nft.table) + } else { + cmd = exec.CommandContext(ctx, nft.path, "--json", "list", "chain", string(nft.family), nft.table, chain) + } + out, err := nft.exec.Run(cmd) + if err != nil { + return nil, fmt.Errorf("failed to run nft: %w", err) + } + + jsonRules, err := getJSONObjects(out, "rule") + if err != nil { + return nil, fmt.Errorf("unable to parse JSON output: %w", err) + } + + rules := make([]*Rule, 0, len(jsonRules)) + for _, jsonRule := range jsonRules { + parentChain, ok := jsonVal[string](jsonRule, "chain") + if !ok { + return nil, fmt.Errorf("unexpected JSON output from nft (rule with no chain)") + } + rule := &Rule{ + Chain: parentChain, + } + + // handle is written as an integer in nft's output, but json.Unmarshal + // will have parsed it as a float64. (Handles are uint64s, but they are + // assigned consecutively starting from 1, so as long as fewer than 2**53 + // nftables objects have been created since boot time, we won't run into + // float64-vs-uint64 precision issues.) 
+ if handle, ok := jsonVal[float64](jsonRule, "handle"); ok { + rule.Handle = PtrTo(int(handle)) + } + if comment, ok := jsonVal[string](jsonRule, "comment"); ok { + rule.Comment = &comment + } + + rules = append(rules, rule) + } + return rules, nil +} + +// ListElements is part of Interface +func (nft *realNFTables) ListElements(ctx context.Context, objectType, name string) ([]*Element, error) { + cmd := exec.CommandContext(ctx, nft.path, "--json", "list", objectType, string(nft.family), nft.table, name) + out, err := nft.exec.Run(cmd) + if err != nil { + return nil, fmt.Errorf("failed to run nft: %w", err) + } + + jsonSetsOrMaps, err := getJSONObjects(out, objectType) + if err != nil { + return nil, fmt.Errorf("unable to parse JSON output: %w", err) + } + if len(jsonSetsOrMaps) != 1 { + return nil, fmt.Errorf("unexpected JSON output from nft (multiple results)") + } + + jsonElements, _ := jsonVal[[]interface{}](jsonSetsOrMaps[0], "elem") + elements := make([]*Element, 0, len(jsonElements)) + for _, jsonElement := range jsonElements { + var key, value interface{} + + elem := &Element{} + if objectType == "set" { + elem.Set = name + key = jsonElement + } else { + elem.Map = name + tuple, ok := jsonElement.([]interface{}) + if !ok || len(tuple) != 2 { + return nil, fmt.Errorf("unexpected JSON output from nft (elem is not [key,val]: %q)", jsonElement) + } + key, value = tuple[0], tuple[1] + } + + // If the element has a comment, then key will be a compound object like: + // + // { + // "elem": { + // "val": "192.168.0.1", + // "comment": "this is a comment" + // } + // } + // + // (Where "val" contains the value that key would have held if there was no + // comment.) 
+ if obj, ok := key.(map[string]interface{}); ok { + if compoundElem, ok := jsonVal[map[string]interface{}](obj, "elem"); ok { + if key, ok = jsonVal[interface{}](compoundElem, "val"); !ok { + return nil, fmt.Errorf("unexpected JSON output from nft (elem with no val: %q)", jsonElement) + } + if comment, ok := jsonVal[string](compoundElem, "comment"); ok { + elem.Comment = &comment + } + } + } + + elem.Key, err = parseElementValue(key) + if err != nil { + return nil, err + } + if value != nil { + elem.Value, err = parseElementValue(value) + if err != nil { + return nil, err + } + } + + elements = append(elements, elem) + } + return elements, nil +} + +// parseElementValue parses a JSON element key/value, handling concatenations, prefixes, and +// converting numeric or "verdict" values to strings. +func parseElementValue(json interface{}) ([]string, error) { + // json can be: + // + // - a single string, e.g. "192.168.1.3" + // + // - a single number, e.g. 80 + // + // - a prefix, expressed as an object: + // { + // "prefix": { + // "addr": "192.168.0.0", + // "len": 16, + // } + // } + // + // - a concatenation, expressed as an object containing an array of simple + // values: + // { + // "concat": [ + // "192.168.1.3", + // "tcp", + // 80 + // ] + // } + // + // - a verdict (for a vmap value), expressed as an object: + // { + // "drop": null + // } + // + // { + // "goto": { + // "target": "destchain" + // } + // } + + switch val := json.(type) { + case string: + return []string{val}, nil + case float64: + return []string{fmt.Sprintf("%d", int(val))}, nil + case map[string]interface{}: + if concat, _ := jsonVal[[]interface{}](val, "concat"); concat != nil { + vals := make([]string, len(concat)) + for i := range concat { + if str, ok := concat[i].(string); ok { + vals[i] = str + } else if num, ok := concat[i].(float64); ok { + vals[i] = fmt.Sprintf("%d", int(num)) + } else { + return nil, fmt.Errorf("could not parse element value %q", concat[i]) + } + } + return 
vals, nil + } else if prefix, _ := jsonVal[map[string]interface{}](val, "prefix"); prefix != nil { + // For prefix-type elements, return the element in CIDR representation. + addr, ok := jsonVal[string](prefix, "addr") + if !ok { + return nil, fmt.Errorf("could not parse 'addr' value as string: %q", prefix) + } + length, ok := jsonVal[float64](prefix, "len") + if !ok { + return nil, fmt.Errorf("could not parse 'len' value as number: %q", prefix) + } + return []string{fmt.Sprintf("%s/%d", addr, int(length))}, nil + } else if len(val) == 1 { + var verdict string + // We just checked that len(val) == 1, so this loop body will only + // run once + for k, v := range val { + if v == nil { + verdict = k + } else if target, ok := v.(map[string]interface{}); ok { + verdict = fmt.Sprintf("%s %s", k, target["target"]) + } + } + return []string{verdict}, nil + } + } + + return nil, fmt.Errorf("could not parse element value %q", json) +} diff --git a/vendor/sigs.k8s.io/knftables/objects.go b/vendor/sigs.k8s.io/knftables/objects.go new file mode 100644 index 000000000..6a6287939 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/objects.go @@ -0,0 +1,581 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
// getComment parses a match for the commentGroup regexp (below). To distinguish
// between an empty comment and no comment, the comment is captured together with
// its double quotes: an empty match means "no comment" and yields nil, while
// `""` yields a pointer to the empty string.
//
// Comments are written with %q, so the match is decoded with strconv.Unquote,
// which restores escaped quotes and backslashes inside the comment. If the text
// is not a valid quoted string, the surrounding double quotes are simply
// trimmed.
func getComment(commentGroup string) *string {
	if commentGroup == "" {
		return nil
	}
	comment, err := strconv.Unquote(commentGroup)
	if err != nil {
		comment = strings.Trim(commentGroup, "\"")
	}
	return &comment
}
+ table.Comment = getComment(match[1]) + return nil +} + +// Object implementation for Chain +func (chain *Chain) validate(verb verb) error { + if chain.Hook == nil { + if chain.Type != nil || chain.Priority != nil { + return fmt.Errorf("regular chain %q must not specify Type or Priority", chain.Name) + } + if chain.Device != nil { + return fmt.Errorf("regular chain %q must not specify Device", chain.Name) + } + } else { + if chain.Type == nil || chain.Priority == nil { + return fmt.Errorf("base chain %q must specify Type and Priority", chain.Name) + } + } + + switch verb { + case addVerb, createVerb, flushVerb: + if chain.Name == "" { + return fmt.Errorf("no name specified for chain") + } + if chain.Handle != nil { + return fmt.Errorf("cannot specify Handle in %s operation", verb) + } + case deleteVerb: + if chain.Name == "" && chain.Handle == nil { + return fmt.Errorf("must specify either name or handle") + } + default: + return fmt.Errorf("%s is not implemented for chains", verb) + } + + return nil +} + +func (chain *Chain) writeOperation(verb verb, ctx *nftContext, writer io.Writer) { + // Special case for delete-by-handle + if verb == deleteVerb && chain.Handle != nil { + fmt.Fprintf(writer, "delete chain %s %s handle %d", ctx.family, ctx.table, *chain.Handle) + return + } + + fmt.Fprintf(writer, "%s chain %s %s %s", verb, ctx.family, ctx.table, chain.Name) + if verb == addVerb || verb == createVerb { + if chain.Type != nil || (chain.Comment != nil && !ctx.noObjectComments) { + fmt.Fprintf(writer, " {") + + if chain.Type != nil { + fmt.Fprintf(writer, " type %s hook %s", *chain.Type, *chain.Hook) + if chain.Device != nil { + fmt.Fprintf(writer, " device %q", *chain.Device) + } + + // Parse the priority to a number if we can, because older + // versions of nft don't accept certain named priorities + // in all contexts (eg, "dstnat" priority in the "output" + // hook). 
+ if priority, err := ParsePriority(ctx.family, string(*chain.Priority)); err == nil { + fmt.Fprintf(writer, " priority %d ;", priority) + } else { + fmt.Fprintf(writer, " priority %s ;", *chain.Priority) + } + } + if chain.Comment != nil && !ctx.noObjectComments { + fmt.Fprintf(writer, " comment %q ;", *chain.Comment) + } + + fmt.Fprintf(writer, " }") + } + } + + fmt.Fprintf(writer, "\n") +} + +// groups in []: [1]%s(?: {(?: type [2]%s hook [3]%s(?: device "[4]%s")(?: priority [5]%s ;))(?: comment [6]%s ;) }) +var chainRegexp = regexp.MustCompile(fmt.Sprintf( + `%s(?: {(?: type %s hook %s(?: device "%s")?(?: priority %s ;))?(?: comment %s ;)? })?`, + noSpaceGroup, noSpaceGroup, noSpaceGroup, noSpaceGroup, noSpaceGroup, commentGroup)) + +func (chain *Chain) parse(line string) error { + match := chainRegexp.FindStringSubmatch(line) + if match == nil { + return fmt.Errorf("failed parsing chain add command") + } + chain.Name = match[1] + chain.Comment = getComment(match[6]) + if match[2] != "" { + chain.Type = (*BaseChainType)(&match[2]) + } + if match[3] != "" { + chain.Hook = (*BaseChainHook)(&match[3]) + } + if match[4] != "" { + chain.Device = &match[4] + } + if match[5] != "" { + chain.Priority = (*BaseChainPriority)(&match[5]) + } + return nil +} + +// Object implementation for Rule +func (rule *Rule) validate(verb verb) error { + if rule.Chain == "" { + return fmt.Errorf("no chain name specified for rule") + } + + if rule.Index != nil && rule.Handle != nil { + return fmt.Errorf("cannot specify both Index and Handle") + } + + switch verb { + case addVerb, insertVerb: + if rule.Rule == "" { + return fmt.Errorf("no rule specified") + } + case replaceVerb: + if rule.Rule == "" { + return fmt.Errorf("no rule specified") + } + if rule.Handle == nil { + return fmt.Errorf("must specify Handle with %s", verb) + } + case deleteVerb: + if rule.Handle == nil { + return fmt.Errorf("must specify Handle with %s", verb) + } + default: + return fmt.Errorf("%s is not implemented 
for rules", verb) + } + + return nil +} + +func (rule *Rule) writeOperation(verb verb, ctx *nftContext, writer io.Writer) { + fmt.Fprintf(writer, "%s rule %s %s %s", verb, ctx.family, ctx.table, rule.Chain) + if rule.Index != nil { + fmt.Fprintf(writer, " index %d", *rule.Index) + } else if rule.Handle != nil { + fmt.Fprintf(writer, " handle %d", *rule.Handle) + } + + switch verb { + case addVerb, insertVerb, replaceVerb: + fmt.Fprintf(writer, " %s", rule.Rule) + + if rule.Comment != nil { + fmt.Fprintf(writer, " comment %q", *rule.Comment) + } + } + + fmt.Fprintf(writer, "\n") +} + +// groups in []: [1]%s(?: index [2]%s)?(?: handle [3]%s)? [4]([^"]*)(?: comment [5]%s)?$ +var ruleRegexp = regexp.MustCompile(fmt.Sprintf( + `%s(?: index %s)?(?: handle %s)? ([^"]*)(?: comment %s)?$`, + noSpaceGroup, numberGroup, numberGroup, commentGroup)) + +func (rule *Rule) parse(line string) error { + match := ruleRegexp.FindStringSubmatch(line) + if match == nil { + return fmt.Errorf("failed parsing rule add command") + } + rule.Chain = match[1] + rule.Rule = match[4] + rule.Comment = getComment(match[5]) + if match[2] != "" { + rule.Index = parseInt(match[2]) + } + if match[3] != "" { + rule.Handle = parseInt(match[3]) + } + return nil +} + +// Object implementation for Set +func (set *Set) validate(verb verb) error { + switch verb { + case addVerb, createVerb: + if (set.Type == "" && set.TypeOf == "") || (set.Type != "" && set.TypeOf != "") { + return fmt.Errorf("set must specify either Type or TypeOf") + } + if set.Handle != nil { + return fmt.Errorf("cannot specify Handle in %s operation", verb) + } + fallthrough + case flushVerb: + if set.Name == "" { + return fmt.Errorf("no name specified for set") + } + case deleteVerb: + if set.Name == "" && set.Handle == nil { + return fmt.Errorf("must specify either name or handle") + } + default: + return fmt.Errorf("%s is not implemented for sets", verb) + } + + return nil +} + +func (set *Set) writeOperation(verb verb, ctx 
*nftContext, writer io.Writer) { + // Special case for delete-by-handle + if verb == deleteVerb && set.Handle != nil { + fmt.Fprintf(writer, "delete set %s %s handle %d", ctx.family, ctx.table, *set.Handle) + return + } + + fmt.Fprintf(writer, "%s set %s %s %s", verb, ctx.family, ctx.table, set.Name) + if verb == addVerb || verb == createVerb { + fmt.Fprintf(writer, " {") + + if set.Type != "" { + fmt.Fprintf(writer, " type %s ;", set.Type) + } else { + fmt.Fprintf(writer, " typeof %s ;", set.TypeOf) + } + + if len(set.Flags) != 0 { + fmt.Fprintf(writer, " flags ") + for i := range set.Flags { + if i > 0 { + fmt.Fprintf(writer, ",") + } + fmt.Fprintf(writer, "%s", set.Flags[i]) + } + fmt.Fprintf(writer, " ;") + } + + if set.Timeout != nil { + fmt.Fprintf(writer, " timeout %ds ;", int64(set.Timeout.Seconds())) + } + if set.GCInterval != nil { + fmt.Fprintf(writer, " gc-interval %ds ;", int64(set.GCInterval.Seconds())) + } + if set.Size != nil { + fmt.Fprintf(writer, " size %d ;", *set.Size) + } + if set.Policy != nil { + fmt.Fprintf(writer, " policy %s ;", *set.Policy) + } + if set.AutoMerge != nil && *set.AutoMerge { + fmt.Fprintf(writer, " auto-merge ;") + } + + if set.Comment != nil && !ctx.noObjectComments { + fmt.Fprintf(writer, " comment %q ;", *set.Comment) + } + + fmt.Fprintf(writer, " }") + } + + fmt.Fprintf(writer, "\n") +} + +func (set *Set) parse(line string) error { + match := setRegexp.FindStringSubmatch(line) + if match == nil { + return fmt.Errorf("failed parsing set add command") + } + set.Name, set.Type, set.TypeOf, set.Flags, set.Timeout, set.GCInterval, + set.Size, set.Policy, set.Comment, set.AutoMerge = parseMapAndSetProps(match) + return nil +} + +// Object implementation for Map +func (mapObj *Map) validate(verb verb) error { + switch verb { + case addVerb, createVerb: + if (mapObj.Type == "" && mapObj.TypeOf == "") || (mapObj.Type != "" && mapObj.TypeOf != "") { + return fmt.Errorf("map must specify either Type or TypeOf") + } + if 
mapObj.Handle != nil { + return fmt.Errorf("cannot specify Handle in %s operation", verb) + } + fallthrough + case flushVerb: + if mapObj.Name == "" { + return fmt.Errorf("no name specified for map") + } + case deleteVerb: + if mapObj.Name == "" && mapObj.Handle == nil { + return fmt.Errorf("must specify either name or handle") + } + default: + return fmt.Errorf("%s is not implemented for maps", verb) + } + + return nil +} + +func (mapObj *Map) writeOperation(verb verb, ctx *nftContext, writer io.Writer) { + // Special case for delete-by-handle + if verb == deleteVerb && mapObj.Handle != nil { + fmt.Fprintf(writer, "delete map %s %s handle %d", ctx.family, ctx.table, *mapObj.Handle) + return + } + + fmt.Fprintf(writer, "%s map %s %s %s", verb, ctx.family, ctx.table, mapObj.Name) + if verb == addVerb || verb == createVerb { + fmt.Fprintf(writer, " {") + + if mapObj.Type != "" { + fmt.Fprintf(writer, " type %s ;", mapObj.Type) + } else { + fmt.Fprintf(writer, " typeof %s ;", mapObj.TypeOf) + } + + if len(mapObj.Flags) != 0 { + fmt.Fprintf(writer, " flags ") + for i := range mapObj.Flags { + if i > 0 { + fmt.Fprintf(writer, ",") + } + fmt.Fprintf(writer, "%s", mapObj.Flags[i]) + } + fmt.Fprintf(writer, " ;") + } + + if mapObj.Timeout != nil { + fmt.Fprintf(writer, " timeout %ds ;", int64(mapObj.Timeout.Seconds())) + } + if mapObj.GCInterval != nil { + fmt.Fprintf(writer, " gc-interval %ds ;", int64(mapObj.GCInterval.Seconds())) + } + if mapObj.Size != nil { + fmt.Fprintf(writer, " size %d ;", *mapObj.Size) + } + if mapObj.Policy != nil { + fmt.Fprintf(writer, " policy %s ;", *mapObj.Policy) + } + + if mapObj.Comment != nil && !ctx.noObjectComments { + fmt.Fprintf(writer, " comment %q ;", *mapObj.Comment) + } + + fmt.Fprintf(writer, " }") + } + + fmt.Fprintf(writer, "\n") +} + +func (mapObj *Map) parse(line string) error { + match := mapRegexp.FindStringSubmatch(line) + if match == nil { + return fmt.Errorf("failed parsing map add command") + } + mapObj.Name, 
mapObj.Type, mapObj.TypeOf, mapObj.Flags, mapObj.Timeout, mapObj.GCInterval, + mapObj.Size, mapObj.Policy, mapObj.Comment, _ = parseMapAndSetProps(match) + return nil +} + +var autoMergeProp = `( auto-merge ;)?` + +// groups in []: [1]%s {(?: [2](type|typeof) [3]([^;]*)) ;(?: flags [4]([^;]*) ;)?(?: timeout [5]%ss ;)?(?: gc-interval [6]%ss ;)?(?: size [7]%s ;)?(?: policy [8]%s ;)?[9]%s(?: comment [10]%s ;)? } +var mapOrSet = `%s {(?: (type|typeof) ([^;]*)) ;(?: flags ([^;]*) ;)?(?: timeout %ss ;)?(?: gc-interval %ss ;)?(?: size %s ;)?(?: policy %s ;)?%s(?: comment %s ;)? }` +var mapRegexp = regexp.MustCompile(fmt.Sprintf(mapOrSet, noSpaceGroup, numberGroup, numberGroup, noSpaceGroup, noSpaceGroup, "", commentGroup)) +var setRegexp = regexp.MustCompile(fmt.Sprintf(mapOrSet, noSpaceGroup, numberGroup, numberGroup, noSpaceGroup, noSpaceGroup, autoMergeProp, commentGroup)) + +func parseMapAndSetProps(match []string) (name string, typeProp string, typeOf string, flags []SetFlag, + timeout *time.Duration, gcInterval *time.Duration, size *uint64, policy *SetPolicy, comment *string, autoMerge *bool) { + name = match[1] + // set and map have different number of match groups, but comment is always the last + comment = getComment(match[len(match)-1]) + if match[2] == "type" { + typeProp = match[3] + } else { + typeOf = match[3] + } + if match[4] != "" { + flags = parseSetFlags(match[4]) + } + if match[5] != "" { + timeoutObj, _ := time.ParseDuration(match[5] + "s") + timeout = &timeoutObj + } + if match[6] != "" { + gcIntervalObj, _ := time.ParseDuration(match[6] + "s") + gcInterval = &gcIntervalObj + } + if match[7] != "" { + size = parseUint(match[7]) + } + if match[8] != "" { + policy = (*SetPolicy)(&match[8]) + } + if len(match) > 10 { + // set + if match[9] != "" { + autoMergeObj := true + autoMerge = &autoMergeObj + } + } + return +} + +func parseSetFlags(s string) []SetFlag { + var res []SetFlag + for _, flag := range strings.Split(s, ",") { + res = append(res, 
SetFlag(flag)) + } + return res +} + +// Object implementation for Element +func (element *Element) validate(verb verb) error { + if element.Map == "" && element.Set == "" { + return fmt.Errorf("no set/map name specified for element") + } else if element.Set != "" && element.Map != "" { + return fmt.Errorf("element specifies both a set name and a map name") + } + + if len(element.Key) == 0 { + return fmt.Errorf("no key specified for element") + } + if element.Set != "" && len(element.Value) != 0 { + return fmt.Errorf("map value specified for set element") + } + + switch verb { + case addVerb, createVerb: + if element.Map != "" && len(element.Value) == 0 { + return fmt.Errorf("no map value specified for map element") + } + case deleteVerb: + default: + return fmt.Errorf("%s is not implemented for elements", verb) + } + + return nil +} + +func (element *Element) writeOperation(verb verb, ctx *nftContext, writer io.Writer) { + name := element.Set + if name == "" { + name = element.Map + } + + fmt.Fprintf(writer, "%s element %s %s %s { %s", verb, ctx.family, ctx.table, name, + strings.Join(element.Key, " . ")) + + if verb == addVerb || verb == createVerb { + if element.Comment != nil { + fmt.Fprintf(writer, " comment %q", *element.Comment) + } + + if len(element.Value) != 0 { + fmt.Fprintf(writer, " : %s", strings.Join(element.Value, " . ")) + } + } + + fmt.Fprintf(writer, " }\n") +} + +// groups in []: [1]%s { [2]([^:"]*)(?: comment [3]%s)? : [4](.*) } +var mapElementRegexp = regexp.MustCompile(fmt.Sprintf( + `%s { ([^"]*)(?: comment %s)? : (.*) }`, noSpaceGroup, commentGroup)) + +// groups in []: [1]%s { [2]([^:"]*)(?: comment [3]%s)? } +var setElementRegexp = regexp.MustCompile(fmt.Sprintf( + `%s { ([^"]*)(?: comment %s)? }`, noSpaceGroup, commentGroup)) + +func (element *Element) parse(line string) error { + // try to match map element first, since it has more groups, and if it matches, then we can be sure + // this is map element. 
+ match := mapElementRegexp.FindStringSubmatch(line) + if match == nil { + match = setElementRegexp.FindStringSubmatch(line) + if match == nil { + return fmt.Errorf("failed parsing element add command") + } + } + element.Comment = getComment(match[3]) + mapOrSetName := match[1] + element.Key = append(element.Key, strings.Split(match[2], " . ")...) + if len(match) == 5 { + // map regex matched + element.Map = mapOrSetName + element.Value = append(element.Value, strings.Split(match[4], " . ")...) + } else { + element.Set = mapOrSetName + } + return nil +} diff --git a/vendor/sigs.k8s.io/knftables/transaction.go b/vendor/sigs.k8s.io/knftables/transaction.go new file mode 100644 index 000000000..3063637ad --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/transaction.go @@ -0,0 +1,141 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package knftables + +import ( + "bytes" + "fmt" +) + +// Transaction represents an nftables transaction +type Transaction struct { + *nftContext + + operations []operation + err error +} + +// operation contains a single nftables operation (eg "add table", "flush chain") +type operation struct { + verb verb + obj Object +} + +// verb is used internally to represent the different "nft" verbs +type verb string + +const ( + addVerb verb = "add" + createVerb verb = "create" + insertVerb verb = "insert" + replaceVerb verb = "replace" + deleteVerb verb = "delete" + flushVerb verb = "flush" +) + +// populateCommandBuf populates the transaction as series of nft commands to the given bytes.Buffer. +func (tx *Transaction) populateCommandBuf(buf *bytes.Buffer) error { + if tx.err != nil { + return tx.err + } + + for _, op := range tx.operations { + op.obj.writeOperation(op.verb, tx.nftContext, buf) + } + return nil +} + +// String returns the transaction as a string containing the nft commands; if there is +// a pending error, it will be output as a comment at the end of the transaction. +func (tx *Transaction) String() string { + buf := &bytes.Buffer{} + for _, op := range tx.operations { + op.obj.writeOperation(op.verb, tx.nftContext, buf) + } + + if tx.err != nil { + fmt.Fprintf(buf, "# ERROR: %v", tx.err) + } + + return buf.String() +} + +// NumOperations returns the number of operations queued in the transaction. +func (tx *Transaction) NumOperations() int { + return len(tx.operations) +} + +func (tx *Transaction) operation(verb verb, obj Object) { + if tx.err != nil { + return + } + if tx.err = obj.validate(verb); tx.err != nil { + return + } + + tx.operations = append(tx.operations, operation{verb: verb, obj: obj}) +} + +// Add adds an "nft add" operation to tx, ensuring that obj exists by creating it if it +// did not already exist. 
// Replace adds an "nft replace" operation to tx, replacing an existing rule with obj
// (which must be a Rule). The Replace() call always succeeds, but if obj is invalid, does
// not contain the Handle of an existing rule, or is inconsistent with the existing
// nftables state, then an error will be returned when the transaction is Run.
//
// obj is validated immediately: if validation fails, the error is recorded on the
// transaction and the operation is not queued. Once an error has been recorded, this and
// all later operations on tx are ignored.
func (tx *Transaction) Replace(obj Object) {
	tx.operation(replaceVerb, obj)
}
The Flush() +// call always succeeds, but if obj does not exist (or does not support flushing) then an +// error will be returned when the transaction is Run. +func (tx *Transaction) Flush(obj Object) { + tx.operation(flushVerb, obj) +} + +// Delete adds an "nft delete" operation to tx, deleting obj. The Delete() call always +// succeeds, but if obj does not exist or cannot be deleted based on the information +// provided (eg, Handle is required but not set) then an error will be returned when the +// transaction is Run. +func (tx *Transaction) Delete(obj Object) { + tx.operation(deleteVerb, obj) +} diff --git a/vendor/sigs.k8s.io/knftables/types.go b/vendor/sigs.k8s.io/knftables/types.go new file mode 100644 index 000000000..d8202bc01 --- /dev/null +++ b/vendor/sigs.k8s.io/knftables/types.go @@ -0,0 +1,384 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package knftables + +import ( + "io" + "time" +) + +const ( + // Maximum length of a table, chain, set, etc, name + NameLengthMax = 256 + + // Maximum length of a comment + CommentLengthMax = 128 +) + +// Object is the interface for an nftables object. All of the concrete object types +// implement this interface. +type Object interface { + // validate validates an object for an operation + validate(verb verb) error + + // writeOperation writes out an "nft" operation involving the object. It assumes + // that the object has been validated. 
+ writeOperation(verb verb, ctx *nftContext, writer io.Writer) + + // parse is the opposite of writeOperation; it fills Object fields based on an "nft add" + // command. line is the part of the line after "nft add " + // (so for most types it starts with the object name). + // If error is returned, Object's fields may be partially filled, therefore Object should not be used. + parse(line string) error +} + +// Family is an nftables family +type Family string + +const ( + // IPv4Family represents the "ip" nftables family, for IPv4 rules. + IPv4Family Family = "ip" + + // IPv6Family represents the "ip6" nftables family, for IPv6 rules. + IPv6Family Family = "ip6" + + // InetFamily represents the "inet" nftables family, for mixed IPv4 and IPv6 rules. + InetFamily Family = "inet" + + // ARPFamily represents the "arp" nftables family, for ARP rules. + ARPFamily Family = "arp" + + // BridgeFamily represents the "bridge" nftables family, for rules operating + // on packets traversing a bridge. + BridgeFamily Family = "bridge" + + // NetDevFamily represents the "netdev" nftables family, for rules operating on + // the device ingress/egress path. + NetDevFamily Family = "netdev" +) + +// Table represents an nftables table. +type Table struct { + // Comment is an optional comment for the table. (Requires kernel >= 5.10 and + // nft >= 0.9.7; otherwise this field will be silently ignored. Requires + // nft >= 1.0.8 to include comments in List() results.) + Comment *string + + // Handle is an identifier that can be used to uniquely identify an object when + // deleting it. When adding a new object, this must be nil. + Handle *int +} + +// BaseChainType represents the "type" of a "base chain" (ie, a chain that is attached to a hook). +// See https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains#Base_chain_types +type BaseChainType string + +const ( + // FilterType is the chain type for basic packet filtering. 
+ FilterType BaseChainType = "filter" + + // NATType is the chain type for doing DNAT, SNAT, and masquerading. + // NAT operations are only available from certain hooks. + NATType BaseChainType = "nat" + + // RouteType is the chain type for rules that change the routing of packets. + // Chains of this type can only be added to the "output" hook. + RouteType BaseChainType = "route" +) + +// BaseChainHook represents the "hook" that a base chain is attached to. +// See https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains#Base_chain_hooks +// and https://wiki.nftables.org/wiki-nftables/index.php/Netfilter_hooks +type BaseChainHook string + +const ( + // PreroutingHook is the "prerouting" stage of packet processing, which is the + // first stage (after "ingress") for inbound ("input path" and "forward path") + // packets. + PreroutingHook BaseChainHook = "prerouting" + + // InputHook is the "input" stage of packet processing, which happens after + // "prerouting" for inbound packets being delivered to an interface on this host, + // in this network namespace. + InputHook BaseChainHook = "input" + + // ForwardHook is the "forward" stage of packet processing, which happens after + // "prerouting" for inbound packets destined for a non-local IP (i.e. on another + // host or in another network namespace) + ForwardHook BaseChainHook = "forward" + + // OutputHook is the "output" stage of packet processing, which is the first stage + // for outbound packets, regardless of their final destination. + OutputHook BaseChainHook = "output" + + // PostroutingHook is the "postrouting" stage of packet processing, which is the + // final stage (before "egress") for outbound ("forward path" and "output path") + // packets. + PostroutingHook BaseChainHook = "postrouting" + + // IngressHook is the "ingress" stage of packet processing, in the "netdev" family + // or (with kernel >= 5.10 and nft >= 0.9.7) the "inet" family. 
// BaseChainPriority represents the "priority" of a base chain. Lower values run earlier.
// See https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains#Base_chain_priority
// and https://wiki.nftables.org/wiki-nftables/index.php/Netfilter_hooks#Priority_within_hook
//
// In addition to the const values, you can also use a signed integer value, or an
// arithmetic expression consisting of a const value followed by "+" or "-" and an
// integer.
type BaseChainPriority string

const (
	// RawPriority is the earliest named priority. In particular, it can be used for
	// rules that need to run before conntrack. It is equivalent to the value -300 and
	// can be used in the ip, ip6, and inet families.
	RawPriority BaseChainPriority = "raw"

	// ManglePriority is the standard priority for packet-rewriting operations. It is
	// equivalent to the value -150 and can be used in the ip, ip6, and inet families.
	ManglePriority BaseChainPriority = "mangle"

	// DNATPriority is the standard priority for DNAT operations. In the ip, ip6, and
	// inet families, it is equivalent to the value -100. In the bridge family it is
	// equivalent to the value -300. In both cases it can only be used from the
	// prerouting hook.
	DNATPriority BaseChainPriority = "dstnat"

	// FilterPriority is the standard priority for filtering operations. In the ip,
	// ip6, inet, arp, and netdev families, it is equivalent to the value 0. In the
	// bridge family it is equivalent to the value -200.
	FilterPriority BaseChainPriority = "filter"

	// OutPriority is a named priority that exists only in the bridge family, where
	// it is equivalent to the value 100 (ParsePriority does not recognize it for any
	// other family).
	OutPriority BaseChainPriority = "out"

	// SecurityPriority is the standard priority for security operations ("where
	// secmark can be set for example"). It is equivalent to the value 50 and can be
	// used in the ip, ip6, and inet families.
	SecurityPriority BaseChainPriority = "security"

	// SNATPriority is the standard priority for SNAT operations. In the ip, ip6, and
	// inet families, it is equivalent to the value 100. In the bridge family it is
	// equivalent to the value 300. In both cases it can only be used from the
	// postrouting hook.
	SNATPriority BaseChainPriority = "srcnat"
)
// Rule represents a rule in a chain.
type Rule struct {
	// Chain is the name of the chain that contains this rule. It must always be set.
	Chain string

	// Rule is the rule in standard nftables syntax. (Should be empty on Delete, but
	// is ignored if not.) Note that this does not include any rule comment, which is
	// separate from the rule itself.
	Rule string

	// Comment is an optional comment for the rule.
	Comment *string

	// Index is the number of a rule (counting from 0) to Add this Rule after or
	// Insert it before. Cannot be specified along with Handle. If neither Index
	// nor Handle is specified then Add appends the rule to the end of the chain and
	// Insert prepends it to the beginning.
	Index *int

	// Handle is a rule handle. In Add or Insert, if set, this is the handle of an
	// existing rule to put the new rule after/before. In Delete or Replace, this
	// indicates the existing rule to delete/replace, and is mandatory. In the result
	// of a List, this will indicate the rule's handle that can then be used in a
	// later operation.
	Handle *int
}
// SetPolicy represents a set or map storage policy
type SetPolicy string

const (
	// PerformancePolicy is the "performance" storage policy. FIXME: document the
	// trade-off against MemoryPolicy.
	PerformancePolicy SetPolicy = "performance"

	// MemoryPolicy is the "memory" storage policy. FIXME: document the trade-off
	// against PerformancePolicy.
	MemoryPolicy SetPolicy = "memory"
)

// Set represents the definition of an nftables set (but not its elements)
type Set struct {
	// Name is the name of the set.
	Name string

	// Type is the type of the set key (eg "ipv4_addr"). Either Type or TypeOf, but
	// not both, must be non-empty.
	Type string

	// TypeOf is the type of the set key as an nftables expression (eg "ip saddr").
	// Either Type or TypeOf, but not both, must be non-empty. (Requires at least nft
	// 0.9.4, and newer than that for some types.)
	TypeOf string

	// Flags are the set flags
	Flags []SetFlag

	// Timeout is the time that an element will stay in the set before being removed.
	// It is written to nft as a whole number of seconds.
	// (Optional; mandatory for sets that will be added to from the packet path)
	Timeout *time.Duration

	// GCInterval is the interval at which timed-out elements will be removed from the
	// set. It is written to nft as a whole number of seconds.
	// (Optional; FIXME DEFAULT)
	GCInterval *time.Duration

	// Size is the maximum number of elements in the set.
	// (Optional; mandatory for sets that will be added to from the packet path)
	Size *uint64

	// Policy is the storage policy of the set; see SetPolicy. (Optional)
	Policy *SetPolicy

	// AutoMerge indicates that adjacent/overlapping set elements should be merged
	// together (only for interval sets)
	AutoMerge *bool

	// Comment is an optional comment for the object. (Requires kernel >= 5.10 and
	// nft >= 0.9.7; otherwise this field will be silently ignored.)
	Comment *string

	// Handle is an identifier that can be used to uniquely identify an object when
	// deleting it. When adding a new object, this must be nil.
	Handle *int
}

// Map represents the definition of an nftables map (but not its elements)
type Map struct {
	// Name is the name of the map.
	Name string

	// Type is the type of the map key and value (eg "ipv4_addr : verdict"). Either
	// Type or TypeOf, but not both, must be non-empty.
	Type string

	// TypeOf is the type of the map key and value as an nftables expression (eg
	// "ip saddr : verdict"). Either Type or TypeOf, but not both, must be non-empty.
	// (Requires at least nft 0.9.4, and newer than that for some types.)
	TypeOf string

	// Flags are the map flags
	Flags []SetFlag

	// Timeout is the time that an element will stay in the map before being removed.
	// It is written to nft as a whole number of seconds.
	// (Optional; mandatory for maps that will be added to from the packet path)
	Timeout *time.Duration

	// GCInterval is the interval at which timed-out elements will be removed from the
	// map. It is written to nft as a whole number of seconds.
	// (Optional; FIXME DEFAULT)
	GCInterval *time.Duration

	// Size is the maximum number of elements in the map.
	// (Optional; mandatory for maps that will be added to from the packet path)
	Size *uint64

	// Policy is the storage policy of the map; see SetPolicy. (Optional)
	Policy *SetPolicy

	// Comment is an optional comment for the object. (Requires kernel >= 5.10 and
	// nft >= 0.9.7; otherwise this field will be silently ignored.)
	Comment *string

	// Handle is an identifier that can be used to uniquely identify an object when
	// deleting it. When adding a new object, this must be nil.
	Handle *int
}
// PtrTo returns a pointer to a fresh copy of val. It is a convenience for filling in
// optional (pointer-typed) field values in objects.
func PtrTo[T any](val T) *T {
	ptr := new(T)
	*ptr = val
	return ptr
}

// Symbolic chain priority names and the numeric values they stand for.
var (
	// numericPriorities is consulted by ParsePriority for every family other than
	// the bridge family.
	numericPriorities = map[string]int{
		"raw":      -300,
		"mangle":   -150,
		"dstnat":   -100,
		"filter":   0,
		"security": 50,
		"srcnat":   100,
	}

	// bridgeNumericPriorities is consulted by ParsePriority for the bridge family,
	// which uses its own set of names and values.
	bridgeNumericPriorities = map[string]int{
		"dstnat": -300,
		"filter": -200,
		"out":    100,
		"srcnat": 300,
	}
)
// Concat is a helper (primarily) for constructing Rule objects. It takes a series of
// arguments and concatenates them together into a single string with spaces between the
// arguments. Strings are output as-is, string arrays are output element by element,
// integers (of any built-in signed or unsigned width) are output as with
// `fmt.Sprintf("%d")`, and all other types are output as with `fmt.Sprintf("%s")`. To
// help with set/map lookup syntax, a string argument of "@" will not be followed by a
// space, so you can do, eg, `Concat("ip saddr", "@", setName)`.
//
// Only the token written immediately after an "@" is glued to it; normal spacing
// resumes afterwards regardless of that token's type. No separator is ever written
// before the first non-empty output.
func Concat(args ...any) string {
	var b strings.Builder

	// needSpace records whether the next token must be preceded by a separator.
	needSpace := false

	// emit writes one token, preceded by a separator if required, and then decides
	// whether the following token needs a separator. glue is true only for a literal
	// "@" string token. Recomputing the state on every token (rather than only in
	// the string branches) is what keeps a stale "@" from suppressing later spaces.
	emit := func(tok string, glue bool) {
		if needSpace {
			b.WriteByte(' ')
		}
		b.WriteString(tok)
		needSpace = b.Len() > 0 && !glue
	}

	for _, arg := range args {
		switch x := arg.(type) {
		case string:
			emit(x, x == "@")
		case []string:
			for _, s := range x {
				emit(s, s == "@")
			}
		case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
			emit(fmt.Sprintf("%d", x), false)
		default:
			emit(fmt.Sprintf("%s", x), false)
		}
	}
	return b.String()
}