From 06b480e17751eb5d719ec85cb34e1abd51280a64 Mon Sep 17 00:00:00 2001 From: brad-defined <77982333+brad-defined@users.noreply.github.com> Date: Tue, 5 Sep 2023 09:29:27 -0400 Subject: [PATCH] Fix relay migration (#964) * Fix for relay migration on rehandshaking issue. On rehandshake, the relay tunnel doesn't migrate to the new hostinfo object correctly, due to an incorrect Nebula IP sent in the CreateRelayRequest message. * Add a test for this case --------- Co-authored-by: Nate Brown --- connection_manager.go | 4 +- e2e/handshakes_test.go | 104 +++++++++++++++++++++++++++++++++++++++++ relay_manager.go | 6 +++ 3 files changed, 112 insertions(+), 2 deletions(-) diff --git a/connection_manager.go b/connection_manager.go index 900db07cb..ce11f1966 100644 --- a/connection_manager.go +++ b/connection_manager.go @@ -231,7 +231,7 @@ func (n *connectionManager) migrateRelayUsed(oldhostinfo, newhostinfo *HostInfo) index = existing.LocalIndex switch r.Type { case TerminalType: - relayFrom = newhostinfo.vpnIp + relayFrom = n.intf.myVpnIp relayTo = existing.PeerIp case ForwardingType: relayFrom = existing.PeerIp @@ -256,7 +256,7 @@ func (n *connectionManager) migrateRelayUsed(oldhostinfo, newhostinfo *HostInfo) } switch r.Type { case TerminalType: - relayFrom = newhostinfo.vpnIp + relayFrom = n.intf.myVpnIp relayTo = r.PeerIp case ForwardingType: relayFrom = r.PeerIp diff --git a/e2e/handshakes_test.go b/e2e/handshakes_test.go index d02a1a733..022b5a3f1 100644 --- a/e2e/handshakes_test.go +++ b/e2e/handshakes_test.go @@ -610,6 +610,110 @@ func TestRehandshakingRelays(t *testing.T) { t.Logf("relayControl hostinfos got cleaned up!") } +func TestRehandshakingRelaysPrimary(t *testing.T) { + // This test is the same as TestRehandshakingRelays but one of the terminal types is a primary swap winner + ca, _, caKey, _ := newTestCaCert(time.Now(), time.Now().Add(10*time.Minute), []*net.IPNet{}, []*net.IPNet{}, []string{}) + myControl, myVpnIpNet, _, _ := newSimpleServer(ca, caKey, "me ", net.IP{10, 0, 0, 128}, m{"relay": m{"use_relays": true}}) + relayControl, relayVpnIpNet, relayUdpAddr, relayConfig := newSimpleServer(ca, caKey, "relay ", net.IP{10, 0, 0, 1}, m{"relay": m{"am_relay": true}}) + theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(ca, caKey, "them ", net.IP{10, 0, 0, 2}, m{"relay": m{"use_relays": true}}) + + // Teach my how to get to the relay and that their can be reached via the relay + myControl.InjectLightHouseAddr(relayVpnIpNet.IP, relayUdpAddr) + myControl.InjectRelays(theirVpnIpNet.IP, []net.IP{relayVpnIpNet.IP}) + relayControl.InjectLightHouseAddr(theirVpnIpNet.IP, theirUdpAddr) + + // Build a router so we don't have to reason who gets which packet + r := router.NewR(t, myControl, relayControl, theirControl) + defer r.RenderFlow() + + // Start the servers + myControl.Start() + relayControl.Start() + theirControl.Start() + + t.Log("Trigger a handshake from me to them via the relay") + myControl.InjectTunUDPPacket(theirVpnIpNet.IP, 80, 80, []byte("Hi from me")) + + p := r.RouteForAllUntilTxTun(theirControl) + r.Log("Assert the tunnel works") + assertUdpPacket(t, []byte("Hi from me"), p, myVpnIpNet.IP, theirVpnIpNet.IP, 80, 80) + r.RenderHostmaps("working hostmaps", myControl, relayControl, theirControl) + + // When I update the certificate for the relay, both me and them will have 2 host infos for the relay, + // and the main host infos will not have any relay state to handle the me<->relay<->them tunnel. + r.Log("Renew relay certificate and spin until me and them sees it") + _, _, myNextPrivKey, myNextPEM := newTestCert(ca, caKey, "relay", time.Now(), time.Now().Add(5*time.Minute), relayVpnIpNet, nil, []string{"new group"}) + + caB, err := ca.MarshalToPEM() + if err != nil { + panic(err) + } + + relayConfig.Settings["pki"] = m{ + "ca": string(caB), + "cert": string(myNextPEM), + "key": string(myNextPrivKey), + } + rc, err := yaml.Marshal(relayConfig.Settings) + assert.NoError(t, err) + relayConfig.ReloadConfigString(string(rc)) + + for { + r.Log("Assert the tunnel works between myVpnIpNet and relayVpnIpNet") + assertTunnel(t, myVpnIpNet.IP, relayVpnIpNet.IP, myControl, relayControl, r) + c := myControl.GetHostInfoByVpnIp(iputil.Ip2VpnIp(relayVpnIpNet.IP), false) + if len(c.Cert.Details.Groups) != 0 { + // We have a new certificate now + r.Log("Certificate between my and relay is updated!") + break + } + + time.Sleep(time.Second) + } + + for { + r.Log("Assert the tunnel works between theirVpnIpNet and relayVpnIpNet") + assertTunnel(t, theirVpnIpNet.IP, relayVpnIpNet.IP, theirControl, relayControl, r) + c := theirControl.GetHostInfoByVpnIp(iputil.Ip2VpnIp(relayVpnIpNet.IP), false) + if len(c.Cert.Details.Groups) != 0 { + // We have a new certificate now + r.Log("Certificate between their and relay is updated!") + break + } + + time.Sleep(time.Second) + } + + r.Log("Assert the relay tunnel still works") + assertTunnel(t, theirVpnIpNet.IP, myVpnIpNet.IP, theirControl, myControl, r) + r.RenderHostmaps("working hostmaps", myControl, relayControl, theirControl) + // We should have two hostinfos on all sides + for len(myControl.GetHostmap().Indexes) != 2 { + t.Logf("Waiting for myControl hostinfos (%v != 2) to get cleaned up from lack of use...", len(myControl.GetHostmap().Indexes)) + r.Log("Assert the relay tunnel still works") + assertTunnel(t, theirVpnIpNet.IP, myVpnIpNet.IP, theirControl, myControl, r) + r.Log("yupitdoes") + time.Sleep(time.Second) + } + t.Logf("myControl hostinfos got cleaned up!") + for len(theirControl.GetHostmap().Indexes) != 2 { + t.Logf("Waiting for theirControl hostinfos (%v != 2) to get cleaned up from lack of use...", len(theirControl.GetHostmap().Indexes)) + r.Log("Assert the relay tunnel still works") + assertTunnel(t, theirVpnIpNet.IP, myVpnIpNet.IP, theirControl, myControl, r) + r.Log("yupitdoes") + time.Sleep(time.Second) + } + t.Logf("theirControl hostinfos got cleaned up!") + for len(relayControl.GetHostmap().Indexes) != 2 { + t.Logf("Waiting for relayControl hostinfos (%v != 2) to get cleaned up from lack of use...", len(relayControl.GetHostmap().Indexes)) + r.Log("Assert the relay tunnel still works") + assertTunnel(t, theirVpnIpNet.IP, myVpnIpNet.IP, theirControl, myControl, r) + r.Log("yupitdoes") + time.Sleep(time.Second) + } + t.Logf("relayControl hostinfos got cleaned up!") +} + func TestRehandshaking(t *testing.T) { ca, _, caKey, _ := newTestCaCert(time.Now(), time.Now().Add(10*time.Minute), []*net.IPNet{}, []*net.IPNet{}, []string{}) myControl, myVpnIpNet, myUdpAddr, myConfig := newSimpleServer(ca, caKey, "me ", net.IP{10, 0, 0, 2}, nil) diff --git a/relay_manager.go b/relay_manager.go index 224135362..7aa06ccb4 100644 --- a/relay_manager.go +++ b/relay_manager.go @@ -179,6 +179,12 @@ func (rm *relayManager) handleCreateRelayRequest(h *HostInfo, f *Interface, m *N "vpnIp": h.vpnIp}) logMsg.Info("handleCreateRelayRequest") + // Is the source of the relay me? This should never happen, but did happen due to + // an issue migrating relays over to newly re-handshaked host info objects. + if from == f.myVpnIp { + logMsg.WithField("myIP", f.myVpnIp).Error("Discarding relay request from myself") + return + } // Is the target of the relay me? if target == f.myVpnIp { existingRelay, ok := h.relayState.QueryRelayForByIp(from)