Skip to content

Commit e931f4e

Browse files
committed
Compare PodCIDRs in NodeRouteController
When determining whether a Node's configuration has changed. Note that when writing a controller, we should never assume that when the object name is the same, all immutable fields are guaranteed to be the same, and as a result we should always check all of them. This applies even for controllers without a workqueue (which is not the case here), as a Delete followed by a Create with the same name can show as an Update event. It is more likely to happen when the controller uses a workqueue, as in that case, even if the Delete and Add event handlers are called sequentially, the key may only be processed once, after the informer store has been updated with the new object. Fixes antrea-io#6965 Signed-off-by: Antonin Bas <antonin.bas@broadcom.com>
1 parent 25b2b81 commit e931f4e

File tree

2 files changed

+68
-8
lines changed

2 files changed

+68
-8
lines changed

pkg/agent/controller/noderoute/node_route_controller.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"fmt"
2020
"net"
2121
"net/netip"
22+
"slices"
2223
"sync"
2324
"time"
2425

@@ -548,21 +549,32 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error {
548549
return err
549550
}
550551
peerWireGuardPublicKey := node.Annotations[types.NodeWireGuardPublicAnnotationKey]
552+
podCIDRStrs := getPodCIDRsOnNode(node)
553+
if len(podCIDRStrs) == 0 {
554+
// If no valid PodCIDR is configured in Node.Spec, return immediately.
555+
return nil
556+
}
551557

552558
nrInfo, installed, _ := c.installedNodes.GetByKey(nodeName)
553-
// Route is already added for this Node and Node MAC, transport IP
554-
// and WireGuard public key are not changed.
559+
samePodCIDRs := func(podCIDRs []*net.IPNet) bool {
560+
// There is no reason to worry about the ordering of the CIDRs here. In a dual-stack
561+
// cluster, the CIDRs should always be ordered consistently by IP family. Even if
562+
// this was not true, samePodCIDRs would just return false and we would end up
563+
// re-installing the same routes (and the podCIDRs field in nodeRouteInfo would be
564+
// updated).
565+
return slices.EqualFunc(podCIDRStrs, podCIDRs, func(cidrStr string, cidr *net.IPNet) bool {
566+
return cidr.String() == cidrStr
567+
})
568+
}
569+
// Route is already added for this Node and Node MAC, transport IP, podCIDRs and WireGuard
570+
// public key are not changed.
555571
if installed && nrInfo.(*nodeRouteInfo).nodeMAC.String() == peerNodeMAC.String() &&
556572
peerNodeIPs.Equal(*nrInfo.(*nodeRouteInfo).nodeIPs) &&
573+
samePodCIDRs(nrInfo.(*nodeRouteInfo).podCIDRs) &&
557574
nrInfo.(*nodeRouteInfo).wireGuardPublicKey == peerWireGuardPublicKey {
558575
return nil
559576
}
560577

561-
podCIDRStrs := getPodCIDRsOnNode(node)
562-
if len(podCIDRStrs) == 0 {
563-
// If no valid PodCIDR is configured in Node.Spec, return immediately.
564-
return nil
565-
}
566578
klog.InfoS("Adding routes and flows to Node", "Node", nodeName, "podCIDRs", podCIDRStrs,
567579
"addresses", node.Status.Addresses)
568580

pkg/agent/controller/noderoute/node_route_controller_test.go

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,13 @@ var (
4949
// podCIDRs of "local" Node
5050
_, podCIDR, _ = net.ParseCIDR("1.1.0.0/24")
5151
_, podCIDRv6, _ = net.ParseCIDR("2001:4860:0000::/48")
52-
// podCIDRs of "remote" Node
52+
// podCIDRs of "remote" Nodes
5353
_, podCIDR1, _ = net.ParseCIDR("1.1.1.0/24")
5454
_, podCIDR1v6, _ = net.ParseCIDR("2001:4860:0001::/48")
55+
_, podCIDR2, _ = net.ParseCIDR("1.1.2.0/24")
5556
podCIDR1Gateway = util.GetGatewayIPForPodCIDR(podCIDR1)
5657
podCIDR1v6Gateway = util.GetGatewayIPForPodCIDR(podCIDR1v6)
58+
podCIDR2Gateway = util.GetGatewayIPForPodCIDR(podCIDR2)
5759
nodeIP1 = net.ParseIP("10.10.10.10")
5860
dsIPs1 = utilip.DualStackIPs{IPv4: nodeIP1}
5961
nodeIP2 = net.ParseIP("10.10.10.11")
@@ -198,6 +200,52 @@ func TestControllerWithDuplicatePodCIDR(t *testing.T) {
198200
}
199201
}
200202

203+
func TestNodeRecreateNewPodCIDR(t *testing.T) {
204+
c := newController(t, &config.NetworkConfig{})
205+
defer c.queue.ShutDown()
206+
207+
stopCh := make(chan struct{})
208+
defer close(stopCh)
209+
c.informerFactory.Start(stopCh)
210+
c.informerFactory.WaitForCacheSync(stopCh)
211+
212+
// Remove IPv6 Pod CIDR to simplify test.
213+
oldNode := node1.DeepCopy()
214+
oldNode.UID = "old-uid"
215+
oldNode.Spec.PodCIDR = podCIDR1.String()
216+
oldNode.Spec.PodCIDRs = []string{podCIDR1.String()}
217+
218+
newNode := node1.DeepCopy()
219+
newNode.UID = "new-uid"
220+
newNode.Spec.PodCIDR = podCIDR2.String()
221+
newNode.Spec.PodCIDRs = []string{podCIDR2.String()}
222+
223+
c.clientset.CoreV1().Nodes().Create(context.TODO(), oldNode, metav1.CreateOptions{})
224+
c.ofClient.EXPECT().InstallNodeFlows("node1", gomock.Any(), &dsIPs1, uint32(0), nil).Times(1)
225+
c.routeClient.EXPECT().AddRoutes(podCIDR1, "node1", nodeIP1, podCIDR1Gateway).Times(1)
226+
c.processNextWorkItem()
227+
228+
// Node is deleted, then shortly after is "recreated" (same name) with a different PodCIDR.
229+
c.clientset.CoreV1().Nodes().Delete(context.TODO(), oldNode.Name, metav1.DeleteOptions{})
230+
// The sleep is for illustration purposes. The duration does not really matter. What matters
231+
// is that by the time we call processNextWorkItem below, the informer store has been
232+
// updated with the new Node.
233+
time.Sleep(10 * time.Millisecond)
234+
c.clientset.CoreV1().Nodes().Create(context.TODO(), newNode, metav1.CreateOptions{})
235+
236+
// At the moment, the test will fail because these expectations are not met.
237+
c.ofClient.EXPECT().InstallNodeFlows("node1", gomock.Any(), &dsIPs1, uint32(0), nil).Times(1)
238+
c.routeClient.EXPECT().AddRoutes(podCIDR2, "node1", nodeIP1, podCIDR2Gateway).Times(1)
239+
240+
require.Eventually(t, func() bool {
241+
node, err := c.nodeLister.Get("node1")
242+
return err == nil && node.UID == "new-uid"
243+
}, 1*time.Second, 10*time.Millisecond)
244+
245+
// The workqueue may still be empty by the time this is called, but processNextWorkItem will block.
246+
c.processNextWorkItem()
247+
}
248+
201249
func TestLookupIPInPodSubnets(t *testing.T) {
202250
c := newController(t, &config.NetworkConfig{})
203251
defer c.queue.ShutDown()

0 commit comments

Comments
 (0)