Skip to content

Commit

Permalink
Fix Voq chassis orchagent crash with 34K routes (#3329)
Browse files Browse the repository at this point in the history
What I did
Don't add the remote system neighbor if the same neighbor exists.
Why I did it
The IMM has two asics and has 2 port channels in each asic and 2 port members in each port channel.
The ip address is configured on each port channel and bgp is enabled. The neighbor and routes are learned on these port channel.
  • Loading branch information
saksarav-nokia authored Dec 2, 2024
1 parent 39e5a67 commit d006374
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 1 deletion.
11 changes: 10 additions & 1 deletion orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1890,6 +1890,15 @@ void NeighOrch::doVoqSystemNeighTask(Consumer &consumer)
it++;
continue;
}
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
{
NextHopKey nexthop = { ip_address, ibif.m_alias};
if (hasNextHop(nexthop))
{
it++;
continue;
}
}

if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end() ||
m_syncdNeighbors[neighbor_entry].mac != mac_address ||
Expand Down Expand Up @@ -2391,4 +2400,4 @@ bool NeighOrch::ifChangeInformRemoteNextHop(const string &alias, bool if_up)
}
}
return rc;
}
}
127 changes: 127 additions & 0 deletions tests/test_virtual_chassis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,133 @@ def test_remote_port_down(self, vct):

# Cleanup inband if configuration
self.del_inbandif_port(vct, inband_port)
self.configure_neighbor(local_lc_dvs, "del", test_neigh_ip_2, test_neigh_mac_2, test_neigh_dev_2)


def test_remote_neighbor_add(self, vct):
# test params
local_lc_switch_id = '0'
remote_lc_switch_id = '2'
test_prefix = "14.14.0.0/16"
inband_port = "Ethernet0"
test_neigh_ip_1 = "10.8.104.50"
test_neigh_dev_1 = "Ethernet4"
test_neigh_mac_1 = "00:09:03:04:05:06"
test_neigh_dev_2 = "Ethernet8"

local_lc_dvs = self.get_lc_dvs(vct, local_lc_switch_id)
remote_lc_dvs = self.get_lc_dvs(vct, remote_lc_switch_id)

# config inband port
self.config_inbandif_port(vct, inband_port)

# add neighbor
self.configure_neighbor(local_lc_dvs, "add", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_1)

time.sleep(10)

asic_db = remote_lc_dvs.get_asic_db()
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY", 1)
neighkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
assert len(neighkeys), "No neigh entries in ASIC_DB"

# Check for presence of the remote neighbor in ASIC_DB
remote_neigh = ""
for nkey in neighkeys:
ne = ast.literal_eval(nkey)
if ne['ip'] == test_neigh_ip_1:
remote_neigh = nkey
break

assert remote_neigh != "", "Remote neigh not found in ASIC_DB"

# Preserve remote neigh asic db neigh key for delete verification later
test_remote_neigh_asic_db_key = remote_neigh

asic_db = remote_lc_dvs.get_asic_db()
nexthop_keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", 1)
assert len(nexthop_keys), "No Nexthop entries in ASIC_DB"

nexthop_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", nexthop_keys[0])
ip = nexthop_entry.get("SAI_NEXT_HOP_ATTR_IP")
assert ip != "", "Ip address not found for nexthop entry in asic db"
rif1 = nexthop_entry.get("SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID")


# add route of LC1(pretend learnt via bgp)
_, res = remote_lc_dvs.runcmd(['sh', '-c', f"ip route add {test_prefix} nexthop via {test_neigh_ip_1}"])
assert res == "", "Error configuring route"
time.sleep(5)

# del neighbor on first port and add it on second port
self.configure_neighbor(local_lc_dvs, "del", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_1)
time.sleep(5)
self.configure_neighbor(local_lc_dvs, "add", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_2)

time.sleep(10)

asic_db = remote_lc_dvs.get_asic_db()
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY", 1)
neighkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
assert len(neighkeys), "No neigh entries in ASIC_DB"

# Check for presence of the remote neighbor in ASIC_DB
remote_neigh = ""
for nkey in neighkeys:
ne = ast.literal_eval(nkey)
if ne['ip'] == test_neigh_ip_1:
remote_neigh = nkey
break

assert remote_neigh != "", "Remote neigh not found in ASIC_DB"

nexthop_keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", 1)
assert len(nexthop_keys), "No Nexthop entries in ASIC_DB"
nexthop_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", nexthop_keys[0])
print("2:nexthop_entrty:",nexthop_entry)
rif2 = nexthop_entry.get("SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID")
assert rif1 == rif2, "Neighbor is already replaced with new rif"

#del the route
_, res = remote_lc_dvs.runcmd(['sh', '-c', f"ip route del {test_prefix} nexthop via {test_neigh_ip_1} "])
assert res == "", "Error configuring route"

time.sleep(10)

asic_db = remote_lc_dvs.get_asic_db()
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY", 1)
neighkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
assert len(neighkeys), "No neigh entries in ASIC_DB"

# Check for presence of the remote neighbor in ASIC_DB
remote_neigh = ""
for nkey in neighkeys:
ne = ast.literal_eval(nkey)
if ne['ip'] == test_neigh_ip_1:
remote_neigh = nkey
break
assert remote_neigh != "", "Remote neigh not found in ASIC_DB"

nexthop_keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", 1)
assert len(nexthop_keys), "No Nexthop entries in ASIC_DB"
nexthop_entry = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", nexthop_keys[0])
print("3:nexthop_entrty:",nexthop_entry)
rif3 = nexthop_entry.get("SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID")
assert rif1 != rif3, "Neighbor is not replaced with new rif"

#del the neighbor
self.configure_neighbor(local_lc_dvs, "del", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_2)
time.sleep(10)
asic_db = remote_lc_dvs.get_asic_db()
asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY", 0)
neighkeys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEIGHBOR_ENTRY")
assert len(neighkeys) == 0, "Neigh entries still in ASIC_DB"

nexthop_keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", 0)
assert len(nexthop_keys) == 0, "Nexthop entries in still ASIC_DB"

# Cleanup inband if configuration
self.del_inbandif_port(vct, inband_port)

def test_voq_drop_counters(self, vct):
"""Test VOQ switch drop counters.
Expand Down

0 comments on commit d006374

Please sign in to comment.