Skip to content

Commit

Permalink
netif_addr: fix hw multicast address sync problems
Browse files Browse the repository at this point in the history
It fixes following problems in Previous implementation.
1. Neglected the fact that multicast IPv4/IPv6 address can be mapped to one multicast hw address,
   and a lower dpvs port may have multiple upper ports (such as vlan).
2. Multicast hw addresses could sync from kni multiple times or be deleted by mistake.
3. Interferences of linked down kni devices.

Signed-off-by: ywc689 <[email protected]>
  • Loading branch information
ywc689 committed Jul 8, 2024
1 parent db2176b commit 2849ec1
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 166 deletions.
4 changes: 2 additions & 2 deletions include/inetaddr.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct inet_ifmcaddr {
int af;
union inet_addr addr;
uint32_t flags; /* not used yet */
rte_atomic32_t refcnt;
uint32_t refcnt;
};

/*
Expand Down Expand Up @@ -117,7 +117,7 @@ bool inet_chk_mcast_addr(int af, struct netif_port *dev,

void inet_ifaddr_dad_failure(struct inet_ifaddr *ifa);

int idev_add_mcast_init(void *args);
int idev_add_mcast_init(struct netif_port *dev);

int inet_addr_init(void);
int inet_addr_term(void);
Expand Down
28 changes: 1 addition & 27 deletions include/netif.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "list.h"
#include "dpdk.h"
#include "inetaddr.h"
#include "netif_addr.h"
#include "global_data.h"
#include "timer.h"
#include "tc/tc.h"
Expand Down Expand Up @@ -205,31 +206,6 @@ struct netif_ops {
int (*op_get_xstats)(struct netif_port *dev, netif_nic_xstats_get_t **xstats);
};

struct netif_hw_addr {
struct list_head list;
struct rte_ether_addr addr;
rte_atomic32_t refcnt;
/*
* - sync only once!
*
* for HA in upper dev, no matter how many times it's added,
* only sync once to lower (when sync_cnt is zero).
*
* and HA (upper)'s refcnt++, to mark lower dev own's it.
*
* - when to unsync?
*
* when del if HA (upper dev)'s refcnt is 1 and syn_cnt is not zero.
* means lower dev is the only owner and need be unsync.
*/
int sync_cnt;
};

struct netif_hw_addr_list {
struct list_head addrs;
int count;
};

struct netif_port {
char name[IFNAMSIZ]; /* device name */
portid_t id; /* device id */
Expand Down Expand Up @@ -296,8 +272,6 @@ int netif_port_conf_get(struct netif_port *port, struct rte_eth_conf *eth_conf);
int netif_port_conf_set(struct netif_port *port, const struct rte_eth_conf *conf);
int netif_port_start(struct netif_port *port); // start nic and wait until up
int netif_port_stop(struct netif_port *port); // stop nic
int netif_set_mc_list(struct netif_port *port);
int __netif_set_mc_list(struct netif_port *port);
int netif_get_queue(struct netif_port *port, lcoreid_t id, queueid_t *qid);
int netif_get_link(struct netif_port *dev, struct rte_eth_link *link);
int netif_get_promisc(struct netif_port *dev, bool *promisc);
Expand Down
51 changes: 40 additions & 11 deletions include/netif_addr.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,42 @@
*/
#ifndef __DPVS_NETIF_ADDR_H__
#define __DPVS_NETIF_ADDR_H__
#include "netif.h"

int __netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr);
int __netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr);
enum {
HW_ADDR_F_FROM_KNI = 1, // from linux kni device in local layer
};

struct netif_hw_addr {
struct list_head list;
struct rte_ether_addr addr;
rte_atomic32_t refcnt;
uint16_t flags;
uint16_t sync_cnt;
};

struct netif_hw_addr_list {
struct list_head addrs;
int count;
};

struct netif_port;

int __netif_hw_addr_add(struct netif_hw_addr_list *list,
const struct rte_ether_addr *addr, uint16_t flags);
int __netif_hw_addr_del(struct netif_hw_addr_list *list,
const struct rte_ether_addr *addr, uint16_t flags);

int netif_set_mc_list(struct netif_port *dev);
int __netif_set_mc_list(struct netif_port *dev);

int netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr);
int netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr);
void netif_mc_flush(struct netif_port *dev);
void netif_mc_init(struct netif_port *dev);
int __netif_mc_dump(struct netif_port *dev,
struct rte_ether_addr *addrs, size_t *naddr);
int netif_mc_dump(struct netif_port *dev,
struct rte_ether_addr *addrs, size_t *naddr);
int __netif_mc_dump(struct netif_port *dev, uint16_t filter_flags,
struct rte_ether_addr *addrs, size_t *naddr);
int netif_mc_dump(struct netif_port *dev, uint16_t filter_flags,
struct rte_ether_addr *addrs, size_t *naddr);
int __netif_mc_print(struct netif_port *dev,
char *buf, int *len, int *pnaddr);
int netif_mc_print(struct netif_port *dev,
Expand All @@ -45,10 +69,10 @@ int netif_mc_sync(struct netif_port *to, struct netif_port *from);
int __netif_mc_unsync(struct netif_port *to, struct netif_port *from);
int netif_mc_unsync(struct netif_port *to, struct netif_port *from);

int __netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from);
int netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from);
int __netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from);
int netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from);
int __netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt);
int netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt);
int __netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt);
int netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt);

static inline int eth_addr_equal(const struct rte_ether_addr *addr1,
const struct rte_ether_addr *addr2)
Expand All @@ -69,4 +93,9 @@ static inline char *eth_addr_dump(const struct rte_ether_addr *ea,
return buf;
}

static bool inline hw_addr_from_kni(const struct netif_hw_addr *hwa)
{
return !!(hwa->flags & HW_ADDR_F_FROM_KNI);
}

#endif /* __DPVS_NETIF_ADDR_H__ */
57 changes: 40 additions & 17 deletions src/inetaddr.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,15 @@ static inline void idev_put(struct inet_device *idev)
static inline void imc_hash(struct inet_ifmcaddr *imc, struct inet_device *idev)
{
list_add(&imc->d_list, &idev->this_ifm_list);
rte_atomic32_inc(&imc->refcnt);
++imc->refcnt;
}

static inline void imc_unhash(struct inet_ifmcaddr *imc)
{
assert(rte_atomic32_read(&imc->refcnt) > 1);
assert(imc->refcnt> 1);

list_del(&imc->d_list);
rte_atomic32_dec(&imc->refcnt);
--imc->refcnt;
}

static struct inet_ifmcaddr *imc_lookup(int af, const struct inet_device *idev,
Expand All @@ -109,7 +109,7 @@ static struct inet_ifmcaddr *imc_lookup(int af, const struct inet_device *idev,

list_for_each_entry(imc, &idev->ifm_list[cid], d_list) {
if (inet_addr_equal(af, &imc->addr, maddr)) {
rte_atomic32_inc(&imc->refcnt);
++imc->refcnt;
return imc;
}
}
Expand All @@ -121,7 +121,7 @@ static void imc_put(struct inet_ifmcaddr *imc)
{
char ipstr[64];

if (rte_atomic32_dec_and_test(&imc->refcnt)) {
if (--imc->refcnt == 0) {
RTE_LOG(DEBUG, IFA, "[%02d] %s: del mcaddr %s\n",
rte_lcore_id(), __func__,
inet_ntop(imc->af, &imc->addr, ipstr, sizeof(ipstr)));
Expand All @@ -136,20 +136,27 @@ static int idev_mc_add(int af, struct inet_device *idev,
struct inet_ifmcaddr *imc;
char ipstr[64];

imc = imc_lookup(af, idev, maddr);
if (imc) {
imc_put(imc);
return EDPVS_EXIST;
if (imc_lookup(af, idev, maddr)) {
/*
* Hold the imc and return.
*
* Multiple IPv6 unicast address may be mapped to one IPv6 solicated-node
* multicast address. So increase the imc refcnt each time idev_mc_add called.
*
* Possibly imc added repeated? No, at least for now. The imc is set within the
* rigid program, not allowing user to configure it.
* */
return EDPVS_OK;
}

imc = rte_calloc(NULL, 1, sizeof(struct inet_ifmcaddr), RTE_CACHE_LINE_SIZE);
if (!imc)
return EDPVS_NOMEM;

imc->af = af;
imc->idev = idev;
imc->addr = *maddr;
rte_atomic32_init(&imc->refcnt);
imc->af = af;
imc->idev = idev;
imc->addr = *maddr;
imc->refcnt = 1;

imc_hash(imc, idev);

Expand All @@ -169,7 +176,10 @@ static int idev_mc_del(int af, struct inet_device *idev,
if (!imc)
return EDPVS_NOTEXIST;

imc_unhash(imc);
if (--imc->refcnt == 2) {
imc_unhash(imc);
}

imc_put(imc);

return EDPVS_OK;
Expand All @@ -192,8 +202,6 @@ static int ifa_add_del_mcast(struct inet_ifaddr *ifa, bool add, bool is_master)

if (add) {
err = idev_mc_add(ifa->af, ifa->idev, &iaddr);
if (EDPVS_EXIST == err)
return EDPVS_OK;
if (err)
return err;
if (is_master) {
Expand Down Expand Up @@ -222,7 +230,7 @@ static int ifa_add_del_mcast(struct inet_ifaddr *ifa, bool add, bool is_master)
}

/* add ipv6 multicast address after port start */
int idev_add_mcast_init(void *args)
static int __idev_add_mcast_init(void *args)
{
int err;
struct inet_device *idev;
Expand Down Expand Up @@ -278,6 +286,21 @@ int idev_add_mcast_init(void *args)
return err;
}

int idev_add_mcast_init(struct netif_port *dev)
{
int err;
lcoreid_t cid;

rte_eal_mp_remote_launch(__idev_add_mcast_init, dev, CALL_MAIN);
RTE_LCORE_FOREACH_WORKER(cid) {
err = rte_eal_wait_lcore(cid);
if (unlikely(err < 0))
return err;
}

return EDPVS_OK;
}

/* refer to linux:ipv6_chk_mcast_addr */
bool inet_chk_mcast_addr(int af, struct netif_port *dev,
const union inet_addr *group,
Expand Down
16 changes: 12 additions & 4 deletions src/kni.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ static int kni_mc_list_cmp_set(struct netif_port *dev,
rte_rwlock_write_lock(&dev->dev_lock);

naddr_old = NELEMS(addrs_old);
err = __netif_mc_dump(dev, addrs_old, &naddr_old);
err = __netif_mc_dump(dev, HW_ADDR_F_FROM_KNI, addrs_old, &naddr_old);
if (err != EDPVS_OK) {
RTE_LOG(ERR, Kni, "%s: fail to get current mc list\n", __func__);
goto out;
Expand Down Expand Up @@ -162,14 +162,14 @@ static int kni_mc_list_cmp_set(struct netif_port *dev,
/* nothing */
break;
case 1:
err = __netif_mc_add(dev, &chg_lst.addrs[i]);
err = __netif_hw_addr_add(&dev->mc, &chg_lst.addrs[i], HW_ADDR_F_FROM_KNI);

RTE_LOG(INFO, Kni, "%s: add mc addr: %s %s %s\n", __func__,
eth_addr_dump(&chg_lst.addrs[i], mac, sizeof(mac)),
dev->name, dpvs_strerror(err));
break;
case 2:
err = __netif_mc_del(dev, &chg_lst.addrs[i]);
err = __netif_hw_addr_del(&dev->mc, &chg_lst.addrs[i], HW_ADDR_F_FROM_KNI);

RTE_LOG(INFO, Kni, "%s: del mc addr: %s %s %s\n", __func__,
eth_addr_dump(&chg_lst.addrs[i], mac, sizeof(mac)),
Expand Down Expand Up @@ -246,7 +246,7 @@ static int kni_rtnl_check(void *arg)
{
struct netif_port *dev = arg;
int fd = dev->kni.kni_rtnl_fd;
int n, i;
int n, i, link_flags = 0;
char buf[4096];
struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
bool update = false;
Expand Down Expand Up @@ -284,6 +284,14 @@ static int kni_rtnl_check(void *arg)
/* note we should not update kni mac list for every event ! */
if (update) {
RTE_LOG(DEBUG, Kni, "%d events received!\n", i);
if (EDPVS_OK != linux_get_link_status(dev->kni.name, &link_flags, NULL, 0)) {
RTE_LOG(ERR, Kni, "%s:undetermined kni link status\n", dev->kni.name);
return DTIMER_OK;
}
if (!(link_flags & IFF_UP)) {
RTE_LOG(DEBUG, Kni, "skip link down kni device %s\n", dev->kni.name);
return DTIMER_OK;
}
if (kni_update_maddr(dev) == EDPVS_OK)
RTE_LOG(DEBUG, Kni, "update maddr of %s OK!\n", dev->name);
else
Expand Down
37 changes: 8 additions & 29 deletions src/netif.c
Original file line number Diff line number Diff line change
Expand Up @@ -3298,7 +3298,7 @@ static int bond_set_mc_list(struct netif_port *dev)
slave = dev->bond->master.slaves[i];

rte_rwlock_write_lock(&slave->dev_lock);
err = __netif_mc_sync_multiple(slave, dev);
err = __netif_mc_sync_multiple(slave, dev, dev->bond->master.slave_nb);
rte_rwlock_write_unlock(&slave->dev_lock);

if (err != EDPVS_OK) {
Expand All @@ -3320,10 +3320,11 @@ static int dpdk_set_mc_list(struct netif_port *dev)
if (rte_eth_allmulticast_get(dev->id) == 1)
return EDPVS_OK;

err = __netif_mc_dump(dev, addrs, &naddr);
err = __netif_mc_dump(dev, 0, addrs, &naddr);
if (err != EDPVS_OK)
return err;

RTE_LOG(DEBUG, NETIF, "%s: configuring %lu multicast hw-addrs\n", dev->name, naddr);
err = rte_eth_dev_set_mc_addr_list(dev->id, addrs, naddr);
if (err) {
RTE_LOG(WARNING, NETIF, "%s: rte_eth_dev_set_mc_addr_list failed -- %s,"
Expand Down Expand Up @@ -3506,6 +3507,7 @@ static struct netif_port* netif_rte_port_alloc(portid_t id, int nrxq,
return NULL;
}
port->in_ptr->dev = port;

for (ii = 0; ii < DPVS_MAX_LCORE; ii++) {
INIT_LIST_HEAD(&port->in_ptr->ifa_list[ii]);
INIT_LIST_HEAD(&port->in_ptr->ifm_list[ii]);
Expand Down Expand Up @@ -3916,7 +3918,6 @@ static int config_fdir_conf(struct rte_fdir_conf *fdir_conf)
int netif_port_start(struct netif_port *port)
{
int ii, ret;
lcoreid_t cid;
queueid_t qid;
char promisc_on, allmulticast;
char buf[512];
Expand Down Expand Up @@ -4058,13 +4059,10 @@ int netif_port_start(struct netif_port *port)
port->netif_ops->op_update_addr(port);

/* add in6_addr multicast address */
rte_eal_mp_remote_launch(idev_add_mcast_init, port, CALL_MAIN);
RTE_LCORE_FOREACH_WORKER(cid) {
if ((ret = rte_eal_wait_lcore(cid)) < 0) {
RTE_LOG(WARNING, NETIF, "%s: lcore %d: multicast address add failed for device %s\n",
__func__, cid, port->name);
return ret;
}
if ((ret = idev_add_mcast_init(port)) != EDPVS_OK) {
RTE_LOG(WARNING, NETIF, "%s: idev_add_mcast_init failed -- %d(%s)\n",
__func__, ret, dpvs_strerror(ret));
return ret;
}

/* update rss reta */
Expand Down Expand Up @@ -4096,25 +4094,6 @@ int netif_port_stop(struct netif_port *port)
return EDPVS_OK;
}

int __netif_set_mc_list(struct netif_port *dev)
{
if (!dev->netif_ops->op_set_mc_list)
return EDPVS_NOTSUPP;

return dev->netif_ops->op_set_mc_list(dev);
}

int netif_set_mc_list(struct netif_port *dev)
{
int err;

rte_rwlock_write_lock(&dev->dev_lock);
err = __netif_set_mc_list(dev);
rte_rwlock_write_unlock(&dev->dev_lock);

return err;
}

int netif_port_register(struct netif_port *port)
{
struct netif_port *cur;
Expand Down
Loading

0 comments on commit 2849ec1

Please sign in to comment.