Skip to content

Commit ffa501e

Browse files
aharonl-nvidia, jgunthorpe
authored and committed
RDMA/mlx5: Add steering support in optional flow counters
Add steering infrastructure for adding and removing optional counters. This allows the counters to be added and removed dynamically so as not to hurt performance. Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Aharon Landau <[email protected]> Reviewed-by: Maor Gottlieb <[email protected]> Signed-off-by: Leon Romanovsky <[email protected]> Signed-off-by: Mark Zhang <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 886773d commit ffa501e

File tree

3 files changed

+212
-0
lines changed

3 files changed

+212
-0
lines changed

drivers/infiniband/hw/mlx5/fs.c

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
#include <rdma/uverbs_std_types.h>
1111
#include <rdma/mlx5_user_ioctl_cmds.h>
1212
#include <rdma/mlx5_user_ioctl_verbs.h>
13+
#include <rdma/ib_hdrs.h>
1314
#include <rdma/ib_umem.h>
1415
#include <linux/mlx5/driver.h>
1516
#include <linux/mlx5/fs.h>
1617
#include <linux/mlx5/fs_helpers.h>
1718
#include <linux/mlx5/accel.h>
1819
#include <linux/mlx5/eswitch.h>
20+
#include <net/inet_ecn.h>
1921
#include "mlx5_ib.h"
2022
#include "counters.h"
2123
#include "devx.h"
@@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
847849
return prio;
848850
}
849851

852+
/*
 * Priorities handed to _get_prio() for the optional-counter flow tables
 * that live in MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS: one for the ECN CE
 * counter and one for the received-CNP counter.
 */
enum {
	RDMA_RX_ECN_OPCOUNTER_PRIO = 0,
	RDMA_RX_CNP_OPCOUNTER_PRIO = 1,
};
856+
857+
/*
 * Priority handed to _get_prio() for the optional-counter flow table that
 * lives in MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS (transmitted-CNP counter).
 */
enum {
	RDMA_TX_CNP_OPCOUNTER_PRIO = 0,
};
860+
861+
/*
 * Narrow @spec so that it only matches traffic whose source_vhca_port
 * equals @port_num.
 *
 * The match is only attempted when both the RDMA RX and the RDMA TX flow
 * tables advertise support for the source_vhca_port field.
 *
 * Return: 0 once the criteria mask and the match value have been written,
 * -EOPNOTSUPP when either capability is missing (@spec is left untouched).
 */
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
			      struct mlx5_flow_spec *spec)
{
	/* RX side must be able to match on the field ... */
	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
					ft_field_support.source_vhca_port))
		return -EOPNOTSUPP;

	/* ... and so must the TX side. */
	if (!MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
					ft_field_support.source_vhca_port))
		return -EOPNOTSUPP;

	/* Full mask on the field, exact match on the requested port. */
	MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
			 misc_parameters.source_vhca_port);
	MLX5_SET(fte_match_param, &spec->match_value,
		 misc_parameters.source_vhca_port, port_num);

	return 0;
}
877+
878+
/*
 * Fill @spec so that it matches packets of IP version @ipv whose ECN field
 * equals INET_ECN_CE.
 *
 * When mlx5_core_mp_enabled() is true the spec is additionally restricted
 * to @port_num through set_vhca_port_spec().
 *
 * Return: 0 on success, -EOPNOTSUPP when the RDMA RX flow table cannot
 * match on outer_ip_version or when the port restriction cannot be set.
 */
static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
			   struct mlx5_flow_spec *spec, int ipv)
{
	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
					ft_field_support.outer_ip_version))
		return -EOPNOTSUPP;

	if (mlx5_core_mp_enabled(dev->mdev)) {
		if (set_vhca_port_spec(dev, port_num, spec))
			return -EOPNOTSUPP;
	}

	/* Criteria: compare every bit of the ECN and IP-version fields. */
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ip_ecn);
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 outer_headers.ip_version);

	/* Values: congestion-experienced marking on the requested IP version. */
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
		 INET_ECN_CE);
	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
		 ipv);

	/* Derive the enable bits from whatever criteria were set above. */
	spec->match_criteria_enable =
		get_match_criteria_enable(spec->match_criteria);

	return 0;
}
903+
904+
/*
 * Fill @spec so that it matches packets whose BTH opcode equals
 * IB_BTH_OPCODE_CNP.
 *
 * When mlx5_core_mp_enabled() is true the spec is additionally restricted
 * to @port_num through set_vhca_port_spec().
 *
 * Return: 0 on success, -EOPNOTSUPP when the port restriction cannot be set.
 */
static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
			struct mlx5_flow_spec *spec)
{
	if (mlx5_core_mp_enabled(dev->mdev)) {
		if (set_vhca_port_spec(dev, port_num, spec))
			return -EOPNOTSUPP;
	}

	/* Exact match on the BTH opcode. */
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 misc_parameters.bth_opcode);
	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
		 IB_BTH_OPCODE_CNP);

	/* Derive the enable bits from whatever criteria were set above. */
	spec->match_criteria_enable =
		get_match_criteria_enable(spec->match_criteria);

	return 0;
}
921+
922+
/**
 * mlx5_ib_fs_add_op_fc - install the steering rules that feed an optional
 *                        flow counter
 * @dev: IB device whose flow database holds the optional-counter tables
 * @port_num: port the rules are restricted to when mlx5_core_mp_enabled()
 *            is true (forwarded to the spec builders)
 * @opfc: optional counter; @opfc->fc is the counter the rules count into and
 *        @opfc->rule[] receives the handles of the installed rules
 * @type: which optional counter to install rules for
 *
 * Builds one match spec (CNP counters) or two (ECN CE counter: IPv4 and
 * IPv6), makes sure the flow table for @type exists and adds one counting
 * rule per spec.
 *
 * On failure no handle is left behind in @opfc->rule[]: every slot written
 * by this call is reset to NULL, so a later mlx5_ib_fs_remove_op_fc(), which
 * walks the array until the first NULL entry, never sees an ERR_PTR or an
 * already deleted rule.
 *
 * Return: 0 on success; -ENOMEM if the specs cannot be allocated;
 * -EOPNOTSUPP for an unknown @type, a missing capability or a missing
 * namespace; otherwise the error reported by _get_prio() or
 * mlx5_add_flow_rules().
 */
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
			 struct mlx5_ib_op_fc *opfc,
			 enum mlx5_ib_optional_counter_type type)
{
	enum mlx5_flow_namespace_type fn_type;
	int priority, i, err, spec_num;
	struct mlx5_flow_act flow_act = {};
	/* Zeroed so no uninitialised stack bytes reach mlx5_add_flow_rules(). */
	struct mlx5_flow_destination dst = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_ib_flow_prio *prio;
	struct mlx5_flow_spec *spec;

	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	switch (type) {
	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
		/* One rule per IP version. */
		if (set_ecn_ce_spec(dev, port_num, &spec[0],
				    MLX5_FS_IPV4_VERSION) ||
		    set_ecn_ce_spec(dev, port_num, &spec[1],
				    MLX5_FS_IPV6_VERSION)) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 2;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
		break;

	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
		    set_cnp_spec(dev, port_num, &spec[0])) {
			err = -EOPNOTSUPP;
			goto free;
		}
		spec_num = 1;
		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
		break;

	default:
		/* Also guards the opfcs[type] lookup below. */
		err = -EOPNOTSUPP;
		goto free;
	}

	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
	if (!ns) {
		err = -EOPNOTSUPP;
		goto free;
	}

	/* Create the table for this counter type on first use only. */
	prio = &dev->flow_db->opfcs[type];
	if (!prio->flow_table) {
		prio = _get_prio(ns, prio, priority,
				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
		if (IS_ERR(prio)) {
			err = PTR_ERR(prio);
			goto free;
		}
	}

	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	dst.counter_id = mlx5_fc_id(opfc->fc);

	flow_act.action =
		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;

	for (i = 0; i < spec_num; i++) {
		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
						    &flow_act, &dst, 1);
		if (IS_ERR(opfc->rule[i])) {
			err = PTR_ERR(opfc->rule[i]);
			goto del_rules;
		}
	}
	/* One table reference per rule; the remove path drops one per rule. */
	prio->refcount += spec_num;
	kfree(spec);

	return 0;

del_rules:
	/* Slot i holds an ERR_PTR at this point; do not leave it behind. */
	opfc->rule[i] = NULL;
	/* Unwind the rules added before the failure and forget their handles. */
	for (i -= 1; i >= 0; i--) {
		mlx5_del_flow_rules(opfc->rule[i]);
		opfc->rule[i] = NULL;
	}
	/*
	 * NOTE(review): put_flow_table() is defined outside this view;
	 * presumably it destroys the table when nothing references it - confirm.
	 */
	put_flow_table(dev, prio, false);
free:
	kfree(spec);
	return err;
}
1024+
1025+
void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
1026+
struct mlx5_ib_op_fc *opfc,
1027+
enum mlx5_ib_optional_counter_type type)
1028+
{
1029+
int i;
1030+
1031+
for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
1032+
mlx5_del_flow_rules(opfc->rule[i]);
1033+
put_flow_table(dev, &dev->flow_db->opfcs[type], true);
1034+
}
1035+
}
1036+
8501037
static void set_underlay_qp(struct mlx5_ib_dev *dev,
8511038
struct mlx5_flow_spec *spec,
8521039
u32 underlay_qpn)

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,14 @@ struct mlx5_ib_pp {
263263
struct mlx5_core_dev *mdev;
264264
};
265265

266+
/*
 * Kinds of optional flow counters. The values are used as indices into the
 * opfcs[] array of struct mlx5_ib_flow_db, so MLX5_IB_OPCOUNTER_MAX must
 * stay last.
 */
enum mlx5_ib_optional_counter_type {
267+
MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, /* RDMA RX rules matching ip_ecn == INET_ECN_CE (IPv4 and IPv6) */
268+
MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, /* RDMA RX rule matching bth_opcode == IB_BTH_OPCODE_CNP */
269+
MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, /* RDMA TX rule matching bth_opcode == IB_BTH_OPCODE_CNP */
270+
271+
MLX5_IB_OPCOUNTER_MAX, /* number of counter types; sizes opfcs[] */
272+
};
273+
266274
struct mlx5_ib_flow_db {
267275
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
268276
struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
@@ -271,6 +279,7 @@ struct mlx5_ib_flow_db {
271279
struct mlx5_ib_flow_prio fdb;
272280
struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT];
273281
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
282+
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
274283
struct mlx5_flow_table *lag_demux_ft;
275284
/* Protect flow steering bypass flow tables
276285
* when add/del flow rules.
@@ -797,6 +806,13 @@ struct mlx5_ib_resources {
797806
struct mlx5_ib_port_resources ports[2];
798807
};
799808

809+
#define MAX_OPFC_RULES 2
810+
811+
/*
 * An optional flow counter together with the steering rules that feed it.
 * Filled in by mlx5_ib_fs_add_op_fc() and torn down by
 * mlx5_ib_fs_remove_op_fc().
 */
struct mlx5_ib_op_fc {
812+
struct mlx5_fc *fc; /* counter whose id (mlx5_fc_id()) is the destination of the rules */
813+
struct mlx5_flow_handle *rule[MAX_OPFC_RULES]; /* handles from mlx5_add_flow_rules(); the remove path stops at the first NULL entry */
814+
};
815+
800816
struct mlx5_ib_counters {
801817
struct rdma_stat_desc *descs;
802818
size_t *offsets;
@@ -807,6 +823,14 @@ struct mlx5_ib_counters {
807823
u16 set_id;
808824
};
809825

826+
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
827+
struct mlx5_ib_op_fc *opfc,
828+
enum mlx5_ib_optional_counter_type type);
829+
830+
void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
831+
struct mlx5_ib_op_fc *opfc,
832+
enum mlx5_ib_optional_counter_type type);
833+
810834
struct mlx5_ib_multiport_info;
811835

812836
struct mlx5_ib_multiport {

include/rdma/ib_hdrs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
232232
#define IB_BTH_SE_SHIFT 23
233233
#define IB_BTH_TVER_MASK 0xf
234234
#define IB_BTH_TVER_SHIFT 16
235+
#define IB_BTH_OPCODE_CNP 0x81
235236

236237
static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
237238
{

0 commit comments

Comments
 (0)