diff --git a/include/dp_cntrack.h b/include/dp_cntrack.h new file mode 100644 index 000000000..0e173c336 --- /dev/null +++ b/include/dp_cntrack.h @@ -0,0 +1,24 @@ +#ifndef __INCLUDE_DP_CNTRACK_H__ +#define __INCLUDE_DP_CNTRACK_H__ + +#include +#include +#include +#include + +#include "dp_flow.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void dp_cntrack_init(void); +int dp_cntrack_handle(struct rte_node *node, struct rte_mbuf *m, struct dp_flow *df); + + +#ifdef __cplusplus +} +#endif + + +#endif // __INCLUDE_DP_CNTRACK_H__ diff --git a/include/dp_flow.h b/include/dp_flow.h index 89e59808b..8b76b3098 100644 --- a/include/dp_flow.h +++ b/include/dp_flow.h @@ -57,6 +57,7 @@ enum { DP_FLOW_NAT_TYPE_NETWORK_LOCAL, DP_FLOW_NAT_TYPE_NETWORK_NEIGH, DP_FLOW_NAT_AS_TARGET, + DP_FLOW_LB_TYPE_LOCAL_NEIGH_TRAFFIC, DP_FLOW_LB_TYPE_RECIRC, DP_FLOW_LB_TYPE_FORWARD, }; diff --git a/include/dp_log.h b/include/dp_log.h index 623fceec3..312c366ea 100644 --- a/include/dp_log.h +++ b/include/dp_log.h @@ -58,6 +58,7 @@ extern "C" { #define DP_LOG_DST_PORT(VALUE) _DP_LOG_UINT("dst_port", VALUE) #define DP_LOG_PROTO(VALUE) _DP_LOG_UINT("protocol", VALUE) // networking +#define DP_LOG_VNF_TYPE(VALUE) _DP_LOG_UINT("vnf_type", VALUE) #define DP_LOG_VNI(VALUE) _DP_LOG_UINT("vni", VALUE) #define DP_LOG_VNI_TYPE(VALUE) _DP_LOG_UINT("vni_type", VALUE) #define DP_LOG_MINPORT(VALUE) _DP_LOG_UINT("minport", VALUE) diff --git a/include/dp_vnf.h b/include/dp_vnf.h index dfc127201..61305721c 100644 --- a/include/dp_vnf.h +++ b/include/dp_vnf.h @@ -11,6 +11,8 @@ extern "C" { #endif #define DP_VNF_MAX_TABLE_SIZE 1000 +#define DP_VNF_MATCH_ALL_PORT_ID_VALUE 0xFFFF +#define DP_VNF_MATCH_ALL_PORT_ID true enum vnf_type { DP_VNF_TYPE_UNDEFINED, @@ -42,6 +44,7 @@ int dp_get_portid_with_vnf_key(void *key, enum vnf_type v_type); int dp_del_vnf_with_vnf_key(void *key); int dp_del_vnf_with_value(struct dp_vnf_value *val); int dp_find_vnf_with_value(struct dp_vnf_value *val); +int dp_get_vnf_entry(struct 
dp_vnf_value *val, enum vnf_type v_type, uint16_t portid, bool match_all); int dp_list_vnf_alias_routes(uint16_t portid, enum vnf_type v_type, struct dp_grpc_responder *responder); #ifdef __cplusplus diff --git a/src/dp_cntrack.c b/src/dp_cntrack.c new file mode 100644 index 000000000..1ec86ed9f --- /dev/null +++ b/src/dp_cntrack.c @@ -0,0 +1,275 @@ +#include "dp_cntrack.h" +#include "dp_error.h" +#include "dp_flow.h" +#include "dp_log.h" +#include "dp_vnf.h" +#include "rte_flow/dp_rte_flow.h" + + +static struct flow_key first_key = {0}; +static struct flow_key second_key = {0}; +static struct flow_key *prev_key = NULL, *curr_key = &first_key; +static struct flow_value *prev_flow_val = NULL; +static int flow_timeout = DP_FLOW_DEFAULT_TIMEOUT; +static bool offload_mode_enabled = 0; + +void dp_cntrack_init(void) +{ + offload_mode_enabled = dp_conf_is_offload_enabled(); +} + +static __rte_always_inline void dp_cntrack_tcp_state(struct flow_value *flow_val, struct rte_tcp_hdr *tcp_hdr) +{ + uint8_t tcp_flags = tcp_hdr->tcp_flags; + + if (DP_TCP_PKT_FLAG_RST(tcp_flags)) { + flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_RST_FIN; + } else if (DP_TCP_PKT_FLAG_FIN(tcp_flags)) { + // this is not entirely 1:1 mapping to fin sequence, + // but sufficient to determine if a tcp connection is almost successfully closed + // (last ack is still pending) + if (flow_val->l4_state.tcp_state == DP_FLOW_TCP_STATE_ESTABLISHED) + flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_FINWAIT; + else + flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_RST_FIN; + } else { + switch (flow_val->l4_state.tcp_state) { + case DP_FLOW_TCP_STATE_NONE: + case DP_FLOW_TCP_STATE_RST_FIN: + if (DP_TCP_PKT_FLAG_SYN(tcp_flags)) + flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_NEW_SYN; + break; + case DP_FLOW_TCP_STATE_NEW_SYN: + if (DP_TCP_PKT_FLAG_SYNACK(tcp_flags)) + flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_NEW_SYNACK; + break; + case DP_FLOW_TCP_STATE_NEW_SYNACK: + if 
(DP_TCP_PKT_FLAG_ACK(tcp_flags)) + flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_ESTABLISHED; + break; + default: + // FIN-states already handled above + break; + } + + } + +} + +static __rte_always_inline void dp_cntrack_init_flow_offload_flags(struct flow_value *flow_val, struct dp_flow *df) +{ + if (!offload_mode_enabled) + return; + + if (df->l4_type != IPPROTO_TCP) + flow_val->offload_flags.orig = DP_FLOW_OFFLOAD_INSTALL; + else + flow_val->offload_flags.orig = DP_FLOW_NON_OFFLOAD; // do not offload tcp traffic until it is established + + flow_val->offload_flags.reply = DP_FLOW_NON_OFFLOAD; +} + +// advance the offload state of the packet's direction by one step: NON_OFFLOAD, then OFFLOAD_INSTALL, then OFFLOADED (no-op when offload mode is disabled) +static __rte_always_inline void dp_cntrack_change_flow_offload_flags(struct flow_value *flow_val, struct dp_flow *df) +{ + if (!offload_mode_enabled) + return; + + if (df->flags.dir == DP_FLOW_DIR_ORG) { + + if (flow_val->offload_flags.orig == DP_FLOW_NON_OFFLOAD) + flow_val->offload_flags.orig = DP_FLOW_OFFLOAD_INSTALL; + else if (flow_val->offload_flags.orig == DP_FLOW_OFFLOAD_INSTALL) + flow_val->offload_flags.orig = DP_FLOW_OFFLOADED; + } else if (df->flags.dir == DP_FLOW_DIR_REPLY) { + + if (flow_val->offload_flags.reply == DP_FLOW_NON_OFFLOAD) + flow_val->offload_flags.reply = DP_FLOW_OFFLOAD_INSTALL; + else if (flow_val->offload_flags.reply == DP_FLOW_OFFLOAD_INSTALL) + flow_val->offload_flags.reply = DP_FLOW_OFFLOADED; + } +} + +static __rte_always_inline void dp_cntrack_set_timeout_tcp_flow(struct flow_value *flow_val, struct dp_flow *df) +{ + // established flows get the extended timeout, every other state gets flow_timeout; established and closing (FINWAIT, RST_FIN) flows also advance their offload state + if (flow_val->l4_state.tcp_state == DP_FLOW_TCP_STATE_ESTABLISHED) { + flow_val->timeout_value = DP_FLOW_TCP_EXTENDED_TIMEOUT; + dp_cntrack_change_flow_offload_flags(flow_val, df); + } else if (flow_val->l4_state.tcp_state == DP_FLOW_TCP_STATE_FINWAIT + || flow_val->l4_state.tcp_state == DP_FLOW_TCP_STATE_RST_FIN) { + dp_cntrack_change_flow_offload_flags(flow_val, df); + flow_val->timeout_value = flow_timeout; + } else + flow_val->timeout_value = flow_timeout; +} + +static __rte_always_inline void 
dp_cntrack_set_pkt_offload_decision(struct dp_flow *df) +{ + if (df->flags.dir == DP_FLOW_DIR_ORG) + df->flags.offload_decision = df->conntrack->offload_flags.orig; + else + df->flags.offload_decision = df->conntrack->offload_flags.reply; +} + +static __rte_always_inline struct flow_value *flow_table_insert_entry(struct flow_key *key, struct dp_flow *df, struct rte_mbuf *m) +{ + struct flow_value *flow_val = NULL; + struct flow_key inverted_key = {0}; + struct dp_vnf_value vnf_val; + + flow_val = rte_zmalloc("flow_val", sizeof(struct flow_value), RTE_CACHE_LINE_SIZE); + if (!flow_val) + return flow_val; + + vnf_val.alias_pfx.ip = key->ip_dst; + vnf_val.alias_pfx.length = 32; + /* Add original direction to conntrack table */ + dp_add_flow(key); + flow_val->flow_key[DP_FLOW_DIR_ORG] = *key; + flow_val->flow_status = DP_FLOW_STATUS_FLAG_NONE; + /* Target ip of the traffic is an alias prefix of a VM in the same VNI on this dp-service */ + /* This will be an uni-directional traffic, which does not expect its corresponding reverse traffic */ + /* Details can be found in https://github.com/onmetal/net-dpservice/pull/341 */ + if (offload_mode_enabled + && (df->flags.flow_type != DP_FLOW_TYPE_INCOMING) + && !DP_FAILED(dp_get_vnf_entry(&vnf_val, DP_VNF_TYPE_LB_ALIAS_PFX, m->port, DP_VNF_MATCH_ALL_PORT_ID)) + ) + flow_val->nf_info.nat_type = DP_FLOW_LB_TYPE_LOCAL_NEIGH_TRAFFIC; + else + flow_val->nf_info.nat_type = DP_FLOW_NAT_TYPE_NONE; + + flow_val->timeout_value = flow_timeout; + flow_val->created_port_id = m->port; + + df->flags.dir = DP_FLOW_DIR_ORG; + + dp_cntrack_init_flow_offload_flags(flow_val, df); + + if (df->l4_type == IPPROTO_TCP) + flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_NONE; + + dp_ref_init(&flow_val->ref_count, dp_free_flow); + dp_add_flow_data(key, flow_val); + + // Only the original flow (outgoing)'s hash value is recorded + // Implicit casting from hash_sig_t to uint32_t! 
+ df->dp_flow_hash = dp_get_conntrack_flow_hash_value(key); + + dp_invert_flow_key(key, &inverted_key); + flow_val->flow_key[DP_FLOW_DIR_REPLY] = inverted_key; + dp_add_flow(&inverted_key); + dp_add_flow_data(&inverted_key, flow_val); + return flow_val; +} + +// returns true when the first nr_bytes bytes of both buffers are equal +static __rte_always_inline bool dp_test_next_n_bytes_identical(const unsigned char *first_val, const unsigned char *second_val, size_t nr_bytes) +{ + // length and index are size_t: the caller passes sizeof(), which must never be truncated + for (size_t i = 0; i < nr_bytes; i++) { + if ((first_val[i] ^ second_val[i]) > 0) + return false; + } + + return true; +} + +static __rte_always_inline void dp_set_pkt_flow_direction(struct flow_key *key, struct flow_value *flow_val, struct dp_flow *df) +{ + + if (dp_are_flows_identical(key, &flow_val->flow_key[DP_FLOW_DIR_REPLY])) + df->flags.dir = DP_FLOW_DIR_REPLY; + + if (dp_are_flows_identical(key, &flow_val->flow_key[DP_FLOW_DIR_ORG])) + df->flags.dir = DP_FLOW_DIR_ORG; + + df->dp_flow_hash = dp_get_conntrack_flow_hash_value(key); +} + +static __rte_always_inline void dp_set_flow_offload_flag(struct rte_mbuf *m, struct flow_value *flow_val, struct dp_flow *df) +{ + if (flow_val->nf_info.nat_type == DP_FLOW_NAT_TYPE_NETWORK_NEIGH + || flow_val->nf_info.nat_type == DP_FLOW_LB_TYPE_FORWARD + || flow_val->nf_info.nat_type == DP_FLOW_LB_TYPE_LOCAL_NEIGH_TRAFFIC) { + dp_cntrack_change_flow_offload_flags(flow_val, df); + } else { + + // recirc pkt shall not change flow's state because its ancestor has already done so + if (dp_get_pkt_mark(m)->flags.is_recirc) + return; + + // when to offload reply pkt of a tcp flow is determined in dp_cntrack_set_timeout_tcp_flow + if (df->l4_type != IPPROTO_TCP) + dp_cntrack_change_flow_offload_flags(flow_val, df); + } +} + +int dp_cntrack_handle(struct rte_node *node, struct rte_mbuf *m, struct dp_flow *df) +{ + struct flow_value *flow_val = NULL; + struct rte_ipv4_hdr *ipv4_hdr; + struct rte_tcp_hdr *tcp_hdr; + struct flow_key *key = NULL; + bool same_key; + int ret; + + #ifdef ENABLE_PYTEST + flow_timeout = 
dp_conf_get_flow_timeout(); + #endif + + ipv4_hdr = dp_get_ipv4_hdr(m); + + key = curr_key; + memset(key, 0, sizeof(struct flow_key)); + + if (unlikely(DP_FAILED(dp_build_flow_key(key, m)))) + return DP_ERROR; + + same_key = prev_key && dp_test_next_n_bytes_identical((const unsigned char *)prev_key, + (const unsigned char *)curr_key, + sizeof(struct flow_key)); + + if (!same_key) { + ret = dp_get_flow_data(key, (void **)&flow_val); + if (unlikely(DP_FAILED(ret))) { + if (likely(ret == -ENOENT)) { + flow_val = flow_table_insert_entry(key, df, m); + if (unlikely(!flow_val)) { + DPNODE_LOG_WARNING(node, "Failed to allocate a new flow table entry"); + return DP_ERROR; + } + } else { + DPNODE_LOG_WARNING(node, "Flow table key search failed", DP_LOG_RET(ret)); + return DP_ERROR; + } + } else { + dp_set_pkt_flow_direction(key, flow_val, df); + dp_set_flow_offload_flag(m, flow_val, df); + + } + prev_key = curr_key; + if (curr_key == &first_key) + curr_key = &second_key; + else + curr_key = &first_key; + + prev_flow_val = flow_val; + } else { + flow_val = prev_flow_val; + dp_set_pkt_flow_direction(key, flow_val, df); + dp_set_flow_offload_flag(m, flow_val, df); + } + + flow_val->timestamp = rte_rdtsc(); + + if (df->l4_type == IPPROTO_TCP && !dp_get_pkt_mark(m)->flags.is_recirc) { + tcp_hdr = (struct rte_tcp_hdr *) (ipv4_hdr + 1); + dp_cntrack_tcp_state(flow_val, tcp_hdr); + dp_cntrack_set_timeout_tcp_flow(flow_val, df); + } + df->conntrack = flow_val; + dp_cntrack_set_pkt_offload_decision(df); + + return DP_OK; +} diff --git a/src/dp_flow.c b/src/dp_flow.c index 8d6f1316d..15e708686 100644 --- a/src/dp_flow.c +++ b/src/dp_flow.c @@ -21,6 +21,8 @@ #define DP_LOG_FLOW_KEY(KEY) \ _DP_LOG_UINT("flow_hash", dp_get_conntrack_flow_hash_value(KEY)), \ DP_LOG_PROTO((KEY)->proto), \ + DP_LOG_VNI((KEY)->vni), \ + DP_LOG_VNF_TYPE((KEY)->vnf), \ DP_LOG_SRC_IPV4((KEY)->ip_src), DP_LOG_DST_IPV4((KEY)->ip_dst), \ DP_LOG_SRC_PORT((KEY)->src.port_src), DP_LOG_DST_PORT((KEY)->port_dst) @@ 
-91,22 +93,26 @@ static int dp_build_icmp_flow_key(struct dp_flow *df, struct flow_key *key /* ou } /* Isolating only VNF NAT conntrack entries at the moment. The others should follow */ -static __rte_always_inline void dp_mark_vnf_type(struct dp_flow *df, struct flow_key *key) +static __rte_always_inline void dp_mark_vnf_type(struct dp_flow *df, struct flow_key *key, uint16_t port) { struct snat_data *s_data; + struct dp_vnf_value vnf_val; if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) { - if (df->vnf_type == DP_VNF_TYPE_NAT) - key->vnf = (uint8_t)DP_VNF_TYPE_NAT; + if (df->vnf_type == DP_VNF_TYPE_NAT || df->vnf_type == DP_VNF_TYPE_LB_ALIAS_PFX) + key->vnf = (uint8_t)df->vnf_type; else key->vnf = (uint8_t)DP_VNF_TYPE_UNDEFINED; } else { + vnf_val.alias_pfx.ip = key->ip_src; + vnf_val.alias_pfx.length = 32; s_data = dp_get_vm_snat_data(key->ip_src, key->vni); - if (s_data && s_data->network_nat_ip != 0) { + if (s_data && s_data->network_nat_ip != 0) key->vnf = (uint8_t)DP_VNF_TYPE_NAT; - } else { + else if (!DP_FAILED(dp_get_vnf_entry(&vnf_val, DP_VNF_TYPE_LB_ALIAS_PFX, port, !DP_VNF_MATCH_ALL_PORT_ID))) + key->vnf = (uint8_t)DP_VNF_TYPE_LB_ALIAS_PFX; + else key->vnf = (uint8_t)DP_VNF_TYPE_UNDEFINED; - } } } @@ -125,7 +131,7 @@ int dp_build_flow_key(struct flow_key *key /* out */, struct rte_mbuf *m /* in * else key->vni = dp_get_vm_vni(m->port); - dp_mark_vnf_type(df, key); + dp_mark_vnf_type(df, key, m->port); switch (df->l4_type) { case IPPROTO_TCP: @@ -216,6 +222,12 @@ int dp_get_flow_data(struct flow_key *key, void **data) if (DP_FAILED(result)) *data = NULL; +#ifdef ENABLE_PYTEST + if (*data != NULL) + DPS_LOG_DEBUG("Successfully found data in flow table", DP_LOG_FLOW_KEY(key)); + else + DPS_LOG_DEBUG("Cannot find data in flow table", DP_LOG_FLOW_KEY(key)); +#endif return result; } diff --git a/src/dp_vnf.c b/src/dp_vnf.c index e0ea76b6a..d7f56162b 100644 --- a/src/dp_vnf.c +++ b/src/dp_vnf.c @@ -2,6 +2,7 @@ #include #include "dp_error.h" #include 
"dp_log.h" +#include "dp_lpm.h" #include "grpc/dp_grpc_responder.h" static struct rte_hash *vnf_handle_tbl = NULL; @@ -49,6 +50,14 @@ int dp_set_vnf_value(void *key, struct dp_vnf_value *val) return DP_ERROR; } +int dp_get_vnf_entry(struct dp_vnf_value *val, enum vnf_type v_type, uint16_t portid, bool match_all) +{ + val->v_type = v_type; + val->portid = match_all ? DP_VNF_MATCH_ALL_PORT_ID_VALUE : portid; + val->vni = dp_get_vm_vni(portid); + return dp_find_vnf_with_value(val); +} + int dp_get_portid_with_vnf_key(void *key, enum vnf_type v_type) { struct dp_vnf_value *temp_val; @@ -99,7 +108,7 @@ int dp_del_vnf_with_vnf_key(void *key) static __rte_always_inline bool dp_vnf_equal(struct dp_vnf_value *val1, struct dp_vnf_value *val2) { - return val1->portid == val2->portid + return ((val1->portid == DP_VNF_MATCH_ALL_PORT_ID_VALUE) || (val1->portid == val2->portid)) && val1->alias_pfx.ip == val2->alias_pfx.ip && val1->alias_pfx.length == val2->alias_pfx.length && val1->v_type == val2->v_type; diff --git a/src/grpc/dp_grpc_impl.c b/src/grpc/dp_grpc_impl.c index 3d2c64ef9..a949e5324 100644 --- a/src/grpc/dp_grpc_impl.c +++ b/src/grpc/dp_grpc_impl.c @@ -56,14 +56,6 @@ static int dp_insert_vnf_entry(struct dp_vnf_value *val, enum vnf_type v_type, return dp_set_vnf_value((void *)ul_addr6, val); } -static __rte_always_inline int dp_get_vnf_entry(struct dp_vnf_value *val, enum vnf_type v_type, uint16_t portid) -{ - val->v_type = v_type; - val->portid = portid; - val->vni = dp_get_vm_vni(portid); - return dp_find_vnf_with_value(val); -} - static __rte_always_inline int dp_remove_vnf_entry(struct dp_vnf_value *val, enum vnf_type v_type, uint16_t portid) { val->v_type = v_type; @@ -367,7 +359,7 @@ static int dp_process_add_lbprefix(struct dp_grpc_responder *responder) if (DP_FAILED(port_id)) return DP_GRPC_ERR_NO_VM; - if (!DP_FAILED(dp_get_vnf_entry(&vnf_val, DP_VNF_TYPE_LB_ALIAS_PFX, port_id))) + if (!DP_FAILED(dp_get_vnf_entry(&vnf_val, DP_VNF_TYPE_LB_ALIAS_PFX, port_id, 
!DP_VNF_MATCH_ALL_PORT_ID))) return DP_GRPC_ERR_ALREADY_EXISTS; if (DP_FAILED(dp_insert_vnf_entry(&vnf_val, DP_VNF_TYPE_LB_ALIAS_PFX, dp_get_vm_vni(port_id), port_id, ul_addr6))) diff --git a/src/meson.build b/src/meson.build index e8eaaa13e..cd94e4fb0 100644 --- a/src/meson.build +++ b/src/meson.build @@ -53,7 +53,8 @@ dp_sources = [ 'dp_vnf.c', 'dp_vni.c', 'dpdk_layer.c', - 'dp_internal_stats.c' + 'dp_internal_stats.c', + 'dp_cntrack.c' ] if get_option('enable_virtual_services') dp_sources += [ diff --git a/src/nodes/conntrack_node.c b/src/nodes/conntrack_node.c index 25f1af541..d19d64fec 100644 --- a/src/nodes/conntrack_node.c +++ b/src/nodes/conntrack_node.c @@ -5,6 +5,7 @@ #include "dp_error.h" #include "dp_flow.h" #include "dp_log.h" +#include "dp_cntrack.h" #include "dp_lpm.h" #include "dp_mbuf_dyn.h" #include "dp_vnf.h" @@ -12,13 +13,6 @@ #include "nodes/dhcp_node.h" #include "rte_flow/dp_rte_flow.h" -static struct flow_key first_key = {0}; -static struct flow_key second_key = {0}; -static struct flow_key *prev_key, *curr_key; -static struct flow_value *prev_flow_val = NULL; -static int flow_timeout = DP_FLOW_DEFAULT_TIMEOUT; -static bool offload_mode_enabled = 0; - #define NEXT_NODES(NEXT) \ NEXT(CONNTRACK_NEXT_LB, "lb") \ NEXT(CONNTRACK_NEXT_DNAT, "dnat") \ @@ -27,188 +21,10 @@ DP_NODE_REGISTER(CONNTRACK, conntrack, NEXT_NODES); static int conntrack_node_init(__rte_unused const struct rte_graph *graph, __rte_unused struct rte_node *node) { - prev_key = NULL; - curr_key = &first_key; - offload_mode_enabled = dp_conf_is_offload_enabled(); -#ifdef ENABLE_PYTEST - flow_timeout = dp_conf_get_flow_timeout(); -#endif + dp_cntrack_init(); return DP_OK; } -static __rte_always_inline void dp_cntrack_tcp_state(struct flow_value *flow_val, struct rte_tcp_hdr *tcp_hdr) -{ - uint8_t tcp_flags = tcp_hdr->tcp_flags; - - if (DP_TCP_PKT_FLAG_RST(tcp_flags)) { - flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_RST_FIN; - } else if (DP_TCP_PKT_FLAG_FIN(tcp_flags)) { - // 
this is not entirely 1:1 mapping to fin sequence, - // but sufficient to determine if a tcp connection is almost successfuly closed - // (last ack is still pending) - if (flow_val->l4_state.tcp_state == DP_FLOW_TCP_STATE_ESTABLISHED) - flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_FINWAIT; - else - flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_RST_FIN; - } else { - switch (flow_val->l4_state.tcp_state) { - case DP_FLOW_TCP_STATE_NONE: - case DP_FLOW_TCP_STATE_RST_FIN: - if (DP_TCP_PKT_FLAG_SYN(tcp_flags)) - flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_NEW_SYN; - break; - case DP_FLOW_TCP_STATE_NEW_SYN: - if (DP_TCP_PKT_FLAG_SYNACK(tcp_flags)) - flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_NEW_SYNACK; - break; - case DP_FLOW_TCP_STATE_NEW_SYNACK: - if (DP_TCP_PKT_FLAG_ACK(tcp_flags)) - flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_ESTABLISHED; - break; - default: - // FIN-states already handled above - break; - } - - } -} - -static __rte_always_inline void dp_cntrack_init_flow_offload_flags(struct flow_value *flow_val, struct dp_flow *df) -{ - if (!offload_mode_enabled) - return; - - if (df->l4_type != IPPROTO_TCP) - flow_val->offload_flags.orig = DP_FLOW_OFFLOAD_INSTALL; - else - flow_val->offload_flags.orig = DP_FLOW_NON_OFFLOAD; // offload tcp traffic until it is established - - flow_val->offload_flags.reply = DP_FLOW_NON_OFFLOAD; -} - - -static __rte_always_inline void dp_cntrack_change_flow_offload_flags(struct flow_value *flow_val, struct dp_flow *df) -{ - if (!offload_mode_enabled) - return; - - if (df->flags.dir == DP_FLOW_DIR_ORG) { - - if (flow_val->offload_flags.orig == DP_FLOW_NON_OFFLOAD) - flow_val->offload_flags.orig = DP_FLOW_OFFLOAD_INSTALL; - else if (flow_val->offload_flags.orig == DP_FLOW_OFFLOAD_INSTALL) - flow_val->offload_flags.orig = DP_FLOW_OFFLOADED; - } else if (df->flags.dir == DP_FLOW_DIR_REPLY) { - - if (flow_val->offload_flags.reply == DP_FLOW_NON_OFFLOAD) - flow_val->offload_flags.reply = DP_FLOW_OFFLOAD_INSTALL; - 
else if (flow_val->offload_flags.reply == DP_FLOW_OFFLOAD_INSTALL) - flow_val->offload_flags.reply = DP_FLOW_OFFLOADED; - } -} - -static __rte_always_inline void dp_cntrack_set_timeout_tcp_flow(struct flow_value *flow_val, struct dp_flow *df) -{ - - if (flow_val->l4_state.tcp_state == DP_FLOW_TCP_STATE_ESTABLISHED) { - flow_val->timeout_value = DP_FLOW_TCP_EXTENDED_TIMEOUT; - dp_cntrack_change_flow_offload_flags(flow_val, df); - } else if (flow_val->l4_state.tcp_state == DP_FLOW_TCP_STATE_FINWAIT - || flow_val->l4_state.tcp_state == DP_FLOW_TCP_STATE_RST_FIN) { - dp_cntrack_change_flow_offload_flags(flow_val, df); - flow_val->timeout_value = flow_timeout; - } else - flow_val->timeout_value = flow_timeout; -} - - - -static __rte_always_inline void dp_cntrack_set_pkt_offload_decision(struct dp_flow *df) -{ - if (df->flags.dir == DP_FLOW_DIR_ORG) - df->flags.offload_decision = df->conntrack->offload_flags.orig; - else - df->flags.offload_decision = df->conntrack->offload_flags.reply; -} - -static __rte_always_inline struct flow_value *flow_table_insert_entry(struct flow_key *key, struct dp_flow *df, struct rte_mbuf *m) -{ - struct flow_value *flow_val = NULL; - struct flow_key inverted_key = {0}; - - flow_val = rte_zmalloc("flow_val", sizeof(struct flow_value), RTE_CACHE_LINE_SIZE); - if (!flow_val) - return flow_val; - /* Add original direction to conntrack table */ - dp_add_flow(key); - flow_val->flow_key[DP_FLOW_DIR_ORG] = *key; - flow_val->flow_status = DP_FLOW_STATUS_FLAG_NONE; - flow_val->nf_info.nat_type = DP_FLOW_NAT_TYPE_NONE; - flow_val->timeout_value = flow_timeout; - flow_val->created_port_id = m->port; - - df->flags.dir = DP_FLOW_DIR_ORG; - - dp_cntrack_init_flow_offload_flags(flow_val, df); - - if (df->l4_type == IPPROTO_TCP) - flow_val->l4_state.tcp_state = DP_FLOW_TCP_STATE_NONE; - - dp_ref_init(&flow_val->ref_count, dp_free_flow); - dp_add_flow_data(key, flow_val); - - // Only the original flow (outgoing)'s hash value is recorded - // Implicit casting 
from hash_sig_t to uint32_t! - df->dp_flow_hash = dp_get_conntrack_flow_hash_value(key); - - dp_invert_flow_key(key, &inverted_key); - flow_val->flow_key[DP_FLOW_DIR_REPLY] = inverted_key; - dp_add_flow(&inverted_key); - dp_add_flow_data(&inverted_key, flow_val); - return flow_val; -} - -static __rte_always_inline void change_flow_state_dir(struct rte_mbuf *m, struct flow_key *key, struct flow_value *flow_val, struct dp_flow *df) -{ - - if (flow_val->nf_info.nat_type == DP_FLOW_NAT_TYPE_NETWORK_NEIGH - || flow_val->nf_info.nat_type == DP_FLOW_LB_TYPE_FORWARD) { - if (dp_are_flows_identical(key, &flow_val->flow_key[DP_FLOW_DIR_ORG])) { - - df->flags.dir = DP_FLOW_DIR_ORG; - dp_cntrack_change_flow_offload_flags(flow_val, df); - } - } else { - if (dp_are_flows_identical(key, &flow_val->flow_key[DP_FLOW_DIR_REPLY])) - df->flags.dir = DP_FLOW_DIR_REPLY; - - if (dp_are_flows_identical(key, &flow_val->flow_key[DP_FLOW_DIR_ORG])) - df->flags.dir = DP_FLOW_DIR_ORG; - - // recirc pkt shall not change flow's state because its ancestor has already done - if (dp_get_pkt_mark(m)->flags.is_recirc) - return; - - // when to offload reply pkt of a tcp flow is determined in dp_cntrack_set_timeout_tcp_flow - if (df->l4_type != IPPROTO_TCP) - dp_cntrack_change_flow_offload_flags(flow_val, df); - } - df->dp_flow_hash = dp_get_conntrack_flow_hash_value(key); - -} - - -static __rte_always_inline bool dp_test_next_n_bytes_identical(const unsigned char *first_val, const unsigned char *second_val, uint8_t nr_bytes) -{ - - for (uint8_t i = 0; i < nr_bytes; i++) { - if ((first_val[i] ^ second_val[i]) > 0) - return false; - } - - return true; -} - static __rte_always_inline rte_edge_t dp_find_nxt_graph_node(struct dp_flow *df) { if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) { @@ -232,13 +48,8 @@ static __rte_always_inline rte_edge_t dp_find_nxt_graph_node(struct dp_flow *df) static __rte_always_inline rte_edge_t get_next_index(struct rte_node *node, struct rte_mbuf *m) { - struct flow_value 
*flow_val = NULL; struct rte_ipv4_hdr *ipv4_hdr; - struct rte_tcp_hdr *tcp_hdr; struct dp_flow *df; - struct flow_key *key; - bool same_key; - int ret; df = dp_get_flow_ptr(m); ipv4_hdr = dp_get_ipv4_hdr(m); @@ -259,52 +70,10 @@ static __rte_always_inline rte_edge_t get_next_index(struct rte_node *node, stru || df->l4_type == IPPROTO_UDP || df->l4_type == IPPROTO_ICMP ) { - key = curr_key; - memset(key, 0, sizeof(struct flow_key)); - if (unlikely(DP_FAILED(dp_build_flow_key(key, m)))) + if (DP_FAILED(dp_cntrack_handle(node, m, df))) return CONNTRACK_NEXT_DROP; - same_key = prev_key && dp_test_next_n_bytes_identical((const unsigned char *)prev_key, - (const unsigned char *)curr_key, - sizeof(struct flow_key)); - if (!same_key) { - ret = dp_get_flow_data(key, (void **)&flow_val); - if (unlikely(DP_FAILED(ret))) { - if (likely(ret == -ENOENT)) { - flow_val = flow_table_insert_entry(key, df, m); - if (unlikely(!flow_val)) { - DPNODE_LOG_WARNING(node, "Failed to allocate a new flow table entry"); - return CONNTRACK_NEXT_DROP; - } - } else { - DPNODE_LOG_WARNING(node, "Flow table key search failed", DP_LOG_RET(ret)); - return CONNTRACK_NEXT_DROP; - } - } else { - change_flow_state_dir(m, key, flow_val, df); - } - prev_key = curr_key; - if (curr_key == &first_key) - curr_key = &second_key; - else - curr_key = &first_key; - - prev_flow_val = flow_val; - } else { - flow_val = prev_flow_val; - change_flow_state_dir(m, key, flow_val, df); - } - - flow_val->timestamp = rte_rdtsc(); - - if (df->l4_type == IPPROTO_TCP && !dp_get_pkt_mark(m)->flags.is_recirc) { - tcp_hdr = (struct rte_tcp_hdr *) (ipv4_hdr + 1); - dp_cntrack_tcp_state(flow_val, tcp_hdr); - dp_cntrack_set_timeout_tcp_flow(flow_val, df); - } - df->conntrack = flow_val; - dp_cntrack_set_pkt_offload_decision(df); } else { return CONNTRACK_NEXT_DROP; } diff --git a/src/nodes/tx_node.c b/src/nodes/tx_node.c index 44ac6738d..f87e63600 100644 --- a/src/nodes/tx_node.c +++ b/src/nodes/tx_node.c @@ -107,7 +107,7 @@ static 
uint16_t tx_node_process(struct rte_graph *graph, if (df->conntrack) { // mark the flow as default if it is not marked as any other status - if (!DP_IS_FLOW_STATUS_FLAG_NF(df->conntrack->flow_status)) + if (!DP_IS_FLOW_STATUS_FLAG_NF(df->conntrack->flow_status)) df->conntrack->flow_status |= DP_FLOW_STATUS_FLAG_DEFAULT; if (df->flags.offload_decision == DP_FLOW_OFFLOAD_INSTALL || df->flags.offload_ipv6) diff --git a/src/rte_flow/dp_rte_flow_traffic_forward.c b/src/rte_flow/dp_rte_flow_traffic_forward.c index 17e13425b..a5d429075 100644 --- a/src/rte_flow/dp_rte_flow_traffic_forward.c +++ b/src/rte_flow/dp_rte_flow_traffic_forward.c @@ -620,7 +620,6 @@ static __rte_always_inline int dp_offload_handle_tunnel_decap_traffic(struct rte static __rte_always_inline int dp_offload_handle_local_traffic(struct rte_mbuf *m, struct dp_flow *df) { - struct rte_flow_attr attr; create_rte_flow_rule_attr(&attr, 0, 0, 1, 0, 1);