Skip to content

Commit

Permalink
issue: 1471639 Move Rx timestamp handling to sockinfo
Browse files Browse the repository at this point in the history
In order to support TCP receive timestamp, common logic
should be moved to sockinfo.

Signed-off-by: Liran Oz <[email protected]>
  • Loading branch information
Liran Oz authored and liranoz12 committed Oct 29, 2018
1 parent be4697b commit 3f74fe9
Show file tree
Hide file tree
Showing 8 changed files with 258 additions and 256 deletions.
16 changes: 8 additions & 8 deletions src/vma/dev/cq_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,10 +611,10 @@ void cq_mgr::reclaim_recv_buffer_helper(mem_buf_desc_t* buff)
temp->rx.flow_tag_id = 0;
temp->rx.tcp.p_ip_h = NULL;
temp->rx.tcp.p_tcp_h = NULL;
temp->rx.udp.sw_timestamp.tv_nsec = 0;
temp->rx.udp.sw_timestamp.tv_sec = 0;
temp->rx.udp.hw_timestamp.tv_nsec = 0;
temp->rx.udp.hw_timestamp.tv_sec = 0;
temp->rx.sw_timestamp.tv_nsec = 0;
temp->rx.sw_timestamp.tv_sec = 0;
temp->rx.hw_timestamp.tv_nsec = 0;
temp->rx.hw_timestamp.tv_sec = 0;
temp->rx.hw_raw_timestamp = 0;
free_lwip_pbuf(&temp->lwip_pbuf);
m_rx_pool.push_back(temp);
Expand Down Expand Up @@ -647,10 +647,10 @@ void cq_mgr::socketxtreme_reclaim_recv_buffer_helper(mem_buf_desc_t* buff)
temp->rx.flow_tag_id = 0;
temp->rx.tcp.p_ip_h = NULL;
temp->rx.tcp.p_tcp_h = NULL;
temp->rx.udp.sw_timestamp.tv_nsec = 0;
temp->rx.udp.sw_timestamp.tv_sec = 0;
temp->rx.udp.hw_timestamp.tv_nsec = 0;
temp->rx.udp.hw_timestamp.tv_sec = 0;
temp->rx.sw_timestamp.tv_nsec = 0;
temp->rx.sw_timestamp.tv_sec = 0;
temp->rx.hw_timestamp.tv_nsec = 0;
temp->rx.hw_timestamp.tv_sec = 0;
temp->rx.hw_raw_timestamp = 0;
free_lwip_pbuf(&temp->lwip_pbuf);
m_rx_pool.push_back(temp);
Expand Down
8 changes: 4 additions & 4 deletions src/vma/dev/ring_tap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -667,10 +667,10 @@ bool ring_tap::reclaim_recv_buffers(mem_buf_desc_t *buff)
temp->rx.flow_tag_id = 0;
temp->rx.tcp.p_ip_h = NULL;
temp->rx.tcp.p_tcp_h = NULL;
temp->rx.udp.sw_timestamp.tv_nsec = 0;
temp->rx.udp.sw_timestamp.tv_sec = 0;
temp->rx.udp.hw_timestamp.tv_nsec = 0;
temp->rx.udp.hw_timestamp.tv_sec = 0;
temp->rx.sw_timestamp.tv_nsec = 0;
temp->rx.sw_timestamp.tv_sec = 0;
temp->rx.hw_timestamp.tv_nsec = 0;
temp->rx.hw_timestamp.tv_sec = 0;
temp->rx.hw_raw_timestamp = 0;
free_lwip_pbuf(&temp->lwip_pbuf);
m_rx_pool.push_back(temp);
Expand Down
6 changes: 2 additions & 4 deletions src/vma/proto/mem_buf_desc.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ class mem_buf_desc_t {
iovec frag; // Datagram part base address and length
size_t sz_payload; // This is the total amount of data of the packet, if (sz_payload>sz_data) means fragmented packet.
uint64_t hw_raw_timestamp;
struct timespec sw_timestamp;
struct timespec hw_timestamp;
void* context;
uint32_t flow_tag_id; // Flow Tag ID of this received packet

Expand All @@ -88,13 +90,9 @@ class mem_buf_desc_t {
struct tcphdr* p_tcp_h;
size_t n_transport_header_len;
bool gro;
bool pad[7];
} tcp;
struct {
struct timespec sw_timestamp;
struct timespec hw_timestamp;
in_addr_t local_if; // L3 info
uint32_t pad;
} udp;
};

Expand Down
169 changes: 168 additions & 1 deletion src/vma/sock/sockinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,13 @@

sockinfo::sockinfo(int fd):
socket_fd_api(fd),
m_b_closed(false), m_b_blocking(true), m_protocol(PROTO_UNDEFINED),
m_b_closed(false),
m_b_blocking(true),
m_b_pktinfo(false),
m_b_rcvtstamp(false),
m_b_rcvtstampns(false),
m_n_tsing_flags(0),
m_protocol(PROTO_UNDEFINED),
m_lock_rcv(MODULE_NAME "::m_lock_rcv"),
m_lock_snd(MODULE_NAME "::m_lock_snd"),
m_p_connected_dst_entry(NULL),
Expand Down Expand Up @@ -299,6 +305,51 @@ int sockinfo::setsockopt(int __level, int __optname, const void *__optval, sockl
"optval == NULL");
}
break;
case SO_TIMESTAMP:
case SO_TIMESTAMPNS:
if (__optval) {
m_b_rcvtstamp = *(bool*)__optval;
if (__optname == SO_TIMESTAMPNS)
m_b_rcvtstampns = m_b_rcvtstamp;
si_logdbg("SOL_SOCKET, %s=%s", setsockopt_so_opt_to_str(__optname), (m_b_rcvtstamp ? "true" : "false"));
}
else {
si_logdbg("SOL_SOCKET, %s=\"???\" - NOT HANDLED, optval == NULL", setsockopt_so_opt_to_str(__optname));
}
break;

case SO_TIMESTAMPING:
if (__optval) {
uint8_t val = *(uint8_t*)__optval;

// SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_HARDWARE is NOT supported.
if (val & (SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE)) {
ret = SOCKOPT_NO_VMA_SUPPORT;
errno = EOPNOTSUPP;
si_logdbg("SOL_SOCKET, SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_HARDWARE is not supported, errno set to EOPNOTSUPP");
}

if (val & (SOF_TIMESTAMPING_RAW_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE)) {
if (g_p_ib_ctx_handler_collection->get_ctx_time_conversion_mode() == TS_CONVERSION_MODE_DISABLE){
if (safe_mce_sys().hw_ts_conversion_mode == TS_CONVERSION_MODE_DISABLE) {
ret = SOCKOPT_NO_VMA_SUPPORT;
errno = EPERM;
si_logdbg("SOL_SOCKET, SOF_TIMESTAMPING_RAW_HARDWARE and SOF_TIMESTAMPING_RX_HARDWARE socket options were disabled (VMA_HW_TS_CONVERSION = %d) , errno set to EPERM", TS_CONVERSION_MODE_DISABLE);
} else {
ret = SOCKOPT_NO_VMA_SUPPORT;
errno = ENODEV;
si_logdbg("SOL_SOCKET, SOF_TIMESTAMPING_RAW_HARDWARE and SOF_TIMESTAMPING_RX_HARDWARE is not supported by device(s), errno set to ENODEV");
}
}
}

m_n_tsing_flags = val;
si_logdbg("SOL_SOCKET, SO_TIMESTAMPING=%u", m_n_tsing_flags);
}
else {
si_logdbg("SOL_SOCKET, %s=\"???\" - NOT HANDLED, optval == NULL", setsockopt_so_opt_to_str(__optname));
}
break;
default:
break;
}
Expand Down Expand Up @@ -1463,3 +1514,119 @@ void sockinfo::set_sockopt_prio(__const void *__optval, socklen_t __optlen)
si_logdbg("set socket pcp to be %d", m_pcp);
}
}

/**
 * Fill in the receive timestamps of a descriptor according to the
 * timestamping options enabled on this socket.
 *
 * @param p_desc  Received buffer descriptor whose rx.sw_timestamp and
 *                rx.hw_timestamp fields may be updated.
 */
void sockinfo::process_timestamps(mem_buf_desc_t* p_desc)
{
	// Software timestamp: taken only while tv_sec is still zero, so a
	// descriptor that was already stamped keeps the same value for every
	// socket that looks at it.
	const bool sw_requested = m_b_rcvtstamp ||
		(m_n_tsing_flags & (SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE));
	if (sw_requested && p_desc->rx.sw_timestamp.tv_sec == 0) {
		clock_gettime(CLOCK_REALTIME, &p_desc->rx.sw_timestamp);
	}

	// Hardware timestamp: nothing to do unless raw HW timestamps were requested.
	if (!(m_n_tsing_flags & SOF_TIMESTAMPING_RAW_HARDWARE)) {
		return;
	}

	// Ask the owning ring to translate the raw HW counter into system time.
	ring_simple* const ring = (ring_simple*) p_desc->p_desc_owner;
	if (ring != NULL) {
		ring->convert_hw_time_to_system_time(p_desc->rx.hw_raw_timestamp,
						     &p_desc->rx.hw_timestamp);
	}
}

/**
 * Emit the timestamp control messages (SO_TIMESTAMP / SO_TIMESTAMPNS and
 * SO_TIMESTAMPING) for the packet at the front of the ready list.
 *
 * @param cm_state  Control-message cursor the cmsgs are appended to.
 */
void sockinfo::handle_recv_timestamping(struct cmsg_state *cm_state)
{
	// Payload of the SO_TIMESTAMPING cmsg: three consecutive timespecs.
	struct {
		struct timespec systime;
		struct timespec hwtimetrans;
		struct timespec hwtimeraw;
	} tsing;

	memset(&tsing, 0, sizeof(tsing));

	mem_buf_desc_t* packet = get_front_m_rx_pkt_ready_list();
	if (unlikely(!packet)) {
		si_logdbg("m_rx_pkt_ready_list empty");
		return ;
	}

	struct timespec* sw_time = &packet->rx.sw_timestamp;

	// When both SO_TIMESTAMP and SO_TIMESTAMPNS are requested only the
	// nanosecond variant is reported (noted in the original code as matching
	// kernel behavior).
	if (m_b_rcvtstampns) {
		insert_cmsg(cm_state, SOL_SOCKET, SO_TIMESTAMPNS, sw_time, sizeof(*sw_time));
	} else if (m_b_rcvtstamp) {
		// SO_TIMESTAMP carries a timeval, so scale nanoseconds to microseconds.
		struct timeval tv;
		tv.tv_sec = sw_time->tv_sec;
		tv.tv_usec = sw_time->tv_nsec/1000;
		insert_cmsg(cm_state, SOL_SOCKET, SO_TIMESTAMP, &tv, sizeof(tv));
	}

	// SO_TIMESTAMPING: only rx software / raw hardware reporting is handled.
	const bool want_sw = m_n_tsing_flags & SOF_TIMESTAMPING_SOFTWARE;
	const bool want_hw_raw = m_n_tsing_flags & SOF_TIMESTAMPING_RAW_HARDWARE;
	if (!want_sw && !want_hw_raw) {
		return;
	}

	if (want_sw) {
		tsing.systime = packet->rx.sw_timestamp;
	}
	if (want_hw_raw) {
		tsing.hwtimeraw = packet->rx.hw_timestamp;
	}

	insert_cmsg(cm_state, SOL_SOCKET, SO_TIMESTAMPING, &tsing, sizeof(tsing));
}

/**
 * Append one control message to the user's ancillary-data buffer.
 *
 * Does nothing when the cursor is exhausted (cmhdr == NULL) or the message
 * was already marked MSG_CTRUNC. When the header plus payload does not fit
 * in the remaining space, MSG_CTRUNC is set and nothing is written.
 *
 * @param cm_state  Cursor over the msghdr control buffer; advanced on success.
 * @param level     Value stored in cmsg_level.
 * @param type      Value stored in cmsg_type.
 * @param data      Payload copied into the control message.
 * @param len       Payload length in bytes.
 */
void sockinfo::insert_cmsg(struct cmsg_state * cm_state, int level, int type, void *data, int len)
{
	if (!cm_state->cmhdr ||
	    (cm_state->mhdr->msg_flags & MSG_CTRUNC))
		return;

	// Remaining room in the control buffer, computed with saturation so the
	// unsigned subtraction can never wrap around.
	const size_t controllen = cm_state->mhdr->msg_controllen;
	const size_t consumed = cm_state->cmsg_bytes_consumed;
	const size_t remaining = (consumed < controllen) ? (controllen - consumed) : 0;

	// Ensure there is enough space for the header and the data payload
	const size_t cmsg_len = CMSG_LEN(len);
	if (cmsg_len > remaining) {
		cm_state->mhdr->msg_flags |= MSG_CTRUNC;
		return;
	}

	// Fill in the cmsghdr
	cm_state->cmhdr->cmsg_level = level;
	cm_state->cmhdr->cmsg_type = type;
	cm_state->cmhdr->cmsg_len = cmsg_len;
	memcpy(CMSG_DATA(cm_state->cmhdr), data, len);

	// Update bytes consumed to update msg_controllen later.
	// The fit check above uses CMSG_LEN (no trailing padding) while the
	// accounting uses CMSG_SPACE (with padding), so the last message in a
	// tightly sized buffer could otherwise push the total past the size the
	// caller supplied. Clamp it so the reported length never exceeds the buffer.
	const size_t cmsg_space = CMSG_SPACE(len);
	cm_state->cmsg_bytes_consumed += (cmsg_space < remaining) ? cmsg_space : remaining;

	// Advance to next cmsghdr
	// can't simply use CMSG_NXTHDR() due to glibc bug 13500
	struct cmsghdr *next = (struct cmsghdr*)((char*)cm_state->cmhdr +
			CMSG_ALIGN(cm_state->cmhdr->cmsg_len));
	if ((char*)(next + 1) >
	    ((char*)cm_state->mhdr->msg_control + cm_state->mhdr->msg_controllen))
		cm_state->cmhdr = NULL;	// no room for another header
	else
		cm_state->cmhdr = next;
}

void sockinfo::handle_cmsg(struct msghdr * msg)
{
struct cmsg_state cm_state;

cm_state.mhdr = msg;
cm_state.cmhdr = CMSG_FIRSTHDR(msg);
cm_state.cmsg_bytes_consumed = 0;

if (m_b_pktinfo) handle_ip_pktinfo(&cm_state);
if (m_b_rcvtstamp || m_n_tsing_flags) handle_recv_timestamping(&cm_state);

cm_state.mhdr->msg_controllen = cm_state.cmsg_bytes_consumed;
}
67 changes: 66 additions & 1 deletion src/vma/sock/sockinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
#include "vma/proto/mem_buf_desc.h"
#include "vma/proto/dst_entry.h"
#include "vma/dev/net_device_table_mgr.h"
#include "vma/dev/ring_slave.h"
#include "vma/dev/ring_simple.h"
#include "vma/dev/ring_allocation_logic.h"

#include "socket_fd_api.h"
Expand All @@ -60,6 +60,42 @@
#define BYTE_TO_KB(byte_value) ((byte_value) / 125)
#define KB_TO_BYTE(kbit_value) ((kbit_value) * 125)

// Timestamping flag bits used with SO_TIMESTAMPING.
// When DEFINED_MISSING_NET_TSTAMP is set, local fallback definitions are
// provided below; otherwise they are taken from <linux/net_tstamp.h>.
#if DEFINED_MISSING_NET_TSTAMP
enum {
SOF_TIMESTAMPING_TX_HARDWARE = (1<<0),
SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1),
SOF_TIMESTAMPING_RX_HARDWARE = (1<<2),
SOF_TIMESTAMPING_RX_SOFTWARE = (1<<3),
SOF_TIMESTAMPING_SOFTWARE = (1<<4),
SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5),
SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6),
// Every bit up to and including SOF_TIMESTAMPING_RAW_HARDWARE (0x7f).
SOF_TIMESTAMPING_MASK =
(SOF_TIMESTAMPING_RAW_HARDWARE - 1) |
SOF_TIMESTAMPING_RAW_HARDWARE
};
#else
#include <linux/net_tstamp.h>
#endif

#ifndef SO_TIMESTAMPNS
#define SO_TIMESTAMPNS 35
#endif

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37
#endif

#ifndef SO_REUSEPORT
#define SO_REUSEPORT 15
#endif

// Cursor used while building the ancillary (control) data of a received
// message: created by sockinfo::handle_cmsg() and advanced by
// sockinfo::insert_cmsg().
struct cmsg_state
{
// Message header whose control buffer is being filled.
struct msghdr *mhdr;
// Next control-message header to write; NULL once no further header fits.
struct cmsghdr *cmhdr;
// Running count of control-buffer bytes used by inserted messages;
// handle_cmsg() stores it into mhdr->msg_controllen when done.
size_t cmsg_bytes_consumed;
};

struct buff_info_t {
buff_info_t(){
rx_reuse.set_id("buff_info_t (%p) : rx_reuse", this);
Expand Down Expand Up @@ -155,6 +191,10 @@ class sockinfo : public socket_fd_api, public pkt_rcvr_sink, public pkt_sndr_sou
protected:
bool m_b_closed;
bool m_b_blocking;
bool m_b_pktinfo;
bool m_b_rcvtstamp;
bool m_b_rcvtstampns;
uint8_t m_n_tsing_flags;
in_protocol_t m_protocol;

lock_spin_recursive m_lock_rcv;
Expand Down Expand Up @@ -277,6 +317,12 @@ class sockinfo : public socket_fd_api, public pkt_rcvr_sink, public pkt_sndr_sou
void move_owned_rx_ready_descs(ring* p_ring, descq_t* toq); // Move all owner's rx ready packets ro 'toq'
void set_sockopt_prio(__const void *__optval, socklen_t __optlen);

virtual void handle_ip_pktinfo(struct cmsg_state *cm_state) = 0;
inline void handle_recv_timestamping(struct cmsg_state *cm_state);
void insert_cmsg(struct cmsg_state *cm_state, int level, int type, void *data, int len);
void handle_cmsg(struct msghdr * msg);
void process_timestamps(mem_buf_desc_t* p_desc);

virtual bool try_un_offloading(); // un-offload the socket if possible
#ifdef DEFINED_SOCKETXTREME
virtual inline void do_wakeup()
Expand Down Expand Up @@ -512,6 +558,25 @@ class sockinfo : public socket_fd_api, public pkt_rcvr_sink, public pkt_sndr_sou
}
}

/**
 * Translate a SOL_SOCKET option number into its symbolic name for logging.
 *
 * @param opt  Socket option value (SO_*).
 * @return Pointer to a static string with the option name, or
 *         "UNKNOWN SO opt" when the option is not listed here.
 */
static const char * setsockopt_so_opt_to_str(int opt)
{
	switch (opt) {
	case SO_REUSEADDR: 		return "SO_REUSEADDR";
	case SO_REUSEPORT: 		return "SO_REUSEPORT";
	case SO_BROADCAST:	 	return "SO_BROADCAST";
	case SO_RCVBUF:			return "SO_RCVBUF";
	case SO_SNDBUF:			return "SO_SNDBUF";
	case SO_TIMESTAMP:		return "SO_TIMESTAMP";
	case SO_TIMESTAMPNS:		return "SO_TIMESTAMPNS";
	// The SO_TIMESTAMPING setsockopt path logs through this helper too;
	// without this case it would be reported as an unknown option.
	case SO_TIMESTAMPING:		return "SO_TIMESTAMPING";
	case SO_BINDTODEVICE:		return "SO_BINDTODEVICE";
	case SO_VMA_RING_ALLOC_LOGIC:	return "SO_VMA_RING_ALLOC_LOGIC";
	case SO_MAX_PACING_RATE:	return "SO_MAX_PACING_RATE";
	case SO_VMA_FLOW_TAG:		return "SO_VMA_FLOW_TAG";
	default:			break;
	}
	return "UNKNOWN SO opt";
}

inline void move_not_owned_descs(ring* p_ring, descq_t *toq, descq_t *fromq)
{
// Assume locked by owner!!!
Expand Down
5 changes: 5 additions & 0 deletions src/vma/sock/sockinfo_tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,11 @@ class sockinfo_tcp : public sockinfo, public timer_handler

void handle_socket_linger();

/*
 * Packet-info control messages are supported only for UDP,
 * so the TCP socket implements this hook as a no-op.
 */
virtual void handle_ip_pktinfo(struct cmsg_state * /* cm_state */)
{
}

int handle_rx_error();

/** Function prototype for tcp error callback functions. Called when the pcb
Expand Down
Loading

0 comments on commit 3f74fe9

Please sign in to comment.