Skip to content

Commit

Permalink
Merge pull request projectcalico#592 from tigera/tomas-send-recvmsg
Browse files Browse the repository at this point in the history
bpf send/recvmsg hooks
  • Loading branch information
fasaxc committed Feb 24, 2020
2 parents 1b248a7 + 617fab1 commit 53b1800
Show file tree
Hide file tree
Showing 11 changed files with 529 additions and 47 deletions.
1 change: 1 addition & 0 deletions bpf-gpl/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ MAKEFUNC(int, skb_change_head, void *ctx, __u32 len, __u64 flags)
MAKEFUNC(int, skb_change_tail, void *ctx, __u32 len, __u64 flags)
MAKEFUNC(int, skb_adjust_room, void *ctx, __s32 len, __u32 mode, __u64 flags)
MAKEFUNC(int, csum_diff, __be32 *from, __u32 from_size, __be32 *to, __u32 to_size, __wsum seed)
MAKEFUNC(uint64_t, get_socket_cookie, void *ctx)

/*
 * printk(fmt, ...) - pass a formatted message to bpf_trace_printk().
 *
 * fmt is first copied into a block-local char array (fmt2); that array and
 * its sizeof are what get handed to bpf_trace_printk(), so fmt must be a
 * string literal. The do { } while (0) wrapper makes the expansion behave as
 * a single statement, and ", ## __VA_ARGS__" (a GNU extension) drops the
 * comma when no extra arguments are supplied.
 */
#define printk(fmt, ...) do { char fmt2[] = fmt; bpf_trace_printk(fmt2, sizeof(fmt2) , ## __VA_ARGS__); } while (0)

Expand Down
114 changes: 103 additions & 11 deletions bpf-gpl/connect_balancer.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,30 @@
#include "log.h"
#include "nat.h"

#include "sendrecv.h"

/*
 * Placeholder program for the "calico_connect_v4_noop" section: it logs that
 * it ran, leaves ctx untouched and returns 1.
 */
__attribute__((section("calico_connect_v4_noop")))
int cali_noop_v4(struct bpf_sock_addr *ctx)
{
	const int verdict = 1;

	CALI_INFO("Noop program executing\n");

	return verdict;
}

/*
 * Find the NAT destination for the address the socket call is targeting.
 *
 * ctx:   socket-address context; user_ip4 and user_port are read.
 * proto: IP protocol number handed through to the NAT lookup.
 *
 * Returns the result of calico_v4_nat_lookup(); callers in this file treat a
 * NULL result as "no NAT entry".
 */
static CALI_BPF_INLINE struct calico_nat_dest* nat_lookup(struct bpf_sock_addr *ctx, uint8_t proto)
{
	/* user_port is a 32-bit field; once converted to host order the port
	 * number sits in the upper 16 bits.
	 */
	uint32_t port_word = be32_to_host(ctx->user_port);
	uint16_t dst_port = (uint16_t)(port_word >> 16);

	/* The source address is not known yet; all we know is that the traffic
	 * originates on this host, so 0.0.0.0 is used as the source. That value
	 * cannot collide with traffic coming from elsewhere.
	 *
	 * XXX As a consequence every workload going through the cgroup hook
	 * XXX shares the same affinity, which (a) is sub-optimal and (b) leaks
	 * XXX information between workloads.
	 */
	return calico_v4_nat_lookup(0, ctx->user_ip4, proto, dst_port);
}

__attribute__((section("calico_connect_v4")))
int cali_ctlb_v4(struct bpf_sock_addr *ctx)
{
Expand Down Expand Up @@ -57,27 +74,102 @@ int cali_ctlb_v4(struct bpf_sock_addr *ctx)
goto out;
}

uint16_t dport = (uint16_t)(be32_to_host(ctx->user_port)>>16);
struct calico_nat_dest *nat_dest;

/* We do not know what the source address is yet, we only know that it
* is the localhost, so we might just use 0.0.0.0. That would not
* conflict with traffic from elsewhere.
*
* XXX it means that all workloads that use the cgroup hook have the
* XXX same affinity, which (a) is sub-optimal and (b) leaks info between
* XXX workloads.
*/
nat_dest = calico_v4_nat_lookup(0, ctx->user_ip4, ip_proto, dport);
nat_dest = nat_lookup(ctx, ip_proto);
if (!nat_dest) {
goto out;
}

ctx->user_ip4 = nat_dest->addr;
ctx->user_port = host_to_be32(((uint32_t)nat_dest->port)<<16);
ctx->user_port = host_to_ctx_port(nat_dest->port);

out:
return verdict;
}

__attribute__((section("calico_sendmsg_v4")))
int cali_ctlb_sendmsg_v4(struct bpf_sock_addr *ctx)
{
CALI_DEBUG("sendmsg_v4 %x:%d\n",
be32_to_host(ctx->user_ip4), be32_to_host(ctx->user_port)>>16);

if (ctx->type != SOCK_DGRAM) {
CALI_INFO("unexpected sock type %d\n", ctx->type);
goto out;
}

struct calico_nat_dest *nat_dest;

nat_dest = nat_lookup(ctx, IPPROTO_UDP);
if (!nat_dest) {
goto out;
}

uint32_t dport = host_to_ctx_port(nat_dest->port);

struct sendrecv4_key key = {
.ip = nat_dest->addr,
.port = dport,
.cookie = bpf_get_socket_cookie(ctx),
};
struct sendrecv4_val val = {
.ip = ctx->user_ip4,
.port = ctx->user_port,
/* XXX we should also store the backend key to verify that it is
* XXX still ok upon recvmsg.
*/
};

if (bpf_map_update_elem(&cali_v4_srmsg, &key, &val, 0)) {
/* if this happens things are really bad! report */
CALI_INFO("sendmsg4 failed to update map\n");
goto out;
}

ctx->user_ip4 = nat_dest->addr;
ctx->user_port = dport;

out:
return 1;
}

__attribute__((section("calico_recvmsg_v4")))
int cali_ctlb_recvmsg_v4(struct bpf_sock_addr *ctx)
{
CALI_DEBUG("recvmsg_v4 %x:%d\n", be32_to_host(ctx->user_ip4), ctx_port_to_host(ctx->user_port));

if (ctx->type != SOCK_DGRAM) {
CALI_INFO("unexpected sock type %d\n", ctx->type);
goto out;
}

struct sendrecv4_key key = {
.ip = ctx->user_ip4,
.port = ctx->user_port,
.cookie = bpf_get_socket_cookie(ctx),
};

struct sendrecv4_val *revnat = bpf_map_lookup_elem(&cali_v4_srmsg, &key);

if (revnat == NULL) {
CALI_DEBUG("revnat miss for %x:%d\n",
be32_to_host(ctx->user_ip4), ctx_port_to_host(ctx->user_port));
/* we are past policy and the packet was allowed. Either the
* mapping does not exist anymore and if the app cares, it
* should check the addresses. It is more likely a packet sent
* to server from outside and no mapping is expected.
*/
goto out;
}

ctx->user_ip4 = revnat->ip;
ctx->user_port = revnat->port;
CALI_DEBUG("recvmsg_v4 rev nat to %x:%d\n",
be32_to_host(ctx->user_ip4), ctx_port_to_host(ctx->user_port));

out:
return 1;
}

/* License string for this object, emitted into the "license" section; the
 * "used" attribute keeps it even though nothing in this file references it.
 */
char ____license[] __attribute__((section("license"), used)) = "GPL";
79 changes: 79 additions & 0 deletions bpf-gpl/connect_balancer_v6.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) 2020 Tigera, Inc. All rights reserved.

#include <linux/bpf.h>
#include <sys/socket.h>

#include "../include/bpf.h"
#include "../include/log.h"

#include "sendrecv.h"

/*
 * sendmsg hook for IPv6 sockets. Currently it only emits a debug message;
 * ctx is not modified. Returns 1.
 */
__attribute__((section("calico_sendmsg_v6")))
int cali_ctlb_sendmsg_v6(struct bpf_sock_addr *ctx)
{
	const int verdict = 1;

	CALI_DEBUG("sendmsg_v6\n");

	return verdict;
}

/*
 * recvmsg hook for IPv6 sockets.
 *
 * Native IPv6 peers are not handled yet. When the peer address in ctx is an
 * IPv4-mapped IPv6 address (::ffff:a.b.c.d) the IPv4 table cali_v4_srmsg is
 * consulted with (socket cookie, embedded IPv4 address, ctx->user_port); on a
 * hit the embedded IPv4 address and the port in ctx are overwritten with the
 * stored values. In every other case ctx is left untouched.
 *
 * Returns 1 on every path.
 */
__attribute__((section("calico_recvmsg_v6")))
int cali_ctlb_recvmsg_v6(struct bpf_sock_addr *ctx)
{
	__be32 ipv4;

	CALI_DEBUG("recvmsg_v6 ip[0-1] %x%x\n",
			ctx->user_ip6[0],
			ctx->user_ip6[1]);
	CALI_DEBUG("recvmsg_v6 ip[2-3] %x%x\n",
			ctx->user_ip6[2],
			ctx->user_ip6[3]);

	/* check if it is a IPv4 mapped as IPv6 and if so, use the v4 table */
	if (ctx->user_ip6[0] == 0 && ctx->user_ip6[1] == 0 &&
			ctx->user_ip6[2] == host_to_be32(0x0000ffff)) {
		goto v4;
	}

	CALI_DEBUG("recvmsg_v6: not implemented for v6 yet\n");
	goto out;


v4:
	/* The last 32-bit word of a v4-mapped address is the IPv4 address. */
	ipv4 = ctx->user_ip6[3];
	CALI_DEBUG("recvmsg_v6 %x:%d\n", be32_to_host(ipv4), ctx_port_to_host(ctx->user_port));

	if (ctx->type != SOCK_DGRAM) {
		CALI_INFO("unexpected sock type %d\n", ctx->type);
		goto out;
	}

	struct sendrecv4_key key = {
		.ip	= ipv4,
		.port	= ctx->user_port,
		.cookie	= bpf_get_socket_cookie(ctx),
	};

	struct sendrecv4_val *revnat = bpf_map_lookup_elem(&cali_v4_srmsg, &key);

	if (revnat == NULL) {
		CALI_DEBUG("revnat miss for %x:%d\n",
				be32_to_host(ipv4), ctx_port_to_host(ctx->user_port));
		/* Policy has already run and allowed the packet. Either the
		 * mapping is gone (an application that cares should check the
		 * addresses itself) or, more likely, this is a packet sent to a
		 * server from outside, for which no mapping is expected.
		 */
		goto out;
	}

	ctx->user_ip6[3] = revnat->ip;
	ctx->user_port = revnat->port;
	/* Log the address we rewrote TO (revnat->ip), not the pre-rewrite
	 * address still held in the local ipv4 variable.
	 */
	CALI_DEBUG("recvmsg_v6 v4 rev nat to %x:%d\n",
			be32_to_host(revnat->ip), ctx_port_to_host(ctx->user_port));

out:
	return 1;
}


/* License string for this object, emitted into the "license" section; the
 * "used" attribute keeps it even though nothing in this file references it.
 */
char ____license[] __attribute__((section("license"), used)) = "GPL";
37 changes: 37 additions & 0 deletions bpf-gpl/sendrecv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (c) 2020 Tigera, Inc. All rights reserved.

#ifndef __SENDRECV_H__
#define __SENDRECV_H__

/*
 * Key of the cali_v4_srmsg map. The field layout is the map's key format
 * (key_size is sizeof this struct), so fields must not be reordered or
 * resized.
 */
struct sendrecv4_key {
uint64_t cookie; /* socket cookie as returned by bpf_get_socket_cookie() */
uint32_t ip; /* IPv4 address, copied as-is from the context / NAT destination */
uint32_t port; /* kept 32 bits wide to match bpf_sock_addr's user_port; a 16-bit field would need padding anyway */
};

/*
 * Value of the cali_v4_srmsg map: an IPv4 address and port in the same
 * representation bpf_sock_addr uses, so they can be copied straight back
 * into the context. The field layout is the map's value format.
 */
struct sendrecv4_val {
uint32_t ip; /* IPv4 address, copied as-is from the context */
uint32_t port; /* kept 32 bits wide to match bpf_sock_addr's user_port; a 16-bit field would need padding anyway */
};

/*
 * LRU hash map from struct sendrecv4_key to struct sendrecv4_val, placed in
 * the "maps" section. The pinning strategy is only set when the object is
 * not built for the bpftool loader (__BPFTOOL_LOADER__ undefined).
 */
struct bpf_map_def_extended __attribute__((section("maps"))) cali_v4_srmsg = {
.type = BPF_MAP_TYPE_LRU_HASH,
.key_size = sizeof(struct sendrecv4_key),
.value_size = sizeof(struct sendrecv4_val),
.max_entries = 510000, // arbitrary upper bound, not derived from any sizing
#ifndef __BPFTOOL_LOADER__
.pinning_strategy = 2 /* global namespace */,
#endif
};

/*
 * Convert a port in the 32-bit form used by bpf_sock_addr's user_port into a
 * host-order 16-bit port: convert the word to host order and keep its upper
 * 16 bits.
 */
static CALI_BPF_INLINE uint16_t ctx_port_to_host(__u32 port)
{
	__u32 host_word = be32_to_host(port);

	return host_word >> 16;
}

/*
 * Inverse of ctx_port_to_host(): place a host-order 16-bit port in the upper
 * half of a 32-bit word and convert that word from host to big-endian order,
 * yielding the form stored in bpf_sock_addr's user_port.
 */
static CALI_BPF_INLINE __u32 host_to_ctx_port(uint16_t port)
{
	uint32_t shifted = ((uint32_t)port) << 16;

	return host_to_be32(shifted);
}

#endif /* __SENDRECV_H__ */
12 changes: 6 additions & 6 deletions bpf-gpl/tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,8 @@ static CALI_BPF_INLINE int forward_or_drop(struct __sk_buff *skb,
.tot_len = be16_to_host(ip_header->tot_len),
.ifindex = skb->ingress_ifindex,
.l4_protocol = state->ip_proto,
.sport = state->sport,
.dport = state->dport,
.sport = host_to_be16(state->sport),
.dport = host_to_be16(state->dport),
};

/* set the ipv4 here, otherwise the ipv4/6 unions do not get
Expand All @@ -200,10 +200,10 @@ static CALI_BPF_INLINE int forward_or_drop(struct __sk_buff *skb,
CALI_DEBUG("FIB tot_len=%d\n", fib_params.tot_len);
CALI_DEBUG("FIB ifindex=%d\n", fib_params.ifindex);
CALI_DEBUG("FIB l4_protocol=%d\n", fib_params.l4_protocol);
CALI_DEBUG("FIB sport=%d\n", fib_params.sport);
CALI_DEBUG("FIB dport=%d\n", fib_params.dport);
CALI_DEBUG("FIB ipv4_src=%x\n", fib_params.ipv4_src);
CALI_DEBUG("FIB ipv4_dst=%x\n", fib_params.ipv4_dst);
CALI_DEBUG("FIB sport=%d\n", be16_to_host(fib_params.sport));
CALI_DEBUG("FIB dport=%d\n", be16_to_host(fib_params.dport));
CALI_DEBUG("FIB ipv4_src=%x\n", be32_to_host(fib_params.ipv4_src));
CALI_DEBUG("FIB ipv4_dst=%x\n", be32_to_host(fib_params.ipv4_dst));

CALI_DEBUG("Traffic is towards the host namespace, doing Linux FIB lookup\n");
rc = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), fwd->fib_flags);
Expand Down
Binary file removed bpf/bin/from_tnl_fib_no_log.o
Binary file not shown.
7 changes: 7 additions & 0 deletions bpf/maps.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ func (b *PinnedMap) Close() error {
return err
}

// RepinningEnabled returns the RepinningEnabled flag of the map's context.
// A map without a context reports false.
func (b *PinnedMap) RepinningEnabled() bool {
	return b.context != nil && b.context.RepinningEnabled
}

// DumpMapCmd returns the command that can be used to dump a map or an error
func DumpMapCmd(m Map) ([]string, error) {
if pm, ok := m.(*PinnedMap); ok {
Expand Down
Loading

0 comments on commit 53b1800

Please sign in to comment.