Skip to content

Commit 467dc9a

Browse files
jrife authored and Kernel Patches Daemon committed
bpf: udp: Avoid socket skips and repeats during iteration
Replace the offset-based approach for tracking progress through a bucket in the UDP table with one based on socket cookies. Remember the cookies of unprocessed sockets from the last batch and use this list to pick up where we left off or, in the case that the next socket disappears between reads, find the first socket after that point that still exists in the bucket and resume from there. This approach guarantees that all sockets that existed when iteration began and continue to exist throughout will be visited exactly once. Sockets that are added to the table during iteration may or may not be seen, but if they are they will be seen exactly once. Initialize iter->state.bucket to -1 to ensure that on the first call to bpf_iter_udp_batch, the resume_bucket case is not hit. It's not strictly accurate that we are resuming from bucket zero when we create the first batch, and this avoids adding special case logic for just that bucket. Signed-off-by: Jordan Rife <[email protected]>
1 parent 0e83b8e commit 467dc9a

File tree

1 file changed

+46
-17
lines changed

1 file changed

+46
-17
lines changed

net/ipv4/udp.c

+46-17
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
#include <linux/inet.h>
9494
#include <linux/netdevice.h>
9595
#include <linux/slab.h>
96+
#include <linux/sock_diag.h>
9697
#include <net/tcp_states.h>
9798
#include <linux/skbuff.h>
9899
#include <linux/proc_fs.h>
@@ -3392,33 +3393,50 @@ struct bpf_iter__udp {
33923393

33933394
union bpf_udp_iter_batch_item {
33943395
struct sock *sock;
3396+
__u64 cookie;
33953397
};
33963398

33973399
struct bpf_udp_iter_state {
33983400
struct udp_iter_state state;
33993401
unsigned int cur_sk;
34003402
unsigned int end_sk;
34013403
unsigned int max_sk;
3402-
int offset;
34033404
union bpf_udp_iter_batch_item *batch;
34043405
};
34053406

34063407
static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
34073408
unsigned int new_batch_sz, int flags);
3409+
static struct sock *bpf_iter_udp_resume(struct sock *first_sk,
3410+
union bpf_udp_iter_batch_item *cookies,
3411+
int n_cookies)
3412+
{
3413+
struct sock *sk = NULL;
3414+
int i = 0;
3415+
3416+
for (; i < n_cookies; i++) {
3417+
sk = first_sk;
3418+
udp_portaddr_for_each_entry_from(sk)
3419+
if (cookies[i].cookie == atomic64_read(&sk->sk_cookie))
3420+
goto done;
3421+
}
3422+
done:
3423+
return sk;
3424+
}
3425+
34083426
static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
34093427
{
34103428
struct bpf_udp_iter_state *iter = seq->private;
34113429
struct udp_iter_state *state = &iter->state;
3430+
unsigned int find_cookie, end_cookie = 0;
34123431
struct net *net = seq_file_net(seq);
3413-
int resume_bucket, resume_offset;
34143432
struct udp_table *udptable;
34153433
unsigned int batch_sks = 0;
3434+
int resume_bucket;
34163435
int resizes = 0;
34173436
struct sock *sk;
34183437
int err = 0;
34193438

34203439
resume_bucket = state->bucket;
3421-
resume_offset = iter->offset;
34223440

34233441
/* The current batch is done, so advance the bucket. */
34243442
if (iter->cur_sk == iter->end_sk)
@@ -3434,6 +3452,8 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
34343452
* before releasing the bucket lock. This allows BPF programs that are
34353453
* called in seq_show to acquire the bucket lock if needed.
34363454
*/
3455+
find_cookie = iter->cur_sk;
3456+
end_cookie = iter->end_sk;
34373457
iter->cur_sk = 0;
34383458
iter->end_sk = 0;
34393459
batch_sks = 0;
@@ -3444,21 +3464,21 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
34443464
if (hlist_empty(&hslot2->head))
34453465
goto next_bucket;
34463466

3447-
iter->offset = 0;
34483467
spin_lock_bh(&hslot2->lock);
34493468
sk = hlist_entry_safe(hslot2->head.first, struct sock,
34503469
__sk_common.skc_portaddr_node);
3470+
/* Resume from the first (in iteration order) unseen socket from
3471+
* the last batch that still exists in resume_bucket. Most of
3472+
* the time this will just be where the last iteration left off
3473+
* in resume_bucket unless that socket disappeared between
3474+
* reads.
3475+
*/
3476+
if (state->bucket == resume_bucket)
3477+
sk = bpf_iter_udp_resume(sk, &iter->batch[find_cookie],
3478+
end_cookie - find_cookie);
34513479
fill_batch:
34523480
udp_portaddr_for_each_entry_from(sk) {
34533481
if (seq_sk_match(seq, sk)) {
3454-
/* Resume from the last iterated socket at the
3455-
* offset in the bucket before iterator was stopped.
3456-
*/
3457-
if (state->bucket == resume_bucket &&
3458-
iter->offset < resume_offset) {
3459-
++iter->offset;
3460-
continue;
3461-
}
34623482
if (iter->end_sk < iter->max_sk) {
34633483
sock_hold(sk);
34643484
iter->batch[iter->end_sk++].sock = sk;
@@ -3525,10 +3545,8 @@ static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
35253545
/* Whenever seq_next() is called, the iter->cur_sk is
35263546
* done with seq_show(), so unref the iter->cur_sk.
35273547
*/
3528-
if (iter->cur_sk < iter->end_sk) {
3548+
if (iter->cur_sk < iter->end_sk)
35293549
sock_put(iter->batch[iter->cur_sk++].sock);
3530-
++iter->offset;
3531-
}
35323550

35333551
/* After updating iter->cur_sk, check if there are more sockets
35343552
* available in the current bucket batch.
@@ -3598,10 +3616,19 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
35983616

35993617
static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
36003618
{
3619+
union bpf_udp_iter_batch_item *item;
36013620
unsigned int cur_sk = iter->cur_sk;
3621+
__u64 cookie;
36023622

3603-
while (iter->cur_sk < iter->end_sk)
3604-
sock_put(iter->batch[cur_sk++].sock);
3623+
/* Remember the cookies of the sockets we haven't seen yet, so we can
3624+
* pick up where we left off next time around.
3625+
*/
3626+
while (cur_sk < iter->end_sk) {
3627+
item = &iter->batch[cur_sk++];
3628+
cookie = sock_gen_cookie(item->sock);
3629+
sock_put(item->sock);
3630+
item->cookie = cookie;
3631+
}
36053632
}
36063633

36073634
static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
@@ -3895,6 +3922,8 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
38953922
if (ret)
38963923
bpf_iter_fini_seq_net(priv_data);
38973924

3925+
iter->state.bucket = -1;
3926+
38983927
return ret;
38993928
}
39003929

0 commit comments

Comments
 (0)