Skip to content

Commit 790e1fa

Browse files
howlett authored and akpm00 committed
maple_tree: add RCU lock checking to rcu callback functions
Dereferencing RCU objects within the RCU callback without the RCU check has caused lockdep to complain. Fix the RCU dereferencing by using the RCU callback lock to ensure the operation is safe.

Also stop creating a new lock to use for dereferencing during destruction of the tree or subtree. Instead, pass through a pointer to the tree that has the lock that is held for RCU dereferencing checking. It also does not make sense to use the maple state in the freeing scenario as the tree walk is a special case where the tree no longer has the normal encodings and parent pointers.

Link: https://lkml.kernel.org/r/[email protected]
Fixes: 54a611b ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <[email protected]>
Reported-by: Suren Baghdasaryan <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 0a2b18d commit 790e1fa

File tree

1 file changed

+96
-92
lines changed

1 file changed

+96
-92
lines changed

lib/maple_tree.c

+96-92
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt,
824824
return rcu_dereference_check(slots[offset], mt_locked(mt));
825825
}
826826

827+
static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
828+
unsigned char offset)
829+
{
830+
return rcu_dereference_protected(slots[offset], mt_locked(mt));
831+
}
827832
/*
828833
* mas_slot_locked() - Get the slot value when holding the maple tree lock.
829834
* @mas: The maple state
@@ -835,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt,
835840
static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
836841
unsigned char offset)
837842
{
838-
return rcu_dereference_protected(slots[offset], mt_locked(mas->tree));
843+
return mt_slot_locked(mas->tree, slots, offset);
839844
}
840845

841846
/*
@@ -907,34 +912,35 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
907912
}
908913

909914
/*
910-
* mas_clear_meta() - clear the metadata information of a node, if it exists
911-
* @mas: The maple state
915+
* mt_clear_meta() - clear the metadata information of a node, if it exists
916+
* @mt: The maple tree
912917
* @mn: The maple node
913-
* @mt: The maple node type
918+
* @type: The maple node type
914919
* @offset: The offset of the highest sub-gap in this node.
915920
* @end: The end of the data in this node.
916921
*/
917-
static inline void mas_clear_meta(struct ma_state *mas, struct maple_node *mn,
918-
enum maple_type mt)
922+
static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
923+
enum maple_type type)
919924
{
920925
struct maple_metadata *meta;
921926
unsigned long *pivots;
922927
void __rcu **slots;
923928
void *next;
924929

925-
switch (mt) {
930+
switch (type) {
926931
case maple_range_64:
927932
pivots = mn->mr64.pivot;
928933
if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
929934
slots = mn->mr64.slot;
930-
next = mas_slot_locked(mas, slots,
931-
MAPLE_RANGE64_SLOTS - 1);
932-
if (unlikely((mte_to_node(next) && mte_node_type(next))))
933-
return; /* The last slot is a node, no metadata */
935+
next = mt_slot_locked(mt, slots,
936+
MAPLE_RANGE64_SLOTS - 1);
937+
if (unlikely((mte_to_node(next) &&
938+
mte_node_type(next))))
939+
return; /* no metadata, could be node */
934940
}
935941
fallthrough;
936942
case maple_arange_64:
937-
meta = ma_meta(mn, mt);
943+
meta = ma_meta(mn, type);
938944
break;
939945
default:
940946
return;
@@ -5483,7 +5489,7 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
54835489
}
54845490

54855491
/*
5486-
* mas_dead_leaves() - Mark all leaves of a node as dead.
5492+
* mte_dead_leaves() - Mark all leaves of a node as dead.
54875493
* @enode: The maple encoded node whose slots are being marked
54885494
* @mt: The maple tree (its lock is used for the RCU dereference check)
54895495
* @slots: Pointer to the slot array
@@ -5493,16 +5499,16 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
54935499
* Return: The number of leaves marked as dead.
54945500
*/
54955501
static inline
5496-
unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots,
5497-
enum maple_type mt)
5502+
unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
5503+
void __rcu **slots)
54985504
{
54995505
struct maple_node *node;
55005506
enum maple_type type;
55015507
void *entry;
55025508
int offset;
55035509

5504-
for (offset = 0; offset < mt_slots[mt]; offset++) {
5505-
entry = mas_slot_locked(mas, slots, offset);
5510+
for (offset = 0; offset < mt_slot_count(enode); offset++) {
5511+
entry = mt_slot(mt, slots, offset);
55065512
type = mte_node_type(entry);
55075513
node = mte_to_node(entry);
55085514
/* Use both node and type to catch LE & BE metadata */
@@ -5517,162 +5523,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots,
55175523
return offset;
55185524
}
55195525

5520-
static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset)
5526+
/**
5527+
* mte_dead_walk() - Walk down a dead tree to just before the leaves
5528+
* @enode: The maple encoded node
5529+
* @offset: The starting offset
5530+
*
5531+
* Note: This can only be used from the RCU callback context.
5532+
*/
5533+
static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
55215534
{
5522-
struct maple_node *next;
5535+
struct maple_node *node, *next;
55235536
void __rcu **slots = NULL;
55245537

5525-
next = mas_mn(mas);
5538+
next = mte_to_node(*enode);
55265539
do {
5527-
mas->node = mt_mk_node(next, next->type);
5528-
slots = ma_slots(next, next->type);
5529-
next = mas_slot_locked(mas, slots, offset);
5540+
*enode = ma_enode_ptr(next);
5541+
node = mte_to_node(*enode);
5542+
slots = ma_slots(node, node->type);
5543+
next = rcu_dereference_protected(slots[offset],
5544+
lock_is_held(&rcu_callback_map));
55305545
offset = 0;
55315546
} while (!ma_is_leaf(next->type));
55325547

55335548
return slots;
55345549
}
55355550

5551+
/**
5552+
* mt_free_walk() - Walk & free a tree in the RCU callback context
5553+
* @head: The RCU head that's within the node.
5554+
*
5555+
* Note: This can only be used from the RCU callback context.
5556+
*/
55365557
static void mt_free_walk(struct rcu_head *head)
55375558
{
55385559
void __rcu **slots;
55395560
struct maple_node *node, *start;
5540-
struct maple_tree mt;
5561+
struct maple_enode *enode;
55415562
unsigned char offset;
55425563
enum maple_type type;
5543-
MA_STATE(mas, &mt, 0, 0);
55445564

55455565
node = container_of(head, struct maple_node, rcu);
55465566

55475567
if (ma_is_leaf(node->type))
55485568
goto free_leaf;
55495569

5550-
mt_init_flags(&mt, node->ma_flags);
5551-
mas_lock(&mas);
55525570
start = node;
5553-
mas.node = mt_mk_node(node, node->type);
5554-
slots = mas_dead_walk(&mas, 0);
5555-
node = mas_mn(&mas);
5571+
enode = mt_mk_node(node, node->type);
5572+
slots = mte_dead_walk(&enode, 0);
5573+
node = mte_to_node(enode);
55565574
do {
55575575
mt_free_bulk(node->slot_len, slots);
55585576
offset = node->parent_slot + 1;
5559-
mas.node = node->piv_parent;
5560-
if (mas_mn(&mas) == node)
5561-
goto start_slots_free;
5562-
5563-
type = mte_node_type(mas.node);
5564-
slots = ma_slots(mte_to_node(mas.node), type);
5565-
if ((offset < mt_slots[type]) && (slots[offset]))
5566-
slots = mas_dead_walk(&mas, offset);
5567-
5568-
node = mas_mn(&mas);
5577+
enode = node->piv_parent;
5578+
if (mte_to_node(enode) == node)
5579+
goto free_leaf;
5580+
5581+
type = mte_node_type(enode);
5582+
slots = ma_slots(mte_to_node(enode), type);
5583+
if ((offset < mt_slots[type]) &&
5584+
rcu_dereference_protected(slots[offset],
5585+
lock_is_held(&rcu_callback_map)))
5586+
slots = mte_dead_walk(&enode, offset);
5587+
node = mte_to_node(enode);
55695588
} while ((node != start) || (node->slot_len < offset));
55705589

55715590
slots = ma_slots(node, node->type);
55725591
mt_free_bulk(node->slot_len, slots);
55735592

5574-
start_slots_free:
5575-
mas_unlock(&mas);
55765593
free_leaf:
55775594
mt_free_rcu(&node->rcu);
55785595
}
55795596

5580-
static inline void __rcu **mas_destroy_descend(struct ma_state *mas,
5581-
struct maple_enode *prev, unsigned char offset)
5597+
static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
5598+
struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
55825599
{
55835600
struct maple_node *node;
5584-
struct maple_enode *next = mas->node;
5601+
struct maple_enode *next = *enode;
55855602
void __rcu **slots = NULL;
5603+
enum maple_type type;
5604+
unsigned char next_offset = 0;
55865605

55875606
do {
5588-
mas->node = next;
5589-
node = mas_mn(mas);
5590-
slots = ma_slots(node, mte_node_type(mas->node));
5591-
next = mas_slot_locked(mas, slots, 0);
5592-
if ((mte_dead_node(next))) {
5593-
mte_to_node(next)->type = mte_node_type(next);
5594-
next = mas_slot_locked(mas, slots, 1);
5595-
}
5607+
*enode = next;
5608+
node = mte_to_node(*enode);
5609+
type = mte_node_type(*enode);
5610+
slots = ma_slots(node, type);
5611+
next = mt_slot_locked(mt, slots, next_offset);
5612+
if ((mte_dead_node(next)))
5613+
next = mt_slot_locked(mt, slots, ++next_offset);
55965614

5597-
mte_set_node_dead(mas->node);
5598-
node->type = mte_node_type(mas->node);
5599-
mas_clear_meta(mas, node, node->type);
5615+
mte_set_node_dead(*enode);
5616+
node->type = type;
56005617
node->piv_parent = prev;
56015618
node->parent_slot = offset;
5602-
offset = 0;
5603-
prev = mas->node;
5619+
offset = next_offset;
5620+
next_offset = 0;
5621+
prev = *enode;
56045622
} while (!mte_is_leaf(next));
56055623

56065624
return slots;
56075625
}
56085626

5609-
static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags,
5627+
static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
56105628
bool free)
56115629
{
56125630
void __rcu **slots;
56135631
struct maple_node *node = mte_to_node(enode);
56145632
struct maple_enode *start;
5615-
struct maple_tree mt;
5616-
5617-
MA_STATE(mas, &mt, 0, 0);
56185633

5619-
mas.node = enode;
56205634
if (mte_is_leaf(enode)) {
56215635
node->type = mte_node_type(enode);
56225636
goto free_leaf;
56235637
}
56245638

5625-
ma_flags &= ~MT_FLAGS_LOCK_MASK;
5626-
mt_init_flags(&mt, ma_flags);
5627-
mas_lock(&mas);
5628-
5629-
mte_to_node(enode)->ma_flags = ma_flags;
56305639
start = enode;
5631-
slots = mas_destroy_descend(&mas, start, 0);
5632-
node = mas_mn(&mas);
5640+
slots = mte_destroy_descend(&enode, mt, start, 0);
5641+
node = mte_to_node(enode); // Updated in the above call.
56335642
do {
56345643
enum maple_type type;
56355644
unsigned char offset;
56365645
struct maple_enode *parent, *tmp;
56375646

5638-
node->type = mte_node_type(mas.node);
5639-
node->slot_len = mas_dead_leaves(&mas, slots, node->type);
5647+
node->slot_len = mte_dead_leaves(enode, mt, slots);
56405648
if (free)
56415649
mt_free_bulk(node->slot_len, slots);
56425650
offset = node->parent_slot + 1;
5643-
mas.node = node->piv_parent;
5644-
if (mas_mn(&mas) == node)
5645-
goto start_slots_free;
5651+
enode = node->piv_parent;
5652+
if (mte_to_node(enode) == node)
5653+
goto free_leaf;
56465654

5647-
type = mte_node_type(mas.node);
5648-
slots = ma_slots(mte_to_node(mas.node), type);
5655+
type = mte_node_type(enode);
5656+
slots = ma_slots(mte_to_node(enode), type);
56495657
if (offset >= mt_slots[type])
56505658
goto next;
56515659

5652-
tmp = mas_slot_locked(&mas, slots, offset);
5660+
tmp = mt_slot_locked(mt, slots, offset);
56535661
if (mte_node_type(tmp) && mte_to_node(tmp)) {
5654-
parent = mas.node;
5655-
mas.node = tmp;
5656-
slots = mas_destroy_descend(&mas, parent, offset);
5662+
parent = enode;
5663+
enode = tmp;
5664+
slots = mte_destroy_descend(&enode, mt, parent, offset);
56575665
}
56585666
next:
5659-
node = mas_mn(&mas);
5660-
} while (start != mas.node);
5667+
node = mte_to_node(enode);
5668+
} while (start != enode);
56615669

5662-
node = mas_mn(&mas);
5663-
node->type = mte_node_type(mas.node);
5664-
node->slot_len = mas_dead_leaves(&mas, slots, node->type);
5670+
node = mte_to_node(enode);
5671+
node->slot_len = mte_dead_leaves(enode, mt, slots);
56655672
if (free)
56665673
mt_free_bulk(node->slot_len, slots);
56675674

5668-
start_slots_free:
5669-
mas_unlock(&mas);
5670-
56715675
free_leaf:
56725676
if (free)
56735677
mt_free_rcu(&node->rcu);
56745678
else
5675-
mas_clear_meta(&mas, node, node->type);
5679+
mt_clear_meta(mt, node, node->type);
56765680
}
56775681

56785682
/*
@@ -5688,10 +5692,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
56885692
struct maple_node *node = mte_to_node(enode);
56895693

56905694
if (mt_in_rcu(mt)) {
5691-
mt_destroy_walk(enode, mt->ma_flags, false);
5695+
mt_destroy_walk(enode, mt, false);
56925696
call_rcu(&node->rcu, mt_free_walk);
56935697
} else {
5694-
mt_destroy_walk(enode, mt->ma_flags, true);
5698+
mt_destroy_walk(enode, mt, true);
56955699
}
56965700
}
56975701

0 commit comments

Comments
 (0)