Skip to content

Commit 6e0954b

Browse files
committed
RDMA/uverbs: Allow drivers to create a new HW object during rereg_mr
mlx5 has an ugly flow where it tries to allocate a new MR and replace the existing MR in the same memory during rereg. This is very complicated and buggy. Instead of trying to replace in-place inside the driver, provide support from uverbs to change the entire HW object assigned to a handle during rereg_mr. Since destroying a MR is allowed to fail (ie if a MW is pointing at it) and can't be detected in advance, the algorithm creates a completely new uobject to hold the new MR and swaps the IDR entries of the two objects. The old MR in the temporary IDR entry is destroyed, and if it fails rereg_mr succeeds and destruction is deferred to FD release. This complexity is why this cannot live in a driver safely. Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Leon Romanovsky <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent adac4cb commit 6e0954b

File tree

10 files changed

+160
-55
lines changed

10 files changed

+160
-55
lines changed

drivers/infiniband/core/rdma_core.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,27 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
595595
WARN_ON(old != NULL);
596596
}
597597

598+
static void swap_idr_uobjects(struct ib_uobject *obj_old,
599+
struct ib_uobject *obj_new)
600+
{
601+
struct ib_uverbs_file *ufile = obj_old->ufile;
602+
void *old;
603+
604+
/*
605+
* New must be an object that been allocated but not yet committed, this
606+
* moves the pre-committed state to obj_old, new still must be comitted.
607+
*/
608+
old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY,
609+
GFP_KERNEL);
610+
if (WARN_ON(old != obj_old))
611+
return;
612+
613+
swap(obj_old->id, obj_new->id);
614+
615+
old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL);
616+
WARN_ON(old != NULL);
617+
}
618+
598619
static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
599620
{
600621
int fd = uobj->id;
@@ -640,6 +661,35 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
640661
up_read(&ufile->hw_destroy_rwsem);
641662
}
642663

664+
/*
665+
* new_uobj will be assigned to the handle currently used by to_uobj, and
666+
* to_uobj will be destroyed.
667+
*
668+
* Upon return the caller must do:
669+
* rdma_alloc_commit_uobject(new_uobj)
670+
* uobj_put_destroy(to_uobj)
671+
*
672+
* to_uobj must have a write get but the put mode switches to destroy once
673+
* this is called.
674+
*/
675+
void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj,
676+
struct uverbs_attr_bundle *attrs)
677+
{
678+
assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE);
679+
680+
if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object ||
681+
!to_uobj->uapi_object->type_class->swap_uobjects))
682+
return;
683+
684+
to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj);
685+
686+
/*
687+
* If this fails then the uobject is still completely valid (though with
688+
* a new ID) and we leak it until context close.
689+
*/
690+
uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs);
691+
}
692+
643693
/*
644694
* This consumes the kref for uobj. It is up to the caller to unwind the HW
645695
* object and anything else connected to uobj before calling this.
@@ -747,6 +797,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
747797
.lookup_put = lookup_put_idr_uobject,
748798
.destroy_hw = destroy_hw_idr_uobject,
749799
.remove_handle = remove_handle_idr_uobject,
800+
.swap_uobjects = swap_idr_uobjects,
750801
};
751802
EXPORT_SYMBOL(uverbs_idr_class);
752803

drivers/infiniband/core/uverbs_cmd.c

Lines changed: 65 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -764,11 +764,14 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
764764
{
765765
struct ib_uverbs_rereg_mr cmd;
766766
struct ib_uverbs_rereg_mr_resp resp;
767-
struct ib_pd *pd = NULL;
768767
struct ib_mr *mr;
769-
struct ib_pd *old_pd;
770768
int ret;
771769
struct ib_uobject *uobj;
770+
struct ib_uobject *new_uobj;
771+
struct ib_device *ib_dev;
772+
struct ib_pd *orig_pd;
773+
struct ib_pd *new_pd;
774+
struct ib_mr *new_mr;
772775

773776
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
774777
if (ret)
@@ -801,44 +804,86 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
801804
goto put_uobjs;
802805
}
803806

807+
orig_pd = mr->pd;
804808
if (cmd.flags & IB_MR_REREG_PD) {
805-
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
806-
attrs);
807-
if (!pd) {
809+
new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
810+
attrs);
811+
if (!new_pd) {
808812
ret = -EINVAL;
809813
goto put_uobjs;
810814
}
815+
} else {
816+
new_pd = mr->pd;
811817
}
812818

813-
old_pd = mr->pd;
814-
ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start,
815-
cmd.length, cmd.hca_va,
816-
cmd.access_flags, pd,
817-
&attrs->driver_udata);
818-
if (ret)
819+
/*
820+
* The driver might create a new HW object as part of the rereg, we need
821+
* to have a uobject ready to hold it.
822+
*/
823+
new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
824+
if (IS_ERR(new_uobj)) {
825+
ret = PTR_ERR(new_uobj);
819826
goto put_uobj_pd;
820-
821-
if (cmd.flags & IB_MR_REREG_PD) {
822-
atomic_inc(&pd->usecnt);
823-
mr->pd = pd;
824-
atomic_dec(&old_pd->usecnt);
825827
}
826828

827-
if (cmd.flags & IB_MR_REREG_TRANS)
828-
mr->iova = cmd.hca_va;
829+
new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length,
830+
cmd.hca_va, cmd.access_flags, new_pd,
831+
&attrs->driver_udata);
832+
if (IS_ERR(new_mr)) {
833+
ret = PTR_ERR(new_mr);
834+
goto put_new_uobj;
835+
}
836+
if (new_mr) {
837+
new_mr->device = new_pd->device;
838+
new_mr->pd = new_pd;
839+
new_mr->type = IB_MR_TYPE_USER;
840+
new_mr->dm = NULL;
841+
new_mr->sig_attrs = NULL;
842+
new_mr->uobject = uobj;
843+
atomic_inc(&new_pd->usecnt);
844+
new_mr->iova = cmd.hca_va;
845+
new_uobj->object = new_mr;
846+
847+
rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
848+
rdma_restrack_set_name(&new_mr->res, NULL);
849+
rdma_restrack_add(&new_mr->res);
850+
851+
/*
852+
* The new uobj for the new HW object is put into the same spot
853+
* in the IDR and the old uobj & HW object is deleted.
854+
*/
855+
rdma_assign_uobject(uobj, new_uobj, attrs);
856+
rdma_alloc_commit_uobject(new_uobj, attrs);
857+
uobj_put_destroy(uobj);
858+
new_uobj = NULL;
859+
uobj = NULL;
860+
mr = new_mr;
861+
} else {
862+
if (cmd.flags & IB_MR_REREG_PD) {
863+
atomic_dec(&orig_pd->usecnt);
864+
mr->pd = new_pd;
865+
atomic_inc(&new_pd->usecnt);
866+
}
867+
if (cmd.flags & IB_MR_REREG_TRANS)
868+
mr->iova = cmd.hca_va;
869+
}
829870

830871
memset(&resp, 0, sizeof(resp));
831872
resp.lkey = mr->lkey;
832873
resp.rkey = mr->rkey;
833874

834875
ret = uverbs_response(attrs, &resp, sizeof(resp));
835876

877+
put_new_uobj:
878+
if (new_uobj)
879+
uobj_alloc_abort(new_uobj, attrs);
836880
put_uobj_pd:
837881
if (cmd.flags & IB_MR_REREG_PD)
838-
uobj_put_obj_read(pd);
882+
uobj_put_obj_read(new_pd);
839883

840884
put_uobjs:
841-
uobj_put_write(uobj);
885+
if (uobj)
886+
uobj_put_write(uobj);
842887

843888
return ret;
844889
}

drivers/infiniband/hw/hns/hns_roce_device.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,9 +1223,10 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
12231223
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
12241224
u64 virt_addr, int access_flags,
12251225
struct ib_udata *udata);
1226-
int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
1227-
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
1228-
struct ib_udata *udata);
1226+
struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
1227+
u64 length, u64 virt_addr,
1228+
int mr_access_flags, struct ib_pd *pd,
1229+
struct ib_udata *udata);
12291230
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
12301231
u32 max_num_sg);
12311232
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,

drivers/infiniband/hw/hns/hns_roce_mr.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -328,9 +328,10 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
328328
return ret;
329329
}
330330

331-
int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
332-
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
333-
struct ib_udata *udata)
331+
struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
332+
u64 length, u64 virt_addr,
333+
int mr_access_flags, struct ib_pd *pd,
334+
struct ib_udata *udata)
334335
{
335336
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
336337
struct ib_device *ib_dev = &hr_dev->ib_dev;
@@ -341,11 +342,11 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
341342
int ret;
342343

343344
if (!mr->enabled)
344-
return -EINVAL;
345+
return ERR_PTR(-EINVAL);
345346

346347
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
347348
if (IS_ERR(mailbox))
348-
return PTR_ERR(mailbox);
349+
return ERR_CAST(mailbox);
349350

350351
mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
351352
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
@@ -390,12 +391,12 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
390391

391392
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
392393

393-
return 0;
394+
return NULL;
394395

395396
free_cmd_mbox:
396397
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
397398

398-
return ret;
399+
return ERR_PTR(ret);
399400
}
400401

401402
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)

drivers/infiniband/hw/mlx4/mlx4_ib.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -908,10 +908,10 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
908908
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
909909
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
910910
int is_attach);
911-
int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
912-
u64 start, u64 length, u64 virt_addr,
913-
int mr_access_flags, struct ib_pd *pd,
914-
struct ib_udata *udata);
911+
struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
912+
u64 length, u64 virt_addr,
913+
int mr_access_flags, struct ib_pd *pd,
914+
struct ib_udata *udata);
915915
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
916916
const struct ib_gid_attr *attr);
917917

drivers/infiniband/hw/mlx4/mr.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
456456
return ERR_PTR(err);
457457
}
458458

459-
int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
460-
u64 start, u64 length, u64 virt_addr,
461-
int mr_access_flags, struct ib_pd *pd,
462-
struct ib_udata *udata)
459+
struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
460+
u64 length, u64 virt_addr,
461+
int mr_access_flags, struct ib_pd *pd,
462+
struct ib_udata *udata)
463463
{
464464
struct mlx4_ib_dev *dev = to_mdev(mr->device);
465465
struct mlx4_ib_mr *mmr = to_mmr(mr);
@@ -472,9 +472,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
472472
* race exists.
473473
*/
474474
err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
475-
476475
if (err)
477-
return err;
476+
return ERR_PTR(err);
478477

479478
if (flags & IB_MR_REREG_PD) {
480479
err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
@@ -542,8 +541,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
542541

543542
release_mpt_entry:
544543
mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
545-
546-
return err;
544+
if (err)
545+
return ERR_PTR(err);
546+
return NULL;
547547
}
548548

549549
static int

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,9 +1254,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
12541254
int access_flags);
12551255
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
12561256
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr);
1257-
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1258-
u64 length, u64 virt_addr, int access_flags,
1259-
struct ib_pd *pd, struct ib_udata *udata);
1257+
struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1258+
u64 length, u64 virt_addr, int access_flags,
1259+
struct ib_pd *pd, struct ib_udata *udata);
12601260
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
12611261
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
12621262
u32 max_num_sg);

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1620,9 +1620,10 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
16201620
return err;
16211621
}
16221622

1623-
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1624-
u64 length, u64 virt_addr, int new_access_flags,
1625-
struct ib_pd *new_pd, struct ib_udata *udata)
1623+
struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1624+
u64 length, u64 virt_addr,
1625+
int new_access_flags, struct ib_pd *new_pd,
1626+
struct ib_udata *udata)
16261627
{
16271628
struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
16281629
struct mlx5_ib_mr *mr = to_mmr(ib_mr);
@@ -1638,10 +1639,10 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
16381639
start, virt_addr, length, access_flags);
16391640

16401641
if (!mr->umem)
1641-
return -EINVAL;
1642+
return ERR_PTR(-EINVAL);
16421643

16431644
if (is_odp_mr(mr))
1644-
return -EOPNOTSUPP;
1645+
return ERR_PTR(-EOPNOTSUPP);
16451646

16461647
if (flags & IB_MR_REREG_TRANS) {
16471648
addr = virt_addr;
@@ -1717,14 +1718,14 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
17171718

17181719
set_mr_fields(dev, mr, len, access_flags);
17191720

1720-
return 0;
1721+
return NULL;
17211722

17221723
err:
17231724
ib_umem_release(mr->umem);
17241725
mr->umem = NULL;
17251726

17261727
clean_mr(dev, mr);
1727-
return err;
1728+
return ERR_PTR(err);
17281729
}
17291730

17301731
static int

include/rdma/ib_verbs.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2433,9 +2433,10 @@ struct ib_device_ops {
24332433
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
24342434
u64 virt_addr, int mr_access_flags,
24352435
struct ib_udata *udata);
2436-
int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
2437-
u64 virt_addr, int mr_access_flags,
2438-
struct ib_pd *pd, struct ib_udata *udata);
2436+
struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start,
2437+
u64 length, u64 virt_addr,
2438+
int mr_access_flags, struct ib_pd *pd,
2439+
struct ib_udata *udata);
24392440
int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
24402441
struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
24412442
u32 max_num_sg);

include/rdma/uverbs_types.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ struct uverbs_obj_type_class {
7171
enum rdma_remove_reason why,
7272
struct uverbs_attr_bundle *attrs);
7373
void (*remove_handle)(struct ib_uobject *uobj);
74+
void (*swap_uobjects)(struct ib_uobject *obj_old,
75+
struct ib_uobject *obj_new);
7476
};
7577

7678
struct uverbs_obj_type {
@@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
116118
bool hw_obj_valid);
117119
void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
118120
struct uverbs_attr_bundle *attrs);
121+
void rdma_assign_uobject(struct ib_uobject *to_uobj,
122+
struct ib_uobject *new_uobj,
123+
struct uverbs_attr_bundle *attrs);
119124

120125
/*
121126
* uverbs_uobject_get is called in order to increase the reference count on

0 commit comments

Comments
 (0)