Skip to content

Commit

Permalink
DRA: Implement UnPrepareResource interface for NodeServer
Browse files Browse the repository at this point in the history
Signed-off-by: cyclinder <[email protected]>
  • Loading branch information
cyclinder committed Mar 28, 2024
1 parent ca9b62e commit 7ac5d1e
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 8 deletions.
19 changes: 18 additions & 1 deletion pkg/dra/dra-plugin/cdi.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, scp *v2beta1.SpiderC
cdiSpec := cdispec.Spec{
// TODO(@cyclinder): should be make it to configureable?
Version: cdiapi.CurrentVersion,
Kind: fmt.Sprintf("%s/%s", cdi.vendor, cdi.class),
Kind: cdi.cdiKind(),
Devices: []cdispec.Device{{
Name: claimUID,
ContainerEdits: cdi.getContaineEdits(claimUID, scp.Spec.Rdma),
Expand All @@ -105,6 +105,19 @@ func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, scp *v2beta1.SpiderC
return nil
}

func (cdi *CDIHandler) DeleteClaimSpecFile(claimUID string) error {
spec := &cdispec.Spec{
Kind: cdi.cdiKind(),
}

specName, err := cdiapi.GenerateNameForTransientSpec(spec, claimUID)
if err != nil {
return fmt.Errorf("failed to generate CDI Spec name: %w", err)
}

return cdi.registry.SpecDB().RemoveSpec(specName + ".yaml")
}

func (cdi *CDIHandler) getContaineEdits(claim string, rdma bool) cdispec.ContainerEdits {
soName := path.Base(cdi.so)
ce := cdispec.ContainerEdits{
Expand Down Expand Up @@ -135,3 +148,7 @@ func (cdi *CDIHandler) getContaineEdits(claim string, rdma bool) cdispec.Contain

return ce
}

func (cdi *CDIHandler) cdiKind() string {
return cdi.vendor + "/" + cdi.class
}
14 changes: 13 additions & 1 deletion pkg/dra/dra-plugin/device_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ func NewDeviceState(logger *zap.Logger, cdiRoot, so string) (*NodeDeviceState, e
}

return &NodeDeviceState{
cdi: cdi,
cdi: cdi,
preparedClaims: make(map[string]struct{}),
}, nil
}

Expand All @@ -57,3 +58,14 @@ func (nds *NodeDeviceState) Prepare(ctx context.Context, claimUID string, scp *v
nds.preparedClaims[claimUID] = struct{}{}
return nds.cdi.GetClaimDevices(claimUID), nil
}

func (nds *NodeDeviceState) UnPrepare(ctx context.Context, claimUID string) error {
nds.Lock()
defer nds.Unlock()

if _, ok := nds.preparedClaims[claimUID]; ok {
delete(nds.preparedClaims, claimUID)
}

return nds.cdi.DeleteClaimSpecFile(claimUID)
}
26 changes: 20 additions & 6 deletions pkg/dra/dra-plugin/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ func (d *driver) NodePrepareResources(ctx context.Context, req *drapbv1.NodePrep
for _, claim := range req.Claims {
preparedResources.Claims[claim.Uid] = d.nodePrepareResource(ctx, claim)
}

d.logger.Info("NodePrepareResource returning newly prepared devices", zap.Any("response", preparedResources))
return preparedResources, nil
}

Expand All @@ -58,7 +60,7 @@ func (d *driver) nodePrepareResource(ctx context.Context, claim *drapbv1.Claim)
}

if isPrepared {
d.logger.Info("Claim has already prepared, returning cached device resources", zap.String("claim", claim.Uid))
d.logger.Info("[NodePrepareResource] Claim has already prepared, returning cached device resources", zap.String("claim", claim.Uid))
return &drapbv1.NodePrepareResourceResponse{CDIDevices: devices}
}

Expand All @@ -71,7 +73,6 @@ func (d *driver) nodePrepareResource(ctx context.Context, claim *drapbv1.Claim)
}
}

d.logger.Info("Returning newly prepared devices for claim", zap.String("claim", claim.Uid), zap.Strings("devices", devices))
return &drapbv1.NodePrepareResourceResponse{CDIDevices: devices}
}

Expand Down Expand Up @@ -112,8 +113,21 @@ func (d *driver) isPrepared(ctx context.Context, claimUID string) (bool, []strin
}

func (d *driver) NodeUnprepareResources(ctx context.Context, req *drapbv1.NodeUnprepareResourcesRequest) (*drapbv1.NodeUnprepareResourcesResponse, error) {
// We don't upprepare as part of NodeUnprepareResource, we do it
// asynchronously when the claims themselves are deleted and the
// AllocatedClaim has been removed.
return &drapbv1.NodeUnprepareResourcesResponse{}, nil
d.logger.Info("NodeUnprepareResources is called")
response := make(map[string]*drapbv1.NodeUnprepareResourceResponse, len(req.Claims))
for _, claim := range req.Claims {
response[claim.Uid] = d.unPrepareResoruce(ctx, claim)

}
return &drapbv1.NodeUnprepareResourcesResponse{Claims: response}, nil
}

func (d *driver) unPrepareResoruce(ctx context.Context, claim *drapbv1.Claim) *drapbv1.NodeUnprepareResourceResponse {
d.logger.Info("UnPrepareResource for claim", zap.String("claim", claim.Uid))
if err := d.State.UnPrepare(ctx, claim.Uid); err != nil {
d.logger.Error("error unprepare resource for claim", zap.String("claim", claim.Uid), zap.Error(err))
return &drapbv1.NodeUnprepareResourceResponse{Error: err.Error()}
}

return &drapbv1.NodeUnprepareResourceResponse{}
}

0 comments on commit 7ac5d1e

Please sign in to comment.