From 7f4028d356c7c9750719a97159af0445d3bb5c0c Mon Sep 17 00:00:00 2001 From: ShyamsundarR Date: Wed, 22 Jan 2020 21:48:46 -0500 Subject: [PATCH] CephFS: Added ENOENT checks for possible missing volumes Added checks in DeleteVolume RPC, for image missing errors, and taking appropriate actions to cleanup the CSI reservations. Further removed forcing a volume purge, and instead added checks for missing volume errors in purgeVolume. This should now fix issues where an continuation of an interrupted DeleteVolume call, that only deleted the backing volume, will proceed and not error out. Signed-off-by: ShyamsundarR (cherry picked from commit 35e8c3b3a58abc20be2ddde4a0a6b810ace93d5e) --- pkg/cephfs/controllerserver.go | 23 +++++++++++++++++++++-- pkg/cephfs/errors.go | 9 +++++++++ pkg/cephfs/volume.go | 17 +++++++++++++++-- pkg/cephfs/volumeoptions.go | 6 +++--- 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/pkg/cephfs/controllerserver.go b/pkg/cephfs/controllerserver.go index 1ffe41a27..7aa08d256 100644 --- a/pkg/cephfs/controllerserver.go +++ b/pkg/cephfs/controllerserver.go @@ -243,7 +243,23 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol return cs.deleteVolumeDeprecated(ctx, req) } - return nil, status.Error(codes.Internal, err.Error()) + // All errors other than ErrVolumeNotFound should return an error back to the caller + if _, ok := err.(ErrVolumeNotFound); !ok { + return nil, status.Error(codes.Internal, err.Error()) + } + + // If error is ErrImageNotFound then we failed to find the subvolume, but found the imageOMap + // to lead us to the image, hence the imageOMap needs to be garbage collected, by calling + // unreserve for the same + if acquired := cs.VolumeLocks.TryAcquire(volOptions.RequestName); !acquired { + return nil, status.Errorf(codes.Aborted, util.VolumeOperationAlreadyExistsFmt, volOptions.RequestName) + } + defer cs.VolumeLocks.Release(volOptions.RequestName) + + if err = undoVolReservation(ctx, volOptions, *vID, secrets); err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + return &csi.DeleteVolumeResponse{}, nil } // lock out parallel delete and create requests against the same volume name as we @@ -263,7 +279,10 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol if err = purgeVolume(ctx, volumeID(vID.FsSubvolName), cr, volOptions); err != nil { klog.Errorf(util.Log(ctx, "failed to delete volume %s: %v"), volID, err) - return nil, status.Error(codes.Internal, err.Error()) + // All errors other than ErrVolumeNotFound should return an error back to the caller + if _, ok := err.(ErrVolumeNotFound); !ok { + return nil, status.Error(codes.Internal, err.Error()) + } } if err := undoVolReservation(ctx, volOptions, *vID, secrets); err != nil { diff --git a/pkg/cephfs/errors.go b/pkg/cephfs/errors.go index ab1d75c87..ec5bb4ca0 100644 --- a/pkg/cephfs/errors.go +++ b/pkg/cephfs/errors.go @@ -35,3 +35,12 @@ type ErrNonStaticVolume struct { func (e ErrNonStaticVolume) Error() string { return e.err.Error() } + +// ErrVolumeNotFound is returned when a subvolume is not found in CephFS +type ErrVolumeNotFound struct { + err error +} + +func (e ErrVolumeNotFound) Error() string { + return e.err.Error() +} diff --git a/pkg/cephfs/volume.go b/pkg/cephfs/volume.go index 56e1eb482..daf0e0a10 100644 --- a/pkg/cephfs/volume.go +++ b/pkg/cephfs/volume.go @@ -52,8 +52,12 @@ func getCephRootPathLocalDeprecated(volID volumeID) string { return fmt.Sprintf("%s/controller/volumes/root-%s", PluginFolder, string(volID)) } +func getVolumeNotFoundErrorString(volID volumeID) string { + return fmt.Sprintf("Error ENOENT: Subvolume '%s' not found", string(volID)) +} + func getVolumeRootPathCeph(ctx context.Context, volOptions *volumeOptions, cr *util.Credentials, volID volumeID) (string, error) { - stdout, _, err := util.ExecCommand( + stdout, stderr, err := util.ExecCommand( "ceph", "fs", "subvolume", @@ -69,6 +73,11 @@ func getVolumeRootPathCeph(ctx context.Context, volOptions *volumeOptions, cr *u if err != nil { klog.Errorf(util.Log(ctx, "failed to get the rootpath for the vol %s(%s)"), string(volID), err) + + if strings.Contains(string(stderr), getVolumeNotFoundErrorString(volID)) { + return "", ErrVolumeNotFound{err} + } + return "", err } return strings.TrimSuffix(string(stdout), "\n"), nil @@ -204,13 +213,17 @@ func purgeVolume(ctx context.Context, volID volumeID, cr *util.Credentials, volO string(volID), "--group_name", csiSubvolumeGroup, - "--force", "-m", volOptions.Monitors, "-c", util.CephConfigPath, "-n", cephEntityClientPrefix+cr.ID, "--keyfile="+cr.KeyFile) if err != nil { klog.Errorf(util.Log(ctx, "failed to purge subvolume %s(%s) in fs %s"), string(volID), err, volOptions.FsName) + + if strings.Contains(err.Error(), getVolumeNotFoundErrorString(volID)) { + return ErrVolumeNotFound{err} + } + return err } diff --git a/pkg/cephfs/volumeoptions.go b/pkg/cephfs/volumeoptions.go index f3948324a..ad1adb323 100644 --- a/pkg/cephfs/volumeoptions.go +++ b/pkg/cephfs/volumeoptions.go @@ -245,13 +245,13 @@ func newVolumeOptionsFromVolID(ctx context.Context, volID string, volOpt, secret } } + volOptions.ProvisionVolume = true + volOptions.RootPath, err = getVolumeRootPathCeph(ctx, &volOptions, cr, volumeID(vid.FsSubvolName)) if err != nil { - return nil, nil, err + return &volOptions, &vid, err } - volOptions.ProvisionVolume = true - return &volOptions, &vid, nil }