From 31648c8feb82a0770b0979e1ddc4f41406678bbd Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Fri, 26 Jul 2019 14:36:43 +0200 Subject: [PATCH] provisioners: add reconfiguring of PID limit The container runtime CRI-O limits the number of PIDs to 1024 by default. When many PVCs are requested at the same time, it is possible for the provisioner to start too many threads (or go routines) and executing 'rbd' commands can start to fail. In case a go routine can not get started, the process panics. The PID limit can be changed by passing an argument to kubelet, but this will affect all pids running on a host. Changing the parameters to kubelet is also not a very elegant solution. Instead, the provisioner pod can change the configuration itself. The pod is running in privileged mode and can write to /sys/fs/cgroup where the limit is configured. With this change, the limit is configured to 'max', just as if there is no limit at all. The logs of the csi-rbdplugin in the provisioner pod will reflect the change it makes when starting the service: $ oc -n rook-ceph logs -c csi-rbdplugin csi-rbdplugin-provisioner-0 .. I0726 13:59:19.737678 1 cephcsi.go:127] Initial PID limit is set to 1024 I0726 13:59:19.737746 1 cephcsi.go:136] Reconfigured PID limit to -1 (max) .. It is possible to pass a different limit on the commandline of the cephcsi executable. The following flag has been added: --pidlimit= the PID limit to configure through cgroups This accepts special values -1 (max) and 0 (default, do not reconfigure). Other integers will be the limit that gets configured in cgroups. Signed-off-by: Niels de Vos --- cmd/cephcsi.go | 21 +++ .../v1.13/csi-cephfsplugin-provisioner.yaml | 1 + .../templates/provisioner-statefulset.yaml | 1 + .../v1.14+/csi-cephfsplugin-provisioner.yaml | 1 + .../templates/provisioner-deployment.yaml | 1 + .../v1.13/csi-rbdplugin-provisioner.yaml | 1 + .../templates/provisioner-statefulset.yaml | 1 + .../v1.14+/csi-rbdplugin-provisioner.yaml | 1 + .../templates/provisioner-deployment.yaml | 1 + docs/deploy-cephfs.md | 1 + docs/deploy-rbd.md | 1 + pkg/util/pidlimit.go | 121 ++++++++++++++++++ pkg/util/pidlimit_test.go | 46 +++++++ 13 files changed, 198 insertions(+) create mode 100644 pkg/util/pidlimit.go create mode 100644 pkg/util/pidlimit_test.go diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go index 09f8cef35..cd97787ad 100644 --- a/cmd/cephcsi.go +++ b/cmd/cephcsi.go @@ -47,6 +47,7 @@ var ( " instances, when sharing Ceph clusters across CSI instances for provisioning") metadataStorage = flag.String("metadatastorage", "", "metadata persistence method [node|k8s_configmap]") pluginPath = flag.String("pluginpath", "/var/lib/kubelet/plugins/", "the location of cephcsi plugin") + pidLimit = flag.Int("pidlimit", 0, "the PID limit to configure through cgroups") // rbd related flags containerized = flag.Bool("containerized", true, "whether run as containerized") @@ -117,6 +118,26 @@ func main() { } } + // the driver may need a higher PID limit for handling all concurrent requests + if pidLimit != nil && *pidLimit != 0 { + currentLimit, err := util.GetPIDLimit() + if err != nil { + klog.Errorf("Failed to get the PID limit, can not reconfigure: %v", err) + } else { + klog.Infof("Initial PID limit is set to %d", currentLimit) + err = util.SetPIDLimit(*pidLimit) + if err != nil { + klog.Errorf("Failed to set new PID limit to %d: %v", *pidLimit, err) + } else { + s := "" + if *pidLimit == -1 { + s = " (max)" + } + klog.Infof("Reconfigured PID limit to %d%s", *pidLimit, s) + } + } + } + klog.Infof("Starting driver type: %v with name: %v", driverType, dname) switch driverType { case rbdType: diff --git a/deploy/cephfs/kubernetes/v1.13/csi-cephfsplugin-provisioner.yaml b/deploy/cephfs/kubernetes/v1.13/csi-cephfsplugin-provisioner.yaml index b897377fb..738675004 100644 --- a/deploy/cephfs/kubernetes/v1.13/csi-cephfsplugin-provisioner.yaml +++ b/deploy/cephfs/kubernetes/v1.13/csi-cephfsplugin-provisioner.yaml @@ -70,6 +70,7 @@ spec: - "--v=5" - "--drivername=cephfs.csi.ceph.com" - "--metadatastorage=k8s_configmap" + - "--pidlimit=-1" env: - name: NODE_ID valueFrom: diff --git a/deploy/cephfs/kubernetes/v1.13/helm/templates/provisioner-statefulset.yaml b/deploy/cephfs/kubernetes/v1.13/helm/templates/provisioner-statefulset.yaml index 110c70b95..604ac2f72 100644 --- a/deploy/cephfs/kubernetes/v1.13/helm/templates/provisioner-statefulset.yaml +++ b/deploy/cephfs/kubernetes/v1.13/helm/templates/provisioner-statefulset.yaml @@ -71,6 +71,7 @@ spec: - "--v=5" - "--drivername=$(DRIVER_NAME)" - "--metadatastorage=k8s_configmap" + - "--pidlimit=-1" env: - name: DRIVER_NAME value: {{ .Values.driverName }} diff --git a/deploy/cephfs/kubernetes/v1.14+/csi-cephfsplugin-provisioner.yaml b/deploy/cephfs/kubernetes/v1.14+/csi-cephfsplugin-provisioner.yaml index 23dfbf622..371643f35 100644 --- a/deploy/cephfs/kubernetes/v1.14+/csi-cephfsplugin-provisioner.yaml +++ b/deploy/cephfs/kubernetes/v1.14+/csi-cephfsplugin-provisioner.yaml @@ -59,6 +59,7 @@ spec: - "--v=5" - "--drivername=cephfs.csi.ceph.com" - "--metadatastorage=k8s_configmap" + - "--pidlimit=-1" env: - name: NODE_ID valueFrom: diff --git a/deploy/cephfs/kubernetes/v1.14+/helm/templates/provisioner-deployment.yaml b/deploy/cephfs/kubernetes/v1.14+/helm/templates/provisioner-deployment.yaml index 8b8dce98e..1bcb49e47 100644 --- a/deploy/cephfs/kubernetes/v1.14+/helm/templates/provisioner-deployment.yaml +++ b/deploy/cephfs/kubernetes/v1.14+/helm/templates/provisioner-deployment.yaml @@ -74,6 +74,7 @@ spec: - "--v=5" - "--drivername=$(DRIVER_NAME)" - "--metadatastorage=k8s_configmap" + - "--pidlimit=-1" env: - name: DRIVER_NAME value: {{ .Values.driverName }} diff --git a/deploy/rbd/kubernetes/v1.13/csi-rbdplugin-provisioner.yaml b/deploy/rbd/kubernetes/v1.13/csi-rbdplugin-provisioner.yaml index d9ce31536..3b03ec341 100644 --- a/deploy/rbd/kubernetes/v1.13/csi-rbdplugin-provisioner.yaml +++ b/deploy/rbd/kubernetes/v1.13/csi-rbdplugin-provisioner.yaml @@ -85,6 +85,7 @@ spec: - "--v=5" - "--drivername=rbd.csi.ceph.com" - "--containerized=true" + - "--pidlimit=-1" env: - name: NODE_ID valueFrom: diff --git a/deploy/rbd/kubernetes/v1.13/helm/templates/provisioner-statefulset.yaml b/deploy/rbd/kubernetes/v1.13/helm/templates/provisioner-statefulset.yaml index ed8327c33..b195f9667 100644 --- a/deploy/rbd/kubernetes/v1.13/helm/templates/provisioner-statefulset.yaml +++ b/deploy/rbd/kubernetes/v1.13/helm/templates/provisioner-statefulset.yaml @@ -88,6 +88,7 @@ spec: - "--v=5" - "--drivername=$(DRIVER_NAME)" - "--containerized=true" + - "--pidlimit=-1" env: - name: DRIVER_NAME value: {{ .Values.driverName }} diff --git a/deploy/rbd/kubernetes/v1.14+/csi-rbdplugin-provisioner.yaml b/deploy/rbd/kubernetes/v1.14+/csi-rbdplugin-provisioner.yaml index f0b00b669..02aecd603 100644 --- a/deploy/rbd/kubernetes/v1.14+/csi-rbdplugin-provisioner.yaml +++ b/deploy/rbd/kubernetes/v1.14+/csi-rbdplugin-provisioner.yaml @@ -75,6 +75,7 @@ spec: - "--v=5" - "--drivername=rbd.csi.ceph.com" - "--containerized=true" + - "--pidlimit=-1" env: - name: NODE_ID valueFrom: diff --git a/deploy/rbd/kubernetes/v1.14+/helm/templates/provisioner-deployment.yaml b/deploy/rbd/kubernetes/v1.14+/helm/templates/provisioner-deployment.yaml index c792415ca..bacd694c4 100644 --- a/deploy/rbd/kubernetes/v1.14+/helm/templates/provisioner-deployment.yaml +++ b/deploy/rbd/kubernetes/v1.14+/helm/templates/provisioner-deployment.yaml @@ -92,6 +92,7 @@ spec: - "--v=5" - "--drivername=$(DRIVER_NAME)" - "--containerized=true" + - "--pidlimit=-1" env: - name: DRIVER_NAME value: {{ .Values.driverName }} diff --git a/docs/deploy-cephfs.md b/docs/deploy-cephfs.md index 62f3903f3..7b1c69447 100644 --- a/docs/deploy-cephfs.md +++ b/docs/deploy-cephfs.md @@ -53,6 +53,7 @@ that should be resolved in v14.2.3. | `--instanceid` | "default" | Unique ID distinguishing this instance of Ceph CSI among other instances, when sharing Ceph clusters across CSI instances for provisioning | | `--pluginpath` | "/var/lib/kubelet/plugins/" | The location of cephcsi plugin on host | | `--metadatastorage` | _empty_ | Points to where older (1.0.0 or older plugin versions) metadata about provisioned volumes are kept, as file or in as k8s configmap (`node` or `k8s_configmap` respectively) | +| `--pidlimit` | _0_ | Configure the PID limit in cgroups. The container runtime can restrict the number of processes/tasks which can cause problems while provisioning (or deleting) a large number of volumes. A value of `-1` configures the limit to the maximum, `0` does not configure limits at all. | **Available environmental variables:** diff --git a/docs/deploy-rbd.md b/docs/deploy-rbd.md index 985d65bc7..4f7fece36 100644 --- a/docs/deploy-rbd.md +++ b/docs/deploy-rbd.md @@ -35,6 +35,7 @@ make image-cephcsi | `--containerized` | true | Whether running in containerized mode | | `--instanceid` | "default" | Unique ID distinguishing this instance of Ceph CSI among other instances, when sharing Ceph clusters across CSI instances for provisioning | | `--metadatastorage` | _empty_ | Points to where legacy (1.0.0 or older plugin versions) metadata about provisioned volumes are kept, as file or in as k8s configmap (`node` or `k8s_configmap` respectively) | +| `--pidlimit` | _0_ | Configure the PID limit in cgroups. The container runtime can restrict the number of processes/tasks which can cause problems while provisioning (or deleting) a large number of volumes. A value of `-1` configures the limit to the maximum, `0` does not configure limits at all. | **Available volume parameters:** diff --git a/pkg/util/pidlimit.go b/pkg/util/pidlimit.go new file mode 100644 index 000000000..0a6a52f3b --- /dev/null +++ b/pkg/util/pidlimit.go @@ -0,0 +1,121 @@ +/* +Copyright 2019 The Ceph-CSI Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "bufio" + "fmt" + "io" + "os" + "strconv" + "strings" +) + +const ( + procCgroup = "/proc/self/cgroup" + sysPidsMaxFmt = "/sys/fs/cgroup/pids%s/pids.max" +) + +// return the cgouprs "pids.max" file of the current process +// +// find the line containing the pids group from the /proc/self/cgroup file +// $ grep 'pids' /proc/self/cgroup +// 7:pids:/kubepods.slice/kubepods-besteffort.slice/....scope +// $ cat /sys/fs/cgroup/pids + *.scope + /pids.max +func getCgroupPidsFile() (string, error) { + cgroup, err := os.Open(procCgroup) + if err != nil { + return "", err + } + defer cgroup.Close() + + scanner := bufio.NewScanner(cgroup) + var slice string + for scanner.Scan() { + parts := strings.Split(scanner.Text(), ":") + if parts == nil || len(parts) < 3 { + continue + } + if parts[1] == "pids" { + slice = parts[2] + break + } + } + if slice == "" { + return "", fmt.Errorf("could not find a cgroup for 'pids'") + } + + pidsMax := fmt.Sprintf(sysPidsMaxFmt, slice) + return pidsMax, nil +} + +// GetPIDLimit returns the current PID limit, or an error. A value of -1 +// translates to "max". +func GetPIDLimit() (int, error) { + pidsMax, err := getCgroupPidsFile() + if err != nil { + return 0, err + } + + f, err := os.Open(pidsMax) + if err != nil { + return 0, err + } + defer f.Close() + + maxPidsStr, err := bufio.NewReader(f).ReadString('\n') + if err != nil && err != io.EOF { + return 0, err + } + maxPidsStr = strings.TrimRight(maxPidsStr, "\n") + + maxPids := -1 + if maxPidsStr != "max" { + maxPids, err = strconv.Atoi(maxPidsStr) + if err != nil { + return 0, err + } + } + return maxPids, nil +} + +// SetPIDLimit configures the given PID limit for the current process. A value +// of -1 translates to "max". +func SetPIDLimit(limit int) error { + limitStr := "max" + if limit != -1 { + limitStr = fmt.Sprintf("%d", limit) + } + + pidsMax, err := getCgroupPidsFile() + if err != nil { + return err + } + + f, err := os.Create(pidsMax) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(limitStr) + if err != nil { + return err + } + + return nil +} diff --git a/pkg/util/pidlimit_test.go b/pkg/util/pidlimit_test.go new file mode 100644 index 000000000..6fd06f32c --- /dev/null +++ b/pkg/util/pidlimit_test.go @@ -0,0 +1,46 @@ +/* +Copyright 2019 ceph-csi authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "testing" +) + +// minimal test to check if GetPIDLimit() returns an int +// changing the limit require root permissions, not tested +func TestGetPIDLimix(t *testing.T) { + limit, err := GetPIDLimit() + + if err != nil { + t.Errorf("no error should be returned, got: %v", err) + } + if limit == 0 { + t.Errorf("a PID limit of 0 is invalid") + } + + // this is expected to fail when not run as root + err = SetPIDLimit(4096) + if err != nil { + t.Logf("failed to set PID limit, are you running as root?") + } else { + // in case it worked, reset to the previous value + err = SetPIDLimit(limit) + if err != nil { + t.Logf("failed to reset PID to original limit %d", limit) + } + } +}