diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index b436bcab1..5171fddc1 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -3553,6 +3553,30 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
 		}
 	}
 
 	// no further repo configuration required when using pre-compiled drivers, return here.
 	if config.Driver.UsePrecompiledDrivers() {
+		// Precompiled driver containers on SUSE distributions need access to
+		// the host /lib/modules at runtime, so expose it read-only through a
+		// hostPath volume before returning.
+		if osID := getOSName(n.gpuNodeOSTag); osID == "sles" || osID == "sl-micro" {
+			n.logger.Info("Mounting /lib/modules into the driver container")
+			libModulesVolMount := corev1.VolumeMount{
+				Name:      "lib-modules",
+				MountPath: "/run/host/lib/modules",
+				ReadOnly:  true,
+			}
+			driverContainer.VolumeMounts = append(driverContainer.VolumeMounts, libModulesVolMount)
+
+			libModulesVol := corev1.Volume{
+				Name: "lib-modules",
+				VolumeSource: corev1.VolumeSource{
+					HostPath: &corev1.HostPathVolumeSource{
+						Path: "/lib/modules",
+						Type: ptr.To(corev1.HostPathDirectory),
+					},
+				},
+			}
+			podSpec.Volumes = append(podSpec.Volumes, libModulesVol)
+		}
+
 		return nil
diff --git a/controllers/object_controls_test.go b/controllers/object_controls_test.go
index f6df7340d..4a60d2162 100644
--- a/controllers/object_controls_test.go
+++ b/controllers/object_controls_test.go
@@ -1883,3 +1883,101 @@ func TestMIGManager(t *testing.T) {
 		})
 	}
 }
+
+// TestDriverPrecompiledLibModulesUbuntu tests that /lib/modules is NOT mounted for precompiled drivers on Ubuntu.
+// NOTE(review): the test does not set gpuNodeOSTag itself; it assumes the shared
+// clusterPolicyController carries a non-SUSE (Ubuntu) tag at this point - confirm.
+func TestDriverPrecompiledLibModulesUbuntu(t *testing.T) {
+	cp := getDriverTestInput("precompiled")
+	output := getDriverTestOutput("precompiled")
+
+	ds, err := testDaemonsetCommon(t, cp, "Driver", output["numDaemonsets"].(int))
+	if err != nil {
+		t.Fatalf("error in testDaemonsetCommon(): %v", err)
+	}
+	require.NotNil(t, ds)
+
+	// Register the state removal as a cleanup so it still runs when one of
+	// the assertions below aborts the test.
+	t.Cleanup(func() {
+		if err := removeState(&clusterPolicyController, clusterPolicyController.idx-1); err != nil {
+			t.Errorf("error removing state: %v", err)
+			return
+		}
+		clusterPolicyController.idx--
+	})
+
+	// Check that /lib/modules volume and mount are NOT present
+	for _, vol := range ds.Spec.Template.Spec.Volumes {
+		require.NotEqual(t, "lib-modules", vol.Name, "lib-modules volume should not be present for ubuntu")
+	}
+
+	driverContainer := findContainerByName(ds.Spec.Template.Spec.Containers, "nvidia-driver-ctr")
+	require.NotNil(t, driverContainer)
+
+	for _, mount := range driverContainer.VolumeMounts {
+		require.NotEqual(t, "lib-modules", mount.Name, "lib-modules volume mount should not be present for ubuntu")
+	}
+}
+
+// TestDriverPrecompiledLibModulesSuse tests that /lib/modules is mounted for precompiled drivers on SLES and SL-Micro.
+func TestDriverPrecompiledLibModulesSuse(t *testing.T) {
+	osTags := []string{"sles16.0", "sl-micro6.1"}
+
+	for _, osTag := range osTags {
+		t.Run(osTag, func(t *testing.T) {
+			// Save the original OS tag and restore it once the subtest is done.
+			// Cleanups run last-in-first-out, so the state removal registered
+			// further down runs before the tag is restored.
+			originalOSTag := clusterPolicyController.gpuNodeOSTag
+			t.Cleanup(func() {
+				clusterPolicyController.gpuNodeOSTag = originalOSTag
+			})
+
+			clusterPolicyController.gpuNodeOSTag = osTag
+
+			cp := getDriverTestInput("precompiled")
+			output := getDriverTestOutput("precompiled")
+
+			ds, err := testDaemonsetCommon(t, cp, "Driver", output["numDaemonsets"].(int))
+			if err != nil {
+				t.Fatalf("error in testDaemonsetCommon(): %v", err)
+			}
+			require.NotNil(t, ds)
+
+			// Register the state removal as a cleanup so it still runs when
+			// one of the assertions below aborts the subtest.
+			t.Cleanup(func() {
+				if err := removeState(&clusterPolicyController, clusterPolicyController.idx-1); err != nil {
+					t.Errorf("error removing state: %v", err)
+					return
+				}
+				clusterPolicyController.idx--
+			})
+
+			// Check for /lib/modules volume and mount
+			foundVolume := false
+			for _, vol := range ds.Spec.Template.Spec.Volumes {
+				if vol.Name == "lib-modules" {
+					foundVolume = true
+					require.NotNil(t, vol.HostPath)
+					require.Equal(t, "/lib/modules", vol.HostPath.Path)
+				}
+			}
+			require.Truef(t, foundVolume, "lib-modules volume not found for precompiled drivers on %s", osTag)
+
+			foundMount := false
+			driverContainer := findContainerByName(ds.Spec.Template.Spec.Containers, "nvidia-driver-ctr")
+			require.NotNil(t, driverContainer)
+
+			for _, mount := range driverContainer.VolumeMounts {
+				if mount.Name == "lib-modules" {
+					foundMount = true
+					require.Equal(t, "/run/host/lib/modules", mount.MountPath)
+					require.True(t, mount.ReadOnly)
+				}
+			}
+			require.Truef(t, foundMount, "lib-modules volume mount not found for precompiled drivers on %s", osTag)
+		})
+	}
+}
diff --git a/internal/state/driver_volumes.go b/internal/state/driver_volumes.go
index c9e13b998..a1f59c94c 100644
--- a/internal/state/driver_volumes.go
+++ b/internal/state/driver_volumes.go
@@ -209,6 +209,31 @@ func (s *stateDriver) getDriverAdditionalConfigs(ctx context.Context, cr *v1alph
 				additionalCfgs.Volumes = append(additionalCfgs.Volumes, subscriptionVol)
 			}
 		}
 	}
+
+	// Mount /lib/modules for precompiled drivers on SUSE distributions.
+	// Those containers need access to host /lib/modules at runtime.
+	// This check is kept outside the block above (which holds the subscription
+	// mounts) so that it does not depend on whatever condition guards that block.
+	if cr.Spec.UsePrecompiledDrivers() && (pool.osRelease == "sles" || pool.osRelease == "sl-micro") {
+		logger.Info("Mounting /lib/modules into the driver container")
+		libModulesVolMount := corev1.VolumeMount{
+			Name:      "lib-modules",
+			MountPath: "/run/host/lib/modules",
+			ReadOnly:  true,
+		}
+		additionalCfgs.VolumeMounts = append(additionalCfgs.VolumeMounts, libModulesVolMount)
+
+		libModulesVol := corev1.Volume{
+			Name: "lib-modules",
+			VolumeSource: corev1.VolumeSource{
+				HostPath: &corev1.HostPathVolumeSource{
+					Path: "/lib/modules",
+					Type: ptr.To(corev1.HostPathDirectory),
+				},
+			},
+		}
+		additionalCfgs.Volumes = append(additionalCfgs.Volumes, libModulesVol)
+	}
 
 	// mount any custom kernel module configuration parameters at /drivers