diff --git a/roles/vgpu/templates/nvidia-mdev.service.j2 b/roles/vgpu/templates/nvidia-mdev.service.j2
index d5b287c..e651756 100644
--- a/roles/vgpu/templates/nvidia-mdev.service.j2
+++ b/roles/vgpu/templates/nvidia-mdev.service.j2
@@ -4,15 +4,19 @@ Before=docker.service
 {% if vgpu_definition.mig_devices is defined %}
 After=nvidia-mig-manager.service
 Requires=nvidia-mig-manager.service
-{% else %}
-After=nvidia-sriov-{{ vgpu_definition.pci_address }}.service
-Requires=nvidia-sriov-{{ vgpu_definition.pci_address }}.service
 {% endif %}
 
 [Service]
+Restart=on-failure
+RestartSec=30
 Type=oneshot
 User=root
-ExecStartPre=/bin/sleep 5
+{% if vgpu_definition.mig_devices is not defined %}
+# Work around the lack of UpheldBy=/RestartMode=direct in systemd<254 to ensure
+# the unit is started when the dependency fails, see:
+# https://unix.stackexchange.com/questions/213185/restarting-systemd-service-on-dependency-failure
+ExecStartPre=/usr/bin/systemctl is-active nvidia-sriov-{{ vgpu_definition.pci_address }}.service
+{% endif %}
 ExecStart=-/usr/sbin/mdevctl start --uuid %i
 RemainAfterExit=yes
 
diff --git a/roles/vgpu/templates/nvidia-sriov.service.j2 b/roles/vgpu/templates/nvidia-sriov.service.j2
index c058f6c..4cd4db0 100644
--- a/roles/vgpu/templates/nvidia-sriov.service.j2
+++ b/roles/vgpu/templates/nvidia-sriov.service.j2
@@ -6,12 +6,19 @@ After=local-fs.target {{ vgpu_systemd_device[vgpu_definition.pci_address] }}
 Wants={{ vgpu_systemd_device[vgpu_definition.pci_address] }}
 
 [Service]
+Restart=on-failure
+RestartSec=30
 Type=oneshot
 User=root
-# NOTE(wszumski): There is a race in the driver initialization where if we run this too early, then
-# the mdev_support_devices entry doesn't show up in sysfs. I was unable to get this to show up again
-# without a reboot.
+# NOTE(wszumski): There is a race in the driver initialization where if we run
+# this too early, then the mdev_support_devices entry doesn't show up in sysfs.
+# I was unable to get this to show up again without a reboot.
 ExecStartPre=/bin/sleep 5
+# NOTE(wszumski): The sriov-manage script will unbind the nvidia driver to
+# initialize the virtual functions. If it fails part way through, the driver
+# can be left unbound, and subsequent executions of sriov-manage will fail. This
+# ensures that the nvidia driver is always bound before we run sriov-manage.
+ExecStart=/bin/bash -c "echo '{{ vgpu_definition.pci_address }}' > /sys/bus/pci/drivers/nvidia/bind || true"
 ExecStart=/usr/lib/nvidia/sriov-manage -e {{ vgpu_definition.pci_address }}
 RemainAfterExit=yes
 
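For reference, a rough sketch of what the two ExecStart lines added to nvidia-sriov.service.j2 amount to once the template is rendered; the PCI address 0000:3b:00.0 is a placeholder, not a value from this change:

#!/bin/bash
# Re-bind the physical function to the nvidia driver in case a previous
# sriov-manage run failed part way through and left it unbound; '|| true'
# keeps an already-bound device from failing the unit.
echo '0000:3b:00.0' > /sys/bus/pci/drivers/nvidia/bind || true
# Then (re-)enable the virtual functions for that device.
/usr/lib/nvidia/sriov-manage -e 0000:3b:00.0

Similarly, the ExecStartPre added to nvidia-mdev.service.j2 simply runs "systemctl is-active" against the corresponding nvidia-sriov unit, so the mdev unit fails fast while the SR-IOV setup is still broken and is retried by Restart=on-failure once it recovers.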