diff --git a/roles/vgpu/templates/nvidia-mdev.service.j2 b/roles/vgpu/templates/nvidia-mdev.service.j2
index d5b287c..e651756 100644
--- a/roles/vgpu/templates/nvidia-mdev.service.j2
+++ b/roles/vgpu/templates/nvidia-mdev.service.j2
@@ -4,15 +4,19 @@ Before=docker.service
 {% if vgpu_definition.mig_devices is defined %}
 After=nvidia-mig-manager.service
 Requires=nvidia-mig-manager.service
-{% else %}
-After=nvidia-sriov-{{ vgpu_definition.pci_address }}.service
-Requires=nvidia-sriov-{{ vgpu_definition.pci_address }}.service
 {% endif %}
 
 [Service]
+Restart=on-failure
+RestartSec=30
 Type=oneshot
 User=root
-ExecStartPre=/bin/sleep 5
+{% if vgpu_definition.mig_devices is not defined %}
+# Work around the lack of UpheldBy=/RestartMode=direct in systemd<254 to ensure
+# the unit is started when the dependency fails, see:
+# https://unix.stackexchange.com/questions/213185/restarting-systemd-service-on-dependency-failure
+ExecStartPre=/usr/bin/systemctl is-active nvidia-sriov-{{ vgpu_definition.pci_address }}.service
+{% endif %}
 ExecStart=-/usr/sbin/mdevctl start --uuid %i
 RemainAfterExit=yes
 
diff --git a/roles/vgpu/templates/nvidia-sriov.service.j2 b/roles/vgpu/templates/nvidia-sriov.service.j2
index c058f6c..4cd4db0 100644
--- a/roles/vgpu/templates/nvidia-sriov.service.j2
+++ b/roles/vgpu/templates/nvidia-sriov.service.j2
@@ -6,12 +6,19 @@ After=local-fs.target {{ vgpu_systemd_device[vgpu_definition.pci_address] }}
 Wants={{ vgpu_systemd_device[vgpu_definition.pci_address] }}
 
 [Service]
+Restart=on-failure
+RestartSec=30
 Type=oneshot
 User=root
-# NOTE(wszumski): There is a race in the driver initialization where if we run this too early, then
-# the mdev_support_devices entry doesn't show up in sysfs. I was unable to get this to show up again
-# without a reboot.
+# NOTE(wszumski): There is a race in the driver initialization where if we run
+# this too early, then the mdev_support_devices entry doesn't show up in sysfs.
+# I was unable to get this to show up again without a reboot.
 ExecStartPre=/bin/sleep 5
+# NOTE(wszumski): The sriov-manage script will unbind the nvidia driver to
+# initialize the virtual functions. If it fails part way through, the driver
+# can be left unbound, and subsequent executions of sriov-manage will fail. This
+# ensures that the nvidia driver is always bound before we run sriov-manage.
+ExecStart=/bin/bash -c "echo '{{ vgpu_definition.pci_address }}' > /sys/bus/pci/drivers/nvidia/bind || true"
 ExecStart=/usr/lib/nvidia/sriov-manage -e {{ vgpu_definition.pci_address }}
 RemainAfterExit=yes
 
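For reference, a rough sketch of what the two ExecStart lines added to nvidia-sriov.service.j2 amount to once the template is rendered; the PCI address 0000:3b:00.0 is a placeholder, not a value from this change:

#!/bin/bash
# Re-bind the physical function to the nvidia driver in case a previous
# sriov-manage run failed part way through and left it unbound; '|| true'
# keeps an already-bound device from failing the unit.
echo '0000:3b:00.0' > /sys/bus/pci/drivers/nvidia/bind || true
# Then (re-)enable the virtual functions for that device.
/usr/lib/nvidia/sriov-manage -e 0000:3b:00.0

Similarly, the ExecStartPre added to nvidia-mdev.service.j2 simply runs "systemctl is-active" against the corresponding nvidia-sriov unit, so the mdev unit fails fast while the SR-IOV setup is still broken and is retried by Restart=on-failure once it recovers.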