Skip to content

Commit 38d09d7

Browse files
committed
Add GPU Passthrough role
1 parent f34bb9d commit 38d09d7

File tree

4 files changed

+88
-0
lines changed

4 files changed

+88
-0
lines changed

roles/gpu_passthrough/README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# stackhpc.linux.gpu_passthrough
2+
3+
## Example playbook
4+
5+
```
6+
---
7+
- name: Enable GPU Passthrough
8+
hosts: gpu_passthrough
9+
tasks:
10+
- import_role:
11+
name: stackhpc.linux.gpu_passthrough
12+
handlers:
13+
- name: reboot
14+
fail:
15+
msg: "Please reboot your hypervisor and re-run your host configure to continue"
16+
become: true
17+
18+
```
19+
20+
Or if you want the machine to reboot automatically:
21+
22+
```
23+
---
24+
- name: Enable GPU Passthrough
25+
hosts: gpu_passthrough
26+
tasks:
27+
- import_role:
28+
name: stackhpc.linux.gpu_passthrough
29+
handlers:
30+
- name: reboot
31+
reboot:
32+
become: true
33+
34+
```
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
---
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
# Handler: rebuild the initramfs image for the currently running kernel
# (as reported by `uname -r`) using dracut.
#
# The shell module runs scripts with /bin/sh unless `executable` is set, so a
# "#!/bin/bash" line inside the script body is only a comment. Bash is
# therefore selected explicitly here instead of via a shebang.
#
# The kernel release is resolved once and quoted, so the image path and the
# kernel-version argument always agree and are never subject to word
# splitting.
#
# `changed_when: true` keeps the handler reporting "changed": dracut is run
# with -f and so overwrites the image on every invocation.
- name: Regenerate initramfs
  ansible.builtin.shell:
    executable: /bin/bash
    cmd: |-
      set -eux
      kernel_release="$(uname -r)"
      dracut -v -f "/boot/initramfs-${kernel_release}.img" "${kernel_release}"
  become: true
  changed_when: true

roles/gpu_passthrough/tasks/main.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
---
2+
# Maintain a managed block in /etc/modprobe.d/blacklist-nouveau.conf that
# blacklists the nouveau module and sets its modeset option to 0. The file is
# created if missing. When the block changes, the initramfs is regenerated
# and the `reboot` handler is notified.
#
# The `reboot` handler is not defined in the tasks shown here; the README
# examples define it in the calling playbook.
#
# NOTE(review): ansible-lint's documented skip marker is `# noqa`; the
# `# no-qa` spelling below is probably not recognised - confirm which rule
# was meant to be suppressed.
- name: Blacklist nouveau
  become: true
  ansible.builtin.blockinfile:
    create: true
    path: /etc/modprobe.d/blacklist-nouveau.conf
    owner: root
    group: root
    mode: "0664"
    block: |
      blacklist nouveau
      options nouveau modeset=0
  notify:
    - Regenerate initramfs
    - reboot  # no-qa
16+
17+
# Maintains a managed block in /etc/modprobe.d/kvm.conf that sets the kvm
# module option ignore_msrs=Y, creating the file if needed, and notifies the
# `reboot` handler when the block changes.
- name: Ignore unsupported model specific registers
18+
# Occasionally, applications running in the VM may crash unexpectedly,
19+
# whereas they would run normally on a physical machine. If, while
20+
# running dmesg -wH, you encounter an error mentioning MSR, the reason
21+
# for those crashes is that KVM injects a General protection fault (GPF)
22+
# when the guest tries to access unsupported Model-specific registers
23+
# (MSRs) - this often results in guest applications/OS crashing. A
24+
# number of those issues can be solved by passing the ignore_msrs=1
25+
# option to the KVM module, which will ignore unimplemented MSRs.
26+
# source: https://wiki.archlinux.org/index.php/QEMU
27+
ansible.builtin.blockinfile:
28+
path: /etc/modprobe.d/kvm.conf
29+
# NOTE(review): indentation is lost in this view. The '#' lines that follow
# "options kvm ignore_msrs=Y" appear to be part of the block text written to
# kvm.conf (a commented-out report_ignored_msrs option) rather than YAML
# comments - confirm against the real file before reformatting.
block: |
30+
options kvm ignore_msrs=Y
31+
# This option is not available in centos 7 as the kernel is too old,
32+
# but it can help with dmesg spam in newer kernels (centos8?). Sample
33+
# dmesg log message:
34+
# [ +0.000002] kvm [8348]: vcpu0, guest rIP: 0xffffffffb0a767fa ignored rdmsr: 0x619
35+
# options kvm report_ignored_msrs=N
36+
mode: "0664"
37+
owner: root
38+
group: root
39+
create: true
40+
become: true
41+
notify: reboot # no-qa
42+
43+
# Dynamically include the stackhpc.linux.iommu role. Per the task name it is
# expected to add the IOMMU settings to the kernel command line; that role's
# behaviour is defined outside this file.
- name: Add IOMMU config to kernel command line
  ansible.builtin.include_role:
    name: stackhpc.linux.iommu

0 commit comments

Comments
 (0)