From 57b4d3e5731d6f71309b538d9aee0afea6a017c6 Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Wed, 13 Aug 2025 16:44:31 +0300 Subject: [PATCH] sgx: add new special resources for TDX QGS and SGX platform registration Signed-off-by: Mikko Ylinen --- .github/workflows/lib-build.yaml | 1 + cmd/sgx_plugin/sgx_plugin.go | 59 +++++++++++++++---- cmd/sgx_plugin/sgx_plugin_test.go | 21 ++++++- demo/sgx-dcap-infra/Dockerfile | 22 +++++++ demo/sgx-dcap-infra/dcap-registration-flow | 28 +++++++++ ...viceplugin.intel.com_sgxdeviceplugins.yaml | 4 ++ deployments/sgx_dcap/base/intel-sgx-dcap.yaml | 46 +++++++++++++++ deployments/sgx_dcap/base/kustomization.yaml | 4 ++ deployments/sgx_dcap/kustomization.yaml | 2 + .../sgx_plugin/base/intel-sgx-plugin.yaml | 6 ++ .../deviceplugin/v1/sgxdeviceplugin_types.go | 3 + pkg/controllers/sgx/controller.go | 4 ++ pkg/controllers/sgx/controller_test.go | 14 +++++ 13 files changed, 203 insertions(+), 11 deletions(-) create mode 100644 demo/sgx-dcap-infra/Dockerfile create mode 100755 demo/sgx-dcap-infra/dcap-registration-flow create mode 100644 deployments/sgx_dcap/base/intel-sgx-dcap.yaml create mode 100644 deployments/sgx_dcap/base/kustomization.yaml create mode 100644 deployments/sgx_dcap/kustomization.yaml diff --git a/.github/workflows/lib-build.yaml b/.github/workflows/lib-build.yaml index 4659bf6c4..4f0de1bd2 100644 --- a/.github/workflows/lib-build.yaml +++ b/.github/workflows/lib-build.yaml @@ -33,6 +33,7 @@ jobs: - accel-config-demo - intel-opencl-icd - openssl-qat-engine + - sgx-dcap-infra - sgx-sdk-demo - sgx-aesmd-demo - dsa-dpdk-dmadevtest diff --git a/cmd/sgx_plugin/sgx_plugin.go b/cmd/sgx_plugin/sgx_plugin.go index cc8cd289a..7b2e6c5e6 100644 --- a/cmd/sgx_plugin/sgx_plugin.go +++ b/cmd/sgx_plugin/sgx_plugin.go @@ -25,6 +25,7 @@ import ( dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin" "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + cdispec 
"tags.cncf.io/container-device-interface/specs-go" ) const ( @@ -38,18 +39,20 @@ const ( ) type devicePlugin struct { - scanDone chan bool - devfsDir string - nEnclave uint - nProvision uint + scanDone chan bool + devfsDir string + nEnclave uint + nProvision uint + dcapInfraResources bool } -func newDevicePlugin(devfsDir string, nEnclave, nProvision uint) *devicePlugin { +func newDevicePlugin(devfsDir string, nEnclave, nProvision uint, dcapInfraResources bool) *devicePlugin { return &devicePlugin{ - devfsDir: devfsDir, - nEnclave: nEnclave, - nProvision: nProvision, - scanDone: make(chan bool, 1), + devfsDir: devfsDir, + nEnclave: nEnclave, + nProvision: nProvision, + dcapInfraResources: dcapInfraResources, + scanDone: make(chan bool, 1), } } @@ -96,6 +99,39 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { devTree.AddDevice(deviceTypeProvision, devID, dpapi.NewDeviceInfoWithTopologyHints(pluginapi.Healthy, nodes, nil, nil, nil, nil, nil)) } + if !dp.dcapInfraResources { + return devTree, nil + } + + tdQeNodes := []pluginapi.DeviceSpec{ + {HostPath: sgxEnclavePath, ContainerPath: sgxEnclavePath, Permissions: "rw"}, + {HostPath: sgxProvisionPath, ContainerPath: sgxProvisionPath, Permissions: "rw"}, + } + + devTree.AddDevice("tdqe", "tdqe-1", dpapi.NewDeviceInfoWithTopologyHints(pluginapi.Healthy, tdQeNodes, nil, nil, nil, nil, nil)) + + regNodes := []pluginapi.DeviceSpec{ + {HostPath: sgxEnclavePath, ContainerPath: sgxEnclavePath, Permissions: "rw"}, + } + + // TODO: /sys/firmware is a maskedPath. Test /run/efivars with a patched PCK-ID-Retrieval-Tool. 
+ efiVarFsMount := &cdispec.Spec{ + Version: dpapi.CDIVersion, + Kind: dpapi.CDIVendor + "/sgx", + Devices: []cdispec.Device{ + { + Name: "efivarfs", + ContainerEdits: cdispec.ContainerEdits{ + Mounts: []*cdispec.Mount{ + {HostPath: "efivarfs", ContainerPath: "/run/efivars", Type: "efivarfs", Options: []string{"rw", "nosuid", "nodev", "noexec", "relatime"}}, + }, + }, + }, + }, + } + + devTree.AddDevice("registration", "registration-1", dpapi.NewDeviceInfoWithTopologyHints(pluginapi.Healthy, regNodes, nil, nil, nil, nil, efiVarFsMount)) + return devTree, nil } @@ -121,15 +157,18 @@ func getDefaultPodCount(nCPUs uint) uint { func main() { var enclaveLimit, provisionLimit uint + var dcapInfraResources bool + podCount := getDefaultPodCount(uint(runtime.NumCPU())) flag.UintVar(&enclaveLimit, "enclave-limit", podCount, "Number of \"enclave\" resources") flag.UintVar(&provisionLimit, "provision-limit", podCount, "Number of \"provision\" resources") + flag.BoolVar(&dcapInfraResources, "dcap-infra-resources", false, "add special resources for DCAP infrastructure daemonSet pods") flag.Parse() klog.V(4).Infof("SGX device plugin started with %d \"%s/enclave\" resources and %d \"%s/provision\" resources.", enclaveLimit, namespace, provisionLimit, namespace) - plugin := newDevicePlugin(devicePath, enclaveLimit, provisionLimit) + plugin := newDevicePlugin(devicePath, enclaveLimit, provisionLimit, dcapInfraResources) manager := dpapi.NewManager(namespace, plugin) manager.Run() } diff --git a/cmd/sgx_plugin/sgx_plugin_test.go b/cmd/sgx_plugin/sgx_plugin_test.go index f5a602837..3540f91e5 100644 --- a/cmd/sgx_plugin/sgx_plugin_test.go +++ b/cmd/sgx_plugin/sgx_plugin_test.go @@ -27,17 +27,22 @@ func init() { _ = flag.Set("v", "4") // Enable debug output } +// Update if new resource types are added. +const dcapInfraResources = 2 + // mockNotifier implements Notifier interface. 
type mockNotifier struct { scanDone chan bool enclaveDevCount int provisionDevCount int + dcapInfraResCnt int } // Notify stops plugin Scan. func (n *mockNotifier) Notify(newDeviceTree dpapi.DeviceTree) { n.enclaveDevCount = len(newDeviceTree[deviceTypeEnclave]) n.provisionDevCount = len(newDeviceTree[deviceTypeProvision]) + n.dcapInfraResCnt = len(newDeviceTree) - n.enclaveDevCount - n.provisionDevCount n.scanDone <- true } @@ -95,6 +100,7 @@ func TestScan(t *testing.T) { requestedProvisionDevs uint expectedEnclaveDevs int expectedProvisionDevs int + requestDcapInfra bool }{ { name: "no device installed", @@ -131,6 +137,16 @@ func TestScan(t *testing.T) { requestedProvisionDevs: 20, expectedProvisionDevs: 20, }, + { + name: "all resources", + enclaveDevice: "sgx_enclave", + provisionDevice: "sgx_provision", + requestedEnclaveDevs: 1, + expectedEnclaveDevs: 1, + requestedProvisionDevs: 1, + expectedProvisionDevs: 1, + requestDcapInfra: true, + }, } for _, tc := range tcases { @@ -159,7 +175,7 @@ func TestScan(t *testing.T) { } } - plugin := newDevicePlugin(devfs, tc.requestedEnclaveDevs, tc.requestedProvisionDevs) + plugin := newDevicePlugin(devfs, tc.requestedEnclaveDevs, tc.requestedProvisionDevs, tc.requestDcapInfra) notifier := &mockNotifier{ scanDone: plugin.scanDone, @@ -175,6 +191,9 @@ func TestScan(t *testing.T) { if tc.expectedProvisionDevs != notifier.provisionDevCount { t.Errorf("Wrong number of discovered provision devices") } + if tc.requestDcapInfra && notifier.dcapInfraResCnt != dcapInfraResources { + t.Errorf("Wrong number of discovered DCAP infra resources: expected %d, got %d.", dcapInfraResources, notifier.dcapInfraResCnt) + } }) } } diff --git a/demo/sgx-dcap-infra/Dockerfile b/demo/sgx-dcap-infra/Dockerfile new file mode 100644 index 000000000..70a4d5099 --- /dev/null +++ b/demo/sgx-dcap-infra/Dockerfile @@ -0,0 +1,22 @@ +FROM ubuntu:24.04 + +# TODO: pin DCAP release version + +RUN apt update && apt install -y curl gnupg \ + && echo "deb 
[arch=amd64 signed-by=/usr/share/keyrings/intel-sgx.gpg] https://download.01.org/intel-sgx/sgx_repo/ubuntu noble main" | \ + tee -a /etc/apt/sources.list.d/intel-sgx.list \ + && curl -s https://download.01.org/intel-sgx/sgx_repo/ubuntu/intel-sgx-deb.key | \ + gpg --dearmor --output /usr/share/keyrings/intel-sgx.gpg \ + && apt update \ + && apt install -y --no-install-recommends \ + tdx-qgs \ + sgx-pck-id-retrieval-tool \ + libsgx-ra-uefi \ + libsgx-dcap-default-qpl + +# BUG: "qgs -p=0" gets overridden by the config file making the parameter useless +RUN sed -e 's/\(^port =\).*/\1 0/g' -i /etc/qgs.conf + +COPY dcap-registration-flow /usr/bin + +ENTRYPOINT ["/opt/intel/tdx-qgs/qgs", "--no-daemon", "-p=0"] diff --git a/demo/sgx-dcap-infra/dcap-registration-flow b/demo/sgx-dcap-infra/dcap-registration-flow new file mode 100755 index 000000000..b49531379 --- /dev/null +++ b/demo/sgx-dcap-infra/dcap-registration-flow @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -u + +# TODO remove before merging +sleep infinity + +if [ ! -x "${PWD}"/PCKIDRetrievalTool ]; then + echo "dcap-registration-flow: PCKIDRetrievalTool must be in the workingDir and executable" + exit 1 +fi + +echo "Waiting for the PCCS to be ready ..." + +if ! curl --retry 20 --retry-delay 30 -k https://pccs-service:8042/sgx/certification/v4/rootcacrl &> /dev/null; then + echo "ERROR: PCCS pod didn't become ready after 20 minutes" + exit 1 +fi + +echo "PCCS is online, proceeding ..." + +ARGS="-user_token ${USER_TOKEN} -url ${PCCS_URL} -use_secure_cert ${SECURE_CERT}" + +echo "Calling PCKIDRetrievalTool ${ARGS} ..." 
+ +./PCKIDRetrievalTool ${ARGS} + +sleep infinity diff --git a/deployments/operator/crd/bases/deviceplugin.intel.com_sgxdeviceplugins.yaml b/deployments/operator/crd/bases/deviceplugin.intel.com_sgxdeviceplugins.yaml index a50a2354b..565388c28 100644 --- a/deployments/operator/crd/bases/deviceplugin.intel.com_sgxdeviceplugins.yaml +++ b/deployments/operator/crd/bases/deviceplugin.intel.com_sgxdeviceplugins.yaml @@ -55,6 +55,10 @@ spec: spec: description: SgxDevicePluginSpec defines the desired state of SgxDevicePlugin. properties: + dcapInfraResources: + description: DcapInfraResources adds two special resources for DCAP + infra DaemonSet Pods. + type: boolean enclaveLimit: description: EnclaveLimit is a number of containers that can share the same SGX enclave device. diff --git a/deployments/sgx_dcap/base/intel-sgx-dcap.yaml b/deployments/sgx_dcap/base/intel-sgx-dcap.yaml new file mode 100644 index 000000000..23bc8020e --- /dev/null +++ b/deployments/sgx_dcap/base/intel-sgx-dcap.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: intel-sgx-dcap-infra + labels: + app: intel-sgx-dcap-infra +spec: + selector: + matchLabels: + app: intel-sgx-dcap-infra + template: + metadata: + labels: + app: intel-sgx-dcap-infra + spec: + automountServiceAccountToken: false + initContainers: + - name: platform-registration + image: intel/sgx-dcap-infra:devel + restartPolicy: Always + workingDir: "/opt/intel/sgx-pck-id-retrieval-tool/" + command: ['/usr/bin/dcap-registration-flow'] + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + resources: + limits: + sgx.intel.com/registration: 1 + containers: + - name: tdxqgs + image: intel/sgx-dcap-infra:devel + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + resources: + limits: + sgx.intel.com/tdqe: 1 + imagePullPolicy: IfNotPresent + volumeMounts: + - name: qgs-socket + mountPath: /var/run/tdx-qgs + volumes: + - name: qgs-socket + hostPath: + 
path: /var/run/tdx-qgs + type: DirectoryOrCreate diff --git a/deployments/sgx_dcap/base/kustomization.yaml b/deployments/sgx_dcap/base/kustomization.yaml new file mode 100644 index 000000000..2b3c9088f --- /dev/null +++ b/deployments/sgx_dcap/base/kustomization.yaml @@ -0,0 +1,4 @@ +resources: +- intel-sgx-dcap.yaml +generatorOptions: + disableNameSuffixHash: true diff --git a/deployments/sgx_dcap/kustomization.yaml b/deployments/sgx_dcap/kustomization.yaml new file mode 100644 index 000000000..197c7292e --- /dev/null +++ b/deployments/sgx_dcap/kustomization.yaml @@ -0,0 +1,2 @@ +resources: + - base diff --git a/deployments/sgx_plugin/base/intel-sgx-plugin.yaml b/deployments/sgx_plugin/base/intel-sgx-plugin.yaml index 356650554..597bca7b0 100644 --- a/deployments/sgx_plugin/base/intel-sgx-plugin.yaml +++ b/deployments/sgx_plugin/base/intel-sgx-plugin.yaml @@ -49,6 +49,8 @@ spec: - name: sgx-provision mountPath: /dev/sgx_provision readOnly: true + - name: cdipath + mountPath: /var/run/cdi volumes: - name: kubeletsockets hostPath: @@ -61,5 +63,9 @@ spec: hostPath: path: /dev/sgx_provision type: CharDevice + - name: cdipath + hostPath: + path: /var/run/cdi + type: DirectoryOrCreate nodeSelector: kubernetes.io/arch: amd64 diff --git a/pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go b/pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go index 1b2bfb6be..6bc7937d5 100644 --- a/pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go +++ b/pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go @@ -46,6 +46,9 @@ type SgxDevicePluginSpec struct { // +kubebuilder:validation:Minimum=1 ProvisionLimit int `json:"provisionLimit,omitempty"` + // DcapInfraResources adds two special resources for DCAP infra DaemonSet Pods. + DcapInfraResources bool `json:"dcapInfraResources,omitempty"` + // LogLevel sets the plugin's log level. 
// +kubebuilder:validation:Minimum=0 LogLevel int `json:"logLevel,omitempty"` diff --git a/pkg/controllers/sgx/controller.go b/pkg/controllers/sgx/controller.go index d92395870..685876c84 100644 --- a/pkg/controllers/sgx/controller.go +++ b/pkg/controllers/sgx/controller.go @@ -247,5 +247,9 @@ func getPodArgs(sdp *devicepluginv1.SgxDevicePlugin) []string { args = append(args, "-provision-limit", "1") } + if sdp.Spec.DcapInfraResources { + args = append(args, "-dcap-infra-resources") + } + return args } diff --git a/pkg/controllers/sgx/controller_test.go b/pkg/controllers/sgx/controller_test.go index 78c9f4d0f..8c48c7701 100644 --- a/pkg/controllers/sgx/controller_test.go +++ b/pkg/controllers/sgx/controller_test.go @@ -39,6 +39,7 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet yes := true no := false + directoryOrCreate := v1.HostPathDirectoryOrCreate charDevice := v1.HostPathCharDev maxUnavailable := intstr.FromInt(1) maxSurge := intstr.FromInt(0) @@ -116,6 +117,10 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet MountPath: "/dev/sgx_provision", ReadOnly: true, }, + { + Name: "cdipath", + MountPath: "/var/run/cdi", + }, }, }, }, @@ -147,6 +152,15 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet }, }, }, + { + Name: "cdipath", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/var/run/cdi", + Type: &directoryOrCreate, + }, + }, + }, }, }, },