Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sriov dp admission controller to dpu operator #172

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Dockerfile.networkResourcesInjector.rhel
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Build stage: compile the network-resources-injector binary inside the OpenShift Go builder image.
FROM registry.ci.openshift.org/ocp/builder:rhel-9-golang-1.23-openshift-4.19 AS builder
# Target platform build args; GOOS falls back to "linux" below when TARGETOS is unset.
ARG TARGETOS
ARG TARGETARCH
WORKDIR /workspace
COPY . .
# NOTE(review): GOARCH has no fallback (unlike GOOS) — presumably the build tool always provides TARGETARCH; confirm.
RUN GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} make build-network-resources-injector

# Runtime stage: only the compiled binary is carried over from the builder.
FROM registry.ci.openshift.org/ocp/4.19:base-rhel9
# Re-declared so ${TARGETARCH} can be used in this stage's COPY.
ARG TARGETARCH
WORKDIR /

# The make target writes the binary as bin/nri.<GOARCH>; it is installed here as /webhook.
# NOTE(review): no ENTRYPOINT/CMD is set — presumably the pod spec supplies the command; confirm.
COPY --from=builder /workspace/bin/nri.${TARGETARCH} /webhook
29 changes: 25 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,13 @@ DAEMON_BIN = bin/daemon
DPU_CNI_BIN = bin/dpu-cni
IPU_PLUGIN_BIN = bin/ipuplugin
VSP_BIN = bin/vsp-mrvl
NRI_BIN = bin/nri

GOARCH ?= amd64
GOOS ?= linux

.PHONY: build
build: manifests generate fmt vet build-manager build-daemon build-intel-vsp build-marvell-vsp
build: manifests generate fmt vet build-manager build-daemon build-intel-vsp build-marvell-vsp build-network-resources-injector
@echo "Built all components"

.PHONY: build-manager
Expand All @@ -243,6 +244,10 @@ build-intel-vsp:
build-marvell-vsp:
CGO_ENABLED=0 GOOS=${GOOS} GOARCH=${GOARCH} go build -o $(VSP_BIN).${GOARCH} internal/daemon/vendor-specific-plugins/marvell/main.go

# Compile the network-resources-injector webhook to $(NRI_BIN).$(GOARCH) with cgo disabled.
.PHONY: build-network-resources-injector
build-network-resources-injector:
	CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(GOARCH) go build -o $(NRI_BIN).$(GOARCH) cmd/nri/networkresourcesinjector.go

# If you wish to build the manager image targeting other platforms you can use the --platform flag.
# (e.g. docker build --platform linux/arm64 ). However, you must enable docker buildKit for it.
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
Expand All @@ -262,6 +267,7 @@ DPU_OPERATOR_IMAGE := $(REGISTRY):5000/dpu-operator:dev
DPU_DAEMON_IMAGE := $(REGISTRY):5000/dpu-daemon:dev
MARVELL_VSP_IMAGE := $(REGISTRY):5000/mrvl-vsp:dev
INTEL_VSP_IMAGE := $(REGISTRY):5000/intel-vsp:dev
NETWORK_RESOURCES_INJECTOR_IMAGE:= $(REGISTRY):5000/network-resources-injector-image:dev

.PHONY: prep-local-deploy
prep-local-deploy: tools
Expand Down Expand Up @@ -296,6 +302,7 @@ local-build: ## Build all container images necessary to run the whole operator
$(CONTAINER_TOOL) build -v $(GO_CONTAINER_CACHE):/go:z -f Dockerfile.daemon.rhel -t $(DPU_DAEMON_IMAGE)
$(CONTAINER_TOOL) build -v $(GO_CONTAINER_CACHE):/go:z -f Dockerfile.mrvlVSP.rhel -t $(MARVELL_VSP_IMAGE)
$(CONTAINER_TOOL) build -v $(GO_CONTAINER_CACHE):/go:z -f Dockerfile.IntelVSP.rhel -t $(INTEL_VSP_IMAGE)
$(CONTAINER_TOOL) build -v $(GO_CONTAINER_CACHE):/go:z -f Dockerfile.networkResourcesInjector.rhel -t $(NETWORK_RESOURCES_INJECTOR_IMAGE)

.PHONY: prepare-multi-arch
prepare-multi-arch:
Expand All @@ -308,7 +315,7 @@ go-cache: ## Build all container images necessary to run the whole operator

## Build all container images necessary to run the whole operator
.PHONY: local-buildx
local-buildx: prepare-multi-arch go-cache local-buildx-manager local-buildx-daemon local-buildx-marvell-vsp local-buildx-intel-vsp
local-buildx: prepare-multi-arch go-cache local-buildx-manager local-buildx-daemon local-buildx-marvell-vsp local-buildx-intel-vsp local-buildx-network-resources-injector
@echo "local-buildx completed"

define build_image
Expand All @@ -333,6 +340,10 @@ local-buildx-marvell-vsp: prepare-multi-arch go-cache
local-buildx-intel-vsp: prepare-multi-arch go-cache
$(call build_image,INTEL_VSP_IMAGE,Dockerfile.IntelVSP.rhel)

# Build the network-resources-injector image from Dockerfile.networkResourcesInjector.rhel
# using the build_image macro; the first argument is the NAME of the image variable, not its value.
.PHONY: local-buildx-network-resources-injector
local-buildx-network-resources-injector: prepare-multi-arch go-cache
$(call build_image,NETWORK_RESOURCES_INJECTOR_IMAGE,Dockerfile.networkResourcesInjector.rhel)

TMP_FILE=/tmp/dpu-operator-incremental-build
define build_image_incremental
bin/incremental -dockerfile $(2) -base-uri $($(1))-base -output-file $(TMP_FILE)
Expand Down Expand Up @@ -367,8 +378,14 @@ local-buildx-incremental-intel-vsp: prepare-multi-arch go-cache
GOARCH=amd64 $(MAKE) build-intel-vsp
$(call build_image_incremental,INTEL_VSP_IMAGE,Dockerfile.IntelVSP.rhel)

# Incremental image build for the network-resources-injector: first compile the binary
# for arm64 and amd64 through recursive make, then run the build_image_incremental macro.
.PHONY: local-buildx-incremental-network-resources-injector
local-buildx-incremental-network-resources-injector: prepare-multi-arch go-cache
GOARCH=arm64 $(MAKE) build-network-resources-injector
GOARCH=amd64 $(MAKE) build-network-resources-injector
$(call build_image_incremental,NETWORK_RESOURCES_INJECTOR_IMAGE,Dockerfile.networkResourcesInjector.rhel)

.PHONY: incremental-local-buildx
incremental-local-buildx: prepare-multi-arch go-cache incremental-prep-local-deploy local-buildx-incremental-manager local-buildx-incremental-daemon local-buildx-incremental-marvell-vsp local-buildx-incremental-intel-vsp
incremental-local-buildx: prepare-multi-arch go-cache incremental-prep-local-deploy local-buildx-incremental-manager local-buildx-incremental-daemon local-buildx-incremental-marvell-vsp local-buildx-incremental-intel-vsp local-buildx-incremental-network-resources-injector
@echo "local-buildx-incremental completed"

.PHONY: local-pushx-incremental
Expand All @@ -377,24 +394,28 @@ local-pushx-incremental: ## Push all container images necessary to run the whole
buildah manifest push --all $(DPU_DAEMON_IMAGE)-manifest docker://$(DPU_DAEMON_IMAGE)
buildah manifest push --all $(MARVELL_VSP_IMAGE)-manifest docker://$(MARVELL_VSP_IMAGE)
buildah manifest push --all $(INTEL_VSP_IMAGE)-manifest docker://$(INTEL_VSP_IMAGE)
buildah manifest push --all $(NETWORK_RESOURCES_INJECTOR_IMAGE)-manifest docker://$(NETWORK_RESOURCES_INJECTOR_IMAGE)

# Each "<image>-manifest" list is pushed twice: once to the image's own tag and once to
# "<image>-base". The "-base" tag is what build_image_incremental passes as -base-uri.
.PHONY: local-pushx
local-pushx: ## Push all container images necessary to run the whole operator
buildah manifest push --all $(DPU_OPERATOR_IMAGE)-manifest docker://$(DPU_OPERATOR_IMAGE)
buildah manifest push --all $(DPU_DAEMON_IMAGE)-manifest docker://$(DPU_DAEMON_IMAGE)
buildah manifest push --all $(MARVELL_VSP_IMAGE)-manifest docker://$(MARVELL_VSP_IMAGE)
buildah manifest push --all $(INTEL_VSP_IMAGE)-manifest docker://$(INTEL_VSP_IMAGE)
buildah manifest push --all $(NETWORK_RESOURCES_INJECTOR_IMAGE)-manifest docker://$(NETWORK_RESOURCES_INJECTOR_IMAGE)
buildah manifest push --all $(DPU_OPERATOR_IMAGE)-manifest docker://$(DPU_OPERATOR_IMAGE)-base
buildah manifest push --all $(DPU_DAEMON_IMAGE)-manifest docker://$(DPU_DAEMON_IMAGE)-base
buildah manifest push --all $(MARVELL_VSP_IMAGE)-manifest docker://$(MARVELL_VSP_IMAGE)-base
buildah manifest push --all $(INTEL_VSP_IMAGE)-manifest docker://$(INTEL_VSP_IMAGE)-base

buildah manifest push --all $(NETWORK_RESOURCES_INJECTOR_IMAGE)-manifest docker://$(NETWORK_RESOURCES_INJECTOR_IMAGE)-base

# Push every operator image with $(CONTAINER_TOOL) as a plain image push
# (no manifest lists, unlike the buildah-based local-pushx targets).
.PHONY: local-push
local-push: ## Push all container images necessary to run the whole operator
$(CONTAINER_TOOL) push $(DPU_OPERATOR_IMAGE)
$(CONTAINER_TOOL) push $(DPU_DAEMON_IMAGE)
$(CONTAINER_TOOL) push $(MARVELL_VSP_IMAGE)
$(CONTAINER_TOOL) push $(INTEL_VSP_IMAGE)
$(CONTAINER_TOOL) push $(NETWORK_RESOURCES_INJECTOR_IMAGE)
# PLATFORMS defines the target platforms for which the manager image is built, to provide support for multiple
# architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to:
# - able to use docker buildx . More info: https://docs.docker.com/build/buildx/
Expand Down
2 changes: 2 additions & 0 deletions bundle/manifests/dpu-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ spec:
env:
- name: DPU_DAEMON_IMAGE
value: quay.io/openshift/origin-dpu-daemon:4.19
- name: NETWORK_RESOURCES_INJECTOR_IMAGE
value: quay.io/openshift/sriov-dp-admission-controller:latest
image: quay.io/openshift/origin-dpu-operator:4.19
livenessProbe:
httpGet:
Expand Down
8 changes: 7 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,13 @@ func main() {
os.Exit(1)
}

b := controller.NewDpuOperatorConfigReconciler(mgr.GetClient(), mgr.GetScheme(), dpuDaemonImage, vspImages, vspExtraData)
// The injector image reference is read from the environment and passed to the
// reconciler below; the operator exits when the variable is empty or unset.
networkResourcesInjectorImage := os.Getenv("NETWORK_RESOURCES_INJECTOR_IMAGE")
if networkResourcesInjectorImage == "" {
// NOTE(review): err is assigned by earlier code outside this view and is unrelated
// to the missing env var — presumably nil at this point; confirm, and consider
// logging a dedicated error instead.
setupLog.Error(err, "Failed to set NETWORK_RESOURCES_INJECTOR_IMAGE env var")
os.Exit(1)
}

b := controller.NewDpuOperatorConfigReconciler(mgr.GetClient(), mgr.GetScheme(), dpuDaemonImage, vspImages, vspExtraData, networkResourcesInjectorImage)

if value, ok := os.LookupEnv("IMAGE_PULL_POLICIES"); ok {
b = b.WithImagePullPolicy(value)
Expand Down
253 changes: 253 additions & 0 deletions cmd/nri/networkresourcesinjector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"context"
"crypto/tls"
"flag"
"fmt"
"net/http"
"os"
"time"

"github.com/fsnotify/fsnotify"
"github.com/golang/glog"

"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/k8snetworkplumbingwg/network-resources-injector/pkg/controlswitches"
netcache "github.com/k8snetworkplumbingwg/network-resources-injector/pkg/tools"
"github.com/k8snetworkplumbingwg/network-resources-injector/pkg/userdefinedinjections"
"github.com/k8snetworkplumbingwg/network-resources-injector/pkg/webhook"
)

const (
defaultClientCa = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
controlSwitchesConfigMap = "nri-control-switches"
)

// main runs the network-resources-injector mutating admission webhook.
//
// It performs, in order:
//  1. flag parsing and validation (ports, bind address, TLS files, control switches);
//  2. start of a plain-HTTP health server answering 200 on /healthz;
//  3. loading of the TLS key pair and the client CA pool, and wiring of the
//     webhook package (in-cluster client, control switches, net-attach-def
//     cache, user-defined injections);
//  4. start of the HTTPS server that dispatches POST /mutate to webhook.MutateHandler;
//  5. an endless loop that reloads the key pair once both the certificate and
//     the key file have changed, and that re-reads the control-switches
//     ConfigMap whenever 30 seconds pass without any watcher activity.
//
// Every unrecoverable error terminates the process through glog.Fatalf.
func main() {
	var namespace string
	var clientCAPaths webhook.ClientCAFlags

	/* load configuration */
	port := flag.Int("port", 8443, "The port on which to serve.")
	address := flag.String("bind-address", "0.0.0.0", "The IP address on which to listen for the --port port.")
	cert := flag.String("tls-cert-file", "cert.pem", "File containing the default x509 Certificate for HTTPS.")
	key := flag.String("tls-private-key-file", "key.pem", "File containing the default x509 private key matching --tls-cert-file.")
	insecure := flag.Bool("insecure", false, "Disable adding client CA to server TLS endpoint --insecure")
	flag.Var(&clientCAPaths, "client-ca", "File containing client CA. This flag is repeatable if more than one client CA needs to be added to server")
	healthCheckPort := flag.Int("health-check-port", 8444, "The port to use for health check monitoring")
	enableHTTP2 := flag.Bool("enable-http2", false, "If HTTP/2 should be enabled for the webhook server.")

	// do initialization of control switches flags
	controlSwitches := controlswitches.SetupControlSwitchesFlags()

	// parse only after every flag (including the control switches) is declared
	flag.Parse()

	// initialize all control switches structures
	controlSwitches.InitControlSwitches()
	glog.Infof("controlSwitches: %+v", *controlSwitches)

	if !isValidPort(*port) {
		glog.Fatalf("invalid port number. Choose between 1024 and 65535")
	}

	if !controlSwitches.IsResourcesNameEnabled() {
		glog.Fatalf("Input argument for resourceName cannot be empty.")
	}

	if *address == "" || *cert == "" || *key == "" {
		glog.Fatalf("input argument(s) not defined correctly")
	}

	// fall back to the service-account CA when no --client-ca was given
	if len(clientCAPaths) == 0 {
		clientCAPaths = append(clientCAPaths, defaultClientCa)
	}

	if namespace = os.Getenv("NAMESPACE"); namespace == "" {
		namespace = "kube-system"
	}

	if !isValidPort(*healthCheckPort) {
		glog.Fatalf("Invalid health check port number. Choose between 1024 and 65535")
	} else if *healthCheckPort == *port {
		glog.Fatalf("Health check port should be different from port")
	} else {
		// plain-HTTP health endpoint, served on its own mux and port
		go func() {
			addr := fmt.Sprintf("%s:%d", *address, *healthCheckPort)
			mux := http.NewServeMux()

			mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusOK)
			})
			err := http.ListenAndServe(addr, mux)
			if err != nil {
				glog.Fatalf("error starting health check server: %v", err)
			}
		}()
	}

	glog.Infof("starting mutating admission controller for network resources injection")

	keyPair, err := webhook.NewTlsKeypairReloader(*cert, *key)
	if err != nil {
		glog.Fatalf("error load certificate: %s", err.Error())
	}

	clientCaPool, err := webhook.NewClientCertPool(&clientCAPaths, *insecure)
	if err != nil {
		glog.Fatalf("error loading client CA pool: '%s'", err.Error())
	}

	/* init API client */
	clientset := webhook.SetupInClusterClient()

	// initialize webhook with controlSwitches
	webhook.SetControlSwitches(controlSwitches)

	// initialize webhook with cache
	netAnnotationCache := netcache.Create()
	netAnnotationCache.Start()
	webhook.SetNetAttachDefCache(netAnnotationCache)

	userInjections := userdefinedinjections.CreateUserInjectionsStructure()
	webhook.SetUserInjectionStructure(userInjections)

	go func() {
		/* register handlers */
		var httpServer *http.Server

		// Registered on the default mux; httpServer sets no Handler, so it serves that mux.
		http.HandleFunc("/mutate", func(w http.ResponseWriter, r *http.Request) {
			if r.URL.Path != "/mutate" {
				http.NotFound(w, r)
				return
			}
			if r.Method != http.MethodPost {
				http.Error(w, "Invalid HTTP verb requested", http.StatusMethodNotAllowed)
				return
			}
			webhook.MutateHandler(w, r)
		})

		/* start serving */
		httpServer = &http.Server{
			Addr:              fmt.Sprintf("%s:%d", *address, *port),
			ReadTimeout:       5 * time.Second,
			WriteTimeout:      10 * time.Second,
			MaxHeaderBytes:    1 << 20,
			ReadHeaderTimeout: 1 * time.Second,
			TLSConfig: &tls.Config{
				ClientAuth:               webhook.GetClientAuth(*insecure),
				MinVersion:               tls.VersionTLS12,
				CurvePreferences:         []tls.CurveID{tls.CurveP521, tls.CurveP384},
				ClientCAs:                clientCaPool.GetCertPool(),
				PreferServerCipherSuites: true,
				InsecureSkipVerify:       false,
				CipherSuites: []uint16{
					// tls 1.2
					tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
					tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
					tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
					tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
					// tls 1.3 configuration not supported
				},
				// the certificate is resolved per handshake so a reload needs no restart
				GetCertificate: keyPair.GetCertificateFunc(),
			},
			// CVE-2023-39325 https://github.com/golang/go/issues/63417
			// A non-nil empty map disables HTTP/2 unless --enable-http2 resets it below.
			TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
		}

		if *enableHTTP2 {
			httpServer.TLSNextProto = nil
		}

		// empty file names: the certificate comes from TLSConfig.GetCertificate
		err := httpServer.ListenAndServeTLS("", "")
		if err != nil {
			glog.Fatalf("error starting web server: %v", err)
		}
	}()

	/* watch the cert and key files and reload the key pair once both were updated. */
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		glog.Fatalf("error starting fsnotify watcher: %v", err)
	}
	defer watcher.Close()

	certUpdated := false
	keyUpdated := false

	for {
		// The watches are (re-)registered on every iteration — presumably so a watch
		// lost to a rename/remove of the file is restored; confirm. A failure is
		// logged rather than silently dropped, and the loop carries on.
		for _, path := range []string{*cert, *key} {
			if err := watcher.Add(path); err != nil {
				glog.Warningf("error adding watch for %s: %v", path, err)
			}
		}

		select {
		case event, ok := <-watcher.Events:
			if !ok {
				continue
			}
			glog.V(2).Infof("watcher event: %v", event)
			mask := fsnotify.Create | fsnotify.Rename | fsnotify.Remove |
				fsnotify.Write | fsnotify.Chmod
			if (event.Op & mask) != 0 {
				glog.V(2).Infof("modified file: %v", event.Name)
				if event.Name == *cert {
					certUpdated = true
				}
				if event.Name == *key {
					keyUpdated = true
				}
				// reload only when both halves of the key pair have changed
				if keyUpdated && certUpdated {
					if err := keyPair.Reload(); err != nil {
						glog.Fatalf("Failed to reload certificate: %v", err)
					}
					certUpdated = false
					keyUpdated = false
				}
			}
		case err, ok := <-watcher.Errors:
			if !ok {
				continue
			}
			glog.Infof("watcher error: %v", err)
		case <-time.After(30 * time.Second):
			// reached only when no watcher event or error arrived for 30 seconds
			cm, err := clientset.CoreV1().ConfigMaps(namespace).Get(
				context.Background(), controlSwitchesConfigMap, metav1.GetOptions{})
			// only in case of API errors report an error and do not restore default values
			if err != nil && !errors.IsNotFound(err) {
				glog.Warningf("Error getting control switches configmap %s", err.Error())
				continue
			}

			// to be called each time when map is present or not (in that case to restore default values)
			controlSwitches.ProcessControlSwitchesConfigMap(cm)
			userInjections.SetUserDefinedInjections(cm)
		}
	}

	// TODO: find a way to stop cache, should we run the above block in a go routine and make main module
	// to respond to terminate signal ?
}

// isValidPort reports whether port lies within 1024-65535 inclusive,
// the range the command-line validation messages advertise.
func isValidPort(port int) bool {
	return port >= 1024 && port <= 65535
}
Loading