diff --git a/charts/dataplane/values.yaml b/charts/dataplane/values.yaml index 70abc051..b8534bc6 100644 --- a/charts/dataplane/values.yaml +++ b/charts/dataplane/values.yaml @@ -1408,7 +1408,18 @@ serving: # -- Enables scraping of metrics from the serving component metrics: true # -- Additional configuration for Knative serving - extraConfig: {} + extraConfig: + + # -- Knative serving deployment configuration. + # Ref: https://knative.dev/docs/serving/configuration/deployment/#configure-deployment-resources + deployment: {} + + # -- Knative Serving attempts to resolve image tags to digests within the Knative controller. + # This requires the Knative serving controller to have permissions to access the registry and resolve the tags. + # The `registries-skipping-tag-resolving` key under `deployment` disables this behavior for the listed registries, so their image tags are used directly without being resolved to digests. + # Example (nested under `deployment`): + # registries-skipping-tag-resolving: "ghcr.io" + # -- Resources for serving components resources: 3scale-kourier-gateway: diff --git a/tests/generated/dataplane.aws.yaml b/tests/generated/dataplane.aws.yaml index 6a6c0832..b3dde56a 100644 --- a/tests/generated/dataplane.aws.yaml +++ b/tests/generated/dataplane.aws.yaml @@ -674,10 +674,10 @@ data: enabled: true enableTunnelService: true tunnel: - enableDirectToAppIngress: false + enableDirectToAppIngress: true deploymentToRestart: union-operator-proxy apps: - enabled: 'false' + enabled: 'true' syncClusterConfig: enabled: false clusterId: @@ -884,6 +884,133 @@ data: max_size_mbs: 0 target_gc_percent: 70 --- +# Source: dataplane/templates/serving/bootstrap-configmap.yaml +# We need to copy the default configmap here since knative-operator does not automatically update the +# address of the `net-kourier-controller` endpoint to use the release namespace. It assumes that the +# resources are being installed into the `knative-serving` namespace instead. 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: union-operator-serving-envoy-bootstrap + namespace: union +data: + envoy-bootstrap.yaml: | + dynamic_resources: + ads_config: + transport_api_version: V3 + api_type: GRPC + rate_limit_settings: {} + grpc_services: + - envoy_grpc: {cluster_name: xds_cluster} + cds_config: + resource_api_version: V3 + ads: {} + lds_config: + resource_api_version: V3 + ads: {} + node: + cluster: kourier-knative + id: 3scale-kourier-gateway + static_resources: + listeners: + - name: stats_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 9000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: stats_server + http_filters: + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + route_config: + virtual_hosts: + - name: admin_interface + domains: + - "*" + routes: + - match: + safe_regex: + regex: '/(certs|stats(/prometheus)?|server_info|clusters|listeners|ready)?' + headers: + - name: ':method' + string_match: + exact: GET + route: + cluster: service_stats + - match: + safe_regex: + regex: '/drain_listeners' + headers: + - name: ':method' + string_match: + exact: POST + route: + cluster: service_stats + clusters: + - name: service_stats + connect_timeout: 0.250s + type: static + load_assignment: + cluster_name: service_stats + endpoints: + lb_endpoints: + endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 9901 + - name: xds_cluster + # This keepalive is recommended by envoy docs. 
+ # https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: + connection_keepalive: + interval: 30s + timeout: 5s + connect_timeout: 1s + load_assignment: + cluster_name: xds_cluster + endpoints: + lb_endpoints: + endpoint: + address: + socket_address: + address: "net-kourier-controller" + port_value: 18000 + type: STRICT_DNS + admin: + access_log: + - name: envoy.access_loggers.stdout + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog + address: + socket_address: + address: 127.0.0.1 + port_value: 9901 + stats_config: + stats_tags: + - tag_name: name + regex: '^.*?\.u/.*?n=(.*?)/.*?u\..*$' + - tag_name: domain + regex: '^.*?\.u/.*?d=(.*?)/.*?u\..*$' + - tag_name: org + regex: '^.*?\.u/.*?o=(.*?)/.*?u\..*$' + - tag_name: project + regex: '^.*?\.u/.*?p=(.*?)/.*?u\..*$' + - tag_name: target + regex: '^.*?\.u/.*?t=(.*?)/.*?u\..*$' + - tag_name: target_namespace + regex: '^.*?\.u/.*?tns=(.*?)/.*?u\..*$' +--- # Source: dataplane/charts/fluentbit/templates/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1392,6 +1519,19 @@ rules: - /metrics verbs: - get + - apiGroups: + - serving.knative.dev + resources: + - revisions + - configurations + - services + verbs: + - get + - list + - watch + - create + - update + - delete --- # Source: dataplane/templates/propeller/serviceaccount-webhook.yaml kind: ClusterRole @@ -3088,7 +3228,7 @@ spec: template: metadata: annotations: - configChecksum: "406af0eee1ce6710a187811a571d9736c443e28228db13126bc41e3a57e6495" + configChecksum: "ba7b0e5654618bd1b499c974af10c6db179bccde05e71a0a6d40d564519c57f" labels: @@ -3225,7 +3365,7 @@ spec: template: metadata: annotations: - configChecksum: 
"406af0eee1ce6710a187811a571d9736c443e28228db13126bc41e3a57e6495" + configChecksum: "ba7b0e5654618bd1b499c974af10c6db179bccde05e71a0a6d40d564519c57f" labels: @@ -3685,6 +3825,175 @@ webhooks: # See the License for the specific language governing permissions and # limitations under the License. --- +# Source: dataplane/templates/serving/knative-serving.yaml +apiVersion: operator.knative.dev/v1beta1 +kind: KnativeServing +metadata: + name: union-operator-serving + namespace: union +spec: + config: + deployment: + progress-deadline: "30m" + queue-sidecar-cpu-request: "25m" + queue-sidecar-cpu-limit: "1000m" + queue-sidecar-memory-request: "400Mi" + queue-sidecar-memory-limit: "800Mi" + queue-sidecar-ephemeral-storage-request: "512Mi" + queue-sidecar-ephemeral-storage-limit: "1024Mi" + registries-skipping-tag-resolving: test.registry.com + features: + kubernetes.podspec-affinity: "enabled" + kubernetes.podspec-nodeselector: "enabled" + kubernetes.podspec-tolerations: "enabled" + kubernetes.podspec-fieldref: "enabled" + network: + ingress-class: "kourier.ingress.networking.knative.dev" + high-availability: + replicas: 2 + ingress: + kourier: + enabled: true + bootstrap-configmap: "union-operator-serving-envoy-bootstrap" + service-type: ClusterIP + podDisruptionBudgets: + - name: 3scale-kourier-gateway-pdb + minAvailable: 50% + - name: activator-pdb + minAvailable: 50% + - name: webhook-pdb + minAvailable: 50% + registry: + override: + # TODO(jeev): Wire up Union fork of Envoy + 3scale-kourier-gateway/kourier-gateway: ghcr.io/unionai/envoy:456fed84d4ad9a9dfb186d117d9362e9dc0f7c1f + # TODO(jeev): Wire up Union fork of Kourier + net-kourier-controller/controller: ghcr.io/unionai/kourier@sha256:5804c348d15b3959604e3e3ceed216c3a1c7b32cbe254c7d3eb02a35e62ba9c4 + workloads: + - name: 3scale-kourier-gateway + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + 
operator: In + values: + - 3scale-kourier-gateway + topologyKey: topology.kubernetes.io/zone + annotations: + checksum/bootstrap-config: 6469f6f592eebc9e0c676d8ccc359a05bc799c0988e474b2da942c4cc328f656 + env: + - container: kourier-gateway + envVars: + - name: UNION_AUTHZ_TENANTAUTHURL + value: "https://test-controlplane-host/me" + - name: UNION_AUTHZ_TENANTAUTHSIGNINURL + value: "https://test-controlplane-host/login" + - name: UNION_AUTHZ_TENANTCONTROLPLANEURL + value: "https://test-controlplane-host" + resources: + - container: kourier-gateway + limits: + cpu: "2" + memory: 2Gi + requests: + cpu: "1" + memory: 1Gi + - name: activator + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - activator + topologyKey: topology.kubernetes.io/zone + - name: autoscaler + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - autoscaler + topologyKey: topology.kubernetes.io/zone + - name: autoscaler-hpa + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - autoscaler-hpa + topologyKey: topology.kubernetes.io/zone + - name: controller + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - controller + topologyKey: topology.kubernetes.io/zone + - name: net-kourier-controller + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - net-kourier-controller + topologyKey: 
topology.kubernetes.io/zone + env: + - container: controller + envVars: + - name: KOURIER_UNION_AUTHZ_ENABLED + value: "true" + resources: + - container: controller + limits: + cpu: "1" + memory: 1Gi + requests: + cpu: 500m + memory: 500Mi + - name: webhook + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - webhook + topologyKey: topology.kubernetes.io/zone +--- # Source: dataplane/charts/prometheus/templates/prometheus/prometheus.yaml apiVersion: monitoring.coreos.com/v1 kind: Prometheus diff --git a/tests/generated/dataplane.azure.yaml b/tests/generated/dataplane.azure.yaml index 58c451c2..5cf76215 100644 --- a/tests/generated/dataplane.azure.yaml +++ b/tests/generated/dataplane.azure.yaml @@ -581,10 +581,10 @@ data: enabled: true enableTunnelService: true tunnel: - enableDirectToAppIngress: false + enableDirectToAppIngress: true deploymentToRestart: union-operator-proxy apps: - enabled: 'false' + enabled: 'true' syncClusterConfig: enabled: false clusterId: @@ -858,6 +858,133 @@ data: max_size_mbs: 0 target_gc_percent: 70 --- +# Source: dataplane/templates/serving/bootstrap-configmap.yaml +# We need to copy the default configmap here since knative-operator does not automatically update the +# address of the `net-kourier-controller` endpoint to use the release namespace. It assumes that the +# resources are being installed into the `knative-serving` namespace instead. 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: union-operator-serving-envoy-bootstrap + namespace: union +data: + envoy-bootstrap.yaml: | + dynamic_resources: + ads_config: + transport_api_version: V3 + api_type: GRPC + rate_limit_settings: {} + grpc_services: + - envoy_grpc: {cluster_name: xds_cluster} + cds_config: + resource_api_version: V3 + ads: {} + lds_config: + resource_api_version: V3 + ads: {} + node: + cluster: kourier-knative + id: 3scale-kourier-gateway + static_resources: + listeners: + - name: stats_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 9000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: stats_server + http_filters: + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + route_config: + virtual_hosts: + - name: admin_interface + domains: + - "*" + routes: + - match: + safe_regex: + regex: '/(certs|stats(/prometheus)?|server_info|clusters|listeners|ready)?' + headers: + - name: ':method' + string_match: + exact: GET + route: + cluster: service_stats + - match: + safe_regex: + regex: '/drain_listeners' + headers: + - name: ':method' + string_match: + exact: POST + route: + cluster: service_stats + clusters: + - name: service_stats + connect_timeout: 0.250s + type: static + load_assignment: + cluster_name: service_stats + endpoints: + lb_endpoints: + endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 9901 + - name: xds_cluster + # This keepalive is recommended by envoy docs. 
+ # https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: + connection_keepalive: + interval: 30s + timeout: 5s + connect_timeout: 1s + load_assignment: + cluster_name: xds_cluster + endpoints: + lb_endpoints: + endpoint: + address: + socket_address: + address: "net-kourier-controller" + port_value: 18000 + type: STRICT_DNS + admin: + access_log: + - name: envoy.access_loggers.stdout + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog + address: + socket_address: + address: 127.0.0.1 + port_value: 9901 + stats_config: + stats_tags: + - tag_name: name + regex: '^.*?\.u/.*?n=(.*?)/.*?u\..*$' + - tag_name: domain + regex: '^.*?\.u/.*?d=(.*?)/.*?u\..*$' + - tag_name: org + regex: '^.*?\.u/.*?o=(.*?)/.*?u\..*$' + - tag_name: project + regex: '^.*?\.u/.*?p=(.*?)/.*?u\..*$' + - tag_name: target + regex: '^.*?\.u/.*?t=(.*?)/.*?u\..*$' + - tag_name: target_namespace + regex: '^.*?\.u/.*?tns=(.*?)/.*?u\..*$' +--- # Source: dataplane/charts/fluentbit/templates/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -1366,6 +1493,19 @@ rules: - /metrics verbs: - get + - apiGroups: + - serving.knative.dev + resources: + - revisions + - configurations + - services + verbs: + - get + - list + - watch + - create + - update + - delete --- # Source: dataplane/templates/propeller/serviceaccount-webhook.yaml kind: ClusterRole @@ -2730,7 +2870,7 @@ spec: template: metadata: annotations: - configChecksum: "8330ed525c5e6956ed0d1ad63962c0903e8c25b423af2fcc7824542b832251f" + configChecksum: "8cde3b0e6675d39bd7864f21be7b188666282afe4fdeea2ed85f7909bd5307c" labels: @@ -2868,7 +3008,7 @@ spec: template: metadata: annotations: - configChecksum: 
"8330ed525c5e6956ed0d1ad63962c0903e8c25b423af2fcc7824542b832251f" + configChecksum: "8cde3b0e6675d39bd7864f21be7b188666282afe4fdeea2ed85f7909bd5307c" labels: @@ -3301,6 +3441,175 @@ webhooks: admissionReviewVersions: ["v1", "v1beta1"] sideEffects: None --- +# Source: dataplane/templates/serving/knative-serving.yaml +apiVersion: operator.knative.dev/v1beta1 +kind: KnativeServing +metadata: + name: union-operator-serving + namespace: union +spec: + config: + deployment: + progress-deadline: "30m" + queue-sidecar-cpu-request: "25m" + queue-sidecar-cpu-limit: "1000m" + queue-sidecar-memory-request: "400Mi" + queue-sidecar-memory-limit: "800Mi" + queue-sidecar-ephemeral-storage-request: "512Mi" + queue-sidecar-ephemeral-storage-limit: "1024Mi" + registries-skipping-tag-resolving: test.registry.com + features: + kubernetes.podspec-affinity: "enabled" + kubernetes.podspec-nodeselector: "enabled" + kubernetes.podspec-tolerations: "enabled" + kubernetes.podspec-fieldref: "enabled" + network: + ingress-class: "kourier.ingress.networking.knative.dev" + high-availability: + replicas: 2 + ingress: + kourier: + enabled: true + bootstrap-configmap: "union-operator-serving-envoy-bootstrap" + service-type: ClusterIP + podDisruptionBudgets: + - name: 3scale-kourier-gateway-pdb + minAvailable: 50% + - name: activator-pdb + minAvailable: 50% + - name: webhook-pdb + minAvailable: 50% + registry: + override: + # TODO(jeev): Wire up Union fork of Envoy + 3scale-kourier-gateway/kourier-gateway: ghcr.io/unionai/envoy:456fed84d4ad9a9dfb186d117d9362e9dc0f7c1f + # TODO(jeev): Wire up Union fork of Kourier + net-kourier-controller/controller: ghcr.io/unionai/kourier@sha256:5804c348d15b3959604e3e3ceed216c3a1c7b32cbe254c7d3eb02a35e62ba9c4 + workloads: + - name: 3scale-kourier-gateway + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - 
3scale-kourier-gateway + topologyKey: topology.kubernetes.io/zone + annotations: + checksum/bootstrap-config: 6469f6f592eebc9e0c676d8ccc359a05bc799c0988e474b2da942c4cc328f656 + env: + - container: kourier-gateway + envVars: + - name: UNION_AUTHZ_TENANTAUTHURL + value: "https://test.dataplane.union.ai/me" + - name: UNION_AUTHZ_TENANTAUTHSIGNINURL + value: "https://test.dataplane.union.ai/login" + - name: UNION_AUTHZ_TENANTCONTROLPLANEURL + value: "https://test.dataplane.union.ai" + resources: + - container: kourier-gateway + limits: + cpu: "2" + memory: 2Gi + requests: + cpu: "1" + memory: 1Gi + - name: activator + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - activator + topologyKey: topology.kubernetes.io/zone + - name: autoscaler + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - autoscaler + topologyKey: topology.kubernetes.io/zone + - name: autoscaler-hpa + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - autoscaler-hpa + topologyKey: topology.kubernetes.io/zone + - name: controller + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - controller + topologyKey: topology.kubernetes.io/zone + - name: net-kourier-controller + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - net-kourier-controller + topologyKey: topology.kubernetes.io/zone + env: + - 
container: controller + envVars: + - name: KOURIER_UNION_AUTHZ_ENABLED + value: "true" + resources: + - container: controller + limits: + cpu: "1" + memory: 1Gi + requests: + cpu: 500m + memory: 500Mi + - name: webhook + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - webhook + topologyKey: topology.kubernetes.io/zone +--- # Source: dataplane/charts/prometheus/templates/prometheus/prometheus.yaml apiVersion: monitoring.coreos.com/v1 kind: Prometheus diff --git a/tests/values/dataplane.aws.yaml b/tests/values/dataplane.aws.yaml index 24bb0077..606fbbd2 100644 --- a/tests/values/dataplane.aws.yaml +++ b/tests/values/dataplane.aws.yaml @@ -222,4 +222,15 @@ dcgm-exporter: # -- It's common practice to taint accelerator nodes to ensure non accelerator workloads # # tolerations to ensure it only runs on GPU nodes. - # tolerations: [] \ No newline at end of file + # tolerations: [] + +# ---------------------------------------------------------------------------- +# SECTION 8: Serving +# ---------------------------------------------------------------------------- + +serving: + enabled: true + + extraConfig: + deployment: + registries-skipping-tag-resolving: "test.registry.com" \ No newline at end of file diff --git a/tests/values/dataplane.azure.yaml b/tests/values/dataplane.azure.yaml index 7155c01d..6808b1a9 100644 --- a/tests/values/dataplane.azure.yaml +++ b/tests/values/dataplane.azure.yaml @@ -203,3 +203,14 @@ config: non-interruptible-node-selector-requirement: key: kubernetes.azure.com/scalesetpriority operator: DoesNotExist + +# ---------------------------------------------------------------------------- +# SECTION 8: Serving +# ---------------------------------------------------------------------------- + +serving: + enabled: true + + extraConfig: + deployment: + registries-skipping-tag-resolving: "test.registry.com" \ 
No newline at end of file