diff --git a/cli b/cli index 1e42b26..860493d 100755 --- a/cli +++ b/cli @@ -147,11 +147,7 @@ terraform_apply() { create_cluster_ssh_tunnel() { # Function to check if the tunnel is already running is_tunnel_running() { - if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then - tasklist | grep -q "[s]sh.exe.*6443:localhost:6443.*${instance_ipv4}" - else - pgrep -f "ssh.*-L 6443:localhost:6443.*${instance_ipv4}" > /dev/null - fi + ps -ef | grep -E "ssh.*-L 6443:localhost:6443.*${instance_ipv4}" | grep -v grep > /dev/null } echo "Checking for existing SSH tunnel..." @@ -181,7 +177,7 @@ cmd_create() { create_cluster_ssh_tunnel # 4. Platform setup - #terraform_apply 02-platform + terraform_apply 02-platform # 5. Workload deployment #terraform_apply 03-workloads @@ -215,10 +211,7 @@ cmd_connect() { cmd_cleanup() { # kill tunnels - if pgrep -f "ssh.*-L 6443:localhost:6443.*" > /dev/null - then - pkill -f "ssh.*-L 6443:localhost:6443.*" - fi + ps -ef | grep -E "ssh.*-L 6443:localhost:6443" | grep -v grep | awk '{print $2}' | xargs -r kill -9 # terraform destroy terraform_destroy 00-vm diff --git a/terraform/02-platform/main.tf b/terraform/02-platform/main.tf index e056116..86fed18 100644 --- a/terraform/02-platform/main.tf +++ b/terraform/02-platform/main.tf @@ -59,32 +59,14 @@ module "traefik" { manifest = yamldecode(file("${path.module}/manifests/traefik.yaml")) } -module "rabbitmq" { +module "docker_registry" { + depends_on = [module.traefik] source = "./modules/helm-release" - manifest = yamldecode(file("${path.module}/manifests/rabbitmq.yaml")) + manifest = yamldecode(file("${path.module}/manifests/docker-registry.yaml")) } -module "loki" { +module "mssql" { + depends_on = [module.docker_registry] source = "./modules/helm-release" - manifest = yamldecode(file("${path.module}/manifests/loki.yaml")) -} - -module "grafana" { - source = "./modules/helm-release" - manifest = yamldecode(file("${path.module}/manifests/grafana.yaml")) -} - -module "tempo" { - source = 
"./modules/helm-release" - manifest = yamldecode(file("${path.module}/manifests/tempo.yaml")) -} - -module "mimir" { - source = "./modules/helm-release" - manifest = yamldecode(file("${path.module}/manifests/mimir.yaml")) -} - -module "alloy" { - source = "./modules/helm-release" - manifest = yamldecode(file("${path.module}/manifests/alloy.yaml")) + manifest = yamldecode(file("${path.module}/manifests/mssql.yaml")) } diff --git a/terraform/02-platform/manifests/alloy.yaml b/terraform/02-platform/manifests/alloy.yaml deleted file mode 100644 index 6d9597f..0000000 --- a/terraform/02-platform/manifests/alloy.yaml +++ /dev/null @@ -1,173 +0,0 @@ -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: alloy - namespace: alloy -spec: - repo: https://grafana.github.io/helm-charts - chart: alloy - targetNamespace: alloy - createNamespace: true - version: 0.12.6 - valuesContent: |- - # Simple Grafana Alloy values for local development with LGTM stack - - # Set up a simple deployment for local development - controller: - type: deployment - replicas: 1 - podAnnotations: - prometheus.io/scrape: "true" - prometheus.io/port: "12345" - - # Basic Alloy configuration - alloy: - # Use HTTP scheme for local development - listenAddr: 0.0.0.0 - listenPort: 12345 - - # OpenTelemetry collector config for receiving metrics, traces, and logs - configMap: - create: true - content: | - // Grafana Alloy Configuration for LGTM Stack - // This configuration demonstrates how to collect and ship metrics, logs, and traces - - // Local file discovery for logs - local.file_match "system_logs" { - path_targets = [{"__path__" = "/var/log/**/*.log"}] - } - - // Logs processing pipeline - loki.source.file "local_logs" { - targets = local.file_match.system_logs.targets - forward_to = [loki.write.local.receiver] - } - - // Loki write endpoint configuration - loki.write "local" { - endpoint { - url = "http://loki.loki:3100/loki/api/v1/push" - } - } - - // Prometheus node exporter metrics 
scraping - prometheus.scrape "node_exporter" { - targets = [ - { - "__address__" = "localhost:9100", - }, - ] - forward_to = [prometheus.remote_write.mimir.receiver] - } - - // Mimir remote write configuration - prometheus.remote_write "mimir" { - endpoint { - url = "http://mimir-gateway.mimir/api/v1/push" - } - } - - // OpenTelemetry collector for traces - otelcol.receiver.otlp "default" { - grpc { - endpoint = "0.0.0.0:4317" - } - - http { - endpoint = "0.0.0.0:4318" - } - - output { - metrics = [otelcol.processor.batch.default.input] - logs = [otelcol.processor.batch.default.input] - traces = [otelcol.processor.batch.default.input] - } - } - - // Batch processor to optimize throughput - otelcol.processor.batch "default" { - output { - metrics = [otelcol.exporter.prometheus.mimir.input] - logs = [otelcol.exporter.loki.local.input] - traces = [otelcol.exporter.otlphttp.tempo.input] - } - } - - // Prometheus exporter to convert OTLP metrics to Prometheus format - otelcol.exporter.prometheus "mimir" { - forward_to = [prometheus.remote_write.mimir.receiver] - } - - // Loki exporter for logs - otelcol.exporter.loki "local" { - forward_to = [loki.write.local.receiver] - } - - // Tempo trace exporter - otelcol.exporter.otlphttp "tempo" { - client { - endpoint = "http://tempo.tempo:4318" - } - } - - // Optional: Application metrics from a sample service - prometheus.scrape "example_app" { - targets = [ - { - "__address__" = "example-app:8080", - "job" = "example-service", - }, - ] - forward_to = [prometheus.remote_write.mimir.receiver] - } - - // Health check and debugging components - logging { - level = "info" - format = "logfmt" - } - - # Add extra ports for OTLP receivers - extraPorts: - - name: "otlp-grpc" - port: 4317 - targetPort: 4317 - protocol: "TCP" - - name: "otlp-http" - port: 4318 - targetPort: 4318 - protocol: "TCP" - - # Enable service for accessing Alloy from your applications - service: - enabled: true - type: ClusterIP - annotations: - 
prometheus.io/scrape: "true" - prometheus.io/port: "12345" - - # Simple resource requests for local development - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - memory: 512Mi - - # Default image settings - image: - repository: grafana/alloy - pullPolicy: IfNotPresent - - # Create RBAC resources - rbac: - create: true - - # Create ServiceAccount - serviceAccount: - create: true - - # Configure auto-reload for configuration changes - configReloader: - enabled: true diff --git a/terraform/02-platform/manifests/docker-registry.yaml b/terraform/02-platform/manifests/docker-registry.yaml new file mode 100644 index 0000000..a077127 --- /dev/null +++ b/terraform/02-platform/manifests/docker-registry.yaml @@ -0,0 +1,13 @@ +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: docker-registry + namespace: kube-system +spec: + repo: https://helm.twun.io + chart: docker-registry + targetNamespace: docker-registry + createNamespace: true + version: 2.3.0 + valuesContent: |- + {} \ No newline at end of file diff --git a/terraform/02-platform/manifests/grafana.yaml b/terraform/02-platform/manifests/grafana.yaml deleted file mode 100644 index d34874c..0000000 --- a/terraform/02-platform/manifests/grafana.yaml +++ /dev/null @@ -1,176 +0,0 @@ -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: grafana - namespace: grafana -spec: - repo: https://grafana.github.io/helm-charts - chart: grafana - targetNamespace: grafana - createNamespace: true - version: 8.11.3 - valuesContent: |- - # Simple Grafana values for local development with LGTM stack - - # Basic deployment configuration - replicas: 1 - deploymentStrategy: - type: RollingUpdate - - # Basic security configuration - securityContext: - runAsNonRoot: true - runAsUser: 472 - runAsGroup: 472 - fsGroup: 472 - - containerSecurityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - - # Grafana image configuration - image: - 
repository: grafana/grafana - pullPolicy: IfNotPresent - - # Service configuration - service: - enabled: true - type: ClusterIP - port: 80 - targetPort: 3000 - - # Persistence for dashboards and settings - persistence: - enabled: true - size: 5Gi - accessModes: - - ReadWriteOnce - - # Admin user setup - adminUser: admin - adminPassword: admin - - # Create RBAC resources - rbac: - create: true - - serviceAccount: - create: true - automountServiceAccountToken: true - - # Disable test framework for local development - testFramework: - enabled: false - - # Probe configuration - readinessProbe: - httpGet: - path: /api/health - port: 3000 - initialDelaySeconds: 10 - timeoutSeconds: 30 - failureThreshold: 10 - - livenessProbe: - httpGet: - path: /api/health - port: 3000 - initialDelaySeconds: 60 - timeoutSeconds: 30 - failureThreshold: 10 - - # Resources for local development - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - memory: 512Mi - - # Grafana configuration - grafana.ini: - paths: - data: /var/lib/grafana/ - logs: /var/log/grafana - plugins: /var/lib/grafana/plugins - provisioning: /etc/grafana/provisioning - - server: - root_url: "%(protocol)s://%(domain)s:%(http_port)s/" - serve_from_sub_path: false - - auth: - disable_login_form: false - - auth.anonymous: - enabled: true - org_role: Viewer - - analytics: - check_for_updates: false - reporting_enabled: false - - log: - mode: console - level: info - - # Configure datasources for the LGTM stack - datasources: - datasources.yaml: - apiVersion: 1 - datasources: - - name: Prometheus - type: prometheus - url: http://mimir-gateway.mimir/prometheus - access: proxy - uid: prometheus - - - name: Loki - type: loki - uid: loki - url: http://loki.loki:3100 - access: proxy - - - name: Tempo - type: tempo - uid: tempo - url: http://tempo.tempo:3100 - access: proxy - jsonData: - httpMethod: GET - tracesToLogsV2: - # Field with an internal link pointing to a logs data source in Grafana. 
- # datasourceUid value must match the uid value of the logs data source. - datasourceUid: 'loki' - spanStartTimeShift: '-1h' - spanEndTimeShift: '1h' - tags: ['job', 'instance', 'pod', 'namespace'] - filterByTraceID: false - filterBySpanID: false - customQuery: true - query: 'method="$${__span.tags.method}"' - tracesToMetrics: - datasourceUid: 'prometheus' - spanStartTimeShift: '-1h' - spanEndTimeShift: '1h' - tags: [{ key: 'service.name', value: 'service' }, { key: 'job' }] - serviceMap: - datasourceUid: 'prometheus' - nodeGraph: - enabled: true - search: - hide: false - traceQuery: - timeShiftEnabled: true - spanStartTimeShift: '-1h' - spanEndTimeShift: '1h' - spanBar: - type: 'Tag' - tag: 'http.path' - streamingEnabled: - search: false diff --git a/terraform/02-platform/manifests/loki.yaml b/terraform/02-platform/manifests/loki.yaml deleted file mode 100644 index e747795..0000000 --- a/terraform/02-platform/manifests/loki.yaml +++ /dev/null @@ -1,96 +0,0 @@ -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: loki - namespace: monitoring -spec: - repo: https://grafana.github.io/helm-charts - chart: loki - targetNamespace: loki - createNamespace: true - version: 6.29.0 - valuesContent: |- - loki: - auth_enabled: false - commonConfig: - replication_factor: 1 - schemaConfig: - configs: - - from: 2024-04-01 - store: tsdb - object_store: s3 - schema: v13 - index: - prefix: loki_index_ - period: 24h - ingester: - chunk_encoding: snappy - tracing: - enabled: true - querier: - # Default is 4, if you have enough memory and CPU you can increase, reduce if OOMing - max_concurrent: 2 - - #gateway: - # ingress: - # enabled: true - # hosts: - # - host: FIXME - # paths: - # - path: / - # pathType: Prefix - - deploymentMode: SingleBinary - singleBinary: - replicas: 1 - resources: - limits: - cpu: 2 - memory: 2Gi - requests: - cpu: 1 - memory: 1Gi - extraEnv: - # Keep a little bit lower than memory limits - - name: GOMEMLIMIT - value: 750MiB - - chunksCache: - # 
default is 500MB, with limited memory keep this smaller - writebackSizeLimit: 10MB - allocatedMemory: 256 - - resultsCache: - enabled: true - allocatedMemory: 256 - - # Enable minio for storage - minio: - enabled: true - - # Zero out replica counts of other deployment modes - backend: - replicas: 0 - read: - replicas: 0 - write: - replicas: 0 - - ingester: - replicas: 0 - querier: - replicas: 0 - queryFrontend: - replicas: 0 - queryScheduler: - replicas: 0 - distributor: - replicas: 0 - compactor: - replicas: 0 - indexGateway: - replicas: 0 - bloomCompactor: - replicas: 0 - bloomGateway: - replicas: 0 diff --git a/terraform/02-platform/manifests/mimir.yaml b/terraform/02-platform/manifests/mimir.yaml deleted file mode 100644 index 4bc3fbf..0000000 --- a/terraform/02-platform/manifests/mimir.yaml +++ /dev/null @@ -1,74 +0,0 @@ -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: mimir - namespace: mimir -spec: - repo: https://grafana.github.io/helm-charts - chart: mimir-distributed - targetNamespace: mimir - createNamespace: true - version: 5.6.0 - valuesContent: |- - alertmanager: - resources: - requests: - cpu: 20m - compactor: - resources: - requests: - cpu: 20m - distributor: - resources: - requests: - cpu: 20m - ingester: - # TODO: config ingester.ring.replication_factor to 1? 
- replicas: 3 - zoneAwareReplication: - enabled: false - resources: - requests: - cpu: 20m - overrides_exporter: - resources: - requests: - cpu: 20m - querier: - replicas: 1 - resources: - requests: - cpu: 20m - query_frontend: - resources: - requests: - cpu: 20m - query_scheduler: - replicas: 1 - resources: - requests: - cpu: 20m - ruler: - resources: - requests: - cpu: 20m - store_gateway: - zoneAwareReplication: - enabled: false - resources: - requests: - cpu: 20m - minio: - resources: - requests: - cpu: 20m - rollout_operator: - resources: - requests: - cpu: 20m - nginx: - enabled: false - gateway: - enabledNonEnterprise: true - replicas: 1 - diff --git a/terraform/02-platform/manifests/mssql.yaml b/terraform/02-platform/manifests/mssql.yaml new file mode 100644 index 0000000..51b0d36 --- /dev/null +++ b/terraform/02-platform/manifests/mssql.yaml @@ -0,0 +1,139 @@ +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: mssql + namespace: kube-system +spec: + repo: https://raw.githubusercontent.com/brandonros/hull-wrapper/master/ + chart: hull-wrapper + targetNamespace: mssql + createNamespace: true + version: 0.2.0 + valuesContent: |- + hull-wrapper: + hull: + config: + general: + nameOverride: mssql + rbac: false + noObjectNamePrefixes: true + + objects: + serviceaccount: + default: + enabled: false + + service: + mssql: + type: ClusterIP + ports: + sql: + port: 1433 + targetPort: 1433 + + statefulset: + mssql: + replicas: 1 + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + - metadata: + name: secrets + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + - metadata: + name: log + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + - metadata: + name: system + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + pod: + initContainers: + permission-fix: + image: + repository: busybox 
+ tag: latest + command: + - sh + - -c + - | + chown -R 10001:0 /.system + chown -R 10001:0 /var/opt/mssql/data + chown -R 10001:0 /var/opt/mssql/log + chown -R 10001:0 /var/opt/mssql/secrets + chmod -R 755 /.system + chmod -R 755 /var/opt/mssql/data + chmod -R 755 /var/opt/mssql/log + chmod -R 755 /var/opt/mssql/secrets + securityContext: + runAsUser: 0 + runAsGroup: 0 + volumeMounts: + data: + name: data + mountPath: /var/opt/mssql/data + log: + name: log + mountPath: /var/opt/mssql/log + secrets: + name: secrets + mountPath: /var/opt/mssql/secrets + system: + name: system + mountPath: /.system + containers: + main: + resources: + requests: + memory: 1Gi + cpu: 500m + limits: + memory: 4Gi + cpu: 2000m + image: + repository: mcr.microsoft.com/mssql/server + tag: 2022-latest + env: + ACCEPT_EULA: + value: 'Y' + SA_PASSWORD: + value: 'Test_Password123!' + securityContext: + runAsUser: 10001 + runAsGroup: 10001 + ports: + sql: + containerPort: 1433 + volumeMounts: + system: + name: system + mountPath: /.system + log: + name: log + mountPath: /log + secrets: + name: secrets + mountPath: /var/opt/mssql/secrets + data: + name: data + mountPath: /var/opt/mssql/data diff --git a/terraform/02-platform/manifests/rabbitmq.yaml b/terraform/02-platform/manifests/rabbitmq.yaml deleted file mode 100644 index 301e21f..0000000 --- a/terraform/02-platform/manifests/rabbitmq.yaml +++ /dev/null @@ -1,52 +0,0 @@ -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: rabbitmq - namespace: kube-system -spec: - repo: oci://registry-1.docker.io/bitnamicharts - chart: rabbitmq - targetNamespace: rabbitmq - createNamespace: true - version: 15.4.1 - valuesContent: |- - ## Disable clustering since we're running a single instance - clustering: - enabled: false - - ## Set a simple fixed password and disable secure password - auth: - username: user - password: password - enableLoopbackUser: false - securePassword: false - tls: - enabled: false - - ## Reduce resource usage for local 
development - resources: - requests: - memory: 256Mi - cpu: 100m - limits: - memory: 512Mi - cpu: 200m - - ## Use a smaller persistence size - persistence: - size: 1Gi - - ## Expose service ports for local access - service: - type: ClusterIP - - ## Disable metrics for simplicity - metrics: - enabled: false - - ## Single replica for local development - replicaCount: 1 - - # Disable LDAP authentication - ldap: - enabled: false diff --git a/terraform/02-platform/manifests/tempo.yaml b/terraform/02-platform/manifests/tempo.yaml deleted file mode 100644 index bcf96ce..0000000 --- a/terraform/02-platform/manifests/tempo.yaml +++ /dev/null @@ -1,120 +0,0 @@ -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: tempo - namespace: tempo -spec: - repo: https://grafana.github.io/helm-charts - chart: tempo - targetNamespace: tempo - createNamespace: true - version: 1.20.0 - valuesContent: |- - # Simple Grafana Tempo values for local development with LGTM stack - - # Basic deployment with single replica - replicas: 1 - - # Basic labels for easier identification - labels: - app: tempo - environment: development - - # Pod annotations - podAnnotations: - prometheus.io/scrape: "true" - prometheus.io/port: "3100" - - tempo: - repository: grafana/tempo - pullPolicy: IfNotPresent - - # Enable metrics generator for service graphs - metricsGenerator: - enabled: true - remoteWriteUrl: "http://mimir-gateway.mimir/api/v1/push" - - # Set short retention for local development - retention: 24h - - # Basic server config - server: - http_listen_port: 3100 - - # Enable multitenancy if you plan to test multiple "tenants" - multitenancyEnabled: false - - # Storage configuration - using local storage for development - storage: - trace: - backend: local - local: - path: /var/tempo/traces - wal: - path: /var/tempo/wal - - # Configure receivers for various trace formats - receivers: - jaeger: - protocols: - grpc: - endpoint: 0.0.0.0:14250 - thrift_binary: - endpoint: 0.0.0.0:6832 - 
thrift_compact: - endpoint: 0.0.0.0:6831 - thrift_http: - endpoint: 0.0.0.0:14268 - otlp: - protocols: - grpc: - endpoint: "0.0.0.0:4317" - http: - endpoint: "0.0.0.0:4318" - - # Enable service graphs and span metrics - overrides: - defaults: - metrics_generator: - processors: - - service-graphs - - span-metrics - - # Reasonable resource limits for local development - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - memory: 512Mi - - # Enable tempoQuery (Jaeger UI) for local trace visualization - tempoQuery: - enabled: true - service: - port: 16686 - - # Set up persistent storage for local development - persistence: - enabled: true - size: 5Gi - accessModes: - - ReadWriteOnce - - # Create service for accessing Tempo - service: - type: ClusterIP - annotations: - prometheus.io/scrape: "true" - prometheus.io/port: "3100" - - # Create service account - serviceAccount: - create: true - - # Basic pod security context - securityContext: - runAsUser: 10001 - runAsGroup: 10001 - fsGroup: 10001 - runAsNonRoot: true \ No newline at end of file