From d3e770254de0bb301815ca87257c8b1a357d06c4 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sun, 26 Apr 2026 21:02:47 +0800
Subject: Add observability module: ingress-nginx, kube-prometheus-stack, prometheus-adapter

---
 terraform/modules/observability/main.tf | 156 ++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 terraform/modules/observability/main.tf

(limited to 'terraform/modules/observability/main.tf')

diff --git a/terraform/modules/observability/main.tf b/terraform/modules/observability/main.tf
new file mode 100644
index 0000000..2f88f2e
--- /dev/null
+++ b/terraform/modules/observability/main.tf
@@ -0,0 +1,156 @@
+resource "kubernetes_namespace_v1" "monitoring" {
+  metadata {
+    name = var.namespace
+  }
+}
+
+resource "kubernetes_namespace_v1" "ingress" {
+  metadata {
+    name = "ingress-nginx"
+  }
+}
+
+resource "helm_release" "ingress_nginx" {
+  name = "ingress-nginx"
+  repository = "https://kubernetes.github.io/ingress-nginx"
+  chart = "ingress-nginx"
+  version = var.ingress_nginx_version
+  namespace = kubernetes_namespace_v1.ingress.metadata[0].name
+  wait = true
+  timeout = 300
+
+  values = [
+    yamlencode({
+      controller = {
+        hostPort = { enabled = true, ports = { http = 80, https = 443 } }
+        service = { type = "NodePort" }
+        nodeSelector = {
+          "ingress-ready" = "true"
+        }
+        tolerations = [
+          { key = "node-role.kubernetes.io/control-plane", operator = "Equal", effect = "NoSchedule" },
+          { key = "node-role.kubernetes.io/master", operator = "Equal", effect = "NoSchedule" },
+        ]
+        publishService = { enabled = false }
+        admissionWebhooks = { enabled = false } # speeds up kind cluster installs
+        # Cap worker_processes so nginx doesn't try to spawn 14 threads under
+        # CPU pressure from vLLM cold-starts. With auto (= one per CPU) it
+        # sometimes hits pthread EAGAIN and workers die without respawn.
+        config = {
+          "worker-processes" = "4"
+        }
+      }
+    }),
+  ]
+}
+
+resource "helm_release" "kps" {
+  name = "kube-prometheus-stack"
+  repository = "https://prometheus-community.github.io/helm-charts"
+  chart = "kube-prometheus-stack"
+  version = var.kps_version
+  namespace = kubernetes_namespace_v1.monitoring.metadata[0].name
+  wait = true
+  timeout = 600
+
+  values = [
+    yamlencode({
+      fullnameOverride = "kps"
+      prometheus = {
+        prometheusSpec = {
+          # Let Prometheus pick up ServiceMonitors from any namespace matching
+          # the release=kube-prometheus-stack label (the chart's default).
+          serviceMonitorSelectorNilUsesHelmValues = false
+          podMonitorSelectorNilUsesHelmValues = false
+          ruleSelectorNilUsesHelmValues = false
+          retention = "2d"
+          resources = {
+            requests = { cpu = "100m", memory = "400Mi" }
+            limits = { memory = "1Gi" }
+          }
+        }
+        ingress = {
+          enabled = true
+          ingressClassName = "nginx"
+          hosts = ["prom.localtest.me"]
+        }
+      }
+      alertmanager = { enabled = false }
+      grafana = {
+        adminPassword = var.grafana_admin_password
+        sidecar = {
+          dashboards = {
+            enabled = true
+            label = "grafana_dashboard"
+            labelValue = "1"
+            searchNamespace = "ALL"
+          }
+        }
+        service = { type = "ClusterIP" }
+        ingress = {
+          enabled = true
+          ingressClassName = "nginx"
+          hosts = ["grafana.localtest.me"]
+        }
+      }
+    }),
+  ]
+}
+
+resource "helm_release" "prometheus_adapter" {
+  name = "prometheus-adapter"
+  repository = "https://prometheus-community.github.io/helm-charts"
+  chart = "prometheus-adapter"
+  version = var.prometheus_adapter_version
+  namespace = kubernetes_namespace_v1.monitoring.metadata[0].name
+  wait = true
+  timeout = 300
+
+  values = [
+    yamlencode({
+      prometheus = {
+        url = "http://kps-prometheus.${kubernetes_namespace_v1.monitoring.metadata[0].name}.svc"
+        port = 9090
+      }
+      rules = {
+        default = false
+        custom = [
+          {
+            # In-flight request count per pod; basis for autoscaling.
+            # vLLM exposes this as a gauge per model-engine.
+            seriesQuery = "vllm:num_requests_running{namespace!=\"\",pod!=\"\"}"
+            resources = {
+              overrides = {
+                namespace = { resource = "namespace" }
+                pod = { resource = "pod" }
+              }
+            }
+            name = {
+              matches = "^vllm:num_requests_running$"
+              as = "vllm:num_requests_running"
+            }
+            metricsQuery = "avg(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
+          },
+          {
+            # Waiting (queued) requests per pod — an alternative scale signal.
+            seriesQuery = "vllm:num_requests_waiting{namespace!=\"\",pod!=\"\"}"
+            resources = {
+              overrides = {
+                namespace = { resource = "namespace" }
+                pod = { resource = "pod" }
+              }
+            }
+            name = {
+              matches = "^vllm:num_requests_waiting$"
+              as = "vllm:num_requests_waiting"
+            }
+            metricsQuery = "avg(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
+          },
+        ]
+      }
+    }),
+  ]
+
+  depends_on = [helm_release.kps]
+}
+
-- cgit