# Namespace that hosts the LLM release; labeled so cluster tooling can
# group it with the rest of the platform.
resource "kubernetes_namespace_v1" "this" {
  metadata {
    name = var.namespace

    labels = {
      "app.kubernetes.io/part-of" = "llm-platform"
    }
  }
}

# Helm release for the LLM application chart. The namespace is created by
# the resource above (create_namespace = false), and `wait = true` with a
# 30-minute timeout blocks the apply until the chart's workloads are ready.
resource "helm_release" "llm" {
  name             = var.release_name
  chart            = var.chart_path
  namespace        = kubernetes_namespace_v1.this.metadata[0].name
  create_namespace = false
  atomic           = false
  wait             = true
  timeout          = 1800

  values = [
    yamlencode({
      # NOTE(review): replicaCount is always rendered, even when the HPA
      # below is enabled — depending on how the chart maps this to
      # Deployment.spec.replicas, each apply may fight the autoscaler's
      # chosen replica count. Confirm against the chart templates.
      replicaCount = var.replicas

      image = {
        repository = var.image_repository
        tag        = var.image_tag
        digest     = var.image_digest
        pullPolicy = "IfNotPresent"
      }

      model = {
        name        = var.model_name
        alias       = var.model_alias
        maxModelLen = var.max_model_len
        dtype       = var.dtype
      }

      server = {
        port       = 8000
        ompThreads = var.omp_threads
        extraArgs  = var.extra_args
      }

      resources = var.resources

      ingress = {
        enabled   = true
        className = var.ingress_class
        host      = var.ingress_host
      }

      monitoring = {
        serviceMonitor = {
          enabled  = true
          interval = "15s"
          labels = {
            release = var.service_monitor_release_label
          }
        }
      }

      modelCache = {
        sizeLimit = var.model_cache_size
      }
    }),
  ]
}

# Optional autoscaler for the chart's Deployment, driven by a pods-type
# custom metric. Created only when var.hpa.enabled is true; depends_on
# ensures the target Deployment exists before the HPA is submitted.
resource "kubernetes_horizontal_pod_autoscaler_v2" "llm" {
  count = var.hpa.enabled ? 1 : 0

  metadata {
    name      = "${var.release_name}-llm-app"
    namespace = kubernetes_namespace_v1.this.metadata[0].name
  }

  spec {
    scale_target_ref {
      api_version = "apps/v1"
      kind        = "Deployment"
      # Assumes the chart names its Deployment "<release>-llm-app" —
      # TODO confirm against the chart's naming helpers.
      name = "${var.release_name}-llm-app"
    }

    min_replicas = var.hpa.min_replicas
    max_replicas = var.hpa.max_replicas

    metric {
      type = "Pods"

      pods {
        metric {
          name = var.hpa.metric_name
        }

        target {
          type          = "AverageValue"
          average_value = var.hpa.target_average_value
        }
      }
    }
  }

  depends_on = [helm_release.llm]
}