From d3e770254de0bb301815ca87257c8b1a357d06c4 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sun, 26 Apr 2026 21:02:47 +0800
Subject: hehe

---
Review notes (free-form commentary; not part of the file being added):

  * kubernetes_namespace_v1.this creates the namespace named by
    var.namespace and labels it "app.kubernetes.io/part-of" = "llm-platform".
  * kubernetes_horizontal_pod_autoscaler_v2.llm is created only when
    var.hpa.enabled is true (count = 1, otherwise 0). It targets the apps/v1
    Deployment "${var.release_name}-llm-app" in that namespace and scales
    between var.hpa.min_replicas and var.hpa.max_replicas on a single "Pods"
    metric (var.hpa.metric_name) with an "AverageValue" target. It declares
    depends_on = [helm_release.llm].
  * helm_release.llm installs var.chart_path as release var.release_name
    into the same namespace (create_namespace = false, atomic = false,
    wait = true, timeout = 1800) and passes one yamlencode()'d values
    document built from the module variables.
  * NOTE(review): the HPA target name presumes the chart names its
    Deployment "<release>-llm-app"; the chart is not visible here - confirm.
  * NOTE(review): replicaCount = var.replicas is passed to the chart even
    when the HPA is enabled; confirm the chart does not pin replicas in a
    way that competes with the autoscaler.
  * NOTE(review): timeout = 1800 is presumably seconds (30 minutes) per the
    helm provider - confirm this is intended.
  * NOTE(review): server.port (8000), ingress.enabled (true) and the
    serviceMonitor enabled/interval values (true, "15s") are hard-coded
    rather than taken from variables.

 terraform/modules/llm/main.tf | 99 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 terraform/modules/llm/main.tf

(limited to 'terraform/modules/llm/main.tf')

diff --git a/terraform/modules/llm/main.tf b/terraform/modules/llm/main.tf
new file mode 100644
index 0000000..cd22019
--- /dev/null
+++ b/terraform/modules/llm/main.tf
@@ -0,0 +1,99 @@
+resource "kubernetes_namespace_v1" "this" {
+  metadata {
+    name = var.namespace
+    labels = {
+      "app.kubernetes.io/part-of" = "llm-platform"
+    }
+  }
+}
+
+resource "kubernetes_horizontal_pod_autoscaler_v2" "llm" {
+  count = var.hpa.enabled ? 1 : 0
+
+  metadata {
+    name      = "${var.release_name}-llm-app"
+    namespace = kubernetes_namespace_v1.this.metadata[0].name
+  }
+  spec {
+    scale_target_ref {
+      api_version = "apps/v1"
+      kind        = "Deployment"
+      name        = "${var.release_name}-llm-app"
+    }
+    min_replicas = var.hpa.min_replicas
+    max_replicas = var.hpa.max_replicas
+
+    metric {
+      type = "Pods"
+      pods {
+        metric {
+          name = var.hpa.metric_name
+        }
+        target {
+          type          = "AverageValue"
+          average_value = var.hpa.target_average_value
+        }
+      }
+    }
+  }
+
+  depends_on = [helm_release.llm]
+}
+
+resource "helm_release" "llm" {
+  name             = var.release_name
+  chart            = var.chart_path
+  namespace        = kubernetes_namespace_v1.this.metadata[0].name
+  create_namespace = false
+  atomic           = false
+  wait             = true
+  timeout          = 1800
+
+  values = [
+    yamlencode({
+      replicaCount = var.replicas
+
+      image = {
+        repository = var.image_repository
+        tag        = var.image_tag
+        digest     = var.image_digest
+        pullPolicy = "IfNotPresent"
+      }
+
+      model = {
+        name        = var.model_name
+        alias       = var.model_alias
+        maxModelLen = var.max_model_len
+        dtype       = var.dtype
+      }
+
+      server = {
+        port       = 8000
+        ompThreads = var.omp_threads
+        extraArgs  = var.extra_args
+      }
+
+      resources = var.resources
+
+      ingress = {
+        enabled   = true
+        className = var.ingress_class
+        host      = var.ingress_host
+      }
+
+      monitoring = {
+        serviceMonitor = {
+          enabled  = true
+          interval = "15s"
+          labels = {
+            release = var.service_monitor_release_label
+          }
+        }
+      }
+
+      modelCache = {
+        sizeLimit = var.model_cache_size
+      }
+    }),
+  ]
+}
-- 
cgit