From d3e770254de0bb301815ca87257c8b1a357d06c4 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sun, 26 Apr 2026 21:02:47 +0800
Subject: hehe

---
 terraform/modules/llm/variables.tf | 151 +++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100644 terraform/modules/llm/variables.tf

(limited to 'terraform/modules/llm/variables.tf')

diff --git a/terraform/modules/llm/variables.tf b/terraform/modules/llm/variables.tf
new file mode 100644
index 0000000..3a7d8f7
--- /dev/null
+++ b/terraform/modules/llm/variables.tf
@@ -0,0 +1,151 @@
+# Input variables for the llm module.
+#
+# NOTE(review): descriptions on replicas, max_model_len, dtype, resources,
+# ingress_host, ingress_class, image_repository, model_cache_size and hpa were
+# inferred from the variable names and defaults; the code that consumes them
+# is not part of this change. Confirm against the module body and the chart.
+
+variable "release_name" {
+  type        = string
+  description = "Helm release name."
+}
+
+variable "namespace" {
+  type        = string
+  description = "Kubernetes namespace to deploy into."
+}
+
+variable "chart_path" {
+  type        = string
+  description = "Path to the local llm-app chart."
+}
+
+variable "replicas" {
+  type        = number
+  default     = 1
+  description = "Replica count for the release."
+
+  validation {
+    condition     = var.replicas >= 0 && floor(var.replicas) == var.replicas
+    error_message = "The replicas value must be a non-negative whole number."
+  }
+}
+
+variable "model_name" {
+  type        = string
+  description = "HuggingFace repo id, passed as vLLM model_tag (positional)."
+}
+
+variable "model_alias" {
+  type        = string
+  description = "Value clients pass in the OpenAI 'model' field (maps to --served-model-name)."
+}
+
+variable "max_model_len" {
+  type        = number
+  default     = 2048
+  description = "Maximum model context length, in tokens."
+}
+
+variable "dtype" {
+  type        = string
+  default     = "bfloat16"
+  description = "Data type for model weights and activations."
+}
+
+variable "omp_threads" {
+  type        = number
+  default     = 0
+  description = "OMP_NUM_THREADS for vLLM CPU backend. 0 = autodetect."
+
+  validation {
+    condition     = var.omp_threads >= 0 && floor(var.omp_threads) == var.omp_threads
+    error_message = "The omp_threads value must be 0 (autodetect) or a positive whole number."
+  }
+}
+
+variable "extra_args" {
+  type        = list(string)
+  default     = []
+  description = "Extra CLI args passed to `vllm serve`, appended after the stock set."
+}
+
+variable "resources" {
+  type = object({
+    requests = object({ cpu = string, memory = string })
+    limits   = object({ cpu = string, memory = string })
+  })
+  description = "CPU and memory requests and limits, as Kubernetes quantity strings."
+}
+
+variable "ingress_host" {
+  type        = string
+  description = "Hostname for the ingress."
+}
+
+variable "ingress_class" {
+  type        = string
+  default     = "nginx"
+  description = "Ingress class name."
+}
+
+variable "image_repository" {
+  type        = string
+  default     = "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo"
+  description = "Container image repository, without tag or digest."
+}
+
+# NOTE(review): "latest" is a mutable tag; set image_digest to pin the image.
+variable "image_tag" {
+  type        = string
+  default     = "latest"
+  description = "Used only when image_digest is empty."
+}
+
+variable "image_digest" {
+  type        = string
+  default     = ""
+  description = "Optional sha256:abc... content-addressable digest. Takes precedence over image_tag."
+
+  validation {
+    condition     = var.image_digest == "" || can(regex("^sha256:[0-9a-f]{64}$", var.image_digest))
+    error_message = "The image_digest value must be empty or of the form sha256:<64 lowercase hex characters>."
+  }
+}
+
+variable "service_monitor_release_label" {
+  type        = string
+  default     = "kube-prometheus-stack"
+  description = "Must match the release label the Prometheus Operator selects on."
+}
+
+variable "model_cache_size" {
+  type        = string
+  default     = "10Gi"
+  description = "Size of the model cache volume, as a Kubernetes quantity string."
+}
+
+variable "hpa" {
+  type = object({
+    enabled              = bool
+    min_replicas         = number
+    max_replicas         = number
+    metric_name          = string
+    target_average_value = string
+  })
+  default = {
+    enabled              = false
+    min_replicas         = 1
+    max_replicas         = 3
+    metric_name          = "vllm:num_requests_running"
+    target_average_value = "500m"
+  }
+  description = "Horizontal Pod Autoscaler settings."
+
+  validation {
+    # Bounds are only checked when autoscaling is on, so a disabled
+    # configuration is never rejected.
+    condition     = !var.hpa.enabled || var.hpa.min_replicas <= var.hpa.max_replicas
+    error_message = "The hpa.min_replicas value must not exceed hpa.max_replicas when hpa.enabled is true."
+  }
+}
-- 
cgit