# Input variables for the module that deploys the local llm-app Helm chart.
#
# Layout note: every block is written in multi-line form because HCL only
# permits the single-line block syntax when the block holds at most one
# argument.
#
# Validation note: the `validation` blocks below deliberately accept `null`
# so that callers who pass null explicitly see no change in behaviour.

# ---------------------------------------------------------------------------
# Release placement
# ---------------------------------------------------------------------------

variable "release_name" {
  type        = string
  description = "Helm release name."
}

variable "namespace" {
  type        = string
  description = "Kubernetes namespace to deploy into."
}

variable "chart_path" {
  type        = string
  description = "Path to the local llm-app chart."
}

# NOTE(review): no description in the original; presumably the workload's
# replica count — confirm against the chart's values before documenting.
variable "replicas" {
  type    = number
  default = 1

  validation {
    # A replica count is only meaningful as a whole number >= 0.
    condition     = var.replicas == null ? true : (var.replicas >= 0 && floor(var.replicas) == var.replicas)
    error_message = "The replicas value must be a whole number greater than or equal to 0."
  }
}

# ---------------------------------------------------------------------------
# Model / vLLM settings
# ---------------------------------------------------------------------------

variable "model_name" {
  type        = string
  description = "HuggingFace repo id, passed as vLLM model_tag (positional)."
}

variable "model_alias" {
  type        = string
  description = "Value clients pass in the OpenAI 'model' field (maps to --served-model-name)."
}

# NOTE(review): undocumented in the original; presumably forwarded to vLLM as
# the maximum context length — confirm where the chart consumes it.
variable "max_model_len" {
  type    = number
  default = 2048
}

# NOTE(review): undocumented in the original; presumably the vLLM dtype
# setting — confirm the accepted values before adding a validation rule.
variable "dtype" {
  type    = string
  default = "bfloat16"
}

variable "omp_threads" {
  type        = number
  default     = 0
  description = "OMP_NUM_THREADS for vLLM CPU backend. 0 = autodetect."

  validation {
    # 0 is the documented "autodetect" sentinel; negative or fractional
    # thread counts have no meaning for OMP_NUM_THREADS.
    condition     = var.omp_threads == null ? true : (var.omp_threads >= 0 && floor(var.omp_threads) == var.omp_threads)
    error_message = "The omp_threads value must be a whole number greater than or equal to 0 (0 = autodetect)."
  }
}

variable "extra_args" {
  type        = list(string)
  default     = []
  description = "Extra CLI args passed to `vllm serve`, appended after the stock set."
}

# ---------------------------------------------------------------------------
# Scheduling and exposure
# ---------------------------------------------------------------------------

# Required: CPU/memory strings for both requests and limits.
# NOTE(review): presumably Kubernetes quantity strings (e.g. "4", "8Gi") —
# confirm against how the chart templates them.
variable "resources" {
  type = object({
    requests = object({ cpu = string, memory = string })
    limits   = object({ cpu = string, memory = string })
  })
}

# NOTE(review): undocumented in the original; presumably the hostname used in
# the chart's Ingress rule — confirm.
variable "ingress_host" {
  type = string
}

# NOTE(review): undocumented in the original; presumably the ingress class
# name — confirm.
variable "ingress_class" {
  type    = string
  default = "nginx"
}

# ---------------------------------------------------------------------------
# Container image
# ---------------------------------------------------------------------------

variable "image_repository" {
  type    = string
  default = "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo"
}

variable "image_tag" {
  type        = string
  default     = "latest"
  description = "Used only when image_digest is empty."
}

variable "image_digest" {
  type        = string
  default     = ""
  description = "Optional sha256:abc... content-addressable digest. Takes precedence over image_tag."

  validation {
    # Empty string means "fall back to image_tag"; anything else must be a
    # complete sha256 digest (64 lowercase hex characters) so that a
    # truncated or mistyped digest fails at plan time, not at image pull.
    condition     = var.image_digest == null ? true : (var.image_digest == "" || can(regex("^sha256:[0-9a-f]{64}$", var.image_digest)))
    error_message = "The image_digest value must be empty or of the form sha256:<64 lowercase hex characters>."
  }
}

# ---------------------------------------------------------------------------
# Observability, storage, autoscaling
# ---------------------------------------------------------------------------

variable "service_monitor_release_label" {
  type        = string
  default     = "kube-prometheus-stack"
  description = "Must match the release label the Prometheus Operator selects on."
}

# NOTE(review): undocumented in the original; presumably the size of a volume
# used to cache model weights — confirm.
variable "model_cache_size" {
  type    = string
  default = "10Gi"
}

# Autoscaling settings; disabled by default.
# NOTE(review): presumably rendered into a HorizontalPodAutoscaler driven by
# the custom metric named in metric_name — confirm against the chart.
variable "hpa" {
  type = object({
    enabled              = bool
    min_replicas         = number
    max_replicas         = number
    metric_name          = string
    target_average_value = string
  })

  default = {
    enabled              = false
    min_replicas         = 1
    max_replicas         = 3
    metric_name          = "vllm:num_requests_running"
    target_average_value = "500m"
  }

  validation {
    # Only checked when autoscaling is enabled, so callers who leave the
    # bounds at arbitrary values while disabled are unaffected. try() keeps
    # a null object or null attributes from turning into an evaluation error.
    condition     = try(!var.hpa.enabled || var.hpa.max_replicas >= var.hpa.min_replicas, true)
    error_message = "When hpa.enabled is true, hpa.max_replicas must be greater than or equal to hpa.min_replicas."
  }
}