diff options
| author | Your Name <you@example.com> | 2026-04-26 21:02:47 +0800 |
|---|---|---|
| committer | Your Name <you@example.com> | 2026-04-26 21:02:47 +0800 |
| commit | d3e770254de0bb301815ca87257c8b1a357d06c4 (patch) | |
| tree | 358c814be2a06b9e2009905f14938243286b8d82 /terraform/modules/llm/variables.tf | |
Diffstat (limited to 'terraform/modules/llm/variables.tf')
| -rw-r--r-- | terraform/modules/llm/variables.tf | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/terraform/modules/llm/variables.tf b/terraform/modules/llm/variables.tf new file mode 100644 index 0000000..3a7d8f7 --- /dev/null +++ b/terraform/modules/llm/variables.tf | |||
| @@ -0,0 +1,112 @@ | |||
# Required input (no default is declared): the name of the Helm release.
variable "release_name" {
  description = "Helm release name."
  type        = string
}
| 5 | |||
# Required input (no default is declared): the target Kubernetes namespace.
variable "namespace" {
  description = "Kubernetes namespace to deploy into."
  type        = string
}
| 10 | |||
# Required input (no default is declared): where the local llm-app chart lives.
variable "chart_path" {
  description = "Path to the local llm-app chart."
  type        = string
}
| 15 | |||
# Replica count; optional, defaults to 1.
variable "replicas" {
  type        = number
  default     = 1
  description = "Number of replicas to run. Must be a non-negative whole number."

  # Terraform's `number` type also admits fractions and negatives, neither of
  # which is a meaningful replica count, so reject them at plan time.
  validation {
    condition     = var.replicas >= 0 && floor(var.replicas) == var.replicas
    error_message = "The replicas value must be a non-negative whole number."
  }
}
| 20 | |||
# Required input (no default is declared): identifies the model to serve.
variable "model_name" {
  description = "HuggingFace repo id, passed as vLLM model_tag (positional)."
  type        = string
}
| 25 | |||
# Required input (no default is declared): the client-facing model name.
variable "model_alias" {
  description = "Value clients pass in the OpenAI 'model' field (maps to --served-model-name)."
  type        = string
}
| 30 | |||
# Maximum model length; optional, defaults to 2048.
# NOTE(review): presumably forwarded to vLLM's --max-model-len — confirm
# against the chart/template that consumes this variable.
variable "max_model_len" {
  type        = number
  default     = 2048
  description = "Maximum model length. Must be a positive whole number."

  # A zero, negative or fractional length is not meaningful; fail at plan time
  # instead of letting the value reach the workload.
  validation {
    condition     = var.max_model_len > 0 && floor(var.max_model_len) == var.max_model_len
    error_message = "The max_model_len value must be a positive whole number."
  }
}
| 35 | |||
# Data type setting; optional, defaults to "bfloat16".
# NOTE(review): presumably forwarded to vLLM's --dtype flag — confirm against
# the chart before adding an allow-list validation here.
variable "dtype" {
  type        = string
  default     = "bfloat16"
  description = "Data type setting for the model server (for example \"bfloat16\")."
}
| 40 | |||
# OpenMP thread count; optional, defaults to 0 (documented as autodetect).
variable "omp_threads" {
  type        = number
  default     = 0
  description = "OMP_NUM_THREADS for vLLM CPU backend. 0 = autodetect."

  # 0 is the documented "autodetect" sentinel, so anything below it (or any
  # fractional value) cannot be a valid thread count.
  validation {
    condition     = var.omp_threads >= 0 && floor(var.omp_threads) == var.omp_threads
    error_message = "The omp_threads value must be 0 (autodetect) or a positive whole number."
  }
}
| 46 | |||
# Optional list of additional CLI arguments; empty unless the caller sets it.
variable "extra_args" {
  description = "Extra CLI args passed to `vllm serve`, appended after the stock set."
  type        = list(string)
  default     = []
}
| 52 | |||
# Required input (no default is declared): CPU/memory requests and limits.
# All four leaf values are strings and every attribute must be supplied.
variable "resources" {
  type = object({
    requests = object({ cpu = string, memory = string })
    limits   = object({ cpu = string, memory = string })
  })
  description = "CPU and memory requests and limits, given as string quantities (for example \"2\" and \"8Gi\")."
}
| 59 | |||
# Required input (no default is declared): the ingress hostname.
variable "ingress_host" {
  type        = string
  description = "Hostname for the ingress."
}
| 63 | |||
# Ingress class; optional, defaults to "nginx".
variable "ingress_class" {
  type        = string
  default     = "nginx"
  description = "Ingress class name."
}
| 68 | |||
# Container image repository; optional, defaults to the public ECR repo below.
variable "image_repository" {
  type        = string
  default     = "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo"
  description = "Container image repository, without tag or digest."
}
| 73 | |||
# Image tag; optional, defaults to "latest".
# NOTE(review): "latest" is a mutable tag — callers wanting reproducible
# deployments should pin the image via image_digest instead.
variable "image_tag" {
  description = "Used only when image_digest is empty."
  type        = string
  default     = "latest"
}
| 79 | |||
# Optional image digest; the empty-string default means "not set".
variable "image_digest" {
  type        = string
  default     = ""
  description = "Optional sha256:abc... content-addressable digest. Takes precedence over image_tag."

  # Because a non-empty digest overrides image_tag, a malformed value would
  # only surface as an image pull failure; catch it at plan time instead.
  validation {
    condition     = var.image_digest == "" || can(regex("^sha256:[0-9a-f]{64}$", var.image_digest))
    error_message = "The image_digest value must be empty or of the form sha256:<64 lowercase hex characters>."
  }
}
| 85 | |||
# Release label value; optional, defaults to "kube-prometheus-stack".
variable "service_monitor_release_label" {
  description = "Must match the release label the Prometheus Operator selects on."
  type        = string
  default     = "kube-prometheus-stack"
}
| 91 | |||
# Model cache size; optional, defaults to "10Gi".
# NOTE(review): presumably sizes the volume that holds downloaded model
# weights — confirm against the chart that consumes this value.
variable "model_cache_size" {
  type        = string
  default     = "10Gi"
  description = "Model cache size as a quantity string (for example \"10Gi\")."
}
| 96 | |||
# Autoscaling settings; optional, disabled by default.
# The type has no optional attributes, so a caller overriding this object
# must supply all five fields.
variable "hpa" {
  type = object({
    enabled              = bool
    min_replicas         = number
    max_replicas         = number
    metric_name          = string
    target_average_value = string
  })
  default = {
    enabled              = false
    min_replicas         = 1
    max_replicas         = 3
    metric_name          = "vllm:num_requests_running"
    target_average_value = "500m"
  }
  description = "Horizontal Pod Autoscaler settings: on/off switch, replica bounds, the metric to scale on and its target average value."

  # An inverted replica range can never be satisfied; reject it at plan time.
  validation {
    condition     = var.hpa.min_replicas <= var.hpa.max_replicas
    error_message = "The hpa.min_replicas value must be less than or equal to hpa.max_replicas."
  }
}
