summaryrefslogtreecommitdiff
path: root/terraform/modules/llm/variables.tf
diff options
context:
space:
mode:
authorYour Name <you@example.com>2026-04-26 21:02:47 +0800
committerYour Name <you@example.com>2026-04-26 21:02:47 +0800
commitd3e770254de0bb301815ca87257c8b1a357d06c4 (patch)
tree358c814be2a06b9e2009905f14938243286b8d82 /terraform/modules/llm/variables.tf
Diffstat (limited to 'terraform/modules/llm/variables.tf')
-rw-r--r--terraform/modules/llm/variables.tf112
1 file changed, 112 insertions, 0 deletions
diff --git a/terraform/modules/llm/variables.tf b/terraform/modules/llm/variables.tf
new file mode 100644
index 0000000..3a7d8f7
--- /dev/null
+++ b/terraform/modules/llm/variables.tf
@@ -0,0 +1,112 @@
# Name assigned to the Helm release this module manages.
variable "release_name" {
  type        = string
  description = "Helm release name."
}
5
# Target namespace for all rendered chart resources.
# NOTE(review): not visible here whether the namespace is created by this
# module or assumed to pre-exist — confirm against the module's main.tf.
variable "namespace" {
  type        = string
  description = "Kubernetes namespace to deploy into."
}
10
# Filesystem path to the local llm-app Helm chart (not a repository URL).
variable "chart_path" {
  type        = string
  description = "Path to the local llm-app chart."
}
15
# Baseline replica count passed to the chart. When var.hpa.enabled is true
# the HPA takes over scaling, so this value mainly matters at install time.
variable "replicas" {
  type        = number
  default     = 1
  description = "Number of pod replicas for the release."

  validation {
    condition     = var.replicas >= 1 && floor(var.replicas) == var.replicas
    error_message = "replicas must be a positive whole number."
  }
}
20
# HuggingFace repository id of the model to serve; the chart forwards it
# to `vllm serve` as the positional model_tag argument.
variable "model_name" {
  type        = string
  description = "HuggingFace repo id, passed as vLLM model_tag (positional)."
}
25
# Public model identifier exposed on the OpenAI-compatible API; decoupled
# from model_name so the served repo can change without breaking clients.
variable "model_alias" {
  type        = string
  description = "Value clients pass in the OpenAI 'model' field (maps to --served-model-name)."
}
30
# Context window ceiling for the engine; presumably forwarded to vLLM's
# --max-model-len flag — confirm against the chart's args template.
variable "max_model_len" {
  type        = number
  default     = 2048
  description = "Maximum model context length (prompt + generation tokens)."

  validation {
    condition     = var.max_model_len >= 1 && floor(var.max_model_len) == var.max_model_len
    error_message = "max_model_len must be a positive whole number of tokens."
  }
}
35
# Weight/activation precision for the engine; restricted to the dtype
# values vLLM's --dtype flag accepts so typos fail at plan time.
variable "dtype" {
  type        = string
  default     = "bfloat16"
  description = "Model dtype passed to vLLM (--dtype)."

  validation {
    condition     = contains(["auto", "half", "float16", "bfloat16", "float", "float32"], var.dtype)
    error_message = "dtype must be one of: auto, half, float16, bfloat16, float, float32."
  }
}
40
# CPU-backend thread pool size. The description's "0 = autodetect" contract
# implies only non-negative whole numbers are meaningful, so reject the rest.
variable "omp_threads" {
  type        = number
  default     = 0
  description = "OMP_NUM_THREADS for vLLM CPU backend. 0 = autodetect."

  validation {
    condition     = var.omp_threads >= 0 && floor(var.omp_threads) == var.omp_threads
    error_message = "omp_threads must be a non-negative whole number (0 = autodetect)."
  }
}
46
# Escape hatch for flags this module does not model explicitly; appended
# after the module-managed arguments, so later flags win on conflict only
# if vLLM's CLI honors last-one-wins — TODO confirm.
variable "extra_args" {
  type        = list(string)
  default     = []
  description = "Extra CLI args passed to `vllm serve`, appended after the stock set."
}
52
# Container resource requests/limits, mirroring the Kubernetes resources
# schema. Values are Kubernetes quantities (e.g. cpu = "4", memory = "16Gi").
variable "resources" {
  type = object({
    requests = object({ cpu = string, memory = string })
    limits   = object({ cpu = string, memory = string })
  })
  description = "CPU/memory requests and limits for the serving container."
}
59
# DNS hostname the Ingress rule matches; required (no default) because a
# wrong guess here would silently route nothing.
variable "ingress_host" {
  type        = string
  description = "Hostname for the Ingress rule exposing the service."
}
63
# IngressClass selecting which controller handles the Ingress.
variable "ingress_class" {
  type        = string
  default     = "nginx"
  description = "IngressClass name used for the Ingress resource."
}
68
# Container image repository; defaults to the public vLLM CPU release repo.
variable "image_repository" {
  type        = string
  default     = "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo"
  description = "Container image repository for the vLLM server."
}
73
# Mutable image tag; ignored whenever image_digest is set (the digest wins).
variable "image_tag" {
  type        = string
  default     = "latest"
  description = "Used only when image_digest is empty."
}
79
# Pin the image by content digest for reproducible deploys. The description
# promises a sha256:... form, so enforce it: a malformed digest would
# otherwise only fail deep inside the cluster at pull time.
variable "image_digest" {
  type        = string
  default     = ""
  description = "Optional sha256:abc... content-addressable digest. Takes precedence over image_tag."

  validation {
    condition     = var.image_digest == "" || can(regex("^sha256:[0-9a-f]{64}$", var.image_digest))
    error_message = "image_digest must be empty or of the form sha256:<64 lowercase hex characters>."
  }
}
85
# Prometheus Operator only scrapes ServiceMonitors whose labels match its
# selector; this must equal the operator's Helm release name or metrics
# silently disappear.
variable "service_monitor_release_label" {
  type        = string
  default     = "kube-prometheus-stack"
  description = "Must match the release label the Prometheus Operator selects on."
}
91
# Size of the volume backing the model cache — presumably a PVC request;
# confirm against the chart's templates. Must be a Kubernetes quantity.
variable "model_cache_size" {
  type        = string
  default     = "10Gi"
  description = "Storage size for the model cache volume (Kubernetes quantity, e.g. 10Gi)."
}
96
# Horizontal Pod Autoscaler settings. metric_name defaults to a vLLM
# custom metric, which implies a metrics adapter must expose it to the
# HPA — TODO confirm the adapter is installed. target_average_value is a
# Kubernetes quantity string.
variable "hpa" {
  type = object({
    enabled              = bool
    min_replicas         = number
    max_replicas         = number
    metric_name          = string
    target_average_value = string
  })
  description = "Autoscaling configuration; set enabled = true to create an HPA."
  default = {
    enabled              = false
    min_replicas         = 1
    max_replicas         = 3
    metric_name          = "vllm:num_requests_running"
    target_average_value = "500m"
  }

  validation {
    condition     = var.hpa.min_replicas >= 1 && var.hpa.min_replicas <= var.hpa.max_replicas
    error_message = "hpa.min_replicas must be >= 1 and <= hpa.max_replicas."
  }
}