summaryrefslogtreecommitdiff
path: root/terraform/modules/llm/variables.tf
diff options
context:
space:
mode:
authorYour Name <you@example.com>2026-04-26 21:02:47 +0800
committerYour Name <you@example.com>2026-04-26 21:02:47 +0800
commitd3e770254de0bb301815ca87257c8b1a357d06c4 (patch)
tree358c814be2a06b9e2009905f14938243286b8d82 /terraform/modules/llm/variables.tf
Diffstat (limited to 'terraform/modules/llm/variables.tf')
-rw-r--r--terraform/modules/llm/variables.tf112
1 file changed, 112 insertions, 0 deletions
diff --git a/terraform/modules/llm/variables.tf b/terraform/modules/llm/variables.tf
new file mode 100644
index 0000000..3a7d8f7
--- /dev/null
+++ b/terraform/modules/llm/variables.tf
@@ -0,0 +1,112 @@
# Name assigned to the Helm release this module manages.
variable "release_name" {
  type        = string
  description = "Helm release name."
}
5
# Target namespace for all rendered chart resources.
# NOTE(review): not visible here whether the namespace is created by this
# module or assumed to pre-exist — confirm against the module's main.tf.
variable "namespace" {
  type        = string
  description = "Kubernetes namespace to deploy into."
}
10
# Filesystem path to the local llm-app Helm chart (not a repository URL).
variable "chart_path" {
  type        = string
  description = "Path to the local llm-app chart."
}
15
# Baseline replica count passed to the chart. When var.hpa.enabled is true
# the HPA takes over scaling, so this value mainly matters at install time.
variable "replicas" {
  type        = number
  default     = 1
  description = "Number of pod replicas for the release."

  validation {
    condition     = var.replicas >= 1 && floor(var.replicas) == var.replicas
    error_message = "replicas must be a positive whole number."
  }
}
20
# HuggingFace repository id of the model to serve; the chart forwards it
# to `vllm serve` as the positional model_tag argument.
variable "model_name" {
  type        = string
  description = "HuggingFace repo id, passed as vLLM model_tag (positional)."
}
25
# Public model identifier exposed on the OpenAI-compatible API; decoupled
# from model_name so the served repo can change without breaking clients.
variable "model_alias" {
  type        = string
  description = "Value clients pass in the OpenAI 'model' field (maps to --served-model-name)."
}
30
# Context window ceiling for the engine; presumably forwarded to vLLM's
# --max-model-len flag — confirm against the chart's args template.
variable "max_model_len" {
  type        = number
  default     = 2048
  description = "Maximum model context length (prompt + generation tokens)."

  validation {
    condition     = var.max_model_len >= 1 && floor(var.max_model_len) == var.max_model_len
    error_message = "max_model_len must be a positive whole number of tokens."
  }
}
35
# Weight/activation precision for the engine; restricted to the dtype
# values vLLM's --dtype flag accepts so typos fail at plan time.
variable "dtype" {
  type        = string
  default     = "bfloat16"
  description = "Model dtype passed to vLLM (--dtype)."

  validation {
    condition     = contains(["auto", "half", "float16", "bfloat16", "float", "float32"], var.dtype)
    error_message = "dtype must be one of: auto, half, float16, bfloat16, float, float32."
  }
}
40
# CPU-backend thread pool size. The description's "0 = autodetect" contract
# implies only non-negative whole numbers are meaningful, so reject the rest.
variable "omp_threads" {
  type        = number
  default     = 0
  description = "OMP_NUM_THREADS for vLLM CPU backend. 0 = autodetect."

  validation {
    condition     = var.omp_threads >= 0 && floor(var.omp_threads) == var.omp_threads
    error_message = "omp_threads must be a non-negative whole number (0 = autodetect)."
  }
}
46
# Escape hatch for flags this module does not model explicitly; appended
# after the module-managed arguments, so later flags win on conflict only
# if vLLM's CLI honors last-one-wins — TODO confirm.
variable "extra_args" {
  type        = list(string)
  default     = []
  description = "Extra CLI args passed to `vllm serve`, appended after the stock set."
}
52
# Container resource requests/limits, mirroring the Kubernetes resources
# schema. Values are Kubernetes quantities (e.g. cpu = "4", memory = "16Gi").
variable "resources" {
  type = object({
    requests = object({ cpu = string, memory = string })
    limits   = object({ cpu = string, memory = string })
  })
  description = "CPU/memory requests and limits for the serving container."
}
59
# DNS hostname the Ingress rule matches; required (no default) because a
# wrong guess here would silently route nothing.
variable "ingress_host" {
  type        = string
  description = "Hostname for the Ingress rule exposing the service."
}
63
# IngressClass selecting which controller handles the Ingress.
variable "ingress_class" {
  type        = string
  default     = "nginx"
  description = "IngressClass name used for the Ingress resource."
}
68
# Container image repository; defaults to the public vLLM CPU release repo.
variable "image_repository" {
  type        = string
  default     = "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo"
  description = "Container image repository for the vLLM server."
}
73
# Mutable image tag; ignored whenever image_digest is set (the digest wins).
variable "image_tag" {
  type        = string
  default     = "latest"
  description = "Used only when image_digest is empty."
}
79
# Pin the image by content digest for reproducible deploys. The description
# promises a sha256:... form, so enforce it: a malformed digest would
# otherwise only fail deep inside the cluster at pull time.
variable "image_digest" {
  type        = string
  default     = ""
  description = "Optional sha256:abc... content-addressable digest. Takes precedence over image_tag."

  validation {
    condition     = var.image_digest == "" || can(regex("^sha256:[0-9a-f]{64}$", var.image_digest))
    error_message = "image_digest must be empty or of the form sha256:<64 lowercase hex characters>."
  }
}
85
# Prometheus Operator only scrapes ServiceMonitors whose labels match its
# selector; this must equal the operator's Helm release name or metrics
# silently disappear.
variable "service_monitor_release_label" {
  type        = string
  default     = "kube-prometheus-stack"
  description = "Must match the release label the Prometheus Operator selects on."
}
91
# Size of the volume backing the model cache — presumably a PVC request;
# confirm against the chart's templates. Must be a Kubernetes quantity.
variable "model_cache_size" {
  type        = string
  default     = "10Gi"
  description = "Storage size for the model cache volume (Kubernetes quantity, e.g. 10Gi)."
}
96
# Horizontal Pod Autoscaler settings. metric_name defaults to a vLLM
# custom metric, which implies a metrics adapter must expose it to the
# HPA — TODO confirm the adapter is installed. target_average_value is a
# Kubernetes quantity string.
variable "hpa" {
  type = object({
    enabled              = bool
    min_replicas         = number
    max_replicas         = number
    metric_name          = string
    target_average_value = string
  })
  description = "Autoscaling configuration; set enabled = true to create an HPA."
  default = {
    enabled              = false
    min_replicas         = 1
    max_replicas         = 3
    metric_name          = "vllm:num_requests_running"
    target_average_value = "500m"
  }

  validation {
    condition     = var.hpa.min_replicas >= 1 && var.hpa.min_replicas <= var.hpa.max_replicas
    error_message = "hpa.min_replicas must be >= 1 and <= hpa.max_replicas."
  }
}