summaryrefslogtreecommitdiff
path: root/terraform/modules/llm/variables.tf
blob: 3a7d8f728df3c6dfce2851001351816ad5cea6dc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Required input (no default): the name given to the Helm release.
variable "release_name" {
  description = "Helm release name."
  type        = string
}

# Required input (no default): target Kubernetes namespace for the deployment.
variable "namespace" {
  description = "Kubernetes namespace to deploy into."
  type        = string
}

# Required input (no default): filesystem location of the local llm-app chart.
variable "chart_path" {
  description = "Path to the local llm-app chart."
  type        = string
}

# Replica count for the release. Defaults to a single replica.
# NOTE(review): presumably forwarded to the chart's replica setting, and may be
# superseded by the autoscaler when hpa.enabled is true — confirm in the module body.
variable "replicas" {
  type        = number
  default     = 1
  description = "Number of replicas to run. Must be a non-negative whole number."

  # Reject negative or fractional counts at plan time instead of at apply time.
  validation {
    condition     = var.replicas >= 0 && floor(var.replicas) == var.replicas
    error_message = "The replicas value must be a non-negative whole number."
  }
}

# Required input (no default). Per the description, this is the HuggingFace
# repo id that is handed to vLLM as its positional model_tag argument.
variable "model_name" {
  description = "HuggingFace repo id, passed as vLLM model_tag (positional)."
  type        = string
}

# Required input (no default). The name clients use in the OpenAI-style
# 'model' request field; per the description it maps to --served-model-name.
variable "model_alias" {
  description = "Value clients pass in the OpenAI 'model' field (maps to --served-model-name)."
  type        = string
}

# Maximum model length setting. Defaults to 2048.
# NOTE(review): presumably forwarded to vLLM's --max-model-len flag — confirm
# against the module body / chart values.
variable "max_model_len" {
  type        = number
  default     = 2048
  description = "Maximum model length (context size) for the model server. Must be a positive whole number."

  # A zero, negative or fractional length is never meaningful; fail early.
  validation {
    condition     = var.max_model_len >= 1 && floor(var.max_model_len) == var.max_model_len
    error_message = "The max_model_len value must be a positive whole number."
  }
}

# Data type setting for the model server. Defaults to "bfloat16".
# NOTE(review): presumably forwarded to vLLM's --dtype flag — confirm against
# the module body. No allow-list is enforced here so that new vLLM dtypes keep working.
variable "dtype" {
  type        = string
  default     = "bfloat16"
  description = "Data type used by the model server (for example \"bfloat16\")."
}

# OpenMP thread count for the vLLM CPU backend. The default of 0 means
# "autodetect", as stated in the description.
variable "omp_threads" {
  type        = number
  default     = 0
  description = "OMP_NUM_THREADS for vLLM CPU backend. 0 = autodetect."

  # 0 is the documented autodetect sentinel; negative or fractional thread
  # counts have no meaning, so reject them at plan time.
  validation {
    condition     = var.omp_threads >= 0 && floor(var.omp_threads) == var.omp_threads
    error_message = "The omp_threads value must be a non-negative whole number (0 = autodetect)."
  }
}

# Optional additional CLI arguments for `vllm serve`. Defaults to an empty
# list; per the description they are appended after the stock argument set.
variable "extra_args" {
  description = "Extra CLI args passed to `vllm serve`, appended after the stock set."
  type        = list(string)
  default     = []
}

# Required input (no default): CPU and memory requests and limits.
# All four leaf values are strings.
# NOTE(review): presumably Kubernetes resource quantities (e.g. "2", "500m",
# "8Gi") passed through to the container spec — confirm against the chart.
variable "resources" {
  type = object({
    requests = object({ cpu = string, memory = string })
    limits   = object({ cpu = string, memory = string })
  })
  description = "CPU and memory requests and limits for the workload, given as strings."
}

# Required input (no default): host name for the ingress.
# NOTE(review): presumably used as the Ingress rule host — confirm against the chart.
variable "ingress_host" {
  type        = string
  description = "Host name for the ingress."
}

# Ingress class to use. Defaults to "nginx".
# NOTE(review): presumably set as the Ingress resource's class name — confirm
# against the chart.
variable "ingress_class" {
  type        = string
  default     = "nginx"
  description = "Ingress class for the ingress."
}

# Container image repository, without tag or digest. The tag and digest are
# supplied separately through image_tag and image_digest.
variable "image_repository" {
  type        = string
  default     = "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo"
  description = "Container image repository (without tag or digest)."
}

# Image tag, consulted only when image_digest is empty (see description).
# NOTE(review): the default "latest" is a mutable tag; consider pinning a
# version or setting image_digest for reproducible deployments.
variable "image_tag" {
  description = "Used only when image_digest is empty."
  type        = string
  default     = "latest"
}

# Optional content-addressable image digest. The default empty string means
# "no digest"; when set it takes precedence over image_tag (per the description).
variable "image_digest" {
  type        = string
  default     = ""
  description = "Optional sha256:abc... content-addressable digest. Takes precedence over image_tag."

  # Enforce the documented format so a malformed digest fails at plan time
  # rather than surfacing later as an image pull error.
  validation {
    condition     = var.image_digest == "" || can(regex("^sha256:[0-9a-f]{64}$", var.image_digest))
    error_message = "The image_digest value must be empty or of the form sha256:<64 lowercase hex characters>."
  }
}

# Release label value for the ServiceMonitor. Defaults to
# "kube-prometheus-stack"; per the description it has to match the release
# label that the Prometheus Operator selects on.
variable "service_monitor_release_label" {
  description = "Must match the release label the Prometheus Operator selects on."
  type        = string
  default     = "kube-prometheus-stack"
}

# Size of the model cache. Defaults to "10Gi".
# NOTE(review): presumably the storage request of a volume holding downloaded
# model weights, expressed as a Kubernetes quantity — confirm against the chart.
variable "model_cache_size" {
  type        = string
  default     = "10Gi"
  description = "Size of the model cache, as a quantity string (for example \"10Gi\")."
}

# Autoscaling settings. Disabled by default; when overriding, all five
# attributes must be supplied because none of them is declared optional.
# NOTE(review): presumably rendered into a HorizontalPodAutoscaler that scales
# on the named metric — confirm against the chart.
variable "hpa" {
  type = object({
    enabled              = bool
    min_replicas         = number
    max_replicas         = number
    metric_name          = string
    target_average_value = string
  })
  default = {
    enabled              = false
    min_replicas         = 1
    max_replicas         = 3
    metric_name          = "vllm:num_requests_running"
    target_average_value = "500m"
  }
  description = "Autoscaling settings: enable flag, replica bounds, metric name and target average value."

  # Only enforce the bound ordering when autoscaling is actually enabled, so
  # existing configurations that leave it disabled are never rejected.
  validation {
    condition     = !var.hpa.enabled || var.hpa.max_replicas >= var.hpa.min_replicas
    error_message = "The hpa.max_replicas value must be greater than or equal to hpa.min_replicas when hpa.enabled is true."
  }
}