summaryrefslogtreecommitdiff
path: root/terraform/modules/llm/main.tf
diff options
context:
space:
mode:
Diffstat (limited to 'terraform/modules/llm/main.tf')
-rw-r--r--terraform/modules/llm/main.tf99
1 files changed, 99 insertions, 0 deletions
diff --git a/terraform/modules/llm/main.tf b/terraform/modules/llm/main.tf
new file mode 100644
index 0000000..cd22019
--- /dev/null
+++ b/terraform/modules/llm/main.tf
@@ -0,0 +1,99 @@
# Kubernetes namespace for this module; its name is taken from var.namespace.
resource "kubernetes_namespace_v1" "this" {
  metadata {
    # Label applied to the namespace object itself.
    labels = {
      "app.kubernetes.io/part-of" = "llm-platform"
    }

    name = var.namespace
  }
}
9
# Optional HorizontalPodAutoscaler targeting the "<release_name>-llm-app"
# Deployment. It is created only when var.hpa.enabled is true and scales on a
# single Pods-type metric whose name and target average come from var.hpa.
resource "kubernetes_horizontal_pod_autoscaler_v2" "llm" {
  # Zero instances when autoscaling is switched off.
  count = var.hpa.enabled ? 1 : 0

  # Ordered after the Helm release.
  # NOTE(review): presumably the chart creates the Deployment named below —
  # confirm the chart's naming matches "<release_name>-llm-app".
  depends_on = [helm_release.llm]

  metadata {
    namespace = kubernetes_namespace_v1.this.metadata[0].name
    name      = format("%s-llm-app", var.release_name)
  }

  spec {
    max_replicas = var.hpa.max_replicas
    min_replicas = var.hpa.min_replicas

    # Workload whose replica count the autoscaler adjusts.
    scale_target_ref {
      kind        = "Deployment"
      name        = format("%s-llm-app", var.release_name)
      api_version = "apps/v1"
    }

    # Per-pod metric, compared against an average value across pods.
    metric {
      type = "Pods"

      pods {
        target {
          average_value = var.hpa.target_average_value
          type          = "AverageValue"
        }

        metric {
          name = var.hpa.metric_name
        }
      }
    }
  }
}
42
# Helm release that installs the chart at var.chart_path into the namespace
# managed by kubernetes_namespace_v1.this. All chart values are passed as a
# single YAML document built from module variables.
resource "helm_release" "llm" {
  name      = var.release_name
  namespace = kubernetes_namespace_v1.this.metadata[0].name
  chart     = var.chart_path

  # The namespace is managed by Terraform above, so Helm must not create it.
  create_namespace = false

  # Block until the release's resources are ready, for up to `timeout`
  # (seconds, per the Helm provider); no automatic rollback on failure.
  wait    = true
  atomic  = false
  timeout = 1800

  values = [
    yamlencode({
      # NOTE(review): when var.hpa.enabled is true an HPA also manages the
      # replica count of this workload — confirm the chart tolerates both.
      replicaCount = var.replicas

      resources = var.resources

      image = {
        repository = var.image_repository
        tag        = var.image_tag
        digest     = var.image_digest
        pullPolicy = "IfNotPresent"
      }

      model = {
        name        = var.model_name
        alias       = var.model_alias
        dtype       = var.dtype
        maxModelLen = var.max_model_len
      }

      modelCache = {
        sizeLimit = var.model_cache_size
      }

      server = {
        port       = 8000
        extraArgs  = var.extra_args
        ompThreads = var.omp_threads
      }

      ingress = {
        enabled   = true
        host      = var.ingress_host
        className = var.ingress_class
      }

      monitoring = {
        serviceMonitor = {
          enabled  = true
          interval = "15s"
          labels = {
            release = var.service_monitor_release_label
          }
        }
      }
    })
  ]
}