blob: cd220193aa2a7de81a8da15eb61f57b428c3296f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
# Namespace used by the Helm release and the optional HPA defined in this
# module; both reference it through `metadata[0].name`, which also gives
# Terraform an implicit dependency on this resource.
resource "kubernetes_namespace_v1" "this" {
  metadata {
    # Static label applied to the namespace object itself.
    labels = {
      "app.kubernetes.io/part-of" = "llm-platform"
    }

    name = var.namespace
  }
}
# Optional HorizontalPodAutoscaler (autoscaling/v2) for the LLM workload.
# Zero or one instance is created, depending on `var.hpa.enabled`.
resource "kubernetes_horizontal_pod_autoscaler_v2" "llm" {
  count = var.hpa.enabled ? 1 : 0

  # Explicit ordering: nothing in this resource references helm_release.llm
  # directly, so the dependency has to be declared by hand.
  depends_on = [helm_release.llm]

  metadata {
    namespace = kubernetes_namespace_v1.this.metadata[0].name
    name      = format("%s-llm-app", var.release_name)
  }

  spec {
    max_replicas = var.hpa.max_replicas
    min_replicas = var.hpa.min_replicas

    # NOTE(review): the target name is presumably the Deployment rendered by
    # the chart for this release -- confirm against the chart's naming helper.
    scale_target_ref {
      kind        = "Deployment"
      api_version = "apps/v1"
      name        = format("%s-llm-app", var.release_name)
    }

    # Single per-pod metric, compared against an average value across pods.
    metric {
      type = "Pods"

      pods {
        metric {
          name = var.hpa.metric_name
        }

        target {
          average_value = var.hpa.target_average_value
          type          = "AverageValue"
        }
      }
    }
  }
}
# Helm release for the LLM serving chart located at `var.chart_path`.
# All chart values are assembled here as one HCL object and handed to Helm
# as a single YAML document.
resource "helm_release" "llm" {
  name  = var.release_name
  chart = var.chart_path

  # The namespace is managed by kubernetes_namespace_v1.this, so Helm is told
  # not to create it.
  namespace        = kubernetes_namespace_v1.this.metadata[0].name
  create_namespace = false

  # Wait for the release's resources, with a timeout of 1800 (seconds per the
  # helm provider documentation); `atomic` is explicitly turned off.
  wait    = true
  timeout = 1800
  atomic  = false

  values = [
    yamlencode({
      replicaCount = var.replicas

      # Container image coordinates.
      image = {
        repository = var.image_repository
        tag        = var.image_tag
        digest     = var.image_digest
        pullPolicy = "IfNotPresent"
      }

      # Model selection and model-level settings.
      model = {
        name        = var.model_name
        alias       = var.model_alias
        maxModelLen = var.max_model_len
        dtype       = var.dtype
      }

      # Server settings; the port is fixed at 8000 in this module.
      server = {
        port       = 8000
        ompThreads = var.omp_threads
        extraArgs  = var.extra_args
      }

      resources = var.resources

      # Ingress is always enabled; only class and host are configurable.
      ingress = {
        enabled   = true
        className = var.ingress_class
        host      = var.ingress_host
      }

      # ServiceMonitor is always enabled with a 15s interval.
      # NOTE(review): the `release` label is presumably what the Prometheus
      # operator selects on -- confirm against the monitoring stack.
      monitoring = {
        serviceMonitor = {
          enabled  = true
          interval = "15s"
          labels = {
            release = var.service_monitor_release_label
          }
        }
      }

      modelCache = {
        sizeLimit = var.model_cache_size
      }
    }),
  ]
}
|