diff options
Diffstat (limited to 'terraform/modules/llm/main.tf')
| -rw-r--r-- | terraform/modules/llm/main.tf | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/terraform/modules/llm/main.tf b/terraform/modules/llm/main.tf new file mode 100644 index 0000000..cd22019 --- /dev/null +++ b/terraform/modules/llm/main.tf | |||
| @@ -0,0 +1,99 @@ | |||
# Namespace used by the other resources in this module. Its name is taken from
# var.namespace and it carries the "app.kubernetes.io/part-of" = "llm-platform"
# label.
resource "kubernetes_namespace_v1" "this" {
  metadata {
    labels = {
      "app.kubernetes.io/part-of" = "llm-platform"
    }
    name = var.namespace
  }
}
| 9 | |||
# Optional HorizontalPodAutoscaler (autoscaling/v2) that scales an apps/v1
# Deployment named "<release_name>-llm-app" on a single per-pod metric.
#
# NOTE(review): the target Deployment is not declared in this file; presumably
# it is rendered by the chart installed through helm_release.llm. Confirm that
# the chart names its Deployment "<release_name>-llm-app".
resource "kubernetes_horizontal_pod_autoscaler_v2" "llm" {
  # No autoscaler is created unless var.hpa.enabled is true.
  count = var.hpa.enabled ? 1 : 0

  metadata {
    namespace = kubernetes_namespace_v1.this.metadata[0].name
    name      = format("%s-llm-app", var.release_name)
  }

  spec {
    max_replicas = var.hpa.max_replicas
    min_replicas = var.hpa.min_replicas

    scale_target_ref {
      kind        = "Deployment"
      api_version = "apps/v1"
      name        = format("%s-llm-app", var.release_name)
    }

    # Scale on the average of var.hpa.metric_name across pods, aiming for
    # var.hpa.target_average_value.
    metric {
      type = "Pods"
      pods {
        target {
          average_value = var.hpa.target_average_value
          type          = "AverageValue"
        }
        metric {
          name = var.hpa.metric_name
        }
      }
    }
  }

  # Explicit ordering: the autoscaler is applied only after the Helm release.
  depends_on = [helm_release.llm]
}
| 42 | |||
# Installs the chart at var.chart_path as release var.release_name into the
# namespace managed by kubernetes_namespace_v1.this, passing a single values
# document assembled from this module's input variables.
resource "helm_release" "llm" {
  name  = var.release_name
  chart = var.chart_path

  # The namespace is owned by kubernetes_namespace_v1.this, so Helm is told not
  # to create it.
  namespace        = kubernetes_namespace_v1.this.metadata[0].name
  create_namespace = false

  # Wait for the release's resources for up to 1800 seconds; atomic mode is
  # explicitly off.
  wait    = true
  timeout = 1800
  atomic  = false

  values = [
    yamlencode({
      replicaCount = var.replicas
      resources    = var.resources

      image = {
        repository = var.image_repository
        tag        = var.image_tag
        digest     = var.image_digest
        pullPolicy = "IfNotPresent"
      }

      model = {
        name        = var.model_name
        alias       = var.model_alias
        dtype       = var.dtype
        maxModelLen = var.max_model_len
      }

      modelCache = {
        sizeLimit = var.model_cache_size
      }

      server = {
        port       = 8000
        ompThreads = var.omp_threads
        extraArgs  = var.extra_args
      }

      ingress = {
        enabled   = true
        className = var.ingress_class
        host      = var.ingress_host
      }

      monitoring = {
        serviceMonitor = {
          enabled  = true
          interval = "15s"
          labels = {
            release = var.service_monitor_release_label
          }
        }
      }
    })
  ]
}
