summaryrefslogtreecommitdiff
path: root/terraform/envs/prod
diff options
context:
space:
mode:
Diffstat (limited to 'terraform/envs/prod')
-rw-r--r--terraform/envs/prod/.terraform.lock.hcl54
-rw-r--r--terraform/envs/prod/backend.tf5
-rw-r--r--terraform/envs/prod/main.tf70
-rw-r--r--terraform/envs/prod/variables.tf14
-rw-r--r--terraform/envs/prod/versions.tf8
5 files changed, 151 insertions, 0 deletions
diff --git a/terraform/envs/prod/.terraform.lock.hcl b/terraform/envs/prod/.terraform.lock.hcl
new file mode 100644
index 0000000..09902a1
--- /dev/null
+++ b/terraform/envs/prod/.terraform.lock.hcl
@@ -0,0 +1,54 @@
1# This file is maintained automatically by "tofu init".
2# Manual edits may be lost in future updates.
3
4provider "registry.opentofu.org/hashicorp/helm" {
5 version = "2.17.0"
6 constraints = "~> 2.17"
7 hashes = [
8 "h1:69PnHoYrrDrm7C8+8PiSvRGPI55taqL14SvQR/FGM+g=",
9 "zh:02690815e35131a42cb9851f63a3369c216af30ad093d05b39001d43da04b56b",
10 "zh:27a62f12b29926387f4d71aeeee9f7ffa0ccb81a1b6066ee895716ad050d1b7a",
11 "zh:2d0a5babfa73604b3fefc9dab9c87f91c77fce756c2e32b294e9f1290aed26c0",
12 "zh:3976400ceba6dda4636e1d297e3097e1831de5628afa534a166de98a70d1dcbe",
13 "zh:54440ef14f342b41d75c1aded7487bfcc3f76322b75894235b47b7e89ac4bfa4",
14 "zh:6512e2ab9f2fa31cbb90d9249647b5c5798f62eb1215ec44da2cdaa24e38ad25",
15 "zh:795f327ca0b8c5368af0ed03d5d4f6da7260692b4b3ca0bd004ed542e683464d",
16 "zh:ba659e1d94f224bc3f1fd34cbb9d2663e3a8e734108e5a58eb49eda84b140978",
17 "zh:c5c8575c4458835c2acbc3d1ed5570589b14baa2525d8fbd04295c097caf41eb",
18 "zh:e0877a5dac3de138e61eefa26b2f5a13305a17259779465899880f70e11314e0",
19 ]
20}
21
22provider "registry.opentofu.org/hashicorp/kubernetes" {
23 version = "2.38.0"
24 constraints = "~> 2.31"
25 hashes = [
26 "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=",
27 "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc",
28 "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c",
29 "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337",
30 "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e",
31 "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1",
32 "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a",
33 "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc",
34 "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584",
35 "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f",
36 ]
37}
38
39provider "registry.opentofu.org/hashicorp/random" {
40 version = "3.8.1"
41 constraints = "~> 3.6"
42 hashes = [
43 "h1:EHn3jsqOKhWjbg0X+psk0Ww96yz3N7ASqEKKuFvDFwo=",
44 "zh:25c458c7c676f15705e872202dad7dcd0982e4a48e7ea1800afa5fc64e77f4c8",
45 "zh:2edeaf6f1b20435b2f81855ad98a2e70956d473be9e52a5fdf57ccd0098ba476",
46 "zh:44becb9d5f75d55e36dfed0c5beabaf4c92e0a2bc61a3814d698271c646d48e7",
47 "zh:7699032612c3b16cc69928add8973de47b10ce81b1141f30644a0e8a895b5cd3",
48 "zh:86d07aa98d17703de9fbf402c89590dc1e01dbe5671dd6bc5e487eb8fe87eee0",
49 "zh:8c411c77b8390a49a8a1bc9f176529e6b32369dd33a723606c8533e5ca4d68c1",
50 "zh:a5ecc8255a612652a56b28149994985e2c4dc046e5d34d416d47fa7767f5c28f",
51 "zh:aea3fe1a5669b932eda9c5c72e5f327db8da707fe514aaca0d0ef60cb24892f9",
52 "zh:f56e26e6977f755d7ae56fa6320af96ecf4bb09580d47cb481efbf27f1c5afff",
53 ]
54}
diff --git a/terraform/envs/prod/backend.tf b/terraform/envs/prod/backend.tf
new file mode 100644
index 0000000..3c533e6
--- /dev/null
+++ b/terraform/envs/prod/backend.tf
@@ -0,0 +1,5 @@
1terraform {
2 backend "local" { # NOTE(review): state is kept in a local file rather than a remote backend — confirm this is intended for prod
3 path = "terraform.tfstate" # relative path to the state file
4 }
5}
diff --git a/terraform/envs/prod/main.tf b/terraform/envs/prod/main.tf
new file mode 100644
index 0000000..04db31d
--- /dev/null
+++ b/terraform/envs/prod/main.tf
@@ -0,0 +1,70 @@
1provider "kubernetes" { # cluster access for Kubernetes resources; uses the same kubeconfig and context as the helm provider
2 config_path = pathexpand(var.kubeconfig) # pathexpand resolves a leading "~" (the variable defaults to "~/.kube/config")
3 config_context = var.kube_context
4}
5
6provider "helm" { # Helm talks to the cluster through the same kubeconfig file and context
7 kubernetes {
8 config_path = pathexpand(var.kubeconfig)
9 config_context = var.kube_context
10 }
11}
12
13locals {
14 env = "prod" # environment name; interpolated into the namespace passed to module "llm"
15}
16
17module "llm" { # prod instance of the shared module at ../../modules/llm
18 source = "../../modules/llm"
19
20 release_name = "llm"
21 namespace = "llm-${local.env}"
22 chart_path = var.chart_path
23
24 replicas = 1 # same value as hpa.min_replicas below
25
26 model_name = "Qwen/Qwen2.5-1.5B-Instruct"
27 model_alias = "Qwen2.5-1.5B-Instruct"
28 max_model_len = 4096
29 dtype = "bfloat16"
30 omp_threads = 6 # equals the CPU limit in resources.limits
31
32 resources = {
33 requests = { cpu = "2", memory = "4Gi" }
34 limits = { cpu = "6", memory = "8Gi" }
35 }
36
37 ingress_host = "llm.prod.localtest.me"
38 image_tag = "latest" # NOTE(review): tag is "latest" while a digest is pinned below — presumably the module prefers the digest; confirm
39 # Content-addressable pin for prod, resolved with scripts/resolve-digests.sh on amd64.
40 # The digest is per-arch: re-resolve it on a different arch or after an upstream tag move.
41 # Unlike prod, dev intentionally runs on `:latest` so new fixes flow in without a PR.
42 image_digest = "sha256:bb7ed9b6c595334d78179e9d8f6490e06bf9220ed4a10b9b4e15064454ddc69e"
43
44 # Enable OpenAI-style tool calling so the agent's function-call path works.
45 # Qwen 2.5 uses the hermes tool-call parser in vLLM.
46 extra_args = [
47 "--enable-auto-tool-choice",
48 "--tool-call-parser", "hermes",
49 ]
50
51 hpa = {
52 enabled = true
53 min_replicas = 1
54 max_replicas = 3
55 # vLLM exposes `vllm:num_requests_running` as a per-pod gauge of in-flight requests.
56 # "500m" is 0.5: scale up once pods average more than 0.5 running requests each.
57 metric_name = "vllm:num_requests_running"
58 target_average_value = "500m"
59 }
60}
61
62output "ingress_host" { value = module.llm.ingress_host } # re-exported from module "llm"
63output "service_dns" { value = module.llm.service_dns } # re-exported from module "llm"
64output "curl_example" { # sample chat-completions request; NOTE(review): port 8080 and the model name are hard-coded — presumably the name must track model_alias; confirm
65 value = <<-EOT
66 curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \
67 -H 'Content-Type: application/json' \
68 -d '{"model":"Qwen2.5-1.5B-Instruct","messages":[{"role":"user","content":"Say hi."}]}'
69 EOT
70}
diff --git a/terraform/envs/prod/variables.tf b/terraform/envs/prod/variables.tf
new file mode 100644
index 0000000..9f1b697
--- /dev/null
+++ b/terraform/envs/prod/variables.tf
@@ -0,0 +1,14 @@
1variable "kubeconfig" { # kubeconfig file used by the kubernetes and helm providers
2 type = string
3 default = "~/.kube/config" # "~" is expanded with pathexpand() where the providers are configured
4}
5
6variable "kube_context" { # kubeconfig context the providers connect with
7 type = string
8 default = "kind-llm-local" # NOTE(review): default looks like a local kind cluster — confirm it is right for the prod environment
9}
10
11variable "chart_path" { # no default, so a value must be supplied; passed through to module "llm"
12 type = string
13 description = "Absolute path to charts/llm-app"
14}
diff --git a/terraform/envs/prod/versions.tf b/terraform/envs/prod/versions.tf
new file mode 100644
index 0000000..6a87674
--- /dev/null
+++ b/terraform/envs/prod/versions.tf
@@ -0,0 +1,8 @@
1terraform {
2 required_version = ">= 1.6.0" # minimum CLI version; the lock file in this directory was written by "tofu init"
3 required_providers {
4 helm = { source = "hashicorp/helm", version = "~> 2.17" } # "~>" accepts 2.17 and newer 2.x releases, but not 3.0
5 kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } # 2.31 and newer 2.x releases
6 random = { source = "hashicorp/random", version = "~> 3.6" } # NOTE(review): not referenced in this directory's .tf files — presumably used inside modules/llm; verify
7 }
8}