From d3e770254de0bb301815ca87257c8b1a357d06c4 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 26 Apr 2026 21:02:47 +0800 Subject: hehe --- terraform/envs/agent/.terraform.lock.hcl | 19 ++++ terraform/envs/agent/backend.tf | 5 + terraform/envs/agent/main.tf | 27 +++++ terraform/envs/agent/variables.tf | 14 +++ terraform/envs/agent/versions.tf | 6 ++ terraform/envs/bootstrap/.terraform.lock.hcl | 37 +++++++ terraform/envs/bootstrap/backend.tf | 5 + terraform/envs/bootstrap/main.tf | 25 +++++ terraform/envs/bootstrap/variables.tf | 15 +++ terraform/envs/bootstrap/versions.tf | 7 ++ terraform/envs/dev/.terraform.lock.hcl | 54 ++++++++++ terraform/envs/dev/backend.tf | 5 + terraform/envs/dev/main.tf | 49 +++++++++ terraform/envs/dev/variables.tf | 14 +++ terraform/envs/dev/versions.tf | 8 ++ terraform/envs/prod/.terraform.lock.hcl | 54 ++++++++++ terraform/envs/prod/backend.tf | 5 + terraform/envs/prod/main.tf | 70 ++++++++++++ terraform/envs/prod/variables.tf | 14 +++ terraform/envs/prod/versions.tf | 8 ++ terraform/modules/agent/main.tf | 114 ++++++++++++++++++++ terraform/modules/agent/outputs.tf | 11 ++ terraform/modules/agent/variables.tf | 33 ++++++ terraform/modules/agent/versions.tf | 5 + terraform/modules/llm/main.tf | 99 +++++++++++++++++ terraform/modules/llm/outputs.tf | 12 +++ terraform/modules/llm/variables.tf | 112 +++++++++++++++++++ terraform/modules/observability/main.tf | 156 +++++++++++++++++++++++++++ terraform/modules/observability/outputs.tf | 11 ++ terraform/modules/observability/variables.tf | 27 +++++ 30 files changed, 1021 insertions(+) create mode 100644 terraform/envs/agent/.terraform.lock.hcl create mode 100644 terraform/envs/agent/backend.tf create mode 100644 terraform/envs/agent/main.tf create mode 100644 terraform/envs/agent/variables.tf create mode 100644 terraform/envs/agent/versions.tf create mode 100644 terraform/envs/bootstrap/.terraform.lock.hcl create mode 100644 terraform/envs/bootstrap/backend.tf create mode 100644 
terraform/envs/bootstrap/main.tf create mode 100644 terraform/envs/bootstrap/variables.tf create mode 100644 terraform/envs/bootstrap/versions.tf create mode 100644 terraform/envs/dev/.terraform.lock.hcl create mode 100644 terraform/envs/dev/backend.tf create mode 100644 terraform/envs/dev/main.tf create mode 100644 terraform/envs/dev/variables.tf create mode 100644 terraform/envs/dev/versions.tf create mode 100644 terraform/envs/prod/.terraform.lock.hcl create mode 100644 terraform/envs/prod/backend.tf create mode 100644 terraform/envs/prod/main.tf create mode 100644 terraform/envs/prod/variables.tf create mode 100644 terraform/envs/prod/versions.tf create mode 100644 terraform/modules/agent/main.tf create mode 100644 terraform/modules/agent/outputs.tf create mode 100644 terraform/modules/agent/variables.tf create mode 100644 terraform/modules/agent/versions.tf create mode 100644 terraform/modules/llm/main.tf create mode 100644 terraform/modules/llm/outputs.tf create mode 100644 terraform/modules/llm/variables.tf create mode 100644 terraform/modules/observability/main.tf create mode 100644 terraform/modules/observability/outputs.tf create mode 100644 terraform/modules/observability/variables.tf (limited to 'terraform') diff --git a/terraform/envs/agent/.terraform.lock.hcl b/terraform/envs/agent/.terraform.lock.hcl new file mode 100644 index 0000000..605df33 --- /dev/null +++ b/terraform/envs/agent/.terraform.lock.hcl @@ -0,0 +1,19 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. 
+ +provider "registry.opentofu.org/hashicorp/kubernetes" { + version = "2.38.0" + constraints = "~> 2.31" + hashes = [ + "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", + "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", + "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", + "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", + "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", + "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", + "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", + "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", + "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", + "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", + ] +} diff --git a/terraform/envs/agent/backend.tf b/terraform/envs/agent/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/agent/backend.tf @@ -0,0 +1,5 @@ +terraform { + backend "local" { + path = "terraform.tfstate" + } +} diff --git a/terraform/envs/agent/main.tf b/terraform/envs/agent/main.tf new file mode 100644 index 0000000..122eaca --- /dev/null +++ b/terraform/envs/agent/main.tf @@ -0,0 +1,27 @@ +provider "kubernetes" { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context +} + +module "agent" { + source = "../../modules/agent" + + namespace = "agent" + agent_source_path = var.agent_source_path + + # Point at the prod LLM. `svc.cluster.local` resolves from any namespace. 
+ llm_service_url = "http://llm-llm-app.llm-prod.svc.cluster.local:8000/v1" + model_alias = "Qwen2.5-1.5B-Instruct" + + ingress_host = "agent.localtest.me" +} + +output "ingress_host" { value = module.agent.ingress_host } +output "service_dns" { value = module.agent.service_dns } +output "curl_example" { + value = <<-EOT + curl -s http://${module.agent.ingress_host}:8080/ask \ + -H 'Content-Type: application/json' \ + -d '{"question":"what is 123 * 47?"}' + EOT +} diff --git a/terraform/envs/agent/variables.tf b/terraform/envs/agent/variables.tf new file mode 100644 index 0000000..bf005b9 --- /dev/null +++ b/terraform/envs/agent/variables.tf @@ -0,0 +1,14 @@ +variable "kubeconfig" { + type = string + default = "~/.kube/config" +} + +variable "kube_context" { + type = string + default = "kind-llm-local" +} + +variable "agent_source_path" { + type = string + description = "Absolute path to agent/agent.py" +} diff --git a/terraform/envs/agent/versions.tf b/terraform/envs/agent/versions.tf new file mode 100644 index 0000000..69cf77e --- /dev/null +++ b/terraform/envs/agent/versions.tf @@ -0,0 +1,6 @@ +terraform { + required_version = ">= 1.6.0" + required_providers { + kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } + } +} diff --git a/terraform/envs/bootstrap/.terraform.lock.hcl b/terraform/envs/bootstrap/.terraform.lock.hcl new file mode 100644 index 0000000..baa0088 --- /dev/null +++ b/terraform/envs/bootstrap/.terraform.lock.hcl @@ -0,0 +1,37 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. 
+ +provider "registry.opentofu.org/hashicorp/helm" { + version = "2.17.0" + constraints = "~> 2.17" + hashes = [ + "h1:69PnHoYrrDrm7C8+8PiSvRGPI55taqL14SvQR/FGM+g=", + "zh:02690815e35131a42cb9851f63a3369c216af30ad093d05b39001d43da04b56b", + "zh:27a62f12b29926387f4d71aeeee9f7ffa0ccb81a1b6066ee895716ad050d1b7a", + "zh:2d0a5babfa73604b3fefc9dab9c87f91c77fce756c2e32b294e9f1290aed26c0", + "zh:3976400ceba6dda4636e1d297e3097e1831de5628afa534a166de98a70d1dcbe", + "zh:54440ef14f342b41d75c1aded7487bfcc3f76322b75894235b47b7e89ac4bfa4", + "zh:6512e2ab9f2fa31cbb90d9249647b5c5798f62eb1215ec44da2cdaa24e38ad25", + "zh:795f327ca0b8c5368af0ed03d5d4f6da7260692b4b3ca0bd004ed542e683464d", + "zh:ba659e1d94f224bc3f1fd34cbb9d2663e3a8e734108e5a58eb49eda84b140978", + "zh:c5c8575c4458835c2acbc3d1ed5570589b14baa2525d8fbd04295c097caf41eb", + "zh:e0877a5dac3de138e61eefa26b2f5a13305a17259779465899880f70e11314e0", + ] +} + +provider "registry.opentofu.org/hashicorp/kubernetes" { + version = "2.38.0" + constraints = "~> 2.31" + hashes = [ + "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", + "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", + "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", + "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", + "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", + "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", + "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", + "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", + "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", + "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", + ] +} diff --git a/terraform/envs/bootstrap/backend.tf b/terraform/envs/bootstrap/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/bootstrap/backend.tf @@ -0,0 +1,5 @@ +terraform { + backend "local" { + path = 
"terraform.tfstate" + } +} diff --git a/terraform/envs/bootstrap/main.tf b/terraform/envs/bootstrap/main.tf new file mode 100644 index 0000000..07bf04d --- /dev/null +++ b/terraform/envs/bootstrap/main.tf @@ -0,0 +1,25 @@ +provider "kubernetes" { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context +} + +provider "helm" { + kubernetes { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context + } +} + +module "observability" { + source = "../../modules/observability" + namespace = "monitoring" + grafana_admin_password = var.grafana_admin_password +} + +output "grafana" { + value = module.observability.grafana_service +} + +output "prometheus" { + value = module.observability.prometheus_service +} diff --git a/terraform/envs/bootstrap/variables.tf b/terraform/envs/bootstrap/variables.tf new file mode 100644 index 0000000..220bed3 --- /dev/null +++ b/terraform/envs/bootstrap/variables.tf @@ -0,0 +1,15 @@ +variable "kubeconfig" { + type = string + default = "~/.kube/config" +} + +variable "kube_context" { + type = string + default = "kind-llm-local" +} + +variable "grafana_admin_password" { + type = string + default = "admin" + sensitive = true +} diff --git a/terraform/envs/bootstrap/versions.tf b/terraform/envs/bootstrap/versions.tf new file mode 100644 index 0000000..0d7f77b --- /dev/null +++ b/terraform/envs/bootstrap/versions.tf @@ -0,0 +1,7 @@ +terraform { + required_version = ">= 1.6.0" + required_providers { + helm = { source = "hashicorp/helm", version = "~> 2.17" } + kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } + } +} diff --git a/terraform/envs/dev/.terraform.lock.hcl b/terraform/envs/dev/.terraform.lock.hcl new file mode 100644 index 0000000..09902a1 --- /dev/null +++ b/terraform/envs/dev/.terraform.lock.hcl @@ -0,0 +1,54 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. 
+ +provider "registry.opentofu.org/hashicorp/helm" { + version = "2.17.0" + constraints = "~> 2.17" + hashes = [ + "h1:69PnHoYrrDrm7C8+8PiSvRGPI55taqL14SvQR/FGM+g=", + "zh:02690815e35131a42cb9851f63a3369c216af30ad093d05b39001d43da04b56b", + "zh:27a62f12b29926387f4d71aeeee9f7ffa0ccb81a1b6066ee895716ad050d1b7a", + "zh:2d0a5babfa73604b3fefc9dab9c87f91c77fce756c2e32b294e9f1290aed26c0", + "zh:3976400ceba6dda4636e1d297e3097e1831de5628afa534a166de98a70d1dcbe", + "zh:54440ef14f342b41d75c1aded7487bfcc3f76322b75894235b47b7e89ac4bfa4", + "zh:6512e2ab9f2fa31cbb90d9249647b5c5798f62eb1215ec44da2cdaa24e38ad25", + "zh:795f327ca0b8c5368af0ed03d5d4f6da7260692b4b3ca0bd004ed542e683464d", + "zh:ba659e1d94f224bc3f1fd34cbb9d2663e3a8e734108e5a58eb49eda84b140978", + "zh:c5c8575c4458835c2acbc3d1ed5570589b14baa2525d8fbd04295c097caf41eb", + "zh:e0877a5dac3de138e61eefa26b2f5a13305a17259779465899880f70e11314e0", + ] +} + +provider "registry.opentofu.org/hashicorp/kubernetes" { + version = "2.38.0" + constraints = "~> 2.31" + hashes = [ + "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", + "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", + "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", + "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", + "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", + "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", + "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", + "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", + "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", + "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", + ] +} + +provider "registry.opentofu.org/hashicorp/random" { + version = "3.8.1" + constraints = "~> 3.6" + hashes = [ + "h1:EHn3jsqOKhWjbg0X+psk0Ww96yz3N7ASqEKKuFvDFwo=", + "zh:25c458c7c676f15705e872202dad7dcd0982e4a48e7ea1800afa5fc64e77f4c8", + 
"zh:2edeaf6f1b20435b2f81855ad98a2e70956d473be9e52a5fdf57ccd0098ba476", + "zh:44becb9d5f75d55e36dfed0c5beabaf4c92e0a2bc61a3814d698271c646d48e7", + "zh:7699032612c3b16cc69928add8973de47b10ce81b1141f30644a0e8a895b5cd3", + "zh:86d07aa98d17703de9fbf402c89590dc1e01dbe5671dd6bc5e487eb8fe87eee0", + "zh:8c411c77b8390a49a8a1bc9f176529e6b32369dd33a723606c8533e5ca4d68c1", + "zh:a5ecc8255a612652a56b28149994985e2c4dc046e5d34d416d47fa7767f5c28f", + "zh:aea3fe1a5669b932eda9c5c72e5f327db8da707fe514aaca0d0ef60cb24892f9", + "zh:f56e26e6977f755d7ae56fa6320af96ecf4bb09580d47cb481efbf27f1c5afff", + ] +} diff --git a/terraform/envs/dev/backend.tf b/terraform/envs/dev/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/dev/backend.tf @@ -0,0 +1,5 @@ +terraform { + backend "local" { + path = "terraform.tfstate" + } +} diff --git a/terraform/envs/dev/main.tf b/terraform/envs/dev/main.tf new file mode 100644 index 0000000..8e1b882 --- /dev/null +++ b/terraform/envs/dev/main.tf @@ -0,0 +1,49 @@ +provider "kubernetes" { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context +} + +provider "helm" { + kubernetes { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context + } +} + +locals { + env = "dev" +} + +module "llm" { + source = "../../modules/llm" + + release_name = "llm" + namespace = "llm-${local.env}" + chart_path = var.chart_path + + replicas = 2 + + model_name = "Qwen/Qwen2.5-0.5B-Instruct" + model_alias = "Qwen2.5-0.5B-Instruct" + max_model_len = 2048 + dtype = "bfloat16" + omp_threads = 4 + + resources = { + requests = { cpu = "1", memory = "2Gi" } + limits = { cpu = "4", memory = "6Gi" } + } + + ingress_host = "llm.dev.localtest.me" + image_tag = "latest" +} + +output "ingress_host" { value = module.llm.ingress_host } +output "service_dns" { value = module.llm.service_dns } +output "curl_example" { + value = <<-EOT + curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \ + -H 
'Content-Type: application/json' \ + -d '{"model":"Qwen2.5-0.5B-Instruct","messages":[{"role":"user","content":"Say hi."}]}' + EOT +} diff --git a/terraform/envs/dev/variables.tf b/terraform/envs/dev/variables.tf new file mode 100644 index 0000000..9f1b697 --- /dev/null +++ b/terraform/envs/dev/variables.tf @@ -0,0 +1,14 @@ +variable "kubeconfig" { + type = string + default = "~/.kube/config" +} + +variable "kube_context" { + type = string + default = "kind-llm-local" +} + +variable "chart_path" { + type = string + description = "Absolute path to charts/llm-app" +} diff --git a/terraform/envs/dev/versions.tf b/terraform/envs/dev/versions.tf new file mode 100644 index 0000000..6a87674 --- /dev/null +++ b/terraform/envs/dev/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_version = ">= 1.6.0" + required_providers { + helm = { source = "hashicorp/helm", version = "~> 2.17" } + kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } + random = { source = "hashicorp/random", version = "~> 3.6" } + } +} diff --git a/terraform/envs/prod/.terraform.lock.hcl b/terraform/envs/prod/.terraform.lock.hcl new file mode 100644 index 0000000..09902a1 --- /dev/null +++ b/terraform/envs/prod/.terraform.lock.hcl @@ -0,0 +1,54 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. 
+ +provider "registry.opentofu.org/hashicorp/helm" { + version = "2.17.0" + constraints = "~> 2.17" + hashes = [ + "h1:69PnHoYrrDrm7C8+8PiSvRGPI55taqL14SvQR/FGM+g=", + "zh:02690815e35131a42cb9851f63a3369c216af30ad093d05b39001d43da04b56b", + "zh:27a62f12b29926387f4d71aeeee9f7ffa0ccb81a1b6066ee895716ad050d1b7a", + "zh:2d0a5babfa73604b3fefc9dab9c87f91c77fce756c2e32b294e9f1290aed26c0", + "zh:3976400ceba6dda4636e1d297e3097e1831de5628afa534a166de98a70d1dcbe", + "zh:54440ef14f342b41d75c1aded7487bfcc3f76322b75894235b47b7e89ac4bfa4", + "zh:6512e2ab9f2fa31cbb90d9249647b5c5798f62eb1215ec44da2cdaa24e38ad25", + "zh:795f327ca0b8c5368af0ed03d5d4f6da7260692b4b3ca0bd004ed542e683464d", + "zh:ba659e1d94f224bc3f1fd34cbb9d2663e3a8e734108e5a58eb49eda84b140978", + "zh:c5c8575c4458835c2acbc3d1ed5570589b14baa2525d8fbd04295c097caf41eb", + "zh:e0877a5dac3de138e61eefa26b2f5a13305a17259779465899880f70e11314e0", + ] +} + +provider "registry.opentofu.org/hashicorp/kubernetes" { + version = "2.38.0" + constraints = "~> 2.31" + hashes = [ + "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", + "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", + "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", + "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", + "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", + "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", + "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", + "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", + "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", + "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", + ] +} + +provider "registry.opentofu.org/hashicorp/random" { + version = "3.8.1" + constraints = "~> 3.6" + hashes = [ + "h1:EHn3jsqOKhWjbg0X+psk0Ww96yz3N7ASqEKKuFvDFwo=", + "zh:25c458c7c676f15705e872202dad7dcd0982e4a48e7ea1800afa5fc64e77f4c8", + 
"zh:2edeaf6f1b20435b2f81855ad98a2e70956d473be9e52a5fdf57ccd0098ba476", + "zh:44becb9d5f75d55e36dfed0c5beabaf4c92e0a2bc61a3814d698271c646d48e7", + "zh:7699032612c3b16cc69928add8973de47b10ce81b1141f30644a0e8a895b5cd3", + "zh:86d07aa98d17703de9fbf402c89590dc1e01dbe5671dd6bc5e487eb8fe87eee0", + "zh:8c411c77b8390a49a8a1bc9f176529e6b32369dd33a723606c8533e5ca4d68c1", + "zh:a5ecc8255a612652a56b28149994985e2c4dc046e5d34d416d47fa7767f5c28f", + "zh:aea3fe1a5669b932eda9c5c72e5f327db8da707fe514aaca0d0ef60cb24892f9", + "zh:f56e26e6977f755d7ae56fa6320af96ecf4bb09580d47cb481efbf27f1c5afff", + ] +} diff --git a/terraform/envs/prod/backend.tf b/terraform/envs/prod/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/prod/backend.tf @@ -0,0 +1,5 @@ +terraform { + backend "local" { + path = "terraform.tfstate" + } +} diff --git a/terraform/envs/prod/main.tf b/terraform/envs/prod/main.tf new file mode 100644 index 0000000..04db31d --- /dev/null +++ b/terraform/envs/prod/main.tf @@ -0,0 +1,70 @@ +provider "kubernetes" { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context +} + +provider "helm" { + kubernetes { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context + } +} + +locals { + env = "prod" +} + +module "llm" { + source = "../../modules/llm" + + release_name = "llm" + namespace = "llm-${local.env}" + chart_path = var.chart_path + + replicas = 1 + + model_name = "Qwen/Qwen2.5-1.5B-Instruct" + model_alias = "Qwen2.5-1.5B-Instruct" + max_model_len = 4096 + dtype = "bfloat16" + omp_threads = 6 + + resources = { + requests = { cpu = "2", memory = "4Gi" } + limits = { cpu = "6", memory = "8Gi" } + } + + ingress_host = "llm.prod.localtest.me" + image_tag = "latest" + # Content-addressable pin, resolved with scripts/resolve-digests.sh on amd64. + # Per-arch digest — re-resolve on a different arch or after an upstream tag move. + # Dev intentionally runs on `:latest` so new fixes flow in without a PR. 
+ image_digest = "sha256:bb7ed9b6c595334d78179e9d8f6490e06bf9220ed4a10b9b4e15064454ddc69e" + + # Enable OpenAI tool-calling so the agent's function-call path works. + # Qwen 2.5 uses hermes-style tool parsing in vLLM. + extra_args = [ + "--enable-auto-tool-choice", + "--tool-call-parser", "hermes", + ] + + hpa = { + enabled = true + min_replicas = 1 + max_replicas = 3 + # vLLM exposes `vllm:num_requests_running` as a per-pod gauge of in-flight + # requests. Scale out once the per-pod average exceeds 0.5 ("500m") requests. + metric_name = "vllm:num_requests_running" + target_average_value = "500m" + } +} + +output "ingress_host" { value = module.llm.ingress_host } +output "service_dns" { value = module.llm.service_dns } +output "curl_example" { + value = <<-EOT + curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -d '{"model":"Qwen2.5-1.5B-Instruct","messages":[{"role":"user","content":"Say hi."}]}' + EOT +} diff --git a/terraform/envs/prod/variables.tf b/terraform/envs/prod/variables.tf new file mode 100644 index 0000000..9f1b697 --- /dev/null +++ b/terraform/envs/prod/variables.tf @@ -0,0 +1,14 @@ +variable "kubeconfig" { + type = string + default = "~/.kube/config" +} + +variable "kube_context" { + type = string + default = "kind-llm-local" +} + +variable "chart_path" { + type = string + description = "Absolute path to charts/llm-app" +} diff --git a/terraform/envs/prod/versions.tf b/terraform/envs/prod/versions.tf new file mode 100644 index 0000000..6a87674 --- /dev/null +++ b/terraform/envs/prod/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_version = ">= 1.6.0" + required_providers { + helm = { source = "hashicorp/helm", version = "~> 2.17" } + kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } + random = { source = "hashicorp/random", version = "~> 3.6" } + } +} diff --git a/terraform/modules/agent/main.tf b/terraform/modules/agent/main.tf new file mode 100644 index 0000000..f53acdc ---
/dev/null +++ b/terraform/modules/agent/main.tf @@ -0,0 +1,114 @@ +resource "kubernetes_namespace_v1" "agent" { + metadata { + name = var.namespace + labels = { + "app.kubernetes.io/part-of" = "llm-platform" + } + } +} + +resource "kubernetes_deployment_v1" "agent" { + metadata { + name = "agent" + namespace = kubernetes_namespace_v1.agent.metadata[0].name + labels = { app = "agent" } + } + spec { + replicas = 1 + selector { + match_labels = { app = "agent" } + } + template { + metadata { + labels = { app = "agent" } + annotations = { + # Bounce the pod when agent.py changes on disk, even if image tag is unchanged. + "checksum/code" = substr(sha256(file(var.agent_source_path)), 0, 16) + } + } + spec { + container { + name = "agent" + image = var.agent_image + image_pull_policy = "IfNotPresent" + env { + name = "OPENAI_BASE_URL" + value = var.llm_service_url + } + env { + name = "MODEL" + value = var.model_alias + } + port { + name = "http" + container_port = 8001 + } + readiness_probe { + http_get { + path = "/health" + port = "http" + } + initial_delay_seconds = 3 + period_seconds = 5 + failure_threshold = 10 + } + liveness_probe { + http_get { + path = "/health" + port = "http" + } + initial_delay_seconds = 30 + period_seconds = 30 + } + resources { + requests = { cpu = "100m", memory = "128Mi" } + limits = { cpu = "1", memory = "512Mi" } + } + } + } + } + } +} + +resource "kubernetes_service_v1" "agent" { + metadata { + name = "agent" + namespace = kubernetes_namespace_v1.agent.metadata[0].name + labels = { app = "agent" } + } + spec { + selector = { app = "agent" } + port { + name = "http" + port = 8001 + target_port = "http" + } + } +} + +resource "kubernetes_ingress_v1" "agent" { + metadata { + name = "agent" + namespace = kubernetes_namespace_v1.agent.metadata[0].name + } + spec { + ingress_class_name = var.ingress_class + rule { + host = var.ingress_host + http { + path { + path = "/" + path_type = "Prefix" + backend { + service { + name = 
kubernetes_service_v1.agent.metadata[0].name + port { + number = 8001 + } + } + } + } + } + } + } +} diff --git a/terraform/modules/agent/outputs.tf b/terraform/modules/agent/outputs.tf new file mode 100644 index 0000000..ac9932b --- /dev/null +++ b/terraform/modules/agent/outputs.tf @@ -0,0 +1,11 @@ +output "service_dns" { + value = "${kubernetes_service_v1.agent.metadata[0].name}.${kubernetes_namespace_v1.agent.metadata[0].name}.svc.cluster.local" +} + +output "ingress_host" { + value = var.ingress_host +} + +output "namespace" { + value = kubernetes_namespace_v1.agent.metadata[0].name +} diff --git a/terraform/modules/agent/variables.tf b/terraform/modules/agent/variables.tf new file mode 100644 index 0000000..6f525ee --- /dev/null +++ b/terraform/modules/agent/variables.tf @@ -0,0 +1,33 @@ +variable "namespace" { + type = string +} + +variable "agent_source_path" { + type = string + description = "Absolute path to agent/agent.py. Used only to bounce pods on code change." +} + +variable "agent_image" { + type = string + default = "localhost/agent:0.1.0" + description = "Pre-built agent image. Must be loaded into kind with `make agent-build`." +} + +variable "llm_service_url" { + type = string + description = "OpenAI-compatible base URL, e.g. 
http://llm-llm-app.llm-prod.svc.cluster.local:8000/v1" +} + +variable "model_alias" { + type = string + default = "Qwen2.5-1.5B-Instruct" +} + +variable "ingress_host" { + type = string +} + +variable "ingress_class" { + type = string + default = "nginx" +} diff --git a/terraform/modules/agent/versions.tf b/terraform/modules/agent/versions.tf new file mode 100644 index 0000000..4242705 --- /dev/null +++ b/terraform/modules/agent/versions.tf @@ -0,0 +1,5 @@ +terraform { + required_providers { + kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } + } +} diff --git a/terraform/modules/llm/main.tf b/terraform/modules/llm/main.tf new file mode 100644 index 0000000..cd22019 --- /dev/null +++ b/terraform/modules/llm/main.tf @@ -0,0 +1,99 @@ +resource "kubernetes_namespace_v1" "this" { + metadata { + name = var.namespace + labels = { + "app.kubernetes.io/part-of" = "llm-platform" + } + } +} + +resource "kubernetes_horizontal_pod_autoscaler_v2" "llm" { + count = var.hpa.enabled ? 
1 : 0 + + metadata { + name = "${var.release_name}-llm-app" + namespace = kubernetes_namespace_v1.this.metadata[0].name + } + spec { + scale_target_ref { + api_version = "apps/v1" + kind = "Deployment" + name = "${var.release_name}-llm-app" + } + min_replicas = var.hpa.min_replicas + max_replicas = var.hpa.max_replicas + + metric { + type = "Pods" + pods { + metric { + name = var.hpa.metric_name + } + target { + type = "AverageValue" + average_value = var.hpa.target_average_value + } + } + } + } + + depends_on = [helm_release.llm] +} + +resource "helm_release" "llm" { + name = var.release_name + chart = var.chart_path + namespace = kubernetes_namespace_v1.this.metadata[0].name + create_namespace = false + atomic = false + wait = true + timeout = 1800 + + values = [ + yamlencode({ + replicaCount = var.replicas + + image = { + repository = var.image_repository + tag = var.image_tag + digest = var.image_digest + pullPolicy = "IfNotPresent" + } + + model = { + name = var.model_name + alias = var.model_alias + maxModelLen = var.max_model_len + dtype = var.dtype + } + + server = { + port = 8000 + ompThreads = var.omp_threads + extraArgs = var.extra_args + } + + resources = var.resources + + ingress = { + enabled = true + className = var.ingress_class + host = var.ingress_host + } + + monitoring = { + serviceMonitor = { + enabled = true + interval = "15s" + labels = { + release = var.service_monitor_release_label + } + } + } + + modelCache = { + sizeLimit = var.model_cache_size + } + }), + ] +} diff --git a/terraform/modules/llm/outputs.tf b/terraform/modules/llm/outputs.tf new file mode 100644 index 0000000..a953e73 --- /dev/null +++ b/terraform/modules/llm/outputs.tf @@ -0,0 +1,12 @@ +output "service_dns" { + value = "${var.release_name}-llm-app.${var.namespace}.svc.cluster.local" + description = "In-cluster DNS name for the LLM Service." 
+}
+
+output "ingress_host" {
+  value = var.ingress_host
+}
+
+output "namespace" {
+  value = kubernetes_namespace_v1.this.metadata[0].name
+}
diff --git a/terraform/modules/llm/variables.tf b/terraform/modules/llm/variables.tf
new file mode 100644
index 0000000..3a7d8f7
--- /dev/null
+++ b/terraform/modules/llm/variables.tf
@@ -0,0 +1,112 @@
+variable "release_name" {
+  type        = string
+  description = "Helm release name."
+}
+
+variable "namespace" {
+  type        = string
+  description = "Kubernetes namespace to deploy into."
+}
+
+variable "chart_path" {
+  type        = string
+  description = "Path to the local llm-app chart."
+}
+
+variable "replicas" {
+  type    = number
+  default = 1
+}
+
+variable "model_name" {
+  type        = string
+  description = "HuggingFace repo id, passed as vLLM model_tag (positional)."
+}
+
+variable "model_alias" {
+  type        = string
+  description = "Value clients pass in the OpenAI 'model' field (maps to --served-model-name)."
+}
+
+variable "max_model_len" {
+  type    = number
+  default = 2048
+}
+
+variable "dtype" {
+  type    = string
+  default = "bfloat16"
+}
+
+variable "omp_threads" {
+  type        = number
+  default     = 0
+  description = "OMP_NUM_THREADS for vLLM CPU backend. 0 = autodetect."
+}
+
+variable "extra_args" {
+  type        = list(string)
+  default     = []
+  description = "Extra CLI args passed to `vllm serve`, appended after the stock set."
+}
+
+variable "resources" {
+  type = object({
+    requests = object({ cpu = string, memory = string })
+    limits   = object({ cpu = string, memory = string })
+  })
+}
+
+variable "ingress_host" {
+  type = string
+}
+
+variable "ingress_class" {
+  type    = string
+  default = "nginx"
+}
+
+variable "image_repository" {
+  type    = string
+  default = "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo"
+}
+
+variable "image_tag" {
+  type        = string
+  default     = "latest" # NOTE(review): mutable tag; set image_digest for reproducible deploys
+  description = "Used only when image_digest is empty."
+}
+
+variable "image_digest" {
+  type        = string
+  default     = ""
+  description = "Optional sha256:abc... content-addressable digest. Takes precedence over image_tag."
+}
+
+variable "service_monitor_release_label" {
+  type        = string
+  default     = "kube-prometheus-stack"
+  description = "Must match the release label the Prometheus Operator selects on."
+}
+
+variable "model_cache_size" {
+  type    = string
+  default = "10Gi"
+}
+
+variable "hpa" {
+  type = object({ # every attribute is optional so callers can override just a subset, e.g. { enabled = true }
+    enabled              = optional(bool, false)
+    min_replicas         = optional(number, 1)
+    max_replicas         = optional(number, 3)
+    metric_name          = optional(string, "vllm:num_requests_running")
+    target_average_value = optional(string, "500m")
+  })
+  default = {
+    enabled              = false
+    min_replicas         = 1
+    max_replicas         = 3
+    metric_name          = "vllm:num_requests_running"
+    target_average_value = "500m"
+  }
+}
diff --git a/terraform/modules/observability/main.tf b/terraform/modules/observability/main.tf
new file mode 100644
index 0000000..2f88f2e
--- /dev/null
+++ b/terraform/modules/observability/main.tf
@@ -0,0 +1,156 @@
+resource "kubernetes_namespace_v1" "monitoring" {
+  metadata {
+    name = var.namespace
+  }
+}
+
+resource "kubernetes_namespace_v1" "ingress" {
+  metadata {
+    name = "ingress-nginx"
+  }
+}
+
+resource "helm_release" "ingress_nginx" {
+  name       = "ingress-nginx"
+  repository = "https://kubernetes.github.io/ingress-nginx"
+  chart      = "ingress-nginx"
+  version    = var.ingress_nginx_version
+  namespace  = kubernetes_namespace_v1.ingress.metadata[0].name
+  wait       = true
+  timeout    = 300
+
+  values = [
+    yamlencode({
+      controller = {
+        hostPort = { enabled = true, ports = { http = 80, https = 443 } }
+        service  = { type = "NodePort" }
+        nodeSelector = {
+          "ingress-ready" = "true"
+        }
+        tolerations = [
+          { key = "node-role.kubernetes.io/control-plane", operator = "Equal", effect = "NoSchedule" },
+          { key = "node-role.kubernetes.io/master", operator = "Equal", effect = "NoSchedule" },
+        ]
+        publishService    = { enabled = false }
+        admissionWebhooks = { enabled = false } # speeds up kind cluster installs
+        # Cap worker_processes so nginx doesn't start 14 workers (auto = one per
+        # CPU) under CPU pressure from vLLM cold-starts; with auto it sometimes
+        # hits pthread EAGAIN and workers die without respawn.
+        config = {
+          "worker-processes" = "4"
+        }
+      }
+    }),
+  ]
+}
+
+resource "helm_release" "kps" {
+  name       = "kube-prometheus-stack"
+  repository = "https://prometheus-community.github.io/helm-charts"
+  chart      = "kube-prometheus-stack"
+  version    = var.kps_version
+  namespace  = kubernetes_namespace_v1.monitoring.metadata[0].name
+  wait       = true
+  timeout    = 600
+
+  values = [
+    yamlencode({
+      fullnameOverride = "kps"
+      prometheus = {
+        prometheusSpec = {
+          # *NilUsesHelmValues = false leaves the selectors empty, so Prometheus picks
+          # up every ServiceMonitor/PodMonitor/rule in any namespace, whatever its labels.
+          serviceMonitorSelectorNilUsesHelmValues = false
+          podMonitorSelectorNilUsesHelmValues     = false
+          ruleSelectorNilUsesHelmValues           = false
+          retention                               = "2d"
+          resources = {
+            requests = { cpu = "100m", memory = "400Mi" }
+            limits   = { memory = "1Gi" }
+          }
+        }
+        ingress = {
+          enabled          = true
+          ingressClassName = "nginx"
+          hosts            = ["prom.localtest.me"]
+        }
+      }
+      alertmanager = { enabled = false }
+      grafana = {
+        adminPassword = var.grafana_admin_password
+        sidecar = {
+          dashboards = {
+            enabled         = true
+            label           = "grafana_dashboard"
+            labelValue      = "1"
+            searchNamespace = "ALL"
+          }
+        }
+        service = { type = "ClusterIP" }
+        ingress = {
+          enabled          = true
+          ingressClassName = "nginx"
+          hosts            = ["grafana.localtest.me"]
+        }
+      }
+    }),
+  ]
+}
+
+resource "helm_release" "prometheus_adapter" {
+  name       = "prometheus-adapter"
+  repository = "https://prometheus-community.github.io/helm-charts"
+  chart      = "prometheus-adapter"
+  version    = var.prometheus_adapter_version
+  namespace  = kubernetes_namespace_v1.monitoring.metadata[0].name
+  wait       = true
+  timeout    = 300
+
+  values = [
+    yamlencode({
+      prometheus = {
+        url  = "http://kps-prometheus.${kubernetes_namespace_v1.monitoring.metadata[0].name}.svc"
+        port = 9090
+      }
+      rules = {
+        default = false
+        custom = [
+          {
+            # In-flight request count per pod; basis for autoscaling.
+            # vLLM exposes this as a gauge per model-engine.
+            seriesQuery = "vllm:num_requests_running{namespace!=\"\",pod!=\"\"}"
+            resources = {
+              overrides = {
+                namespace = { resource = "namespace" }
+                pod       = { resource = "pod" }
+              }
+            }
+            name = {
+              matches = "^vllm:num_requests_running$"
+              as      = "vllm:num_requests_running" # same name the llm module's hpa.metric_name defaults to
+            }
+            metricsQuery = "avg(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
+          },
+          {
+            # Waiting (queued) requests per pod — an alternative scale signal.
+            seriesQuery = "vllm:num_requests_waiting{namespace!=\"\",pod!=\"\"}"
+            resources = {
+              overrides = {
+                namespace = { resource = "namespace" }
+                pod       = { resource = "pod" }
+              }
+            }
+            name = {
+              matches = "^vllm:num_requests_waiting$"
+              as      = "vllm:num_requests_waiting"
+            }
+            metricsQuery = "avg(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
+          },
+        ]
+      }
+    }),
+  ]
+
+  depends_on = [helm_release.kps] # the adapter queries the Prometheus service created by the kps release
+}
+
diff --git a/terraform/modules/observability/outputs.tf b/terraform/modules/observability/outputs.tf
new file mode 100644
index 0000000..06a507d
--- /dev/null
+++ b/terraform/modules/observability/outputs.tf
@@ -0,0 +1,11 @@
+output "namespace" {
+  value = kubernetes_namespace_v1.monitoring.metadata[0].name
+}
+
+output "grafana_service" { # NOTE(review): assumes the Grafana subchart names its service <release>-grafana, unaffected by fullnameOverride = "kps" — confirm
+  value = "kube-prometheus-stack-grafana.${kubernetes_namespace_v1.monitoring.metadata[0].name}.svc.cluster.local"
+}
+
+output "prometheus_service" { # "kps-" prefix comes from fullnameOverride on the kube-prometheus-stack release
+  value = "kps-prometheus.${kubernetes_namespace_v1.monitoring.metadata[0].name}.svc.cluster.local"
+}
diff --git a/terraform/modules/observability/variables.tf b/terraform/modules/observability/variables.tf
new file mode 100644
index 0000000..6aeaca3
--- /dev/null
+++ b/terraform/modules/observability/variables.tf
@@ -0,0 +1,27 @@
+variable "namespace" {
+  type    = string
+  default = "monitoring"
+}
+
+variable "kps_version" {
+  type        = string
+  default     = "65.5.1"
+  description = "kube-prometheus-stack chart version."
+}
+
+variable "ingress_nginx_version" {
+  type        = string
+  default     = "4.11.3"
+  description = "ingress-nginx chart version."
+}
+
+variable "grafana_admin_password" {
+  type      = string
+  default   = "admin" # NOTE(review): weak default; Grafana is published through an ingress, so override this outside a local cluster
+  sensitive = true
+}
+
+variable "prometheus_adapter_version" {
+  type    = string
+  default = "4.11.0"
+}
-- cgit