From d3e770254de0bb301815ca87257c8b1a357d06c4 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 26 Apr 2026 21:02:47 +0800 Subject: hehe --- terraform/envs/prod/.terraform.lock.hcl | 54 +++++++++++++++++++++++++ terraform/envs/prod/backend.tf | 5 +++ terraform/envs/prod/main.tf | 70 +++++++++++++++++++++++++++++++++ terraform/envs/prod/variables.tf | 14 +++++++ terraform/envs/prod/versions.tf | 8 ++++ 5 files changed, 151 insertions(+) create mode 100644 terraform/envs/prod/.terraform.lock.hcl create mode 100644 terraform/envs/prod/backend.tf create mode 100644 terraform/envs/prod/main.tf create mode 100644 terraform/envs/prod/variables.tf create mode 100644 terraform/envs/prod/versions.tf (limited to 'terraform/envs/prod') diff --git a/terraform/envs/prod/.terraform.lock.hcl b/terraform/envs/prod/.terraform.lock.hcl new file mode 100644 index 0000000..09902a1 --- /dev/null +++ b/terraform/envs/prod/.terraform.lock.hcl @@ -0,0 +1,54 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. 
+ +provider "registry.opentofu.org/hashicorp/helm" { + version = "2.17.0" + constraints = "~> 2.17" + hashes = [ + "h1:69PnHoYrrDrm7C8+8PiSvRGPI55taqL14SvQR/FGM+g=", + "zh:02690815e35131a42cb9851f63a3369c216af30ad093d05b39001d43da04b56b", + "zh:27a62f12b29926387f4d71aeeee9f7ffa0ccb81a1b6066ee895716ad050d1b7a", + "zh:2d0a5babfa73604b3fefc9dab9c87f91c77fce756c2e32b294e9f1290aed26c0", + "zh:3976400ceba6dda4636e1d297e3097e1831de5628afa534a166de98a70d1dcbe", + "zh:54440ef14f342b41d75c1aded7487bfcc3f76322b75894235b47b7e89ac4bfa4", + "zh:6512e2ab9f2fa31cbb90d9249647b5c5798f62eb1215ec44da2cdaa24e38ad25", + "zh:795f327ca0b8c5368af0ed03d5d4f6da7260692b4b3ca0bd004ed542e683464d", + "zh:ba659e1d94f224bc3f1fd34cbb9d2663e3a8e734108e5a58eb49eda84b140978", + "zh:c5c8575c4458835c2acbc3d1ed5570589b14baa2525d8fbd04295c097caf41eb", + "zh:e0877a5dac3de138e61eefa26b2f5a13305a17259779465899880f70e11314e0", + ] +} + +provider "registry.opentofu.org/hashicorp/kubernetes" { + version = "2.38.0" + constraints = "~> 2.31" + hashes = [ + "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", + "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", + "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", + "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", + "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", + "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", + "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", + "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", + "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", + "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", + ] +} + +provider "registry.opentofu.org/hashicorp/random" { + version = "3.8.1" + constraints = "~> 3.6" + hashes = [ + "h1:EHn3jsqOKhWjbg0X+psk0Ww96yz3N7ASqEKKuFvDFwo=", + "zh:25c458c7c676f15705e872202dad7dcd0982e4a48e7ea1800afa5fc64e77f4c8", + 
"zh:2edeaf6f1b20435b2f81855ad98a2e70956d473be9e52a5fdf57ccd0098ba476", + "zh:44becb9d5f75d55e36dfed0c5beabaf4c92e0a2bc61a3814d698271c646d48e7", + "zh:7699032612c3b16cc69928add8973de47b10ce81b1141f30644a0e8a895b5cd3", + "zh:86d07aa98d17703de9fbf402c89590dc1e01dbe5671dd6bc5e487eb8fe87eee0", + "zh:8c411c77b8390a49a8a1bc9f176529e6b32369dd33a723606c8533e5ca4d68c1", + "zh:a5ecc8255a612652a56b28149994985e2c4dc046e5d34d416d47fa7767f5c28f", + "zh:aea3fe1a5669b932eda9c5c72e5f327db8da707fe514aaca0d0ef60cb24892f9", + "zh:f56e26e6977f755d7ae56fa6320af96ecf4bb09580d47cb481efbf27f1c5afff", + ] +} diff --git a/terraform/envs/prod/backend.tf b/terraform/envs/prod/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/prod/backend.tf @@ -0,0 +1,5 @@ +terraform { + backend "local" { + path = "terraform.tfstate" + } +} diff --git a/terraform/envs/prod/main.tf b/terraform/envs/prod/main.tf new file mode 100644 index 0000000..04db31d --- /dev/null +++ b/terraform/envs/prod/main.tf @@ -0,0 +1,70 @@ +provider "kubernetes" { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context +} + +provider "helm" { + kubernetes { + config_path = pathexpand(var.kubeconfig) + config_context = var.kube_context + } +} + +locals { + env = "prod" +} + +module "llm" { + source = "../../modules/llm" + + release_name = "llm" + namespace = "llm-${local.env}" + chart_path = var.chart_path + + replicas = 1 + + model_name = "Qwen/Qwen2.5-1.5B-Instruct" + model_alias = "Qwen2.5-1.5B-Instruct" + max_model_len = 4096 + dtype = "bfloat16" + omp_threads = 6 + + resources = { + requests = { cpu = "2", memory = "4Gi" } + limits = { cpu = "6", memory = "8Gi" } + } + + ingress_host = "llm.prod.localtest.me" + image_tag = "latest" + # Content-addressable pin, resolved with scripts/resolve-digests.sh on amd64. + # Per-arch digest — re-resolve on a different arch or after an upstream tag move. + # Dev intentionally runs on `:latest` so new fixes flow in without a PR. 
+ image_digest = "sha256:bb7ed9b6c595334d78179e9d8f6490e06bf9220ed4a10b9b4e15064454ddc69e" + + # Enable OpenAI tool-calling so the agent's function-call path works. + # Qwen 2.5 uses hermes-style tool parsing in vLLM. + extra_args = [ + "--enable-auto-tool-choice", + "--tool-call-parser", "hermes", + ] + + hpa = { + enabled = true + min_replicas = 1 + max_replicas = 3 + # vLLM exposes `vllm:num_requests_running` as a per-pod gauge of in-flight + # requests. Scale up when the average per pod exceeds 0.5 ("500m"). + metric_name = "vllm:num_requests_running" + target_average_value = "500m" + } +} + +output "ingress_host" { value = module.llm.ingress_host } +output "service_dns" { value = module.llm.service_dns } +output "curl_example" { + value = <<-EOT + curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -d '{"model":"Qwen2.5-1.5B-Instruct","messages":[{"role":"user","content":"Say hi."}]}' + EOT +} diff --git a/terraform/envs/prod/variables.tf b/terraform/envs/prod/variables.tf new file mode 100644 index 0000000..9f1b697 --- /dev/null +++ b/terraform/envs/prod/variables.tf @@ -0,0 +1,14 @@ +variable "kubeconfig" { + type = string + default = "~/.kube/config" +} + +variable "kube_context" { + type = string + default = "kind-llm-local" +} + +variable "chart_path" { + type = string + description = "Absolute path to charts/llm-app" +} diff --git a/terraform/envs/prod/versions.tf b/terraform/envs/prod/versions.tf new file mode 100644 index 0000000..6a87674 --- /dev/null +++ b/terraform/envs/prod/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_version = ">= 1.6.0" + required_providers { + helm = { source = "hashicorp/helm", version = "~> 2.17" } + kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } + random = { source = "hashicorp/random", version = "~> 3.6" } + } +} -- cgit