diff options
Diffstat (limited to 'terraform/envs')
| -rw-r--r-- | terraform/envs/agent/.terraform.lock.hcl | 19 | ||||
| -rw-r--r-- | terraform/envs/agent/backend.tf | 5 | ||||
| -rw-r--r-- | terraform/envs/agent/main.tf | 27 | ||||
| -rw-r--r-- | terraform/envs/agent/variables.tf | 14 | ||||
| -rw-r--r-- | terraform/envs/agent/versions.tf | 6 | ||||
| -rw-r--r-- | terraform/envs/bootstrap/.terraform.lock.hcl | 37 | ||||
| -rw-r--r-- | terraform/envs/bootstrap/backend.tf | 5 | ||||
| -rw-r--r-- | terraform/envs/bootstrap/main.tf | 25 | ||||
| -rw-r--r-- | terraform/envs/bootstrap/variables.tf | 15 | ||||
| -rw-r--r-- | terraform/envs/bootstrap/versions.tf | 7 | ||||
| -rw-r--r-- | terraform/envs/dev/.terraform.lock.hcl | 54 | ||||
| -rw-r--r-- | terraform/envs/dev/backend.tf | 5 | ||||
| -rw-r--r-- | terraform/envs/dev/main.tf | 49 | ||||
| -rw-r--r-- | terraform/envs/dev/variables.tf | 14 | ||||
| -rw-r--r-- | terraform/envs/dev/versions.tf | 8 | ||||
| -rw-r--r-- | terraform/envs/prod/.terraform.lock.hcl | 54 | ||||
| -rw-r--r-- | terraform/envs/prod/backend.tf | 5 | ||||
| -rw-r--r-- | terraform/envs/prod/main.tf | 70 | ||||
| -rw-r--r-- | terraform/envs/prod/variables.tf | 14 | ||||
| -rw-r--r-- | terraform/envs/prod/versions.tf | 8 |
20 files changed, 441 insertions, 0 deletions
diff --git a/terraform/envs/agent/.terraform.lock.hcl b/terraform/envs/agent/.terraform.lock.hcl new file mode 100644 index 0000000..605df33 --- /dev/null +++ b/terraform/envs/agent/.terraform.lock.hcl | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | # This file is maintained automatically by "tofu init". | ||
| 2 | # Manual edits may be lost in future updates. | ||
| 3 | |||
| 4 | provider "registry.opentofu.org/hashicorp/kubernetes" { | ||
| 5 | version = "2.38.0" | ||
| 6 | constraints = "~> 2.31" | ||
| 7 | hashes = [ | ||
| 8 | "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", | ||
| 9 | "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", | ||
| 10 | "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", | ||
| 11 | "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", | ||
| 12 | "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", | ||
| 13 | "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", | ||
| 14 | "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", | ||
| 15 | "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", | ||
| 16 | "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", | ||
| 17 | "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", | ||
| 18 | ] | ||
| 19 | } | ||
diff --git a/terraform/envs/agent/backend.tf b/terraform/envs/agent/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/agent/backend.tf | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | terraform { | ||
| 2 | backend "local" { | ||
| 3 | path = "terraform.tfstate" | ||
| 4 | } | ||
| 5 | } | ||
diff --git a/terraform/envs/agent/main.tf b/terraform/envs/agent/main.tf new file mode 100644 index 0000000..122eaca --- /dev/null +++ b/terraform/envs/agent/main.tf | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | provider "kubernetes" { | ||
| 2 | config_path = pathexpand(var.kubeconfig) | ||
| 3 | config_context = var.kube_context | ||
| 4 | } | ||
| 5 | |||
| 6 | module "agent" { | ||
| 7 | source = "../../modules/agent" | ||
| 8 | |||
| 9 | namespace = "agent" | ||
| 10 | agent_source_path = var.agent_source_path | ||
| 11 | |||
| 12 | # Point at the prod LLM. The fully qualified `<service>.<namespace>.svc.cluster.local` name resolves from any namespace. | ||
| 13 | llm_service_url = "http://llm-llm-app.llm-prod.svc.cluster.local:8000/v1" | ||
| 14 | model_alias = "Qwen2.5-1.5B-Instruct" | ||
| 15 | |||
| 16 | ingress_host = "agent.localtest.me" | ||
| 17 | } | ||
| 18 | |||
| 19 | output "ingress_host" { value = module.agent.ingress_host } | ||
| 20 | output "service_dns" { value = module.agent.service_dns } | ||
| 21 | output "curl_example" { | ||
| 22 | value = <<-EOT | ||
| 23 | curl -s http://${module.agent.ingress_host}:8080/ask \ | ||
| 24 | -H 'Content-Type: application/json' \ | ||
| 25 | -d '{"question":"what is 123 * 47?"}' | ||
| 26 | EOT | ||
| 27 | } | ||
diff --git a/terraform/envs/agent/variables.tf b/terraform/envs/agent/variables.tf new file mode 100644 index 0000000..bf005b9 --- /dev/null +++ b/terraform/envs/agent/variables.tf | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | variable "kubeconfig" { | ||
| 2 | type = string | ||
| 3 | default = "~/.kube/config" | ||
| 4 | } | ||
| 5 | |||
| 6 | variable "kube_context" { | ||
| 7 | type = string | ||
| 8 | default = "kind-llm-local" | ||
| 9 | } | ||
| 10 | |||
| 11 | variable "agent_source_path" { | ||
| 12 | type = string | ||
| 13 | description = "Absolute path to agent/agent.py" | ||
| 14 | } | ||
diff --git a/terraform/envs/agent/versions.tf b/terraform/envs/agent/versions.tf new file mode 100644 index 0000000..69cf77e --- /dev/null +++ b/terraform/envs/agent/versions.tf | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | terraform { | ||
| 2 | required_version = ">= 1.6.0" | ||
| 3 | required_providers { | ||
| 4 | kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } | ||
| 5 | } | ||
| 6 | } | ||
diff --git a/terraform/envs/bootstrap/.terraform.lock.hcl b/terraform/envs/bootstrap/.terraform.lock.hcl new file mode 100644 index 0000000..baa0088 --- /dev/null +++ b/terraform/envs/bootstrap/.terraform.lock.hcl | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | # This file is maintained automatically by "tofu init". | ||
| 2 | # Manual edits may be lost in future updates. | ||
| 3 | |||
| 4 | provider "registry.opentofu.org/hashicorp/helm" { | ||
| 5 | version = "2.17.0" | ||
| 6 | constraints = "~> 2.17" | ||
| 7 | hashes = [ | ||
| 8 | "h1:69PnHoYrrDrm7C8+8PiSvRGPI55taqL14SvQR/FGM+g=", | ||
| 9 | "zh:02690815e35131a42cb9851f63a3369c216af30ad093d05b39001d43da04b56b", | ||
| 10 | "zh:27a62f12b29926387f4d71aeeee9f7ffa0ccb81a1b6066ee895716ad050d1b7a", | ||
| 11 | "zh:2d0a5babfa73604b3fefc9dab9c87f91c77fce756c2e32b294e9f1290aed26c0", | ||
| 12 | "zh:3976400ceba6dda4636e1d297e3097e1831de5628afa534a166de98a70d1dcbe", | ||
| 13 | "zh:54440ef14f342b41d75c1aded7487bfcc3f76322b75894235b47b7e89ac4bfa4", | ||
| 14 | "zh:6512e2ab9f2fa31cbb90d9249647b5c5798f62eb1215ec44da2cdaa24e38ad25", | ||
| 15 | "zh:795f327ca0b8c5368af0ed03d5d4f6da7260692b4b3ca0bd004ed542e683464d", | ||
| 16 | "zh:ba659e1d94f224bc3f1fd34cbb9d2663e3a8e734108e5a58eb49eda84b140978", | ||
| 17 | "zh:c5c8575c4458835c2acbc3d1ed5570589b14baa2525d8fbd04295c097caf41eb", | ||
| 18 | "zh:e0877a5dac3de138e61eefa26b2f5a13305a17259779465899880f70e11314e0", | ||
| 19 | ] | ||
| 20 | } | ||
| 21 | |||
| 22 | provider "registry.opentofu.org/hashicorp/kubernetes" { | ||
| 23 | version = "2.38.0" | ||
| 24 | constraints = "~> 2.31" | ||
| 25 | hashes = [ | ||
| 26 | "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", | ||
| 27 | "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", | ||
| 28 | "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", | ||
| 29 | "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", | ||
| 30 | "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", | ||
| 31 | "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", | ||
| 32 | "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", | ||
| 33 | "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", | ||
| 34 | "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", | ||
| 35 | "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", | ||
| 36 | ] | ||
| 37 | } | ||
diff --git a/terraform/envs/bootstrap/backend.tf b/terraform/envs/bootstrap/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/bootstrap/backend.tf | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | terraform { | ||
| 2 | backend "local" { | ||
| 3 | path = "terraform.tfstate" | ||
| 4 | } | ||
| 5 | } | ||
diff --git a/terraform/envs/bootstrap/main.tf b/terraform/envs/bootstrap/main.tf new file mode 100644 index 0000000..07bf04d --- /dev/null +++ b/terraform/envs/bootstrap/main.tf | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | provider "kubernetes" { | ||
| 2 | config_path = pathexpand(var.kubeconfig) | ||
| 3 | config_context = var.kube_context | ||
| 4 | } | ||
| 5 | |||
| 6 | provider "helm" { | ||
| 7 | kubernetes { | ||
| 8 | config_path = pathexpand(var.kubeconfig) | ||
| 9 | config_context = var.kube_context | ||
| 10 | } | ||
| 11 | } | ||
| 12 | |||
| 13 | module "observability" { | ||
| 14 | source = "../../modules/observability" | ||
| 15 | namespace = "monitoring" | ||
| 16 | grafana_admin_password = var.grafana_admin_password | ||
| 17 | } | ||
| 18 | |||
| 19 | output "grafana" { | ||
| 20 | value = module.observability.grafana_service | ||
| 21 | } | ||
| 22 | |||
| 23 | output "prometheus" { | ||
| 24 | value = module.observability.prometheus_service | ||
| 25 | } | ||
diff --git a/terraform/envs/bootstrap/variables.tf b/terraform/envs/bootstrap/variables.tf new file mode 100644 index 0000000..220bed3 --- /dev/null +++ b/terraform/envs/bootstrap/variables.tf | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | variable "kubeconfig" { | ||
| 2 | type = string | ||
| 3 | default = "~/.kube/config" | ||
| 4 | } | ||
| 5 | |||
| 6 | variable "kube_context" { | ||
| 7 | type = string | ||
| 8 | default = "kind-llm-local" | ||
| 9 | } | ||
| 10 | |||
| 11 | variable "grafana_admin_password" { | ||
| 12 | type = string | ||
| 13 | default = "admin" | ||
| 14 | sensitive = true | ||
| 15 | } | ||
diff --git a/terraform/envs/bootstrap/versions.tf b/terraform/envs/bootstrap/versions.tf new file mode 100644 index 0000000..0d7f77b --- /dev/null +++ b/terraform/envs/bootstrap/versions.tf | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | terraform { | ||
| 2 | required_version = ">= 1.6.0" | ||
| 3 | required_providers { | ||
| 4 | helm = { source = "hashicorp/helm", version = "~> 2.17" } | ||
| 5 | kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } | ||
| 6 | } | ||
| 7 | } | ||
diff --git a/terraform/envs/dev/.terraform.lock.hcl b/terraform/envs/dev/.terraform.lock.hcl new file mode 100644 index 0000000..09902a1 --- /dev/null +++ b/terraform/envs/dev/.terraform.lock.hcl | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | # This file is maintained automatically by "tofu init". | ||
| 2 | # Manual edits may be lost in future updates. | ||
| 3 | |||
| 4 | provider "registry.opentofu.org/hashicorp/helm" { | ||
| 5 | version = "2.17.0" | ||
| 6 | constraints = "~> 2.17" | ||
| 7 | hashes = [ | ||
| 8 | "h1:69PnHoYrrDrm7C8+8PiSvRGPI55taqL14SvQR/FGM+g=", | ||
| 9 | "zh:02690815e35131a42cb9851f63a3369c216af30ad093d05b39001d43da04b56b", | ||
| 10 | "zh:27a62f12b29926387f4d71aeeee9f7ffa0ccb81a1b6066ee895716ad050d1b7a", | ||
| 11 | "zh:2d0a5babfa73604b3fefc9dab9c87f91c77fce756c2e32b294e9f1290aed26c0", | ||
| 12 | "zh:3976400ceba6dda4636e1d297e3097e1831de5628afa534a166de98a70d1dcbe", | ||
| 13 | "zh:54440ef14f342b41d75c1aded7487bfcc3f76322b75894235b47b7e89ac4bfa4", | ||
| 14 | "zh:6512e2ab9f2fa31cbb90d9249647b5c5798f62eb1215ec44da2cdaa24e38ad25", | ||
| 15 | "zh:795f327ca0b8c5368af0ed03d5d4f6da7260692b4b3ca0bd004ed542e683464d", | ||
| 16 | "zh:ba659e1d94f224bc3f1fd34cbb9d2663e3a8e734108e5a58eb49eda84b140978", | ||
| 17 | "zh:c5c8575c4458835c2acbc3d1ed5570589b14baa2525d8fbd04295c097caf41eb", | ||
| 18 | "zh:e0877a5dac3de138e61eefa26b2f5a13305a17259779465899880f70e11314e0", | ||
| 19 | ] | ||
| 20 | } | ||
| 21 | |||
| 22 | provider "registry.opentofu.org/hashicorp/kubernetes" { | ||
| 23 | version = "2.38.0" | ||
| 24 | constraints = "~> 2.31" | ||
| 25 | hashes = [ | ||
| 26 | "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", | ||
| 27 | "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", | ||
| 28 | "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", | ||
| 29 | "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", | ||
| 30 | "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", | ||
| 31 | "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", | ||
| 32 | "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", | ||
| 33 | "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", | ||
| 34 | "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", | ||
| 35 | "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", | ||
| 36 | ] | ||
| 37 | } | ||
| 38 | |||
| 39 | provider "registry.opentofu.org/hashicorp/random" { | ||
| 40 | version = "3.8.1" | ||
| 41 | constraints = "~> 3.6" | ||
| 42 | hashes = [ | ||
| 43 | "h1:EHn3jsqOKhWjbg0X+psk0Ww96yz3N7ASqEKKuFvDFwo=", | ||
| 44 | "zh:25c458c7c676f15705e872202dad7dcd0982e4a48e7ea1800afa5fc64e77f4c8", | ||
| 45 | "zh:2edeaf6f1b20435b2f81855ad98a2e70956d473be9e52a5fdf57ccd0098ba476", | ||
| 46 | "zh:44becb9d5f75d55e36dfed0c5beabaf4c92e0a2bc61a3814d698271c646d48e7", | ||
| 47 | "zh:7699032612c3b16cc69928add8973de47b10ce81b1141f30644a0e8a895b5cd3", | ||
| 48 | "zh:86d07aa98d17703de9fbf402c89590dc1e01dbe5671dd6bc5e487eb8fe87eee0", | ||
| 49 | "zh:8c411c77b8390a49a8a1bc9f176529e6b32369dd33a723606c8533e5ca4d68c1", | ||
| 50 | "zh:a5ecc8255a612652a56b28149994985e2c4dc046e5d34d416d47fa7767f5c28f", | ||
| 51 | "zh:aea3fe1a5669b932eda9c5c72e5f327db8da707fe514aaca0d0ef60cb24892f9", | ||
| 52 | "zh:f56e26e6977f755d7ae56fa6320af96ecf4bb09580d47cb481efbf27f1c5afff", | ||
| 53 | ] | ||
| 54 | } | ||
diff --git a/terraform/envs/dev/backend.tf b/terraform/envs/dev/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/dev/backend.tf | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | terraform { | ||
| 2 | backend "local" { | ||
| 3 | path = "terraform.tfstate" | ||
| 4 | } | ||
| 5 | } | ||
diff --git a/terraform/envs/dev/main.tf b/terraform/envs/dev/main.tf new file mode 100644 index 0000000..8e1b882 --- /dev/null +++ b/terraform/envs/dev/main.tf | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | provider "kubernetes" { | ||
| 2 | config_path = pathexpand(var.kubeconfig) | ||
| 3 | config_context = var.kube_context | ||
| 4 | } | ||
| 5 | |||
| 6 | provider "helm" { | ||
| 7 | kubernetes { | ||
| 8 | config_path = pathexpand(var.kubeconfig) | ||
| 9 | config_context = var.kube_context | ||
| 10 | } | ||
| 11 | } | ||
| 12 | |||
| 13 | locals { | ||
| 14 | env = "dev" | ||
| 15 | } | ||
| 16 | |||
| 17 | module "llm" { | ||
| 18 | source = "../../modules/llm" | ||
| 19 | |||
| 20 | release_name = "llm" | ||
| 21 | namespace = "llm-${local.env}" | ||
| 22 | chart_path = var.chart_path | ||
| 23 | |||
| 24 | replicas = 2 | ||
| 25 | |||
| 26 | model_name = "Qwen/Qwen2.5-0.5B-Instruct" | ||
| 27 | model_alias = "Qwen2.5-0.5B-Instruct" | ||
| 28 | max_model_len = 2048 | ||
| 29 | dtype = "bfloat16" | ||
| 30 | omp_threads = 4 | ||
| 31 | |||
| 32 | resources = { | ||
| 33 | requests = { cpu = "1", memory = "2Gi" } | ||
| 34 | limits = { cpu = "4", memory = "6Gi" } | ||
| 35 | } | ||
| 36 | |||
| 37 | ingress_host = "llm.dev.localtest.me" | ||
| 38 | image_tag = "latest" | ||
| 39 | } | ||
| 40 | |||
| 41 | output "ingress_host" { value = module.llm.ingress_host } | ||
| 42 | output "service_dns" { value = module.llm.service_dns } | ||
| 43 | output "curl_example" { | ||
| 44 | value = <<-EOT | ||
| 45 | curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \ | ||
| 46 | -H 'Content-Type: application/json' \ | ||
| 47 | -d '{"model":"Qwen2.5-0.5B-Instruct","messages":[{"role":"user","content":"Say hi."}]}' | ||
| 48 | EOT | ||
| 49 | } | ||
diff --git a/terraform/envs/dev/variables.tf b/terraform/envs/dev/variables.tf new file mode 100644 index 0000000..9f1b697 --- /dev/null +++ b/terraform/envs/dev/variables.tf | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | variable "kubeconfig" { | ||
| 2 | type = string | ||
| 3 | default = "~/.kube/config" | ||
| 4 | } | ||
| 5 | |||
| 6 | variable "kube_context" { | ||
| 7 | type = string | ||
| 8 | default = "kind-llm-local" | ||
| 9 | } | ||
| 10 | |||
| 11 | variable "chart_path" { | ||
| 12 | type = string | ||
| 13 | description = "Absolute path to charts/llm-app" | ||
| 14 | } | ||
diff --git a/terraform/envs/dev/versions.tf b/terraform/envs/dev/versions.tf new file mode 100644 index 0000000..6a87674 --- /dev/null +++ b/terraform/envs/dev/versions.tf | |||
| @@ -0,0 +1,8 @@ | |||
| 1 | terraform { | ||
| 2 | required_version = ">= 1.6.0" | ||
| 3 | required_providers { | ||
| 4 | helm = { source = "hashicorp/helm", version = "~> 2.17" } | ||
| 5 | kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } | ||
| 6 | random = { source = "hashicorp/random", version = "~> 3.6" } | ||
| 7 | } | ||
| 8 | } | ||
diff --git a/terraform/envs/prod/.terraform.lock.hcl b/terraform/envs/prod/.terraform.lock.hcl new file mode 100644 index 0000000..09902a1 --- /dev/null +++ b/terraform/envs/prod/.terraform.lock.hcl | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | # This file is maintained automatically by "tofu init". | ||
| 2 | # Manual edits may be lost in future updates. | ||
| 3 | |||
| 4 | provider "registry.opentofu.org/hashicorp/helm" { | ||
| 5 | version = "2.17.0" | ||
| 6 | constraints = "~> 2.17" | ||
| 7 | hashes = [ | ||
| 8 | "h1:69PnHoYrrDrm7C8+8PiSvRGPI55taqL14SvQR/FGM+g=", | ||
| 9 | "zh:02690815e35131a42cb9851f63a3369c216af30ad093d05b39001d43da04b56b", | ||
| 10 | "zh:27a62f12b29926387f4d71aeeee9f7ffa0ccb81a1b6066ee895716ad050d1b7a", | ||
| 11 | "zh:2d0a5babfa73604b3fefc9dab9c87f91c77fce756c2e32b294e9f1290aed26c0", | ||
| 12 | "zh:3976400ceba6dda4636e1d297e3097e1831de5628afa534a166de98a70d1dcbe", | ||
| 13 | "zh:54440ef14f342b41d75c1aded7487bfcc3f76322b75894235b47b7e89ac4bfa4", | ||
| 14 | "zh:6512e2ab9f2fa31cbb90d9249647b5c5798f62eb1215ec44da2cdaa24e38ad25", | ||
| 15 | "zh:795f327ca0b8c5368af0ed03d5d4f6da7260692b4b3ca0bd004ed542e683464d", | ||
| 16 | "zh:ba659e1d94f224bc3f1fd34cbb9d2663e3a8e734108e5a58eb49eda84b140978", | ||
| 17 | "zh:c5c8575c4458835c2acbc3d1ed5570589b14baa2525d8fbd04295c097caf41eb", | ||
| 18 | "zh:e0877a5dac3de138e61eefa26b2f5a13305a17259779465899880f70e11314e0", | ||
| 19 | ] | ||
| 20 | } | ||
| 21 | |||
| 22 | provider "registry.opentofu.org/hashicorp/kubernetes" { | ||
| 23 | version = "2.38.0" | ||
| 24 | constraints = "~> 2.31" | ||
| 25 | hashes = [ | ||
| 26 | "h1:nY7J9jFXcsRINog0KYagiWZw1GVYF9D2JmtIB7Wnrao=", | ||
| 27 | "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", | ||
| 28 | "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", | ||
| 29 | "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", | ||
| 30 | "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", | ||
| 31 | "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", | ||
| 32 | "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", | ||
| 33 | "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", | ||
| 34 | "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", | ||
| 35 | "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", | ||
| 36 | ] | ||
| 37 | } | ||
| 38 | |||
| 39 | provider "registry.opentofu.org/hashicorp/random" { | ||
| 40 | version = "3.8.1" | ||
| 41 | constraints = "~> 3.6" | ||
| 42 | hashes = [ | ||
| 43 | "h1:EHn3jsqOKhWjbg0X+psk0Ww96yz3N7ASqEKKuFvDFwo=", | ||
| 44 | "zh:25c458c7c676f15705e872202dad7dcd0982e4a48e7ea1800afa5fc64e77f4c8", | ||
| 45 | "zh:2edeaf6f1b20435b2f81855ad98a2e70956d473be9e52a5fdf57ccd0098ba476", | ||
| 46 | "zh:44becb9d5f75d55e36dfed0c5beabaf4c92e0a2bc61a3814d698271c646d48e7", | ||
| 47 | "zh:7699032612c3b16cc69928add8973de47b10ce81b1141f30644a0e8a895b5cd3", | ||
| 48 | "zh:86d07aa98d17703de9fbf402c89590dc1e01dbe5671dd6bc5e487eb8fe87eee0", | ||
| 49 | "zh:8c411c77b8390a49a8a1bc9f176529e6b32369dd33a723606c8533e5ca4d68c1", | ||
| 50 | "zh:a5ecc8255a612652a56b28149994985e2c4dc046e5d34d416d47fa7767f5c28f", | ||
| 51 | "zh:aea3fe1a5669b932eda9c5c72e5f327db8da707fe514aaca0d0ef60cb24892f9", | ||
| 52 | "zh:f56e26e6977f755d7ae56fa6320af96ecf4bb09580d47cb481efbf27f1c5afff", | ||
| 53 | ] | ||
| 54 | } | ||
diff --git a/terraform/envs/prod/backend.tf b/terraform/envs/prod/backend.tf new file mode 100644 index 0000000..3c533e6 --- /dev/null +++ b/terraform/envs/prod/backend.tf | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | terraform { | ||
| 2 | backend "local" { | ||
| 3 | path = "terraform.tfstate" | ||
| 4 | } | ||
| 5 | } | ||
diff --git a/terraform/envs/prod/main.tf b/terraform/envs/prod/main.tf new file mode 100644 index 0000000..04db31d --- /dev/null +++ b/terraform/envs/prod/main.tf | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | provider "kubernetes" { | ||
| 2 | config_path = pathexpand(var.kubeconfig) | ||
| 3 | config_context = var.kube_context | ||
| 4 | } | ||
| 5 | |||
| 6 | provider "helm" { | ||
| 7 | kubernetes { | ||
| 8 | config_path = pathexpand(var.kubeconfig) | ||
| 9 | config_context = var.kube_context | ||
| 10 | } | ||
| 11 | } | ||
| 12 | |||
| 13 | locals { | ||
| 14 | env = "prod" | ||
| 15 | } | ||
| 16 | |||
| 17 | module "llm" { | ||
| 18 | source = "../../modules/llm" | ||
| 19 | |||
| 20 | release_name = "llm" | ||
| 21 | namespace = "llm-${local.env}" | ||
| 22 | chart_path = var.chart_path | ||
| 23 | |||
| 24 | replicas = 1 | ||
| 25 | |||
| 26 | model_name = "Qwen/Qwen2.5-1.5B-Instruct" | ||
| 27 | model_alias = "Qwen2.5-1.5B-Instruct" | ||
| 28 | max_model_len = 4096 | ||
| 29 | dtype = "bfloat16" | ||
| 30 | omp_threads = 6 | ||
| 31 | |||
| 32 | resources = { | ||
| 33 | requests = { cpu = "2", memory = "4Gi" } | ||
| 34 | limits = { cpu = "6", memory = "8Gi" } | ||
| 35 | } | ||
| 36 | |||
| 37 | ingress_host = "llm.prod.localtest.me" | ||
| 38 | image_tag = "latest" | ||
| 39 | # Content-addressable pin, resolved with scripts/resolve-digests.sh on amd64. | ||
| 40 | # Per-arch digest — re-resolve on a different arch or after an upstream tag move. | ||
| 41 | # Dev intentionally runs on `:latest` so new fixes flow in without a PR. | ||
| 42 | image_digest = "sha256:bb7ed9b6c595334d78179e9d8f6490e06bf9220ed4a10b9b4e15064454ddc69e" | ||
| 43 | |||
| 44 | # Enable OpenAI tool-calling so the agent's function-call path works. | ||
| 45 | # Qwen 2.5 uses hermes-style tool parsing in vLLM. | ||
| 46 | extra_args = [ | ||
| 47 | "--enable-auto-tool-choice", | ||
| 48 | "--tool-call-parser", "hermes", | ||
| 49 | ] | ||
| 50 | |||
| 51 | hpa = { | ||
| 52 | enabled = true | ||
| 53 | min_replicas = 1 | ||
| 54 | max_replicas = 3 | ||
| 55 | # vLLM exposes `vllm:num_requests_running` as a per-pod gauge of in-flight | ||
| 56 | # requests. The target below ("500m" = 0.5) scales up once pods average more than 0.5 in-flight requests each. | ||
| 57 | metric_name = "vllm:num_requests_running" | ||
| 58 | target_average_value = "500m" | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | output "ingress_host" { value = module.llm.ingress_host } | ||
| 63 | output "service_dns" { value = module.llm.service_dns } | ||
| 64 | output "curl_example" { | ||
| 65 | value = <<-EOT | ||
| 66 | curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \ | ||
| 67 | -H 'Content-Type: application/json' \ | ||
| 68 | -d '{"model":"Qwen2.5-1.5B-Instruct","messages":[{"role":"user","content":"Say hi."}]}' | ||
| 69 | EOT | ||
| 70 | } | ||
diff --git a/terraform/envs/prod/variables.tf b/terraform/envs/prod/variables.tf new file mode 100644 index 0000000..9f1b697 --- /dev/null +++ b/terraform/envs/prod/variables.tf | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | variable "kubeconfig" { | ||
| 2 | type = string | ||
| 3 | default = "~/.kube/config" | ||
| 4 | } | ||
| 5 | |||
| 6 | variable "kube_context" { | ||
| 7 | type = string | ||
| 8 | default = "kind-llm-local" | ||
| 9 | } | ||
| 10 | |||
| 11 | variable "chart_path" { | ||
| 12 | type = string | ||
| 13 | description = "Absolute path to charts/llm-app" | ||
| 14 | } | ||
diff --git a/terraform/envs/prod/versions.tf b/terraform/envs/prod/versions.tf new file mode 100644 index 0000000..6a87674 --- /dev/null +++ b/terraform/envs/prod/versions.tf | |||
| @@ -0,0 +1,8 @@ | |||
| 1 | terraform { | ||
| 2 | required_version = ">= 1.6.0" | ||
| 3 | required_providers { | ||
| 4 | helm = { source = "hashicorp/helm", version = "~> 2.17" } | ||
| 5 | kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.31" } | ||
| 6 | random = { source = "hashicorp/random", version = "~> 3.6" } | ||
| 7 | } | ||
| 8 | } | ||
