diff options
| author | Your Name <you@example.com> | 2026-04-26 21:02:47 +0800 |
|---|---|---|
| committer | Your Name <you@example.com> | 2026-04-26 21:02:47 +0800 |
| commit | d3e770254de0bb301815ca87257c8b1a357d06c4 (patch) | |
| tree | 358c814be2a06b9e2009905f14938243286b8d82 /terraform/envs/dev/main.tf | |
Diffstat (limited to 'terraform/envs/dev/main.tf')
| -rw-r--r-- | terraform/envs/dev/main.tf | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/terraform/envs/dev/main.tf b/terraform/envs/dev/main.tf
new file mode 100644
index 0000000..8e1b882
--- /dev/null
+++ b/terraform/envs/dev/main.tf
@@ -0,0 +1,49 @@
# Kubernetes provider: targets the cluster selected by the kubeconfig file
# and context supplied through input variables.
provider "kubernetes" {
  config_context = var.kube_context

  # pathexpand() replaces a leading "~" with the current user's home directory,
  # so a home-relative kubeconfig path is accepted.
  config_path = pathexpand(var.kubeconfig)
}
| 5 | |||
# Helm provider: configured with the same kubeconfig file and context
# variables as the "kubernetes" provider.
provider "helm" {
  kubernetes {
    config_context = var.kube_context
    config_path    = pathexpand(var.kubeconfig) # expands a leading "~"
  }
}
| 12 | |||
locals {
  # Environment label; used below as the namespace suffix.
  env = "dev"

  # Alias passed to the llm module as model_alias. Defined once so the module
  # input and the request body in the curl_example output cannot drift apart.
  model_alias = "Qwen2.5-0.5B-Instruct"
}

# Instantiates the llm module for the dev environment.
# The module's implementation lives in ../../modules/llm (not visible here).
module "llm" {
  source = "../../modules/llm"

  release_name = "llm"
  namespace    = "llm-${local.env}"
  chart_path   = var.chart_path

  replicas = 2

  model_name    = "Qwen/Qwen2.5-0.5B-Instruct"
  model_alias   = local.model_alias
  max_model_len = 2048
  dtype         = "bfloat16"
  omp_threads   = 4

  resources = {
    requests = { cpu = "1", memory = "2Gi" }
    limits   = { cpu = "4", memory = "6Gi" }
  }

  ingress_host = "llm.dev.localtest.me"

  # NOTE(review): "latest" is a mutable tag, so applies may not be
  # reproducible; consider pinning a specific tag if that matters for dev.
  image_tag = "latest"
}

output "ingress_host" {
  description = "Ingress host reported by the llm module."
  value       = module.llm.ingress_host
}

output "service_dns" {
  description = "Service DNS name reported by the llm module."
  value       = module.llm.service_dns
}

output "curl_example" {
  description = "Sample chat-completions request against the ingress host."

  # The "model" field interpolates local.model_alias so the example always
  # matches the alias given to the module above. Port 8080 is kept from the
  # original example. "<<-" strips the common leading indentation.
  value = <<-EOT
    curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \
      -H 'Content-Type: application/json' \
      -d '{"model":"${local.model_alias}","messages":[{"role":"user","content":"Say hi."}]}'
  EOT
}
