From d3e770254de0bb301815ca87257c8b1a357d06c4 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sun, 26 Apr 2026 21:02:47 +0800
Subject: hehe

---
 terraform/envs/dev/main.tf | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 terraform/envs/dev/main.tf

(limited to 'terraform/envs/dev/main.tf')

diff --git a/terraform/envs/dev/main.tf b/terraform/envs/dev/main.tf
new file mode 100644
index 0000000..8e1b882
--- /dev/null
+++ b/terraform/envs/dev/main.tf
@@ -0,0 +1,70 @@
+# Dev environment: configures the kubernetes and helm providers and instantiates
+# the llm module with dev-sized settings.
+
+# Cluster access comes from the kubeconfig file and context named in variables.
+provider "kubernetes" {
+  config_path    = pathexpand(var.kubeconfig)
+  config_context = var.kube_context
+}
+
+# The helm provider targets the same kubeconfig/context as the kubernetes provider.
+provider "helm" {
+  kubernetes {
+    config_path    = pathexpand(var.kubeconfig)
+    config_context = var.kube_context
+  }
+}
+
+locals {
+  env = "dev"
+
+  # Single source for the alias: passed to the llm module and used as the
+  # "model" field of the curl example, so the two cannot drift apart.
+  model_alias = "Qwen2.5-0.5B-Instruct"
+}
+
+module "llm" {
+  source = "../../modules/llm"
+
+  release_name = "llm"
+  namespace    = "llm-${local.env}"
+  chart_path   = var.chart_path
+
+  replicas = 2
+
+  model_name    = "Qwen/Qwen2.5-0.5B-Instruct"
+  model_alias   = local.model_alias
+  max_model_len = 2048
+  dtype         = "bfloat16"
+  omp_threads   = 4
+
+  resources = {
+    requests = { cpu = "1", memory = "2Gi" }
+    limits   = { cpu = "4", memory = "6Gi" }
+  }
+
+  ingress_host = "llm.dev.localtest.me"
+
+  # NOTE(review): "latest" is a mutable tag; consider pinning a fixed version
+  # so repeated applies deploy the same image.
+  image_tag = "latest"
+}
+
+output "ingress_host" {
+  description = "ingress_host value exported by the llm module."
+  value       = module.llm.ingress_host
+}
+
+output "service_dns" {
+  description = "service_dns value exported by the llm module."
+  value       = module.llm.service_dns
+}
+
+output "curl_example" {
+  description = "Sample curl command that posts a chat completion request to the ingress host on port 8080."
+  value       = <<-EOT
+    curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \
+      -H 'Content-Type: application/json' \
+      -d '{"model":"${local.model_alias}","messages":[{"role":"user","content":"Say hi."}]}'
+  EOT
+}
--
cgit