summary | refs | log | tree | commit | diff
path: root/terraform/envs/dev/main.tf
diff options
context:
space:
mode:
Diffstat (limited to 'terraform/envs/dev/main.tf')
-rw-r--r-- terraform/envs/dev/main.tf | 49
1 files changed, 49 insertions, 0 deletions
diff --git a/terraform/envs/dev/main.tf b/terraform/envs/dev/main.tf
new file mode 100644
index 0000000..8e1b882
--- /dev/null
+++ b/terraform/envs/dev/main.tf
@@ -0,0 +1,49 @@
# Kubernetes provider: connects using the kubeconfig file named by
# var.kubeconfig (passed through pathexpand) and the context named by
# var.kube_context.
provider "kubernetes" {
  config_context = var.kube_context
  config_path    = pathexpand(var.kubeconfig)
}
5
# Helm provider: its nested kubernetes connection settings use the same
# kubeconfig path and context variables as the kubernetes provider.
provider "helm" {
  kubernetes {
    config_context = var.kube_context
    config_path    = pathexpand(var.kubeconfig)
  }
}
12
locals {
  # Environment name; interpolated into the module's namespace below.
  env = "dev"

  # Alias the model is addressed by. Defined once so the module input and
  # the "model" field of the curl example cannot drift apart.
  model_alias = "Qwen2.5-0.5B-Instruct"
}

# Instantiates the shared llm module for this environment.
module "llm" {
  source = "../../modules/llm"

  release_name = "llm"
  namespace    = "llm-${local.env}"
  chart_path   = var.chart_path

  replicas = 2

  model_name    = "Qwen/Qwen2.5-0.5B-Instruct"
  model_alias   = local.model_alias
  max_model_len = 2048
  dtype         = "bfloat16"
  omp_threads   = 4

  resources = {
    requests = { cpu = "1", memory = "2Gi" }
    limits   = { cpu = "4", memory = "6Gi" }
  }

  ingress_host = "llm.dev.localtest.me"

  # NOTE(review): "latest" is a mutable tag, so successive applies may pull
  # different images. Consider pinning a specific version or digest.
  image_tag = "latest"
}

output "ingress_host" {
  description = "Ingress host as reported by the llm module."
  value       = module.llm.ingress_host
}

output "service_dns" {
  description = "Service DNS name as reported by the llm module."
  value       = module.llm.service_dns
}

# NOTE(review): port 8080 is hard-coded in the URL below; confirm it matches
# how the ingress is actually exposed in this environment.
output "curl_example" {
  description = "Sample curl command that posts a chat completion request to the ingress host."
  value       = <<-EOT
    curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"model":"${local.model_alias}","messages":[{"role":"user","content":"Say hi."}]}'
  EOT
}