# Terraform configuration for the "dev" environment of the LLM deployment.
# Kubernetes provider: connects using the kubeconfig file and context
# supplied via input variables. pathexpand() resolves a leading "~" so the
# variable may be given as e.g. "~/.kube/config".
provider "kubernetes" {
config_path = pathexpand(var.kubeconfig)
config_context = var.kube_context
}
# Helm provider: reuses the same kubeconfig/context as the kubernetes
# provider so Helm releases land in the same cluster.
provider "helm" {
kubernetes {
config_path = pathexpand(var.kubeconfig)
config_context = var.kube_context
}
}
# Environment name; interpolated into the module namespace below
# ("llm-dev").
locals {
env = "dev"
}
# Dev instance of the shared LLM chart module. Deploys the release "llm"
# into an environment-suffixed namespace.
module "llm" {
source = "../../modules/llm"
release_name = "llm"
# Namespace is derived from the environment ("llm-dev").
namespace = "llm-${local.env}"
chart_path = var.chart_path
replicas = 2
# Hugging Face model id and the alias clients use in API requests
# (the alias also appears in the curl example output below).
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
model_alias = "Qwen2.5-0.5B-Instruct"
# Context window cap; presumably forwarded to the serving engine's
# --max-model-len — verify against the module's chart values.
max_model_len = 2048
dtype = "bfloat16"
omp_threads = 4
# CPU/memory requests and limits applied to the serving pods.
resources = {
requests = { cpu = "1", memory = "2Gi" }
limits = { cpu = "4", memory = "6Gi" }
}
# *.localtest.me resolves to 127.0.0.1, so this host works for a local
# cluster without DNS changes.
ingress_host = "llm.dev.localtest.me"
# NOTE(review): "latest" is mutable and makes rollbacks non-deterministic;
# consider pinning a digest or version tag for anything beyond dev.
image_tag = "latest"
}
# External hostname for reaching the LLM service through the ingress.
output "ingress_host" {
  value       = module.llm.ingress_host
  description = "Ingress hostname exposed by the llm module."
}
# In-cluster DNS name of the LLM service, for other workloads in the cluster.
output "service_dns" {
  value       = module.llm.service_dns
  description = "Cluster-internal DNS name of the llm service."
}
# Ready-to-paste example request against the deployed chat completions API.
output "curl_example" {
  description = "Example curl command for the deployed chat completions endpoint."
  # NOTE(review): port 8080 assumes the ingress controller is published on
  # that port (e.g. a kind/k3d port mapping) — confirm for this environment.
  value       = <<-EOT
    curl -s http://${module.llm.ingress_host}:8080/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"model":"Qwen2.5-0.5B-Instruct","messages":[{"role":"user","content":"Say hi."}]}'
  EOT
}