summaryrefslogtreecommitdiff
path: root/charts/llm-app/templates
diff options
context:
space:
mode:
Diffstat (limited to 'charts/llm-app/templates')
-rw-r--r--charts/llm-app/templates/_helpers.tpl8
-rw-r--r--charts/llm-app/templates/deployment.yaml76
-rw-r--r--charts/llm-app/templates/ingress.yaml19
-rw-r--r--charts/llm-app/templates/service.yaml13
-rw-r--r--charts/llm-app/templates/servicemonitor.yaml19
-rw-r--r--charts/llm-app/templates/smoketest-job.yaml32
6 files changed, 167 insertions, 0 deletions
diff --git a/charts/llm-app/templates/_helpers.tpl b/charts/llm-app/templates/_helpers.tpl
new file mode 100644
index 0000000..8b104de
--- /dev/null
+++ b/charts/llm-app/templates/_helpers.tpl
@@ -0,0 +1,8 @@
{{/*
Fully qualified app name: "<release>-<chart>", truncated to the 63-char
DNS-label limit. Honors the conventional .Values.fullnameOverride escape
hatch; when it is unset the output is identical to the previous behavior.
*/}}
{{- define "llm-app.fullname" -}}
{{- default (printf "%s-%s" .Release.Name .Chart.Name) .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Selector labels shared by Deployment, Service and ServiceMonitor.
Keep this list immutable: Deployment selectors cannot be updated in place.
*/}}
{{- define "llm-app.selectorLabels" -}}
app.kubernetes.io/name: {{ .Chart.Name }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
diff --git a/charts/llm-app/templates/deployment.yaml b/charts/llm-app/templates/deployment.yaml
new file mode 100644
index 0000000..12677b5
--- /dev/null
+++ b/charts/llm-app/templates/deployment.yaml
@@ -0,0 +1,76 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "llm-app.fullname" . }}
  # Labels kept consistent with service.yaml so `kubectl get -l` works uniformly.
  labels: {{- include "llm-app.selectorLabels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels: {{- include "llm-app.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels: {{- include "llm-app.selectorLabels" . | nindent 8 }}
    spec:
      containers:
        - name: vllm-server
          # Image entrypoint is already `vllm serve`; args start with the model tag.
          # Prefer a pinned digest over a mutable tag when one is provided.
          image: "{{ .Values.image.repository }}{{ if .Values.image.digest }}@{{ .Values.image.digest }}{{ else }}:{{ .Values.image.tag }}{{ end }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args:
            - {{ .Values.model.name | quote }}
            - "--host"
            - "0.0.0.0"
            - "--port"
            - {{ .Values.server.port | quote }}
            - "--served-model-name"
            - {{ .Values.model.alias | quote }}
            - "--max-model-len"
            - {{ .Values.model.maxModelLen | quote }}
            - "--dtype"
            - {{ .Values.model.dtype | quote }}
            {{- with .Values.server.extraArgs }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          env:
            - name: HF_HOME
              value: /cache/huggingface
            - name: VLLM_CPU_KVCACHE_SPACE
              value: "2"
            {{- if gt (int .Values.server.ompThreads) 0 }}
            - name: OMP_NUM_THREADS
              value: {{ .Values.server.ompThreads | quote }}
            {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.server.port }}
              protocol: TCP
          # vLLM CPU cold-start is ~2 min plus the HF download on first boot.
          # A startupProbe owns that window (up to 30 min) so the readiness and
          # liveness probes can use normal steady-state settings instead of
          # huge initialDelaySeconds / failureThreshold values.
          startupProbe:
            httpGet:
              path: /health
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 180
          readinessProbe:
            httpGet:
              path: /health
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /health
              port: http
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 6
          resources: {{- toYaml .Values.resources | nindent 12 }}
          volumeMounts:
            - name: cache
              mountPath: /cache
            - name: shm
              mountPath: /dev/shm
      volumes:
        - name: cache
          emptyDir:
            sizeLimit: {{ .Values.modelCache.sizeLimit }}
        - name: shm
          # tmpfs for torch/vLLM shared memory; overridable, defaults to the
          # previous hard-coded 1Gi.
          emptyDir:
            medium: Memory
            sizeLimit: {{ .Values.server.shmSizeLimit | default "1Gi" }}
diff --git a/charts/llm-app/templates/ingress.yaml b/charts/llm-app/templates/ingress.yaml
new file mode 100644
index 0000000..f3a6ded
--- /dev/null
+++ b/charts/llm-app/templates/ingress.yaml
@@ -0,0 +1,19 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "llm-app.fullname" . }}
  labels: {{- include "llm-app.selectorLabels" . | nindent 4 }}
spec:
  {{- /* Guard: a bare `ingressClassName:` would render as null when the
         value is empty; omit the field entirely instead so the cluster
         default IngressClass applies. */}}
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- /* Optional TLS; absent values render nothing (backward compatible). */}}
  {{- with .Values.ingress.tls }}
  tls: {{- toYaml . | nindent 4 }}
  {{- end }}
  rules:
    - host: {{ .Values.ingress.host | quote }}
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: {{ include "llm-app.fullname" . }}
                port:
                  number: {{ .Values.service.port }}
{{- end }}
diff --git a/charts/llm-app/templates/service.yaml b/charts/llm-app/templates/service.yaml
new file mode 100644
index 0000000..6350996
--- /dev/null
+++ b/charts/llm-app/templates/service.yaml
@@ -0,0 +1,13 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ include "llm-app.fullname" . }}
  labels: {{- include "llm-app.selectorLabels" . | nindent 4 }}
spec:
  # Routes to pods carrying the shared selector labels (see _helpers.tpl).
  selector: {{- include "llm-app.selectorLabels" . | nindent 4 }}
  type: {{ .Values.service.type }}
  ports:
    # Named "http" so probes and the ServiceMonitor can reference it by name.
    - name: http
      protocol: TCP
      port: {{ .Values.service.port }}
      targetPort: http
diff --git a/charts/llm-app/templates/servicemonitor.yaml b/charts/llm-app/templates/servicemonitor.yaml
new file mode 100644
index 0000000..264e766
--- /dev/null
+++ b/charts/llm-app/templates/servicemonitor.yaml
@@ -0,0 +1,19 @@
{{- if .Values.monitoring.serviceMonitor.enabled -}}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "llm-app.fullname" . }}
  {{- with .Values.monitoring.serviceMonitor.labels }}
  labels: {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  selector:
    # Matches the labels set on the Service in service.yaml.
    matchLabels: {{- include "llm-app.selectorLabels" . | nindent 6 }}
  endpoints:
    - port: http
      path: /metrics
      {{- /* Guard: an unset interval would otherwise render `interval:` (null),
             which is rejected by the ServiceMonitor CRD schema; omit the field
             and let Prometheus use its global scrape interval instead. */}}
      {{- with .Values.monitoring.serviceMonitor.interval }}
      interval: {{ . }}
      {{- end }}
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace }}
{{- end }}
diff --git a/charts/llm-app/templates/smoketest-job.yaml b/charts/llm-app/templates/smoketest-job.yaml
new file mode 100644
index 0000000..ac97f33
--- /dev/null
+++ b/charts/llm-app/templates/smoketest-job.yaml
@@ -0,0 +1,32 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "llm-app.fullname" . }}-smoketest
  annotations:
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "10"
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
  backoffLimit: 2
  activeDeadlineSeconds: 240
  ttlSecondsAfterFinished: 600
  template:
    spec:
      restartPolicy: Never
      containers:
        - name: curl
          image: curlimages/curl:8.10.1
          command: ["/bin/sh", "-euc"]
          args:
            - |
              ENDPOINT="http://{{ include "llm-app.fullname" . }}:{{ .Values.service.port }}"
              MODEL={{ .Values.model.alias | quote }}
              # The hook can fire while vLLM is still loading the model, so
              # poll /health first (up to 180s, inside activeDeadlineSeconds)
              # instead of letting a single request time out and burn a retry.
              echo "smoketest: waiting for $ENDPOINT/health"
              tries=0
              until curl -fsS --max-time 5 "$ENDPOINT/health" >/dev/null 2>&1; do
                tries=$((tries + 1))
                [ "$tries" -ge 36 ] && { echo "FAIL: server not healthy after 180s"; exit 1; }
                sleep 5
              done
              echo "smoketest: GET $ENDPOINT/v1/models"
              out=$(curl -fsS --max-time 60 "$ENDPOINT/v1/models")
              # -F: fixed-string match so regex metacharacters in the alias
              # (e.g. dots in "llama-3.1") cannot produce a false positive.
              echo "$out" | grep -qF "\"$MODEL\"" || { echo "FAIL: $MODEL not listed in /v1/models"; echo "$out"; exit 1; }
              echo "smoketest: POST $ENDPOINT/v1/chat/completions"
              resp=$(curl -fsS --max-time 90 "$ENDPOINT/v1/chat/completions" \
                -H "Content-Type: application/json" \
                -d "{\"model\":\"$MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with just: pong\"}],\"max_tokens\":8,\"temperature\":0}")
              echo "$resp" | grep -q '"content"' || { echo "FAIL: no content in response"; echo "$resp"; exit 1; }
              echo "OK"