From d3e770254de0bb301815ca87257c8b1a357d06c4 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 26 Apr 2026 21:02:47 +0800 Subject: charts/llm-app: add Helm templates for vLLM serving stack --- charts/llm-app/templates/_helpers.tpl | 8 +++ charts/llm-app/templates/deployment.yaml | 76 ++++++++++++++++++++++++++++ charts/llm-app/templates/ingress.yaml | 19 +++++++ charts/llm-app/templates/service.yaml | 13 +++++ charts/llm-app/templates/servicemonitor.yaml | 19 +++++++ charts/llm-app/templates/smoketest-job.yaml | 32 ++++++++++++ 6 files changed, 167 insertions(+) create mode 100644 charts/llm-app/templates/_helpers.tpl create mode 100644 charts/llm-app/templates/deployment.yaml create mode 100644 charts/llm-app/templates/ingress.yaml create mode 100644 charts/llm-app/templates/service.yaml create mode 100644 charts/llm-app/templates/servicemonitor.yaml create mode 100644 charts/llm-app/templates/smoketest-job.yaml (limited to 'charts/llm-app/templates') diff --git a/charts/llm-app/templates/_helpers.tpl b/charts/llm-app/templates/_helpers.tpl new file mode 100644 index 0000000..8b104de --- /dev/null +++ b/charts/llm-app/templates/_helpers.tpl @@ -0,0 +1,8 @@ +{{- define "llm-app.fullname" -}} +{{- printf "%s-%s" .Release.Name .Chart.Name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "llm-app.selectorLabels" -}} +app.kubernetes.io/name: {{ .Chart.Name }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} diff --git a/charts/llm-app/templates/deployment.yaml b/charts/llm-app/templates/deployment.yaml new file mode 100644 index 0000000..12677b5 --- /dev/null +++ b/charts/llm-app/templates/deployment.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llm-app.fullname" . }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: {{- include "llm-app.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: {{- include "llm-app.selectorLabels" . 
| nindent 8 }} + spec: + containers: + - name: vllm-server + # Image entrypoint is already `vllm serve`; args start with the model tag. + image: "{{ .Values.image.repository }}{{ if .Values.image.digest }}@{{ .Values.image.digest }}{{ else }}:{{ .Values.image.tag }}{{ end }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - {{ .Values.model.name | quote }} + - "--host" + - "0.0.0.0" + - "--port" + - {{ .Values.server.port | quote }} + - "--served-model-name" + - {{ .Values.model.alias | quote }} + - "--max-model-len" + - {{ .Values.model.maxModelLen | quote }} + - "--dtype" + - {{ .Values.model.dtype | quote }} + {{- with .Values.server.extraArgs }} + {{- toYaml . | nindent 12 }} + {{- end }} + env: + - name: HF_HOME + value: /cache/huggingface + - name: VLLM_CPU_KVCACHE_SPACE + value: "2" + {{- if gt (int .Values.server.ompThreads) 0 }} + - name: OMP_NUM_THREADS + value: {{ .Values.server.ompThreads | quote }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.server.port }} + protocol: TCP + readinessProbe: + httpGet: + path: /health + port: http + # vLLM CPU cold-start is ~2 min + HF download on first boot. 
+ initialDelaySeconds: 60 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 180 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 600 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 6 + resources: {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: cache + mountPath: /cache + - name: shm + mountPath: /dev/shm + volumes: + - name: cache + emptyDir: + sizeLimit: {{ .Values.modelCache.sizeLimit }} + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi diff --git a/charts/llm-app/templates/ingress.yaml b/charts/llm-app/templates/ingress.yaml new file mode 100644 index 0000000..f3a6ded --- /dev/null +++ b/charts/llm-app/templates/ingress.yaml @@ -0,0 +1,19 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "llm-app.fullname" . }} +spec: + ingressClassName: {{ .Values.ingress.className }} + rules: + - host: {{ .Values.ingress.host | quote }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ include "llm-app.fullname" . }} + port: + number: {{ .Values.service.port }} +{{- end }} diff --git a/charts/llm-app/templates/service.yaml b/charts/llm-app/templates/service.yaml new file mode 100644 index 0000000..6350996 --- /dev/null +++ b/charts/llm-app/templates/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "llm-app.fullname" . }} + labels: {{- include "llm-app.selectorLabels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - name: http + port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + selector: {{- include "llm-app.selectorLabels" . 
| nindent 4 }} diff --git a/charts/llm-app/templates/servicemonitor.yaml b/charts/llm-app/templates/servicemonitor.yaml new file mode 100644 index 0000000..264e766 --- /dev/null +++ b/charts/llm-app/templates/servicemonitor.yaml @@ -0,0 +1,19 @@ +{{- if .Values.monitoring.serviceMonitor.enabled -}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "llm-app.fullname" . }} + {{- with .Values.monitoring.serviceMonitor.labels }} + labels: {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: {{- include "llm-app.selectorLabels" . | nindent 6 }} + endpoints: + - port: http + path: /metrics + interval: {{ .Values.monitoring.serviceMonitor.interval }} + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} +{{- end }} diff --git a/charts/llm-app/templates/smoketest-job.yaml b/charts/llm-app/templates/smoketest-job.yaml new file mode 100644 index 0000000..ac97f33 --- /dev/null +++ b/charts/llm-app/templates/smoketest-job.yaml @@ -0,0 +1,32 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "llm-app.fullname" . }}-smoketest + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-weight": "10" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + backoffLimit: 2 + activeDeadlineSeconds: 240 + ttlSecondsAfterFinished: 600 + template: + spec: + restartPolicy: Never + containers: + - name: curl + image: curlimages/curl:8.10.1 + command: ["/bin/sh", "-euc"] + args: + - | + ENDPOINT="http://{{ include "llm-app.fullname" . 
}}:{{ .Values.service.port }}" + MODEL={{ .Values.model.alias | quote }} + echo "smoketest: GET $ENDPOINT/v1/models" + out=$(curl -fsS --max-time 60 "$ENDPOINT/v1/models") + echo "$out" | grep -q "\"$MODEL\"" || { echo "FAIL: $MODEL not listed in /v1/models"; echo "$out"; exit 1; } + echo "smoketest: POST $ENDPOINT/v1/chat/completions" + resp=$(curl -fsS --max-time 90 "$ENDPOINT/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -d "{\"model\":\"$MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"Reply with just: pong\"}],\"max_tokens\":8,\"temperature\":0}") + echo "$resp" | grep -q '"content"' || { echo "FAIL: no content in response"; echo "$resp"; exit 1; } + echo "OK" -- cgit