From d3e770254de0bb301815ca87257c8b1a357d06c4 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sun, 26 Apr 2026 21:02:47 +0800
Subject: hehe

---
 charts/llm-app/templates/deployment.yaml | 94 ++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 charts/llm-app/templates/deployment.yaml

(limited to 'charts/llm-app/templates/deployment.yaml')

diff --git a/charts/llm-app/templates/deployment.yaml b/charts/llm-app/templates/deployment.yaml
new file mode 100644
index 0000000..12677b5
--- /dev/null
+++ b/charts/llm-app/templates/deployment.yaml
@@ -0,0 +1,94 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "llm-app.fullname" . }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels: {{- include "llm-app.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels: {{- include "llm-app.selectorLabels" . | nindent 8 }}
+    spec:
+      containers:
+        - name: vllm-server
+          # Image entrypoint is already `vllm serve`; args start with the model tag.
+          # A digest, when set, takes precedence over the tag.
+          image: "{{ .Values.image.repository }}{{ if .Values.image.digest }}@{{ .Values.image.digest }}{{ else }}:{{ .Values.image.tag }}{{ end }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          args:
+            - {{ .Values.model.name | quote }}
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - {{ .Values.server.port | quote }}
+            - "--served-model-name"
+            - {{ .Values.model.alias | quote }}
+            - "--max-model-len"
+            - {{ .Values.model.maxModelLen | quote }}
+            - "--dtype"
+            - {{ .Values.model.dtype | quote }}
+            # Optional extra CLI flags, appended verbatim as further list items.
+            {{- with .Values.server.extraArgs }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          env:
+            - name: HF_HOME
+              value: /cache/huggingface
+            - name: VLLM_CPU_KVCACHE_SPACE
+              value: "2"
+            # Only emitted when server.ompThreads converts to an integer > 0.
+            {{- if gt (int .Values.server.ompThreads) 0 }}
+            - name: OMP_NUM_THREADS
+              value: {{ .Values.server.ompThreads | quote }}
+            {{- end }}
+          ports:
+            - name: http
+              containerPort: {{ .Values.server.port }}
+              protocol: TCP
+          # vLLM CPU cold-start is ~2 min + HF download on first boot, so a
+          # startupProbe holds off the readiness and liveness probes until
+          # /health first succeeds. Budget after the initial delay:
+          # failureThreshold * periodSeconds = 180 * 10s = 30 min.
+          startupProbe:
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 60
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 180
+          # Runs only after startup succeeded; three misses (~30s) take the pod
+          # out of Service endpoints.
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 3
+          # Runs only after startup succeeded; six misses (~3 min) restart the
+          # container.
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: http
+            periodSeconds: 30
+            timeoutSeconds: 5
+            failureThreshold: 6
+          resources: {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            # Backs HF_HOME (/cache/huggingface).
+            - name: cache
+              mountPath: /cache
+            - name: shm
+              mountPath: /dev/shm
+      volumes:
+        - name: cache
+          emptyDir:
+            sizeLimit: {{ .Values.modelCache.sizeLimit }}
+        # RAM-backed tmpfs mounted at /dev/shm.
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
-- 
cgit