diff options
| author | Your Name <you@example.com> | 2026-04-26 21:02:47 +0800 |
|---|---|---|
| committer | Your Name <you@example.com> | 2026-04-26 21:02:47 +0800 |
| commit | d3e770254de0bb301815ca87257c8b1a357d06c4 (patch) | |
| tree | 358c814be2a06b9e2009905f14938243286b8d82 /charts/llm-app/values.yaml | |
Diffstat (limited to 'charts/llm-app/values.yaml')
| -rw-r--r-- | charts/llm-app/values.yaml | 51 |
1 file changed, 51 insertions, 0 deletions
diff --git a/charts/llm-app/values.yaml b/charts/llm-app/values.yaml new file mode 100644 index 0000000..96c5c9a --- /dev/null +++ b/charts/llm-app/values.yaml | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | replicaCount: 1 | ||
| 2 | |||
| 3 | image: | ||
| 4 | # vLLM CPU-only image (no CUDA, works on AVX2+). | ||
| 5 | repository: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo | ||
| 6 | tag: latest | ||
| 7 | # Optional. If set, used in place of `tag` to pin the image by content. | ||
| 8 | # Example: "sha256:abc123...". Fill via scripts/resolve-digests.sh. | ||
| 9 | digest: "" | ||
| 10 | pullPolicy: IfNotPresent | ||
| 11 | |||
| 12 | # vLLM pulls model weights from HuggingFace at first boot into the cache volume. | ||
| 13 | # `name` is the HF repo id (also used as `--served-model-name` unless `alias` overrides). | ||
| 14 | model: | ||
| 15 | name: "Qwen/Qwen2.5-0.5B-Instruct" | ||
| 16 | alias: "Qwen2.5-0.5B-Instruct" | ||
| 17 | maxModelLen: 2048 | ||
| 18 | dtype: "bfloat16" | ||
| 19 | |||
| 20 | server: | ||
| 21 | port: 8000 | ||
| 22 | # OMP threads for the CPU backend; 0 = autodetect. | ||
| 23 | ompThreads: 0 | ||
| 24 | extraArgs: [] | ||
| 25 | |||
| 26 | resources: | ||
| 27 | requests: | ||
| 28 | cpu: "500m" | ||
| 29 | memory: "1Gi" | ||
| 30 | limits: | ||
| 31 | cpu: "2" | ||
| 32 | memory: "3Gi" | ||
| 33 | |||
| 34 | service: | ||
| 35 | type: ClusterIP | ||
| 36 | port: 8000 | ||
| 37 | |||
| 38 | ingress: | ||
| 39 | enabled: true | ||
| 40 | className: nginx | ||
| 41 | host: llm.localtest.me | ||
| 42 | |||
| 43 | monitoring: | ||
| 44 | serviceMonitor: | ||
| 45 | enabled: true | ||
| 46 | interval: 15s | ||
| 47 | labels: | ||
| 48 | release: kube-prometheus-stack | ||
| 49 | |||
| 50 | modelCache: | ||
| 51 | sizeLimit: 10Gi | ||
