summaryrefslogtreecommitdiff
path: root/terraform/modules/agent/main.tf
blob: f53acdcc45f11294099acde5caf20daeb9978b5a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Namespace that the agent Deployment, Service and Ingress in this file are
# created in. Its name is supplied by the caller through var.namespace.
resource "kubernetes_namespace_v1" "agent" {
  metadata {
    # Label grouping this namespace under "llm-platform".
    labels = { "app.kubernetes.io/part-of" = "llm-platform" }
    name   = var.namespace
  }
}

# Single-replica Deployment running the agent container.
#
# Inputs read from module variables:
#   var.agent_image        - container image to run
#   var.agent_source_path  - local file whose content drives the rollout checksum
#   var.llm_service_url    - exported to the container as OPENAI_BASE_URL
#   var.model_alias        - exported to the container as MODEL
resource "kubernetes_deployment_v1" "agent" {
  metadata {
    name      = "agent"
    namespace = kubernetes_namespace_v1.agent.metadata[0].name
    labels    = { app = "agent" }
  }
  spec {
    replicas = 1
    selector {
      # Must match the pod template labels below.
      match_labels = { app = "agent" }
    }
    template {
      metadata {
        labels = { app = "agent" }
        annotations = {
          # Bounce the pod when the agent source changes on disk, even if the
          # image tag is unchanged. filesha256 hashes the file's raw bytes, so
          # it does not require the file to be valid UTF-8 the way
          # sha256(file(...)) does. Only the first 16 hex characters are kept.
          "checksum/code" = substr(filesha256(var.agent_source_path), 0, 16)
        }
      }
      spec {
        container {
          name              = "agent"
          image             = var.agent_image
          image_pull_policy = "IfNotPresent"

          # Endpoint and model name handed to the agent process.
          env {
            name  = "OPENAI_BASE_URL"
            value = var.llm_service_url
          }
          env {
            name  = "MODEL"
            value = var.model_alias
          }

          # Named port; the probes below refer to it by name.
          port {
            name           = "http"
            container_port = 8001
          }

          # Readiness: first check after 3s, then every 5s, tolerating up to
          # 10 consecutive failures.
          readiness_probe {
            http_get {
              path = "/health"
              port = "http"
            }
            initial_delay_seconds = 3
            period_seconds        = 5
            failure_threshold     = 10
          }

          # Liveness: same endpoint, first check after 30s, then every 30s.
          liveness_probe {
            http_get {
              path = "/health"
              port = "http"
            }
            initial_delay_seconds = 30
            period_seconds        = 30
          }

          resources {
            requests = { cpu = "100m", memory = "128Mi" }
            limits   = { cpu = "1", memory = "512Mi" }
          }
        }
      }
    }
  }
}

# Service in front of the agent pods, listening on port 8001.
resource "kubernetes_service_v1" "agent" {
  metadata {
    namespace = kubernetes_namespace_v1.agent.metadata[0].name
    name      = "agent"
    labels = {
      app = "agent"
    }
  }
  spec {
    # Routes to pods labelled app=agent.
    selector = {
      app = "agent"
    }
    port {
      # target_port is given by name ("http") rather than by number.
      target_port = "http"
      port        = 8001
      name        = "http"
    }
  }
}

# Ingress that sends all paths under var.ingress_host to the agent Service.
#
# Inputs read from module variables:
#   var.ingress_class - ingress class name handling this Ingress
#   var.ingress_host  - hostname the rule matches
resource "kubernetes_ingress_v1" "agent" {
  metadata {
    name      = "agent"
    namespace = kubernetes_namespace_v1.agent.metadata[0].name
  }
  spec {
    ingress_class_name = var.ingress_class
    rule {
      host = var.ingress_host
      http {
        path {
          # Prefix match on "/" covers every request path for the host.
          path      = "/"
          path_type = "Prefix"
          backend {
            service {
              name = kubernetes_service_v1.agent.metadata[0].name
              port {
                # Take the port from the Service itself instead of repeating
                # the literal, so the Ingress cannot drift from the Service.
                number = kubernetes_service_v1.agent.spec[0].port[0].port
              }
            }
          }
        }
      }
    }
  }
}