1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
"""Tool-using agent over an OpenAI-compatible backend.
Uses the standard OpenAI tools API (function calling). vLLM maps this to the
model's native tool-call template (Qwen here), so small models follow the
protocol much more reliably than a hand-rolled text convention.
POST /ask {"question": "..."} -> {"answer": "...", "steps": N, "transcript": [...]}
GET /health -> "ok"
"""
import json
import os
import re
from http.server import BaseHTTPRequestHandler, HTTPServer
from openai import OpenAI
# --- Backend connection ------------------------------------------------------
# OPENAI_BASE_URL is required (KeyError at import time when unset); the API
# key defaults to "sk-local" when OPENAI_API_KEY is not set.
_backend_url = os.environ["OPENAI_BASE_URL"]
_backend_key = os.environ.get("OPENAI_API_KEY", "sk-local")
client = OpenAI(base_url=_backend_url, api_key=_backend_key)

# --- Agent settings ----------------------------------------------------------
MODEL = os.environ.get("MODEL", "Qwen2.5-1.5B-Instruct")
MAX_STEPS = int(os.environ.get("MAX_STEPS", "6"))

# System prompt placed first in every conversation.
SYSTEM = (
    "You are a careful math assistant. When the user asks any arithmetic question, "
    "call the 'calc' tool with the exact expression. Do not compute arithmetic in your head. "
    "After you receive the tool result, give a concise final answer."
)

# JSON schema describing the single argument of the 'calc' tool.
_CALC_PARAMETERS = {
    "type": "object",
    "properties": {
        "expression": {
            "type": "string",
            "description": "Arithmetic expression using only digits, spaces, and + - * / . ( )",
        },
    },
    "required": ["expression"],
}

# Tool definitions advertised to the model on every completion request.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "calc",
            "description": "Evaluate a safe arithmetic expression and return the numeric result.",
            "parameters": _CALC_PARAMETERS,
        },
    },
]
# Whitelist: digits, whitespace, + - * /, parentheses and the decimal point.
# Names, quotes, commas, brackets etc. never reach eval().
SAFE_EXPR = re.compile(r"^[\d\s+\-*/().]+$")


def calc(expression: str) -> str:
    """Evaluate a whitelisted arithmetic expression and return the result as text.

    Args:
        expression: Expression made only of digits, whitespace and
            ``+ - * / . ( )``.

    Returns:
        ``str()`` of the evaluated value, or a string starting with
        ``"ERROR:"`` when the input is not a string, contains disallowed
        characters, uses ``**``, or fails to evaluate (bad syntax, division
        by zero, ...). Evaluation errors are reported, not raised.
    """
    # The argument comes from model-generated JSON, so it may not be a string.
    if not isinstance(expression, str):
        return "ERROR: expression must be a string"
    if not SAFE_EXPR.fullmatch(expression):
        return "ERROR: disallowed characters"
    # The whitelist admits '*' twice in a row; exponent towers such as
    # 9**9**9**9 would pin the CPU indefinitely, so reject the operator.
    if "**" in expression:
        return "ERROR: exponentiation (**) is not supported"
    try:
        # SECURITY NOTE: eval() runs on model-supplied text. It is constrained
        # by the character whitelist above and by empty builtins/locals; keep
        # both guards in place if this is ever modified.
        return str(eval(expression, {"__builtins__": {}}, {}))  # noqa: S307
    except Exception as e:
        return f"ERROR: {e}"
def _run_tool_call(name, raw_arguments) -> str:
    """Execute one model-requested tool call and return its textual result.

    Every failure mode (unknown tool, unparseable or mis-shaped arguments) is
    turned into an ``"ERROR: ..."`` string so the model can see it and retry.
    """
    if name != "calc":
        return f"ERROR: unknown tool {name}"
    try:
        args = json.loads(raw_arguments)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; TypeError covers a missing
        # (None) arguments payload.
        return "ERROR: bad JSON arguments"
    # Valid JSON is not necessarily an object: "[1]" or "3" would break .get().
    if not isinstance(args, dict):
        return "ERROR: arguments must be a JSON object"
    expression = args.get("expression", "")
    if not isinstance(expression, str):
        return "ERROR: 'expression' must be a string"
    return calc(expression)


def run_agent(question: str) -> dict:
    """Run the chat/tool loop for one question until the model gives an answer.

    Each iteration sends the conversation so far to the backend. When the
    reply contains tool calls, each call is executed and its result appended
    as a ``tool`` message before the next iteration; a reply without tool
    calls is treated as the final answer.

    Args:
        question: The user's question, sent as the user message.

    Returns:
        ``{"answer": str, "steps": int, "transcript": list}`` when the model
        answers, or ``{"answer": None, "steps": MAX_STEPS,
        "note": "MAX_STEPS reached", "transcript": list}`` when it keeps
        calling tools for ``MAX_STEPS`` iterations.

    Raises:
        Any exception raised by the backend call; none are caught here.
    """
    messages = [
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": question},
    ]
    transcript: list = []

    for step in range(MAX_STEPS):
        resp = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=TOOLS,
            tool_choice="auto",
            temperature=0.0,
            max_tokens=256,
        )
        msg = resp.choices[0].message
        tool_calls = msg.tool_calls or []

        # Always append the assistant message (with any tool_calls) to history,
        # so the following tool messages have a call to refer to.
        assistant_entry = {"role": "assistant", "content": msg.content or ""}
        if tool_calls:
            assistant_entry["tool_calls"] = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {"name": tc.function.name, "arguments": tc.function.arguments},
                }
                for tc in tool_calls
            ]
        messages.append(assistant_entry)
        transcript.append(
            {
                "step": step + 1,
                "content": msg.content,
                "tool_calls": [
                    {"name": tc.function.name, "arguments": tc.function.arguments}
                    for tc in tool_calls
                ],
            }
        )

        # No tool call -> model produced a final answer.
        if not tool_calls:
            return {"answer": (msg.content or "").strip(), "steps": step + 1, "transcript": transcript}

        for tc in tool_calls:
            result = _run_tool_call(tc.function.name, tc.function.arguments)
            transcript.append({"tool_result": {"name": tc.function.name, "result": result}})
            messages.append({"role": "tool", "tool_call_id": tc.id, "content": result})

    return {"answer": None, "steps": MAX_STEPS, "note": "MAX_STEPS reached", "transcript": transcript}
class Handler(BaseHTTPRequestHandler):
    """HTTP front end: ``POST /ask`` runs the agent, ``GET /health`` answers ``ok``."""

    # Upper bound on accepted request bodies, so a client cannot make the
    # server buffer an arbitrarily large payload.
    MAX_BODY_BYTES = 1 << 20

    def _send_bytes(self, code, payload, content_type):
        """Send a complete response with explicit type and length headers."""
        self.send_response(code)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _send_json(self, code, obj):
        """Serialize *obj* as JSON and send it with status *code*."""
        self._send_bytes(code, json.dumps(obj).encode(), "application/json")

    def do_POST(self):  # noqa: N802
        """Handle ``POST /ask``: validate the JSON body and run the agent.

        Responds 404 for other paths, 400 for a bad Content-Length, invalid
        JSON, a non-object body or a non-string ``question``, 413 for an
        oversized body, 500 with ``{"error", "type"}`` when the agent raises,
        and 200 with the agent's result otherwise.
        """
        if self.path != "/ask":
            self.send_response(404)
            self.end_headers()
            return

        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            length = -1
        # A negative length would make rfile.read() block until EOF.
        if length < 0:
            self._send_json(400, {"error": "invalid Content-Length"})
            return
        if length > self.MAX_BODY_BYTES:
            self._send_json(413, {"error": "request body too large"})
            return

        try:
            body = json.loads(self.rfile.read(length) or b"{}")
        except (ValueError, RecursionError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError;
            # RecursionError covers pathologically nested input.
            self._send_bytes(400, b'{"error":"invalid json"}', "application/json")
            return
        if not isinstance(body, dict):
            self._send_json(400, {"error": "request body must be a JSON object"})
            return
        question = body.get("question", "")
        if not isinstance(question, str):
            self._send_json(400, {"error": "'question' must be a string"})
            return

        try:
            result = run_agent(question)
            code = 200
        except Exception as e:  # request boundary: report the failure as JSON
            result = {"error": str(e), "type": type(e).__name__}
            code = 500
        self._send_json(code, result)

    def do_GET(self):  # noqa: N802
        """Handle ``GET /health`` (200, body ``ok``); any other path is 404."""
        if self.path == "/health":
            self._send_bytes(200, b"ok", "text/plain")
            return
        self.send_response(404)
        self.end_headers()

    def log_message(self, fmt, *args):
        """Write the access-log line to stderr as ``<client address> <message>``."""
        import sys

        print(f"{self.address_string()} {fmt % args}", file=sys.stderr)
if __name__ == "__main__":
    # Bind address and port are read from the environment like the other
    # settings; the defaults are the previously hard-coded values.
    host = os.environ.get("AGENT_HOST", "0.0.0.0")
    port = int(os.environ.get("AGENT_PORT", "8001"))
    print(f"agent starting on :{port}, model={MODEL}, backend={os.environ['OPENAI_BASE_URL']}")
    server = HTTPServer((host, port), Handler)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server; exit without a traceback.
        pass
    finally:
        # Release the listening socket on every exit path.
        server.server_close()
|