feat(ai): ADR-036 Nemotron E2E 驗證腳本

新增 verify_nemotron_e2e.py:
- 測試 NVIDIA API 連線
- 測試 AIRouter 整合
- 測試高風險 Tool 檢測
- 測試繁體中文 Tool Calling

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-03-29 00:11:40 +08:00
parent 7c905c4bf3
commit fb643eb645

View File

@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""
Nemotron E2E verification script - ADR-036
==========================================
2026-03-29 ogt: Verify NvidiaProvider's Tool Calling in the production environment.

How to run:
    cd apps/api
    python scripts/verify_nemotron_e2e.py

Expected results:
    - NVIDIA API connection succeeds
    - Tool Calling responses are parsed correctly
    - Fallback mechanism works
"""
import asyncio
import json
import sys
from pathlib import Path
# Put the parent of this script's directory on sys.path so the `src.*` imports below resolve.
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.services.nvidia_provider import (
NvidiaProvider,
create_tool_definition,
get_nvidia_provider,
)
from src.services.ai_router import get_ai_router, AIProvider
async def test_nvidia_connection():
    """Check that a tool-calling request through NvidiaProvider succeeds.

    Sends one English prompt together with a single ``restart_pod`` tool
    definition and prints the latency, the token usage (when present) and
    every tool call found on the result.

    Returns:
        True when ``result.success`` is truthy, False otherwise. Invalid
        tool calls are printed but do not turn a successful call into a
        failure.
    """
    print("\n=== 測試 1: NVIDIA API 連線 ===")
    client = NvidiaProvider()

    # Tool definition offered to the model for this request.
    pod_parameters = {
        "type": "object",
        "properties": {
            "pod_name": {"type": "string", "description": "Name of the pod"},
            "namespace": {"type": "string", "description": "Kubernetes namespace"},
        },
        "required": ["pod_name"],
    }
    restart_tool = create_tool_definition(
        name="restart_pod",
        description="Restart a Kubernetes pod",
        parameters=pod_parameters,
    )

    user_message = {
        "role": "user",
        "content": "restart the awoooi-api pod in awoooi-prod namespace",
    }
    outcome = await client.tool_call(messages=[user_message], tools=[restart_tool])

    # Failure path first so the success reporting below stays flat.
    if not outcome.success:
        print(f"❌ NVIDIA API 連線失敗: {outcome.error}")
        if outcome.fallback_triggered:
            print(" 已觸發 Fallback")
        return False

    print("✅ NVIDIA API 連線成功")
    print(f" 延遲: {outcome.latency_ms:.0f}ms")
    if outcome.usage:
        print(f" Token: {outcome.usage.total_tokens}")

    for call in outcome.tool_calls:
        if not call.valid:
            print(f" ❌ 無效 Tool Call: {call.error}")
            continue
        print(f" Tool: {call.tool_name}")
        print(f" Args: {json.dumps(call.arguments, ensure_ascii=False)}")
    return True
async def test_router_integration():
    """Check which provider AIRouter selects for tool calling.

    Prints the provider, the model and the fallback chain returned by
    ``route_tool_calling()``.

    Returns:
        True when the selected provider equals ``AIProvider.NVIDIA``.
    """
    print("\n=== 測試 2: AIRouter 整合 ===")
    primary, model_name, fallbacks = get_ai_router().route_tool_calling()

    print("✅ Tool Calling 路由")
    print(f" Provider: {primary.value}")
    print(f" Model: {model_name}")
    # Each fallback entry is a pair; only the provider half is displayed.
    fallback_names = [candidate.value for candidate, _ in fallbacks]
    print(f" Fallback: {fallback_names}")

    return primary == AIProvider.NVIDIA
async def test_high_risk_detection():
    """Check ``is_high_risk_tool`` against two fixed lists of tool names.

    Three names are expected to be flagged as high risk and three are
    expected not to be. One line is printed per name.

    Returns:
        True when every name was classified as expected, False otherwise.
    """
    print("\n=== 測試 3: 高風險 Tool 檢測 ===")
    checker = get_nvidia_provider()
    failures = 0

    # Names that must be reported as high risk.
    for name in ("delete_pod", "delete_deployment", "drain_node"):
        if not checker.is_high_risk_tool(name):
            print(f"{name}: 應為高風險但未檢測到")
            failures += 1
            continue
        print(f"{name}: 高風險 (需 HITL)")

    # Names that must NOT be reported as high risk.
    for name in ("restart_pod", "scale_deployment", "get_logs"):
        if checker.is_high_risk_tool(name):
            print(f"{name}: 應為安全但被標記為高風險")
            failures += 1
            continue
        print(f"{name}: 安全 (可自動執行)")

    return failures == 0
async def test_chinese_prompt():
    """Check tool calling with a Traditional Chinese user prompt.

    Offers a single ``scale_deployment`` tool and looks for the first valid
    tool call with that name in the result.

    Returns:
        True when such a tool call is found, False otherwise.
    """
    print("\n=== 測試 4: 繁體中文 Tool Calling ===")
    client = NvidiaProvider()

    scale_parameters = {
        "type": "object",
        "properties": {
            "deployment": {"type": "string"},
            "replicas": {"type": "integer"},
            "namespace": {"type": "string"},
        },
        "required": ["deployment", "replicas"],
    }
    scale_tool = create_tool_definition(
        name="scale_deployment",
        description="Scale a Kubernetes deployment to a specified number of replicas",
        parameters=scale_parameters,
    )

    user_message = {"role": "user", "content": "把 awoooi-web 擴展到 3 個副本"}
    reply = await client.tool_call(messages=[user_message], tools=[scale_tool])

    # Only inspect tool calls when the request succeeded and returned some.
    candidates = (reply.success and reply.tool_calls) or ()
    hit = next(
        (
            call
            for call in candidates
            if call.valid and call.tool_name == "scale_deployment"
        ),
        None,
    )

    if hit is None:
        print(f"❌ 繁中 Tool Calling 失敗: {reply.error or 'No valid tool calls'}")
        return False

    print("✅ 繁中 Tool Calling 成功")
    print(f" Deployment: {hit.arguments.get('deployment')}")
    print(f" Replicas: {hit.arguments.get('replicas')}")
    return True
async def main():
    """Run the four checks in order and print a pass/fail summary.

    An exception raised by a check is printed and recorded as a failure,
    so the remaining checks still run.

    Returns:
        0 when every check passed, 1 otherwise.
    """
    rule = "=" * 60
    print(rule)
    print("NVIDIA Nemotron E2E 驗證 - ADR-036")
    print(rule)

    # (summary label, coroutine function) in execution order; the 1-based
    # position is the test number shown in exception messages.
    checks = (
        ("NVIDIA API 連線", test_nvidia_connection),
        ("AIRouter 整合", test_router_integration),
        ("高風險 Tool 檢測", test_high_risk_detection),
        ("繁中 Tool Calling", test_chinese_prompt),
    )

    outcomes = []
    for number, (label, check) in enumerate(checks, start=1):
        try:
            verdict = await check()
        except Exception as exc:
            # Top-level boundary: report the error and keep going.
            print(f"❌ 測試 {number} 異常: {exc}")
            verdict = False
        outcomes.append((label, verdict))

    # Summary
    print("\n" + rule)
    print("測試總結")
    print(rule)

    total = len(outcomes)
    passed = len([label for label, verdict in outcomes if verdict])
    for label, verdict in outcomes:
        status = "✅ PASS" if verdict else "❌ FAIL"
        print(f" {status}: {label}")
    print(f"\n結果: {passed}/{total} 通過")

    if passed != total:
        print("\n⚠️ 部分測試失敗,請檢查日誌")
        return 1
    print("\n🎉 Nemotron E2E 驗證通過!")
    return 0
if __name__ == "__main__":
    # Use main()'s return value (0 or 1) as the process exit status.
    sys.exit(asyncio.run(main()))