feat(ai): ADR-036 Nemotron E2E 驗證腳本
新增 verify_nemotron_e2e.py: - 測試 NVIDIA API 連線 - 測試 AIRouter 整合 - 測試高風險 Tool 檢測 - 測試繁體中文 Tool Calling Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
225
apps/api/scripts/verify_nemotron_e2e.py
Normal file
225
apps/api/scripts/verify_nemotron_e2e.py
Normal file
@@ -0,0 +1,225 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Nemotron E2E 驗證腳本 - ADR-036
|
||||
================================
|
||||
2026-03-29 ogt: 驗證 NvidiaProvider 在生產環境的 Tool Calling 功能
|
||||
|
||||
執行方式:
|
||||
cd apps/api
|
||||
python scripts/verify_nemotron_e2e.py
|
||||
|
||||
預期結果:
|
||||
- NVIDIA API 連線成功
|
||||
- Tool Calling 正確解析
|
||||
- Fallback 機制正常
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
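# Put the directory two levels up from this script (apps/api for scripts/verify_nemotron_e2e.py) on sys.path so the `src` package imports below resolve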
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from src.services.nvidia_provider import (
|
||||
NvidiaProvider,
|
||||
create_tool_definition,
|
||||
get_nvidia_provider,
|
||||
)
|
||||
from src.services.ai_router import get_ai_router, AIProvider
|
||||
|
||||
|
||||
async def test_nvidia_connection():
    """Check 1: issue one tool-calling request through a fresh NvidiaProvider.

    Builds a single ``restart_pod`` tool definition, sends one English user
    prompt via ``provider.tool_call`` and prints what came back.

    Returns:
        True when ``result.success`` is truthy (individual tool calls are
        only reported, they do not affect the verdict); False otherwise.
    """
    print("\n=== 測試 1: NVIDIA API 連線 ===")

    provider = NvidiaProvider()

    # JSON-schema style description of the arguments the tool accepts.
    restart_schema = {
        "type": "object",
        "properties": {
            "pod_name": {"type": "string", "description": "Name of the pod"},
            "namespace": {"type": "string", "description": "Kubernetes namespace"},
        },
        "required": ["pod_name"],
    }
    restart_tool = create_tool_definition(
        name="restart_pod",
        description="Restart a Kubernetes pod",
        parameters=restart_schema,
    )

    conversation = [
        {"role": "user", "content": "restart the awoooi-api pod in awoooi-prod namespace"}
    ]
    result = await provider.tool_call(messages=conversation, tools=[restart_tool])

    # Failure path first: report the error and whether a fallback was triggered.
    if not result.success:
        print(f"❌ NVIDIA API 連線失敗: {result.error}")
        if result.fallback_triggered:
            print(" 已觸發 Fallback")
        return False

    print("✅ NVIDIA API 連線成功")
    print(f" 延遲: {result.latency_ms:.0f}ms")
    if result.usage:
        print(f" Token: {result.usage.total_tokens}")

    # Report every returned tool call; invalid ones are printed but tolerated.
    for call in result.tool_calls:
        if not call.valid:
            print(f" ❌ 無效 Tool Call: {call.error}")
            continue
        print(f" Tool: {call.tool_name}")
        print(f" Args: {json.dumps(call.arguments, ensure_ascii=False)}")

    return True
|
||||
|
||||
|
||||
async def test_router_integration():
    """Check 2: ask the AI router where tool-calling requests are routed.

    Prints the provider, model and fallback chain returned by
    ``route_tool_calling()``.

    Returns:
        True when the selected provider equals ``AIProvider.NVIDIA``.
    """
    print("\n=== 測試 2: AIRouter 整合 ===")

    routing = get_ai_router().route_tool_calling()
    provider, model, fallback_chain = routing

    print("✅ Tool Calling 路由")
    print(f" Provider: {provider.value}")
    print(f" Model: {model}")

    # Each fallback entry is a pair; only the provider half is displayed.
    fallback_names = [candidate.value for candidate, _ in fallback_chain]
    print(f" Fallback: {fallback_names}")

    return provider == AIProvider.NVIDIA
|
||||
|
||||
|
||||
async def test_high_risk_detection():
    """Check 3: compare ``is_high_risk_tool`` against a fixed expectation table.

    Three tool names are expected to be flagged as high risk and three are
    expected not to be. One line is printed per tool.

    Returns:
        True only if every tool name was classified as expected.
    """
    print("\n=== 測試 3: 高風險 Tool 檢測 ===")

    provider = get_nvidia_provider()

    # (tool name, expected to be high risk) - high-risk names are checked first.
    expectations = [
        ("delete_pod", True),
        ("delete_deployment", True),
        ("drain_node", True),
        ("restart_pod", False),
        ("scale_deployment", False),
        ("get_logs", False),
    ]

    mismatches = 0
    for tool, expect_high_risk in expectations:
        flagged = provider.is_high_risk_tool(tool)
        if expect_high_risk and flagged:
            print(f" ✅ {tool}: 高風險 (需 HITL)")
        elif expect_high_risk:
            print(f" ❌ {tool}: 應為高風險但未檢測到")
            mismatches += 1
        elif flagged:
            print(f" ❌ {tool}: 應為安全但被標記為高風險")
            mismatches += 1
        else:
            print(f" ✅ {tool}: 安全 (可自動執行)")

    return mismatches == 0
|
||||
|
||||
|
||||
async def test_chinese_prompt():
    """Check 4: send a Traditional Chinese prompt and look for a scale call.

    Offers a single ``scale_deployment`` tool and passes when the response
    contains at least one valid call to that tool; the first such call's
    ``deployment`` and ``replicas`` arguments are printed.

    Returns:
        True if a valid ``scale_deployment`` call was returned, else False.
    """
    print("\n=== 測試 4: 繁體中文 Tool Calling ===")

    provider = NvidiaProvider()

    scale_schema = {
        "type": "object",
        "properties": {
            "deployment": {"type": "string"},
            "replicas": {"type": "integer"},
            "namespace": {"type": "string"},
        },
        "required": ["deployment", "replicas"],
    }
    scale_tool = create_tool_definition(
        name="scale_deployment",
        description="Scale a Kubernetes deployment to a specified number of replicas",
        parameters=scale_schema,
    )

    prompt = [{"role": "user", "content": "把 awoooi-web 擴展到 3 個副本"}]
    result = await provider.tool_call(messages=prompt, tools=[scale_tool])

    # Only inspect tool calls when the request succeeded and returned some.
    candidates = (result.success and result.tool_calls) or ()
    hit = next(
        (
            call
            for call in candidates
            if call.valid and call.tool_name == "scale_deployment"
        ),
        None,
    )

    if hit is None:
        print(f"❌ 繁中 Tool Calling 失敗: {result.error or 'No valid tool calls'}")
        return False

    print("✅ 繁中 Tool Calling 成功")
    print(f" Deployment: {hit.arguments.get('deployment')}")
    print(f" Replicas: {hit.arguments.get('replicas')}")
    return True
|
||||
|
||||
|
||||
async def main():
    """Run the four verification checks in order and print a summary.

    Each check is awaited in its own try/except so that an exception in one
    is reported and recorded as a failure without stopping the others.

    Returns:
        0 when every check passed, 1 otherwise (used as the process exit code).
    """
    rule = "=" * 60
    print(rule)
    print("NVIDIA Nemotron E2E 驗證 - ADR-036")
    print(rule)

    # (summary label, coroutine function) in execution order.
    checks = (
        ("NVIDIA API 連線", test_nvidia_connection),
        ("AIRouter 整合", test_router_integration),
        ("高風險 Tool 檢測", test_high_risk_detection),
        ("繁中 Tool Calling", test_chinese_prompt),
    )

    results = []
    for number, (label, check) in enumerate(checks, start=1):
        try:
            outcome = await check()
        except Exception as exc:
            # A crashing check counts as a failure; keep going with the rest.
            print(f"❌ 測試 {number} 異常: {exc}")
            outcome = False
        results.append((label, outcome))

    print("\n" + rule)
    print("測試總結")
    print(rule)

    for label, ok in results:
        verdict = "✅ PASS" if ok else "❌ FAIL"
        print(f" {verdict}: {label}")

    total = len(results)
    passed = len([ok for _, ok in results if ok])
    print(f"\n結果: {passed}/{total} 通過")

    if passed != total:
        print("\n⚠️ 部分測試失敗,請檢查日誌")
        return 1

    print("\n🎉 Nemotron E2E 驗證通過!")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the async entry point and use its return value as the exit status.
    sys.exit(asyncio.run(main()))
|
||||
Reference in New Issue
Block a user