- 自動修復 import 排序、unused imports - 手動修復 raise from、isinstance union、unused variable - scripts/ 暫時保留 (非 CI 阻擋) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
226 lines
6.4 KiB
Python
226 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Nemotron E2E 驗證腳本 - ADR-036
|
|
================================
|
|
2026-03-29 ogt: 驗證 NvidiaProvider 在生產環境的 Tool Calling 功能
|
|
|
|
執行方式:
|
|
cd apps/api
|
|
python scripts/verify_nemotron_e2e.py
|
|
|
|
預期結果:
|
|
- NVIDIA API 連線成功
|
|
- Tool Calling 正確解析
|
|
- Fallback 機制正常
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# 加入 src 到 path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from src.services.ai_router import AIProvider, get_ai_router
|
|
from src.services.nvidia_provider import (
|
|
NvidiaProvider,
|
|
create_tool_definition,
|
|
get_nvidia_provider,
|
|
)
|
|
|
|
|
|
async def test_nvidia_connection():
    """Check 1: send a single tool-calling request to the NVIDIA provider.

    Constructs a new ``NvidiaProvider``, offers it one ``restart_pod`` tool
    and an English restart request, then prints a summary of the response.

    Returns:
        True when the response reports ``success``. Invalid tool calls are
        printed but do not fail this check. False when the request failed;
        in that case the error is printed, plus a note if a fallback was
        triggered.
    """
    print("\n=== 測試 1: NVIDIA API 連線 ===")

    nvidia = NvidiaProvider()

    # Parameter spec for the single tool offered to the model.
    pod_parameters = {
        "type": "object",
        "properties": {
            "pod_name": {"type": "string", "description": "Name of the pod"},
            "namespace": {"type": "string", "description": "Kubernetes namespace"},
        },
        "required": ["pod_name"],
    }
    pod_restart = create_tool_definition(
        name="restart_pod",
        description="Restart a Kubernetes pod",
        parameters=pod_parameters,
    )

    # Exercise tool calling with one user message.
    user_request = {
        "role": "user",
        "content": "restart the awoooi-api pod in awoooi-prod namespace",
    }
    response = await nvidia.tool_call(messages=[user_request], tools=[pod_restart])

    # Failure path first, so the success reporting below reads straight through.
    if not response.success:
        print(f"❌ NVIDIA API 連線失敗: {response.error}")
        if response.fallback_triggered:
            print(" 已觸發 Fallback")
        return False

    print("✅ NVIDIA API 連線成功")
    print(f" 延遲: {response.latency_ms:.0f}ms")
    if response.usage:
        print(f" Token: {response.usage.total_tokens}")

    for call in response.tool_calls:
        if not call.valid:
            print(f" ❌ 無效 Tool Call: {call.error}")
            continue
        print(f" Tool: {call.tool_name}")
        print(f" Args: {json.dumps(call.arguments, ensure_ascii=False)}")

    return True
|
|
|
|
|
|
async def test_router_integration():
    """Check 2: ask the AI router which provider it picks for tool calling.

    Prints the provider, model and fallback chain returned by
    ``route_tool_calling()``.

    Returns:
        True when the selected provider equals ``AIProvider.NVIDIA``,
        otherwise False.
    """
    print("\n=== 測試 2: AIRouter 整合 ===")

    ai_router = get_ai_router()
    chosen, model_name, fallbacks = ai_router.route_tool_calling()

    print("✅ Tool Calling 路由")
    print(f" Provider: {chosen.value}")
    print(f" Model: {model_name}")
    # Each fallback entry is a pair; only the provider half is displayed.
    fallback_names = [candidate.value for candidate, _ in fallbacks]
    print(f" Fallback: {fallback_names}")

    return chosen == AIProvider.NVIDIA
|
|
|
|
|
|
async def test_high_risk_detection():
    """Check 3: verify the provider's high-risk classification of tool names.

    Three tool names are expected to be flagged as high risk and three are
    expected to be treated as safe. Each name is passed to
    ``is_high_risk_tool`` once and the verdict is printed.

    Returns:
        True when every tool name is classified as expected, otherwise False.
    """
    print("\n=== 測試 3: 高風險 Tool 檢測 ===")

    nvidia = get_nvidia_provider()

    # (tool name, expected to be high risk) -- high-risk names are checked first.
    expectations = [
        ("delete_pod", True),
        ("delete_deployment", True),
        ("drain_node", True),
        ("restart_pod", False),
        ("scale_deployment", False),
        ("get_logs", False),
    ]

    mismatches = 0
    for tool, expect_risky in expectations:
        flagged = bool(nvidia.is_high_risk_tool(tool))
        if expect_risky and flagged:
            print(f" ✅ {tool}: 高風險 (需 HITL)")
        elif expect_risky:
            print(f" ❌ {tool}: 應為高風險但未檢測到")
            mismatches += 1
        elif not flagged:
            print(f" ✅ {tool}: 安全 (可自動執行)")
        else:
            print(f" ❌ {tool}: 應為安全但被標記為高風險")
            mismatches += 1

    return mismatches == 0
|
|
|
|
|
|
async def test_chinese_prompt():
    """Check 4: tool calling driven by a Traditional Chinese user prompt.

    Offers a ``scale_deployment`` tool to a new ``NvidiaProvider`` together
    with a Chinese request to scale a deployment, then looks for a valid
    ``scale_deployment`` call in the response.

    Returns:
        True when the response succeeded and contains at least one valid
        ``scale_deployment`` tool call (the first match is printed);
        otherwise False, after printing the error or a generic message.
    """
    print("\n=== 測試 4: 繁體中文 Tool Calling ===")

    nvidia = NvidiaProvider()

    scale_parameters = {
        "type": "object",
        "properties": {
            "deployment": {"type": "string"},
            "replicas": {"type": "integer"},
            "namespace": {"type": "string"},
        },
        "required": ["deployment", "replicas"],
    }
    scaler = create_tool_definition(
        name="scale_deployment",
        description="Scale a Kubernetes deployment to a specified number of replicas",
        parameters=scale_parameters,
    )

    response = await nvidia.tool_call(
        messages=[{"role": "user", "content": "把 awoooi-web 擴展到 3 個副本"}],
        tools=[scaler],
    )

    # Only inspect tool calls when the request succeeded and returned some.
    candidates = (response.success and response.tool_calls) or ()
    hit = next(
        (
            call
            for call in candidates
            if call.valid and call.tool_name == "scale_deployment"
        ),
        None,
    )

    if hit is None:
        print(f"❌ 繁中 Tool Calling 失敗: {response.error or 'No valid tool calls'}")
        return False

    print("✅ 繁中 Tool Calling 成功")
    print(f" Deployment: {hit.arguments.get('deployment')}")
    print(f" Replicas: {hit.arguments.get('replicas')}")
    return True
|
|
|
|
|
|
async def main():
    """Run the four verification checks in order and print a summary.

    Each check is awaited inside its own ``try`` so that an exception in one
    check is reported and recorded as a failure without stopping the others.

    Returns:
        0 when every check passed, 1 otherwise. The caller uses this as the
        process exit code.
    """
    rule = "=" * 60
    print(rule)
    print("NVIDIA Nemotron E2E 驗證 - ADR-036")
    print(rule)

    # (summary label, check coroutine function), in execution order.
    checks = (
        ("NVIDIA API 連線", test_nvidia_connection),
        ("AIRouter 整合", test_router_integration),
        ("高風險 Tool 檢測", test_high_risk_detection),
        ("繁中 Tool Calling", test_chinese_prompt),
    )

    results = []
    for number, (label, check) in enumerate(checks, start=1):
        try:
            outcome = await check()
        except Exception as exc:
            # Top-level boundary: report the error and count the check as failed.
            print(f"❌ 測試 {number} 異常: {exc}")
            outcome = False
        results.append((label, outcome))

    # Summary
    print("\n" + rule)
    print("測試總結")
    print(rule)

    total = len(results)
    passed = 0
    for label, outcome in results:
        if outcome:
            passed += 1
            status = "✅ PASS"
        else:
            status = "❌ FAIL"
        print(f" {status}: {label}")

    print(f"\n結果: {passed}/{total} 通過")

    if passed != total:
        print("\n⚠️ 部分測試失敗,請檢查日誌")
        return 1

    print("\n🎉 Nemotron E2E 驗證通過!")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the async entry point and use its return value as the exit status.
    sys.exit(asyncio.run(main()))
|