- lewooogo-core: 新增 placeholder 測試檔 (vitest) - api: 修復 I001 import 排序 (ruff --fix) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
196 lines
7.5 KiB
Python
196 lines
7.5 KiB
Python
"""
|
|
Phase 12.1: Tool Calling optimization - action parsing tests
============================================================

#60 OpenClaw action parsing tests
#61 Tool Calling accuracy baseline

Tests the accuracy of parse_operation_from_action().
|
|
"""
|
|
|
|
import pytest
|
|
|
|
from src.services.executor import OperationType
|
|
from src.services.operation_parser import parse_operation_from_action
|
|
|
|
# =============================================================================
# Test case definitions
# =============================================================================

# Format: (action_string, expected_operation, expected_resource, expected_namespace)
|
|
|
|
# English action strings.
# Each entry: (action_string, expected_operation, expected_resource, expected_namespace)
ENGLISH_TEST_CASES = [
    # Standard kubectl command lines
    (
        "kubectl delete pod nginx-frontend-7d4b8c9f5-xk2m3 -n production",
        OperationType.DELETE_POD,
        "nginx-frontend-7d4b8c9f5-xk2m3",
        "production",
    ),
    (
        "kubectl rollout restart deployment/api-backend -n default",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "kubectl delete pod awoooi-worker-0 -n default",
        OperationType.DELETE_POD,
        "awoooi-worker-0",
        "default",
    ),
    # Natural-language English
    (
        "restart deployment api-backend",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "Restart deployment: web-frontend",
        OperationType.RESTART_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    (
        "delete pod nginx-ingress-abc123",
        OperationType.DELETE_POD,
        "nginx-ingress-abc123",
        "default",
    ),
    (
        "scale deployment web-frontend to 5 replicas",
        OperationType.SCALE_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    (
        "Scale deployment api-backend -n staging",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "staging",
    ),
]
|
|
|
|
# Chinese action strings.
# Each entry: (action_string, expected_operation, expected_resource, expected_namespace)
CHINESE_TEST_CASES = [
    # Standard Chinese phrasing
    (
        "重新啟動 api-backend 服務",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "重新啟動 awoooi-worker 服務",
        OperationType.RESTART_DEPLOYMENT,
        "awoooi-worker",
        "default",
    ),
    # StatefulSet Pod: a restart request is expected to map to DELETE_POD
    (
        "重新啟動 postgres-primary-0",
        OperationType.DELETE_POD,
        "postgres-primary-0",
        "default",
    ),
    (
        "擴容 api-backend",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "擴展 web-frontend 副本數到 5",
        OperationType.SCALE_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    # The "-deployment" suffix is expected to be stripped from the resource name
    (
        "擴展 api-backend-deployment 副本數至 10",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    # Pod deletion
    (
        "刪除 Pod nginx-ingress-7d6f8c9b5-abc12",
        OperationType.DELETE_POD,
        "nginx-ingress-7d6f8c9b5-abc12",
        "default",
    ),
]
|
|
|
|
# Mixed Chinese/English action strings.
# Each entry: (action_string, expected_operation, expected_resource, expected_namespace)
MIXED_TEST_CASES = [
    # Mixed Chinese and English
    (
        "kubectl delete pod api-backend-0 -n default",
        OperationType.DELETE_POD,
        "api-backend-0",
        "default",
    ),
    (
        "重新啟動 deployment api-backend",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    # Common format generated by OpenClaw
    (
        "建議行動: kubectl rollout restart deployment/awoooi-api -n default",
        OperationType.RESTART_DEPLOYMENT,
        "awoooi-api",
        "default",
    ),
]
|
|
|
|
# Inputs for which no operation or resource is expected.
# Each entry: (action_string, expected_operation, expected_resource, expected_namespace)
EDGE_CASES = [
    # Inputs that should fail to parse
    ("這是一段普通文字,沒有任何操作", None, None, "default"),
    ("SELECT * FROM users", None, None, "default"),
    ("", None, None, "default"),
    # Boundary cases: a verb with no target
    ("restart", None, None, "default"),
    ("delete", None, None, "default"),
]
|
|
|
|
|
|
# =============================================================================
# Test functions
# =============================================================================
|
|
|
|
class TestEnglishActionParsing:
    """Action-parsing tests for English inputs."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        ENGLISH_TEST_CASES,
    )
    def test_english_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation, resource and namespace parsed from one action."""
        parsed = parse_operation_from_action(action)
        op, resource, ns = parsed

        assert op == expected_op, (
            f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
        )
        assert resource == expected_resource, (
            f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
        )
        assert ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
        )
|
|
|
|
|
|
class TestChineseActionParsing:
    """Action-parsing tests for Chinese inputs."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        CHINESE_TEST_CASES,
    )
    def test_chinese_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation, resource and namespace parsed from one action."""
        parsed = parse_operation_from_action(action)
        op, resource, ns = parsed

        assert op == expected_op, (
            f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
        )
        assert resource == expected_resource, (
            f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
        )
        assert ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
        )
|
|
|
|
|
|
class TestMixedActionParsing:
    """Action-parsing tests for mixed-language inputs."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        MIXED_TEST_CASES,
    )
    def test_mixed_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation, resource and namespace parsed from one action."""
        parsed = parse_operation_from_action(action)
        op, resource, ns = parsed

        assert op == expected_op, (
            f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
        )
        assert resource == expected_resource, (
            f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
        )
        assert ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
        )
|
|
|
|
|
|
class TestEdgeCases:
    """Edge-case tests for action parsing."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        EDGE_CASES,
    )
    def test_edge_cases(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation and resource parsed from one edge-case action."""
        # NOTE(review): the parsed namespace is unpacked but never compared with
        # expected_ns here -- confirm whether it should be asserted as well.
        parsed = parse_operation_from_action(action)
        op, resource, ns = parsed

        assert op == expected_op, (
            f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
        )
        assert resource == expected_resource, (
            f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
        )
|
|
|
|
|
|
# =============================================================================
# Accuracy report
# =============================================================================
|
|
|
|
def test_accuracy_report():
    """Print a parsing-accuracy baseline report and enforce the baseline.

    Runs parse_operation_from_action() over the English, Chinese and mixed
    case tables (EDGE_CASES is not part of the tally), prints the totals and
    every mismatching case, then asserts that the accuracy is at least 70%.
    """
    all_cases = [*ENGLISH_TEST_CASES, *CHINESE_TEST_CASES, *MIXED_TEST_CASES]

    # Collect every case whose parsed triple differs from the expected one.
    failures = []
    for action, expected_op, expected_resource, expected_ns in all_cases:
        op, resource, ns = parse_operation_from_action(action)

        if op == expected_op and resource == expected_resource and ns == expected_ns:
            continue

        failures.append({
            "action": action,
            "expected": (expected_op, expected_resource, expected_ns),
            "got": (op, resource, ns),
        })

    # Every case either passed or was recorded as a failure above.
    total = len(all_cases)
    failed = len(failures)
    passed = total - failed

    if total > 0:
        accuracy = passed / total * 100
    else:
        accuracy = 0

    banner = "=" * 60
    print("\n" + banner)
    print("Phase 12.1: 行動解析準確度基線報告")
    print(banner)
    print(f"總測試案例: {total}")
    print(f"通過: {passed}")
    print(f"失敗: {failed}")
    print(f"準確率: {accuracy:.1f}%")
    print(banner)

    if failures:
        print("\n失敗案例:")
        for f in failures:
            print(f" - '{f['action']}'")
            print(f" 期望: {f['expected']}")
            print(f" 實際: {f['got']}")

    # Enforce the current baseline; raise the threshold as the parser improves.
    assert accuracy >= 70, f"Accuracy {accuracy}% is below baseline 70%"
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running this file directly: verbose output, short tracebacks.
    pytest_args = [__file__, "-v", "--tb=short"]
    pytest.main(pytest_args)
|