Files
awoooi/apps/api/tests/test_action_parsing.py
OG T afda3123eb feat(api): Phase 12.1 Tool Calling 優化 (#60-62)
行動解析準確度: 80% → 100%

新增模式:
- 刪除 Pod X (中文)
- restart deployment X (明確區分)
- 重新啟動 deployment X (中英混合)

測試:
- 24 測試案例 (英/中/混合/邊界)
- test_accuracy_report() 自動化基線報告

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-25 10:48:37 +08:00

196 lines
7.5 KiB
Python

"""
Phase 12.1: Tool Calling 優化 - 行動解析測試
============================================
#60 OpenClaw 行動解析測試
#61 Tool Calling 準確度基線建立
測試 parse_operation_from_action() 的準確度
"""
import pytest
from src.api.v1.approvals import parse_operation_from_action
from src.services.executor import OperationType
# =============================================================================
# 測試案例定義
# =============================================================================
# 格式: (action_string, expected_operation, expected_resource, expected_namespace)
# English inputs. Each entry is a 4-tuple:
# (action_string, expected_operation, expected_resource, expected_namespace)
ENGLISH_TEST_CASES = [
    # --- Standard kubectl command lines ---
    (
        "kubectl delete pod nginx-frontend-7d4b8c9f5-xk2m3 -n production",
        OperationType.DELETE_POD,
        "nginx-frontend-7d4b8c9f5-xk2m3",
        "production",
    ),
    (
        "kubectl rollout restart deployment/api-backend -n default",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "kubectl delete pod awoooi-worker-0 -n default",
        OperationType.DELETE_POD,
        "awoooi-worker-0",
        "default",
    ),
    # --- Natural-language English ---
    (
        "restart deployment api-backend",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "Restart deployment: web-frontend",
        OperationType.RESTART_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    (
        "delete pod nginx-ingress-abc123",
        OperationType.DELETE_POD,
        "nginx-ingress-abc123",
        "default",
    ),
    (
        "scale deployment web-frontend to 5 replicas",
        OperationType.SCALE_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    (
        "Scale deployment api-backend -n staging",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "staging",
    ),
]
# Chinese inputs. Each entry is a 4-tuple:
# (action_string, expected_operation, expected_resource, expected_namespace)
CHINESE_TEST_CASES = [
    # --- Standard Chinese phrasing ---
    (
        "重新啟動 api-backend 服務",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "重新啟動 awoooi-worker 服務",
        OperationType.RESTART_DEPLOYMENT,
        "awoooi-worker",
        "default",
    ),
    (
        # StatefulSet pod: the expected operation here is DELETE_POD.
        "重新啟動 postgres-primary-0",
        OperationType.DELETE_POD,
        "postgres-primary-0",
        "default",
    ),
    (
        "擴容 api-backend",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "擴展 web-frontend 副本數到 5",
        OperationType.SCALE_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    (
        # The trailing "-deployment" suffix is expected to be stripped.
        "擴展 api-backend-deployment 副本數至 10",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    # --- Delete pod ---
    (
        "刪除 Pod nginx-ingress-7d6f8c9b5-abc12",
        OperationType.DELETE_POD,
        "nginx-ingress-7d6f8c9b5-abc12",
        "default",
    ),
]
# Mixed-language inputs. Each entry is a 4-tuple:
# (action_string, expected_operation, expected_resource, expected_namespace)
MIXED_TEST_CASES = [
    # --- Mixed Chinese / English ---
    (
        "kubectl delete pod api-backend-0 -n default",
        OperationType.DELETE_POD,
        "api-backend-0",
        "default",
    ),
    (
        "重新啟動 deployment api-backend",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    # --- Common format produced by OpenClaw ---
    (
        "建議行動: kubectl rollout restart deployment/awoooi-api -n default",
        OperationType.RESTART_DEPLOYMENT,
        "awoooi-api",
        "default",
    ),
]
# Edge-case inputs. Each entry is a 4-tuple:
# (action_string, expected_operation, expected_resource, expected_namespace)
# Every case expects no operation and no resource to be parsed.
EDGE_CASES = [
    # --- Inputs that should fail to parse ---
    (
        "這是一段普通文字,沒有任何操作",
        None,
        None,
        "default",
    ),
    (
        "SELECT * FROM users",
        None,
        None,
        "default",
    ),
    (
        "",
        None,
        None,
        "default",
    ),
    # --- Boundary inputs: a verb with no target ---
    (
        "restart",
        None,
        None,
        "default",
    ),
    (
        "delete",
        None,
        None,
        "default",
    ),
]
# =============================================================================
# 測試函數
# =============================================================================
class TestEnglishActionParsing:
    """Action-parsing tests driven by ENGLISH_TEST_CASES."""

    @pytest.mark.parametrize(
        ("action", "expected_op", "expected_resource", "expected_ns"),
        ENGLISH_TEST_CASES,
    )
    def test_english_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation, resource and namespace parsed from one case."""
        parsed = parse_operation_from_action(action)
        op, resource, ns = parsed

        assert op == expected_op, (
            f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
        )
        assert resource == expected_resource, (
            f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
        )
        assert ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
        )
class TestChineseActionParsing:
    """Action-parsing tests driven by CHINESE_TEST_CASES."""

    @pytest.mark.parametrize(
        ("action", "expected_op", "expected_resource", "expected_ns"),
        CHINESE_TEST_CASES,
    )
    def test_chinese_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation, resource and namespace parsed from one case."""
        parsed = parse_operation_from_action(action)
        op, resource, ns = parsed

        assert op == expected_op, (
            f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
        )
        assert resource == expected_resource, (
            f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
        )
        assert ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
        )
class TestMixedActionParsing:
    """Action-parsing tests driven by MIXED_TEST_CASES."""

    @pytest.mark.parametrize(
        ("action", "expected_op", "expected_resource", "expected_ns"),
        MIXED_TEST_CASES,
    )
    def test_mixed_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation, resource and namespace parsed from one case."""
        parsed = parse_operation_from_action(action)
        op, resource, ns = parsed

        assert op == expected_op, (
            f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
        )
        assert resource == expected_resource, (
            f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
        )
        assert ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
        )
class TestEdgeCases:
    """Action-parsing tests driven by EDGE_CASES."""

    @pytest.mark.parametrize(
        ("action", "expected_op", "expected_resource", "expected_ns"),
        EDGE_CASES,
    )
    def test_edge_cases(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation and resource parsed from one edge case.

        ``expected_ns`` is accepted so the case tuples keep the same shape as
        the other suites, but the parsed namespace is not asserted here.
        """
        parsed = parse_operation_from_action(action)
        op, resource, ns = parsed

        assert op == expected_op, (
            f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
        )
        assert resource == expected_resource, (
            f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
        )
# =============================================================================
# 準確度報告
# =============================================================================
def test_accuracy_report():
    """Print an accuracy baseline report and enforce the minimum accuracy.

    Runs the parser over the English, Chinese and mixed-language cases
    (EDGE_CASES are not included), prints the pass/fail counts and each
    failing case, then asserts that the accuracy is at least 70%.
    """
    all_cases = ENGLISH_TEST_CASES + CHINESE_TEST_CASES + MIXED_TEST_CASES

    # Collect only the mismatches; the pass count is derived from them below.
    failures = []
    for action, expected_op, expected_resource, expected_ns in all_cases:
        op, resource, ns = parse_operation_from_action(action)
        matched = op == expected_op and resource == expected_resource and ns == expected_ns
        if matched:
            continue
        failures.append({
            "action": action,
            "expected": (expected_op, expected_resource, expected_ns),
            "got": (op, resource, ns),
        })

    total = len(all_cases)
    failed = len(failures)
    passed = total - failed
    # Guard against an empty case list to avoid dividing by zero.
    accuracy = (passed / total * 100) if total > 0 else 0

    banner = "=" * 60
    print("\n" + banner)
    print("Phase 12.1: 行動解析準確度基線報告")
    print(banner)
    print(f"總測試案例: {total}")
    print(f"通過: {passed}")
    print(f"失敗: {failed}")
    print(f"準確率: {accuracy:.1f}%")
    print(banner)

    if failures:
        print("\n失敗案例:")
        for f in failures:
            print(f" - '{f['action']}'")
            print(f" 期望: {f['expected']}")
            print(f" 實際: {f['got']}")

    # Current baseline threshold; intended to be raised as parsing improves.
    assert accuracy >= 70, f"Accuracy {accuracy}% is below baseline 70%"
if __name__ == "__main__":
    # Run this file's tests when it is executed directly. pytest.main()
    # returns the exit code instead of exiting, so raise SystemExit with it;
    # otherwise the process would exit with status 0 even when tests fail.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))