Files
awoooi/apps/api/tests/test_action_parsing.py
Your Name 433f7b068e
All checks were successful
CD Pipeline / tests (push) Successful in 2m7s
Code Review / ai-code-review (push) Successful in 42s
CD Pipeline / build-and-deploy (push) Successful in 13m14s
CD Pipeline / post-deploy-checks (push) Successful in 4m29s
fix(aiops): close ssh and telegram remediation gaps
2026-05-01 16:53:02 +08:00

202 lines
8.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Phase 12.1: Tool Calling 優化 - 行動解析測試
============================================
#60 OpenClaw 行動解析測試
#61 Tool Calling 準確度基線建立
測試 parse_operation_from_action() 的準確度
"""
import pytest
from src.services.executor import OperationType
from src.services.operation_parser import parse_operation_from_action
# =============================================================================
# 測試案例定義
# =============================================================================
# 格式: (action_string, expected_operation, expected_resource, expected_namespace)
# English-language parsing cases.
# Each entry: (action_string, expected_operation, expected_resource, expected_namespace)
ENGLISH_TEST_CASES = [
    # Standard kubectl command format
    (
        "kubectl delete pod nginx-frontend-7d4b8c9f5-xk2m3 -n production",
        OperationType.DELETE_POD,
        "nginx-frontend-7d4b8c9f5-xk2m3",
        "production",
    ),
    (
        "kubectl rollout restart deployment/api-backend -n default",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "kubectl rollout restart statefulset/postgres-primary -n awoooi-prod",
        OperationType.RESTART_STATEFULSET,
        "postgres-primary",
        "awoooi-prod",
    ),
    (
        "kubectl rollout restart daemonset/node-exporter -n monitoring",
        OperationType.RESTART_DAEMONSET,
        "node-exporter",
        "monitoring",
    ),
    (
        "kubectl delete pod awoooi-worker-0 -n default",
        OperationType.DELETE_POD,
        "awoooi-worker-0",
        "default",
    ),
    # Natural-language English (no explicit -n; the expected namespace is
    # default_namespace=awoooi-prod)
    (
        "restart deployment api-backend",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "awoooi-prod",
    ),
    (
        "Restart deployment: web-frontend",
        OperationType.RESTART_DEPLOYMENT,
        "web-frontend",
        "awoooi-prod",
    ),
    (
        "delete pod nginx-ingress-abc123",
        OperationType.DELETE_POD,
        "nginx-ingress-abc123",
        "awoooi-prod",
    ),
    (
        "scale deployment web-frontend to 5 replicas",
        OperationType.SCALE_DEPLOYMENT,
        "web-frontend",
        "awoooi-prod",
    ),
    (
        "Scale deployment api-backend -n staging",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "staging",
    ),
]
# Chinese-language parsing cases.
# Each entry: (action_string, expected_operation, expected_resource, expected_namespace)
CHINESE_TEST_CASES = [
    # Standard Chinese phrasing (no explicit -n; the expected namespace is
    # default_namespace=awoooi-prod)
    (
        "重新啟動 api-backend 服務",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "awoooi-prod",
    ),
    (
        "重新啟動 awoooi-worker 服務",
        OperationType.RESTART_DEPLOYMENT,
        "awoooi-worker",
        "awoooi-prod",
    ),
    # StatefulSet Pod: a "restart" of an ordinal-suffixed name is expected
    # to map to a pod deletion.
    (
        "重新啟動 postgres-primary-0",
        OperationType.DELETE_POD,
        "postgres-primary-0",
        "awoooi-prod",
    ),
    (
        "擴容 api-backend",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "awoooi-prod",
    ),
    (
        "擴展 web-frontend 副本數到 5",
        OperationType.SCALE_DEPLOYMENT,
        "web-frontend",
        "awoooi-prod",
    ),
    # The trailing "-deployment" suffix is expected to be stripped from the
    # resource name.
    (
        "擴展 api-backend-deployment 副本數至 10",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "awoooi-prod",
    ),
    # Pod deletion
    (
        "刪除 Pod nginx-ingress-7d6f8c9b5-abc12",
        OperationType.DELETE_POD,
        "nginx-ingress-7d6f8c9b5-abc12",
        "awoooi-prod",
    ),
]
# Mixed-language parsing cases.
# Each entry: (action_string, expected_operation, expected_resource, expected_namespace)
MIXED_TEST_CASES = [
    # Mixed Chinese and English
    (
        "kubectl delete pod api-backend-0 -n default",
        OperationType.DELETE_POD,
        "api-backend-0",
        "default",
    ),
    (
        "重新啟動 deployment api-backend",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "awoooi-prod",
    ),
    # Common format generated by OpenClaw
    (
        "建議行動: kubectl rollout restart deployment/awoooi-api -n default",
        OperationType.RESTART_DEPLOYMENT,
        "awoooi-api",
        "default",
    ),
]
# Inputs for which no operation and no resource are expected.
# Each entry: (action_string, expected_operation, expected_resource, expected_namespace)
EDGE_CASES = [
    # Cases that should fail to parse
    (
        "這是一段普通文字,沒有任何操作",
        None,
        None,
        "awoooi-prod",
    ),
    (
        "SELECT * FROM users",
        None,
        None,
        "awoooi-prod",
    ),
    (
        "",
        None,
        None,
        "awoooi-prod",
    ),
    # Boundary cases: a verb with no target
    (
        "restart",
        None,
        None,
        "awoooi-prod",
    ),
    (
        "delete",
        None,
        None,
        "awoooi-prod",
    ),
]
# =============================================================================
# 測試函數
# =============================================================================
class TestEnglishActionParsing:
    """Action-parsing tests for English inputs."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        ENGLISH_TEST_CASES,
    )
    def test_english_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check operation, resource and namespace parsed from one English action."""
        actual_op, actual_resource, actual_ns = parse_operation_from_action(action)

        # Checked in order: operation, then resource, then namespace.
        assert actual_op == expected_op, (
            f"Operation mismatch for '{action}': got {actual_op}, expected {expected_op}"
        )
        assert actual_resource == expected_resource, (
            f"Resource mismatch for '{action}': got {actual_resource}, expected {expected_resource}"
        )
        assert actual_ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {actual_ns}, expected {expected_ns}"
        )
class TestChineseActionParsing:
    """Action-parsing tests for Chinese inputs."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        CHINESE_TEST_CASES,
    )
    def test_chinese_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check operation, resource and namespace parsed from one Chinese action."""
        actual_op, actual_resource, actual_ns = parse_operation_from_action(action)

        # Checked in order: operation, then resource, then namespace.
        assert actual_op == expected_op, (
            f"Operation mismatch for '{action}': got {actual_op}, expected {expected_op}"
        )
        assert actual_resource == expected_resource, (
            f"Resource mismatch for '{action}': got {actual_resource}, expected {expected_resource}"
        )
        assert actual_ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {actual_ns}, expected {expected_ns}"
        )
class TestMixedActionParsing:
    """Action-parsing tests for mixed-language inputs."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        MIXED_TEST_CASES,
    )
    def test_mixed_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check operation, resource and namespace parsed from one mixed-language action."""
        actual_op, actual_resource, actual_ns = parse_operation_from_action(action)

        # Checked in order: operation, then resource, then namespace.
        assert actual_op == expected_op, (
            f"Operation mismatch for '{action}': got {actual_op}, expected {expected_op}"
        )
        assert actual_resource == expected_resource, (
            f"Resource mismatch for '{action}': got {actual_resource}, expected {expected_resource}"
        )
        assert actual_ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {actual_ns}, expected {expected_ns}"
        )
class TestEdgeCases:
    """Edge-case tests for action parsing."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        EDGE_CASES,
    )
    def test_edge_cases(self, action, expected_op, expected_resource, expected_ns):
        """Check the operation and resource parsed from one edge-case action.

        ``expected_ns`` is supplied by the parametrization but is not asserted
        here; only the operation and resource are compared.
        """
        # The parsed namespace is unpacked but intentionally left unchecked.
        actual_op, actual_resource, _actual_ns = parse_operation_from_action(action)

        assert actual_op == expected_op, (
            f"Operation mismatch for '{action}': got {actual_op}, expected {expected_op}"
        )
        assert actual_resource == expected_resource, (
            f"Resource mismatch for '{action}': got {actual_resource}, expected {expected_resource}"
        )
# =============================================================================
# 準確度報告
# =============================================================================
def test_accuracy_report():
    """Print an accuracy report over all non-edge cases and enforce the baseline.

    Runs the parser on the English, Chinese and mixed case lists, prints
    totals plus each mismatching case, and fails if fewer than 70% of the
    cases match on operation, resource and namespace.
    """
    all_cases = ENGLISH_TEST_CASES + CHINESE_TEST_CASES + MIXED_TEST_CASES

    # Collect one record per case whose parsed triple differs from the expectation.
    failures = []
    for action, expected_op, expected_resource, expected_ns in all_cases:
        op, resource, ns = parse_operation_from_action(action)
        expected = (expected_op, expected_resource, expected_ns)
        got = (op, resource, ns)
        if got == expected:
            continue
        failures.append({
            "action": action,
            "expected": expected,
            "got": got,
        })

    total = len(all_cases)
    failed = len(failures)
    passed = total - failed
    if total > 0:
        accuracy = passed / total * 100
    else:
        # No cases at all: report zero rather than dividing by zero.
        accuracy = 0

    separator = "=" * 60
    print("\n" + separator)
    print("Phase 12.1: 行動解析準確度基線報告")
    print(separator)
    print(f"總測試案例: {total}")
    print(f"通過: {passed}")
    print(f"失敗: {failed}")
    print(f"準確率: {accuracy:.1f}%")
    print(separator)

    if failures:
        print("\n失敗案例:")
        for failure in failures:
            print(f" - '{failure['action']}'")
            print(f" 期望: {failure['expected']}")
            print(f" 實際: {failure['got']}")

    # Current baseline threshold; meant to be raised after later improvements.
    assert accuracy >= 70, f"Accuracy {accuracy}% is below baseline 70%"
if __name__ == "__main__":
    # Allow running this file directly: hand it to pytest with verbose
    # output and short tracebacks.
    cli_args = [__file__, "-v", "--tb=short"]
    pytest.main(cli_args)