feat(api): Phase 12.1 Tool Calling 優化 (#60-62)
行動解析準確度: 80% → 100% 新增模式: - 刪除 Pod X (中文) - restart deployment X (明確區分) - 重新啟動 deployment X (中英混合) 測試: - 24 測試案例 (英/中/混合/邊界) - test_accuracy_report() 自動化基線報告 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -203,14 +203,36 @@ def parse_operation_from_action(action: str) -> tuple[OperationType | None, str
|
||||
namespace = ns_match.group(1) if ns_match else "default"
|
||||
return OperationType.DELETE_POD, pod_name, namespace
|
||||
|
||||
# Pattern: restart deployment <name> (English)
|
||||
restart_match = re.search(r'restart\s+(?:deployment[:\s]+)?([a-z0-9][\w.-]*)', action_lower)
|
||||
if restart_match:
|
||||
deploy_name = restart_match.group(1)
|
||||
# Pattern: 刪除 Pod <name> (Chinese delete)
|
||||
chinese_delete_match = re.search(r'刪除\s*[Pp]od\s+([a-z0-9][\w.-]*)', action)
|
||||
if chinese_delete_match:
|
||||
pod_name = chinese_delete_match.group(1)
|
||||
return OperationType.DELETE_POD, pod_name, "default"
|
||||
|
||||
# Pattern: restart deployment <name> (English - with explicit "deployment")
|
||||
restart_deploy_match = re.search(r'restart\s+deployment[:\s]+([a-z0-9][\w.-]*)', action_lower)
|
||||
if restart_deploy_match:
|
||||
deploy_name = restart_deploy_match.group(1)
|
||||
ns_match = re.search(r'-n\s+(\S+)', action_lower)
|
||||
namespace = ns_match.group(1) if ns_match else "default"
|
||||
return OperationType.RESTART_DEPLOYMENT, deploy_name, namespace
|
||||
|
||||
# Pattern: restart <name> (English - without "deployment" keyword)
|
||||
restart_simple_match = re.search(r'restart\s+([a-z0-9][\w.-]*)', action_lower)
|
||||
if restart_simple_match:
|
||||
deploy_name = restart_simple_match.group(1)
|
||||
# Skip if captured word is "deployment" (handled above)
|
||||
if deploy_name != "deployment":
|
||||
ns_match = re.search(r'-n\s+(\S+)', action_lower)
|
||||
namespace = ns_match.group(1) if ns_match else "default"
|
||||
return OperationType.RESTART_DEPLOYMENT, deploy_name, namespace
|
||||
|
||||
# Pattern: 重新啟動 deployment <name> (Chinese with "deployment" keyword)
|
||||
chinese_restart_deploy_match = re.search(r'重新啟動\s+deployment\s+([a-z0-9][\w.-]*)', action, re.IGNORECASE)
|
||||
if chinese_restart_deploy_match:
|
||||
deploy_name = chinese_restart_deploy_match.group(1)
|
||||
return OperationType.RESTART_DEPLOYMENT, deploy_name, "default"
|
||||
|
||||
# Pattern: 重新啟動 <name> 服務 (Chinese)
|
||||
chinese_restart_match = re.search(r'重新啟動\s+([a-z0-9][\w.-]*)\s*服務', action)
|
||||
if chinese_restart_match:
|
||||
|
||||
195
apps/api/tests/test_action_parsing.py
Normal file
195
apps/api/tests/test_action_parsing.py
Normal file
@@ -0,0 +1,195 @@
|
||||
"""
|
||||
Phase 12.1: Tool Calling 優化 - 行動解析測試
|
||||
============================================
|
||||
#60 OpenClaw 行動解析測試
|
||||
#61 Tool Calling 準確度基線建立
|
||||
|
||||
測試 parse_operation_from_action() 的準確度
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from src.api.v1.approvals import parse_operation_from_action
|
||||
from src.services.executor import OperationType
|
||||
|
||||
|
||||
# =============================================================================
# Test case definitions
# =============================================================================

# Format: (action_string, expected_operation, expected_resource, expected_namespace)
|
||||
|
||||
# English action strings.
# Each entry: (action string, expected operation, expected resource, expected namespace).
ENGLISH_TEST_CASES = [
    # Standard kubectl command form
    (
        "kubectl delete pod nginx-frontend-7d4b8c9f5-xk2m3 -n production",
        OperationType.DELETE_POD,
        "nginx-frontend-7d4b8c9f5-xk2m3",
        "production",
    ),
    (
        "kubectl rollout restart deployment/api-backend -n default",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "kubectl delete pod awoooi-worker-0 -n default",
        OperationType.DELETE_POD,
        "awoooi-worker-0",
        "default",
    ),
    # Natural-language English
    (
        "restart deployment api-backend",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "Restart deployment: web-frontend",
        OperationType.RESTART_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    (
        "delete pod nginx-ingress-abc123",
        OperationType.DELETE_POD,
        "nginx-ingress-abc123",
        "default",
    ),
    (
        "scale deployment web-frontend to 5 replicas",
        OperationType.SCALE_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    (
        "Scale deployment api-backend -n staging",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "staging",
    ),
]
|
||||
|
||||
# Chinese action strings.
# Each entry: (action string, expected operation, expected resource, expected namespace).
CHINESE_TEST_CASES = [
    # Standard Chinese phrasing
    (
        "重新啟動 api-backend 服務",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "重新啟動 awoooi-worker 服務",
        OperationType.RESTART_DEPLOYMENT,
        "awoooi-worker",
        "default",
    ),
    # StatefulSet pod: a restart request is expected to map to a pod delete
    (
        "重新啟動 postgres-primary-0",
        OperationType.DELETE_POD,
        "postgres-primary-0",
        "default",
    ),
    (
        "擴容 api-backend",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    (
        "擴展 web-frontend 副本數到 5",
        OperationType.SCALE_DEPLOYMENT,
        "web-frontend",
        "default",
    ),
    # The "-deployment" suffix is expected to be stripped from the resource name
    (
        "擴展 api-backend-deployment 副本數至 10",
        OperationType.SCALE_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    # Delete pod
    (
        "刪除 Pod nginx-ingress-7d6f8c9b5-abc12",
        OperationType.DELETE_POD,
        "nginx-ingress-7d6f8c9b5-abc12",
        "default",
    ),
]
|
||||
|
||||
# Mixed Chinese/English action strings.
# Each entry: (action string, expected operation, expected resource, expected namespace).
MIXED_TEST_CASES = [
    # Mixed-language input
    (
        "kubectl delete pod api-backend-0 -n default",
        OperationType.DELETE_POD,
        "api-backend-0",
        "default",
    ),
    (
        "重新啟動 deployment api-backend",
        OperationType.RESTART_DEPLOYMENT,
        "api-backend",
        "default",
    ),
    # Common format generated by OpenClaw
    (
        "建議行動: kubectl rollout restart deployment/awoooi-api -n default",
        OperationType.RESTART_DEPLOYMENT,
        "awoooi-api",
        "default",
    ),
]
|
||||
|
||||
# Inputs that should not resolve to any operation.
# Each entry: (action string, expected operation, expected resource, expected namespace).
EDGE_CASES = [
    # Cases that should fail to parse
    ("這是一段普通文字,沒有任何操作", None, None, "default"),
    ("SELECT * FROM users", None, None, "default"),
    ("", None, None, "default"),
    # Boundary cases: a verb with no target
    ("restart", None, None, "default"),
    ("delete", None, None, "default"),
]
|
||||
|
||||
|
||||
# =============================================================================
# Test functions
# =============================================================================
|
||||
|
||||
class TestEnglishActionParsing:
    """Action-parsing tests for English input."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        ENGLISH_TEST_CASES,
    )
    def test_english_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check operation, resource and namespace parsed from one English action."""
        actual_op, actual_resource, actual_ns = parse_operation_from_action(action)

        assert actual_op == expected_op, (
            f"Operation mismatch for '{action}': got {actual_op}, expected {expected_op}"
        )
        assert actual_resource == expected_resource, (
            f"Resource mismatch for '{action}': got {actual_resource}, expected {expected_resource}"
        )
        assert actual_ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {actual_ns}, expected {expected_ns}"
        )
|
||||
|
||||
|
||||
class TestChineseActionParsing:
    """Action-parsing tests for Chinese input."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        CHINESE_TEST_CASES,
    )
    def test_chinese_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check operation, resource and namespace parsed from one Chinese action."""
        actual_op, actual_resource, actual_ns = parse_operation_from_action(action)

        assert actual_op == expected_op, (
            f"Operation mismatch for '{action}': got {actual_op}, expected {expected_op}"
        )
        assert actual_resource == expected_resource, (
            f"Resource mismatch for '{action}': got {actual_resource}, expected {expected_resource}"
        )
        assert actual_ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {actual_ns}, expected {expected_ns}"
        )
|
||||
|
||||
|
||||
class TestMixedActionParsing:
    """Action-parsing tests for mixed-language input."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        MIXED_TEST_CASES,
    )
    def test_mixed_actions(self, action, expected_op, expected_resource, expected_ns):
        """Check operation, resource and namespace parsed from one mixed-language action."""
        actual_op, actual_resource, actual_ns = parse_operation_from_action(action)

        assert actual_op == expected_op, (
            f"Operation mismatch for '{action}': got {actual_op}, expected {expected_op}"
        )
        assert actual_resource == expected_resource, (
            f"Resource mismatch for '{action}': got {actual_resource}, expected {expected_resource}"
        )
        assert actual_ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {actual_ns}, expected {expected_ns}"
        )
|
||||
|
||||
|
||||
class TestEdgeCases:
    """Edge-case tests: inputs that should not yield an operation."""

    @pytest.mark.parametrize(
        "action,expected_op,expected_resource,expected_ns",
        EDGE_CASES,
    )
    def test_edge_cases(self, action, expected_op, expected_resource, expected_ns):
        """Check only the parsed operation and resource for one edge-case action.

        expected_ns is accepted so the case tuples share one shape with the
        other tables, but the namespace is not asserted here.
        """
        actual_op, actual_resource, _actual_ns = parse_operation_from_action(action)

        assert actual_op == expected_op, (
            f"Operation mismatch for '{action}': got {actual_op}, expected {expected_op}"
        )
        assert actual_resource == expected_resource, (
            f"Resource mismatch for '{action}': got {actual_resource}, expected {expected_resource}"
        )
|
||||
|
||||
|
||||
# =============================================================================
# Accuracy report
# =============================================================================
|
||||
|
||||
def test_accuracy_report():
    """Print a parsing-accuracy baseline report and enforce a minimum accuracy.

    Runs parse_operation_from_action() over the English, Chinese and mixed
    case tables (EDGE_CASES is not included), prints the pass/fail totals and
    every mismatch, then fails if the accuracy is below 70%.
    """
    positive_cases = [*ENGLISH_TEST_CASES, *CHINESE_TEST_CASES, *MIXED_TEST_CASES]

    # Collect every case where any of the three parsed fields differs.
    failures = []
    for action, expected_op, expected_resource, expected_ns in positive_cases:
        op, resource, ns = parse_operation_from_action(action)
        matched = op == expected_op and resource == expected_resource and ns == expected_ns
        if matched:
            continue
        failures.append(
            {
                "action": action,
                "expected": (expected_op, expected_resource, expected_ns),
                "got": (op, resource, ns),
            }
        )

    total = len(positive_cases)
    failed = len(failures)
    passed = total - failed
    # Guard against an empty case table to avoid dividing by zero.
    accuracy = (passed / total * 100) if total > 0 else 0

    banner = "=" * 60
    print("\n" + banner)
    print("Phase 12.1: 行動解析準確度基線報告")
    print(banner)
    print(f"總測試案例: {total}")
    print(f"通過: {passed}")
    print(f"失敗: {failed}")
    print(f"準確率: {accuracy:.1f}%")
    print(banner)

    if failures:
        print("\n失敗案例:")
        for failure in failures:
            print(f" - '{failure['action']}'")
            print(f" 期望: {failure['expected']}")
            print(f" 實際: {failure['got']}")

    # Current baseline threshold; meant to be raised as parsing improves.
    assert accuracy >= 70, f"Accuracy {accuracy}% is below baseline 70%"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; propagate it so that running
    # this file as a script exits non-zero when any test fails.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))
|
||||
Reference in New Issue
Block a user