diff --git a/apps/api/src/api/v1/approvals.py b/apps/api/src/api/v1/approvals.py index 6f1a48e6..ea74ffae 100644 --- a/apps/api/src/api/v1/approvals.py +++ b/apps/api/src/api/v1/approvals.py @@ -203,14 +203,36 @@ def parse_operation_from_action(action: str) -> tuple[OperationType | None, str namespace = ns_match.group(1) if ns_match else "default" return OperationType.DELETE_POD, pod_name, namespace - # Pattern: restart deployment (English) - restart_match = re.search(r'restart\s+(?:deployment[:\s]+)?([a-z0-9][\w.-]*)', action_lower) - if restart_match: - deploy_name = restart_match.group(1) + # Pattern: 刪除 Pod (Chinese delete) + chinese_delete_match = re.search(r'刪除\s*[Pp]od\s+([a-z0-9][\w.-]*)', action) + if chinese_delete_match: + pod_name = chinese_delete_match.group(1) + return OperationType.DELETE_POD, pod_name, "default" + + # Pattern: restart deployment (English - with explicit "deployment") + restart_deploy_match = re.search(r'restart\s+deployment[:\s]+([a-z0-9][\w.-]*)', action_lower) + if restart_deploy_match: + deploy_name = restart_deploy_match.group(1) ns_match = re.search(r'-n\s+(\S+)', action_lower) namespace = ns_match.group(1) if ns_match else "default" return OperationType.RESTART_DEPLOYMENT, deploy_name, namespace + # Pattern: restart (English - without "deployment" keyword) + restart_simple_match = re.search(r'restart\s+([a-z0-9][\w.-]*)', action_lower) + if restart_simple_match: + deploy_name = restart_simple_match.group(1) + # Skip if captured word is "deployment" (handled above) + if deploy_name != "deployment": + ns_match = re.search(r'-n\s+(\S+)', action_lower) + namespace = ns_match.group(1) if ns_match else "default" + return OperationType.RESTART_DEPLOYMENT, deploy_name, namespace + + # Pattern: 重新啟動 deployment (Chinese with "deployment" keyword) + chinese_restart_deploy_match = re.search(r'重新啟動\s+deployment\s+([a-z0-9][\w.-]*)', action, re.IGNORECASE) + if chinese_restart_deploy_match: + deploy_name = 
chinese_restart_deploy_match.group(1) + return OperationType.RESTART_DEPLOYMENT, deploy_name, "default" + # Pattern: 重新啟動 服務 (Chinese) chinese_restart_match = re.search(r'重新啟動\s+([a-z0-9][\w.-]*)\s*服務', action) if chinese_restart_match: diff --git a/apps/api/tests/test_action_parsing.py b/apps/api/tests/test_action_parsing.py new file mode 100644 index 00000000..21359360 --- /dev/null +++ b/apps/api/tests/test_action_parsing.py @@ -0,0 +1,195 @@ +""" +Phase 12.1: Tool Calling 優化 - 行動解析測試 +============================================ +#60 OpenClaw 行動解析測試 +#61 Tool Calling 準確度基線建立 + +測試 parse_operation_from_action() 的準確度 +""" + +import pytest +from src.api.v1.approvals import parse_operation_from_action +from src.services.executor import OperationType + + +# ============================================================================= +# 測試案例定義 +# ============================================================================= + +# 格式: (action_string, expected_operation, expected_resource, expected_namespace) + +ENGLISH_TEST_CASES = [ + # kubectl 標準格式 + ("kubectl delete pod nginx-frontend-7d4b8c9f5-xk2m3 -n production", + OperationType.DELETE_POD, "nginx-frontend-7d4b8c9f5-xk2m3", "production"), + + ("kubectl rollout restart deployment/api-backend -n default", + OperationType.RESTART_DEPLOYMENT, "api-backend", "default"), + + ("kubectl delete pod awoooi-worker-0 -n default", + OperationType.DELETE_POD, "awoooi-worker-0", "default"), + + # 自然語言英文 + ("restart deployment api-backend", + OperationType.RESTART_DEPLOYMENT, "api-backend", "default"), + + ("Restart deployment: web-frontend", + OperationType.RESTART_DEPLOYMENT, "web-frontend", "default"), + + ("delete pod nginx-ingress-abc123", + OperationType.DELETE_POD, "nginx-ingress-abc123", "default"), + + ("scale deployment web-frontend to 5 replicas", + OperationType.SCALE_DEPLOYMENT, "web-frontend", "default"), + + ("Scale deployment api-backend -n staging", + OperationType.SCALE_DEPLOYMENT, "api-backend", "staging"), +] 

CHINESE_TEST_CASES = [
    # Standard Chinese phrasing
    ("重新啟動 api-backend 服務",
     OperationType.RESTART_DEPLOYMENT, "api-backend", "default"),

    ("重新啟動 awoooi-worker 服務",
     OperationType.RESTART_DEPLOYMENT, "awoooi-worker", "default"),

    ("重新啟動 postgres-primary-0",
     OperationType.DELETE_POD, "postgres-primary-0", "default"),  # StatefulSet Pod

    ("擴容 api-backend",
     OperationType.SCALE_DEPLOYMENT, "api-backend", "default"),

    ("擴展 web-frontend 副本數到 5",
     OperationType.SCALE_DEPLOYMENT, "web-frontend", "default"),

    ("擴展 api-backend-deployment 副本數至 10",
     OperationType.SCALE_DEPLOYMENT, "api-backend", "default"),  # the -deployment suffix is removed

    # Delete Pod
    ("刪除 Pod nginx-ingress-7d6f8c9b5-abc12",
     OperationType.DELETE_POD, "nginx-ingress-7d6f8c9b5-abc12", "default"),
]

MIXED_TEST_CASES = [
    # Mixed Chinese and English
    ("kubectl delete pod api-backend-0 -n default",
     OperationType.DELETE_POD, "api-backend-0", "default"),

    ("重新啟動 deployment api-backend",
     OperationType.RESTART_DEPLOYMENT, "api-backend", "default"),

    # Common format generated by OpenClaw
    ("建議行動: kubectl rollout restart deployment/awoooi-api -n default",
     OperationType.RESTART_DEPLOYMENT, "awoooi-api", "default"),
]

EDGE_CASES = [
    # Cases that should fail to parse
    ("這是一段普通文字,沒有任何操作", None, None, "default"),
    ("SELECT * FROM users", None, None, "default"),
    ("", None, None, "default"),

    # Boundary cases
    ("restart", None, None, "default"),  # missing target
    ("delete", None, None, "default"),  # missing target
]


# =============================================================================
# Test functions
# =============================================================================

def _assert_parsed(action, expected_op, expected_resource, expected_ns, *, check_namespace=True):
    """Parse *action* and assert the resulting (operation, resource, namespace).

    Shared by every parametrized test class below so the assertion logic and
    failure messages live in exactly one place.

    Args:
        action: Action string handed to ``parse_operation_from_action``.
        expected_op: Expected first element of the returned triple.
        expected_resource: Expected second element of the returned triple.
        expected_ns: Expected third element of the returned triple.
        check_namespace: When False the namespace is not asserted.
    """
    op, resource, ns = parse_operation_from_action(action)
    assert op == expected_op, (
        f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
    )
    assert resource == expected_resource, (
        f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
    )
    if check_namespace:
        assert ns == expected_ns, (
            f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
        )


class TestEnglishActionParsing:
    """Parsing tests for English action strings."""

    @pytest.mark.parametrize("action,expected_op,expected_resource,expected_ns", ENGLISH_TEST_CASES)
    def test_english_actions(self, action, expected_op, expected_resource, expected_ns):
        _assert_parsed(action, expected_op, expected_resource, expected_ns)


class TestChineseActionParsing:
    """Parsing tests for Chinese action strings."""

    @pytest.mark.parametrize("action,expected_op,expected_resource,expected_ns", CHINESE_TEST_CASES)
    def test_chinese_actions(self, action, expected_op, expected_resource, expected_ns):
        _assert_parsed(action, expected_op, expected_resource, expected_ns)


class TestMixedActionParsing:
    """Parsing tests for mixed-language action strings."""

    @pytest.mark.parametrize("action,expected_op,expected_resource,expected_ns", MIXED_TEST_CASES)
    def test_mixed_actions(self, action, expected_op, expected_resource, expected_ns):
        _assert_parsed(action, expected_op, expected_resource, expected_ns)


class TestEdgeCases:
    """Edge-case tests: inputs that should not parse into an operation."""

    @pytest.mark.parametrize("action,expected_op,expected_resource,expected_ns", EDGE_CASES)
    def test_edge_cases(self, action, expected_op, expected_resource, expected_ns):
        # NOTE(review): EDGE_CASES carries an expected namespace, but this test
        # has never asserted it; that behavior is kept. Confirm what namespace
        # the parser returns on failure before enabling the check.
        _assert_parsed(
            action, expected_op, expected_resource, expected_ns, check_namespace=False
        )


# =============================================================================
# Accuracy report
# =============================================================================

def test_accuracy_report():
    """Print an accuracy report over all positive cases and enforce the baseline.

    Runs ``parse_operation_from_action`` on every English, Chinese and mixed
    case, prints the totals plus each mismatch, and fails when fewer than 70%
    of the cases match their expected (operation, resource, namespace) triple.
    """
    all_cases = ENGLISH_TEST_CASES + CHINESE_TEST_CASES + MIXED_TEST_CASES

    # Collect only the mismatches; the pass count is derived from them.
    failures = []
    for action, expected_op, expected_resource, expected_ns in all_cases:
        op, resource, ns = parse_operation_from_action(action)
        got = (op, resource, ns)
        expected = (expected_op, expected_resource, expected_ns)
        if got != expected:
            failures.append({
                "action": action,
                "expected": expected,
                "got": got,
            })

    total = len(all_cases)
    failed = len(failures)
    passed = total - failed
    if total > 0:
        accuracy = passed / total * 100
    else:
        accuracy = 0

    rule = "=" * 60
    print("\n" + rule)
    print("Phase 12.1: 行動解析準確度基線報告")
    print(rule)
    print(f"總測試案例: {total}")
    print(f"通過: {passed}")
    print(f"失敗: {failed}")
    print(f"準確率: {accuracy:.1f}%")
    print(rule)

    if failures:
        print("\n失敗案例:")
        for f in failures:
            print(f" - '{f['action']}'")
            print(f" 期望: {f['expected']}")
            print(f" 實際: {f['got']}")

    # Enforce the current baseline; raise the threshold as parsing improves.
    assert accuracy >= 70, f"Accuracy {accuracy}% is below baseline 70%"


if __name__ == "__main__":
    pytest.main([__file__, "-v", "--tb=short"])