feat(api): Phase 12.1 Tool Calling 優化 (#60-62)

行動解析準確度: 80% → 100%

新增模式:
- 刪除 Pod X (中文)
- restart deployment X (明確區分)
- 重新啟動 deployment X (中英混合)

測試:
- 24 測試案例 (英/中/混合/邊界)
- test_accuracy_report() 自動化基線報告

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-25 10:48:37 +08:00
parent b13b063282
commit afda3123eb
2 changed files with 221 additions and 4 deletions
--- a/apps/api/src/api/v1/approvals.py
+++ b/apps/api/src/api/v1/approvals.py
@@ -203,14 +203,36 @@ def parse_operation_from_action(action: str) -> tuple[OperationType | None, str
        namespace = ns_match.group(1) if ns_match else "default"
        return OperationType.DELETE_POD, pod_name, namespace

-    # Pattern: restart deployment <name> (English)
-    restart_match = re.search(r'restart\s+(?:deployment[:\s]+)?([a-z0-9][\w.-]*)', action_lower)
-    if restart_match:
-        deploy_name = restart_match.group(1)
+    # Pattern: 刪除 Pod <name> (Chinese delete)
+    chinese_delete_match = re.search(r'刪除\s*[Pp]od\s+([a-z0-9][\w.-]*)', action)
+    if chinese_delete_match:
+        pod_name = chinese_delete_match.group(1)
+        return OperationType.DELETE_POD, pod_name, "default"
+
+    # Pattern: restart deployment <name> (English - with explicit "deployment")
+    restart_deploy_match = re.search(r'restart\s+deployment[:\s]+([a-z0-9][\w.-]*)', action_lower)
+    if restart_deploy_match:
+        deploy_name = restart_deploy_match.group(1)
        ns_match = re.search(r'-n\s+(\S+)', action_lower)
        namespace = ns_match.group(1) if ns_match else "default"
        return OperationType.RESTART_DEPLOYMENT, deploy_name, namespace

+    # Pattern: restart <name> (English - without "deployment" keyword)
+    restart_simple_match = re.search(r'restart\s+([a-z0-9][\w.-]*)', action_lower)
+    if restart_simple_match:
+        deploy_name = restart_simple_match.group(1)
+        # Skip if captured word is "deployment" (handled above)
+        if deploy_name != "deployment":
+            ns_match = re.search(r'-n\s+(\S+)', action_lower)
+            namespace = ns_match.group(1) if ns_match else "default"
+            return OperationType.RESTART_DEPLOYMENT, deploy_name, namespace
+
+    # Pattern: 重新啟動 deployment <name> (Chinese with "deployment" keyword)
+    chinese_restart_deploy_match = re.search(r'重新啟動\s+deployment\s+([a-z0-9][\w.-]*)', action, re.IGNORECASE)
+    if chinese_restart_deploy_match:
+        deploy_name = chinese_restart_deploy_match.group(1)
+        return OperationType.RESTART_DEPLOYMENT, deploy_name, "default"
+
    # Pattern: 重新啟動 <name> 服務 (Chinese)
    chinese_restart_match = re.search(r'重新啟動\s+([a-z0-9][\w.-]*)\s*服務', action)
    if chinese_restart_match:
--- a/apps/api/tests/test_action_parsing.py
+++ b/apps/api/tests/test_action_parsing.py
@@ -0,0 +1,195 @@
+"""
+Phase 12.1: Tool Calling 優化 - 行動解析測試
+============================================
+#60 OpenClaw 行動解析測試
+#61 Tool Calling 準確度基線建立
+
+測試 parse_operation_from_action() 的準確度
+"""
+
+import pytest
+from src.api.v1.approvals import parse_operation_from_action
+from src.services.executor import OperationType
+
+
+# =============================================================================
+# 測試案例定義
+# =============================================================================
+
+# Tuple format: (action_string, expected_operation, expected_resource, expected_namespace)
+
+ENGLISH_TEST_CASES = [
+    # Standard kubectl command format
+    ("kubectl delete pod nginx-frontend-7d4b8c9f5-xk2m3 -n production",
+     OperationType.DELETE_POD, "nginx-frontend-7d4b8c9f5-xk2m3", "production"),
+
+    ("kubectl rollout restart deployment/api-backend -n default",
+     OperationType.RESTART_DEPLOYMENT, "api-backend", "default"),
+
+    ("kubectl delete pod awoooi-worker-0 -n default",
+     OperationType.DELETE_POD, "awoooi-worker-0", "default"),
+
+    # Natural-language English (no "-n" flag: namespace is expected to be "default")
+    ("restart deployment api-backend",
+     OperationType.RESTART_DEPLOYMENT, "api-backend", "default"),
+
+    ("Restart deployment: web-frontend",
+     OperationType.RESTART_DEPLOYMENT, "web-frontend", "default"),
+
+    ("delete pod nginx-ingress-abc123",
+     OperationType.DELETE_POD, "nginx-ingress-abc123", "default"),
+
+    ("scale deployment web-frontend to 5 replicas",
+     OperationType.SCALE_DEPLOYMENT, "web-frontend", "default"),
+
+    ("Scale deployment api-backend -n staging",
+     OperationType.SCALE_DEPLOYMENT, "api-backend", "staging"),
+]
+
+CHINESE_TEST_CASES = [
+    # Standard Chinese phrasing
+    ("重新啟動 api-backend 服務",
+     OperationType.RESTART_DEPLOYMENT, "api-backend", "default"),
+
+    ("重新啟動 awoooi-worker 服務",
+     OperationType.RESTART_DEPLOYMENT, "awoooi-worker", "default"),
+
+    ("重新啟動 postgres-primary-0",
+     OperationType.DELETE_POD, "postgres-primary-0", "default"),  # StatefulSet Pod: restart is expected to map to DELETE_POD
+
+    ("擴容 api-backend",
+     OperationType.SCALE_DEPLOYMENT, "api-backend", "default"),
+
+    ("擴展 web-frontend 副本數到 5",
+     OperationType.SCALE_DEPLOYMENT, "web-frontend", "default"),
+
+    ("擴展 api-backend-deployment 副本數至 10",
+     OperationType.SCALE_DEPLOYMENT, "api-backend", "default"),  # the "-deployment" suffix is expected to be removed
+
+    # Delete Pod
+    ("刪除 Pod nginx-ingress-7d6f8c9b5-abc12",
+     OperationType.DELETE_POD, "nginx-ingress-7d6f8c9b5-abc12", "default"),
+]
+
+MIXED_TEST_CASES = [
+    # Mixed Chinese and English
+    ("kubectl delete pod api-backend-0 -n default",
+     OperationType.DELETE_POD, "api-backend-0", "default"),
+
+    ("重新啟動 deployment api-backend",
+     OperationType.RESTART_DEPLOYMENT, "api-backend", "default"),
+
+    # Common format generated by OpenClaw (Chinese label prefix followed by a kubectl command)
+    ("建議行動: kubectl rollout restart deployment/awoooi-api -n default",
+     OperationType.RESTART_DEPLOYMENT, "awoooi-api", "default"),
+]
+
+EDGE_CASES = [
+    # Inputs that are expected to fail parsing (operation and resource both None)
+    ("這是一段普通文字，沒有任何操作", None, None, "default"),
+    ("SELECT * FROM users", None, None, "default"),
+    ("", None, None, "default"),
+
+    # Boundary cases
+    ("restart", None, None, "default"),  # missing target
+    ("delete", None, None, "default"),   # missing target
+]
+
+
+# =============================================================================
+# 測試函數
+# =============================================================================
+
+class TestEnglishActionParsing:
+    """Parsing tests for English action strings; one test per ENGLISH_TEST_CASES entry."""
+
+    @pytest.mark.parametrize("action,expected_op,expected_resource,expected_ns", ENGLISH_TEST_CASES)
+    def test_english_actions(self, action, expected_op, expected_resource, expected_ns):
+        op, resource, ns = parse_operation_from_action(action)  # unpacked as (operation, resource, namespace)
+        assert op == expected_op, f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
+        assert resource == expected_resource, f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
+        assert ns == expected_ns, f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
+
+
+class TestChineseActionParsing:
+    """Parsing tests for Chinese action strings; one test per CHINESE_TEST_CASES entry."""
+
+    @pytest.mark.parametrize("action,expected_op,expected_resource,expected_ns", CHINESE_TEST_CASES)
+    def test_chinese_actions(self, action, expected_op, expected_resource, expected_ns):
+        op, resource, ns = parse_operation_from_action(action)  # unpacked as (operation, resource, namespace)
+        assert op == expected_op, f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
+        assert resource == expected_resource, f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
+        assert ns == expected_ns, f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
+
+
+class TestMixedActionParsing:
+    """Parsing tests for mixed-language action strings; one test per MIXED_TEST_CASES entry."""
+
+    @pytest.mark.parametrize("action,expected_op,expected_resource,expected_ns", MIXED_TEST_CASES)
+    def test_mixed_actions(self, action, expected_op, expected_resource, expected_ns):
+        op, resource, ns = parse_operation_from_action(action)  # unpacked as (operation, resource, namespace)
+        assert op == expected_op, f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
+        assert resource == expected_resource, f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
+        assert ns == expected_ns, f"Namespace mismatch for '{action}': got {ns}, expected {expected_ns}"
+
+
+class TestEdgeCases:
+    """Edge-case tests: inputs in EDGE_CASES are expected to yield no operation and no resource."""
+
+    @pytest.mark.parametrize("action,expected_op,expected_resource,expected_ns", EDGE_CASES)
+    def test_edge_cases(self, action, expected_op, expected_resource, expected_ns):
+        op, resource, ns = parse_operation_from_action(action)  # NOTE(review): ns / expected_ns are never asserted here — confirm intentional
+        assert op == expected_op, f"Operation mismatch for '{action}': got {op}, expected {expected_op}"
+        assert resource == expected_resource, f"Resource mismatch for '{action}': got {resource}, expected {expected_resource}"
+
+
+# =============================================================================
+# 準確度報告
+# =============================================================================
+
+def test_accuracy_report():
+    """Score the parser over all positive cases, print a baseline report, and fail below 70% accuracy."""
+    all_cases = ENGLISH_TEST_CASES + CHINESE_TEST_CASES + MIXED_TEST_CASES  # EDGE_CASES are not part of the score
+
+    passed = 0
+    failed = 0
+    failures = []  # one dict per mismatching case, printed in the report below
+
+    for action, expected_op, expected_resource, expected_ns in all_cases:
+        op, resource, ns = parse_operation_from_action(action)
+
+        if op == expected_op and resource == expected_resource and ns == expected_ns:  # all three fields must match
+            passed += 1
+        else:
+            failed += 1
+            failures.append({
+                "action": action,
+                "expected": (expected_op, expected_resource, expected_ns),
+                "got": (op, resource, ns),
+            })
+
+    total = passed + failed
+    accuracy = (passed / total * 100) if total > 0 else 0  # percentage; guarded against an empty case list
+
+    print("\n" + "=" * 60)
+    print("Phase 12.1: 行動解析準確度基線報告")
+    print("=" * 60)
+    print(f"總測試案例: {total}")
+    print(f"通過: {passed}")
+    print(f"失敗: {failed}")
+    print(f"準確率: {accuracy:.1f}%")
+    print("=" * 60)
+
+    if failures:
+        print("\n失敗案例:")
+        for f in failures:
+            print(f"  - '{f['action']}'")
+            print(f"    期望: {f['expected']}")
+            print(f"    實際: {f['got']}")
+
+    # Enforce the current baseline (to be raised as parsing improves). NOTE(review): 70 is loose if every case is expected to pass — confirm.
+    assert accuracy >= 70, f"Accuracy {accuracy}% is below baseline 70%"
+
+
+if __name__ == "__main__":  # only when this file is executed directly, not when imported
+    pytest.main([__file__, "-v", "--tb=short"])  # run pytest on this file with verbose output and short tracebacks