feat(k8s): add Worker HPA + fix non-AI confidence values

Wave 2 Deployment:
- Worker HPA: min:1 max:3, CPU 70%, Memory 80%
- 前置條件: XCLAIM + terminationGracePeriodSeconds:90 (Wave 1 已完成)
- 比 API/Web 更保守的擴縮策略 (120s up, 600s down)

Confidence Fix:
- 非 AI 分析來源 (fallback/playbook/historical/consensus) 設 confidence=0.0
- 避免混淆 AI 信心度與其他指標 (成功率/相似度)
- 涉及: github_webhook, decision_manager, intent_classifier, learning_service

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-03-29 16:09:37 +08:00
parent 3bfb9c51f5
commit b97f9364fb
5 changed files with 63 additions and 10 deletions

View File

@@ -1128,7 +1128,7 @@ async def call_openclaw_ci_diagnosis(
auto_fixable=False,
risk_level="medium",
analyzed_by="fallback",
confidence=0.3,
confidence=0.0, # 🔴 Fallback 不是 AI 分析
)
except httpx.TimeoutException:
@@ -1141,7 +1141,7 @@ async def call_openclaw_ci_diagnosis(
auto_fixable=False,
risk_level="low",
analyzed_by="fallback",
confidence=0.1,
confidence=0.0, # 🔴 Fallback 不是 AI 分析
)
except Exception as e:
logger.exception("openclaw_ci_diagnosis_error", error=str(e))

View File

@@ -677,7 +677,7 @@ class DecisionManager:
"description": playbook.description,
"risk_level": playbook.repair_steps[0].risk_level.value.lower() if playbook.repair_steps else "medium",
"reasoning": f"Playbook 匹配 ({best_match.similarity_score:.0%} 相似度, {playbook.success_rate:.0%} 成功率): {best_match.reason}",
"confidence": min(best_match.similarity_score, playbook.success_rate) if playbook.total_executions > 0 else best_match.similarity_score,
"confidence": 0.0, # 🔴 Playbook RAG 匹配不是 AI 分析,信心度設 0
"matched_symptoms": best_match.matched_symptoms,
"from_cache": False,
}
@@ -839,7 +839,7 @@ class DecisionManager:
"risk_level": consensus_result.risk_level,
"kubectl_command": consensus_result.recommended_kubectl,
"reasoning": consensus_result.final_reasoning,
"confidence": consensus_result.consensus_score,
"confidence": 0.0, # 🔴 Consensus Engine 共識分數不是 AI 信心度,設 0
"agent_count": len(consensus_result.opinions),
"dissenting_opinions": consensus_result.dissenting_opinions,
"from_cache": False,

View File

@@ -518,11 +518,11 @@ class IntentClassifier:
"""
# TODO: 整合 Ollama qwen2.5:1b (Phase 13.4)
# 預計使用 text 呼叫 Ollama API 進行分類
# 目前先返回低信心度 UNKNOWN規則引擎已能處理大部分情況
# 目前先返回 UNKNOWN規則引擎已能處理大部分情況
del text # 預留給 LLM 分類使用,避免 unused-parameter 警告
return IntentResult(
intent=IntentType.UNKNOWN,
confidence=0.3,
confidence=0.0, # 🔴 LLM 未啟用,非 AI 分析
method="llm",
matched_keywords=[],
detected_resources=[],

View File

@@ -544,12 +544,13 @@ class LearningService:
return {
"action": best["action"],
"confidence": best["success_rate"],
"confidence": 0.0, # 🔴 歷史學習不是 AI 分析,信心度設 0
"tier": best["tier"],
"based_on": f"{best['total_samples']} 次歷史數據",
"avg_execution_time": best["avg_execution_time"],
"success_rate": best["success_rate"], # 保留原始成功率作為參考
"alternatives": [
{"action": a["action"], "confidence": a["success_rate"], "tier": a["tier"]}
{"action": a["action"], "confidence": 0.0, "success_rate": a["success_rate"], "tier": a["tier"]}
for a in alternatives
],
}
@@ -589,12 +590,12 @@ class LearningService:
"""預設推薦 (無歷史數據時)"""
return {
"action": "restart_pod",
"confidence": 0.3,
"confidence": 0.0, # 🔴 預設推薦不是 AI 分析,信心度設 0
"tier": 1,
"based_on": "無歷史數據,使用預設",
"avg_execution_time": 30.0,
"alternatives": [
{"action": "delete_pod", "confidence": 0.3, "tier": 1},
{"action": "delete_pod", "confidence": 0.0, "tier": 1},
],
}

View File

@@ -3,6 +3,7 @@
# =============================================================================
# K3.2 2026-03-28: HPA for API/Web (based on VPA recommendations)
# P3 2026-03-29: maxReplicas 4 → 6 (首席架構師建議)
# K-Worker 2026-03-29: Worker HPA (min:1 max:3, CPU 70% + Memory 80% 指標)
# Deployed by: Claude Code (首席架構師)
# VPA 建議: API target 100m, Web target 63m
# =============================================================================
@@ -94,3 +95,54 @@ spec:
- type: Pods
value: 1
periodSeconds: 120
---
# =============================================================================
# Worker HPA (K-Worker 2026-03-29)
# =============================================================================
# 前置條件: ADR-038/039 Wave 1 完成 (XCLAIM + terminationGracePeriodSeconds:90)
# 說明: Worker 消費 Redis Streams,使用 CPU 與 Memory 指標 (未來可升級 KEDA)
# =============================================================================
# HorizontalPodAutoscaler for the awoooi-worker Deployment.
# Keeps between 1 and 3 replicas, driven by average CPU (70%) and
# memory (80%) utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: awoooi-worker-hpa
  namespace: awoooi-prod
  labels:
    app.kubernetes.io/name: awoooi
    app.kubernetes.io/component: worker
  annotations:
    description: "Worker 水平自動擴展 (1-3 replicas, 70% CPU)"
    note: "未來可升級 KEDA Redis Stream 指標按 Queue 長度擴縮"
spec:
  # Workload whose replica count this autoscaler manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: awoooi-worker
  minReplicas: 1
  maxReplicas: 3
  # With several metrics, the HPA computes a desired replica count per
  # metric and applies the largest one.
  # NOTE(review): Utilization targets are measured against the pods'
  # resource requests - confirm awoooi-worker declares CPU and memory
  # requests, otherwise these metrics cannot be evaluated.
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    # Scale up: 120 s stabilization window, at most 1 pod added
    # per 120 s period.
    scaleUp:
      stabilizationWindowSeconds: 120
      policies:
        - type: Pods
          value: 1
          periodSeconds: 120
    # Scale down: 600 s stabilization window, at most 1 pod removed
    # per 300 s period.
    scaleDown:
      stabilizationWindowSeconds: 600
      policies:
        - type: Pods
          value: 1
          periodSeconds: 300