feat(k8s): add Worker HPA + fix non-AI confidence values
Wave 2 Deployment:
- Worker HPA: min:1 max:3, CPU 70%, Memory 80%
- 前置條件: XCLAIM + terminationGracePeriodSeconds:90 (Wave 1 ✅)
- 比 API/Web 更保守的擴縮策略 (120s up, 600s down)

Confidence Fix:
- 非 AI 分析來源 (fallback/playbook/historical/consensus) 設 confidence=0.0
- 避免混淆 AI 信心度與其他指標 (成功率/相似度)
- 涉及: github_webhook, decision_manager, intent_classifier, learning_service

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1128,7 +1128,7 @@ async def call_openclaw_ci_diagnosis(
|
||||
auto_fixable=False,
|
||||
risk_level="medium",
|
||||
analyzed_by="fallback",
|
||||
confidence=0.3,
|
||||
confidence=0.0, # 🔴 Fallback 不是 AI 分析
|
||||
)
|
||||
|
||||
except httpx.TimeoutException:
|
||||
@@ -1141,7 +1141,7 @@ async def call_openclaw_ci_diagnosis(
|
||||
auto_fixable=False,
|
||||
risk_level="low",
|
||||
analyzed_by="fallback",
|
||||
confidence=0.1,
|
||||
confidence=0.0, # 🔴 Fallback 不是 AI 分析
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("openclaw_ci_diagnosis_error", error=str(e))
|
||||
|
||||
@@ -677,7 +677,7 @@ class DecisionManager:
|
||||
"description": playbook.description,
|
||||
"risk_level": playbook.repair_steps[0].risk_level.value.lower() if playbook.repair_steps else "medium",
|
||||
"reasoning": f"Playbook 匹配 ({best_match.similarity_score:.0%} 相似度, {playbook.success_rate:.0%} 成功率): {best_match.reason}",
|
||||
"confidence": min(best_match.similarity_score, playbook.success_rate) if playbook.total_executions > 0 else best_match.similarity_score,
|
||||
"confidence": 0.0, # 🔴 Playbook RAG 匹配不是 AI 分析,信心度設 0
|
||||
"matched_symptoms": best_match.matched_symptoms,
|
||||
"from_cache": False,
|
||||
}
|
||||
@@ -839,7 +839,7 @@ class DecisionManager:
|
||||
"risk_level": consensus_result.risk_level,
|
||||
"kubectl_command": consensus_result.recommended_kubectl,
|
||||
"reasoning": consensus_result.final_reasoning,
|
||||
"confidence": consensus_result.consensus_score,
|
||||
"confidence": 0.0, # 🔴 Consensus Engine 共識分數不是 AI 信心度,設 0
|
||||
"agent_count": len(consensus_result.opinions),
|
||||
"dissenting_opinions": consensus_result.dissenting_opinions,
|
||||
"from_cache": False,
|
||||
|
||||
@@ -518,11 +518,11 @@ class IntentClassifier:
|
||||
"""
|
||||
# TODO: 整合 Ollama qwen2.5:1b (Phase 13.4)
|
||||
# 預計使用 text 呼叫 Ollama API 進行分類
|
||||
# 目前先返回低信心度 UNKNOWN,規則引擎已能處理大部分情況
|
||||
# 目前先返回 UNKNOWN,規則引擎已能處理大部分情況
|
||||
del text # 預留給 LLM 分類使用,避免 unused-parameter 警告
|
||||
return IntentResult(
|
||||
intent=IntentType.UNKNOWN,
|
||||
confidence=0.3,
|
||||
confidence=0.0, # 🔴 LLM 未啟用,非 AI 分析
|
||||
method="llm",
|
||||
matched_keywords=[],
|
||||
detected_resources=[],
|
||||
|
||||
@@ -544,12 +544,13 @@ class LearningService:
|
||||
|
||||
return {
|
||||
"action": best["action"],
|
||||
"confidence": best["success_rate"],
|
||||
"confidence": 0.0, # 🔴 歷史學習不是 AI 分析,信心度設 0
|
||||
"tier": best["tier"],
|
||||
"based_on": f"{best['total_samples']} 次歷史數據",
|
||||
"avg_execution_time": best["avg_execution_time"],
|
||||
"success_rate": best["success_rate"], # 保留原始成功率作為參考
|
||||
"alternatives": [
|
||||
{"action": a["action"], "confidence": a["success_rate"], "tier": a["tier"]}
|
||||
{"action": a["action"], "confidence": 0.0, "success_rate": a["success_rate"], "tier": a["tier"]}
|
||||
for a in alternatives
|
||||
],
|
||||
}
|
||||
@@ -589,12 +590,12 @@ class LearningService:
|
||||
"""預設推薦 (無歷史數據時)"""
|
||||
return {
|
||||
"action": "restart_pod",
|
||||
"confidence": 0.3,
|
||||
"confidence": 0.0, # 🔴 預設推薦不是 AI 分析,信心度設 0
|
||||
"tier": 1,
|
||||
"based_on": "無歷史數據,使用預設",
|
||||
"avg_execution_time": 30.0,
|
||||
"alternatives": [
|
||||
{"action": "delete_pod", "confidence": 0.3, "tier": 1},
|
||||
{"action": "delete_pod", "confidence": 0.0, "tier": 1},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# =============================================================================
|
||||
# K3.2 2026-03-28: HPA for API/Web (based on VPA recommendations)
|
||||
# P3 2026-03-29: maxReplicas 4 → 6 (首席架構師建議)
|
||||
# K-Worker 2026-03-29: Worker HPA (min:1 max:3, CPU 70% + Memory 80% 指標)
|
||||
# Deployed by: Claude Code (首席架構師)
|
||||
# VPA 建議: API target 100m, Web target 63m
|
||||
# =============================================================================
|
||||
@@ -94,3 +95,54 @@ spec:
|
||||
- type: Pods
|
||||
value: 1
|
||||
periodSeconds: 120
|
||||
---
|
||||
# =============================================================================
|
||||
# Worker HPA(K-Worker 2026-03-29)
|
||||
# =============================================================================
|
||||
# 前置條件: ADR-038/039 Wave 1 完成(XCLAIM + terminationGracePeriodSeconds:90)
|
||||
# 說明: Worker 消費 Redis Streams,使用 CPU 與 Memory 指標(未來可升級 KEDA)
|
||||
# =============================================================================
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: awoooi-worker-hpa
|
||||
namespace: awoooi-prod
|
||||
labels:
|
||||
app.kubernetes.io/name: awoooi
|
||||
app.kubernetes.io/component: worker
|
||||
annotations:
|
||||
description: "Worker 水平自動擴展 (1-3 replicas, 70% CPU)"
|
||||
note: "未來可升級 KEDA Redis Stream 指標按 Queue 長度擴縮"
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: awoooi-worker
|
||||
minReplicas: 1
|
||||
maxReplicas: 3
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 80
|
||||
behavior:
|
||||
scaleUp:
|
||||
stabilizationWindowSeconds: 120
|
||||
policies:
|
||||
- type: Pods
|
||||
value: 1
|
||||
periodSeconds: 120
|
||||
scaleDown:
|
||||
stabilizationWindowSeconds: 600
|
||||
policies:
|
||||
- type: Pods
|
||||
value: 1
|
||||
periodSeconds: 300
|
||||
|
||||
Reference in New Issue
Block a user