fix(flywheel): fallback on OpenClaw degraded responses
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 9m56s

This commit is contained in:
Your Name
2026-04-29 22:38:57 +08:00
parent 525a243550
commit fe2b8f4571
3 changed files with 121 additions and 1 deletions

View File

@@ -21,7 +21,7 @@ import httpx
import structlog
from src.core.config import get_settings
from src.services.ai_providers.interfaces import AIProvider, AIResult, is_provider_enabled_by_env
from src.services.ai_providers.interfaces import AIResult, is_provider_enabled_by_env
logger = structlog.get_logger(__name__)
settings = get_settings()
@@ -45,6 +45,23 @@ def _to_serializable(obj: Any) -> Any:
return str(obj)
def _confidence_value(data: dict[str, Any]) -> float:
try:
return float(data.get("confidence", 0.0))
except (TypeError, ValueError):
return 0.0
def _is_degraded_response(data: dict[str, Any]) -> bool:
    """
    Tell whether an OpenClaw payload should be rejected as a degraded stub.

    OpenClaw can answer with a well-formed but degraded proposal after its
    downstream NIM/Ollama call timed out. Reporting such a payload as
    fallbackable keeps Gemini and Claude available instead of accepting a
    low-confidence placeholder.

    A payload is degraded when any of the following holds:
    - its ``degraded`` field is truthy;
    - its ``provider`` field, as a string, equals ``"openclaw_degraded"``;
    - its confidence, as read by ``_confidence_value``, is below 0.3.
    """
    if data.get("degraded"):
        return True
    if str(data.get("provider", "")) == "openclaw_degraded":
        return True
    return _confidence_value(data) < 0.3
class OpenClawNemoProvider:
"""
OpenClaw 委派 Provider (188 → NVIDIA NIM)
@@ -137,6 +154,22 @@ class OpenClawNemoProvider:
result_json = _json.dumps(data, ensure_ascii=False)
latency = (time.perf_counter() - start) * 1000
if _is_degraded_response(data):
reason = str(data.get("reasoning") or data.get("description") or "OpenClaw degraded response")
logger.warning(
"openclaw_nemo_degraded_response",
provider=data.get("provider"),
confidence=data.get("confidence"),
latency_ms=round(latency, 1),
)
return AIResult(
raw_response=result_json,
success=False,
provider=self.name,
latency_ms=latency,
error=f"OpenClaw degraded: {reason[:200]}",
)
logger.info(
"openclaw_nemo_provider_success",
confidence=data.get("confidence", 0),

View File

@@ -364,6 +364,19 @@ class OpenClawService:
logger.warning("openclaw_analyze_invalid_response", incident_id=incident_id)
return None
try:
confidence_value = float(data.get("confidence", 0.0))
except (TypeError, ValueError):
confidence_value = 0.0
if data.get("degraded") or data.get("provider") == "openclaw_degraded" or confidence_value < 0.3:
logger.warning(
"openclaw_analyze_degraded_response",
incident_id=incident_id,
provider=data.get("provider"),
confidence=data.get("confidence"),
)
return None
logger.info(
"openclaw_analyze_success",
incident_id=incident_id,

View File

@@ -0,0 +1,74 @@
from __future__ import annotations
import httpx
import pytest
from src.services.ai_providers.openclaw_nemo import OpenClawNemoProvider
def _transport(payload: dict) -> httpx.MockTransport:
    """Build a mock transport that answers every request with HTTP 200 and *payload* as JSON."""
    return httpx.MockTransport(
        lambda _request: httpx.Response(200, json=payload)
    )
@pytest.mark.asyncio
async def test_openclaw_degraded_response_is_fallbackable():
    """A payload flagged as degraded must come back as an unsuccessful result."""
    provider = OpenClawNemoProvider()
    provider._http_client = httpx.AsyncClient(transport=_transport({
        "action_title": "OpenClaw 降級調查",
        "description": "OpenClaw 下游 LLM timeout",
        "suggested_action": "investigate",
        "kubectl_command": None,
        "target_resource": "awoooi-api",
        "namespace": "awoooi-prod",
        "risk_level": "low",
        "confidence": 0.2,
        "provider": "openclaw_degraded",
        "degraded": True,
        "reasoning": "NVIDIA NIM timeout",
    }))

    try:
        result = await provider.analyze("diagnose", context={"incident_id": "inc-1"})
    finally:
        # Always release the provider, even when analyze() raises, so a
        # failing test does not leave the injected client open.
        await provider.close()

    assert result.success is False
    assert result.provider == "openclaw_nemo"
    assert "OpenClaw degraded" in (result.error or "")
@pytest.mark.asyncio
async def test_openclaw_low_confidence_response_is_fallbackable():
    """A payload with no degraded flag but confidence 0.1 must still be reported as failed."""
    provider = OpenClawNemoProvider()
    provider._http_client = httpx.AsyncClient(transport=_transport({
        "action_title": "調查服務異常",
        "risk_level": "low",
        "confidence": 0.1,
        "provider": "openclaw_nvidia_nim",
        "reasoning": "low confidence",
    }))

    try:
        result = await provider.analyze("diagnose", context={"incident_id": "inc-2"})
    finally:
        # Always release the provider, even when analyze() raises, so a
        # failing test does not leave the injected client open.
        await provider.close()

    assert result.success is False
    assert "OpenClaw degraded" in (result.error or "")
@pytest.mark.asyncio
async def test_openclaw_normal_response_stays_successful():
    """A payload with confidence 0.8 and no degraded marker must remain a successful result."""
    provider = OpenClawNemoProvider()
    provider._http_client = httpx.AsyncClient(transport=_transport({
        "action_title": "重啟服務",
        "risk_level": "medium",
        "confidence": 0.8,
        "provider": "openclaw_nvidia_nim",
        "reasoning": "valid proposal",
    }))

    try:
        result = await provider.analyze("diagnose", context={"incident_id": "inc-3"})
    finally:
        # Always release the provider, even when analyze() raises, so a
        # failing test does not leave the injected client open.
        await provider.close()

    assert result.success is True
    assert result.provider == "openclaw_nemo"