diff --git a/run_scheduler.py b/run_scheduler.py index d7f8079..6a51459 100644 --- a/run_scheduler.py +++ b/run_scheduler.py @@ -477,6 +477,13 @@ def run_host_health_probe(): logger.debug("[HostHealthProbe] all 3 hosts healthy") except Exception as e: logger.error(f"[HostHealthProbe] failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_host_health_probe", + e, + source="Scheduler.HostHealth", + event_type="host_health_probe_failure", + title="三主機健康探針失敗", + ) def _push_host_transition_alert(tr): @@ -605,6 +612,13 @@ def run_ai_calls_error_spike_check(): ) except Exception as e: logger.error(f"[AICallsErrorSpike] failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_ai_calls_error_spike_check", + e, + source="Scheduler.Observability", + event_type="ai_calls_error_spike_check_failure", + title="AI 呼叫錯誤率檢查失敗", + ) def run_observability_daily_summary(): @@ -751,6 +765,13 @@ def run_observability_daily_summary(): logger.info("[ObservabilityDaily] summary pushed to Telegram") except Exception as e: logger.error(f"[ObservabilityDaily] failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_observability_daily_summary", + e, + source="Scheduler.Observability", + event_type="observability_daily_summary_failure", + title="觀測台每日摘要失敗", + ) def run_host_health_probe_cleanup(): @@ -769,6 +790,13 @@ def run_host_health_probe_cleanup(): session.close() except Exception as e: logger.error(f"[HostHealthProbe] cleanup failed: {e}", exc_info=True) + _notify_scheduler_failure( + "run_host_health_probe_cleanup", + e, + source="Scheduler.HostHealth", + event_type="host_health_probe_cleanup_failure", + title="主機健康探針清理失敗", + ) def run_cost_throttle_reset_if_new_month(): diff --git a/tests/test_run_scheduler_embed_consistency.py b/tests/test_run_scheduler_embed_consistency.py index b2ad338..6819eae 100644 --- a/tests/test_run_scheduler_embed_consistency.py +++ b/tests/test_run_scheduler_embed_consistency.py @@ -1,4 +1,5 @@ import importlib +import inspect def _load_run_scheduler(monkeypatch): @@ -94,3 +95,16 @@ def test_notify_scheduler_failure_without_active_exception_uses_error_trace(monk ) assert calls[0]["trace"] == "RuntimeError: mismatch" + + +def test_scheduler_observability_wrappers_notify_on_exception(monkeypatch): + run_scheduler = _load_run_scheduler(monkeypatch) + + for fn_name in [ + "run_host_health_probe", + "run_ai_calls_error_spike_check", + "run_observability_daily_summary", + "run_host_health_probe_cleanup", + ]: + source = inspect.getsource(getattr(run_scheduler, fn_name)) + assert "_notify_scheduler_failure(" in source