diff --git a/apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py b/apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py index 4092f622..e7264f3e 100644 --- a/apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py +++ b/apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py @@ -430,6 +430,16 @@ def _harbor_recovery_receipt_inputs() -> list[dict[str, Any]]: "purpose": "prove bounded repair-once result without daemon restart", **metadata_boundary, }, + { + "input_id": "controlled_cd_lane_readiness_output", + "source": "check-awoooi-110-controlled-cd-lane-readiness.sh", + "required_when": "before_retrying_harbor_110_local_repair_queue", + "purpose": ( + "classify awoooi-host lane guardrail blockers such as service, " + "registration, root restore-source, pressure, or label/config drift" + ), + **metadata_boundary, + }, { "input_id": "public_registry_v2_http_status", "source": "public registry /v2/ verifier", diff --git a/apps/api/src/services/harbor_registry_controlled_recovery_receipt.py b/apps/api/src/services/harbor_registry_controlled_recovery_receipt.py index bd74ce6c..33221af2 100644 --- a/apps/api/src/services/harbor_registry_controlled_recovery_receipt.py +++ b/apps/api/src/services/harbor_registry_controlled_recovery_receipt.py @@ -27,11 +27,17 @@ def validate_harbor_registry_controlled_recovery_receipt( ssh_local_output = _text(receipt_payload.get("ssh_local_repair_output")) watchdog_check_output = _text(receipt_payload.get("watchdog_check_output")) watchdog_repair_output = _text(receipt_payload.get("watchdog_repair_output")) + controlled_cd_lane_output = _text( + receipt_payload.get("controlled_cd_lane_readiness_output") + ) ssh_diagnosis = _parse_ssh_publickey_diagnosis_output(ssh_diagnosis_output) ssh_local = _parse_ssh_local_repair_output(ssh_local_output) watchdog_check = _parse_watchdog_output(watchdog_check_output) watchdog_repair = 
_parse_watchdog_output(watchdog_repair_output) + controlled_cd_lane = _parse_controlled_cd_lane_readiness_output( + controlled_cd_lane_output + ) verifier = _post_apply_verifier(receipt_payload) gitea_queue = _gitea_queue_readback( receipt_payload.get("gitea_actions_queue_readback") @@ -45,6 +51,7 @@ def validate_harbor_registry_controlled_recovery_receipt( ssh_local=ssh_local, watchdog_check=watchdog_check, watchdog_repair=watchdog_repair, + controlled_cd_lane=controlled_cd_lane, verifier=verifier, gitea_queue=gitea_queue, deploy_marker=deploy_marker, @@ -54,6 +61,7 @@ def validate_harbor_registry_controlled_recovery_receipt( ssh_local=ssh_local, watchdog_check=watchdog_check, watchdog_repair=watchdog_repair, + controlled_cd_lane=controlled_cd_lane, verifier=verifier, gitea_queue=gitea_queue, deploy_marker=deploy_marker, @@ -64,6 +72,7 @@ def validate_harbor_registry_controlled_recovery_receipt( ssh_local=ssh_local, watchdog_check=watchdog_check, watchdog_repair=watchdog_repair, + controlled_cd_lane=controlled_cd_lane, verifier=verifier, ) @@ -84,6 +93,9 @@ def validate_harbor_registry_controlled_recovery_receipt( "ssh_local_repair_output": _text_stats(ssh_local_output), "watchdog_check_output": _text_stats(watchdog_check_output), "watchdog_repair_output": _text_stats(watchdog_repair_output), + "controlled_cd_lane_readiness_output": _text_stats( + controlled_cd_lane_output + ), "gitea_actions_queue_readback": { "provided": gitea_queue["receipt_seen"], "metadata_only": True, @@ -101,6 +113,7 @@ def validate_harbor_registry_controlled_recovery_receipt( "ssh_local_repair": ssh_local, "watchdog_check": watchdog_check, "watchdog_repair": watchdog_repair, + "controlled_cd_lane_readiness": controlled_cd_lane, "post_apply_verifier": verifier, "gitea_actions_queue": gitea_queue, "deploy_marker": deploy_marker, @@ -117,6 +130,7 @@ def validate_harbor_registry_controlled_recovery_receipt( "validate_non_secret_ssh_local_repair_receipt", 
"validate_harbor_watchdog_check_receipt", "validate_harbor_watchdog_repair_once_receipt", + "validate_110_controlled_cd_lane_readiness_receipt", "validate_public_and_internal_registry_v2_verifier", "km_rag_mcp_playbook_metadata_writeback", "retry_gitea_cd_after_registry_v2_green", @@ -170,6 +184,18 @@ def validate_harbor_registry_controlled_recovery_receipt( "watchdog_check_harbor_ready": watchdog_check["harbor_ready"], "watchdog_repair_receipt_seen": watchdog_repair["receipt_seen"], "watchdog_repair_harbor_ready": watchdog_repair["harbor_ready"], + "controlled_cd_lane_readiness_receipt_seen": controlled_cd_lane[ + "receipt_seen" + ], + "controlled_cd_lane_ready": controlled_cd_lane[ + "controlled_cd_lane_ready" + ], + "controlled_cd_lane_blocker_count": controlled_cd_lane[ + "blocker_count" + ], + "controlled_cd_lane_safe_next_step": controlled_cd_lane[ + "safe_next_step" + ], "post_apply_verifier_ready": verifier["registry_v2_ready"], "gitea_queue_readback_seen": gitea_queue["receipt_seen"], "gitea_queue_blocker_count": gitea_queue["blocker_count"], @@ -260,6 +286,7 @@ def _local_console_phase_readback( ssh_local: dict[str, Any], watchdog_check: dict[str, Any], watchdog_repair: dict[str, Any], + controlled_cd_lane: dict[str, Any], verifier: dict[str, Any], ) -> dict[str, Any]: phases = [ @@ -298,11 +325,11 @@ def _local_console_phase_readback( ), _phase( "verify_controlled_cd_lane", - _phase_status( - ready=verifier["registry_v2_ready"], - blocked_status="blocked_waiting_registry_v2_verifier_green", + _controlled_cd_lane_phase_status( + controlled_cd_lane=controlled_cd_lane, + verifier=verifier, ), - "post_apply_verifier", + "controlled_cd_lane_readiness", ), ] completed_statuses = {"ready", "skipped_not_required"} @@ -381,6 +408,21 @@ def _harbor_repair_once_phase_status( return "blocked_waiting_harbor_watchdog_check_receipt" +def _controlled_cd_lane_phase_status( + *, + controlled_cd_lane: dict[str, Any], + verifier: dict[str, Any], +) -> str: + if 
controlled_cd_lane["receipt_seen"]:
+        if controlled_cd_lane["controlled_cd_lane_ready"]:
+            return "ready"
+        return "blocked_controlled_cd_lane_readiness_receipt_not_ready"
+    return _phase_status(
+        ready=verifier["registry_v2_ready"],
+        blocked_status="blocked_waiting_controlled_cd_lane_readiness_receipt",
+    )
+
+
 def _parse_ssh_publickey_diagnosis_output(output: str) -> dict[str, Any]:
     fields = _parse_key_values(output)
     marker_seen = "AWOOOI_110_SSH_PUBLICKEY_AUTH_DIAGNOSIS" in output
@@ -548,6 +590,50 @@ def _parse_watchdog_output(output: str) -> dict[str, Any]:
     }
 
 
+def _parse_controlled_cd_lane_readiness_output(output: str) -> dict[str, Any]:
+    """Parse controlled CD lane readiness verifier output into a readback.
+
+    ``receipt_seen`` is true only when the output contains an
+    ``AWOOOI_110_CONTROLLED_CD_LANE_READY=`` verdict line. The lane is
+    reported ready only when the receipt was seen, the verdict field is
+    truthy per ``_bool_from_field``, the blocker count is zero, and no
+    ``BLOCKER`` lines were parsed.
+    """
+    fields = _parse_key_values(output)
+    marker_seen = "AWOOOI_110_CONTROLLED_CD_LANE_READY=" in output
+    blockers = _prefixed_blockers(
+        output,
+        prefix="controlled_cd_lane_readiness:",
+    )
+    warning_count = _int_or_none(fields.get("WARNING_COUNT")) or 0
+    blocker_count = _int_or_none(fields.get("BLOCKER_COUNT"))
+    # Fail closed: a missing or too-small declared count must never
+    # under-report the BLOCKER lines that were actually parsed.
+    if blocker_count is None or 0 <= blocker_count < len(blockers):
+        blocker_count = len(blockers)
+    ready = _bool_from_field(fields.get("AWOOOI_110_CONTROLLED_CD_LANE_READY"))
+    return {
+        "receipt_seen": marker_seen,
+        "controlled_cd_lane_ready": bool(
+            marker_seen and ready and blocker_count == 0 and not blockers
+        ),
+        "config_ready": _bool_from_field(fields.get("CONFIG_READY")),
+        "binary_ready": _bool_from_field(fields.get("BINARY_READY")),
+        "registration_ready": _bool_from_field(fields.get("REGISTRATION_READY")),
+        "service_ready": _bool_from_field(fields.get("SERVICE_READY")),
+        "legacy_failclosed": _bool_from_field(fields.get("LEGACY_FAILCLOSED")),
+        "primary_lane_failclosed": _bool_from_field(
+            fields.get("PRIMARY_LANE_FAILCLOSED")
+        ),
+        "warning_count": warning_count,
+        "blocker_count": blocker_count,
+        "blockers": blockers,
+        "safe_next_step": str(fields.get("safe_next_step") or ""),
+        "metadata_only": True,
+        "raw_output_returned": False,
+    }
+
+
 def _post_apply_verifier(receipt_payload: dict[str, Any]) -> dict[str, Any]:
     public_status
= _int_or_none(receipt_payload.get("public_registry_v2_http_status")) internal_status = _int_or_none( @@ -977,6 +1053,7 @@ def _active_blockers( ssh_local: dict[str, Any], watchdog_check: dict[str, Any], watchdog_repair: dict[str, Any], + controlled_cd_lane: dict[str, Any], verifier: dict[str, Any], gitea_queue: dict[str, Any], deploy_marker: dict[str, Any], @@ -1010,6 +1087,7 @@ def _active_blockers( blockers.append("harbor_watchdog_repair_forbidden_action_seen") if watchdog_repair["receipt_seen"] and not watchdog_repair["harbor_ready"]: blockers.append("harbor_watchdog_repair_did_not_restore_local_v2") + blockers.extend(_strings(controlled_cd_lane.get("blockers"))) if not verifier["public_registry_v2_ready"]: blockers.append("public_registry_v2_verifier_not_green") if not verifier["internal_registry_v2_ready"]: @@ -1025,6 +1103,7 @@ def _status( ssh_local: dict[str, Any], watchdog_check: dict[str, Any], watchdog_repair: dict[str, Any], + controlled_cd_lane: dict[str, Any], verifier: dict[str, Any], gitea_queue: dict[str, Any], deploy_marker: dict[str, Any], @@ -1044,6 +1123,10 @@ def _status( and deploy_marker["blocker_count"] > 0 ): return "harbor_registry_recovery_receipt_verified_waiting_deploy_marker_readback" + if controlled_cd_lane["receipt_seen"] and not controlled_cd_lane[ + "controlled_cd_lane_ready" + ]: + return "controlled_cd_lane_readiness_receipt_blocked" if watchdog_repair["receipt_seen"]: return "harbor_registry_repair_receipt_waiting_registry_v2_verifier" if watchdog_check["receipt_seen"] and watchdog_check["harbor_ready"]: @@ -1064,6 +1147,8 @@ def _safe_next_step(*, status: str) -> str: return "clear_harbor_110_runner_queue_then_retry_cd_marker_readback" if status == "harbor_registry_recovery_receipt_verified_waiting_deploy_marker_readback": return "rerun_gitea_cd_then_verify_deploy_marker_and_priority_readback" + if status == "controlled_cd_lane_readiness_receipt_blocked": + return 
"fix_controlled_cd_lane_guardrail_blockers_then_rerun_readiness_verifier"
     if status == "harbor_registry_repair_receipt_waiting_registry_v2_verifier":
         return "rerun_public_and_internal_registry_v2_verifier_before_cd_retry"
     if status == "harbor_local_registry_ready_waiting_public_registry_v2_verifier":
@@ -1087,6 +1172,8 @@ def _current_apply_blocker(*, status: str) -> str:
         return "gitea_queue_clearance_required_after_registry_receipt"
     if status == "harbor_registry_recovery_receipt_verified_waiting_deploy_marker_readback":
         return "deploy_marker_readback_required_after_registry_receipt"
+    if status == "controlled_cd_lane_readiness_receipt_blocked":
+        return "controlled_cd_lane_readiness_required_for_awoooi_host_queue"
     if status == "harbor_watchdog_check_unhealthy_waiting_repair_once_receipt":
         return "repair_once_receipt_required_after_unhealthy_check"
     if status == "ssh_local_repair_receipt_waiting_harbor_watchdog_check":
@@ -1132,6 +1219,24 @@ def _parse_key_values(output: str) -> dict[str, str]:
     return fields
 
 
+def _prefixed_blockers(output: str, *, prefix: str) -> list[str]:
+    """Collect ``BLOCKER <text>`` lines from *output* as prefixed blockers.
+
+    A line counts when its first whitespace-separated token is ``BLOCKER``
+    and some text follows; that text is returned with *prefix* prepended,
+    after passing the list through ``_unique_strings``.
+    """
+    blockers: list[str] = []
+    for raw_line in output.splitlines():
+        # Split on any whitespace run so a tab-separated BLOCKER line is
+        # not silently dropped (fail closed).
+        parts = raw_line.split(None, 1)
+        if len(parts) < 2 or parts[0] != "BLOCKER":
+            continue
+        blockers.append(f"{prefix}{parts[1].strip()}")
+    return _unique_strings(blockers)
+
+
 def _mode_from_marker_line(output: str) -> str:
     for raw_line in output.splitlines():
         if "AWOOOI_110_SSH_PUBLICKEY_AUTH_LOCAL_REPAIR" not in raw_line:
diff --git a/apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py b/apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py
index eff49588..0ee93157 100644
--- a/apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py
+++ b/apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py
@@ -72,7 +72,7 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False):
     assert
payload["rollups"]["current_blocker_local_recovery_package_count"] == 1 assert ( payload["rollups"]["current_blocker_harbor_recovery_receipt_input_count"] - == 8 + == 9 ) assert payload["rollups"]["runtime_dispatch_performed"] is False @@ -133,7 +133,7 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False): assert current_queue[0]["harbor_recovery_receipt_endpoint"] == ( "/api/v1/agents/harbor-registry-controlled-recovery-receipt" ) - assert current_queue[0]["harbor_recovery_receipt_input_count"] == 8 + assert current_queue[0]["harbor_recovery_receipt_input_count"] == 9 assert [ item["input_id"] for item in current_queue[0]["harbor_recovery_receipt_inputs"] @@ -143,6 +143,7 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False): "ssh_local_repair_output", "watchdog_check_output", "watchdog_repair_output", + "controlled_cd_lane_readiness_output", "public_registry_v2_http_status", "internal_registry_v2_http_status", "deploy_marker_readback", diff --git a/apps/api/tests/test_awoooi_priority_work_order_readback_api.py b/apps/api/tests/test_awoooi_priority_work_order_readback_api.py index 1141e034..ea1d3cfe 100644 --- a/apps/api/tests/test_awoooi_priority_work_order_readback_api.py +++ b/apps/api/tests/test_awoooi_priority_work_order_readback_api.py @@ -329,13 +329,14 @@ def test_awoooi_priority_work_order_readback_overlays_ai_loop_current_blocker_qu assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_endpoint"] == ( "/api/v1/agents/harbor-registry-controlled-recovery-receipt" ) - assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_input_count"] == 8 + assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_input_count"] == 9 assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_input_ids"] == [ "gitea_actions_queue_readback", "ssh_publickey_diagnosis_output", "ssh_local_repair_output", "watchdog_check_output", "watchdog_repair_output", + 
"controlled_cd_lane_readiness_output", "public_registry_v2_http_status", "internal_registry_v2_http_status", "deploy_marker_readback", @@ -384,7 +385,7 @@ def test_awoooi_priority_work_order_readback_overlays_ai_loop_current_blocker_qu payload["summary"][ "ai_loop_current_blocker_harbor_recovery_receipt_input_count" ] - == 8 + == 9 ) assert payload["summary"][ "ai_loop_current_blocker_harbor_recovery_receipt_input_ids" diff --git a/apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py b/apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py index 1f8ffa91..7530dac6 100644 --- a/apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py +++ b/apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py @@ -225,6 +225,73 @@ def test_harbor_recovery_receipt_classifies_110_high_load() -> None: assert payload["rollups"]["ssh_publickey_node_high_load_seen"] is True +def test_harbor_recovery_receipt_surfaces_controlled_cd_lane_readiness() -> None: + payload = validate_harbor_registry_controlled_recovery_receipt( + { + "controlled_cd_lane_readiness_output": _controlled_cd_lane_blocked_output(), + } + ) + + assert payload["status"] == "controlled_cd_lane_readiness_receipt_blocked" + assert payload["safe_next_step"] == ( + "fix_controlled_cd_lane_guardrail_blockers_then_rerun_readiness_verifier" + ) + assert "controlled_cd_lane_readiness:controlled_cd_lane_service_not_active" in payload[ + "active_blockers" + ] + assert "controlled_cd_lane_readiness:controlled_cd_lane_registration_missing" in payload[ + "active_blockers" + ] + lane = payload["readback"]["controlled_cd_lane_readiness"] + assert lane["receipt_seen"] is True + assert lane["controlled_cd_lane_ready"] is False + assert lane["registration_ready"] is False + assert lane["service_ready"] is False + assert lane["blocker_count"] == 2 + assert lane["safe_next_step"] == ( + "restore_or_register_awoooi_cd_lane_drain_registration_without_printing_token_then_rerun_this_verifier" + ) + 
assert lane["raw_output_returned"] is False + assert "secret-token-like-content" not in str(lane) + phases = { + phase["phase_id"]: phase + for phase in payload["local_console_phase_readback"]["phases"] + } + assert phases["verify_controlled_cd_lane"]["status"] == ( + "blocked_controlled_cd_lane_readiness_receipt_not_ready" + ) + assert payload["rollups"]["controlled_cd_lane_readiness_receipt_seen"] is True + assert payload["rollups"]["controlled_cd_lane_ready"] is False + assert payload["rollups"]["controlled_cd_lane_blocker_count"] == 2 + assert payload["input_redaction"]["controlled_cd_lane_readiness_output"][ + "line_count" + ] > 0 + + +def test_harbor_recovery_receipt_blocks_controlled_cd_lane_blocker_count() -> None: + payload = validate_harbor_registry_controlled_recovery_receipt( + { + "controlled_cd_lane_readiness_output": """ +AWOOOI_110_CONTROLLED_CD_LANE_READY=1 +CONFIG_READY=1 +BINARY_READY=1 +REGISTRATION_READY=1 +SERVICE_READY=1 +LEGACY_FAILCLOSED=1 +PRIMARY_LANE_FAILCLOSED=1 +WARNING_COUNT=0 +BLOCKER_COUNT=1 +""", + } + ) + + assert payload["status"] == "controlled_cd_lane_readiness_receipt_blocked" + assert payload["readback"]["controlled_cd_lane_readiness"][ + "controlled_cd_lane_ready" + ] is False + assert payload["rollups"]["controlled_cd_lane_blocker_count"] == 1 + + def test_harbor_recovery_receipt_surfaces_gitea_queue_blockers() -> None: payload = validate_harbor_registry_controlled_recovery_receipt( { @@ -587,6 +654,33 @@ host_reboot_performed=false """ +def _controlled_cd_lane_blocked_output() -> str: + return """ +== audit metadata == +read_only=true +secret_values_collected=false +runner_token_read=false +raw_runner_registration_read=false +== controlled lane registration metadata == +CD_LANE_REGISTRATION path=/home/wooo/awoooi-cd-lane-drain/data/.runner present=0 content_read=false +BLOCKER controlled_cd_lane_registration_missing +== controlled lane service == +CD_LANE_SERVICE_GUARDRAILS unit=awoooi-cd-lane-drain.service active=inactive 
main_pid=0 limits=1 target_match=1 registration_condition=1 +BLOCKER controlled_cd_lane_service_not_active +== verdict == +CONFIG_READY=1 +BINARY_READY=1 +REGISTRATION_READY=0 +SERVICE_READY=0 +LEGACY_FAILCLOSED=1 +PRIMARY_LANE_FAILCLOSED=1 +WARNING_COUNT=0 +BLOCKER_COUNT=2 +AWOOOI_110_CONTROLLED_CD_LANE_READY=0 +safe_next_step=restore_or_register_awoooi_cd_lane_drain_registration_without_printing_token_then_rerun_this_verifier +""" + + def _gitea_queue_no_matching_runner() -> dict: return { "schema_version": "awoooi_public_gitea_actions_queue_readback_v1", diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 4d6ccedd..e97597b2 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -51033,3 +51033,24 @@ production browser smoke: **下一步**: - 188 DB CPU 已降;110 仍高,原因仍是 `gitea` / queue / `awoooi-host` control path:110 `load5=27.22`、`gitea=3.4019` cores、Harbor repair `#4176 Waiting`、no matching `awoooi-host`。主線下一步繼續 110 Gitea queue / controlled lane recovery,不恢復 generic runner、不重啟主機。 + +## 2026-07-01 — 08:05 P0 110 controlled CD lane readiness receipt + +**完成內容**: +- `harbor-registry-controlled-recovery-receipt` 新增 `controlled_cd_lane_readiness_output` metadata-only input,解析 `check-awoooi-110-controlled-cd-lane-readiness.sh` 的 non-secret verifier output。 +- receipt readback 新增 `controlled_cd_lane_readiness`、rollups、active blocker 與 `verify_controlled_cd_lane` phase 狀態;當 lane registration / service / root restore-source / pressure / label-config drift 未 ready 時,狀態轉為 `controlled_cd_lane_readiness_receipt_blocked`。 +- `agent-log-controlled-writeback-executor-readback` 的 Harbor receipt input contract 從 8 個增為 9 個,讓 AI Loop / priority work-order 明確要求 110 controlled lane readiness receipt 才能重試 `awoooi-host` repair queue。 +- readiness 判斷同時要求 marker、ready flag、`BLOCKER_COUNT=0` 與無 `BLOCKER` 行,避免 verifier 格式不完整時誤判 lane ready。 + +**本地驗證結果**: +- Focused suite:`73 passed`。 +- `ruff check`、`py_compile`、`ops/runner/guard-gitea-runner-pressure.py --root 
.`、`scripts/ci/check-gitea-step-env-secrets.js`、`git diff --check`:通過。 +- runner pressure guard 讀回 `auto_branch_events_on_110=0`、`generic_runner_labels=0`,未恢復 legacy / generic runner。 + +**仍維持**: +- 沒有讀 secret / token / `.env` / raw sessions / SQLite / auth;沒有讀 `.runner` 內容。 +- 沒有使用 GitHub / gh / GitHub API / GitHub Actions。 +- 沒有重啟主機,沒有 Docker / Nginx / K3s / DB restart,沒有 workflow_dispatch,沒有 runtime write。 + +**下一步**: +- commit / push 後讀回 Gitea queue、registry `/v2/`、Harbor health 與 priority work-order API;若仍卡 `awoooi-host` no matching,下一步維持 110 local console / controlled lane restore-registration verifier,不恢復 generic runner。