feat(agent): accept controlled cd lane readiness receipt
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 42s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled

This commit is contained in:
Your Name
2026-07-01 08:05:49 +08:00
parent 1c6bc6ce08
commit ecce119bbb
6 changed files with 234 additions and 8 deletions

View File

@@ -430,6 +430,16 @@ def _harbor_recovery_receipt_inputs() -> list[dict[str, Any]]:
"purpose": "prove bounded repair-once result without daemon restart",
**metadata_boundary,
},
{
"input_id": "controlled_cd_lane_readiness_output",
"source": "check-awoooi-110-controlled-cd-lane-readiness.sh",
"required_when": "before_retrying_harbor_110_local_repair_queue",
"purpose": (
"classify awoooi-host lane guardrail blockers such as service, "
"registration, root restore-source, pressure, or label/config drift"
),
**metadata_boundary,
},
{
"input_id": "public_registry_v2_http_status",
"source": "public registry /v2/ verifier",

View File

@@ -27,11 +27,17 @@ def validate_harbor_registry_controlled_recovery_receipt(
ssh_local_output = _text(receipt_payload.get("ssh_local_repair_output"))
watchdog_check_output = _text(receipt_payload.get("watchdog_check_output"))
watchdog_repair_output = _text(receipt_payload.get("watchdog_repair_output"))
controlled_cd_lane_output = _text(
receipt_payload.get("controlled_cd_lane_readiness_output")
)
ssh_diagnosis = _parse_ssh_publickey_diagnosis_output(ssh_diagnosis_output)
ssh_local = _parse_ssh_local_repair_output(ssh_local_output)
watchdog_check = _parse_watchdog_output(watchdog_check_output)
watchdog_repair = _parse_watchdog_output(watchdog_repair_output)
controlled_cd_lane = _parse_controlled_cd_lane_readiness_output(
controlled_cd_lane_output
)
verifier = _post_apply_verifier(receipt_payload)
gitea_queue = _gitea_queue_readback(
receipt_payload.get("gitea_actions_queue_readback")
@@ -45,6 +51,7 @@ def validate_harbor_registry_controlled_recovery_receipt(
ssh_local=ssh_local,
watchdog_check=watchdog_check,
watchdog_repair=watchdog_repair,
controlled_cd_lane=controlled_cd_lane,
verifier=verifier,
gitea_queue=gitea_queue,
deploy_marker=deploy_marker,
@@ -54,6 +61,7 @@ def validate_harbor_registry_controlled_recovery_receipt(
ssh_local=ssh_local,
watchdog_check=watchdog_check,
watchdog_repair=watchdog_repair,
controlled_cd_lane=controlled_cd_lane,
verifier=verifier,
gitea_queue=gitea_queue,
deploy_marker=deploy_marker,
@@ -64,6 +72,7 @@ def validate_harbor_registry_controlled_recovery_receipt(
ssh_local=ssh_local,
watchdog_check=watchdog_check,
watchdog_repair=watchdog_repair,
controlled_cd_lane=controlled_cd_lane,
verifier=verifier,
)
@@ -84,6 +93,9 @@ def validate_harbor_registry_controlled_recovery_receipt(
"ssh_local_repair_output": _text_stats(ssh_local_output),
"watchdog_check_output": _text_stats(watchdog_check_output),
"watchdog_repair_output": _text_stats(watchdog_repair_output),
"controlled_cd_lane_readiness_output": _text_stats(
controlled_cd_lane_output
),
"gitea_actions_queue_readback": {
"provided": gitea_queue["receipt_seen"],
"metadata_only": True,
@@ -101,6 +113,7 @@ def validate_harbor_registry_controlled_recovery_receipt(
"ssh_local_repair": ssh_local,
"watchdog_check": watchdog_check,
"watchdog_repair": watchdog_repair,
"controlled_cd_lane_readiness": controlled_cd_lane,
"post_apply_verifier": verifier,
"gitea_actions_queue": gitea_queue,
"deploy_marker": deploy_marker,
@@ -117,6 +130,7 @@ def validate_harbor_registry_controlled_recovery_receipt(
"validate_non_secret_ssh_local_repair_receipt",
"validate_harbor_watchdog_check_receipt",
"validate_harbor_watchdog_repair_once_receipt",
"validate_110_controlled_cd_lane_readiness_receipt",
"validate_public_and_internal_registry_v2_verifier",
"km_rag_mcp_playbook_metadata_writeback",
"retry_gitea_cd_after_registry_v2_green",
@@ -170,6 +184,18 @@ def validate_harbor_registry_controlled_recovery_receipt(
"watchdog_check_harbor_ready": watchdog_check["harbor_ready"],
"watchdog_repair_receipt_seen": watchdog_repair["receipt_seen"],
"watchdog_repair_harbor_ready": watchdog_repair["harbor_ready"],
"controlled_cd_lane_readiness_receipt_seen": controlled_cd_lane[
"receipt_seen"
],
"controlled_cd_lane_ready": controlled_cd_lane[
"controlled_cd_lane_ready"
],
"controlled_cd_lane_blocker_count": controlled_cd_lane[
"blocker_count"
],
"controlled_cd_lane_safe_next_step": controlled_cd_lane[
"safe_next_step"
],
"post_apply_verifier_ready": verifier["registry_v2_ready"],
"gitea_queue_readback_seen": gitea_queue["receipt_seen"],
"gitea_queue_blocker_count": gitea_queue["blocker_count"],
@@ -260,6 +286,7 @@ def _local_console_phase_readback(
ssh_local: dict[str, Any],
watchdog_check: dict[str, Any],
watchdog_repair: dict[str, Any],
controlled_cd_lane: dict[str, Any],
verifier: dict[str, Any],
) -> dict[str, Any]:
phases = [
@@ -298,11 +325,11 @@ def _local_console_phase_readback(
),
_phase(
"verify_controlled_cd_lane",
_phase_status(
ready=verifier["registry_v2_ready"],
blocked_status="blocked_waiting_registry_v2_verifier_green",
_controlled_cd_lane_phase_status(
controlled_cd_lane=controlled_cd_lane,
verifier=verifier,
),
"post_apply_verifier",
"controlled_cd_lane_readiness",
),
]
completed_statuses = {"ready", "skipped_not_required"}
@@ -381,6 +408,21 @@ def _harbor_repair_once_phase_status(
return "blocked_waiting_harbor_watchdog_check_receipt"
def _controlled_cd_lane_phase_status(
*,
controlled_cd_lane: dict[str, Any],
verifier: dict[str, Any],
) -> str:
if controlled_cd_lane["receipt_seen"]:
if controlled_cd_lane["controlled_cd_lane_ready"]:
return "ready"
return "blocked_controlled_cd_lane_readiness_receipt_not_ready"
return _phase_status(
ready=verifier["registry_v2_ready"],
blocked_status="blocked_waiting_controlled_cd_lane_readiness_receipt",
)
def _parse_ssh_publickey_diagnosis_output(output: str) -> dict[str, Any]:
fields = _parse_key_values(output)
marker_seen = "AWOOOI_110_SSH_PUBLICKEY_AUTH_DIAGNOSIS" in output
@@ -548,6 +590,40 @@ def _parse_watchdog_output(output: str) -> dict[str, Any]:
}
def _parse_controlled_cd_lane_readiness_output(output: str) -> dict[str, Any]:
    """Parse controlled CD lane readiness verifier output into metadata.

    Args:
        output: Text of the readiness verifier output. Only derived fields are
            returned; the text itself is not echoed back.

    Returns:
        A dict with ``receipt_seen`` (the ready marker key is present in
        ``output``), ``controlled_cd_lane_ready``, the individual readiness
        flags, ``warning_count``, ``blocker_count``, the prefixed ``blockers``
        list and ``safe_next_step``.

    The lane is reported ready only when the marker is present, the ready flag
    is truthy, the blocker count is zero and no ``BLOCKER`` line was parsed.
    """
    fields = _parse_key_values(output)
    marker_seen = "AWOOOI_110_CONTROLLED_CD_LANE_READY=" in output
    blockers = _prefixed_blockers(
        output,
        prefix="controlled_cd_lane_readiness:",
    )
    warning_count = _int_or_none(fields.get("WARNING_COUNT")) or 0
    blocker_count = _int_or_none(fields.get("BLOCKER_COUNT"))
    if blocker_count is None or (blockers and blocker_count < len(blockers)):
        # Never report fewer blockers than were actually parsed. A declared
        # BLOCKER_COUNT=0 next to BLOCKER lines would otherwise yield a count
        # of 0 alongside a non-empty blockers list. The readiness verdict is
        # unaffected because parsed blockers already force it to False.
        blocker_count = len(blockers)
    ready = _bool_from_field(fields.get("AWOOOI_110_CONTROLLED_CD_LANE_READY"))
    return {
        "receipt_seen": marker_seen,
        "controlled_cd_lane_ready": bool(
            marker_seen and ready and blocker_count == 0 and not blockers
        ),
        "config_ready": _bool_from_field(fields.get("CONFIG_READY")),
        "binary_ready": _bool_from_field(fields.get("BINARY_READY")),
        "registration_ready": _bool_from_field(fields.get("REGISTRATION_READY")),
        "service_ready": _bool_from_field(fields.get("SERVICE_READY")),
        "legacy_failclosed": _bool_from_field(fields.get("LEGACY_FAILCLOSED")),
        "primary_lane_failclosed": _bool_from_field(
            fields.get("PRIMARY_LANE_FAILCLOSED")
        ),
        "warning_count": warning_count,
        "blocker_count": blocker_count,
        "blockers": blockers,
        "safe_next_step": str(fields.get("safe_next_step") or ""),
        "metadata_only": True,
        "raw_output_returned": False,
    }
def _post_apply_verifier(receipt_payload: dict[str, Any]) -> dict[str, Any]:
public_status = _int_or_none(receipt_payload.get("public_registry_v2_http_status"))
internal_status = _int_or_none(
@@ -977,6 +1053,7 @@ def _active_blockers(
ssh_local: dict[str, Any],
watchdog_check: dict[str, Any],
watchdog_repair: dict[str, Any],
controlled_cd_lane: dict[str, Any],
verifier: dict[str, Any],
gitea_queue: dict[str, Any],
deploy_marker: dict[str, Any],
@@ -1010,6 +1087,7 @@ def _active_blockers(
blockers.append("harbor_watchdog_repair_forbidden_action_seen")
if watchdog_repair["receipt_seen"] and not watchdog_repair["harbor_ready"]:
blockers.append("harbor_watchdog_repair_did_not_restore_local_v2")
blockers.extend(_strings(controlled_cd_lane.get("blockers")))
if not verifier["public_registry_v2_ready"]:
blockers.append("public_registry_v2_verifier_not_green")
if not verifier["internal_registry_v2_ready"]:
@@ -1025,6 +1103,7 @@ def _status(
ssh_local: dict[str, Any],
watchdog_check: dict[str, Any],
watchdog_repair: dict[str, Any],
controlled_cd_lane: dict[str, Any],
verifier: dict[str, Any],
gitea_queue: dict[str, Any],
deploy_marker: dict[str, Any],
@@ -1044,6 +1123,10 @@ def _status(
and deploy_marker["blocker_count"] > 0
):
return "harbor_registry_recovery_receipt_verified_waiting_deploy_marker_readback"
if controlled_cd_lane["receipt_seen"] and not controlled_cd_lane[
"controlled_cd_lane_ready"
]:
return "controlled_cd_lane_readiness_receipt_blocked"
if watchdog_repair["receipt_seen"]:
return "harbor_registry_repair_receipt_waiting_registry_v2_verifier"
if watchdog_check["receipt_seen"] and watchdog_check["harbor_ready"]:
@@ -1064,6 +1147,8 @@ def _safe_next_step(*, status: str) -> str:
return "clear_harbor_110_runner_queue_then_retry_cd_marker_readback"
if status == "harbor_registry_recovery_receipt_verified_waiting_deploy_marker_readback":
return "rerun_gitea_cd_then_verify_deploy_marker_and_priority_readback"
if status == "controlled_cd_lane_readiness_receipt_blocked":
return "fix_controlled_cd_lane_guardrail_blockers_then_rerun_readiness_verifier"
if status == "harbor_registry_repair_receipt_waiting_registry_v2_verifier":
return "rerun_public_and_internal_registry_v2_verifier_before_cd_retry"
if status == "harbor_local_registry_ready_waiting_public_registry_v2_verifier":
@@ -1087,6 +1172,8 @@ def _current_apply_blocker(*, status: str) -> str:
return "gitea_queue_clearance_required_after_registry_receipt"
if status == "harbor_registry_recovery_receipt_verified_waiting_deploy_marker_readback":
return "deploy_marker_readback_required_after_registry_receipt"
if status == "controlled_cd_lane_readiness_receipt_blocked":
return "controlled_cd_lane_readiness_required_for_awoooi_host_queue"
if status == "harbor_watchdog_check_unhealthy_waiting_repair_once_receipt":
return "repair_once_receipt_required_after_unhealthy_check"
if status == "ssh_local_repair_receipt_waiting_harbor_watchdog_check":
@@ -1132,6 +1219,18 @@ def _parse_key_values(output: str) -> dict[str, str]:
return fields
def _prefixed_blockers(output: str, *, prefix: str) -> list[str]:
    """Collect ``BLOCKER <name>`` lines from ``output`` as prefixed names.

    Each line is stripped first. Lines that do not start with ``"BLOCKER "``
    are ignored. The collected names are passed through ``_unique_strings``
    before being returned.
    """
    collected: list[str] = []
    for candidate in map(str.strip, output.splitlines()):
        if candidate.startswith("BLOCKER "):
            # Everything after the first space is the blocker name.
            name = candidate.partition(" ")[2].strip()
            if name:
                collected.append(f"{prefix}{name}")
    return _unique_strings(collected)
def _mode_from_marker_line(output: str) -> str:
for raw_line in output.splitlines():
if "AWOOOI_110_SSH_PUBLICKEY_AUTH_LOCAL_REPAIR" not in raw_line:

View File

@@ -72,7 +72,7 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False):
assert payload["rollups"]["current_blocker_local_recovery_package_count"] == 1
assert (
payload["rollups"]["current_blocker_harbor_recovery_receipt_input_count"]
== 8
== 9
)
assert payload["rollups"]["runtime_dispatch_performed"] is False
@@ -133,7 +133,7 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False):
assert current_queue[0]["harbor_recovery_receipt_endpoint"] == (
"/api/v1/agents/harbor-registry-controlled-recovery-receipt"
)
assert current_queue[0]["harbor_recovery_receipt_input_count"] == 8
assert current_queue[0]["harbor_recovery_receipt_input_count"] == 9
assert [
item["input_id"]
for item in current_queue[0]["harbor_recovery_receipt_inputs"]
@@ -143,6 +143,7 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False):
"ssh_local_repair_output",
"watchdog_check_output",
"watchdog_repair_output",
"controlled_cd_lane_readiness_output",
"public_registry_v2_http_status",
"internal_registry_v2_http_status",
"deploy_marker_readback",

View File

@@ -329,13 +329,14 @@ def test_awoooi_priority_work_order_readback_overlays_ai_loop_current_blocker_qu
assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_endpoint"] == (
"/api/v1/agents/harbor-registry-controlled-recovery-receipt"
)
assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_input_count"] == 8
assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_input_count"] == 9
assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_input_ids"] == [
"gitea_actions_queue_readback",
"ssh_publickey_diagnosis_output",
"ssh_local_repair_output",
"watchdog_check_output",
"watchdog_repair_output",
"controlled_cd_lane_readiness_output",
"public_registry_v2_http_status",
"internal_registry_v2_http_status",
"deploy_marker_readback",
@@ -384,7 +385,7 @@ def test_awoooi_priority_work_order_readback_overlays_ai_loop_current_blocker_qu
payload["summary"][
"ai_loop_current_blocker_harbor_recovery_receipt_input_count"
]
== 8
== 9
)
assert payload["summary"][
"ai_loop_current_blocker_harbor_recovery_receipt_input_ids"

View File

@@ -225,6 +225,73 @@ def test_harbor_recovery_receipt_classifies_110_high_load() -> None:
assert payload["rollups"]["ssh_publickey_node_high_load_seen"] is True
def test_harbor_recovery_receipt_surfaces_controlled_cd_lane_readiness() -> None:
    """A blocked lane receipt shows up in status, blockers, phase and rollups."""
    receipt = {
        "controlled_cd_lane_readiness_output": _controlled_cd_lane_blocked_output(),
    }
    payload = validate_harbor_registry_controlled_recovery_receipt(receipt)

    assert payload["status"] == "controlled_cd_lane_readiness_receipt_blocked"
    assert payload["safe_next_step"] == (
        "fix_controlled_cd_lane_guardrail_blockers_then_rerun_readiness_verifier"
    )
    active_blockers = payload["active_blockers"]
    assert (
        "controlled_cd_lane_readiness:controlled_cd_lane_service_not_active"
        in active_blockers
    )
    assert (
        "controlled_cd_lane_readiness:controlled_cd_lane_registration_missing"
        in active_blockers
    )

    lane = payload["readback"]["controlled_cd_lane_readiness"]
    assert lane["receipt_seen"] is True
    assert lane["controlled_cd_lane_ready"] is False
    assert lane["registration_ready"] is False
    assert lane["service_ready"] is False
    assert lane["blocker_count"] == 2
    assert lane["safe_next_step"] == (
        "restore_or_register_awoooi_cd_lane_drain_registration_without_printing_token_then_rerun_this_verifier"
    )
    assert lane["raw_output_returned"] is False
    assert "secret-token-like-content" not in str(lane)

    phases_by_id = {
        phase["phase_id"]: phase
        for phase in payload["local_console_phase_readback"]["phases"]
    }
    lane_phase = phases_by_id["verify_controlled_cd_lane"]
    assert lane_phase["status"] == (
        "blocked_controlled_cd_lane_readiness_receipt_not_ready"
    )

    rollups = payload["rollups"]
    assert rollups["controlled_cd_lane_readiness_receipt_seen"] is True
    assert rollups["controlled_cd_lane_ready"] is False
    assert rollups["controlled_cd_lane_blocker_count"] == 2

    redaction = payload["input_redaction"]["controlled_cd_lane_readiness_output"]
    assert redaction["line_count"] > 0
def test_harbor_recovery_receipt_blocks_controlled_cd_lane_blocker_count() -> None:
    """A ready flag with a non-zero BLOCKER_COUNT must still be blocked."""
    readiness_output = """
AWOOOI_110_CONTROLLED_CD_LANE_READY=1
CONFIG_READY=1
BINARY_READY=1
REGISTRATION_READY=1
SERVICE_READY=1
LEGACY_FAILCLOSED=1
PRIMARY_LANE_FAILCLOSED=1
WARNING_COUNT=0
BLOCKER_COUNT=1
"""
    payload = validate_harbor_registry_controlled_recovery_receipt(
        {"controlled_cd_lane_readiness_output": readiness_output}
    )

    assert payload["status"] == "controlled_cd_lane_readiness_receipt_blocked"
    lane = payload["readback"]["controlled_cd_lane_readiness"]
    assert lane["controlled_cd_lane_ready"] is False
    assert payload["rollups"]["controlled_cd_lane_blocker_count"] == 1
def test_harbor_recovery_receipt_surfaces_gitea_queue_blockers() -> None:
payload = validate_harbor_registry_controlled_recovery_receipt(
{
@@ -587,6 +654,33 @@ host_reboot_performed=false
"""
def _controlled_cd_lane_blocked_output() -> str:
return """
== audit metadata ==
read_only=true
secret_values_collected=false
runner_token_read=false
raw_runner_registration_read=false
== controlled lane registration metadata ==
CD_LANE_REGISTRATION path=/home/wooo/awoooi-cd-lane-drain/data/.runner present=0 content_read=false
BLOCKER controlled_cd_lane_registration_missing
== controlled lane service ==
CD_LANE_SERVICE_GUARDRAILS unit=awoooi-cd-lane-drain.service active=inactive main_pid=0 limits=1 target_match=1 registration_condition=1
BLOCKER controlled_cd_lane_service_not_active
== verdict ==
CONFIG_READY=1
BINARY_READY=1
REGISTRATION_READY=0
SERVICE_READY=0
LEGACY_FAILCLOSED=1
PRIMARY_LANE_FAILCLOSED=1
WARNING_COUNT=0
BLOCKER_COUNT=2
AWOOOI_110_CONTROLLED_CD_LANE_READY=0
safe_next_step=restore_or_register_awoooi_cd_lane_drain_registration_without_printing_token_then_rerun_this_verifier
"""
def _gitea_queue_no_matching_runner() -> dict:
return {
"schema_version": "awoooi_public_gitea_actions_queue_readback_v1",

View File

@@ -51033,3 +51033,24 @@ production browser smoke:
**下一步**
- 188 DB CPU 已降,110 仍高,原因仍是 `gitea` / queue / `awoooi-host` control path;110 `load5=27.22`、`gitea=3.4019` cores、Harbor repair `#4176 Waiting`、no matching `awoooi-host`。主線下一步繼續 110 Gitea queue / controlled lane recovery,不恢復 generic runner、不重啟主機。
## 2026-07-01 — 08:05 P0 110 controlled CD lane readiness receipt
**完成內容**
- `harbor-registry-controlled-recovery-receipt` 新增 `controlled_cd_lane_readiness_output` metadata-only input,解析 `check-awoooi-110-controlled-cd-lane-readiness.sh` 的 non-secret verifier output。
- receipt readback 新增 `controlled_cd_lane_readiness`、rollups、active blocker 與 `verify_controlled_cd_lane` phase 狀態;當 lane registration / service / root restore-source / pressure / label-config drift 未 ready 時,狀態轉為 `controlled_cd_lane_readiness_receipt_blocked`。
- `agent-log-controlled-writeback-executor-readback` 的 Harbor receipt input contract 從 8 個增為 9 個,讓 AI Loop / priority work-order 明確要求 110 controlled lane readiness receipt 才能重試 `awoooi-host` repair queue。
- readiness 判斷同時要求 marker、ready flag、`BLOCKER_COUNT=0` 與無 `BLOCKER` 行,避免 verifier 格式不完整時誤判 lane ready。
**本地驗證結果**
- Focused suite:`73 passed`。
- `ruff check`、`py_compile`、`ops/runner/guard-gitea-runner-pressure.py --root .`、`scripts/ci/check-gitea-step-env-secrets.js`、`git diff --check`:通過。
- runner pressure guard 讀回 `auto_branch_events_on_110=0`、`generic_runner_labels=0`,未恢復 legacy / generic runner。
**仍維持**
- 沒有讀 secret / token / `.env` / raw sessions / SQLite / auth;沒有讀 `.runner` 內容。
- 沒有使用 GitHub / gh / GitHub API / GitHub Actions。
- 沒有重啟主機,沒有 Docker / Nginx / K3s / DB restart,沒有 workflow_dispatch,沒有 runtime write。
**下一步**
- commit / push 後讀回 Gitea queue、registry `/v2/`、Harbor health 與 priority work-order API;若仍卡 `awoooi-host` no matching,下一步維持 110 local console / controlled lane restore-registration verifier,不恢復 generic runner。