fix(recovery): classify harbor repair cross-workflow jobs
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 35s
CD Pipeline / build-and-deploy (push) Failing after 2m38s
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 0s
CD Pipeline / post-deploy-checks (push) Has been skipped
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Has been cancelled

This commit is contained in:
Your Name
2026-07-01 07:37:59 +08:00
parent 57df8602f5
commit 77f9bb0417
4 changed files with 205 additions and 0 deletions

View File

@@ -170,6 +170,12 @@ def validate_harbor_registry_controlled_recovery_receipt(
"gitea_queue_harbor_110_jobs_stale_or_mismatched": gitea_queue[
"harbor_110_repair_jobs_stale_or_mismatched"
],
"gitea_queue_harbor_110_jobs_cross_workflow_mismatch": gitea_queue[
"harbor_110_repair_jobs_cross_workflow_mismatch"
],
"gitea_queue_harbor_110_jobs_payload_classifier": gitea_queue[
"harbor_110_repair_jobs_payload_classifier"
],
"gitea_queue_latest_cd_run_status": gitea_queue[
"latest_visible_cd_run_status"
],
@@ -535,6 +541,9 @@ def _gitea_queue_readback(value: Any) -> dict[str, Any]:
"harbor_110_repair_waiting": False,
"harbor_110_repair_blocked": False,
"harbor_110_repair_jobs_stale_or_mismatched": False,
"harbor_110_repair_jobs_cross_workflow_mismatch": False,
"harbor_110_repair_jobs_payload_classifier": "",
"harbor_110_repair_jobs_expected_names": [],
"harbor_110_repair_jobs_unexpected_names": [],
"harbor_110_repair_jobs_labels": [],
"harbor_110_repair_jobs_runner_names": [],
@@ -558,6 +567,10 @@ def _gitea_queue_readback(value: Any) -> dict[str, Any]:
rollups.get("harbor_110_repair_jobs_stale_or_mismatched") is True
or readback.get("harbor_110_repair_jobs_stale_or_mismatched") is True
)
jobs_cross_workflow = bool(
rollups.get("harbor_110_repair_jobs_cross_workflow_mismatch") is True
or readback.get("harbor_110_repair_jobs_cross_workflow_mismatch") is True
)
waiting = bool(
rollups.get("harbor_110_repair_waiting") is True
or readback.get("latest_visible_harbor_110_repair_waiting") is True
@@ -598,6 +611,7 @@ def _gitea_queue_readback(value: Any) -> dict[str, Any]:
blockers = _gitea_queue_blockers(
no_matching_runner=bool(no_matching_label),
jobs_stale=jobs_stale,
jobs_cross_workflow=jobs_cross_workflow,
current_cd_harbor_retrying=current_cd_harbor_retrying,
blocked=blocked,
boundary_violation=boundary_violation,
@@ -629,6 +643,15 @@ def _gitea_queue_readback(value: Any) -> dict[str, Any]:
"harbor_110_repair_waiting": waiting,
"harbor_110_repair_blocked": blocked,
"harbor_110_repair_jobs_stale_or_mismatched": jobs_stale,
"harbor_110_repair_jobs_cross_workflow_mismatch": jobs_cross_workflow,
"harbor_110_repair_jobs_payload_classifier": str(
rollups.get("harbor_110_repair_jobs_payload_classifier")
or readback.get("harbor_110_repair_jobs_payload_classifier")
or ""
),
"harbor_110_repair_jobs_expected_names": _strings(
readback.get("harbor_110_repair_jobs_expected_names")
),
"harbor_110_repair_jobs_unexpected_names": _strings(
readback.get("harbor_110_repair_jobs_unexpected_names")
),
@@ -651,6 +674,7 @@ def _gitea_queue_blockers(
*,
no_matching_runner: bool,
jobs_stale: bool,
jobs_cross_workflow: bool,
current_cd_harbor_retrying: bool,
blocked: bool,
boundary_violation: bool,
@@ -662,6 +686,8 @@ def _gitea_queue_blockers(
blockers.append("gitea_queue_harbor_110_repair_no_matching_runner")
elif blocked:
blockers.append("gitea_queue_harbor_110_repair_blocked")
if jobs_cross_workflow:
blockers.append("gitea_queue_harbor_110_repair_jobs_cross_workflow_mismatch")
if jobs_stale:
blockers.append("gitea_queue_harbor_110_repair_jobs_stale_or_mismatched")
if boundary_violation:

View File

@@ -225,6 +225,42 @@ def test_harbor_recovery_receipt_surfaces_gitea_queue_blockers() -> None:
}
def test_harbor_recovery_receipt_surfaces_cross_workflow_queue_payload() -> None:
    """Cross-workflow queue readback must surface in blockers, readback and rollups.

    Feeds the receipt validator a Gitea queue readback whose Harbor 110 repair
    jobs are flagged as a cross-workflow mismatch, then checks that:

    * both the cross-workflow blocker and the generic stale-or-mismatched
      blocker are listed in ``active_blockers``;
    * the normalised queue readback carries the flag, the payload classifier
      and the expected job names;
    * the receipt rollups mirror the flag and the classifier.
    """
    payload = validate_harbor_registry_controlled_recovery_receipt(
        {
            "watchdog_check_output": _watchdog_check_output(
                ready=True,
                status=401,
            ),
            "public_registry_v2_http_status": 401,
            "internal_registry_v2_http_status": 401,
            "gitea_actions_queue_readback": _gitea_queue_cross_workflow_jobs(),
        }
    )

    # Blockers: the specific classification is reported alongside the
    # broader stale-or-mismatched blocker, not instead of it.
    assert "gitea_queue_harbor_110_repair_jobs_cross_workflow_mismatch" in payload[
        "active_blockers"
    ]
    assert "gitea_queue_harbor_110_repair_jobs_stale_or_mismatched" in payload[
        "active_blockers"
    ]

    # Normalised queue readback.
    queue = payload["readback"]["gitea_actions_queue"]
    assert queue["harbor_110_repair_jobs_cross_workflow_mismatch"] is True
    assert queue["harbor_110_repair_jobs_payload_classifier"] == (
        "cd_workflow_jobs_returned_for_harbor_110_repair_run"
    )
    assert queue["harbor_110_repair_jobs_expected_names"] == [
        "harbor-110-local-repair",
        "workflow-shape",
    ]

    # Receipt-level rollups.
    assert payload["rollups"][
        "gitea_queue_harbor_110_jobs_cross_workflow_mismatch"
    ] is True
    assert payload["rollups"]["gitea_queue_harbor_110_jobs_payload_classifier"] == (
        "cd_workflow_jobs_returned_for_harbor_110_repair_run"
    )
def test_harbor_recovery_receipt_waits_for_deploy_marker_readback() -> None:
payload = validate_harbor_registry_controlled_recovery_receipt(
{
@@ -464,6 +500,37 @@ def _gitea_queue_no_matching_runner() -> dict:
}
def _gitea_queue_cross_workflow_jobs() -> dict:
    """Return a queue readback fixture flagged as a cross-workflow mismatch.

    Starts from the ``_gitea_queue_no_matching_runner()`` fixture and overlays
    the cross-workflow fields on both its ``readback`` and ``rollups``
    sections: the mismatch flag, the payload classifier, and (readback only)
    the expected/unexpected job names and job labels.
    """
    # Single source for the classifier so readback and rollups cannot drift.
    classifier = "cd_workflow_jobs_returned_for_harbor_110_repair_run"

    payload = _gitea_queue_no_matching_runner()
    payload["readback"].update(
        {
            "harbor_110_repair_jobs_cross_workflow_mismatch": True,
            "harbor_110_repair_jobs_payload_classifier": classifier,
            "harbor_110_repair_jobs_expected_names": [
                "harbor-110-local-repair",
                "workflow-shape",
            ],
            "harbor_110_repair_jobs_unexpected_names": [
                "build-and-deploy",
                "post-deploy-checks",
                "tests",
            ],
            "harbor_110_repair_jobs_labels": ["awoooi-host"],
        }
    )
    payload["rollups"].update(
        {
            "harbor_110_repair_jobs_cross_workflow_mismatch": True,
            "harbor_110_repair_jobs_payload_classifier": classifier,
        }
    )
    return payload
def _deploy_marker_verified() -> dict:
return {
"schema_version": "awoooi_production_deploy_readback_blocker_v1",

View File

@@ -33,6 +33,11 @@ EXPECTED_HARBOR_110_REPAIR_JOB_NAMES = {
"workflow-shape",
"harbor-110-local-repair",
}
# Job names that belong to the CD pipeline workflow. build_readback flags a
# Harbor 110 repair jobs payload as a cross-workflow mismatch when the payload
# is non-empty and every job name in it is drawn from this set.
# NOTE(review): the CD pipeline also appears to run `workflow-shape` and
# `cancel-stale-cd` jobs, which are not listed here; because the check uses
# issubset(), a payload containing either of them would not be classified as
# cross-workflow — confirm this is intentional.
CD_WORKFLOW_JOB_NAMES = {
"build-and-deploy",
"post-deploy-checks",
"tests",
}
_RUN_ROW_RE = re.compile(
r'<span data-tooltip-content="([^"]+)">.*?'
@@ -352,6 +357,17 @@ def build_readback(
harbor_110_repair_jobs_unexpected_names = sorted(
harbor_job_names - EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
)
harbor_110_repair_jobs_cross_workflow_mismatch = (
bool(harbor_job_names)
and harbor_job_names.issubset(CD_WORKFLOW_JOB_NAMES)
)
harbor_110_repair_jobs_payload_classifier = (
"cd_workflow_jobs_returned_for_harbor_110_repair_run"
if harbor_110_repair_jobs_cross_workflow_mismatch
else "unexpected_harbor_110_repair_job_names"
if harbor_110_repair_jobs_unexpected_names
else ""
)
harbor_110_repair_jobs_match_expected_workflow = (
bool(harbor_job_names)
and not harbor_110_repair_jobs_unexpected_names
@@ -477,9 +493,18 @@ def build_readback(
"harbor_110_repair_jobs_conclusion_counts": harbor_job_conclusion_counts,
"harbor_110_repair_jobs_run_ids": sorted(harbor_job_run_ids),
"harbor_110_repair_jobs_names": sorted(harbor_job_names),
"harbor_110_repair_jobs_expected_names": sorted(
EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
),
"harbor_110_repair_jobs_unexpected_names": (
harbor_110_repair_jobs_unexpected_names
),
"harbor_110_repair_jobs_cross_workflow_mismatch": (
harbor_110_repair_jobs_cross_workflow_mismatch
),
"harbor_110_repair_jobs_payload_classifier": (
harbor_110_repair_jobs_payload_classifier
),
"harbor_110_repair_jobs_labels": sorted(harbor_job_labels),
"harbor_110_repair_jobs_runner_names": sorted(harbor_job_runner_names),
"harbor_110_repair_jobs_run_id_matches_visible": (
@@ -625,9 +650,18 @@ def build_readback(
),
"harbor_110_repair_jobs_total_count": harbor_jobs_total_count,
"harbor_110_repair_jobs_names": sorted(harbor_job_names),
"harbor_110_repair_jobs_expected_names": sorted(
EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
),
"harbor_110_repair_jobs_stale_or_mismatched": (
harbor_110_repair_jobs_stale_or_mismatched
),
"harbor_110_repair_jobs_cross_workflow_mismatch": (
harbor_110_repair_jobs_cross_workflow_mismatch
),
"harbor_110_repair_jobs_payload_classifier": (
harbor_110_repair_jobs_payload_classifier
),
"harbor_110_repair_jobs_unexpected_names": (
harbor_110_repair_jobs_unexpected_names
),

View File

@@ -263,6 +263,44 @@ def _harbor_110_repair_stale_code_review_jobs() -> dict:
}
def _harbor_110_repair_cross_workflow_jobs() -> dict:
    """Return a jobs payload that holds only CD workflow jobs.

    The fixture is passed to ``build_readback`` as the Harbor 110 repair jobs
    payload. All three jobs are completed successfully on the same runner,
    run id and head SHA; they differ only in ``id`` and ``name``, and none of
    the names is an expected Harbor 110 repair job name.
    """
    # (job id, job name) pairs; every other field is shared by all jobs.
    cd_jobs = (
        (5901, "build-and-deploy"),
        (5902, "tests"),
        (5903, "post-deploy-checks"),
    )
    jobs = [
        {
            "id": job_id,
            "name": job_name,
            "status": "completed",
            "conclusion": "success",
            # Fresh list per job so mutating one job cannot affect another.
            "labels": ["awoooi-host"],
            "runner_name": "wooo-runner",
            "run_id": 4060,
            "head_sha": "f9ad460ff6f3d258bf86da2f30a2d40451234567",
        }
        for job_id, job_name in cd_jobs
    ]
    return {
        "total_count": len(jobs),
        "jobs": jobs,
    }
def _host_pressure_waiting_log() -> str:
return """
2026-06-30T11:48:41.7864172Z ⏳ host web/build/smoke pressure detected (attempt 1/60); waiting 10s
@@ -435,10 +473,50 @@ def test_build_readback_rejects_stale_harbor_110_repair_jobs_payload() -> None:
assert payload["readback"]["harbor_110_repair_jobs_unexpected_names"] == [
"ai-code-review"
]
assert (
payload["readback"]["harbor_110_repair_jobs_payload_classifier"]
== "unexpected_harbor_110_repair_job_names"
)
assert payload["readback"]["harbor_110_repair_jobs_labels"] == ["ubuntu-latest"]
assert payload["rollups"]["harbor_110_repair_jobs_stale_or_mismatched"] is True
def test_build_readback_classifies_cross_workflow_harbor_jobs_payload() -> None:
    """build_readback must classify a CD-only jobs payload as cross-workflow.

    The Harbor 110 repair jobs payload contains only CD workflow job names.
    The readback is expected to keep the no-matching-runner status, keep the
    generic stale-or-mismatched flag, and additionally report the
    cross-workflow flag, the payload classifier and the sorted
    expected/unexpected job names. The rollups must mirror the flag.
    """
    module = _load_module()
    payload = module.build_readback(
        actions_html=_actions_html_harbor_repair_waiting_with_workflow_no_matching(),
        actions_list_http_status=401,
        actions_list_payload={"message": "token is required"},
        cd_jobs_http_status=200,
        cd_jobs_payload={"jobs": [], "total_count": 0},
        harbor_110_repair_jobs_http_status=200,
        harbor_110_repair_jobs_payload=_harbor_110_repair_cross_workflow_jobs(),
    )

    assert payload["status"] == "blocked_harbor_110_repair_no_matching_runner"

    # Readback section.
    assert payload["readback"]["harbor_110_repair_jobs_stale_or_mismatched"] is True
    assert (
        payload["readback"]["harbor_110_repair_jobs_cross_workflow_mismatch"]
        is True
    )
    assert payload["readback"]["harbor_110_repair_jobs_payload_classifier"] == (
        "cd_workflow_jobs_returned_for_harbor_110_repair_run"
    )
    assert payload["readback"]["harbor_110_repair_jobs_expected_names"] == [
        "harbor-110-local-repair",
        "workflow-shape",
    ]
    assert payload["readback"]["harbor_110_repair_jobs_unexpected_names"] == [
        "build-and-deploy",
        "post-deploy-checks",
        "tests",
    ]

    # Rollups section.
    assert (
        payload["rollups"]["harbor_110_repair_jobs_cross_workflow_mismatch"]
        is True
    )
def test_build_readback_prioritizes_harbor_repair_jobs_stale_status() -> None:
module = _load_module()
payload = module.build_readback(