awoooi/ops/runner/read-public-gitea-actions-queue.py

#!/usr/bin/env python3
from __future__ import annotations

import argparse
import html
import json
import re
import sys
import urllib.error
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from urllib.parse import unquote


# Public Actions page of the wooo/awoooi repository on gitea.wooo.work.
DEFAULT_ACTIONS_URL = "https://gitea.wooo.work/wooo/awoooi/actions"
# Same Actions page, filtered to the cd.yaml workflow via query parameters.
DEFAULT_CD_WORKFLOW_ACTIONS_URL = (
    "https://gitea.wooo.work/wooo/awoooi/actions?workflow=cd.yaml&actor=0&status=0"
)
# REST endpoint listing action runs; derive_jobs_api_url() rewrites a URL of
# this shape into a per-run ".../runs/<run_id>/jobs" URL.
DEFAULT_ACTIONS_LIST_API_URL = (
    "https://gitea.wooo.work/api/v1/repos/wooo/awoooi/actions/runs?limit=10"
)
# Empty by default.
# NOTE(review): presumably the jobs URL is derived per run when this is left
# empty -- confirm against the CLI code that consumes this constant.
DEFAULT_CD_RUN_JOBS_API_URL = ""
# Log URL templates with a "{run_id}" placeholder; the job index in the path
# is hard-coded (3 for the build log, 2 for the tests log).
DEFAULT_CD_BUILD_JOB_LOG_URL_TEMPLATE = (
    "https://gitea.wooo.work/wooo/awoooi/actions/runs/{run_id}/jobs/3/logs"
)
DEFAULT_CD_TESTS_JOB_LOG_URL_TEMPLATE = (
    "https://gitea.wooo.work/wooo/awoooi/actions/runs/{run_id}/jobs/2/logs"
)
# Value emitted under "schema_version" in the payload built by build_readback().
SCHEMA_VERSION = "awoooi_public_gitea_actions_queue_readback_v1"
# Job names accepted in a harbor-110-local-repair jobs payload; any other name
# is reported as unexpected by build_readback().
EXPECTED_HARBOR_110_REPAIR_JOB_NAMES = {
    "workflow-shape",
    "harbor-110-local-repair",
}
# Job names of the cd workflow; a harbor repair jobs payload made up only of
# these names is classified as a cross-workflow mismatch.
CD_WORKFLOW_JOB_NAMES = {
    "build-and-deploy",
    "post-deploy-checks",
    "tests",
}

_RUN_ROW_RE = re.compile(
    r'<span data-tooltip-content="([^"]+)">.*?'
    r"<span><b>([^<]+)</b>:</span>([^<]+)</div>",
    re.S,
)
_RUN_ITEM_SPLIT_RE = re.compile(r'<div class="flex-item tw-items-center">')
_RUN_LINK_RE = re.compile(
    r'<a class="flex-item-title" title="([^"]*)" '
    r'href="/wooo/awoooi/actions/runs/(\d+)">',
    re.S,
)
_RUN_STATUS_RE = re.compile(r'<span data-tooltip-content="([^"]+)">')
_RUN_BODY_RE = re.compile(r"<span><b>([^<]+)</b>:</span>\s*([^<]+)", re.S)
_RUN_COMMIT_RE = re.compile(r"/wooo/awoooi/commit/([0-9a-f]{40})")
_RUN_NAME_RE = re.compile(r"^(?P<workflow>.+)\s+#(?P<run_id>\d+)$")
_WORKFLOW_FILTER_ITEM_RE = re.compile(
    r'<a class="item" href="\?workflow=(?P<workflow>[^"&]+)[^"]*">'
    r"(?P<body>.*?)</a>",
    re.S,
)
_NO_MATCHING_LABEL_RE = re.compile(
    r"No matching online runner with label:\s*(?P<label>[A-Za-z0-9_.:-]+)"
)
_HARBOR_ATTEMPT_RE = re.compile(
    r"harbor_login_attempt=(?P<attempt>\d+)\s+registry_v2_status=(?P<status>\d{3})"
)
_HARBOR_BLOCKER_RE = re.compile(
    r"BLOCKER harbor_registry_public_route_unavailable "
    r"registry_v2_status=(?P<status>\d{3})"
)
_HARBOR_CONTROLLED_REPAIR_SKIP_RE = re.compile(
    r"harbor_controlled_repair_skipped=(?P<reason>[A-Za-z0-9_.:-]+)"
)
_HARBOR_CONTROLLED_REPAIR_STATUS_RE = re.compile(
    r"harbor_controlled_repair_public_registry_v2_status=(?P<status>\d{3})"
)
_HOST_PRESSURE_ATTEMPT_RE = re.compile(
    r"host web/build/smoke pressure detected "
    r"\(attempt (?P<attempt>\d+)/(?P<limit>\d+)\)"
)
_HOST_PRESSURE_LOAD_RE = re.compile(
    r"host load5/core (?P<load>[0-9.]+) > (?P<threshold>[0-9.]+)"
)
_HOST_PRESSURE_REFUSAL_RE = re.compile(
    r"refusing to start AWOOI image build while host web/build/smoke pressure is still active"
)


@dataclass(frozen=True)
class HttpRead:
    """Immutable result of one HTTP read: status code plus decoded body text."""

    # Numeric HTTP status code of the response (or of the HTTPError).
    http_status: int
    # Response body decoded as UTF-8 text by fetch_public_url().
    text: str


def fetch_public_url(url: str, timeout_seconds: float) -> HttpRead:
    """Read *url* over HTTP and return its status code and decoded body.

    An ``urllib.error.HTTPError`` is not propagated: its status code and body
    are captured in the returned ``HttpRead`` instead. Any other exception
    raised while opening or reading the URL propagates to the caller.

    Args:
        url: Address to request.
        timeout_seconds: Timeout handed to ``urllib.request.urlopen``.

    Returns:
        ``HttpRead`` with the numeric status and the body decoded as UTF-8.
    """
    request_headers = {"User-Agent": "awoooi-public-gitea-actions-readback/1.0"}
    outgoing = urllib.request.Request(url, headers=request_headers)
    try:
        with urllib.request.urlopen(outgoing, timeout=timeout_seconds) as reply:
            body_bytes = reply.read()
            # Fall back to 200 when the response object has no "status".
            status_code = int(getattr(reply, "status", 200))
    except urllib.error.HTTPError as http_error:
        body_bytes = http_error.read()
        status_code = int(http_error.code)
    # Undecodable bytes are replaced rather than raising.
    body_text = body_bytes.decode("utf-8", errors="replace")
    return HttpRead(http_status=status_code, text=body_text)


def parse_visible_runs(
    actions_html: str,
    *,
    workflow_hint: str = "",
) -> list[dict[str, str]]:
    """Extract the run entries visible in an Actions page's HTML.

    The page is first split into per-run items; an item is kept only when a
    status tooltip, a run link and a name/kind body are all found in it. If
    that yields nothing, the whole page is scanned with the simpler row
    pattern, which carries no run link, title or commit.

    Args:
        actions_html: Raw HTML of the Actions page.
        workflow_hint: Workflow name used when a run's name does not have the
            "<workflow> #<run_id>" form.

    Returns:
        One dict per run, in page order, as built by
        ``_visible_run_from_parts``.
    """
    item_runs: list[dict[str, str]] = []
    # Element 0 is whatever precedes the first item marker, so drop it.
    for item_html in _RUN_ITEM_SPLIT_RE.split(actions_html)[1:]:
        found_status = _RUN_STATUS_RE.search(item_html)
        found_link = _RUN_LINK_RE.search(item_html)
        found_body = _RUN_BODY_RE.search(item_html)
        if found_status and found_link and found_body:
            title_text, run_id_text = found_link.groups()
            name_text, kind_text = found_body.groups()
            found_commit = _RUN_COMMIT_RE.search(item_html)
            commit_sha = found_commit.group(1) if found_commit else ""
            item_runs.append(
                _visible_run_from_parts(
                    raw_status=found_status.group(1),
                    raw_name=name_text,
                    raw_kind=kind_text,
                    raw_run_id=run_id_text,
                    raw_title=title_text,
                    raw_commit_sha=commit_sha,
                    workflow_hint=workflow_hint,
                )
            )

    if item_runs:
        return item_runs

    # Fallback: row-level pattern over the entire page.
    return [
        _visible_run_from_parts(
            raw_status=row_status,
            raw_name=row_name,
            raw_kind=row_kind,
            workflow_hint=workflow_hint,
        )
        for row_status, row_name, row_kind in _RUN_ROW_RE.findall(actions_html)
    ]


def parse_workflow_no_matching_labels(actions_html: str) -> dict[str, str]:
    """Map workflow names to the runner label reported as having no match.

    Scans the workflow filter links in *actions_html*; only links whose body
    contains a "No matching online runner with label: ..." message contribute
    an entry. The workflow name is URL-unquoted, HTML-unescaped and stripped.
    A later link for the same workflow overwrites an earlier one.
    """
    label_by_workflow: dict[str, str] = {}
    for filter_link in _WORKFLOW_FILTER_ITEM_RE.finditer(actions_html):
        label_hit = _NO_MATCHING_LABEL_RE.search(filter_link.group("body"))
        if label_hit:
            decoded_name = unquote(filter_link.group("workflow"))
            workflow_name = html.unescape(decoded_name).strip()
            label_by_workflow[workflow_name] = label_hit.group("label")
    return label_by_workflow


def merge_visible_runs(
    primary_runs: list[dict[str, str]],
    fallback_runs: list[dict[str, str]],
) -> list[dict[str, str]]:
    """Concatenate two run lists, keeping the first run per identity.

    A run's identity is its ``(workflow, run_id)`` pair, with a missing key
    treated as an empty string. Primary runs come first, so they win over
    fallback runs with the same identity; the original order is preserved.
    """
    first_seen: dict[tuple[str, str], dict[str, str]] = {}
    for candidate in [*primary_runs, *fallback_runs]:
        identity = (candidate.get("workflow", ""), candidate.get("run_id", ""))
        # setdefault keeps the earliest run and dict order keeps page order.
        first_seen.setdefault(identity, candidate)
    return list(first_seen.values())


def _visible_run_from_parts(
    *,
    raw_status: str,
    raw_name: str,
    raw_kind: str,
    raw_run_id: str = "",
    raw_title: str = "",
    raw_commit_sha: str = "",
    workflow_hint: str = "",
) -> dict[str, str]:
    """Normalise the raw HTML fragments of one run into a run dict.

    Status, name, kind and title are HTML-unescaped and stripped. When the
    name has the form "<workflow> #<run_id>", the workflow (and, if
    *raw_run_id* is empty, the run id) is taken from it; otherwise the
    workflow falls back to *workflow_hint* or the name itself.

    Returns:
        Dict with keys ``run_id``, ``workflow``, ``kind``, ``status``,
        ``title``, ``commit_sha`` and ``no_matching_runner_label`` (empty
        unless the status carries a "No matching online runner" message).
    """
    status_text = html.unescape(raw_status).strip()
    name_text = html.unescape(raw_name).strip()
    kind_text = html.unescape(raw_kind).strip()

    parsed_name = _RUN_NAME_RE.match(name_text)
    if parsed_name:
        workflow_name = parsed_name.group("workflow")
        run_id_from_name = parsed_name.group("run_id")
    else:
        workflow_name = workflow_hint or name_text
        run_id_from_name = ""

    label_hit = _NO_MATCHING_LABEL_RE.search(status_text)
    missing_label = label_hit.group("label") if label_hit else ""

    return {
        # An explicit run id from the link takes precedence over the name.
        "run_id": raw_run_id or run_id_from_name,
        "workflow": workflow_name,
        "kind": kind_text,
        "status": status_text,
        "title": html.unescape(raw_title).strip(),
        "commit_sha": raw_commit_sha,
        "no_matching_runner_label": missing_label,
    }


def build_readback(
    *,
    actions_html: str,
    cd_workflow_actions_html: str = "",
    actions_list_http_status: int,
    actions_list_payload: Any,
    cd_jobs_http_status: int,
    cd_jobs_payload: Any,
    harbor_110_repair_jobs_http_status: int = 0,
    harbor_110_repair_jobs_payload: Any | None = None,
    latest_cd_build_log_http_status: int = 0,
    latest_cd_build_log_text: str = "",
    latest_cd_tests_log_http_status: int = 0,
    latest_cd_tests_log_text: str = "",
) -> dict[str, Any]:
    """Assemble the queue readback payload from already-fetched inputs.

    This function only parses and classifies the text/JSON it is given; it
    does not fetch anything itself.

    Args:
        actions_html: HTML of the Actions page; parsed for visible runs and
            for per-workflow "No matching online runner" labels.
        cd_workflow_actions_html: HTML parsed with the workflow hint
            "cd.yaml"; its runs are merged after those of *actions_html*.
        actions_list_http_status: Status of the runs-list API read; 401 is
            reported as "requires token".
        actions_list_payload: Decoded runs-list response; only its "message"
            is used, and only when the payload is a dict.
        cd_jobs_http_status: Status of the cd run jobs API read.
        cd_jobs_payload: Decoded jobs response; "total_count" and "jobs" are
            read when it is a dict, otherwise it is treated as empty.
        harbor_110_repair_jobs_http_status: Status of the harbor repair jobs
            API read.
        harbor_110_repair_jobs_payload: Decoded harbor repair jobs response,
            handled like *cd_jobs_payload*.
        latest_cd_build_log_http_status: Reported as-is in the readback.
        latest_cd_build_log_text: Build log text, classified by
            ``classify_cd_build_log``.
        latest_cd_tests_log_http_status: Reported as-is in the readback.
        latest_cd_tests_log_text: Tests log text, classified by
            ``classify_cd_tests_log``.

    Returns:
        Dict with keys ``schema_version``, ``status``, ``readback``,
        ``rollups`` and ``operation_boundaries``.
    """
    # --- Visible runs from the HTML pages -------------------------------
    visible_runs = parse_visible_runs(actions_html)
    cd_workflow_visible_runs = parse_visible_runs(
        cd_workflow_actions_html,
        workflow_hint="cd.yaml",
    )
    combined_visible_runs = merge_visible_runs(visible_runs, cd_workflow_visible_runs)
    workflow_no_matching_labels = parse_workflow_no_matching_labels(actions_html)
    # First visible run whose status carries a "no matching runner" label.
    no_matching = next(
        (run for run in combined_visible_runs if run["no_matching_runner_label"]),
        {},
    )
    # First workflow-filter entry carrying a "no matching runner" label.
    workflow_no_matching = next(
        (
            {"workflow": workflow, "no_matching_runner_label": label}
            for workflow, label in workflow_no_matching_labels.items()
        ),
        {},
    )
    # "Latest" means first in the merged list (page order).
    latest_cd_run = next(
        (run for run in combined_visible_runs if run.get("workflow") == "cd.yaml"),
        {},
    )
    latest_harbor_110_repair_run = next(
        (
            run
            for run in combined_visible_runs
            if run.get("workflow") == "harbor-110-local-repair.yaml"
        ),
        {},
    )
    # True when a cd.yaml run was found only via the cd-filtered page.
    cd_workflow_fallback_used = bool(
        latest_cd_run
        and not any(run.get("workflow") == "cd.yaml" for run in visible_runs)
    )
    # --- Normalise API payloads (non-dict payloads become empty dicts) ---
    cd_jobs = cd_jobs_payload if isinstance(cd_jobs_payload, dict) else {}
    harbor_110_repair_jobs = (
        harbor_110_repair_jobs_payload
        if isinstance(harbor_110_repair_jobs_payload, dict)
        else {}
    )
    actions_list = actions_list_payload if isinstance(actions_list_payload, dict) else {}
    actions_list_message = str(actions_list.get("message") or "")
    jobs_total_count = _int(cd_jobs.get("total_count"))
    jobs = cd_jobs.get("jobs") if isinstance(cd_jobs.get("jobs"), list) else []
    harbor_jobs_total_count = _int(harbor_110_repair_jobs.get("total_count"))
    harbor_jobs = (
        harbor_110_repair_jobs.get("jobs")
        if isinstance(harbor_110_repair_jobs.get("jobs"), list)
        else []
    )
    latest_cd_run_id = latest_cd_run.get("run_id", "")
    latest_cd_commit_sha = latest_cd_run.get("commit_sha", "")
    # --- Summarise the cd jobs payload ----------------------------------
    job_head_shas = sorted(
        {
            str(job.get("head_sha") or "")
            for job in jobs
            if isinstance(job, dict) and job.get("head_sha")
        }
    )
    job_run_ids = sorted(
        {
            str(job.get("run_id") or "")
            for job in jobs
            if isinstance(job, dict) and job.get("run_id") is not None
        }
    )
    # Count jobs per conclusion, falling back to status, then "unknown".
    job_conclusion_counts: dict[str, int] = {}
    for job in jobs:
      if isinstance(job, dict):
          conclusion = str(job.get("conclusion") or job.get("status") or "unknown")
          job_conclusion_counts[conclusion] = job_conclusion_counts.get(conclusion, 0) + 1
    # --- Summarise the harbor repair jobs payload -----------------------
    harbor_job_conclusion_counts: dict[str, int] = {}
    harbor_job_run_ids: set[str] = set()
    harbor_job_names: set[str] = set()
    harbor_job_labels: set[str] = set()
    harbor_job_runner_names: set[str] = set()
    for job in harbor_jobs:
        if not isinstance(job, dict):
            continue
        job_name = str(job.get("name") or "")
        if job_name:
            harbor_job_names.add(job_name)
        conclusion = str(job.get("conclusion") or job.get("status") or "unknown")
        harbor_job_conclusion_counts[conclusion] = (
            harbor_job_conclusion_counts.get(conclusion, 0) + 1
        )
        if job.get("run_id") is not None:
            harbor_job_run_ids.add(str(job.get("run_id")))
        labels = job.get("labels")
        if isinstance(labels, list):
            harbor_job_labels.update(str(label) for label in labels if label)
        runner_name = str(job.get("runner_name") or "")
        if runner_name:
            harbor_job_runner_names.add(runner_name)
    # --- Cross-check the cd jobs payload against the visible cd run -----
    cd_jobs_head_sha_matches_visible = (
        bool(latest_cd_commit_sha)
        and latest_cd_commit_sha in job_head_shas
    )
    cd_jobs_run_id_matches_visible = (
        bool(latest_cd_run_id)
        and latest_cd_run_id in job_run_ids
    )
    # Only flagged when jobs were returned and a cd run is visible, but the
    # commit sha or the run id does not line up with that run.
    cd_jobs_stale_or_mismatched = (
        cd_jobs_http_status == 200
        and jobs_total_count > 0
        and bool(latest_cd_run)
        and (
            not cd_jobs_head_sha_matches_visible
            or not cd_jobs_run_id_matches_visible
        )
    )
    build_log_classifier = classify_cd_build_log(latest_cd_build_log_text)
    tests_log_classifier = classify_cd_tests_log(latest_cd_tests_log_text)
    latest_cd_visible_blocked = latest_cd_run.get("status", "") == "Blocked"
    # --- Harbor 110 repair run state ------------------------------------
    harbor_110_repair_status = latest_harbor_110_repair_run.get("status", "")
    harbor_110_repair_run_id = latest_harbor_110_repair_run.get("run_id", "")
    # Prefer the label on the run itself, else the workflow-filter label.
    harbor_110_repair_no_matching_runner_label = (
        latest_harbor_110_repair_run.get("no_matching_runner_label", "")
        or workflow_no_matching_labels.get("harbor-110-local-repair.yaml", "")
    )
    harbor_110_repair_waiting = harbor_110_repair_status == "Waiting"
    harbor_110_repair_running = harbor_110_repair_status == "Running"
    harbor_110_repair_status_blocked = harbor_110_repair_status == "Blocked"
    harbor_110_repair_jobs_run_id_matches_visible = (
        bool(harbor_110_repair_run_id)
        and harbor_110_repair_run_id in harbor_job_run_ids
    )
    harbor_110_repair_jobs_unexpected_names = sorted(
        harbor_job_names - EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
    )
    # All returned job names belong to the cd workflow instead.
    harbor_110_repair_jobs_cross_workflow_mismatch = (
        bool(harbor_job_names)
        and harbor_job_names.issubset(CD_WORKFLOW_JOB_NAMES)
    )
    harbor_110_repair_jobs_payload_classifier = (
        "cd_workflow_jobs_returned_for_harbor_110_repair_run"
        if harbor_110_repair_jobs_cross_workflow_mismatch
        else "unexpected_harbor_110_repair_job_names"
        if harbor_110_repair_jobs_unexpected_names
        else ""
    )
    harbor_110_repair_jobs_match_expected_workflow = (
        bool(harbor_job_names)
        and not harbor_110_repair_jobs_unexpected_names
        and "harbor-110-local-repair" in harbor_job_names
    )
    harbor_110_repair_jobs_stale_or_mismatched = (
        harbor_110_repair_jobs_http_status == 200
        and harbor_jobs_total_count > 0
        and not harbor_110_repair_jobs_match_expected_workflow
    )
    # Every reported job concluded "success" and belongs to the visible run.
    harbor_110_repair_jobs_all_success = (
        harbor_110_repair_jobs_http_status == 200
        and harbor_jobs_total_count > 0
        and harbor_110_repair_jobs_match_expected_workflow
        and harbor_job_conclusion_counts.get("success") == harbor_jobs_total_count
        and harbor_110_repair_jobs_run_id_matches_visible
    )
    # The page still shows "Waiting" although the jobs API reports success.
    harbor_110_repair_visible_waiting_stale = (
        harbor_110_repair_waiting and harbor_110_repair_jobs_all_success
    )
    harbor_110_repair_waiting_after_cd_harbor_blocker = (
        build_log_classifier["harbor_public_route_blocked_or_retrying"]
        and harbor_110_repair_waiting
    )
    harbor_110_repair_blocked = (
        harbor_110_repair_status_blocked
        or bool(harbor_110_repair_no_matching_runner_label)
        or harbor_110_repair_waiting_after_cd_harbor_blocker
    )

    # --- Detailed readback section --------------------------------------
    readback = {
        "actions_page_visible_run_count": len(visible_runs),
        "cd_workflow_actions_page_visible_run_count": len(cd_workflow_visible_runs),
        "cd_workflow_fallback_used": cd_workflow_fallback_used,
        "actions_list_without_token_http_status": actions_list_http_status,
        "actions_list_without_token_message": actions_list_message,
        "cd_run_jobs_http_status": cd_jobs_http_status,
        "cd_run_jobs_total_count": jobs_total_count,
        "cd_run_jobs_head_shas": job_head_shas,
        "cd_run_jobs_run_ids": job_run_ids,
        "cd_run_jobs_conclusion_counts": job_conclusion_counts,
        "cd_run_jobs_head_sha_matches_visible": cd_jobs_head_sha_matches_visible,
        "cd_run_jobs_run_id_matches_visible": cd_jobs_run_id_matches_visible,
        "cd_run_jobs_stale_or_mismatched": cd_jobs_stale_or_mismatched,
        "latest_visible_no_matching_runner_run_id": no_matching.get("run_id", ""),
        "latest_visible_no_matching_runner_workflow": no_matching.get(
            "workflow", workflow_no_matching.get("workflow", "")
        ),
        "latest_visible_no_matching_runner_kind": no_matching.get("kind", ""),
        "latest_visible_no_matching_runner_status": no_matching.get("status", ""),
        "latest_visible_no_matching_runner_label": no_matching.get(
            "no_matching_runner_label",
            workflow_no_matching.get("no_matching_runner_label", ""),
        ),
        "workflow_no_matching_runner_labels": workflow_no_matching_labels,
        "latest_visible_cd_run_id": latest_cd_run.get("run_id", ""),
        "latest_visible_cd_run_status": latest_cd_run.get("status", ""),
        "latest_visible_cd_run_blocked": latest_cd_visible_blocked,
        "latest_visible_cd_run_kind": latest_cd_run.get("kind", ""),
        "latest_visible_cd_run_title": latest_cd_run.get("title", ""),
        "latest_visible_cd_run_commit_sha": latest_cd_run.get("commit_sha", ""),
        "latest_visible_cd_build_log_http_status": latest_cd_build_log_http_status,
        "latest_visible_cd_failure_classifier": build_log_classifier[
            "failure_classifier"
        ],
        "latest_visible_cd_failure_status_code": build_log_classifier[
            "failure_status_code"
        ],
        "latest_visible_cd_inflight_classifier": build_log_classifier[
            "inflight_classifier"
        ],
        "latest_visible_cd_harbor_latest_registry_v2_status": (
            build_log_classifier["harbor_latest_registry_v2_status"]
        ),
        "latest_visible_cd_harbor_login_attempt_count": build_log_classifier[
            "harbor_login_attempt_count"
        ],
        "latest_visible_cd_harbor_controlled_repair_attempted": build_log_classifier[
            "harbor_controlled_repair_attempted"
        ],
        "latest_visible_cd_harbor_controlled_repair_skip_reason": build_log_classifier[
            "harbor_controlled_repair_skip_reason"
        ],
        "latest_visible_cd_harbor_controlled_repair_public_registry_v2_status": (
            build_log_classifier[
                "harbor_controlled_repair_public_registry_v2_status"
            ]
        ),
        "latest_visible_cd_harbor_public_route_blocked": build_log_classifier[
            "harbor_public_route_blocked"
        ],
        "latest_visible_cd_harbor_public_route_retrying_unavailable": (
            build_log_classifier["harbor_public_route_retrying_unavailable"]
        ),
        "latest_visible_cd_tests_log_http_status": latest_cd_tests_log_http_status,
        "latest_visible_harbor_110_repair_run_id": (
            latest_harbor_110_repair_run.get("run_id", "")
        ),
        "latest_visible_harbor_110_repair_run_status": harbor_110_repair_status,
        "latest_visible_harbor_110_repair_run_kind": (
            latest_harbor_110_repair_run.get("kind", "")
        ),
        "latest_visible_harbor_110_repair_run_title": (
            latest_harbor_110_repair_run.get("title", "")
        ),
        "latest_visible_harbor_110_repair_run_commit_sha": (
            latest_harbor_110_repair_run.get("commit_sha", "")
        ),
        "latest_visible_harbor_110_repair_no_matching_runner_label": (
            harbor_110_repair_no_matching_runner_label
        ),
        "latest_visible_harbor_110_repair_waiting": harbor_110_repair_waiting,
        "latest_visible_harbor_110_repair_running": harbor_110_repair_running,
        "latest_visible_harbor_110_repair_status_blocked": (
            harbor_110_repair_status_blocked
        ),
        "latest_visible_harbor_110_repair_blocked": harbor_110_repair_blocked,
        "harbor_110_repair_waiting_after_cd_harbor_blocker": (
            harbor_110_repair_waiting_after_cd_harbor_blocker
        ),
        "harbor_110_repair_jobs_http_status": harbor_110_repair_jobs_http_status,
        "harbor_110_repair_jobs_total_count": harbor_jobs_total_count,
        "harbor_110_repair_jobs_conclusion_counts": harbor_job_conclusion_counts,
        "harbor_110_repair_jobs_run_ids": sorted(harbor_job_run_ids),
        "harbor_110_repair_jobs_names": sorted(harbor_job_names),
        "harbor_110_repair_jobs_expected_names": sorted(
            EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
        ),
        "harbor_110_repair_jobs_unexpected_names": (
            harbor_110_repair_jobs_unexpected_names
        ),
        "harbor_110_repair_jobs_cross_workflow_mismatch": (
            harbor_110_repair_jobs_cross_workflow_mismatch
        ),
        "harbor_110_repair_jobs_payload_classifier": (
            harbor_110_repair_jobs_payload_classifier
        ),
        "harbor_110_repair_jobs_labels": sorted(harbor_job_labels),
        "harbor_110_repair_jobs_runner_names": sorted(harbor_job_runner_names),
        "harbor_110_repair_jobs_run_id_matches_visible": (
            harbor_110_repair_jobs_run_id_matches_visible
        ),
        "harbor_110_repair_jobs_match_expected_workflow": (
            harbor_110_repair_jobs_match_expected_workflow
        ),
        "harbor_110_repair_jobs_stale_or_mismatched": (
            harbor_110_repair_jobs_stale_or_mismatched
        ),
        "harbor_110_repair_jobs_all_success": harbor_110_repair_jobs_all_success,
        "harbor_110_repair_visible_waiting_stale": (
            harbor_110_repair_visible_waiting_stale
        ),
        "latest_visible_cd_host_pressure_classifier": tests_log_classifier[
            "host_pressure_classifier"
        ],
        "latest_visible_cd_host_pressure_attempt_count": tests_log_classifier[
            "host_pressure_attempt_count"
        ],
        "latest_visible_cd_host_pressure_attempt_limit": tests_log_classifier[
            "host_pressure_attempt_limit"
        ],
        "latest_visible_cd_host_pressure_latest_load5_per_core": tests_log_classifier[
            "latest_load5_per_core"
        ],
        "latest_visible_cd_host_pressure_load5_threshold": tests_log_classifier[
            "load5_per_core_threshold"
        ],
        "latest_visible_cd_host_pressure_waiting": tests_log_classifier[
            "host_pressure_waiting"
        ],
        "latest_visible_cd_host_pressure_refused": tests_log_classifier[
            "host_pressure_refused"
        ],
        "no_matching_online_runner_visible": bool(no_matching)
        or bool(workflow_no_matching_labels),
        # Run lists are truncated to the first ten entries.
        "top_visible_runs": combined_visible_runs[:10],
        "cd_workflow_visible_runs": cd_workflow_visible_runs[:10],
    }
    return {
        "schema_version": SCHEMA_VERSION,
        # Chained conditional expression: conditions are evaluated top to
        # bottom and the first true one selects the status, so the order of
        # the branches defines their precedence.
        "status": (
            "blocked_no_matching_online_runner"
            if no_matching
            else "blocked_latest_visible_cd_run"
            if latest_cd_visible_blocked
            else (
                "blocked_harbor_public_route_unavailable_after_harbor_110_repair_success"
            )
            if (
                build_log_classifier["harbor_public_route_blocked_or_retrying"]
                and harbor_110_repair_jobs_all_success
            )
            else "blocked_harbor_110_repair_no_matching_runner"
            if (
                build_log_classifier["harbor_public_route_blocked_or_retrying"]
                and harbor_110_repair_no_matching_runner_label
            )
            else "blocked_harbor_110_repair_jobs_stale_or_mismatched"
            if (
                build_log_classifier["harbor_public_route_blocked_or_retrying"]
                and harbor_110_repair_jobs_stale_or_mismatched
            )
            else "blocked_harbor_110_repair_workflow_waiting"
            if harbor_110_repair_waiting_after_cd_harbor_blocker
            else "blocked_harbor_public_route_unavailable"
            if build_log_classifier["harbor_public_route_blocked"]
            else "blocked_harbor_public_route_unavailable_pending_retry"
            if build_log_classifier["harbor_public_route_retrying_unavailable"]
            else "blocked_host_web_build_pressure"
            if tests_log_classifier["host_pressure_blocked_or_waiting"]
            else "blocked_harbor_110_repair_no_matching_runner"
            if harbor_110_repair_no_matching_runner_label
            else "blocked_no_matching_online_runner"
            if workflow_no_matching
            else "harbor_110_repair_jobs_stale_or_mismatched"
            if harbor_110_repair_jobs_stale_or_mismatched
            else "harbor_110_repair_waiting_for_runner_or_queue"
            if harbor_110_repair_waiting
            else "harbor_110_repair_running"
            if harbor_110_repair_running
            else "blocked_harbor_110_repair_run"
            if harbor_110_repair_blocked
            else "cd_jobs_stale_or_mismatched"
            if cd_jobs_stale_or_mismatched
            else "no_matching_runner_not_visible"
        ),
        "readback": readback,
        # Condensed subset of the readback values.
        "rollups": {
            "public_actions_readback_count": len(visible_runs),
            "cd_workflow_actions_readback_count": len(cd_workflow_visible_runs),
            "cd_workflow_fallback_used": cd_workflow_fallback_used,
            "actions_list_requires_token": actions_list_http_status == 401,
            "cd_run_jobs_total_count": jobs_total_count,
            "cd_run_jobs_stale_or_mismatched": cd_jobs_stale_or_mismatched,
            "current_main_cd_run_visible": bool(latest_cd_run),
            "current_main_cd_run_status": latest_cd_run.get("status", ""),
            "current_main_cd_run_blocked": latest_cd_visible_blocked,
            "current_main_cd_failure_classifier": build_log_classifier[
                "failure_classifier"
            ],
            "current_main_cd_inflight_classifier": build_log_classifier[
                "inflight_classifier"
            ],
            "current_main_cd_harbor_public_route_blocked": build_log_classifier[
                "harbor_public_route_blocked"
            ],
            "current_main_cd_harbor_public_route_retrying_unavailable": (
                build_log_classifier["harbor_public_route_retrying_unavailable"]
            ),
            "current_main_cd_harbor_latest_registry_v2_status": (
                build_log_classifier["harbor_latest_registry_v2_status"]
            ),
            "current_main_cd_harbor_controlled_repair_attempted": build_log_classifier[
                "harbor_controlled_repair_attempted"
            ],
            "current_main_cd_harbor_controlled_repair_skip_reason": (
                build_log_classifier["harbor_controlled_repair_skip_reason"]
            ),
            "current_main_cd_host_pressure_classifier": tests_log_classifier[
                "host_pressure_classifier"
            ],
            "current_main_cd_host_pressure_waiting": tests_log_classifier[
                "host_pressure_waiting"
            ],
            "current_main_cd_host_pressure_refused": tests_log_classifier[
                "host_pressure_refused"
            ],
            "no_matching_online_runner_visible": bool(no_matching)
            or bool(workflow_no_matching_labels),
            "harbor_110_repair_run_visible": bool(latest_harbor_110_repair_run),
            "harbor_110_repair_run_status": harbor_110_repair_status,
            "harbor_110_repair_waiting": harbor_110_repair_waiting,
            "harbor_110_repair_running": harbor_110_repair_running,
            "harbor_110_repair_blocked": harbor_110_repair_blocked,
            "harbor_110_repair_waiting_after_cd_harbor_blocker": (
                harbor_110_repair_waiting_after_cd_harbor_blocker
            ),
            "harbor_110_repair_no_matching_runner_label": (
                harbor_110_repair_no_matching_runner_label
            ),
            "harbor_110_repair_jobs_total_count": harbor_jobs_total_count,
            "harbor_110_repair_jobs_names": sorted(harbor_job_names),
            "harbor_110_repair_jobs_expected_names": sorted(
                EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
            ),
            "harbor_110_repair_jobs_stale_or_mismatched": (
                harbor_110_repair_jobs_stale_or_mismatched
            ),
            "harbor_110_repair_jobs_cross_workflow_mismatch": (
                harbor_110_repair_jobs_cross_workflow_mismatch
            ),
            "harbor_110_repair_jobs_payload_classifier": (
                harbor_110_repair_jobs_payload_classifier
            ),
            "harbor_110_repair_jobs_unexpected_names": (
                harbor_110_repair_jobs_unexpected_names
            ),
            "harbor_110_repair_jobs_all_success": (
                harbor_110_repair_jobs_all_success
            ),
            "harbor_110_repair_jobs_runner_names": sorted(harbor_job_runner_names),
            "harbor_110_repair_visible_waiting_stale": (
                harbor_110_repair_visible_waiting_stale
            ),
        },
        # Fixed declarations emitted with every payload; only the token flag
        # is computed (from the runs-list HTTP status).
        "operation_boundaries": {
            "public_gitea_read_only": True,
            "token_required_but_not_collected": actions_list_http_status == 401,
            "gitea_api_write_performed": False,
            "workflow_dispatch_performed": False,
            "host_write_performed": False,
            "runner_registration_performed": False,
            "runner_service_start_performed": False,
            "secret_or_runner_token_read": False,
            "github_api_used": False,
        },
    }


def classify_cd_build_log(text: str) -> dict[str, Any]:
    """Classify Harbor registry reachability markers found in a build log.

    Looks for login-attempt lines, the terminal BLOCKER line and the
    controlled-repair markers. The log counts as "retrying unavailable" when
    no BLOCKER line is present, at least one attempt was logged, and the most
    recent attempt's registry status is neither "200" nor "401".

    Args:
        text: Build job log text (may be empty).

    Returns:
        Dict of classifier strings, counters and booleans; string fields are
        empty and counters are 0 when the corresponding marker is absent.
    """
    attempts = [
        (_int(hit.group("attempt")), hit.group("status"))
        for hit in _HARBOR_ATTEMPT_RE.finditer(text)
    ]
    attempt_ids = [number for number, _ in attempts]
    registry_statuses = [status for _, status in attempts]
    newest_status = registry_statuses[-1] if registry_statuses else ""

    blocker_hit = _HARBOR_BLOCKER_RE.search(text)
    is_blocked = blocker_hit is not None

    is_retrying = False
    if not is_blocked and registry_statuses:
        is_retrying = newest_status not in {"200", "401"}

    skip_hits = list(_HARBOR_CONTROLLED_REPAIR_SKIP_RE.finditer(text))
    repair_status_hits = list(_HARBOR_CONTROLLED_REPAIR_STATUS_RE.finditer(text))
    # Any one of the three repair markers means a repair check was attempted.
    repair_was_attempted = any(
        (
            "harbor_controlled_repair_check_start=1" in text,
            skip_hits,
            repair_status_hits,
        )
    )

    summary: dict[str, Any] = {}
    summary["failure_classifier"] = (
        "harbor_registry_public_route_unavailable" if is_blocked else ""
    )
    summary["failure_status_code"] = (
        blocker_hit.group("status") if blocker_hit else ""
    )
    summary["inflight_classifier"] = (
        "harbor_registry_public_route_unavailable_pending_retry"
        if is_retrying
        else ""
    )
    summary["harbor_latest_registry_v2_status"] = newest_status
    summary["harbor_login_attempt_count"] = max(attempt_ids) if attempt_ids else 0
    summary["harbor_controlled_repair_attempted"] = repair_was_attempted
    # For repeated markers the last occurrence in the log wins.
    summary["harbor_controlled_repair_skip_reason"] = (
        skip_hits[-1].group("reason") if skip_hits else ""
    )
    summary["harbor_controlled_repair_public_registry_v2_status"] = (
        repair_status_hits[-1].group("status") if repair_status_hits else ""
    )
    summary["harbor_public_route_blocked"] = is_blocked
    summary["harbor_public_route_retrying_unavailable"] = is_retrying
    summary["harbor_public_route_blocked_or_retrying"] = is_blocked or is_retrying
    # Only the twelve most recent attempt statuses are reported.
    summary["harbor_registry_v2_statuses"] = registry_statuses[-12:]
    return summary


def classify_cd_tests_log(text: str) -> dict[str, Any]:
    """Classify host-pressure markers found in a tests job log.

    A refusal line marks the log as "refused"; otherwise any pressure-attempt
    line marks it as "waiting". Load and threshold values are taken from the
    last "host load5/core" line and kept as the strings found in the log.

    Args:
        text: Tests job log text (may be empty).

    Returns:
        Dict with the classifier string, attempt count/limit (0 when absent),
        latest load/threshold (empty when absent) and the boolean flags.
    """
    attempts = [
        (_int(hit.group("attempt")), _int(hit.group("limit")))
        for hit in _HOST_PRESSURE_ATTEMPT_RE.finditer(text)
    ]
    attempt_ids = [number for number, _ in attempts]
    limits = [limit for _, limit in attempts]

    load_hits = list(_HOST_PRESSURE_LOAD_RE.finditer(text))
    if load_hits:
        newest_load = load_hits[-1].group("load")
        newest_threshold = load_hits[-1].group("threshold")
    else:
        newest_load = ""
        newest_threshold = ""

    refused = _HOST_PRESSURE_REFUSAL_RE.search(text) is not None
    # A refusal supersedes "waiting", even if attempts were logged.
    waiting = bool(attempt_ids) and not refused

    if refused:
        classifier = "host_web_build_pressure_refused"
    elif waiting:
        classifier = "host_web_build_pressure_waiting"
    else:
        classifier = ""

    return {
        "host_pressure_classifier": classifier,
        "host_pressure_attempt_count": max(attempt_ids) if attempt_ids else 0,
        "host_pressure_attempt_limit": max(limits) if limits else 0,
        "latest_load5_per_core": newest_load,
        "load5_per_core_threshold": newest_threshold,
        "host_pressure_waiting": waiting,
        "host_pressure_refused": refused,
        "host_pressure_blocked_or_waiting": waiting or refused,
    }


def load_json_text(text: str) -> Any:
    """Decode *text* as JSON, wrapping undecodable text in a message dict.

    Args:
        text: The raw response or file body.

    Returns:
        The decoded JSON value, or ``{"message": <stripped text>}`` when
        *text* is not valid JSON.
    """
    try:
        decoded = json.loads(text)
    except json.JSONDecodeError:
        # Keep the raw body visible to the caller instead of failing.
        decoded = {"message": text.strip()}
    return decoded


def load_json_file(path: Path) -> Any:
    """Read *path* as UTF-8 text and decode it with :func:`load_json_text`.

    Args:
        path: File to read.

    Returns:
        Whatever :func:`load_json_text` returns for the file's contents.
    """
    raw_text = path.read_text(encoding="utf-8")
    return load_json_text(raw_text)


def derive_jobs_api_url(actions_list_api_url: str, run_id: str) -> str:
    """Build the per-run jobs API URL from the runs-list API URL.

    A trailing ``/actions/runs`` (plus any query string) in
    *actions_list_api_url* is replaced with ``/actions/runs/<run_id>/jobs``.

    Args:
        actions_list_api_url: URL of the runs-list endpoint.
        run_id: Identifier of the run whose jobs are wanted.

    Returns:
        The rewritten URL, or ``""`` when *run_id* is empty.  A URL that does
        not end in ``/actions/runs`` is returned unchanged.
    """
    if run_id:
        jobs_path = f"/actions/runs/{run_id}/jobs"
        return re.sub(r"/actions/runs(?:\?.*)?$", jobs_path, actions_list_api_url)
    return ""


def _int(value: Any) -> int:
    """Coerce *value* to ``int``, returning 0 when it cannot be converted.

    Args:
        value: Any object; typically a numeric string or a number.

    Returns:
        ``int(value)``, or 0 when the conversion raises ``TypeError``,
        ``ValueError`` or ``OverflowError``.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # int(float("inf")) raises OverflowError while int(float("nan"))
        # raises ValueError; treat both as "not a usable integer".
        return 0


def _read_text_file(path: Path) -> str:
    """Return the full contents of *path*, decoded as UTF-8 text."""
    with path.open("r", encoding="utf-8") as handle:
        return handle.read()


def _human_summary(payload: dict[str, Any]) -> str:
    """Render *payload* as newline-terminated ``KEY=value`` lines.

    Args:
        payload: Mapping with a ``"status"`` entry and a ``"readback"``
            mapping that holds every field listed below.

    Returns:
        One ``KEY=value`` line per field, in a fixed order, followed by the
        constant ``WRITE_PERFORMED=false`` and ``TOKEN_COLLECTED=false``
        lines.  The text ends with a newline.

    Raises:
        KeyError: If ``payload`` or its ``"readback"`` mapping lacks a field.
    """
    readback = payload["readback"]

    # (output prefix, readback key, render through int()), in output order.
    # Fields marked True are printed as integers (0/1 for booleans).
    field_table = (
        (
            "ACTIONS_LIST_WITHOUT_TOKEN_HTTP_STATUS=",
            "actions_list_without_token_http_status",
            False,
        ),
        ("CD_RUN_JOBS_TOTAL_COUNT=", "cd_run_jobs_total_count", False),
        (
            "NO_MATCHING_ONLINE_RUNNER_VISIBLE=",
            "no_matching_online_runner_visible",
            True,
        ),
        (
            "LATEST_NO_MATCHING_RUNNER_LABEL=",
            "latest_visible_no_matching_runner_label",
            False,
        ),
        ("LATEST_VISIBLE_CD_RUN_ID=", "latest_visible_cd_run_id", False),
        ("LATEST_VISIBLE_CD_RUN_STATUS=", "latest_visible_cd_run_status", False),
        (
            "LATEST_VISIBLE_CD_FAILURE_CLASSIFIER=",
            "latest_visible_cd_failure_classifier",
            False,
        ),
        (
            "LATEST_VISIBLE_CD_INFLIGHT_CLASSIFIER=",
            "latest_visible_cd_inflight_classifier",
            False,
        ),
        (
            "LATEST_VISIBLE_CD_HARBOR_LATEST_REGISTRY_V2_STATUS=",
            "latest_visible_cd_harbor_latest_registry_v2_status",
            False,
        ),
        (
            "LATEST_VISIBLE_CD_HOST_PRESSURE_CLASSIFIER=",
            "latest_visible_cd_host_pressure_classifier",
            False,
        ),
        (
            "LATEST_VISIBLE_HARBOR_110_REPAIR_RUN_ID=",
            "latest_visible_harbor_110_repair_run_id",
            False,
        ),
        (
            "LATEST_VISIBLE_HARBOR_110_REPAIR_RUN_STATUS=",
            "latest_visible_harbor_110_repair_run_status",
            False,
        ),
        (
            "LATEST_VISIBLE_HARBOR_110_REPAIR_NO_MATCHING_RUNNER_LABEL=",
            "latest_visible_harbor_110_repair_no_matching_runner_label",
            False,
        ),
        (
            "HARBOR_110_REPAIR_WAITING_AFTER_CD_HARBOR_BLOCKER=",
            "harbor_110_repair_waiting_after_cd_harbor_blocker",
            True,
        ),
        (
            "HARBOR_110_REPAIR_JOBS_ALL_SUCCESS=",
            "harbor_110_repair_jobs_all_success",
            True,
        ),
        (
            "HARBOR_110_REPAIR_JOBS_STALE_OR_MISMATCHED=",
            "harbor_110_repair_jobs_stale_or_mismatched",
            True,
        ),
        (
            "HARBOR_110_REPAIR_VISIBLE_WAITING_STALE=",
            "harbor_110_repair_visible_waiting_stale",
            True,
        ),
    )

    summary = [f"AWOOOI_PUBLIC_GITEA_ACTIONS_QUEUE_STATUS={payload['status']}"]
    for prefix, key, as_int in field_table:
        value = readback[key]
        summary.append(f"{prefix}{int(value) if as_int else value}")
    summary.extend(("WRITE_PERFORMED=false", "TOKEN_COLLECTED=false"))
    return "\n".join(summary) + "\n"


def _main_build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser used by :func:`main`."""
    parser = argparse.ArgumentParser(
        description=(
            "Read public Gitea Actions queue state without credentials, dispatch, "
            "runner registration, host access, or secret reads."
        )
    )
    # Live endpoints; each has a module-level default.
    parser.add_argument("--actions-url", default=DEFAULT_ACTIONS_URL)
    parser.add_argument(
        "--cd-workflow-actions-url",
        default=DEFAULT_CD_WORKFLOW_ACTIONS_URL,
    )
    parser.add_argument("--actions-list-api-url", default=DEFAULT_ACTIONS_LIST_API_URL)
    parser.add_argument("--cd-run-jobs-api-url", default=DEFAULT_CD_RUN_JOBS_API_URL)
    parser.add_argument(
        "--cd-build-job-log-url-template",
        default=DEFAULT_CD_BUILD_JOB_LOG_URL_TEMPLATE,
    )
    parser.add_argument(
        "--cd-tests-job-log-url-template",
        default=DEFAULT_CD_TESTS_JOB_LOG_URL_TEMPLATE,
    )
    parser.add_argument("--timeout-seconds", type=float, default=10.0)
    # Fixture inputs: when a *-file option is given, that file is read instead
    # of fetching, and the matching *-http-status value (or 0) is reported.
    parser.add_argument("--actions-html-file", type=Path)
    parser.add_argument("--cd-workflow-actions-html-file", type=Path)
    parser.add_argument("--actions-list-json-file", type=Path)
    parser.add_argument("--actions-list-http-status", type=int)
    parser.add_argument("--cd-run-jobs-json-file", type=Path)
    parser.add_argument("--cd-run-jobs-http-status", type=int)
    parser.add_argument("--harbor-110-repair-jobs-json-file", type=Path)
    parser.add_argument("--harbor-110-repair-jobs-http-status", type=int)
    parser.add_argument("--cd-build-job-log-file", type=Path)
    parser.add_argument("--cd-build-job-log-http-status", type=int)
    parser.add_argument("--cd-tests-job-log-file", type=Path)
    parser.add_argument("--cd-tests-job-log-http-status", type=int)
    parser.add_argument("--skip-cd-build-job-log-read", action="store_true")
    parser.add_argument("--skip-cd-tests-job-log-read", action="store_true")
    parser.add_argument("--json", action="store_true")
    return parser


def _main_first_run_id_for_workflow(runs: Any, workflow: str) -> Any:
    """Return the ``run_id`` of the first run in *runs* for *workflow*, else ``""``."""
    first_match = next(
        (run for run in runs if run.get("workflow") == workflow),
        {},
    )
    return first_match.get("run_id", "")


def _main_read_jobs_payload(
    *,
    fixture_file: Path | None,
    fixture_http_status: int | None,
    override_api_url: str,
    actions_list_api_url: str,
    runs: Any,
    workflow: str,
    timeout_seconds: float,
) -> tuple[int, Any]:
    """Return ``(http_status, payload)`` for one workflow's run-jobs listing.

    A fixture file wins over any fetch.  Otherwise the jobs URL is
    *override_api_url* when non-empty, else it is derived from
    *actions_list_api_url* and the first visible run of *workflow*.  With no
    usable URL an empty jobs payload and status 0 are returned.
    """
    if fixture_file:
        return (fixture_http_status or 0), load_json_file(fixture_file)
    run_id = _main_first_run_id_for_workflow(runs, workflow)
    jobs_api_url = override_api_url or derive_jobs_api_url(
        actions_list_api_url,
        run_id,
    )
    if not jobs_api_url:
        return 0, {"jobs": [], "total_count": 0}
    jobs_read = fetch_public_url(jobs_api_url, timeout_seconds)
    return jobs_read.http_status, load_json_text(jobs_read.text)


def _main_read_job_log(
    *,
    fixture_file: Path | None,
    fixture_http_status: int | None,
    skip_read: bool,
    url_template: str,
    runs: Any,
    timeout_seconds: float,
) -> tuple[int, str]:
    """Return ``(http_status, log_text)`` for a job log of the first cd.yaml run.

    A fixture file wins over everything; *skip_read* or the absence of a
    visible cd.yaml run yields status 0 and empty text without any fetch.
    """
    if fixture_file:
        return (fixture_http_status or 0), _read_text_file(fixture_file)
    if skip_read:
        return 0, ""
    run_id = _main_first_run_id_for_workflow(runs, "cd.yaml")
    if not run_id:
        return 0, ""
    log_read = fetch_public_url(url_template.format(run_id=run_id), timeout_seconds)
    return log_read.http_status, log_read.text


def main(argv: list[str] | None = None) -> int:
    """Collect the public Actions inputs, build the readback, and print it.

    Each input (Actions HTML, cd.yaml-filtered HTML, runs-list JSON, cd and
    harbor-110 repair job listings, cd build/tests job logs) comes from a
    fixture file when the matching option is supplied and from
    ``fetch_public_url`` otherwise.  The result of ``build_readback`` is
    written to stdout as JSON with ``--json`` and as ``KEY=value`` lines
    without it.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        Always 0.
    """
    args = _main_build_arg_parser().parse_args(argv)

    if args.actions_html_file:
        actions_html = _read_text_file(args.actions_html_file)
    else:
        actions_html = fetch_public_url(args.actions_url, args.timeout_seconds).text

    # The cd.yaml-filtered page is only fetched when running live (no Actions
    # HTML fixture) and the main page shows no cd.yaml run.
    if args.cd_workflow_actions_html_file:
        cd_workflow_actions_html = _read_text_file(args.cd_workflow_actions_html_file)
    elif args.actions_html_file or any(
        run.get("workflow") == "cd.yaml" for run in parse_visible_runs(actions_html)
    ):
        cd_workflow_actions_html = ""
    else:
        cd_workflow_actions_html = fetch_public_url(
            args.cd_workflow_actions_url,
            args.timeout_seconds,
        ).text
    actions_lookup_runs = merge_visible_runs(
        parse_visible_runs(actions_html),
        parse_visible_runs(cd_workflow_actions_html, workflow_hint="cd.yaml"),
    )

    if args.actions_list_json_file:
        actions_list_http_status = args.actions_list_http_status or 0
        actions_list_payload = load_json_file(args.actions_list_json_file)
    else:
        actions_list_read = fetch_public_url(
            args.actions_list_api_url,
            args.timeout_seconds,
        )
        actions_list_http_status = actions_list_read.http_status
        actions_list_payload = load_json_text(actions_list_read.text)

    cd_jobs_http_status, cd_jobs_payload = _main_read_jobs_payload(
        fixture_file=args.cd_run_jobs_json_file,
        fixture_http_status=args.cd_run_jobs_http_status,
        override_api_url=args.cd_run_jobs_api_url,
        actions_list_api_url=args.actions_list_api_url,
        runs=actions_lookup_runs,
        workflow="cd.yaml",
        timeout_seconds=args.timeout_seconds,
    )

    # The harbor-110 repair listing has no URL override option; its URL is
    # always derived from the runs-list API URL.
    (
        harbor_110_repair_jobs_http_status,
        harbor_110_repair_jobs_payload,
    ) = _main_read_jobs_payload(
        fixture_file=args.harbor_110_repair_jobs_json_file,
        fixture_http_status=args.harbor_110_repair_jobs_http_status,
        override_api_url="",
        actions_list_api_url=args.actions_list_api_url,
        runs=actions_lookup_runs,
        workflow="harbor-110-local-repair.yaml",
        timeout_seconds=args.timeout_seconds,
    )

    cd_build_job_log_http_status, cd_build_job_log_text = _main_read_job_log(
        fixture_file=args.cd_build_job_log_file,
        fixture_http_status=args.cd_build_job_log_http_status,
        skip_read=args.skip_cd_build_job_log_read,
        url_template=args.cd_build_job_log_url_template,
        runs=actions_lookup_runs,
        timeout_seconds=args.timeout_seconds,
    )

    cd_tests_job_log_http_status, cd_tests_job_log_text = _main_read_job_log(
        fixture_file=args.cd_tests_job_log_file,
        fixture_http_status=args.cd_tests_job_log_http_status,
        skip_read=args.skip_cd_tests_job_log_read,
        url_template=args.cd_tests_job_log_url_template,
        runs=actions_lookup_runs,
        timeout_seconds=args.timeout_seconds,
    )

    payload = build_readback(
        actions_html=actions_html,
        cd_workflow_actions_html=cd_workflow_actions_html,
        actions_list_http_status=actions_list_http_status,
        actions_list_payload=actions_list_payload,
        cd_jobs_http_status=cd_jobs_http_status,
        cd_jobs_payload=cd_jobs_payload,
        harbor_110_repair_jobs_http_status=harbor_110_repair_jobs_http_status,
        harbor_110_repair_jobs_payload=harbor_110_repair_jobs_payload,
        latest_cd_build_log_http_status=cd_build_job_log_http_status,
        latest_cd_build_log_text=cd_build_job_log_text,
        latest_cd_tests_log_http_status=cd_tests_job_log_http_status,
        latest_cd_tests_log_text=cd_tests_job_log_text,
    )
    if args.json:
        json.dump(payload, sys.stdout, ensure_ascii=False, indent=2, sort_keys=True)
        sys.stdout.write("\n")
    else:
        sys.stdout.write(_human_summary(payload))
    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())