Files
awoooi/ops/runner/read-public-gitea-actions-queue.py
Your Name 77f9bb0417
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 35s
CD Pipeline / build-and-deploy (push) Failing after 2m38s
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 0s
CD Pipeline / post-deploy-checks (push) Has been skipped
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Has been cancelled
fix(recovery): classify harbor repair cross-workflow jobs
2026-07-01 07:38:08 +08:00

1097 lines
44 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import html
import json
import re
import sys
import urllib.error
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from urllib.parse import unquote
# ---------------------------------------------------------------------------
# Default endpoints. Each is the default value of a CLI option defined in
# main(), so every one of them can be overridden from the command line.
# ---------------------------------------------------------------------------

# HTML Actions overview page of the repository.
DEFAULT_ACTIONS_URL = "https://gitea.wooo.work/wooo/awoooi/actions"
# Same page, pre-filtered to the cd.yaml workflow.
DEFAULT_CD_WORKFLOW_ACTIONS_URL = (
    "https://gitea.wooo.work/wooo/awoooi/actions?workflow=cd.yaml&actor=0&status=0"
)
# JSON API listing of recent runs. build_readback() reports a 401 from this
# endpoint as "token required".
DEFAULT_ACTIONS_LIST_API_URL = (
    "https://gitea.wooo.work/api/v1/repos/wooo/awoooi/actions/runs?limit=10"
)
# Empty by default.
# NOTE(review): presumably the caller derives the jobs URL from the list API
# URL via derive_jobs_api_url() when this is empty — confirm in main().
DEFAULT_CD_RUN_JOBS_API_URL = ""
# Log URL templates; ``{run_id}`` is the placeholder for the run id.
# NOTE(review): the job indexes (3 = build, 2 = tests) are hard-coded here and
# presumably mirror the job order of cd.yaml — confirm against the workflow.
DEFAULT_CD_BUILD_JOB_LOG_URL_TEMPLATE = (
    "https://gitea.wooo.work/wooo/awoooi/actions/runs/{run_id}/jobs/3/logs"
)
DEFAULT_CD_TESTS_JOB_LOG_URL_TEMPLATE = (
    "https://gitea.wooo.work/wooo/awoooi/actions/runs/{run_id}/jobs/2/logs"
)
# Emitted verbatim as the "schema_version" field of the readback payload.
SCHEMA_VERSION = "awoooi_public_gitea_actions_queue_readback_v1"
# Job names accepted in a jobs payload for the harbor-110-local-repair run;
# any other name is reported as unexpected by build_readback().
EXPECTED_HARBOR_110_REPAIR_JOB_NAMES = {
    "workflow-shape",
    "harbor-110-local-repair",
}
# Job names of the CD workflow. A harbor repair jobs payload made up only of
# these names is classified as a cross-workflow mismatch by build_readback().
CD_WORKFLOW_JOB_NAMES = {
    "build-and-deploy",
    "post-deploy-checks",
    "tests",
}

# ---------------------------------------------------------------------------
# Patterns for scraping the Actions HTML page.
# ---------------------------------------------------------------------------

# Fallback row pattern: (tooltip text, bold label, text after the label).
# parse_visible_runs() uses it only when the structured parse finds nothing.
_RUN_ROW_RE = re.compile(
    r'<span data-tooltip-content="([^"]+)">.*?'
    r"<span><b>([^<]+)</b>:</span>([^<]+)</div>",
    re.S,
)
# Marker on which the page is split into one chunk per run item.
_RUN_ITEM_SPLIT_RE = re.compile(r'<div class="flex-item tw-items-center">')
# Run link: (title attribute, numeric run id).
_RUN_LINK_RE = re.compile(
    r'<a class="flex-item-title" title="([^"]*)" '
    r'href="/wooo/awoooi/actions/runs/(\d+)">',
    re.S,
)
# Tooltip text of a run item, used as its status.
_RUN_STATUS_RE = re.compile(r'<span data-tooltip-content="([^"]+)">')
# Bold label and the text that follows it: (name, kind).
_RUN_BODY_RE = re.compile(r"<span><b>([^<]+)</b>:</span>\s*([^<]+)", re.S)
# Full 40-hex commit SHA taken from a commit link.
_RUN_COMMIT_RE = re.compile(r"/wooo/awoooi/commit/([0-9a-f]{40})")
# Run name of the form "<workflow> #<run_id>".
_RUN_NAME_RE = re.compile(r"^(?P<workflow>.+)\s+#(?P<run_id>\d+)$")
# Workflow filter link: the ``workflow`` query value and the link body.
_WORKFLOW_FILTER_ITEM_RE = re.compile(
    r'<a class="item" href="\?workflow=(?P<workflow>[^"&]+)[^"]*">'
    r"(?P<body>.*?)</a>",
    re.S,
)
# Runner label named in a "No matching online runner" message.
_NO_MATCHING_LABEL_RE = re.compile(
    r"No matching online runner with label:\s*(?P<label>[A-Za-z0-9_.:-]+)"
)

# ---------------------------------------------------------------------------
# Patterns for markers in the CD build job log.
# ---------------------------------------------------------------------------

# One Harbor login attempt: attempt number and the registry /v2 HTTP status.
_HARBOR_ATTEMPT_RE = re.compile(
    r"harbor_login_attempt=(?P<attempt>\d+)\s+registry_v2_status=(?P<status>\d{3})"
)
# Terminal blocker line with the HTTP status it reported.
_HARBOR_BLOCKER_RE = re.compile(
    r"BLOCKER harbor_registry_public_route_unavailable "
    r"registry_v2_status=(?P<status>\d{3})"
)
# Reason given when the controlled repair step was skipped.
_HARBOR_CONTROLLED_REPAIR_SKIP_RE = re.compile(
    r"harbor_controlled_repair_skipped=(?P<reason>[A-Za-z0-9_.:-]+)"
)
# Registry /v2 status reported by the controlled repair step.
_HARBOR_CONTROLLED_REPAIR_STATUS_RE = re.compile(
    r"harbor_controlled_repair_public_registry_v2_status=(?P<status>\d{3})"
)

# ---------------------------------------------------------------------------
# Patterns for markers in the CD tests job log.
# ---------------------------------------------------------------------------

# Host pressure wait line: current attempt and the attempt limit.
_HOST_PRESSURE_ATTEMPT_RE = re.compile(
    r"host web/build/smoke pressure detected "
    r"\(attempt (?P<attempt>\d+)/(?P<limit>\d+)\)"
)
# Load reading that exceeded its threshold.
_HOST_PRESSURE_LOAD_RE = re.compile(
    r"host load5/core (?P<load>[0-9.]+) > (?P<threshold>[0-9.]+)"
)
# Final refusal line.
# NOTE(review): "AWOOI" has two O's, unlike "AWOOOI" elsewhere in this file;
# it must match the log text emitted by the workflow — confirm before changing.
_HOST_PRESSURE_REFUSAL_RE = re.compile(
    r"refusing to start AWOOI image build while host web/build/smoke pressure is still active"
)
@dataclass(frozen=True)
class HttpRead:
    """Immutable result of a single HTTP read: status code plus decoded body."""

    # Numeric HTTP status code of the response.
    http_status: int
    # Response body as text.
    text: str
def fetch_public_url(url: str, timeout_seconds: float) -> HttpRead:
    """Fetch *url* without credentials and return its status and decoded body.

    HTTP error responses are not raised to the caller: their status code and
    body are returned in the same shape as a successful read. Any other
    failure raised by ``urlopen`` (for example ``urllib.error.URLError``)
    propagates unchanged.

    Args:
        url: Address to read.
        timeout_seconds: Timeout passed to ``urllib.request.urlopen``.

    Returns:
        An ``HttpRead`` whose text is the body decoded as UTF-8, with
        undecodable bytes replaced.
    """
    request = urllib.request.Request(
        url,
        headers={"User-Agent": "awoooi-public-gitea-actions-readback/1.0"},
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
            raw = response.read()
            status = int(getattr(response, "status", 200))
    except urllib.error.HTTPError as exc:
        # HTTPError doubles as the response object and holds the underlying
        # connection open; close it once the body has been read.
        try:
            raw = exc.read()
        finally:
            exc.close()
        status = int(exc.code)
    return HttpRead(
        http_status=status,
        text=raw.decode("utf-8", errors="replace"),
    )
def parse_visible_runs(
    actions_html: str,
    *,
    workflow_hint: str = "",
) -> list[dict[str, str]]:
    """Extract the run entries shown on an Actions HTML page.

    The page is first split into per-run chunks. A chunk becomes a run only
    when its status tooltip, run link and body label are all present. If that
    yields no run at all, the whole page is re-scanned with the looser row
    pattern, which supplies only status, name and kind.

    Args:
        actions_html: Raw HTML of the Actions page.
        workflow_hint: Workflow name forwarded to the row builder.

    Returns:
        One dict per visible run, in page order.
    """
    structured: list[dict[str, str]] = []
    fragments = _RUN_ITEM_SPLIT_RE.split(actions_html)
    # The text before the first marker is page preamble, not a run.
    for fragment in fragments[1:]:
        status_hit = _RUN_STATUS_RE.search(fragment)
        link_hit = _RUN_LINK_RE.search(fragment)
        body_hit = _RUN_BODY_RE.search(fragment)
        if status_hit is None or link_hit is None or body_hit is None:
            continue
        title, run_id = link_hit.group(1), link_hit.group(2)
        name, kind = body_hit.group(1), body_hit.group(2)
        commit_hit = _RUN_COMMIT_RE.search(fragment)
        commit_sha = "" if commit_hit is None else commit_hit.group(1)
        structured.append(
            _visible_run_from_parts(
                raw_status=status_hit.group(1),
                raw_name=name,
                raw_kind=kind,
                raw_run_id=run_id,
                raw_title=title,
                raw_commit_sha=commit_sha,
                workflow_hint=workflow_hint,
            )
        )
    if structured:
        return structured
    # Fallback: the row pattern carries no link, title or commit information.
    return [
        _visible_run_from_parts(
            raw_status=row_status,
            raw_name=row_name,
            raw_kind=row_kind,
            workflow_hint=workflow_hint,
        )
        for row_status, row_name, row_kind in _RUN_ROW_RE.findall(actions_html)
    ]
def parse_workflow_no_matching_labels(actions_html: str) -> dict[str, str]:
    """Map each workflow filter entry to the runner label it reports missing.

    Only filter links whose body contains a "No matching online runner with
    label" message contribute an entry. The workflow name is URL-unquoted,
    HTML-unescaped and stripped before being used as the key.
    """
    missing_by_workflow: dict[str, str] = {}
    for item in _WORKFLOW_FILTER_ITEM_RE.finditer(actions_html):
        label_hit = _NO_MATCHING_LABEL_RE.search(item.group("body"))
        if label_hit is not None:
            decoded_name = unquote(item.group("workflow"))
            workflow_name = html.unescape(decoded_name).strip()
            missing_by_workflow[workflow_name] = label_hit.group("label")
    return missing_by_workflow
def merge_visible_runs(
    primary_runs: list[dict[str, str]],
    fallback_runs: list[dict[str, str]],
) -> list[dict[str, str]]:
    """Concatenate two run lists, keeping the first run per (workflow, run_id).

    Primary runs come first, so they win over fallback runs that share the
    same identity. Missing ``workflow`` or ``run_id`` keys count as "".
    """
    first_by_identity: dict[tuple[str, str], dict[str, str]] = {}
    for source in (primary_runs, fallback_runs):
        for entry in source:
            identity = (entry.get("workflow", ""), entry.get("run_id", ""))
            # setdefault keeps the earliest entry; dicts preserve insertion order.
            first_by_identity.setdefault(identity, entry)
    return list(first_by_identity.values())
def _visible_run_from_parts(
    *,
    raw_status: str,
    raw_name: str,
    raw_kind: str,
    raw_run_id: str = "",
    raw_title: str = "",
    raw_commit_sha: str = "",
    workflow_hint: str = "",
) -> dict[str, str]:
    """Build one normalised run record from raw HTML fragments.

    Text fields are HTML-unescaped and stripped. When the name has the form
    "<workflow> #<id>", the workflow and (absent an explicit ``raw_run_id``)
    the run id are taken from it. Otherwise the workflow falls back to
    ``workflow_hint`` and then to the name itself.
    """
    display_name = html.unescape(raw_name).strip()
    kind_text = html.unescape(raw_kind).strip()
    status_text = html.unescape(raw_status).strip()

    parsed_name = _RUN_NAME_RE.match(display_name)
    if parsed_name is None:
        workflow = workflow_hint or display_name
        run_id = raw_run_id or ""
    else:
        workflow = parsed_name.group("workflow")
        run_id = raw_run_id or parsed_name.group("run_id")

    # The status tooltip may itself name the runner label that is missing.
    label_hit = _NO_MATCHING_LABEL_RE.search(status_text)
    missing_label = "" if label_hit is None else label_hit.group("label")

    return {
        "run_id": run_id,
        "workflow": workflow,
        "kind": kind_text,
        "status": status_text,
        "title": html.unescape(raw_title).strip(),
        "commit_sha": raw_commit_sha,
        "no_matching_runner_label": missing_label,
    }
def build_readback(
    *,
    actions_html: str,
    cd_workflow_actions_html: str = "",
    actions_list_http_status: int,
    actions_list_payload: Any,
    cd_jobs_http_status: int,
    cd_jobs_payload: Any,
    harbor_110_repair_jobs_http_status: int = 0,
    harbor_110_repair_jobs_payload: Any | None = None,
    latest_cd_build_log_http_status: int = 0,
    latest_cd_build_log_text: str = "",
    latest_cd_tests_log_http_status: int = 0,
    latest_cd_tests_log_text: str = "",
) -> dict[str, Any]:
    """Combine already-fetched Gitea data into one classified readback payload.

    This function performs no I/O; every input is text or decoded JSON that
    the caller obtained beforehand.

    Args:
        actions_html: HTML of the repository Actions page.
        cd_workflow_actions_html: HTML of the Actions page filtered to
            cd.yaml; its runs are merged in after those of ``actions_html``.
        actions_list_http_status: HTTP status of the runs list API read.
        actions_list_payload: Decoded body of that read; only a dict's
            ``message`` field is used.
        cd_jobs_http_status: HTTP status of the CD run jobs API read.
        cd_jobs_payload: Decoded body of that read; ``total_count`` and
            ``jobs`` are used when it is a dict.
        harbor_110_repair_jobs_http_status: HTTP status of the harbor repair
            run jobs API read.
        harbor_110_repair_jobs_payload: Decoded body of that read.
        latest_cd_build_log_http_status: HTTP status of the build log read.
        latest_cd_build_log_text: Build job log, fed to
            ``classify_cd_build_log``.
        latest_cd_tests_log_http_status: HTTP status of the tests log read.
        latest_cd_tests_log_text: Tests job log, fed to
            ``classify_cd_tests_log``.

    Returns:
        A dict with ``schema_version``, an overall ``status`` string, the
        detailed ``readback`` fields, condensed ``rollups`` and a constant
        ``operation_boundaries`` section.
    """
    # --- Visible runs scraped from the HTML pages ---------------------------
    visible_runs = parse_visible_runs(actions_html)
    cd_workflow_visible_runs = parse_visible_runs(
        cd_workflow_actions_html,
        workflow_hint="cd.yaml",
    )
    combined_visible_runs = merge_visible_runs(visible_runs, cd_workflow_visible_runs)
    workflow_no_matching_labels = parse_workflow_no_matching_labels(actions_html)
    # First run whose status names a missing runner label ({} if none).
    no_matching = next(
        (run for run in combined_visible_runs if run["no_matching_runner_label"]),
        {},
    )
    # First workflow filter entry that reports a missing runner label.
    workflow_no_matching = next(
        (
            {"workflow": workflow, "no_matching_runner_label": label}
            for workflow, label in workflow_no_matching_labels.items()
        ),
        {},
    )
    # "Latest" means first in page order.
    latest_cd_run = next(
        (run for run in combined_visible_runs if run.get("workflow") == "cd.yaml"),
        {},
    )
    latest_harbor_110_repair_run = next(
        (
            run
            for run in combined_visible_runs
            if run.get("workflow") == "harbor-110-local-repair.yaml"
        ),
        {},
    )
    # True when the CD run was found only on the cd.yaml-filtered page.
    cd_workflow_fallback_used = bool(
        latest_cd_run
        and not any(run.get("workflow") == "cd.yaml" for run in visible_runs)
    )
    # --- Normalise the JSON payloads: anything that is not a dict becomes {} --
    cd_jobs = cd_jobs_payload if isinstance(cd_jobs_payload, dict) else {}
    harbor_110_repair_jobs = (
        harbor_110_repair_jobs_payload
        if isinstance(harbor_110_repair_jobs_payload, dict)
        else {}
    )
    actions_list = actions_list_payload if isinstance(actions_list_payload, dict) else {}
    actions_list_message = str(actions_list.get("message") or "")
    jobs_total_count = _int(cd_jobs.get("total_count"))
    jobs = cd_jobs.get("jobs") if isinstance(cd_jobs.get("jobs"), list) else []
    harbor_jobs_total_count = _int(harbor_110_repair_jobs.get("total_count"))
    harbor_jobs = (
        harbor_110_repair_jobs.get("jobs")
        if isinstance(harbor_110_repair_jobs.get("jobs"), list)
        else []
    )
    latest_cd_run_id = latest_cd_run.get("run_id", "")
    latest_cd_commit_sha = latest_cd_run.get("commit_sha", "")
    # --- Aggregate the CD jobs payload --------------------------------------
    job_head_shas = sorted(
        {
            str(job.get("head_sha") or "")
            for job in jobs
            if isinstance(job, dict) and job.get("head_sha")
        }
    )
    # Run ids are compared as strings because the scraped run id is a string.
    job_run_ids = sorted(
        {
            str(job.get("run_id") or "")
            for job in jobs
            if isinstance(job, dict) and job.get("run_id") is not None
        }
    )
    job_conclusion_counts: dict[str, int] = {}
    for job in jobs:
        if isinstance(job, dict):
            # Fall back to the status when no conclusion is set.
            conclusion = str(job.get("conclusion") or job.get("status") or "unknown")
            job_conclusion_counts[conclusion] = job_conclusion_counts.get(conclusion, 0) + 1
    # --- Aggregate the harbor repair jobs payload ---------------------------
    harbor_job_conclusion_counts: dict[str, int] = {}
    harbor_job_run_ids: set[str] = set()
    harbor_job_names: set[str] = set()
    harbor_job_labels: set[str] = set()
    harbor_job_runner_names: set[str] = set()
    for job in harbor_jobs:
        if not isinstance(job, dict):
            continue
        job_name = str(job.get("name") or "")
        if job_name:
            harbor_job_names.add(job_name)
        conclusion = str(job.get("conclusion") or job.get("status") or "unknown")
        harbor_job_conclusion_counts[conclusion] = (
            harbor_job_conclusion_counts.get(conclusion, 0) + 1
        )
        if job.get("run_id") is not None:
            harbor_job_run_ids.add(str(job.get("run_id")))
        labels = job.get("labels")
        if isinstance(labels, list):
            harbor_job_labels.update(str(label) for label in labels if label)
        runner_name = str(job.get("runner_name") or "")
        if runner_name:
            harbor_job_runner_names.add(runner_name)
    # --- Does the CD jobs payload describe the run shown on the page? --------
    cd_jobs_head_sha_matches_visible = (
        bool(latest_cd_commit_sha)
        and latest_cd_commit_sha in job_head_shas
    )
    cd_jobs_run_id_matches_visible = (
        bool(latest_cd_run_id)
        and latest_cd_run_id in job_run_ids
    )
    # Only flagged when jobs were actually returned and a CD run is visible.
    cd_jobs_stale_or_mismatched = (
        cd_jobs_http_status == 200
        and jobs_total_count > 0
        and bool(latest_cd_run)
        and (
            not cd_jobs_head_sha_matches_visible
            or not cd_jobs_run_id_matches_visible
        )
    )
    build_log_classifier = classify_cd_build_log(latest_cd_build_log_text)
    tests_log_classifier = classify_cd_tests_log(latest_cd_tests_log_text)
    latest_cd_visible_blocked = latest_cd_run.get("status", "") == "Blocked"
    # --- Harbor 110 repair run state ----------------------------------------
    harbor_110_repair_status = latest_harbor_110_repair_run.get("status", "")
    harbor_110_repair_run_id = latest_harbor_110_repair_run.get("run_id", "")
    # Prefer the label on the run itself, then the workflow filter entry.
    harbor_110_repair_no_matching_runner_label = (
        latest_harbor_110_repair_run.get("no_matching_runner_label", "")
        or workflow_no_matching_labels.get("harbor-110-local-repair.yaml", "")
    )
    harbor_110_repair_waiting = harbor_110_repair_status == "Waiting"
    harbor_110_repair_running = harbor_110_repair_status == "Running"
    harbor_110_repair_status_blocked = harbor_110_repair_status == "Blocked"
    harbor_110_repair_jobs_run_id_matches_visible = (
        bool(harbor_110_repair_run_id)
        and harbor_110_repair_run_id in harbor_job_run_ids
    )
    harbor_110_repair_jobs_unexpected_names = sorted(
        harbor_job_names - EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
    )
    # Every returned job name belongs to the CD workflow: the payload is for
    # the wrong workflow.
    harbor_110_repair_jobs_cross_workflow_mismatch = (
        bool(harbor_job_names)
        and harbor_job_names.issubset(CD_WORKFLOW_JOB_NAMES)
    )
    # The cross-workflow classification takes precedence over the generic one.
    harbor_110_repair_jobs_payload_classifier = (
        "cd_workflow_jobs_returned_for_harbor_110_repair_run"
        if harbor_110_repair_jobs_cross_workflow_mismatch
        else "unexpected_harbor_110_repair_job_names"
        if harbor_110_repair_jobs_unexpected_names
        else ""
    )
    harbor_110_repair_jobs_match_expected_workflow = (
        bool(harbor_job_names)
        and not harbor_110_repair_jobs_unexpected_names
        and "harbor-110-local-repair" in harbor_job_names
    )
    harbor_110_repair_jobs_stale_or_mismatched = (
        harbor_110_repair_jobs_http_status == 200
        and harbor_jobs_total_count > 0
        and not harbor_110_repair_jobs_match_expected_workflow
    )
    # Success requires the expected job names, every job concluded "success"
    # and a run id that matches the visible repair run.
    harbor_110_repair_jobs_all_success = (
        harbor_110_repair_jobs_http_status == 200
        and harbor_jobs_total_count > 0
        and harbor_110_repair_jobs_match_expected_workflow
        and harbor_job_conclusion_counts.get("success") == harbor_jobs_total_count
        and harbor_110_repair_jobs_run_id_matches_visible
    )
    # The page still shows "Waiting" although the jobs API reports success.
    harbor_110_repair_visible_waiting_stale = (
        harbor_110_repair_waiting and harbor_110_repair_jobs_all_success
    )
    harbor_110_repair_waiting_after_cd_harbor_blocker = (
        build_log_classifier["harbor_public_route_blocked_or_retrying"]
        and harbor_110_repair_waiting
    )
    harbor_110_repair_blocked = (
        harbor_110_repair_status_blocked
        or bool(harbor_110_repair_no_matching_runner_label)
        or harbor_110_repair_waiting_after_cd_harbor_blocker
    )
    # --- Detailed readback section ------------------------------------------
    readback = {
        "actions_page_visible_run_count": len(visible_runs),
        "cd_workflow_actions_page_visible_run_count": len(cd_workflow_visible_runs),
        "cd_workflow_fallback_used": cd_workflow_fallback_used,
        "actions_list_without_token_http_status": actions_list_http_status,
        "actions_list_without_token_message": actions_list_message,
        "cd_run_jobs_http_status": cd_jobs_http_status,
        "cd_run_jobs_total_count": jobs_total_count,
        "cd_run_jobs_head_shas": job_head_shas,
        "cd_run_jobs_run_ids": job_run_ids,
        "cd_run_jobs_conclusion_counts": job_conclusion_counts,
        "cd_run_jobs_head_sha_matches_visible": cd_jobs_head_sha_matches_visible,
        "cd_run_jobs_run_id_matches_visible": cd_jobs_run_id_matches_visible,
        "cd_run_jobs_stale_or_mismatched": cd_jobs_stale_or_mismatched,
        "latest_visible_no_matching_runner_run_id": no_matching.get("run_id", ""),
        # Falls back to the workflow filter entry when no run carries a label.
        "latest_visible_no_matching_runner_workflow": no_matching.get(
            "workflow", workflow_no_matching.get("workflow", "")
        ),
        "latest_visible_no_matching_runner_kind": no_matching.get("kind", ""),
        "latest_visible_no_matching_runner_status": no_matching.get("status", ""),
        "latest_visible_no_matching_runner_label": no_matching.get(
            "no_matching_runner_label",
            workflow_no_matching.get("no_matching_runner_label", ""),
        ),
        "workflow_no_matching_runner_labels": workflow_no_matching_labels,
        "latest_visible_cd_run_id": latest_cd_run.get("run_id", ""),
        "latest_visible_cd_run_status": latest_cd_run.get("status", ""),
        "latest_visible_cd_run_blocked": latest_cd_visible_blocked,
        "latest_visible_cd_run_kind": latest_cd_run.get("kind", ""),
        "latest_visible_cd_run_title": latest_cd_run.get("title", ""),
        "latest_visible_cd_run_commit_sha": latest_cd_run.get("commit_sha", ""),
        "latest_visible_cd_build_log_http_status": latest_cd_build_log_http_status,
        "latest_visible_cd_failure_classifier": build_log_classifier[
            "failure_classifier"
        ],
        "latest_visible_cd_failure_status_code": build_log_classifier[
            "failure_status_code"
        ],
        "latest_visible_cd_inflight_classifier": build_log_classifier[
            "inflight_classifier"
        ],
        "latest_visible_cd_harbor_latest_registry_v2_status": (
            build_log_classifier["harbor_latest_registry_v2_status"]
        ),
        "latest_visible_cd_harbor_login_attempt_count": build_log_classifier[
            "harbor_login_attempt_count"
        ],
        "latest_visible_cd_harbor_controlled_repair_attempted": build_log_classifier[
            "harbor_controlled_repair_attempted"
        ],
        "latest_visible_cd_harbor_controlled_repair_skip_reason": build_log_classifier[
            "harbor_controlled_repair_skip_reason"
        ],
        "latest_visible_cd_harbor_controlled_repair_public_registry_v2_status": (
            build_log_classifier[
                "harbor_controlled_repair_public_registry_v2_status"
            ]
        ),
        "latest_visible_cd_harbor_public_route_blocked": build_log_classifier[
            "harbor_public_route_blocked"
        ],
        "latest_visible_cd_harbor_public_route_retrying_unavailable": (
            build_log_classifier["harbor_public_route_retrying_unavailable"]
        ),
        "latest_visible_cd_tests_log_http_status": latest_cd_tests_log_http_status,
        "latest_visible_harbor_110_repair_run_id": (
            latest_harbor_110_repair_run.get("run_id", "")
        ),
        "latest_visible_harbor_110_repair_run_status": harbor_110_repair_status,
        "latest_visible_harbor_110_repair_run_kind": (
            latest_harbor_110_repair_run.get("kind", "")
        ),
        "latest_visible_harbor_110_repair_run_title": (
            latest_harbor_110_repair_run.get("title", "")
        ),
        "latest_visible_harbor_110_repair_run_commit_sha": (
            latest_harbor_110_repair_run.get("commit_sha", "")
        ),
        "latest_visible_harbor_110_repair_no_matching_runner_label": (
            harbor_110_repair_no_matching_runner_label
        ),
        "latest_visible_harbor_110_repair_waiting": harbor_110_repair_waiting,
        "latest_visible_harbor_110_repair_running": harbor_110_repair_running,
        "latest_visible_harbor_110_repair_status_blocked": (
            harbor_110_repair_status_blocked
        ),
        "latest_visible_harbor_110_repair_blocked": harbor_110_repair_blocked,
        "harbor_110_repair_waiting_after_cd_harbor_blocker": (
            harbor_110_repair_waiting_after_cd_harbor_blocker
        ),
        "harbor_110_repair_jobs_http_status": harbor_110_repair_jobs_http_status,
        "harbor_110_repair_jobs_total_count": harbor_jobs_total_count,
        "harbor_110_repair_jobs_conclusion_counts": harbor_job_conclusion_counts,
        "harbor_110_repair_jobs_run_ids": sorted(harbor_job_run_ids),
        "harbor_110_repair_jobs_names": sorted(harbor_job_names),
        "harbor_110_repair_jobs_expected_names": sorted(
            EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
        ),
        "harbor_110_repair_jobs_unexpected_names": (
            harbor_110_repair_jobs_unexpected_names
        ),
        "harbor_110_repair_jobs_cross_workflow_mismatch": (
            harbor_110_repair_jobs_cross_workflow_mismatch
        ),
        "harbor_110_repair_jobs_payload_classifier": (
            harbor_110_repair_jobs_payload_classifier
        ),
        "harbor_110_repair_jobs_labels": sorted(harbor_job_labels),
        "harbor_110_repair_jobs_runner_names": sorted(harbor_job_runner_names),
        "harbor_110_repair_jobs_run_id_matches_visible": (
            harbor_110_repair_jobs_run_id_matches_visible
        ),
        "harbor_110_repair_jobs_match_expected_workflow": (
            harbor_110_repair_jobs_match_expected_workflow
        ),
        "harbor_110_repair_jobs_stale_or_mismatched": (
            harbor_110_repair_jobs_stale_or_mismatched
        ),
        "harbor_110_repair_jobs_all_success": harbor_110_repair_jobs_all_success,
        "harbor_110_repair_visible_waiting_stale": (
            harbor_110_repair_visible_waiting_stale
        ),
        "latest_visible_cd_host_pressure_classifier": tests_log_classifier[
            "host_pressure_classifier"
        ],
        "latest_visible_cd_host_pressure_attempt_count": tests_log_classifier[
            "host_pressure_attempt_count"
        ],
        "latest_visible_cd_host_pressure_attempt_limit": tests_log_classifier[
            "host_pressure_attempt_limit"
        ],
        "latest_visible_cd_host_pressure_latest_load5_per_core": tests_log_classifier[
            "latest_load5_per_core"
        ],
        "latest_visible_cd_host_pressure_load5_threshold": tests_log_classifier[
            "load5_per_core_threshold"
        ],
        "latest_visible_cd_host_pressure_waiting": tests_log_classifier[
            "host_pressure_waiting"
        ],
        "latest_visible_cd_host_pressure_refused": tests_log_classifier[
            "host_pressure_refused"
        ],
        "no_matching_online_runner_visible": bool(no_matching)
        or bool(workflow_no_matching_labels),
        # Run lists are capped at ten entries in the payload.
        "top_visible_runs": combined_visible_runs[:10],
        "cd_workflow_visible_runs": cd_workflow_visible_runs[:10],
    }
    return {
        "schema_version": SCHEMA_VERSION,
        # Overall status: one chained conditional expression evaluated top to
        # bottom, so the first condition that holds decides the result and the
        # order of the branches is significant. The final string is the
        # default when no condition holds.
        "status": (
            "blocked_no_matching_online_runner"
            if no_matching
            else "blocked_latest_visible_cd_run"
            if latest_cd_visible_blocked
            else (
                "blocked_harbor_public_route_unavailable_after_harbor_110_repair_success"
            )
            if (
                build_log_classifier["harbor_public_route_blocked_or_retrying"]
                and harbor_110_repair_jobs_all_success
            )
            else "blocked_harbor_110_repair_no_matching_runner"
            if (
                build_log_classifier["harbor_public_route_blocked_or_retrying"]
                and harbor_110_repair_no_matching_runner_label
            )
            else "blocked_harbor_110_repair_jobs_stale_or_mismatched"
            if (
                build_log_classifier["harbor_public_route_blocked_or_retrying"]
                and harbor_110_repair_jobs_stale_or_mismatched
            )
            else "blocked_harbor_110_repair_workflow_waiting"
            if harbor_110_repair_waiting_after_cd_harbor_blocker
            else "blocked_harbor_public_route_unavailable"
            if build_log_classifier["harbor_public_route_blocked"]
            else "blocked_harbor_public_route_unavailable_pending_retry"
            if build_log_classifier["harbor_public_route_retrying_unavailable"]
            else "blocked_host_web_build_pressure"
            if tests_log_classifier["host_pressure_blocked_or_waiting"]
            # From here on the build log reported no Harbor route problem.
            else "blocked_harbor_110_repair_no_matching_runner"
            if harbor_110_repair_no_matching_runner_label
            else "blocked_no_matching_online_runner"
            if workflow_no_matching
            else "harbor_110_repair_jobs_stale_or_mismatched"
            if harbor_110_repair_jobs_stale_or_mismatched
            else "harbor_110_repair_waiting_for_runner_or_queue"
            if harbor_110_repair_waiting
            else "harbor_110_repair_running"
            if harbor_110_repair_running
            # The other triggers of harbor_110_repair_blocked were matched by
            # earlier branches, so only a "Blocked" run status reaches this one.
            else "blocked_harbor_110_repair_run"
            if harbor_110_repair_blocked
            else "cd_jobs_stale_or_mismatched"
            if cd_jobs_stale_or_mismatched
            else "no_matching_runner_not_visible"
        ),
        "readback": readback,
        # Condensed view of the fields above.
        "rollups": {
            "public_actions_readback_count": len(visible_runs),
            "cd_workflow_actions_readback_count": len(cd_workflow_visible_runs),
            "cd_workflow_fallback_used": cd_workflow_fallback_used,
            "actions_list_requires_token": actions_list_http_status == 401,
            "cd_run_jobs_total_count": jobs_total_count,
            "cd_run_jobs_stale_or_mismatched": cd_jobs_stale_or_mismatched,
            "current_main_cd_run_visible": bool(latest_cd_run),
            "current_main_cd_run_status": latest_cd_run.get("status", ""),
            "current_main_cd_run_blocked": latest_cd_visible_blocked,
            "current_main_cd_failure_classifier": build_log_classifier[
                "failure_classifier"
            ],
            "current_main_cd_inflight_classifier": build_log_classifier[
                "inflight_classifier"
            ],
            "current_main_cd_harbor_public_route_blocked": build_log_classifier[
                "harbor_public_route_blocked"
            ],
            "current_main_cd_harbor_public_route_retrying_unavailable": (
                build_log_classifier["harbor_public_route_retrying_unavailable"]
            ),
            "current_main_cd_harbor_latest_registry_v2_status": (
                build_log_classifier["harbor_latest_registry_v2_status"]
            ),
            "current_main_cd_harbor_controlled_repair_attempted": build_log_classifier[
                "harbor_controlled_repair_attempted"
            ],
            "current_main_cd_harbor_controlled_repair_skip_reason": (
                build_log_classifier["harbor_controlled_repair_skip_reason"]
            ),
            "current_main_cd_host_pressure_classifier": tests_log_classifier[
                "host_pressure_classifier"
            ],
            "current_main_cd_host_pressure_waiting": tests_log_classifier[
                "host_pressure_waiting"
            ],
            "current_main_cd_host_pressure_refused": tests_log_classifier[
                "host_pressure_refused"
            ],
            "no_matching_online_runner_visible": bool(no_matching)
            or bool(workflow_no_matching_labels),
            "harbor_110_repair_run_visible": bool(latest_harbor_110_repair_run),
            "harbor_110_repair_run_status": harbor_110_repair_status,
            "harbor_110_repair_waiting": harbor_110_repair_waiting,
            "harbor_110_repair_running": harbor_110_repair_running,
            "harbor_110_repair_blocked": harbor_110_repair_blocked,
            "harbor_110_repair_waiting_after_cd_harbor_blocker": (
                harbor_110_repair_waiting_after_cd_harbor_blocker
            ),
            "harbor_110_repair_no_matching_runner_label": (
                harbor_110_repair_no_matching_runner_label
            ),
            "harbor_110_repair_jobs_total_count": harbor_jobs_total_count,
            "harbor_110_repair_jobs_names": sorted(harbor_job_names),
            "harbor_110_repair_jobs_expected_names": sorted(
                EXPECTED_HARBOR_110_REPAIR_JOB_NAMES
            ),
            "harbor_110_repair_jobs_stale_or_mismatched": (
                harbor_110_repair_jobs_stale_or_mismatched
            ),
            "harbor_110_repair_jobs_cross_workflow_mismatch": (
                harbor_110_repair_jobs_cross_workflow_mismatch
            ),
            "harbor_110_repair_jobs_payload_classifier": (
                harbor_110_repair_jobs_payload_classifier
            ),
            "harbor_110_repair_jobs_unexpected_names": (
                harbor_110_repair_jobs_unexpected_names
            ),
            "harbor_110_repair_jobs_all_success": (
                harbor_110_repair_jobs_all_success
            ),
            "harbor_110_repair_jobs_runner_names": sorted(harbor_job_runner_names),
            "harbor_110_repair_visible_waiting_stale": (
                harbor_110_repair_visible_waiting_stale
            ),
        },
        # Constant declarations, apart from the token flag which mirrors the
        # 401 check on the runs list API read.
        "operation_boundaries": {
            "public_gitea_read_only": True,
            "token_required_but_not_collected": actions_list_http_status == 401,
            "gitea_api_write_performed": False,
            "workflow_dispatch_performed": False,
            "host_write_performed": False,
            "runner_registration_performed": False,
            "runner_service_start_performed": False,
            "secret_or_runner_token_read": False,
            "github_api_used": False,
        },
    }
def classify_cd_build_log(text: str) -> dict[str, Any]:
    """Classify Harbor registry symptoms found in a CD build job log.

    The log counts as *blocked* when the explicit BLOCKER line is present. It
    counts as *retrying* when there is no BLOCKER line, at least one login
    attempt was logged, and the last attempt's registry status is neither 200
    nor 401.

    Returns:
        A dict of classifier strings, flags and the most recent (up to 12)
        registry statuses.
    """
    attempts = [
        (_int(hit.group("attempt")), hit.group("status"))
        for hit in _HARBOR_ATTEMPT_RE.finditer(text)
    ]
    statuses = [status for _, status in attempts]
    latest_status = statuses[-1] if statuses else ""
    highest_attempt = max((number for number, _ in attempts), default=0)

    blocker_hit = _HARBOR_BLOCKER_RE.search(text)
    blocked = blocker_hit is not None
    blocker_status = blocker_hit.group("status") if blocked else ""
    retrying = (
        bool(statuses)
        and not blocked
        and latest_status not in {"200", "401"}
    )

    skip_reasons = [
        hit.group("reason")
        for hit in _HARBOR_CONTROLLED_REPAIR_SKIP_RE.finditer(text)
    ]
    repair_statuses = [
        hit.group("status")
        for hit in _HARBOR_CONTROLLED_REPAIR_STATUS_RE.finditer(text)
    ]
    # Any one of the three repair markers means the repair step was entered.
    repair_attempted = bool(
        skip_reasons
        or repair_statuses
        or "harbor_controlled_repair_check_start=1" in text
    )

    failure_classifier = ""
    if blocked:
        failure_classifier = "harbor_registry_public_route_unavailable"
    inflight_classifier = ""
    if retrying:
        inflight_classifier = "harbor_registry_public_route_unavailable_pending_retry"

    return {
        "failure_classifier": failure_classifier,
        "failure_status_code": blocker_status,
        "inflight_classifier": inflight_classifier,
        "harbor_latest_registry_v2_status": latest_status,
        "harbor_login_attempt_count": highest_attempt,
        "harbor_controlled_repair_attempted": repair_attempted,
        "harbor_controlled_repair_skip_reason": (
            skip_reasons[-1] if skip_reasons else ""
        ),
        "harbor_controlled_repair_public_registry_v2_status": (
            repair_statuses[-1] if repair_statuses else ""
        ),
        "harbor_public_route_blocked": blocked,
        "harbor_public_route_retrying_unavailable": retrying,
        "harbor_public_route_blocked_or_retrying": blocked or retrying,
        "harbor_registry_v2_statuses": statuses[-12:],
    }
def classify_cd_tests_log(text: str) -> dict[str, Any]:
    """Classify host pressure symptoms found in a CD tests job log.

    The log counts as *refused* when the final refusal line is present, and
    as *waiting* when pressure attempts were logged without a refusal.

    Returns:
        A dict with the classifier string, the highest attempt number and
        limit seen, the last load/threshold reading and the boolean flags.
    """
    attempt_pairs = [
        (_int(hit.group("attempt")), _int(hit.group("limit")))
        for hit in _HOST_PRESSURE_ATTEMPT_RE.finditer(text)
    ]
    highest_attempt = max((attempt for attempt, _ in attempt_pairs), default=0)
    highest_limit = max((limit for _, limit in attempt_pairs), default=0)

    # Only the last load reading in the log is reported.
    latest_load = latest_threshold = ""
    load_readings = _HOST_PRESSURE_LOAD_RE.findall(text)
    if load_readings:
        latest_load, latest_threshold = load_readings[-1]

    refused = _HOST_PRESSURE_REFUSAL_RE.search(text) is not None
    waiting = bool(attempt_pairs) and not refused

    if refused:
        classifier = "host_web_build_pressure_refused"
    elif waiting:
        classifier = "host_web_build_pressure_waiting"
    else:
        classifier = ""

    return {
        "host_pressure_classifier": classifier,
        "host_pressure_attempt_count": highest_attempt,
        "host_pressure_attempt_limit": highest_limit,
        "latest_load5_per_core": latest_load,
        "load5_per_core_threshold": latest_threshold,
        "host_pressure_waiting": waiting,
        "host_pressure_refused": refused,
        "host_pressure_blocked_or_waiting": waiting or refused,
    }
def load_json_text(text: str) -> Any:
    """Decode *text* as JSON, wrapping undecodable text as a message.

    Returns:
        The decoded JSON value, or ``{"message": <stripped text>}`` when
        *text* is not valid JSON.
    """
    try:
        decoded = json.loads(text)
    except json.JSONDecodeError:
        decoded = {"message": text.strip()}
    return decoded
def load_json_file(path: Path) -> Any:
    """Read *path* as UTF-8 text and decode it with ``load_json_text``."""
    file_text = path.read_text(encoding="utf-8")
    return load_json_text(file_text)
def derive_jobs_api_url(actions_list_api_url: str, run_id: str) -> str:
    """Turn the runs list API URL into the jobs API URL of one run.

    A trailing ``/actions/runs`` (with any query string) is replaced by
    ``/actions/runs/<run_id>/jobs``.

    Args:
        actions_list_api_url: URL of the runs list endpoint.
        run_id: Identifier of the run, inserted verbatim.

    Returns:
        The jobs URL, or "" when *run_id* is empty. A URL that does not end
        in ``/actions/runs`` is returned unchanged.
    """
    if not run_id:
        return ""
    jobs_path = f"/actions/runs/{run_id}/jobs"
    # A callable replacement is inserted literally. A template string would
    # treat backslashes in run_id as escapes or group references and could
    # raise re.error.
    return re.sub(
        r"/actions/runs(?:\?.*)?$",
        lambda _match: jobs_path,
        actions_list_api_url,
    )
def _int(value: Any) -> int:
try:
return int(value)
except (TypeError, ValueError):
return 0
def _read_text_file(path: Path) -> str:
return path.read_text(encoding="utf-8")
def _human_summary(payload: dict[str, Any]) -> str:
readback = payload["readback"]
lines = [
f"AWOOOI_PUBLIC_GITEA_ACTIONS_QUEUE_STATUS={payload['status']}",
(
"ACTIONS_LIST_WITHOUT_TOKEN_HTTP_STATUS="
f"{readback['actions_list_without_token_http_status']}"
),
f"CD_RUN_JOBS_TOTAL_COUNT={readback['cd_run_jobs_total_count']}",
(
"NO_MATCHING_ONLINE_RUNNER_VISIBLE="
f"{int(readback['no_matching_online_runner_visible'])}"
),
(
"LATEST_NO_MATCHING_RUNNER_LABEL="
f"{readback['latest_visible_no_matching_runner_label']}"
),
f"LATEST_VISIBLE_CD_RUN_ID={readback['latest_visible_cd_run_id']}",
f"LATEST_VISIBLE_CD_RUN_STATUS={readback['latest_visible_cd_run_status']}",
(
"LATEST_VISIBLE_CD_FAILURE_CLASSIFIER="
f"{readback['latest_visible_cd_failure_classifier']}"
),
(
"LATEST_VISIBLE_CD_INFLIGHT_CLASSIFIER="
f"{readback['latest_visible_cd_inflight_classifier']}"
),
(
"LATEST_VISIBLE_CD_HARBOR_LATEST_REGISTRY_V2_STATUS="
f"{readback['latest_visible_cd_harbor_latest_registry_v2_status']}"
),
(
"LATEST_VISIBLE_CD_HOST_PRESSURE_CLASSIFIER="
f"{readback['latest_visible_cd_host_pressure_classifier']}"
),
(
"LATEST_VISIBLE_HARBOR_110_REPAIR_RUN_ID="
f"{readback['latest_visible_harbor_110_repair_run_id']}"
),
(
"LATEST_VISIBLE_HARBOR_110_REPAIR_RUN_STATUS="
f"{readback['latest_visible_harbor_110_repair_run_status']}"
),
(
"LATEST_VISIBLE_HARBOR_110_REPAIR_NO_MATCHING_RUNNER_LABEL="
f"{readback['latest_visible_harbor_110_repair_no_matching_runner_label']}"
),
(
"HARBOR_110_REPAIR_WAITING_AFTER_CD_HARBOR_BLOCKER="
f"{int(readback['harbor_110_repair_waiting_after_cd_harbor_blocker'])}"
),
(
"HARBOR_110_REPAIR_JOBS_ALL_SUCCESS="
f"{int(readback['harbor_110_repair_jobs_all_success'])}"
),
(
"HARBOR_110_REPAIR_JOBS_STALE_OR_MISMATCHED="
f"{int(readback['harbor_110_repair_jobs_stale_or_mismatched'])}"
),
(
"HARBOR_110_REPAIR_VISIBLE_WAITING_STALE="
f"{int(readback['harbor_110_repair_visible_waiting_stale'])}"
),
"WRITE_PERFORMED=false",
"TOKEN_COLLECTED=false",
]
return "\n".join(lines) + "\n"
def _cli_build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the read-only queue readback CLI."""
    parser = argparse.ArgumentParser(
        description=(
            "Read public Gitea Actions queue state without credentials, dispatch, "
            "runner registration, host access, or secret reads."
        )
    )
    # Live endpoints (each has a module-level default).
    parser.add_argument("--actions-url", default=DEFAULT_ACTIONS_URL)
    parser.add_argument(
        "--cd-workflow-actions-url",
        default=DEFAULT_CD_WORKFLOW_ACTIONS_URL,
    )
    parser.add_argument("--actions-list-api-url", default=DEFAULT_ACTIONS_LIST_API_URL)
    parser.add_argument("--cd-run-jobs-api-url", default=DEFAULT_CD_RUN_JOBS_API_URL)
    parser.add_argument(
        "--cd-build-job-log-url-template",
        default=DEFAULT_CD_BUILD_JOB_LOG_URL_TEMPLATE,
    )
    parser.add_argument(
        "--cd-tests-job-log-url-template",
        default=DEFAULT_CD_TESTS_JOB_LOG_URL_TEMPLATE,
    )
    parser.add_argument("--timeout-seconds", type=float, default=10.0)
    # Local files that replace the corresponding live read, each with an
    # optional HTTP status to report alongside the file content.
    parser.add_argument("--actions-html-file", type=Path)
    parser.add_argument("--cd-workflow-actions-html-file", type=Path)
    parser.add_argument("--actions-list-json-file", type=Path)
    parser.add_argument("--actions-list-http-status", type=int)
    parser.add_argument("--cd-run-jobs-json-file", type=Path)
    parser.add_argument("--cd-run-jobs-http-status", type=int)
    parser.add_argument("--harbor-110-repair-jobs-json-file", type=Path)
    parser.add_argument("--harbor-110-repair-jobs-http-status", type=int)
    parser.add_argument("--cd-build-job-log-file", type=Path)
    parser.add_argument("--cd-build-job-log-http-status", type=int)
    parser.add_argument("--cd-tests-job-log-file", type=Path)
    parser.add_argument("--cd-tests-job-log-http-status", type=int)
    parser.add_argument("--skip-cd-build-job-log-read", action="store_true")
    parser.add_argument("--skip-cd-tests-job-log-read", action="store_true")
    parser.add_argument("--json", action="store_true")
    return parser


def _cli_first_run_id(runs: list[dict[str, Any]], workflow: str) -> str:
    """Return the ``run_id`` of the first run whose workflow matches, else ``""``."""
    first_match = next(
        (run for run in runs if run.get("workflow") == workflow),
        {},
    )
    return first_match.get("run_id", "")


def _cli_load_run_jobs(
    *,
    fixture_file: Path | None,
    fixture_http_status: int | None,
    runs: list[dict[str, Any]],
    workflow: str,
    actions_list_api_url: str,
    timeout_seconds: float,
    jobs_api_url_override: str = "",
) -> tuple[int, Any]:
    """Load the jobs payload for the first visible run of ``workflow``.

    Args:
        fixture_file: When given, the payload is loaded from this file and no
            network read happens.
        fixture_http_status: Status reported with ``fixture_file``; ``None``
            is reported as 0.
        runs: Visible runs to search (entries are read with ``.get``).
        workflow: Workflow file name to match against each run's
            ``"workflow"`` value.
        actions_list_api_url: Passed to ``derive_jobs_api_url`` together with
            the matched run id.
        timeout_seconds: Timeout forwarded to ``fetch_public_url``.
        jobs_api_url_override: When non-empty, used instead of the derived URL.

    Returns:
        ``(http_status, payload)``. When no jobs URL is available the result
        is status 0 with an empty jobs payload.
    """
    if fixture_file:
        return fixture_http_status or 0, load_json_file(fixture_file)

    run_id = _cli_first_run_id(runs, workflow)
    jobs_api_url = jobs_api_url_override or derive_jobs_api_url(
        actions_list_api_url,
        run_id,
    )
    if not jobs_api_url:
        return 0, {"jobs": [], "total_count": 0}

    jobs_read = fetch_public_url(jobs_api_url, timeout_seconds)
    return jobs_read.http_status, load_json_text(jobs_read.text)


def _cli_load_cd_job_log(
    *,
    fixture_file: Path | None,
    fixture_http_status: int | None,
    skip_read: bool,
    runs: list[dict[str, Any]],
    log_url_template: str,
    timeout_seconds: float,
) -> tuple[int, str]:
    """Load one job log for the first visible ``cd.yaml`` run.

    Args:
        fixture_file: When given, the log text is read from this file and no
            network read happens.
        fixture_http_status: Status reported with ``fixture_file``; ``None``
            is reported as 0.
        skip_read: When true (and no fixture file is given), nothing is read.
        runs: Visible runs to search for the first ``cd.yaml`` entry.
        log_url_template: ``str.format`` template receiving ``run_id``.
        timeout_seconds: Timeout forwarded to ``fetch_public_url``.

    Returns:
        ``(http_status, log_text)``; ``(0, "")`` when the read is skipped or
        no ``cd.yaml`` run id is visible.
    """
    if fixture_file:
        return fixture_http_status or 0, _read_text_file(fixture_file)
    if skip_read:
        return 0, ""

    run_id = _cli_first_run_id(runs, "cd.yaml")
    if not run_id:
        return 0, ""

    log_read = fetch_public_url(
        log_url_template.format(run_id=run_id),
        timeout_seconds,
    )
    return log_read.http_status, log_read.text


def main(argv: list[str] | None = None) -> int:
    """Run the queue readback CLI and print the result to stdout.

    Inputs are gathered in a fixed order: the actions page HTML, the
    CD-filtered actions page HTML, the actions list API payload, the CD run
    jobs payload, the Harbor 110 repair run jobs payload, the CD build job
    log and the CD tests job log. Each input comes from its ``--*-file``
    option when supplied and from ``fetch_public_url`` otherwise. Everything
    is then handed to ``build_readback``.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        Always 0 once the report has been written.
    """
    args = _cli_build_parser().parse_args(argv)

    if args.actions_html_file:
        actions_html = _read_text_file(args.actions_html_file)
    else:
        actions_html = fetch_public_url(args.actions_url, args.timeout_seconds).text

    if args.cd_workflow_actions_html_file:
        cd_workflow_actions_html = _read_text_file(args.cd_workflow_actions_html_file)
    elif args.actions_html_file:
        # A local actions page was supplied: do not fetch the CD-filtered page.
        cd_workflow_actions_html = ""
    elif any(
        run.get("workflow") == "cd.yaml" for run in parse_visible_runs(actions_html)
    ):
        # The general page already lists a cd.yaml run; no second fetch needed.
        cd_workflow_actions_html = ""
    else:
        cd_workflow_actions_html = fetch_public_url(
            args.cd_workflow_actions_url,
            args.timeout_seconds,
        ).text

    actions_lookup_runs = merge_visible_runs(
        parse_visible_runs(actions_html),
        parse_visible_runs(cd_workflow_actions_html, workflow_hint="cd.yaml"),
    )

    if args.actions_list_json_file:
        actions_list_http_status = args.actions_list_http_status or 0
        actions_list_payload = load_json_file(args.actions_list_json_file)
    else:
        actions_list_read = fetch_public_url(
            args.actions_list_api_url,
            args.timeout_seconds,
        )
        actions_list_http_status = actions_list_read.http_status
        actions_list_payload = load_json_text(actions_list_read.text)

    cd_jobs_http_status, cd_jobs_payload = _cli_load_run_jobs(
        fixture_file=args.cd_run_jobs_json_file,
        fixture_http_status=args.cd_run_jobs_http_status,
        runs=actions_lookup_runs,
        workflow="cd.yaml",
        actions_list_api_url=args.actions_list_api_url,
        timeout_seconds=args.timeout_seconds,
        jobs_api_url_override=args.cd_run_jobs_api_url,
    )

    # The Harbor repair jobs URL is always derived; there is no override flag.
    (
        harbor_110_repair_jobs_http_status,
        harbor_110_repair_jobs_payload,
    ) = _cli_load_run_jobs(
        fixture_file=args.harbor_110_repair_jobs_json_file,
        fixture_http_status=args.harbor_110_repair_jobs_http_status,
        runs=actions_lookup_runs,
        workflow="harbor-110-local-repair.yaml",
        actions_list_api_url=args.actions_list_api_url,
        timeout_seconds=args.timeout_seconds,
    )

    cd_build_job_log_http_status, cd_build_job_log_text = _cli_load_cd_job_log(
        fixture_file=args.cd_build_job_log_file,
        fixture_http_status=args.cd_build_job_log_http_status,
        skip_read=args.skip_cd_build_job_log_read,
        runs=actions_lookup_runs,
        log_url_template=args.cd_build_job_log_url_template,
        timeout_seconds=args.timeout_seconds,
    )

    cd_tests_job_log_http_status, cd_tests_job_log_text = _cli_load_cd_job_log(
        fixture_file=args.cd_tests_job_log_file,
        fixture_http_status=args.cd_tests_job_log_http_status,
        skip_read=args.skip_cd_tests_job_log_read,
        runs=actions_lookup_runs,
        log_url_template=args.cd_tests_job_log_url_template,
        timeout_seconds=args.timeout_seconds,
    )

    payload = build_readback(
        actions_html=actions_html,
        cd_workflow_actions_html=cd_workflow_actions_html,
        actions_list_http_status=actions_list_http_status,
        actions_list_payload=actions_list_payload,
        cd_jobs_http_status=cd_jobs_http_status,
        cd_jobs_payload=cd_jobs_payload,
        harbor_110_repair_jobs_http_status=harbor_110_repair_jobs_http_status,
        harbor_110_repair_jobs_payload=harbor_110_repair_jobs_payload,
        latest_cd_build_log_http_status=cd_build_job_log_http_status,
        latest_cd_build_log_text=cd_build_job_log_text,
        latest_cd_tests_log_http_status=cd_tests_job_log_http_status,
        latest_cd_tests_log_text=cd_tests_job_log_text,
    )

    if args.json:
        json.dump(payload, sys.stdout, ensure_ascii=False, indent=2, sort_keys=True)
        sys.stdout.write("\n")
    else:
        sys.stdout.write(_human_summary(payload))
    return 0
if __name__ == "__main__":
    # Script entry point: use main()'s return value as the process exit status.
    sys.exit(main())