Files
awoooi/ops/runner/audit-workflow-labels.py
2026-05-24 09:52:04 +08:00

260 lines
8.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""Read-only inventory for Gitea workflow runner labels.
The script never prints credentials. It reads workflow files from Gitea when
GITEA_BASE/GITEA_USER/GITEA_TOKEN are available, or derives them from the
current repository's `gitea` remote when that remote embeds basic auth.
Example:
ops/runner/audit-workflow-labels.py \
--local-repo wooo/stockplatform-v2=/Users/ogt/stockplatform-v2
"""
from __future__ import annotations
import argparse
import base64
import json
import re
import subprocess
import sys
import urllib.error
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
# Repositories (owner/name) inspected when no --repo option is given.
DEFAULT_REPOS = ("wooo/awoooi", "wooo/ewoooc", "wooo/stockplatform-v2")
# Repository-relative directories that are searched for workflow files.
WORKFLOW_DIRS = (".gitea/workflows",)
# Matches one `runs-on: <value>` line. The named group `label` captures the
# value; everything from the first `#` onward is treated as a trailing comment
# and left out of the capture.
RUNS_ON_RE = re.compile(r"^\s*runs-on:\s*(?P<label>.+?)\s*(?:#.*)?$")
@dataclass(frozen=True)
class GiteaAuth:
    """Immutable connection settings for the Gitea HTTP API.

    The generated dataclass ``repr`` would include every field, so a stray
    ``print(auth)``, log call, or traceback could expose the token. This class
    defines its own ``__repr__`` that redacts it, in line with the script's
    promise never to print credentials.
    """

    # Server root URL; API paths are appended directly to this value.
    base: str
    # Account name used for HTTP basic authentication.
    user: str
    # Secret used as the basic-auth password; redacted in repr()/str().
    token: str

    def __repr__(self) -> str:
        """Return a debug representation with the token masked."""
        return f"{type(self).__name__}(base={self.base!r}, user={self.user!r}, token='***')"
@dataclass(frozen=True)
class WorkflowLabel:
    """One `runs-on` value found in a workflow file."""

    # Repository in owner/name form.
    repo: str
    # Where the file content came from: "gitea" (API) or "local" (checkout).
    source: str
    # Branch/ref that was requested for the inventory.
    branch: str
    # Path of the workflow file relative to the repository root.
    file_path: str
    # 1-based line number of the `runs-on:` line inside the file.
    line_number: int
    # The `runs-on` value with surrounding whitespace and quotes removed.
    label: str
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the inventory script.

    Returns:
        A namespace with ``repos`` (list of owner/name strings, or ``None``
        when --repo was never given), ``branch`` (defaults to ``"main"``) and
        ``local_repo`` (list of ``OWNER/NAME=PATH`` strings, possibly empty).
    """
    cli = argparse.ArgumentParser(description=__doc__)

    repo_help = "Repository in owner/name form. Defaults to AWOOI, EwoooC, stockplatform-v2."
    cli.add_argument("--repo", dest="repos", action="append", help=repo_help)

    branch_help = "Branch/ref to inspect. Default: main."
    cli.add_argument("--branch", default="main", help=branch_help)

    local_help = "Local fallback repository path used when Gitea content is unavailable."
    cli.add_argument(
        "--local-repo",
        metavar="OWNER/NAME=PATH",
        default=[],
        action="append",
        help=local_help,
    )

    return cli.parse_args()
def derive_gitea_auth() -> GiteaAuth | None:
    """Locate Gitea credentials without ever printing them.

    Lookup order:

    1. The environment variables ``GITEA_BASE``, ``GITEA_USER`` and
       ``GITEA_TOKEN``. They are used only when all three are non-empty; a
       trailing slash on ``GITEA_BASE`` is dropped because API paths start
       with ``/``.
    2. The URL of the ``gitea`` git remote of the current working directory,
       when it embeds basic auth as ``scheme://user:token@host/...``. Both
       ``http`` and ``https`` remotes are accepted and the scheme is kept.

    Returns:
        The credentials, or ``None`` when neither source provides them.
    """
    import os  # local import: the module-level import block does not include os

    env_base = os.environ.get("GITEA_BASE", "").strip().rstrip("/")
    env_user = os.environ.get("GITEA_USER", "").strip()
    env_token = os.environ.get("GITEA_TOKEN", "").strip()
    if env_base and env_user and env_token:
        return GiteaAuth(base=env_base, user=env_user, token=env_token)

    try:
        remote_url = subprocess.check_output(
            ["git", "remote", "get-url", "gitea"],
            text=True,
            stderr=subprocess.DEVNULL,  # keep git's complaints off the terminal
        ).strip()
    except (OSError, subprocess.CalledProcessError):
        # git is not installed, this is not a repository, or no such remote.
        return None

    match = re.match(r"(https?)://([^:]+):([^@]+)@([^/]+)", remote_url)
    if not match:
        return None
    scheme, user, token, host = match.groups()
    return GiteaAuth(base=f"{scheme}://{host}", user=user, token=token)
def build_opener(auth: GiteaAuth) -> urllib.request.OpenerDirector:
    """Create a urllib opener that can answer basic-auth challenges.

    Args:
        auth: Supplies the base URL the credentials apply to, plus the user
            name and token used as the password.

    Returns:
        An opener whose basic-auth handler knows the credentials for every
        URL below ``auth.base``, regardless of the realm the server names.
    """
    credentials = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    # realm=None registers the entry as the default for any realm.
    credentials.add_password(realm=None, uri=auth.base, user=auth.user, passwd=auth.token)
    basic_handler = urllib.request.HTTPBasicAuthHandler(credentials)
    return urllib.request.build_opener(basic_handler)
def get_json(opener: urllib.request.OpenerDirector, auth: GiteaAuth, path: str) -> object:
    """Fetch ``auth.base + path`` through *opener* and decode the JSON body.

    Args:
        opener: Opener used to perform the request.
        auth: Only its ``base`` attribute is read here.
        path: Path (and query) appended to the base URL.

    Returns:
        The decoded JSON document.

    Raises:
        Whatever the opener or the JSON decoder raises; nothing is caught
        here. The request uses a 10 second timeout.
    """
    endpoint = auth.base + path
    response = opener.open(endpoint, timeout=10)
    with response:
        return json.loads(response.read())
def parse_runs_on(repo: str, source: str, branch: str, file_path: str, content: str) -> list[WorkflowLabel]:
    """Collect every `runs-on:` value that appears in a workflow file.

    Args:
        repo: Repository in owner/name form, copied into each result.
        source: Origin tag copied into each result.
        branch: Branch/ref copied into each result.
        file_path: Workflow file path copied into each result.
        content: Full text of the workflow file.

    Returns:
        One ``WorkflowLabel`` per line matched by ``RUNS_ON_RE``, in file
        order, with 1-based line numbers. Labels have surrounding whitespace
        and single/double quotes stripped.
    """
    numbered = enumerate(content.splitlines(), start=1)
    candidates = ((number, RUNS_ON_RE.match(text)) for number, text in numbered)
    return [
        WorkflowLabel(
            repo=repo,
            source=source,
            branch=branch,
            file_path=file_path,
            line_number=number,
            label=hit.group("label").strip().strip("'\""),
        )
        for number, hit in candidates
        if hit is not None
    ]
def fetch_gitea_labels(repo: str, branch: str, auth: GiteaAuth) -> tuple[list[WorkflowLabel], str | None]:
    """Read workflow files of *repo* through the Gitea contents API.

    Args:
        repo: Repository in ``owner/name`` form.
        branch: Branch/ref passed to the API as ``?ref=``.
        auth: Base URL and credentials for the Gitea server.

    Returns:
        ``(labels, error)``. ``error`` is ``None`` on success. On the first
        failure the labels collected so far are returned together with a
        short error code (``invalid_repo:…``, ``gitea_http_<status>:…``,
        ``gitea_error:…`` or ``gitea_file_error:…``); nothing is raised so the
        caller can report the problem and continue with the next repository.
    """
    from urllib.parse import quote  # local import: only this function needs it

    labels: list[WorkflowLabel] = []

    # Validate before any network setup; a repo without "/" used to raise
    # ValueError and abort the whole inventory.
    owner, slash, repo_name = repo.partition("/")
    if not (slash and owner and repo_name):
        return labels, f"invalid_repo:{repo}"

    opener = build_opener(auth)
    # Percent-encode every user-influenced URL component so that spaces, "#",
    # "?" or "&" cannot change the meaning of the request.
    contents_root = f"/api/v1/repos/{quote(owner, safe='')}/{quote(repo_name, safe='')}/contents"
    ref_query = f"?ref={quote(branch, safe='/')}"

    for workflow_dir in WORKFLOW_DIRS:
        api_dir = f"{contents_root}/{quote(workflow_dir)}{ref_query}"
        try:
            entries = get_json(opener, auth, api_dir)
        except urllib.error.HTTPError as exc:
            return labels, f"gitea_http_{exc.code}:{workflow_dir}"
        except Exception as exc:  # noqa: BLE001 - inventory should report and continue.
            return labels, f"gitea_error:{type(exc).__name__}:{workflow_dir}"
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, dict) or entry.get("type") != "file":
                continue
            # A dedicated name keeps the repository name intact: the previous
            # code reused `name` here, so a second WORKFLOW_DIRS entry would
            # have built its directory URL from the last file name.
            file_name = str(entry.get("name", ""))
            if not re.search(r"\.ya?ml$", file_name):
                continue
            file_path = f"{workflow_dir}/{file_name}"
            api_file = f"{contents_root}/{quote(file_path)}{ref_query}"
            try:
                item = get_json(opener, auth, api_file)
                if not isinstance(item, dict):
                    continue
                # `or ""` also covers an explicit null content value, which
                # would otherwise be decoded as the text "None".
                encoded = str(item.get("content") or "")
                content = base64.b64decode(encoded).decode("utf-8", "replace")
            except Exception as exc:  # noqa: BLE001
                return labels, f"gitea_file_error:{type(exc).__name__}:{file_path}"
            labels.extend(parse_runs_on(repo, "gitea", branch, file_path, content))
    return labels, None
def parse_local_repo_args(values: Iterable[str]) -> dict[str, Path]:
    """Turn ``OWNER/NAME=PATH`` strings into a repo-to-path mapping.

    Args:
        values: Raw ``--local-repo`` option values.

    Returns:
        Mapping from the text before the first ``=`` to the absolute,
        user-expanded path given after it. A later entry for the same
        repository replaces an earlier one.

    Raises:
        SystemExit: If a value contains no ``=``.
    """
    mapping: dict[str, Path] = {}
    for spec in values:
        repo_key, separator, raw_path = spec.partition("=")
        if not separator:
            raise SystemExit(f"invalid --local-repo value: {spec}")
        mapping[repo_key] = Path(raw_path).expanduser().resolve()
    return mapping
def fetch_local_labels(repo: str, branch: str, repo_path: Path) -> tuple[list[WorkflowLabel], str | None]:
    """Read workflow files of *repo* from a local checkout.

    Args:
        repo: Repository in owner/name form, copied into each result.
        branch: Branch/ref label copied into each result. The files are read
            from the working tree as it is; no branch is checked out here.
        repo_path: Root directory of the local checkout.

    Returns:
        ``(labels, error)``. ``error`` is ``None`` on success,
        ``local_missing:<path>`` when the checkout does not exist, or
        ``local_read_error:…`` when a workflow file cannot be read (labels
        gathered before the failure are still returned).
    """
    labels: list[WorkflowLabel] = []
    if not repo_path.exists():
        return labels, f"local_missing:{repo_path}"
    for workflow_dir in WORKFLOW_DIRS:
        directory = repo_path / workflow_dir
        if not directory.is_dir():
            continue
        for path in sorted(directory.iterdir()):
            # Apply the same `.yml`/`.yaml` filter as the Gitea code path; the
            # former glob "*.y*ml" also matched names such as "notes.yxml".
            # Skipping non-files avoids crashing on a directory named x.yml.
            if not path.is_file() or not re.search(r"\.ya?ml$", path.name):
                continue
            relative = str(path.relative_to(repo_path))
            try:
                content = path.read_text(encoding="utf-8", errors="replace")
            except OSError as exc:
                # Report and stop, mirroring how Gitea fetch errors are handled.
                return labels, f"local_read_error:{type(exc).__name__}:{relative}"
            labels.extend(parse_runs_on(repo, "local", branch, relative, content))
    return labels, None
def label_owner(label: str) -> str:
    """Classify a `runs-on` label by which runner pool it targets.

    Args:
        label: Raw label; surrounding whitespace and quotes are ignored.

    Returns:
        ``"awoooi_dedicated"`` for ``awoooi-host``, ``"foreign_dedicated"``
        for ``ewoooc-host``, ``"shared_queue"`` for anything that contains
        ``ubuntu-latest`` or starts with ``ubuntu-`` or ``[``, and
        ``"unknown_or_custom"`` otherwise.
    """
    cleaned = label.strip().strip("'\"")

    dedicated = {
        "awoooi-host": "awoooi_dedicated",
        "ewoooc-host": "foreign_dedicated",
    }
    if cleaned in dedicated:
        return dedicated[cleaned]

    # A substring test already covers the exact "ubuntu-latest" value.
    targets_shared = "ubuntu-latest" in cleaned or cleaned.startswith(("ubuntu-", "["))
    return "shared_queue" if targets_shared else "unknown_or_custom"
def print_labels(labels: list[WorkflowLabel], errors: list[str]) -> None:
    """Print the inventory report to standard output.

    The report has three sections: a tab-separated table with one row per
    label occurrence, a per-label summary listing the repositories that use
    it (ordered by owner class, then label), and the collected warnings.
    Each section prints a placeholder line when it has nothing to show.

    Args:
        labels: Label occurrences to report.
        errors: Warning lines, printed verbatim.
    """
    # Section 1: every occurrence.
    print("== workflow label inventory ==")
    if not labels:
        print("labels_found=0")
    else:
        print("repo\tsource\tbranch\tfile\tline\truns_on\towner")
        for row in labels:
            columns = (
                row.repo,
                row.source,
                row.branch,
                row.file_path,
                row.line_number,
                row.label,
                label_owner(row.label),
            )
            print("\t".join(str(column) for column in columns))

    # Section 2: which repositories use each label.
    print("\n== label summary ==")
    repos_by_label: dict[str, set[str]] = {}
    for row in labels:
        repos_by_label.setdefault(row.label, set()).add(row.repo)
    if not repos_by_label:
        print("summary=none")
    else:
        ordered = sorted(repos_by_label, key=lambda name: (label_owner(name), name))
        for name in ordered:
            users = repos_by_label[name]
            print(f"label={name} owner={label_owner(name)} repo_count={len(users)} repos={','.join(sorted(users))}")

    # Section 3: problems encountered while collecting.
    print("\n== inventory warnings ==")
    for line in errors or ["warnings=none"]:
        print(line)
def main() -> int:
    """Collect runner labels for every requested repository and print them.

    For each repository the Gitea API is tried first when credentials are
    available. If that fails, or if no credentials are available at all, a
    matching ``--local-repo`` checkout is used as the fallback. Every problem
    is recorded as a warning line instead of aborting the run.

    Returns:
        Always ``0``; problems are reported in the warnings section.
    """
    args = parse_args()
    repos = args.repos or list(DEFAULT_REPOS)
    local_paths = parse_local_repo_args(args.local_repo)
    auth = derive_gitea_auth()
    labels: list[WorkflowLabel] = []
    errors: list[str] = []
    for repo in repos:
        repo_labels: list[WorkflowLabel] = []
        error: str | None
        if auth is None:
            # Always flag missing credentials. Previously the error was only
            # set for repositories WITHOUT a local path, so a repository that
            # did have one skipped the fallback below and produced nothing.
            error = "gitea_auth_unavailable"
        else:
            repo_labels, error = fetch_gitea_labels(repo, args.branch, auth)

        if error and repo in local_paths:
            local_labels, local_error = fetch_local_labels(repo, args.branch, local_paths[repo])
            if local_labels:
                repo_labels = local_labels
                note = f"{repo}: {error}; local_fallback=used"
                if local_error:
                    # Keep a partial-read problem visible even when some
                    # labels were recovered locally.
                    note += f"; {local_error}"
                errors.append(note)
            elif local_error:
                errors.append(f"{repo}: {error}; {local_error}")
            else:
                errors.append(f"{repo}: {error}; local_fallback=no_workflows")
        elif error:
            errors.append(f"{repo}: {error}")
        labels.extend(repo_labels)
    print_labels(labels, errors)
    return 0
# Run the inventory only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())