#!/usr/bin/env python3
"""Read-only Gitea repository inventory tool.

The tool either queries a Gitea org/user/search API endpoint or consumes a
JSON export produced elsewhere (for example from the admin interface).
It only issues GET requests against Gitea and never writes the API token to
its output files; only a boolean "token present" flag is recorded.
"""

from __future__ import annotations

import argparse
import json
import os
import sys
import urllib.error
import urllib.request
from pathlib import Path
from urllib.parse import quote, urlencode, urlsplit, urlunsplit


def redact_url(value: str) -> str:
    """Return *value* with any embedded ``user[:password]@`` credentials removed.

    Two shapes are handled:

    * URLs with a scheme (``https://user:pw@host/path``): the userinfo part of
      the network location is dropped, everything else is kept.
    * scp-like remotes without a scheme (``git@host:owner/repo.git``): the
      part before the first ``@`` is dropped, but only when the remainder
      contains a ``:``; any other scheme-less value is returned unchanged.
    """
    if "://" not in value:
        if "@" in value and ":" in value.split("@", 1)[1]:
            return value.split("@", 1)[1]
        return value
    parts = urlsplit(value)
    # The userinfo delimiter is the LAST "@" of the authority; a password may
    # itself contain "@". Splitting on the first "@" would leave the tail of
    # such a password in the "redacted" output.
    netloc = parts.netloc.rsplit("@", 1)[-1]
    return urlunsplit((parts.scheme, netloc, parts.path, parts.query, parts.fragment))


def api_get_json(url: str, token: str | None, timeout: int) -> tuple[int, object]:
    """Issue a GET request and return ``(http_status, decoded_json)``.

    Args:
        url: Fully built request URL.
        token: Optional API token; when truthy it is sent as an
            ``Authorization: token ...`` header.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The HTTP status and the parsed JSON body. For HTTP error responses the
        error body is parsed as JSON when possible, otherwise it is wrapped as
        ``{"message": <body text>}``.

    Raises:
        urllib.error.URLError: For failures that are not HTTP error responses
            (only ``HTTPError`` is handled here).
        json.JSONDecodeError: When a successful response body is not JSON.
    """
    headers = {
        "Accept": "application/json",
        "User-Agent": "awoooi-security-inventory/1.0",
    }
    if token:
        headers["Authorization"] = f"token {token}"
    request = urllib.request.Request(url, headers=headers, method="GET")
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            body = response.read().decode("utf-8")
            return response.status, json.loads(body)
    except urllib.error.HTTPError as exc:
        # Error bodies are decoded leniently so a malformed body cannot mask
        # the HTTP status we want to report.
        body = exc.read().decode("utf-8", errors="replace")
        try:
            payload: object = json.loads(body)
        except json.JSONDecodeError:
            payload = {"message": body.strip()}
        return exc.code, payload


def repo_summary(repo: dict[str, object], github_owner: str | None) -> dict[str, object]:
    """Reduce one raw repo record to the fields kept in the inventory.

    Args:
        repo: A repo object as found in the API response or export.
        github_owner: Optional GitHub owner used to build the
            ``github_repo_candidate`` value (``<owner>/<name>``).

    Returns:
        A flat dict with redacted clone/ssh URLs; missing values become empty
        strings or ``False``.
    """
    owner = repo.get("owner")
    owner_name = owner.get("login") if isinstance(owner, dict) else None
    raw_full_name = str(repo.get("full_name") or "")
    # Fall back to the last path segment of full_name when "name" is absent.
    name = str(repo.get("name") or raw_full_name.rsplit("/", 1)[-1] or "")
    full_name = raw_full_name or (f"{owner_name}/{name}" if owner_name else name)
    clone_url = str(repo.get("clone_url") or repo.get("html_url") or "")
    ssh_url = str(repo.get("ssh_url") or "")
    github_repo = f"{github_owner}/{name}" if github_owner and name else ""
    return {
        "gitea_repo": full_name,
        "name": name,
        "owner": owner_name or "",
        "private": bool(repo.get("private", False)),
        "empty": bool(repo.get("empty", False)),
        "archived": bool(repo.get("archived", False)),
        "default_branch": str(repo.get("default_branch") or ""),
        "clone_url_redacted": redact_url(clone_url),
        "ssh_url_redacted": redact_url(ssh_url),
        "github_repo_candidate": github_repo,
    }


def load_export(path: Path) -> object:
    """Read *path* as UTF-8 text and return the parsed JSON document."""
    return json.loads(path.read_text(encoding="utf-8"))


def extract_repos(payload: object) -> list[dict[str, object]]:
    """Pull the list of repo dicts out of *payload*.

    Accepts either a bare list, or a dict holding the list under the first of
    the keys ``data``, ``repos`` or ``repositories`` whose value is a list.
    Non-dict items are dropped; anything else yields an empty list.
    """
    if isinstance(payload, list):
        return [item for item in payload if isinstance(item, dict)]
    if isinstance(payload, dict):
        for key in ("data", "repos", "repositories"):
            value = payload.get(key)
            if isinstance(value, list):
                return [item for item in value if isinstance(item, dict)]
    return []


def build_inventory(
    *,
    base_url: str,
    org: str,
    github_owner: str | None,
    token_present: bool,
    http_status: int | None,
    payload: object,
    query_mode: str,
    query: str,
) -> dict[str, object]:
    """Assemble the inventory document written to the JSON/Markdown outputs.

    Args:
        base_url: Gitea base URL; stored with trailing ``/`` stripped and
            credentials redacted.
        org: Org or user name that was queried.
        github_owner: Optional GitHub owner for candidate repo names.
        token_present: Whether a token was available (the token itself is
            never stored).
        http_status: HTTP status of the API call, or ``None`` when the data
            came from an export file.
        payload: Decoded API response or export content.
        query_mode: ``"export"`` or the API scope that was used.
        query: Query string recorded in the inventory.

    Returns:
        A dict with ``status`` set to ``"ok"`` (repos found with an
        authenticated or export view), ``"partial"`` (repos found without a
        token) or ``"blocked"`` (no repos), plus a human-readable
        ``blocking_reason`` for the latter two.
    """
    repos = [repo_summary(repo, github_owner) for repo in extract_repos(payload)]

    if query_mode == "export":
        visibility_scope = "admin_export"
    elif token_present:
        visibility_scope = "authenticated"
    else:
        visibility_scope = "public_only"

    if repos and visibility_scope in ("authenticated", "admin_export"):
        status = "ok"
    elif repos:
        status = "partial"
    else:
        status = "blocked"

    blocking_reason = ""
    if repos and status == "partial":
        blocking_reason = "未提供 token,結果只代表公開可見 repo;private/internal repos 仍需只讀 token 或管理匯出"
    elif not repos:
        if http_status in (401, 403):
            blocking_reason = "Gitea API 需要只讀 token 或權限不足"
        elif query_mode == "search" and http_status == 200:
            blocking_reason = "Gitea public repo search 未回傳 repo,可能沒有公開 repo 或需要只讀 token"
        elif http_status == 404:
            blocking_reason = "Gitea API 查無 org/user repos,需確認 org 名稱或使用管理匯出"
        elif http_status is None:
            blocking_reason = "匯入檔案沒有可解析的 repo list"
        else:
            blocking_reason = f"Gitea API 回應無 repo list,HTTP {http_status}"

    return {
        "schema_version": "gitea_repo_inventory_v1",
        "base_url": redact_url(base_url.rstrip("/")),
        "org": org,
        "github_owner": github_owner or "",
        "query_mode": query_mode,
        "query": query,
        "visibility_scope": visibility_scope,
        "token_present": token_present,
        "http_status": http_status,
        "status": status,
        "blocking_reason": blocking_reason,
        "repo_count": len(repos),
        "repos": repos,
    }


def write_markdown(inventory: dict[str, object], path: Path) -> None:
    """Render *inventory* as a Markdown snapshot and write it to *path*.

    The document has a key/value summary table followed by one table row per
    repo dict found under ``inventory["repos"]``; non-dict entries are skipped.
    The file is written as UTF-8.
    """
    lines = [
        "# Gitea Repo 全量盤點快照",
        "",
        "| 項目 | 值 |",
        "|------|----|",
        f"| 狀態 | `{inventory['status']}` |",
        f"| Gitea base URL | `{inventory['base_url']}` |",
        f"| Org/User | `{inventory['org']}` |",
        f"| GitHub owner 候選 | `{inventory['github_owner']}` |",
        f"| 查詢模式 | `{inventory.get('query_mode', '')}` |",
        f"| 查詢字串 | `{inventory.get('query', '')}` |",
        f"| 可見性範圍 | `{inventory.get('visibility_scope', '')}` |",
        f"| 是否提供 token | `{inventory['token_present']}` |",
        f"| HTTP status | `{inventory['http_status']}` |",
        f"| Repo 數量 | `{inventory['repo_count']}` |",
        f"| 阻塞原因 | {inventory['blocking_reason'] or '無'} |",
        "",
        "## Repo 清單",
        "",
        "| Gitea repo | GitHub 候選 | default branch | private | archived |",
        "|------------|------------------|----------------|---------|----------|",
    ]
    repos = inventory.get("repos")
    if isinstance(repos, list):
        for repo in repos:
            if not isinstance(repo, dict):
                continue
            cells = [
                f"`{repo.get('gitea_repo', '')}`",
                f"`{repo.get('github_repo_candidate', '')}`",
                f"`{repo.get('default_branch', '')}`",
                f"`{repo.get('private', '')}`",
                f"`{repo.get('archived', '')}`",
            ]
            lines.append("| " + " | ".join(cells) + " |")
    lines.extend(
        [
            "",
            "> 注意:本檔由 read-only Gitea inventory 工具產生,不包含 API token 或 remote URL 帳密。",
            "",
        ]
    )
    path.write_text("\n".join(lines), encoding="utf-8")


def main() -> int:
    """Parse CLI arguments, collect the inventory and write both output files.

    Returns:
        ``2`` when the inventory status is ``"blocked"`` (the reason is also
        printed to stderr), otherwise ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-url", default="http://192.168.0.110:3001")
    parser.add_argument("--org", default="wooo")
    parser.add_argument("--scope", choices=["org", "user", "search"], default="org")
    parser.add_argument("--query", default="")
    parser.add_argument("--limit", type=int, default=50)
    parser.add_argument("--github-owner", default="")
    parser.add_argument("--token-env", default="GITEA_READONLY_TOKEN")
    parser.add_argument("--input-json")
    parser.add_argument("--output-json", required=True)
    parser.add_argument("--output-md", required=True)
    parser.add_argument("--timeout", type=int, default=5)
    args = parser.parse_args()

    # The token is read from the environment variable named by --token-env so
    # it never appears on the command line.
    token = os.environ.get(args.token_env)
    http_status: int | None = None

    if args.input_json:
        payload = load_export(Path(args.input_json))
        query_mode = "export"
        query = "input-json"
    else:
        # NOTE(review): a single request is made and no `page` parameter is
        # sent, and --limit is only applied in search scope; the result is
        # whatever the server returns for one page. Confirm this is enough
        # for a "full" inventory of large orgs.
        if args.scope == "search":
            params = {"limit": str(args.limit)}
            if args.query:
                params["q"] = args.query
            url = f"{args.base_url.rstrip('/')}/api/v1/repos/search?{urlencode(params)}"
        else:
            quoted = quote(args.org, safe="")
            prefix = "orgs" if args.scope == "org" else "users"
            url = f"{args.base_url.rstrip('/')}/api/v1/{prefix}/{quoted}/repos"
        http_status, payload = api_get_json(url, token, args.timeout)
        query_mode = args.scope
        query = args.query

    inventory = build_inventory(
        base_url=args.base_url,
        org=args.org,
        github_owner=args.github_owner or None,
        token_present=bool(token),
        http_status=http_status,
        payload=payload,
        query_mode=query_mode,
        query=query,
    )

    Path(args.output_json).write_text(
        json.dumps(inventory, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )
    write_markdown(inventory, Path(args.output_md))

    if inventory["status"] == "blocked":
        print(inventory["blocking_reason"], file=sys.stderr)
        return 2
    return 0


if __name__ == "__main__":
    raise SystemExit(main())