#!/usr/bin/env python3
"""Turn a read-only branch/tag diff into a draft source-of-truth classification.

This tool only reads an already-sanitized ref detail diff snapshot. It never
calls a remote Git, never fetches, never pushes and never deletes a
branch/tag. The output is meant for human review and for the AwoooP mirror.
"""

from __future__ import annotations

import argparse
import json
from collections import Counter
from pathlib import Path
from typing import Any

# Actions that remain forbidden regardless of the classification outcome.
DEFAULT_STILL_FORBIDDEN = [
    "fetch",
    "push refs",
    "force push",
    "delete refs",
    "create GitHub repo",
    "change repo visibility",
    "switch GitHub primary",
    "disable Gitea",
    "move secret values",
]

# Actions a consumer may take right away for any classified ref.
DEFAULT_ALLOWED_NOW = [
    "mirror_classification",
    "display_review_lane",
    "request_single_ref_owner_decision",
    "update_read_only_evidence",
]


def load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the parsed JSON document."""
    return json.loads(path.read_text(encoding="utf-8"))


def risk_for_repo(gitea_repo: str) -> str:
    """Return the repo-level risk label: ``HIGH`` for ``wooo/awoooi``, else ``MEDIUM``."""
    return "HIGH" if gitea_repo == "wooo/awoooi" else "MEDIUM"


def short_sha(value: str) -> str:
    """Return the first 8 characters of *value*, or the placeholder ``無`` when empty."""
    if not value:
        return "無"
    return value[:8]


def classification_item(
    *,
    ref_type: str,
    ref_name: str,
    lane: str,
    risk: str,
    proposed_truth_source: str,
    classification: str,
    reason: str,
    next_review: str,
    gitea_sha: str = "",
    github_sha: str = "",
) -> dict[str, Any]:
    """Build one classification record for a single ref.

    All arguments are keyword-only and are copied verbatim into the record
    under keys of the same name. ``gitea_sha`` / ``github_sha`` default to an
    empty string when the ref is missing on that side.

    Returns:
        A dict holding the given fields plus ``allowed_now`` and
        ``forbidden_actions``.
    """
    return {
        "ref_type": ref_type,
        "ref_name": ref_name,
        "lane": lane,
        "risk": risk,
        "proposed_truth_source": proposed_truth_source,
        "classification": classification,
        "reason": reason,
        "next_review": next_review,
        "gitea_sha": gitea_sha,
        "github_sha": github_sha,
        # Copy the defaults: handing out the module-level lists themselves
        # would let a mutation of one record rewrite the policy constants
        # and every other record that aliases them.
        "allowed_now": list(DEFAULT_ALLOWED_NOW),
        "forbidden_actions": list(DEFAULT_STILL_FORBIDDEN),
    }


def classify_branch_only_gitea(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch that exists only on the Gitea side.

    Args:
        item: Mapping with the branch ``name`` and its ``sha``.
        repo_risk: Risk label of the owning repo; used for every lane except
            the ``drift/adopt-`` one, which is always ``LOW``.

    Returns:
        The classification record for the branch.
    """
    name = item["name"]
    sha = item["sha"]

    if name.startswith("drift/adopt-"):
        return classification_item(
            ref_type="branch",
            ref_name=name,
            lane="archive_or_deprecate_candidate",
            risk="LOW",
            proposed_truth_source="deprecated_candidate",
            classification="manual_review_deprecated_candidate",
            reason="drift/adopt 類分支疑似為漂移承接或暫存分支,先標為可能封存/降級候選,但不得自動刪除。",
            next_review="由 repo owner 確認是否仍有部署、PR、回滾或稽核用途;確認前保留。",
            gitea_sha=sha,
        )

    if name == "dev":
        return classification_item(
            ref_type="branch",
            ref_name=name,
            lane="active_branch_truth_required",
            risk=repo_risk,
            proposed_truth_source="manual_required",
            classification="manual_truth_required",
            reason="`dev` 可能是仍在使用的開發分支,GitHub 缺少此 ref 前不得判定 GitHub ready。",
            next_review="確認 dev 是否仍為有效工作流;若有效,再決定單 branch 同步策略。",
            gitea_sha=sha,
        )

    # Anything else matches no known rule and goes to plain manual review.
    return classification_item(
        ref_type="branch",
        ref_name=name,
        lane="manual_review",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_review",
        reason="此 Gitea-only branch 不符合已知低風險規則,需要人工判定用途。",
        next_review="確認 ref owner、是否 active、是否需保留到 GitHub 或改列封存。",
        gitea_sha=sha,
    )


def classify_branch_only_github(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch that exists only on the GitHub side.

    Args:
        item: Mapping with the branch ``name`` and its ``sha``.
        repo_risk: Risk label of the owning repo.
    """
    return classification_item(
        ref_type="branch",
        ref_name=item["name"],
        lane="github_only_manual_review",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_review_github_only",
        reason="GitHub-only branch 可能代表 GitHub 端曾有獨立工作,不能用 Gitea 覆蓋或刪除。",
        next_review="確認該 branch 是否為有效 refactor/feature 線,並判定是否回補到 Gitea 或保留 GitHub-only。",
        github_sha=item["sha"],
    )


def classify_tag_only_gitea(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a tag that exists only on the Gitea side.

    Args:
        item: Mapping with the tag ``name`` and its ``sha``.
        repo_risk: Risk label of the owning repo.
    """
    return classification_item(
        ref_type="tag",
        ref_name=item["name"],
        lane="release_tag_missing_on_github",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_review_release_tag",
        reason="Gitea-only release tag 可能是正式版本證據;GitHub primary 前需確認是否補 tag。",
        next_review="確認 tag 對應 release / artifact / deploy marker,再以單 repo approval 決定是否同步。",
        gitea_sha=item["sha"],
    )


def classify_tag_only_github(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a tag that exists only on the GitHub side.

    Tags whose name starts with ``uat-`` get their own lane and wording;
    all other tags fall into the generic GitHub-only manual review lane.

    Args:
        item: Mapping with the tag ``name`` and its ``sha``.
        repo_risk: Risk label of the owning repo.
    """
    name = item["name"]
    if name.startswith("uat-"):
        lane = "github_only_uat_tag"
        reason = "GitHub-only UAT tag 可能是舊驗收或臨時發布標記,不得自動刪除或搬回 Gitea。"
        next_review = "確認 UAT tag 是否仍需保留為稽核 evidence;若已過期,再列入封存決策。"
    else:
        lane = "github_only_manual_review"
        reason = "GitHub-only tag 可能代表 GitHub 端 release evidence,需要人工判定。"
        next_review = "確認 tag owner、release 用途與是否需要與 Gitea 對齊。"

    return classification_item(
        ref_type="tag",
        ref_name=name,
        lane=lane,
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_review_github_only",
        reason=reason,
        next_review=next_review,
        github_sha=item["sha"],
    )


def classify_main_mismatch(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a ``main`` branch whose SHA differs between the two sides.

    Args:
        item: Mapping with ``name``, ``gitea_sha`` and ``github_sha``.
        repo_risk: Risk label of the owning repo.
    """
    return classification_item(
        ref_type="branch",
        ref_name=item["name"],
        lane="main_truth_required",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_truth_required",
        reason="兩端 main SHA 不一致,這是 GitHub primary / deploy control plane 的硬阻塞。",
        next_review="先確認目前 production deploy 真相來源、deploy marker、rollback 點,再決定單 repo reconcile。",
        gitea_sha=item["gitea_sha"],
        github_sha=item["github_sha"],
    )


def _classify_branch_mismatch(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch present on both sides with differing SHAs."""
    if item["name"] == "main":
        return classify_main_mismatch(item, repo_risk)
    return classification_item(
        ref_type="branch",
        ref_name=item["name"],
        lane="manual_review",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_truth_required",
        reason="Branch 兩端皆存在但 SHA 不一致,需要人工判定哪一端為真相來源。",
        next_review="確認是否 active、是否有 PR / deploy / rollback 依賴,再進單 branch reconcile。",
        gitea_sha=item["gitea_sha"],
        github_sha=item["github_sha"],
    )


def classify_repo(repo: dict[str, Any]) -> dict[str, Any]:
    """Classify every differing ref of one repo entry from the source snapshot.

    Reads ``gitea_repo``, ``github_repo``, ``branch_diff`` (``sha_mismatch``,
    ``only_gitea``, ``only_github``) and ``tag_diff`` (``only_gitea``,
    ``only_github``) from *repo*.

    Returns:
        A dict with the repo names, the repo risk, ``awooop_consumption``,
        ``item_count`` and the ``items`` list. Items are ordered: branch SHA
        mismatches, Gitea-only branches, GitHub-only branches, Gitea-only
        tags, GitHub-only tags.

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    repo_risk = risk_for_repo(str(repo["gitea_repo"]))
    branch_diff = repo["branch_diff"]
    tag_diff = repo["tag_diff"]

    # (entries, classifier) pairs, in the order the items must be emitted.
    plan = (
        (branch_diff["sha_mismatch"], _classify_branch_mismatch),
        (branch_diff["only_gitea"], classify_branch_only_gitea),
        (branch_diff["only_github"], classify_branch_only_github),
        (tag_diff["only_gitea"], classify_tag_only_gitea),
        (tag_diff["only_github"], classify_tag_only_github),
    )
    items: list[dict[str, Any]] = [
        classify(entry, repo_risk) for entries, classify in plan for entry in entries
    ]

    return {
        "gitea_repo": repo["gitea_repo"],
        "github_repo": repo["github_repo"],
        "risk": repo_risk,
        "awooop_consumption": "approval_candidate",
        "item_count": len(items),
        "items": items,
    }


def build_snapshot(args: argparse.Namespace) -> dict[str, Any]:
    """Load the source snapshot and build the classification snapshot.

    Args:
        args: Parsed CLI arguments; ``source_snapshot`` and ``date`` are read.

    Returns:
        The snapshot dict with schema metadata, a ``summary`` of counts per
        classification, the ``still_forbidden`` list and the per-repo results.
    """
    source_path = Path(args.source_snapshot)
    source = load_json(source_path)
    repos = [classify_repo(repo) for repo in source["repos"]]
    all_items = [item for repo in repos for item in repo["items"]]

    # One pass over the items; a Counter yields 0 for absent classifications.
    by_classification = Counter(item["classification"] for item in all_items)

    return {
        "schema_version": "source_control_ref_truth_classification_v1",
        "status": "draft_blocked",
        "date": args.date,
        "default_mode": "classification_only",
        "source_snapshot": str(source_path),
        "summary": {
            "repo_count": len(repos),
            "total_items": len(all_items),
            "manual_truth_required_count": by_classification["manual_truth_required"],
            "deprecated_candidate_count": by_classification[
                "manual_review_deprecated_candidate"
            ],
            "release_tag_review_count": by_classification["manual_review_release_tag"],
            "github_only_review_count": by_classification["manual_review_github_only"],
        },
        # Copied so the snapshot never aliases the module-level constant.
        "still_forbidden": list(DEFAULT_STILL_FORBIDDEN),
        "repos": repos,
    }


def _markdown_item_row(item: dict[str, Any]) -> str:
    """Render one classification record as a Markdown table row."""
    cells = [
        f"`{item['ref_name']}`",
        f"`{item['ref_type']}`",
        f"`{item['lane']}`",
        f"`{item['classification']}`",
        f"`{short_sha(item['gitea_sha'])}`",
        f"`{short_sha(item['github_sha'])}`",
        item["next_review"],
    ]
    return "| " + " | ".join(cells) + " |"


def write_markdown(snapshot: dict[str, Any], path: Path, *, list_limit: int) -> None:
    """Render *snapshot* as a Markdown report and write it to *path* (UTF-8).

    Args:
        snapshot: Snapshot dict as produced by ``build_snapshot``.
        path: Destination file; overwritten if it exists.
        list_limit: Maximum number of rows per repo table. A value ``<= 0``
            lists every item. When rows are cut off, a trailing row states
            how many items were omitted.
    """
    summary = snapshot["summary"]
    lines = [
        "# Source Control Ref Truth Classification",
        "",
        "| 項目 | 內容 |",
        "|------|------|",
        f"| 日期 | {snapshot['date']} |",
        f"| 狀態 | `{snapshot['status']}` |",
        f"| 預設模式 | `{snapshot['default_mode']}` |",
        f"| 來源 snapshot | `{snapshot['source_snapshot']}` |",
        f"| repo count | `{summary['repo_count']}` |",
        f"| total items | `{summary['total_items']}` |",
        "",
        "## 0. 核心結論",
        "",
        "本檔把 branch/tag diff 轉成「人工審核分類」:哪些 ref 需要真相來源判定、哪些可能是 deprecated 候選、哪些 release / UAT tags 需要保留判定。它不是同步計畫,也不授權 fetch、push、delete refs 或 GitHub primary 切換。",
        "",
        "## 1. 摘要",
        "",
        "| 指標 | 數量 |",
        "|------|------|",
        f"| 需要人工指定真相來源 | `{summary['manual_truth_required_count']}` |",
        f"| 可能 deprecated / archive 候選 | `{summary['deprecated_candidate_count']}` |",
        f"| release tag 待審核 | `{summary['release_tag_review_count']}` |",
        f"| GitHub-only ref 待審核 | `{summary['github_only_review_count']}` |",
        "",
        "## 2. Repo 分類",
        "",
    ]

    truncate = list_limit > 0
    for repo in snapshot["repos"]:
        lines += [
            f"### {repo['gitea_repo']} -> {repo['github_repo']}",
            "",
            f"- Risk:`{repo['risk']}`",
            f"- AwoooP consumption:`{repo['awooop_consumption']}`",
            f"- Item count:`{repo['item_count']}`",
            "",
            "| Ref | Type | Lane | Classification | Gitea | GitHub | 下一步 |",
            "|-----|------|------|----------------|-------|--------|--------|",
        ]
        shown = repo["items"][:list_limit] if truncate else repo["items"]
        lines.extend(_markdown_item_row(item) for item in shown)
        if truncate and repo["item_count"] > list_limit:
            lines.append(
                f"| 另有 `{repo['item_count'] - list_limit}` 筆 | | | 完整清單見 JSON snapshot | | | |"
            )
        lines.append("")

    lines += [
        "## 3. AwoooP 消費方式",
        "",
        "1. 只 mirror `source_control_ref_truth_classification_v1`。",
        "2. 可顯示 review lane 與 owner decision queue。",
        "3. 可產生單 repo / 單 ref approval candidate,但不得自動批准。",
        "4. 不得新增 refs sync、delete、force-push、primary switch action。",
        "",
        "## 4. 仍然禁止",
        "",
    ]
    lines.extend(f"- {value}" for value in snapshot["still_forbidden"])
    lines.append("")

    path.write_text("\n".join(lines), encoding="utf-8")


def main() -> int:
    """Parse CLI arguments, write the JSON and Markdown outputs, return 0."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--date", required=True)
    parser.add_argument("--source-snapshot", required=True)
    parser.add_argument("--output-json", required=True)
    parser.add_argument("--output-md", required=True)
    parser.add_argument("--md-list-limit", type=int, default=40)
    args = parser.parse_args()

    snapshot = build_snapshot(args)
    Path(args.output_json).write_text(
        json.dumps(snapshot, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )
    write_markdown(snapshot, Path(args.output_md), list_limit=args.md_list_limit)

    summary = snapshot["summary"]
    print(
        "OK source-control ref truth classification "
        f"repos={summary['repo_count']} items={summary['total_items']}"
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())