Files
awoooi/scripts/security/source-control-ref-truth-classification.py

372 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
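"""Turn a read-only branch/tag diff into a draft truth-source classification.

This tool only reads an already-redacted ref detail diff snapshot; it does not
call remote Git, fetch, push, or delete any branch/tag. The output is meant
for manual review and for the AwoooP mirror.
"""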
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Any
# Actions listed as still forbidden; attached to every classification record
# and to the snapshot itself.
DEFAULT_STILL_FORBIDDEN = [
    "fetch",
    "push refs",
    "force push",
    "delete refs",
    "create GitHub repo",
    "change repo visibility",
    "switch GitHub primary",
    "disable Gitea",
    "move secret values",
]
# Actions listed as currently allowed; attached to every classification record.
DEFAULT_ALLOWED_NOW = [
    "mirror_classification",
    "display_review_lane",
    "request_single_ref_owner_decision",
    "update_read_only_evidence",
]
def load_json(path: Path) -> dict[str, Any]:
    """Read the file at *path* as UTF-8 text and return the parsed JSON value."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
def risk_for_repo(gitea_repo: str) -> str:
    """Return the risk label for a Gitea repo name.

    Only the exact name ``wooo/awoooi`` is rated ``HIGH``; every other
    value is rated ``MEDIUM``.
    """
    return "HIGH" if gitea_repo == "wooo/awoooi" else "MEDIUM"
def short_sha(value: str) -> str:
    """Return the first eight characters of *value*, or ``""`` if it is falsy."""
    if not value:
        return ""
    return value[:8]
def classification_item(
    *,
    ref_type: str,
    ref_name: str,
    lane: str,
    risk: str,
    proposed_truth_source: str,
    classification: str,
    reason: str,
    next_review: str,
    gitea_sha: str = "",
    github_sha: str = "",
) -> dict[str, Any]:
    """Assemble one classification record for a single ref.

    All parameters are keyword-only and are stored unchanged in the returned
    dict under keys of the same name. ``gitea_sha`` and ``github_sha`` default
    to the empty string.

    Two further keys are added: ``allowed_now`` and ``forbidden_actions``,
    filled from ``DEFAULT_ALLOWED_NOW`` and ``DEFAULT_STILL_FORBIDDEN``.

    Returns:
        A new dict describing the ref and its review lane.
    """
    return {
        "ref_type": ref_type,
        "ref_name": ref_name,
        "lane": lane,
        "risk": risk,
        "proposed_truth_source": proposed_truth_source,
        "classification": classification,
        "reason": reason,
        "next_review": next_review,
        "gitea_sha": gitea_sha,
        "github_sha": github_sha,
        # Copy the defaults: handing out the module-level lists themselves
        # would let a mutation of one record change every other record and
        # the defaults. The serialized output is unaffected by the copy.
        "allowed_now": list(DEFAULT_ALLOWED_NOW),
        "forbidden_actions": list(DEFAULT_STILL_FORBIDDEN),
    }
def classify_branch_only_gitea(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch taken from the ``only_gitea`` branch list.

    Three rules are tried in order:

    * names starting with ``drift/adopt-`` become a LOW-risk deprecation
      candidate;
    * the branch named exactly ``dev`` requires a manual truth decision;
    * everything else falls into the generic manual-review lane.

    *item* must provide ``name`` and ``sha``; the SHA is recorded as the
    Gitea SHA. *repo_risk* is used for every rule except the first one.
    """
    branch_name = item["name"]
    commit = item["sha"]

    if branch_name.startswith("drift/adopt-"):
        verdict = {
            "lane": "archive_or_deprecate_candidate",
            "risk": "LOW",
            "proposed_truth_source": "deprecated_candidate",
            "classification": "manual_review_deprecated_candidate",
            "reason": "drift/adopt 類分支疑似為漂移承接或暫存分支,先標為可能封存/降級候選,但不得自動刪除。",
            "next_review": "由 repo owner 確認是否仍有部署、PR、回滾或稽核用途確認前保留。",
        }
    elif branch_name == "dev":
        verdict = {
            "lane": "active_branch_truth_required",
            "risk": repo_risk,
            "proposed_truth_source": "manual_required",
            "classification": "manual_truth_required",
            "reason": "`dev` 可能是仍在使用的開發分支GitHub 缺少此 ref 前不得判定 GitHub ready。",
            "next_review": "確認 dev 是否仍為有效工作流;若有效,再決定單 branch 同步策略。",
        }
    else:
        verdict = {
            "lane": "manual_review",
            "risk": repo_risk,
            "proposed_truth_source": "manual_required",
            "classification": "manual_review",
            "reason": "此 Gitea-only branch 不符合已知低風險規則,需要人工判定用途。",
            "next_review": "確認 ref owner、是否 active、是否需保留到 GitHub 或改列封存。",
        }

    return classification_item(
        ref_type="branch",
        ref_name=branch_name,
        gitea_sha=commit,
        **verdict,
    )
def classify_branch_only_github(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch taken from the ``only_github`` branch list.

    Every such branch goes to the ``github_only_manual_review`` lane with the
    repo-level risk; ``item["sha"]`` is recorded as the GitHub SHA.
    """
    branch_name = item["name"]
    why = "GitHub-only branch 可能代表 GitHub 端曾有獨立工作,不能用 Gitea 覆蓋或刪除。"
    follow_up = "確認該 branch 是否為有效 refactor/feature 線,並判定是否回補到 Gitea 或保留 GitHub-only。"
    return classification_item(
        ref_type="branch",
        ref_name=branch_name,
        lane="github_only_manual_review",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_review_github_only",
        reason=why,
        next_review=follow_up,
        github_sha=item["sha"],
    )
def classify_tag_only_gitea(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a tag taken from the ``only_gitea`` tag list.

    The tag is placed in the ``release_tag_missing_on_github`` lane with the
    repo-level risk; ``item["sha"]`` is recorded as the Gitea SHA.
    """
    fields = {
        "ref_type": "tag",
        "ref_name": item["name"],
        "lane": "release_tag_missing_on_github",
        "risk": repo_risk,
        "proposed_truth_source": "manual_required",
        "classification": "manual_review_release_tag",
        "reason": "Gitea-only release tag 可能是正式版本證據GitHub primary 前需確認是否補 tag。",
        "next_review": "確認 tag 對應 release / artifact / deploy marker再以單 repo approval 決定是否同步。",
        "gitea_sha": item["sha"],
    }
    return classification_item(**fields)
def classify_tag_only_github(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a tag taken from the ``only_github`` tag list.

    Tags whose name starts with ``uat-`` get the ``github_only_uat_tag`` lane
    and UAT-specific wording; all others get ``github_only_manual_review``.
    Both share the ``manual_review_github_only`` classification, and
    ``item["sha"]`` is recorded as the GitHub SHA.
    """
    tag_name = item["name"]
    is_uat = tag_name.startswith("uat-")

    lane, why, follow_up = (
        (
            "github_only_uat_tag",
            "GitHub-only UAT tag 可能是舊驗收或臨時發布標記,不得自動刪除或搬回 Gitea。",
            "確認 UAT tag 是否仍需保留為稽核 evidence若已過期再列入封存決策。",
        )
        if is_uat
        else (
            "github_only_manual_review",
            "GitHub-only tag 可能代表 GitHub 端 release evidence需要人工判定。",
            "確認 tag owner、release 用途與是否需要與 Gitea 對齊。",
        )
    )

    return classification_item(
        ref_type="tag",
        ref_name=tag_name,
        lane=lane,
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_review_github_only",
        reason=why,
        next_review=follow_up,
        github_sha=item["sha"],
    )
def classify_main_mismatch(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch SHA mismatch entry in the ``main_truth_required`` lane.

    *item* must provide ``name``, ``gitea_sha`` and ``github_sha``; both SHAs
    are carried into the record. The caller decides which entries count as
    the main branch.
    """
    why = "兩端 main SHA 不一致,這是 GitHub primary / deploy control plane 的硬阻塞。"
    follow_up = "先確認目前 production deploy 真相來源、deploy marker、rollback 點,再決定單 repo reconcile。"
    return classification_item(
        ref_type="branch",
        ref_name=item["name"],
        lane="main_truth_required",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_truth_required",
        reason=why,
        next_review=follow_up,
        gitea_sha=item["gitea_sha"],
        github_sha=item["github_sha"],
    )
def classify_repo(repo: dict[str, Any]) -> dict[str, Any]:
    """Classify every differing ref of one repo entry from the diff snapshot.

    Reads ``gitea_repo``, ``github_repo``, ``branch_diff`` and ``tag_diff``
    from *repo*. Records are produced in this order: branch SHA mismatches,
    Gitea-only branches, GitHub-only branches, Gitea-only tags, GitHub-only
    tags.

    Returns:
        A dict with the repo names, the repo-level risk, a fixed
        ``awooop_consumption`` marker, the record count and the records.
    """
    repo_risk = risk_for_repo(str(repo["gitea_repo"]))
    branch_diff = repo["branch_diff"]
    tag_diff = repo["tag_diff"]

    def diverged_branch(entry: dict[str, str]) -> dict[str, Any]:
        # A mismatch on "main" has its own lane; any other branch present on
        # both sides with different SHAs goes to generic manual review.
        if entry["name"] == "main":
            return classify_main_mismatch(entry, repo_risk)
        return classification_item(
            ref_type="branch",
            ref_name=entry["name"],
            lane="manual_review",
            risk=repo_risk,
            proposed_truth_source="manual_required",
            classification="manual_truth_required",
            reason="Branch 兩端皆存在但 SHA 不一致,需要人工判定哪一端為真相來源。",
            next_review="確認是否 active、是否有 PR / deploy / rollback 依賴,再進單 branch reconcile。",
            gitea_sha=entry["gitea_sha"],
            github_sha=entry["github_sha"],
        )

    records: list[dict[str, Any]] = [
        diverged_branch(entry) for entry in branch_diff["sha_mismatch"]
    ]

    # NOTE(review): only "only_gitea" / "only_github" are read from tag_diff;
    # if the snapshot also carries tag SHA mismatches they are not classified
    # here — confirm this is intended.
    one_sided = (
        (branch_diff, "only_gitea", classify_branch_only_gitea),
        (branch_diff, "only_github", classify_branch_only_github),
        (tag_diff, "only_gitea", classify_tag_only_gitea),
        (tag_diff, "only_github", classify_tag_only_github),
    )
    for diff, side, classify in one_sided:
        records.extend(classify(entry, repo_risk) for entry in diff[side])

    return {
        "gitea_repo": repo["gitea_repo"],
        "github_repo": repo["github_repo"],
        "risk": repo_risk,
        "awooop_consumption": "approval_candidate",
        "item_count": len(records),
        "items": records,
    }
def build_snapshot(args: argparse.Namespace) -> dict[str, Any]:
    """Build the classification snapshot from the parsed CLI arguments.

    Loads the JSON file named by ``args.source_snapshot``, classifies each
    entry of its ``repos`` list, and wraps the result with fixed metadata,
    ``args.date`` and per-classification summary counts.
    """
    snapshot_path = Path(args.source_snapshot)
    raw = load_json(snapshot_path)
    classified_repos = [classify_repo(entry) for entry in raw["repos"]]

    # One label per record across all repos; the summary only needs counts.
    labels = [
        record["classification"]
        for classified in classified_repos
        for record in classified["items"]
    ]
    summary = {
        "repo_count": len(classified_repos),
        "total_items": len(labels),
        "manual_truth_required_count": labels.count("manual_truth_required"),
        "deprecated_candidate_count": labels.count("manual_review_deprecated_candidate"),
        "release_tag_review_count": labels.count("manual_review_release_tag"),
        "github_only_review_count": labels.count("manual_review_github_only"),
    }

    return {
        "schema_version": "source_control_ref_truth_classification_v1",
        "status": "draft_blocked",
        "date": args.date,
        "default_mode": "classification_only",
        "source_snapshot": str(snapshot_path),
        "summary": summary,
        "still_forbidden": DEFAULT_STILL_FORBIDDEN,
        "repos": classified_repos,
    }
def write_markdown(snapshot: dict[str, Any], path: Path, *, list_limit: int) -> None:
    """Render *snapshot* as a Markdown report and write it to *path* (UTF-8).

    The report holds a metadata table, a summary table, one section per repo
    with a table of its records, the consumption rules and the forbidden
    actions.

    Args:
        snapshot: Snapshot dict as produced by ``build_snapshot``.
        path: Destination file; overwritten if it exists.
        list_limit: Maximum number of record rows per repo. A value of zero
            or less shows every record. When a repo's ``item_count`` exceeds
            the limit, one extra row states how many records were omitted.
    """
    summary = snapshot["summary"]

    def render_row(item: dict[str, Any]) -> str:
        # One table row per record; SHAs are shortened for readability.
        cells = (
            f"`{item['ref_name']}`",
            f"`{item['ref_type']}`",
            f"`{item['lane']}`",
            f"`{item['classification']}`",
            f"`{short_sha(item['gitea_sha'])}`",
            f"`{short_sha(item['github_sha'])}`",
            item["next_review"],
        )
        return "| " + " | ".join(cells) + " |"

    document = [
        "# Source Control Ref Truth Classification",
        "",
        "| 項目 | 內容 |",
        "|------|------|",
        f"| 日期 | {snapshot['date']} |",
        f"| 狀態 | `{snapshot['status']}` |",
        f"| 預設模式 | `{snapshot['default_mode']}` |",
        f"| 來源 snapshot | `{snapshot['source_snapshot']}` |",
        f"| repo count | `{summary['repo_count']}` |",
        f"| total items | `{summary['total_items']}` |",
        "",
        "## 0. 核心結論",
        "",
        "本檔把 branch/tag diff 轉成「人工審核分類」:哪些 ref 需要真相來源判定、哪些可能是 deprecated 候選、哪些 release / UAT tags 需要保留判定。它不是同步計畫,也不授權 fetch、push、delete refs 或 GitHub primary 切換。",
        "",
        "## 1. 摘要",
        "",
        "| 指標 | 數量 |",
        "|------|------|",
        f"| 需要人工指定真相來源 | `{summary['manual_truth_required_count']}` |",
        f"| 可能 deprecated / archive 候選 | `{summary['deprecated_candidate_count']}` |",
        f"| release tag 待審核 | `{summary['release_tag_review_count']}` |",
        f"| GitHub-only ref 待審核 | `{summary['github_only_review_count']}` |",
        "",
        "## 2. Repo 分類",
        "",
    ]

    for repo in snapshot["repos"]:
        document += [
            f"### {repo['gitea_repo']} -> {repo['github_repo']}",
            "",
            f"- Risk`{repo['risk']}`",
            f"- AwoooP consumption`{repo['awooop_consumption']}`",
            f"- Item count`{repo['item_count']}`",
            "",
            "| Ref | Type | Lane | Classification | Gitea | GitHub | 下一步 |",
            "|-----|------|------|----------------|-------|--------|--------|",
        ]
        records = repo["items"]
        shown = records[:list_limit] if list_limit > 0 else records
        document += [render_row(record) for record in shown]
        if list_limit > 0 and repo["item_count"] > list_limit:
            # Tell the reader how many rows were cut off by the limit.
            document.append(
                f"| 另有 `{repo['item_count'] - list_limit}` 筆 | | | 完整清單見 JSON snapshot | | | |"
            )
        document.append("")

    document += [
        "## 3. AwoooP 消費方式",
        "",
        "1. 只 mirror `source_control_ref_truth_classification_v1`。",
        "2. 可顯示 review lane 與 owner decision queue。",
        "3. 可產生單 repo / 單 ref approval candidate但不得自動批准。",
        "4. 不得新增 refs sync、delete、force-push、primary switch action。",
        "",
        "## 4. 仍然禁止",
        "",
    ]
    document += [f"- {action}" for action in snapshot["still_forbidden"]]
    document.append("")

    path.write_text("\n".join(document), encoding="utf-8")
def main() -> int:
    """Parse the CLI arguments, write the JSON and Markdown outputs, return 0.

    Required options: ``--date``, ``--source-snapshot``, ``--output-json``
    and ``--output-md``. ``--md-list-limit`` (int, default 40) caps the rows
    listed per repo in the Markdown report.
    """
    cli = argparse.ArgumentParser()
    for flag in ("--date", "--source-snapshot", "--output-json", "--output-md"):
        cli.add_argument(flag, required=True)
    cli.add_argument("--md-list-limit", type=int, default=40)
    options = cli.parse_args()

    snapshot = build_snapshot(options)

    # The JSON snapshot is the complete record; keep non-ASCII text readable.
    serialized = json.dumps(snapshot, ensure_ascii=False, indent=2) + "\n"
    Path(options.output_json).write_text(serialized, encoding="utf-8")

    write_markdown(snapshot, Path(options.output_md), list_limit=options.md_list_limit)

    totals = snapshot["summary"]
    print(
        "OK source-control ref truth classification "
        f"repos={totals['repo_count']} items={totals['total_items']}"
    )
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())