docs(security): add ref truth classification [skip ci]

This commit is contained in:
Your Name
2026-05-13 09:30:50 +08:00
parent 3139d3ce48
commit 147e86d1de
15 changed files with 4904 additions and 17 deletions

View File

@@ -0,0 +1,371 @@
#!/usr/bin/env python3
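"""Turn a read-only branch/tag diff into a draft source-of-truth classification.

This tool only reads an already-sanitized ref detail diff snapshot; it never
calls a remote Git server and never fetches, pushes, or deletes branches/tags.
The output is intended for human review and for the AwoooP mirror.
"""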
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Any
# Operations that stay off-limits. This list is attached to every
# classification record as "forbidden_actions", stored on the snapshot as
# "still_forbidden", and rendered as the final bullet list of the Markdown report.
DEFAULT_STILL_FORBIDDEN = [
    "fetch",
    "push refs",
    "force push",
    "delete refs",
    "create GitHub repo",
    "change repo visibility",
    "switch GitHub primary",
    "disable Gitea",
    "move secret values",
]
# Action identifiers attached to every classification record as "allowed_now".
DEFAULT_ALLOWED_NOW = [
    "mirror_classification",
    "display_review_lane",
    "request_single_ref_owner_decision",
    "update_read_only_evidence",
]
def load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the parsed JSON document."""
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
def risk_for_repo(gitea_repo: str) -> str:
    """Return the risk tier for a Gitea repo.

    Only the exact name ``wooo/awoooi`` is rated ``"HIGH"``; every other
    value is rated ``"MEDIUM"``.
    """
    return "HIGH" if gitea_repo == "wooo/awoooi" else "MEDIUM"
def short_sha(value: str) -> str:
    """Return the first eight characters of *value*, or ``""`` when it is falsy."""
    if not value:
        return ""
    return value[:8]
def classification_item(
    *,
    ref_type: str,
    ref_name: str,
    lane: str,
    risk: str,
    proposed_truth_source: str,
    classification: str,
    reason: str,
    next_review: str,
    gitea_sha: str = "",
    github_sha: str = "",
) -> dict[str, Any]:
    """Build one classification record for a single ref.

    Every argument is keyword-only and is stored unchanged under the key of
    the same name. ``gitea_sha`` and ``github_sha`` default to ``""`` for the
    side on which the caller supplies no SHA.

    Returns:
        A dict holding the ten supplied fields plus ``allowed_now`` and
        ``forbidden_actions``, populated from ``DEFAULT_ALLOWED_NOW`` and
        ``DEFAULT_STILL_FORBIDDEN``.
    """
    return {
        "ref_type": ref_type,
        "ref_name": ref_name,
        "lane": lane,
        "risk": risk,
        "proposed_truth_source": proposed_truth_source,
        "classification": classification,
        "reason": reason,
        "next_review": next_review,
        "gitea_sha": gitea_sha,
        "github_sha": github_sha,
        # Copy the defaults so each record owns its lists: mutating one
        # record must not alter the module constants or any other record.
        "allowed_now": list(DEFAULT_ALLOWED_NOW),
        "forbidden_actions": list(DEFAULT_STILL_FORBIDDEN),
    }
def classify_branch_only_gitea(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch listed as present only on the Gitea side.

    Args:
        item: Diff entry; its ``"name"`` and ``"sha"`` keys are read.
        repo_risk: Risk tier used for every case except ``drift/adopt-*``.

    Returns:
        A classification record with ``ref_type="branch"`` and ``gitea_sha``
        set to the entry's SHA. Names starting with ``drift/adopt-`` become
        LOW-risk deprecated candidates, the branch named ``dev`` requires a
        manual truth decision, and anything else goes to plain manual review.
    """
    branch_name = item["name"]
    commit = item["sha"]

    if branch_name.startswith("drift/adopt-"):
        verdict = {
            "lane": "archive_or_deprecate_candidate",
            "risk": "LOW",
            "proposed_truth_source": "deprecated_candidate",
            "classification": "manual_review_deprecated_candidate",
            "reason": "drift/adopt 類分支疑似為漂移承接或暫存分支,先標為可能封存/降級候選,但不得自動刪除。",
            "next_review": "由 repo owner 確認是否仍有部署、PR、回滾或稽核用途確認前保留。",
        }
    elif branch_name == "dev":
        verdict = {
            "lane": "active_branch_truth_required",
            "risk": repo_risk,
            "proposed_truth_source": "manual_required",
            "classification": "manual_truth_required",
            "reason": "`dev` 可能是仍在使用的開發分支GitHub 缺少此 ref 前不得判定 GitHub ready。",
            "next_review": "確認 dev 是否仍為有效工作流;若有效,再決定單 branch 同步策略。",
        }
    else:
        # No known rule matched: leave the decision entirely to a human.
        verdict = {
            "lane": "manual_review",
            "risk": repo_risk,
            "proposed_truth_source": "manual_required",
            "classification": "manual_review",
            "reason": "此 Gitea-only branch 不符合已知低風險規則,需要人工判定用途。",
            "next_review": "確認 ref owner、是否 active、是否需保留到 GitHub 或改列封存。",
        }

    return classification_item(
        ref_type="branch",
        ref_name=branch_name,
        gitea_sha=commit,
        **verdict,
    )
def classify_branch_only_github(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch listed as present only on the GitHub side.

    Args:
        item: Diff entry; its ``"name"`` and ``"sha"`` keys are read.
        repo_risk: Risk tier copied into the record.

    Returns:
        A ``manual_review_github_only`` record in the
        ``github_only_manual_review`` lane, with ``github_sha`` set to the
        entry's SHA.
    """
    fields = {
        "ref_type": "branch",
        "ref_name": item["name"],
        "lane": "github_only_manual_review",
        "risk": repo_risk,
        "proposed_truth_source": "manual_required",
        "classification": "manual_review_github_only",
        "reason": "GitHub-only branch 可能代表 GitHub 端曾有獨立工作,不能用 Gitea 覆蓋或刪除。",
        "next_review": "確認該 branch 是否為有效 refactor/feature 線,並判定是否回補到 Gitea 或保留 GitHub-only。",
        "github_sha": item["sha"],
    }
    return classification_item(**fields)
def classify_tag_only_gitea(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a tag listed as present only on the Gitea side.

    Args:
        item: Diff entry; its ``"name"`` and ``"sha"`` keys are read.
        repo_risk: Risk tier copied into the record.

    Returns:
        A ``manual_review_release_tag`` record in the
        ``release_tag_missing_on_github`` lane, with ``gitea_sha`` set to the
        entry's SHA.
    """
    tag_name = item["name"]
    tag_commit = item["sha"]
    return classification_item(
        ref_type="tag",
        ref_name=tag_name,
        lane="release_tag_missing_on_github",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_review_release_tag",
        reason="Gitea-only release tag 可能是正式版本證據GitHub primary 前需確認是否補 tag。",
        next_review="確認 tag 對應 release / artifact / deploy marker再以單 repo approval 決定是否同步。",
        gitea_sha=tag_commit,
    )
def classify_tag_only_github(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a tag listed as present only on the GitHub side.

    Tags whose name starts with ``uat-`` go to the ``github_only_uat_tag``
    lane; all others go to ``github_only_manual_review``. Both cases share
    the ``manual_review_github_only`` classification.

    Args:
        item: Diff entry; its ``"name"`` and ``"sha"`` keys are read.
        repo_risk: Risk tier copied into the record.

    Returns:
        A classification record with ``ref_type="tag"`` and ``github_sha``
        set to the entry's SHA.
    """
    tag_name = item["name"]

    # Start from the generic wording and override it for UAT tags.
    lane, reason, next_review = (
        "github_only_manual_review",
        "GitHub-only tag 可能代表 GitHub 端 release evidence需要人工判定。",
        "確認 tag owner、release 用途與是否需要與 Gitea 對齊。",
    )
    if tag_name.startswith("uat-"):
        lane, reason, next_review = (
            "github_only_uat_tag",
            "GitHub-only UAT tag 可能是舊驗收或臨時發布標記,不得自動刪除或搬回 Gitea。",
            "確認 UAT tag 是否仍需保留為稽核 evidence若已過期再列入封存決策。",
        )

    return classification_item(
        ref_type="tag",
        ref_name=tag_name,
        lane=lane,
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_review_github_only",
        reason=reason,
        next_review=next_review,
        github_sha=item["sha"],
    )
def classify_main_mismatch(item: dict[str, str], repo_risk: str) -> dict[str, Any]:
    """Classify a branch whose SHA differs between the two sides as a main-truth blocker.

    Args:
        item: Mismatch entry; its ``"name"``, ``"gitea_sha"`` and
            ``"github_sha"`` keys are read.
        repo_risk: Risk tier copied into the record.

    Returns:
        A ``manual_truth_required`` record in the ``main_truth_required``
        lane carrying both SHAs.
    """
    branch_name = item["name"]
    sha_on_gitea = item["gitea_sha"]
    sha_on_github = item["github_sha"]
    return classification_item(
        ref_type="branch",
        ref_name=branch_name,
        lane="main_truth_required",
        risk=repo_risk,
        proposed_truth_source="manual_required",
        classification="manual_truth_required",
        reason="兩端 main SHA 不一致,這是 GitHub primary / deploy control plane 的硬阻塞。",
        next_review="先確認目前 production deploy 真相來源、deploy marker、rollback 點,再決定單 repo reconcile。",
        gitea_sha=sha_on_gitea,
        github_sha=sha_on_github,
    )
def classify_repo(repo: dict[str, Any]) -> dict[str, Any]:
    """Classify every differing ref of one repo entry from the source snapshot.

    Records are emitted in a fixed order: branch SHA mismatches, Gitea-only
    branches, GitHub-only branches, Gitea-only tags, GitHub-only tags.

    Args:
        repo: Repo entry; the keys ``"gitea_repo"``, ``"github_repo"``,
            ``"branch_diff"`` (with ``"sha_mismatch"``, ``"only_gitea"``,
            ``"only_github"``) and ``"tag_diff"`` (with ``"only_gitea"``,
            ``"only_github"``) are read.

    Returns:
        A dict with the two repo names, the repo risk tier, the fixed
        ``awooop_consumption`` marker, ``item_count`` and the ``items`` list.
    """
    repo_risk = risk_for_repo(str(repo["gitea_repo"]))
    branch_diff = repo["branch_diff"]
    tag_diff = repo["tag_diff"]

    def diverged_branch(entry: dict[str, str]) -> dict[str, Any]:
        # `main` gets its own blocker lane; other diverged branches go to
        # manual review but still need a truth-source decision.
        if entry["name"] == "main":
            return classify_main_mismatch(entry, repo_risk)
        return classification_item(
            ref_type="branch",
            ref_name=entry["name"],
            lane="manual_review",
            risk=repo_risk,
            proposed_truth_source="manual_required",
            classification="manual_truth_required",
            reason="Branch 兩端皆存在但 SHA 不一致,需要人工判定哪一端為真相來源。",
            next_review="確認是否 active、是否有 PR / deploy / rollback 依賴,再進單 branch reconcile。",
            gitea_sha=entry["gitea_sha"],
            github_sha=entry["github_sha"],
        )

    items: list[dict[str, Any]] = [
        diverged_branch(entry) for entry in branch_diff["sha_mismatch"]
    ]
    items.extend(
        classify_branch_only_gitea(entry, repo_risk) for entry in branch_diff["only_gitea"]
    )
    items.extend(
        classify_branch_only_github(entry, repo_risk) for entry in branch_diff["only_github"]
    )
    items.extend(
        classify_tag_only_gitea(entry, repo_risk) for entry in tag_diff["only_gitea"]
    )
    items.extend(
        classify_tag_only_github(entry, repo_risk) for entry in tag_diff["only_github"]
    )

    return {
        "gitea_repo": repo["gitea_repo"],
        "github_repo": repo["github_repo"],
        "risk": repo_risk,
        "awooop_consumption": "approval_candidate",
        "item_count": len(items),
        "items": items,
    }
def build_snapshot(args: argparse.Namespace) -> dict[str, Any]:
    """Load the source diff snapshot and build the classification snapshot.

    Args:
        args: Parsed CLI options; ``source_snapshot`` (path of the JSON file
            to read) and ``date`` (copied verbatim into the output) are used.

    Returns:
        The snapshot dict: schema/status markers, the date, the source path,
        a ``summary`` of counts, the ``still_forbidden`` list and the
        per-repo classification results.

    Note:
        The summary buckets count only four classification labels. Records
        classified as plain ``manual_review`` are included in
        ``total_items`` but in none of the buckets.
    """
    source_path = Path(args.source_snapshot)
    source = load_json(source_path)
    repos = [classify_repo(repo) for repo in source["repos"]]

    # Tally classification labels in one pass instead of rescanning every
    # item once per summary bucket.
    total_items = 0
    tally: dict[str, int] = {}
    for repo in repos:
        for item in repo["items"]:
            total_items += 1
            label = item["classification"]
            tally[label] = tally.get(label, 0) + 1

    return {
        "schema_version": "source_control_ref_truth_classification_v1",
        "status": "draft_blocked",
        "date": args.date,
        "default_mode": "classification_only",
        "source_snapshot": str(source_path),
        "summary": {
            "repo_count": len(repos),
            "total_items": total_items,
            "manual_truth_required_count": tally.get("manual_truth_required", 0),
            "deprecated_candidate_count": tally.get("manual_review_deprecated_candidate", 0),
            "release_tag_review_count": tally.get("manual_review_release_tag", 0),
            "github_only_review_count": tally.get("manual_review_github_only", 0),
        },
        # Copy so that mutating the returned snapshot cannot alter the
        # module-level default list.
        "still_forbidden": list(DEFAULT_STILL_FORBIDDEN),
        "repos": repos,
    }
def write_markdown(snapshot: dict[str, Any], path: Path, *, list_limit: int) -> None:
    """Render *snapshot* as a Markdown report and write it to *path* as UTF-8.

    The report contains a header table, a core-conclusion paragraph, the
    summary counts, one table per repo, the AwoooP consumption rules and the
    still-forbidden list.

    Args:
        snapshot: Classification snapshot; ``date``, ``status``,
            ``default_mode``, ``source_snapshot``, ``summary``, ``repos`` and
            ``still_forbidden`` are read.
        path: Destination file; it is overwritten.
        list_limit: Maximum number of rows shown per repo table. A value of
            zero or less shows every item. When rows are cut off, one extra
            row reports how many were omitted.
    """
    summary = snapshot["summary"]
    truncate = list_limit > 0

    out: list[str] = [
        "# Source Control Ref Truth Classification",
        "",
        "| 項目 | 內容 |",
        "|------|------|",
        f"| 日期 | {snapshot['date']} |",
        f"| 狀態 | `{snapshot['status']}` |",
        f"| 預設模式 | `{snapshot['default_mode']}` |",
        f"| 來源 snapshot | `{snapshot['source_snapshot']}` |",
        f"| repo count | `{summary['repo_count']}` |",
        f"| total items | `{summary['total_items']}` |",
        "",
        "## 0. 核心結論",
        "",
        "本檔把 branch/tag diff 轉成「人工審核分類」:哪些 ref 需要真相來源判定、哪些可能是 deprecated 候選、哪些 release / UAT tags 需要保留判定。它不是同步計畫,也不授權 fetch、push、delete refs 或 GitHub primary 切換。",
        "",
        "## 1. 摘要",
        "",
        "| 指標 | 數量 |",
        "|------|------|",
        f"| 需要人工指定真相來源 | `{summary['manual_truth_required_count']}` |",
        f"| 可能 deprecated / archive 候選 | `{summary['deprecated_candidate_count']}` |",
        f"| release tag 待審核 | `{summary['release_tag_review_count']}` |",
        f"| GitHub-only ref 待審核 | `{summary['github_only_review_count']}` |",
        "",
        "## 2. Repo 分類",
        "",
    ]

    for repo in snapshot["repos"]:
        out += [
            f"### {repo['gitea_repo']} -> {repo['github_repo']}",
            "",
            f"- Risk`{repo['risk']}`",
            f"- AwoooP consumption`{repo['awooop_consumption']}`",
            f"- Item count`{repo['item_count']}`",
            "",
            "| Ref | Type | Lane | Classification | Gitea | GitHub | 下一步 |",
            "|-----|------|------|----------------|-------|--------|--------|",
        ]

        shown = repo["items"][:list_limit] if truncate else repo["items"]
        for item in shown:
            cells = [
                f"`{item['ref_name']}`",
                f"`{item['ref_type']}`",
                f"`{item['lane']}`",
                f"`{item['classification']}`",
                f"`{short_sha(item['gitea_sha'])}`",
                f"`{short_sha(item['github_sha'])}`",
                item["next_review"],
            ]
            out.append(f"| {' | '.join(cells)} |")

        # Tell the reader how many rows were cut off by the limit.
        if truncate and repo["item_count"] > list_limit:
            out.append(
                f"| 另有 `{repo['item_count'] - list_limit}` 筆 | | | 完整清單見 JSON snapshot | | | |"
            )
        out.append("")

    out += [
        "## 3. AwoooP 消費方式",
        "",
        "1. 只 mirror `source_control_ref_truth_classification_v1`。",
        "2. 可顯示 review lane 與 owner decision queue。",
        "3. 可產生單 repo / 單 ref approval candidate但不得自動批准。",
        "4. 不得新增 refs sync、delete、force-push、primary switch action。",
        "",
        "## 4. 仍然禁止",
        "",
    ]
    out.extend(f"- {value}" for value in snapshot["still_forbidden"])
    # The trailing empty element makes the joined text end with a newline.
    out.append("")

    path.write_text("\n".join(out), encoding="utf-8")
def main() -> int:
    """Parse the CLI options, write the JSON and Markdown outputs, and return 0.

    Required options: ``--date``, ``--source-snapshot``, ``--output-json``
    and ``--output-md``. ``--md-list-limit`` (int, default 40) caps the rows
    shown per repo table in the Markdown report.
    """
    cli = argparse.ArgumentParser()
    for flag in ("--date", "--source-snapshot", "--output-json", "--output-md"):
        cli.add_argument(flag, required=True)
    cli.add_argument("--md-list-limit", type=int, default=40)
    options = cli.parse_args()

    snapshot = build_snapshot(options)

    # The JSON snapshot is written first, then the Markdown rendering of it.
    json_text = json.dumps(snapshot, ensure_ascii=False, indent=2) + "\n"
    Path(options.output_json).write_text(json_text, encoding="utf-8")
    write_markdown(snapshot, Path(options.output_md), list_limit=options.md_list_limit)

    summary = snapshot["summary"]
    print(
        "OK source-control ref truth classification "
        f"repos={summary['repo_count']} items={summary['total_items']}"
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())