Files
awoooi/scripts/security/host-service-change-evidence-acceptance.py
Your Name 8294a05456
All checks were successful
Code Review / ai-code-review (push) Successful in 22s
CD Pipeline / tests (push) Successful in 1m35s
CD Pipeline / build-and-deploy (push) Successful in 3m53s
CD Pipeline / post-deploy-checks (push) Successful in 1m26s
feat(iwooos): 新增主機服務變更證據驗收 gate
2026-06-15 18:16:34 +08:00

450 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
IwoooS Docker / systemd / host service 變更證據驗收只讀帳本產生器。
本工具讀取 host service owner response acceptance snapshot,建立未來
主機重啟、Docker daemon 競爭、compose stack 變動、systemd failed unit、
repair-bot / runner 互斥、cold-start recovery 與服務恢復證據如何收件、
補件、拒收或進 reviewer review 的 metadata-only ledger。它不 SSH、
不讀 live host、不執行 docker / systemctl、不呼叫 repair-bot、不跑
Ansible、不做 route smoke、不保存 raw log / raw config。
"""
from __future__ import annotations
import argparse
import json
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
# Fixed UTC+08:00 offset. build_report() stamps a report with the current time
# in this zone when the caller does not pass an explicit timestamp.
TAIPEI = timezone(timedelta(hours=8))
# Names of the evidence fields attached to every change-evidence candidate.
# The list is emitted verbatim in the report and its length is reported in the
# summary, so entries and their order are part of the output.
CHANGE_EVIDENCE_FIELDS = [
    # Candidate identity and the surface descriptors copied from the source.
    "change_evidence_candidate_id",
    "source_acceptance_candidate_id",
    "surface_id",
    "label",
    "expected_host_scope",
    "config_kind",
    "service_scope",
    "control_tier",
    "write_capable_surface",
    "requires_live_evidence",
    # Who changed what, and why.
    "change_or_incident_ref",
    "actor_role_or_team",
    "decision",
    "decision_reason",
    "affected_scope",
    # Service / daemon / unit state references.
    "boot_or_restart_window_ref",
    "before_service_state_ref",
    "after_service_state_ref",
    "docker_daemon_state_ref",
    "compose_stack_state_ref",
    "systemd_unit_state_ref",
    "failed_unit_review_ref",
    "port_binding_state_ref",
    "network_gateway_impact_ref",
    "dependency_impact_ref",
    "cold_start_sequence_ref",
    "runner_or_repair_bot_contention_ref",
    "backup_job_overlap_ref",
    # Recovery, health, alerting and notification references.
    "public_route_recovery_refs",
    "admin_route_recovery_refs",
    "agent_provider_health_refs",
    "monitoring_alert_refs",
    "operator_notification_refs",
    "cross_project_sync_ref",
    "customer_impact_ref",
    "incident_severity",
    "restoration_time_ref",
    # Maintenance, rollback and review bookkeeping.
    "maintenance_window",
    "rollback_owner",
    "rollback_plan_ref",
    "postcheck_evidence_refs",
    "change_freeze_rule",
    "reviewer_outcome",
    "followup_owner",
    "not_approval",
]
# Evidence field names listed as required. The list is emitted verbatim in the
# report and on every candidate; this module itself performs no validation
# against it.
REQUIRED_EVIDENCE_FIELDS = [
    # Who changed what, and why.
    "change_or_incident_ref",
    "actor_role_or_team",
    "decision",
    "decision_reason",
    "affected_scope",
    # Service / daemon / unit state references.
    "boot_or_restart_window_ref",
    "before_service_state_ref",
    "after_service_state_ref",
    "docker_daemon_state_ref",
    "compose_stack_state_ref",
    "systemd_unit_state_ref",
    "failed_unit_review_ref",
    "port_binding_state_ref",
    "dependency_impact_ref",
    "cold_start_sequence_ref",
    "runner_or_repair_bot_contention_ref",
    # Recovery, health, alerting and notification references.
    "public_route_recovery_refs",
    "admin_route_recovery_refs",
    "agent_provider_health_refs",
    "monitoring_alert_refs",
    "operator_notification_refs",
    "cross_project_sync_ref",
    "restoration_time_ref",
    # Rollback and post-check.
    "rollback_owner",
    "postcheck_evidence_refs",
]
# Reviewer checklist emitted in the report. Each entry is a dict with a stable
# "check_id" and a human-readable "instruction"; candidates carry only the ids.
# The instruction texts are runtime output and are kept exactly as written.
REVIEWER_CHECKS = [
    {"check_id": check_id, "instruction": instruction}
    for check_id, instruction in (
        ("change_ref_present", "必須有可追溯的 change / incident ref。"),
        ("actor_role_traceable", "必須標示 actor role / team不接受匿名 restart、kill、compose action 或 daemon 操作。"),
        ("decision_reason_present", "decision 與 decision reason 必須同時存在。"),
        ("affected_scope_matches_surface", "affected scope 必須能對回既有 host service surface。"),
        ("boot_or_restart_window_present", "涉及重啟、cold-start 或服務恢復時,必須有 boot / restart / recovery 時間 ref。"),
        ("before_after_service_state_present", "需有 before / after service state ref不能只寫服務已恢復。"),
        ("docker_daemon_state_present", "Docker daemon active、starting、failed、contention 或 socket state 必須有脫敏 metadata ref。"),
        ("compose_stack_state_present", "Compose stack / container state 只能收狀態摘要 ref不保存 raw docker ps dump。"),
        ("systemd_unit_state_present", "systemd failed unit、restart policy 或 service degraded 需有摘要 ref。"),
        ("failed_unit_review_present", "必須確認 failed unit 是否與事故、restart 或服務恢復相關。"),
        ("port_binding_state_present", "必須確認 host port、container port、proxy、firewall / gateway 狀態是否一致。"),
        ("dependency_impact_present", "必須列出上游、下游、資料庫、queue、registry、AI provider、public route 與 monitoring 影響。"),
        ("cold_start_sequence_present", "必須提供 cold-start / recovery sequence ref不接受憑印象重啟。"),
        ("runner_repair_bot_contention_present", "必須確認 runner、repair-bot、backup job、iptables / xtables 或 compose action 是否競爭。"),
        ("route_recovery_evidence_present", "public / admin route 受影響時需有恢復 ref無影響也需說明不適用。"),
        ("agent_provider_health_present", "AI provider、Ollama proxy、agent 或 webhook 受影響時需有健康 ref。"),
        ("monitoring_alert_ref_present", "需列 monitoring / alert / incident ref不能只靠人工觀察。"),
        ("operator_notification_present", "需提供已通知受影響產品、owner 或 Session 的脫敏 ref。"),
        ("cross_project_sync_present", "若影響 AwoooP、IwoooS、agent-bounty、StockPlatform、公開網站或監控需有跨專案同步 ref。"),
        ("restoration_time_present", "已恢復事故需提供恢復時間;未恢復需提供 still-degraded ref。"),
        ("rollback_owner_present", "rollback owner 與 rollback plan 必須存在。"),
        ("postcheck_evidence_present", "post-check evidence 必須覆蓋 service、route、agent、queue、monitoring 與 rollback stop condition。"),
        ("secret_or_raw_payload_absent", "不得包含 secret、env dump、raw docker logs、raw systemd journal、private key 或 cookie。"),
        ("no_false_green_service_health", "不得把服務 healthy、route 200、container up 或 dashboard up 當成 config / recovery 已驗收。"),
        ("no_runtime_authorization", "驗收證據不等於允許 SSH、Docker、systemctl、repair-bot、Ansible、host write 或 route smoke。"),
        ("counts_transition_safe", "只有 reviewer record 能更新 accepted count且不得同時開 runtime gate。"),
    )
]
# Outcome lanes emitted in the report. Each entry is a dict with a stable
# "lane_id" and a human-readable "meaning"; candidates carry only the ids.
# The meaning texts are runtime output and are kept exactly as written.
OUTCOME_LANES = [
    {"lane_id": lane_id, "meaning": meaning}
    for lane_id, meaning in (
        ("waiting_change_evidence", "尚未收到 host service 變更 / 事故證據;所有 accepted / runtime count 維持 0。"),
        ("quarantine_secret_or_raw_payload", "收到 secret、env dump、raw docker logs、raw journal、raw compose 或未脫敏 host config 時只能隔離。"),
        ("reject_unattributed_restart", "無 actor、無 owner、無 affected scope、無 rollback 或無 post-check 的 restart / reload / kill 不得驗收。"),
        ("request_recovery_supplement", "缺 before / after、daemon state、compose state、systemd state、dependency、route recovery 或 cross-project sync 時要求補件。"),
        ("incident_backfill_required", "涉及服務異常、靜默重啟、端口事故、cold-start recovery 或 daemon contention 時必須回補。"),
        ("no_false_green_supplement_required", "只用 route 200、container up、dashboard up 或服務 healthy 代表恢復時,要求補 no-false-green 證據。"),
        ("ready_for_host_service_review", "metadata 合格後,只能進 host service reviewer review。"),
        ("owner_review_only_update", "只允許更新 reviewer ledger不改 compose、systemd、repair-bot、Ansible 或 host。"),
        ("waiting_maintenance_window", "若未來要 restart / repair / apply仍需獨立維護窗口。"),
        ("waiting_runtime_gate", "即使 change evidence acceptedruntime gate 仍需獨立人工批准。"),
    )
]
# Action identifiers listed as blocked. The list is emitted verbatim in the
# report and on every candidate, and its length is reported in the summary.
BLOCKED_ACTIONS = [
    # Host access.
    "ssh_read",
    "ssh_write",
    "live_host_read",
    # Docker.
    "docker_ps_live_read",
    "docker_compose_up",
    "docker_compose_down",
    "docker_compose_pull",
    "docker_restart",
    "docker_kill",
    # systemd.
    "systemctl_restart",
    "systemctl_reload",
    "systemctl_kill",
    # Automation and privileged host changes.
    "repair_bot_execute",
    "ansible_apply",
    "sudo_action",
    "host_file_write",
    "firewall_change",
    "port_change",
    # Network probing and gateway reloads.
    "route_smoke",
    "public_gateway_reload",
    "nginx_reload",
    "active_scan",
    # Secret / raw payload handling.
    "secret_value_collection",
    "raw_live_config_storage",
    "raw_docker_log_storage",
    "raw_journal_storage",
    "raw_env_dump_storage",
    # Review shortcuts.
    "accept_restart_without_actor",
    "accept_restart_without_window",
    "accept_restart_without_rollback",
    "accept_recovery_without_postcheck",
    "accept_service_healthy_as_config_accepted",
    "skip_dependency_map_review",
    "skip_port_binding_review",
    "skip_cold_start_sequence",
    "hide_daemon_runner_contention",
    "mark_change_evidence_accepted_without_reviewer_record",
    # Runtime gate and UI.
    "open_runtime_gate",
    "add_action_button",
]
def git_short_sha(root: Path) -> str:
    """Return the abbreviated HEAD commit hash of the repository at *root*.

    The hash is best-effort report metadata: when it cannot be obtained the
    placeholder ``"unknown"`` is returned instead of raising.

    Args:
        root: Directory in which ``git rev-parse --short HEAD`` is executed.

    Returns:
        The stripped command output, or ``"unknown"`` when git cannot be
        started, exits with a non-zero status, produces undecodable output,
        or prints nothing.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--short", "HEAD"],
            cwd=root,
            check=True,
            capture_output=True,
            text=True,
        )
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        # OSError: git is not installed or *root* cannot be used as cwd.
        # SubprocessError: non-zero exit (CalledProcessError), e.g. not a repo.
        # UnicodeDecodeError: text=True could not decode the output.
        # Anything else is a programming error and should surface.
        return "unknown"
    # An empty string would be a misleading commit id in the report.
    return result.stdout.strip() or "unknown"
def load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 encoded JSON and return its top-level object.

    Args:
        path: File containing the JSON document.

    Returns:
        The decoded top-level JSON object.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the content is not valid UTF-8 / JSON (both
            ``UnicodeDecodeError`` and ``json.JSONDecodeError`` are
            ``ValueError`` subclasses), or if the top-level value is not a
            JSON object.
    """
    data = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        # The declared return type is a dict; reject other JSON values here,
        # naming the file, rather than letting a later dict-method call fail.
        raise ValueError(
            f"{path}: expected a JSON object at top level, got {type(data).__name__}"
        )
    return data
def build_candidate(source: dict[str, Any]) -> dict[str, Any]:
    """Create the initial change-evidence ledger entry for one source candidate.

    The entry copies the surface descriptors from *source*, fills every
    evidence slot with an empty placeholder (``None``, ``[]`` or
    ``"pending_change_evidence"``), attaches the module-level field / check /
    lane / blocked-action catalogues, and sets every acceptance and
    authorization flag to ``False``. ``not_approval`` is the only ``True`` flag.

    Args:
        source: One acceptance candidate. The keys ``surface_id``,
            ``acceptance_candidate_id``, ``label``, ``expected_host_scope``,
            ``config_kind``, ``service_scope``, ``control_tier``,
            ``write_capable_surface`` and ``requires_live_evidence`` are read
            by subscript.

    Returns:
        A new dict describing the change-evidence candidate.

    Raises:
        KeyError: If *source* is a dict missing one of the keys listed above.
    """
    pending = "pending_change_evidence"
    waiting = "waiting_change_evidence"
    surface_id = source["surface_id"]

    candidate: dict[str, Any] = {
        "change_evidence_candidate_id": f"host_service_change_evidence:{surface_id}",
        "status": waiting,
        "source_acceptance_candidate_id": source["acceptance_candidate_id"],
        "surface_id": surface_id,
    }

    # Surface descriptors are copied from the source under the same key.
    for copied_key in (
        "label",
        "expected_host_scope",
        "config_kind",
        "service_scope",
        "control_tier",
        "write_capable_surface",
        "requires_live_evidence",
    ):
        candidate[copied_key] = source[copied_key]

    # Evidence slots start empty. Each [] is a fresh list per candidate.
    candidate.update(
        {
            "change_or_incident_ref": None,
            "actor_role_or_team": pending,
            "decision": pending,
            "decision_reason": pending,
            "affected_scope": pending,
            "boot_or_restart_window_ref": None,
            "before_service_state_ref": None,
            "after_service_state_ref": None,
            "docker_daemon_state_ref": None,
            "compose_stack_state_ref": None,
            "systemd_unit_state_ref": None,
            "failed_unit_review_ref": None,
            "port_binding_state_ref": None,
            "network_gateway_impact_ref": None,
            "dependency_impact_ref": None,
            "cold_start_sequence_ref": None,
            "runner_or_repair_bot_contention_ref": None,
            "backup_job_overlap_ref": None,
            "public_route_recovery_refs": [],
            "admin_route_recovery_refs": [],
            "agent_provider_health_refs": [],
            "monitoring_alert_refs": [],
            "operator_notification_refs": [],
            "cross_project_sync_ref": None,
            "customer_impact_ref": None,
            "incident_severity": pending,
            "restoration_time_ref": None,
            "maintenance_window": pending,
            "rollback_owner": pending,
            "rollback_plan_ref": None,
            "postcheck_evidence_refs": [],
            "change_freeze_rule": pending,
            "reviewer_outcome": waiting,
            "followup_owner": pending,
        }
    )

    # Catalogues: the two field lists and the blocked actions are the
    # module-level list objects themselves; checks and lanes are id-only lists.
    candidate.update(
        {
            "change_evidence_fields": CHANGE_EVIDENCE_FIELDS,
            "required_evidence_fields": REQUIRED_EVIDENCE_FIELDS,
            "reviewer_checks": [check["check_id"] for check in REVIEWER_CHECKS],
            "outcome_lanes": [lane["lane_id"] for lane in OUTCOME_LANES],
            "blocked_actions": BLOCKED_ACTIONS,
        }
    )

    candidate["not_approval"] = True

    # Every acceptance / authorization flag starts as False.
    candidate.update(
        dict.fromkeys(
            (
                "change_evidence_received",
                "change_evidence_accepted",
                "change_evidence_rejected",
                "change_evidence_quarantined",
                "recovery_supplement_requested",
                "actor_identified",
                "boot_or_restart_window_accepted",
                "before_service_state_accepted",
                "after_service_state_accepted",
                "docker_daemon_state_accepted",
                "compose_stack_state_accepted",
                "systemd_unit_state_accepted",
                "failed_unit_review_accepted",
                "port_binding_state_accepted",
                "dependency_impact_accepted",
                "cold_start_sequence_accepted",
                "runner_or_repair_bot_contention_accepted",
                "public_route_recovery_accepted",
                "admin_route_recovery_accepted",
                "agent_provider_health_accepted",
                "monitoring_alert_accepted",
                "operator_notification_accepted",
                "cross_project_sync_accepted",
                "restoration_time_accepted",
                "rollback_owner_accepted",
                "postcheck_evidence_accepted",
                "host_write_authorized",
                "ssh_read_authorized",
                "ssh_write_authorized",
                "live_host_read_authorized",
                "docker_compose_action_authorized",
                "docker_restart_authorized",
                "systemctl_action_authorized",
                "repair_bot_execution_authorized",
                "ansible_apply_authorized",
                "sudo_action_authorized",
                "route_smoke_authorized",
                "secret_value_collection_allowed",
                "active_scan_authorized",
                "runtime_gate",
                "action_buttons_allowed",
            ),
            False,
        )
    )
    return candidate
def build_report(
    root: Path,
    owner_response_acceptance: dict[str, Any],
    generated_at: str | None,
) -> dict[str, Any]:
    """Assemble the change-evidence acceptance report.

    One candidate is built per entry of
    ``owner_response_acceptance["acceptance_candidates"]`` (an absent key is
    treated as an empty list). The summary reports catalogue sizes and
    candidate counts; every received / accepted / authorized counter is the
    literal ``0`` and the two coverage percentages are the literals 58 and 62.
    Every execution boundary is ``False`` except ``not_authorization``.

    Args:
        root: Repository root, passed to ``git_short_sha``.
        owner_response_acceptance: Parsed owner-response acceptance snapshot.
        generated_at: Timestamp to record; when falsy, the current time in
            ``TAIPEI`` formatted to seconds is used instead.

    Returns:
        The report as a JSON-serialisable dict.
    """
    if generated_at:
        report_time = generated_at
    else:
        report_time = datetime.now(TAIPEI).isoformat(timespec="seconds")

    sources = owner_response_acceptance.get("acceptance_candidates", [])
    candidates = [build_candidate(entry) for entry in sources]

    summary: dict[str, Any] = {
        "source_acceptance_candidate_count": len(sources),
        "change_evidence_candidate_count": len(candidates),
        "write_capable_change_evidence_candidate_count": sum(
            1 for entry in candidates if entry["write_capable_surface"]
        ),
        "live_evidence_required_candidate_count": sum(
            1 for entry in candidates if entry["requires_live_evidence"]
        ),
        "change_evidence_field_count": len(CHANGE_EVIDENCE_FIELDS),
        "required_evidence_field_count": len(REQUIRED_EVIDENCE_FIELDS),
        "reviewer_check_count": len(REVIEWER_CHECKS),
        "outcome_lane_count": len(OUTCOME_LANES),
        "blocked_action_count": len(BLOCKED_ACTIONS),
    }
    # Acceptance / authorization counters are fixed at zero in this report.
    summary.update(
        dict.fromkeys(
            (
                "change_evidence_received_count",
                "change_evidence_accepted_count",
                "change_evidence_rejected_count",
                "change_evidence_quarantined_count",
                "recovery_supplement_requested_count",
                "actor_identified_count",
                "boot_or_restart_window_accepted_count",
                "before_service_state_accepted_count",
                "after_service_state_accepted_count",
                "docker_daemon_state_accepted_count",
                "compose_stack_state_accepted_count",
                "systemd_unit_state_accepted_count",
                "failed_unit_review_accepted_count",
                "port_binding_state_accepted_count",
                "dependency_impact_accepted_count",
                "cold_start_sequence_accepted_count",
                "runner_or_repair_bot_contention_accepted_count",
                "public_route_recovery_accepted_count",
                "admin_route_recovery_accepted_count",
                "agent_provider_health_accepted_count",
                "monitoring_alert_accepted_count",
                "operator_notification_accepted_count",
                "cross_project_sync_accepted_count",
                "restoration_time_accepted_count",
                "rollback_owner_accepted_count",
                "postcheck_evidence_accepted_count",
                "host_write_authorized_count",
                "ssh_read_authorized_count",
                "ssh_write_authorized_count",
                "live_host_read_authorized_count",
                "docker_compose_action_authorized_count",
                "docker_restart_authorized_count",
                "systemctl_action_authorized_count",
                "repair_bot_execution_authorized_count",
                "ansible_apply_authorized_count",
                "sudo_action_authorized_count",
                "route_smoke_authorized_count",
                "secret_value_collection_allowed_count",
                "active_scan_authorized_count",
                "runtime_gate_count",
                "action_button_count",
            ),
            0,
        )
    )
    summary["coverage_percent_before_acceptance"] = 58
    summary["coverage_percent_after_acceptance"] = 62

    # Nothing is authorized by this report; only the disclaimer flag is True.
    execution_boundaries: dict[str, bool] = dict.fromkeys(
        (
            "request_dispatch_authorized",
            "change_evidence_received",
            "change_evidence_accepted",
            "live_host_read_authorized",
            "host_write_authorized",
            "ssh_read_authorized",
            "ssh_write_authorized",
            "docker_compose_action_authorized",
            "docker_restart_authorized",
            "systemctl_action_authorized",
            "repair_bot_execution_authorized",
            "ansible_apply_authorized",
            "sudo_action_authorized",
            "route_smoke_authorized",
            "secret_value_collection_allowed",
            "active_scan_authorized",
            "runtime_execution_authorized",
            "action_buttons_allowed",
        ),
        False,
    )
    execution_boundaries["not_authorization"] = True

    next_steps = [
        "等待 owner 以脫敏 metadata ref 補 change / incident ref、actor、before / after state、Docker daemon、compose / systemd、failed unit、port binding、dependency、cold-start、route recovery、operator notification、rollback 與 post-check evidence。",
        "收到證據後先做欄位完整性、敏感 payload 隔離與 execution request 拒收;不得直接 SSH、docker、systemctl、repair-bot、Ansible 或 route smoke。",
        "即使 reviewer 接受 change evidence也只能形成 runtime approval package維護窗口、rollback、post-check、stop condition 與人工 runtime gate 需另行批准。",
    ]

    return {
        "schema_version": "host_service_change_evidence_acceptance_v1",
        "generated_at": report_time,
        "git_commit": git_short_sha(root),
        "source_owner_response_acceptance_schema_version": owner_response_acceptance.get("schema_version"),
        "source_owner_response_acceptance_status": owner_response_acceptance.get("status"),
        "status": "change_evidence_acceptance_ready_no_runtime_action",
        "summary": summary,
        "execution_boundaries": execution_boundaries,
        "change_evidence_fields": CHANGE_EVIDENCE_FIELDS,
        "required_evidence_fields": REQUIRED_EVIDENCE_FIELDS,
        "reviewer_checks": REVIEWER_CHECKS,
        "outcome_lanes": OUTCOME_LANES,
        "blocked_actions": BLOCKED_ACTIONS,
        "change_evidence_candidates": candidates,
        "next_steps": next_steps,
    }
def main() -> int:
    """Command-line entry point: build the report and emit it as JSON.

    The owner-response snapshot path is resolved relative to ``--root``. The
    report is serialised with sorted keys and non-ASCII characters preserved.
    With ``--output`` it is written to that path as given (parent directories
    are created, a trailing newline is appended); otherwise it is printed to
    stdout. A one-line status summary is always printed to stderr.

    Returns:
        ``0`` after the report has been emitted.
    """
    cli = argparse.ArgumentParser(description="IwoooS host service change evidence acceptance 產生器")
    cli.add_argument("--root", default=".", help="repo root")
    cli.add_argument(
        "--owner-response-report",
        default="docs/security/host-service-owner-response-acceptance.snapshot.json",
        help="host-service-owner-response-acceptance.py 輸出的 JSON",
    )
    cli.add_argument("--output", help="寫出 JSON 報告")
    cli.add_argument("--generated-at", help="固定報告時間,供 committed snapshot 使用")
    options = cli.parse_args()

    repo_root = Path(options.root).resolve()
    snapshot = load_json(repo_root / options.owner_response_report)
    report = build_report(repo_root, snapshot, options.generated_at)
    rendered = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)

    if not options.output:
        print(rendered)
    else:
        destination = Path(options.output)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered + "\n", encoding="utf-8")

    # Status line goes to stderr so stdout carries only the JSON payload.
    counts = report["summary"]
    status_fields = (
        ("candidates", "change_evidence_candidate_count"),
        ("write_capable", "write_capable_change_evidence_candidate_count"),
        ("checks", "reviewer_check_count"),
        ("lanes", "outcome_lane_count"),
        ("accepted", "change_evidence_accepted_count"),
        ("runtime_gate", "runtime_gate_count"),
    )
    status_line = " ".join(
        ["HOST_SERVICE_CHANGE_EVIDENCE_ACCEPTANCE_OK"]
        + [f"{label}={counts[key]}" for label, key in status_fields]
    )
    print(status_line, file=sys.stderr)
    return 0
# Run the CLI only when executed as a script; the value returned by main()
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())