fix(governance): redact legacy agent evidence display terms
All checks were successful
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Successful in 1m27s
CD Pipeline / build-and-deploy (push) Successful in 5m18s
CD Pipeline / post-deploy-checks (push) Successful in 18s

This commit is contained in:
Your Name
2026-06-13 04:46:14 +08:00
parent 047b6d2ea2
commit 1b5eb3c328
6 changed files with 153 additions and 29 deletions

View File

@@ -43,6 +43,7 @@ def load_latest_ai_agent_candidate_operation_dry_run_evidence(
_require_gate_requirements(payload, str(latest))
_require_operator_handoffs(payload, str(latest))
_require_redaction_contract(payload, str(latest))
_require_no_forbidden_display_terms(payload, str(latest))
_require_rollup_consistency(payload, str(latest))
return payload
@@ -250,6 +251,56 @@ def _require_redaction_contract(payload: dict[str, Any], label: str) -> None:
raise ValueError(f"{label}: display redaction fields must remain false: {unsafe}")
def _require_no_forbidden_display_terms(payload: dict[str, Any], label: str) -> None:
forbidden_terms = {
"工作視窗",
"對話內容",
"批准!繼續",
"In app browser",
"My request for Codex",
"browser_context",
"codex_user_message",
"prompt_text",
"raw payload",
"raw_prompt",
"private reasoning",
"private_reasoning",
"chain_of_thought",
"bot_token",
"authorization header",
"authorization_header",
"secret value",
"secret_value",
"raw tool output",
"raw_tool_output",
"raw Telegram payload",
"raw_telegram_payload",
"work window transcript",
"work_window_transcript",
"internal collaboration transcript",
}
hits: list[str] = []
def walk(value: Any, path: str) -> None:
if isinstance(value, dict):
for key, nested in value.items():
walk(nested, f"{path}.{key}" if path else str(key))
return
if isinstance(value, list):
for index, nested in enumerate(value):
walk(nested, f"{path}[{index}]")
return
if isinstance(value, str):
matched = sorted(term for term in forbidden_terms if term in value)
if matched:
hits.append(f"{path}: {', '.join(matched)}")
walk(payload, "")
if hits:
raise ValueError(f"{label}: forbidden display terms found: {hits}")
def _require_rollup_consistency(payload: dict[str, Any], label: str) -> None:
rollups = payload.get("rollups") or {}
truth = payload.get("dry_run_truth") or {}

View File

@@ -40,6 +40,7 @@ def load_latest_ai_agent_runtime_verifier_evidence_review(
_require_schema(payload, str(latest))
_require_runtime_boundaries(payload, str(latest))
_require_review_contract(payload, str(latest))
_require_no_forbidden_display_terms(payload, str(latest))
_require_rollup_consistency(payload, str(latest))
return payload
@@ -115,6 +116,58 @@ def _require_review_contract(payload: dict[str, Any], label: str) -> None:
raise ValueError(f"{label}: {flag} must remain false")
def _require_no_forbidden_display_terms(payload: dict[str, Any], label: str) -> None:
forbidden_terms = {
"工作視窗",
"對話內容",
"批准!繼續",
"In app browser",
"My request for Codex",
"browser_context",
"codex_user_message",
"prompt_text",
"raw payload",
"raw_prompt",
"private reasoning",
"private_reasoning",
"chain_of_thought",
"bot_token",
"authorization header",
"authorization_header",
"secret value",
"secret_value",
"raw tool output",
"raw_tool_output",
"raw Telegram payload",
"raw_telegram_payload",
"unredacted canonical payload",
"unredacted_canonical_payload",
"work window transcript",
"work_window_transcript",
"internal collaboration transcript",
}
hits: list[str] = []
def walk(value: Any, path: str) -> None:
if isinstance(value, dict):
for key, nested in value.items():
walk(nested, f"{path}.{key}" if path else str(key))
return
if isinstance(value, list):
for index, nested in enumerate(value):
walk(nested, f"{path}[{index}]")
return
if isinstance(value, str):
matched = sorted(term for term in forbidden_terms if term in value)
if matched:
hits.append(f"{path}: {', '.join(matched)}")
walk(payload, "")
if hits:
raise ValueError(f"{label}: forbidden display terms found: {hits}")
def _require_rollup_consistency(payload: dict[str, Any], label: str) -> None:
rollups = payload.get("rollups") or {}
checks = payload.get("evidence_checks") or []

View File

@@ -120,6 +120,16 @@ def test_rejects_handoff_runtime_action(tmp_path):
load_latest_ai_agent_candidate_operation_dry_run_evidence(tmp_path)
def test_rejects_forbidden_display_terms(tmp_path):
    """Loading a snapshot whose display text has forbidden terms raises ValueError."""
    tainted = copy.deepcopy(load_latest_ai_agent_candidate_operation_dry_run_evidence())
    first_plan = tainted["verifier_plans"][0]
    # Plant two forbidden terms in an otherwise valid snapshot.
    first_plan["expected_signal"] = "raw payload / private reasoning 不得顯示"
    _write_snapshot(tmp_path, tainted)

    with pytest.raises(ValueError, match="forbidden display terms"):
        load_latest_ai_agent_candidate_operation_dry_run_evidence(tmp_path)
def test_rejects_rollup_mismatch(tmp_path):
data = load_latest_ai_agent_candidate_operation_dry_run_evidence()
bad = copy.deepcopy(data)

View File

@@ -66,6 +66,16 @@ def test_rejects_missing_required_evidence(tmp_path):
load_latest_ai_agent_runtime_verifier_evidence_review(tmp_path)
def test_rejects_forbidden_display_terms(tmp_path):
    """Loading a review whose display text has forbidden terms raises ValueError."""
    tainted = copy.deepcopy(load_latest_ai_agent_runtime_verifier_evidence_review())
    first_check = tainted["evidence_checks"][0]
    # Plant two forbidden terms in an otherwise valid snapshot.
    first_check["review_question"] = "authorization header / secret value 不得顯示"
    _write_snapshot(tmp_path, tainted)

    with pytest.raises(ValueError, match="forbidden display terms"):
        load_latest_ai_agent_runtime_verifier_evidence_review(tmp_path)
def test_rejects_rollup_mismatch(tmp_path):
data = load_latest_ai_agent_runtime_verifier_evidence_review()
bad = copy.deepcopy(data)

View File

@@ -325,7 +325,7 @@
"telegram_send_count": 0,
"secret_value_read_count": 0,
"destructive_action_count": 0,
"blocked_actions": ["secret value read", "paid API call", "authorization header read"],
"blocked_actions": ["機密明文讀取", "paid API call", "授權標頭讀取"],
"required_human_decision": "只能審查 metadata、secret name、費用批准與供應商 fallback 理由;明文與付費呼叫需另行批准。",
"verifier_plan_id": "verifier_redaction_and_cost_boundary",
"rollback_or_noop_plan": "no-op不提供 runtime escalation。",
@@ -361,7 +361,7 @@
"display_name": "redacted evidence hash verifier",
"owner_agent": "hermes",
"verifier_scope": "比對 input / output hash、source refs 與 redaction 欄位。",
"expected_signal": "hash 存在、source ref 可追溯、未顯示 raw payload。",
"expected_signal": "hash 存在、source ref 可追溯、未顯示原始載荷。",
"failure_lane": "退回 evidence collection不進 queue 或 runtime。",
"live_readback_enabled": false,
"writes_result": false,
@@ -409,7 +409,7 @@
"display_name": "live readback allow-list verifier",
"owner_agent": "nemotron",
"verifier_scope": "先審查允許讀回的欄位、redaction 與 failure receipt template。",
"expected_signal": "allow-list 完整、secret / raw payload / private reasoning 都不顯示。",
"expected_signal": "allow-list 完整、機密明文、原始載荷與私有推理內容都不顯示。",
"failure_lane": "阻擋 live readback產生人工檢查清單。",
"live_readback_enabled": false,
"writes_result": false,
@@ -469,7 +469,7 @@
"gate_id": "secret_or_paid_provider_gate",
"display_name": "secret / paid provider gate",
"required_evidence": ["metadata-only scope", "cost approval", "redaction policy", "owner response"],
"missing_or_blocked": ["secret value read is prohibited", "paid provider expansion approval"],
"missing_or_blocked": ["機密明文讀取仍禁止", "paid provider expansion approval"],
"opens_live_execution": false
},
{
@@ -548,15 +548,15 @@
"next_gate"
],
"blocked_display_fields": [
"raw_prompt",
"chain_of_thought",
"telegram_chat_id",
"telegram_message_id",
"bot_token",
"authorization_header",
"secret_value",
"raw_tool_output",
"work_window_transcript"
"未脫敏提示內容",
"推理鏈內容",
"Telegram chat id",
"Telegram message id",
"Bot token 識別欄位",
"授權標頭",
"機密明文",
"原始工具輸出",
"內部協作逐字稿"
]
},
"rollups": {

View File

@@ -39,12 +39,12 @@
"redacted_evidence_refs"
],
"forbidden_evidence": [
"secret_value",
"authorization_header",
"raw_tool_output",
"private_reasoning",
"raw_telegram_payload",
"unredacted_canonical_payload"
"機密明文",
"授權標頭",
"原始工具輸出",
"私有推理內容",
"原始 Telegram 載荷",
"未脫敏 canonical 載荷"
],
"operator_meaning": "這份 review package 用來審查 runtime verifier implementation 是否可以進下一階段;它不是 verifier 實作,也不是 live write 授權。",
"approval_policy": "只有 OpenClaw owner 明確批准、Hermes redaction review 通過、NemoTron replay regression fixture 完成後,下一階段才能討論 runtime verifier implementation。",
@@ -84,7 +84,7 @@
"status": "approval_required",
"owner_agent": "openclaw",
"required_evidence": "Telegram failure receipt 只允許摘要、風險、批准連結、evidence id 與下一步。",
"review_question": "失敗通知是否不含 raw payload、private reasoning、authorization header 或 secret value",
"review_question": "失敗通知是否不含原始載荷、私有推理內容、授權標頭或機密明文",
"blocked_runtime_action": "telegram_send_or_receipt_write"
},
{
@@ -113,7 +113,7 @@
"status": "contract_ready",
"owner_agent": "hermes",
"trigger": "new evidence refs attached",
"operator_instruction": "檢查 evidence refs 僅含脫敏 id / hash / summary不含 secret、raw tool output 或完整對話。",
"operator_instruction": "檢查 evidence refs 僅含脫敏 id / hash / summary不含機密明文、原始工具輸出或未脫敏對話。",
"blocked_runtime_action": "canonical_readback_query_execution"
},
{
@@ -151,7 +151,7 @@
"action_type": "collect_evidence",
"status": "ready_for_owner",
"owner_agent": "hermes",
"operator_instruction": "補 evidence id、hash、summary 與 owner refs不得補 secret value 或 raw payload。",
"operator_instruction": "補 evidence id、hash、summary 與 owner refs不得補機密明文或原始載荷。",
"blocked_runtime_action": "canonical_readback_query_execution"
},
{
@@ -203,14 +203,14 @@
"failure policy"
],
"forbidden_frontend_content": [
"secret value",
"authorization header",
"raw tool output",
"private reasoning",
"raw Telegram payload",
"unredacted canonical payload"
"機密明文",
"授權標頭",
"原始工具輸出",
"私有推理內容",
"原始 Telegram 載荷",
"未脫敏 canonical 載荷"
],
"frontend_display_policy": "治理頁只顯示 evidence check、implementation lane、operator action、blocked runtime action 與 policy不顯示 secret、authorization header、raw tool output、private reasoning、raw Telegram payload 或未脫敏 canonical payload。"
"frontend_display_policy": "治理頁只顯示 evidence check、implementation lane、operator action、blocked runtime action 與 policy不顯示機密明文、授權標頭、原始工具輸出、私有推理內容、原始 Telegram 載荷或未脫敏 canonical 載荷。"
},
"rollups": {
"evidence_check_count": 5,