From 1b5eb3c328f2682c33a7f72ca25b05a8edfdb456 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 13 Jun 2026 04:46:14 +0800 Subject: [PATCH] fix(governance): redact legacy agent evidence display terms --- ...nt_candidate_operation_dry_run_evidence.py | 51 ++++++++++++++++++ ..._agent_runtime_verifier_evidence_review.py | 53 +++++++++++++++++++ ...nt_candidate_operation_dry_run_evidence.py | 10 ++++ ..._agent_runtime_verifier_evidence_review.py | 10 ++++ ...operation_dry_run_evidence_2026-06-12.json | 26 ++++----- ...e_verifier_evidence_review_2026-06-12.json | 32 +++++------ 6 files changed, 153 insertions(+), 29 deletions(-) diff --git a/apps/api/src/services/ai_agent_candidate_operation_dry_run_evidence.py b/apps/api/src/services/ai_agent_candidate_operation_dry_run_evidence.py index 6c376a07..61a8a9bd 100644 --- a/apps/api/src/services/ai_agent_candidate_operation_dry_run_evidence.py +++ b/apps/api/src/services/ai_agent_candidate_operation_dry_run_evidence.py @@ -43,6 +43,7 @@ def load_latest_ai_agent_candidate_operation_dry_run_evidence( _require_gate_requirements(payload, str(latest)) _require_operator_handoffs(payload, str(latest)) _require_redaction_contract(payload, str(latest)) + _require_no_forbidden_display_terms(payload, str(latest)) _require_rollup_consistency(payload, str(latest)) return payload @@ -250,6 +251,56 @@ def _require_redaction_contract(payload: dict[str, Any], label: str) -> None: raise ValueError(f"{label}: display redaction fields must remain false: {unsafe}") +def _require_no_forbidden_display_terms(payload: dict[str, Any], label: str) -> None: + forbidden_terms = { + "工作視窗", + "對話內容", + "批准!繼續", + "In app browser", + "My request for Codex", + "browser_context", + "codex_user_message", + "prompt_text", + "raw payload", + "raw_prompt", + "private reasoning", + "private_reasoning", + "chain_of_thought", + "bot_token", + "authorization header", + "authorization_header", + "secret value", + "secret_value", + "raw tool output", + "raw_tool_output", + "raw Telegram payload", + "raw_telegram_payload", + "work window transcript", + "work_window_transcript", + "internal collaboration transcript", + } + + hits: list[str] = [] + + def walk(value: Any, path: str) -> None: + if isinstance(value, dict): + for key, nested in value.items(): + walk(nested, f"{path}.{key}" if path else str(key)) + return + if isinstance(value, list): + for index, nested in enumerate(value): + walk(nested, f"{path}[{index}]") + return + if isinstance(value, str): + matched = sorted(term for term in forbidden_terms if term in value) + if matched: + hits.append(f"{path}: {', '.join(matched)}") + + walk(payload, "") + if hits: + raise ValueError(f"{label}: forbidden display terms found: {hits}") + + def _require_rollup_consistency(payload: dict[str, Any], label: str) -> None: rollups = payload.get("rollups") or {} truth = payload.get("dry_run_truth") or {} diff --git a/apps/api/src/services/ai_agent_runtime_verifier_evidence_review.py b/apps/api/src/services/ai_agent_runtime_verifier_evidence_review.py index 633a5076..c8757c1e 100644 --- a/apps/api/src/services/ai_agent_runtime_verifier_evidence_review.py +++ b/apps/api/src/services/ai_agent_runtime_verifier_evidence_review.py @@ -40,6 +40,7 @@ def load_latest_ai_agent_runtime_verifier_evidence_review( _require_schema(payload, str(latest)) _require_runtime_boundaries(payload, str(latest)) _require_review_contract(payload, str(latest)) + _require_no_forbidden_display_terms(payload, str(latest)) _require_rollup_consistency(payload, str(latest)) return payload @@ -115,6 +116,58 @@ def _require_review_contract(payload: dict[str, Any], label: str) -> None: raise ValueError(f"{label}: {flag} must remain false") +def _require_no_forbidden_display_terms(payload: dict[str, Any], label: str) -> None: + forbidden_terms = { + "工作視窗", + "對話內容", + "批准!繼續", + "In app browser", + "My request for Codex", + "browser_context", + "codex_user_message", + "prompt_text", + "raw payload", + "raw_prompt", + "private reasoning", + "private_reasoning", + "chain_of_thought", + "bot_token", + "authorization header", + "authorization_header", + "secret value", + "secret_value", + "raw tool output", + "raw_tool_output", + "raw Telegram payload", + "raw_telegram_payload", + "unredacted canonical payload", + "unredacted_canonical_payload", + "work window transcript", + "work_window_transcript", + "internal collaboration transcript", + } + + hits: list[str] = [] + + def walk(value: Any, path: str) -> None: + if isinstance(value, dict): + for key, nested in value.items(): + walk(nested, f"{path}.{key}" if path else str(key)) + return + if isinstance(value, list): + for index, nested in enumerate(value): + walk(nested, f"{path}[{index}]") + return + if isinstance(value, str): + matched = sorted(term for term in forbidden_terms if term in value) + if matched: + hits.append(f"{path}: {', '.join(matched)}") + + walk(payload, "") + if hits: + raise ValueError(f"{label}: forbidden display terms found: {hits}") + + def _require_rollup_consistency(payload: dict[str, Any], label: str) -> None: rollups = payload.get("rollups") or {} checks = payload.get("evidence_checks") or [] diff --git a/apps/api/tests/test_ai_agent_candidate_operation_dry_run_evidence.py b/apps/api/tests/test_ai_agent_candidate_operation_dry_run_evidence.py index 220f2e9b..6dd958c1 100644 --- a/apps/api/tests/test_ai_agent_candidate_operation_dry_run_evidence.py +++ b/apps/api/tests/test_ai_agent_candidate_operation_dry_run_evidence.py @@ -120,6 +120,16 @@ def test_rejects_handoff_runtime_action(tmp_path): load_latest_ai_agent_candidate_operation_dry_run_evidence(tmp_path) +def test_rejects_forbidden_display_terms(tmp_path): + data = load_latest_ai_agent_candidate_operation_dry_run_evidence() + bad = copy.deepcopy(data) + bad["verifier_plans"][0]["expected_signal"] = "raw payload / private reasoning 不得顯示" + _write_snapshot(tmp_path, bad) + + with pytest.raises(ValueError, match="forbidden display terms"): + load_latest_ai_agent_candidate_operation_dry_run_evidence(tmp_path) + + def test_rejects_rollup_mismatch(tmp_path): data = load_latest_ai_agent_candidate_operation_dry_run_evidence() bad = copy.deepcopy(data) diff --git a/apps/api/tests/test_ai_agent_runtime_verifier_evidence_review.py b/apps/api/tests/test_ai_agent_runtime_verifier_evidence_review.py index 5ba884d7..250239c2 100644 --- a/apps/api/tests/test_ai_agent_runtime_verifier_evidence_review.py +++ b/apps/api/tests/test_ai_agent_runtime_verifier_evidence_review.py @@ -66,6 +66,16 @@ def test_rejects_missing_required_evidence(tmp_path): load_latest_ai_agent_runtime_verifier_evidence_review(tmp_path) +def test_rejects_forbidden_display_terms(tmp_path): + data = load_latest_ai_agent_runtime_verifier_evidence_review() + bad = copy.deepcopy(data) + bad["evidence_checks"][0]["review_question"] = "authorization header / secret value 不得顯示" + _write_snapshot(tmp_path, bad) + + with pytest.raises(ValueError, match="forbidden display terms"): + load_latest_ai_agent_runtime_verifier_evidence_review(tmp_path) + + def test_rejects_rollup_mismatch(tmp_path): data = load_latest_ai_agent_runtime_verifier_evidence_review() bad = copy.deepcopy(data) diff --git a/docs/evaluations/ai_agent_candidate_operation_dry_run_evidence_2026-06-12.json b/docs/evaluations/ai_agent_candidate_operation_dry_run_evidence_2026-06-12.json index 3cce3e87..8be73dae 100644 --- a/docs/evaluations/ai_agent_candidate_operation_dry_run_evidence_2026-06-12.json +++ b/docs/evaluations/ai_agent_candidate_operation_dry_run_evidence_2026-06-12.json @@ -325,7 +325,7 @@ "telegram_send_count": 0, "secret_value_read_count": 0, "destructive_action_count": 0, - "blocked_actions": ["secret value read", "paid API call", "authorization header read"], + "blocked_actions": ["機密明文讀取", "paid API call", "授權標頭讀取"], "required_human_decision": "只能審查 metadata、secret name、費用批准與供應商 fallback 理由;明文與付費呼叫需另行批准。", "verifier_plan_id": "verifier_redaction_and_cost_boundary", "rollback_or_noop_plan": "no-op;不提供 runtime escalation。", @@ -361,7 +361,7 @@ "display_name": "redacted evidence hash verifier", "owner_agent": "hermes", "verifier_scope": "比對 input / output hash、source refs 與 redaction 欄位。", - "expected_signal": "hash 存在、source ref 可追溯、未顯示 raw payload。", + "expected_signal": "hash 存在、source ref 可追溯、未顯示原始載荷。", "failure_lane": "退回 evidence collection,不進 queue 或 runtime。", "live_readback_enabled": false, "writes_result": false, @@ -409,7 +409,7 @@ "display_name": "live readback allow-list verifier", "owner_agent": "nemotron", "verifier_scope": "先審查允許讀回的欄位、redaction 與 failure receipt template。", - "expected_signal": "allow-list 完整、secret / raw payload / private reasoning 都不顯示。", + "expected_signal": "allow-list 完整、機密明文、原始載荷與私有推理內容都不顯示。", "failure_lane": "阻擋 live readback,產生人工檢查清單。", "live_readback_enabled": false, "writes_result": false, @@ -469,7 +469,7 @@ "gate_id": "secret_or_paid_provider_gate", "display_name": "secret / paid provider gate", "required_evidence": ["metadata-only scope", "cost approval", "redaction policy", "owner response"], - "missing_or_blocked": ["secret value read is prohibited", "paid provider expansion approval"], + "missing_or_blocked": ["機密明文讀取仍禁止", "paid provider expansion approval"], "opens_live_execution": false }, { @@ -548,15 +548,15 @@ "next_gate" ], "blocked_display_fields": [ - "raw_prompt", - "chain_of_thought", - "telegram_chat_id", - "telegram_message_id", - "bot_token", - "authorization_header", - "secret_value", - "raw_tool_output", - "work_window_transcript" + "未脫敏提示內容", + "推理鏈內容", + "Telegram chat id", + "Telegram message id", + "Bot token 識別欄位", + "授權標頭", + "機密明文", + "原始工具輸出", + "內部協作逐字稿" ] }, "rollups": { diff --git a/docs/evaluations/ai_agent_runtime_verifier_evidence_review_2026-06-12.json b/docs/evaluations/ai_agent_runtime_verifier_evidence_review_2026-06-12.json index cf82c85a..c4a48d13 100644 --- a/docs/evaluations/ai_agent_runtime_verifier_evidence_review_2026-06-12.json +++ b/docs/evaluations/ai_agent_runtime_verifier_evidence_review_2026-06-12.json @@ -39,12 +39,12 @@ "redacted_evidence_refs" ], "forbidden_evidence": [ - "secret_value", - "authorization_header", - "raw_tool_output", - "private_reasoning", - "raw_telegram_payload", - "unredacted_canonical_payload" + "機密明文", + "授權標頭", + "原始工具輸出", + "私有推理內容", + "原始 Telegram 載荷", + "未脫敏 canonical 載荷" ], "operator_meaning": "這份 review package 用來審查 runtime verifier implementation 是否可以進下一階段;它不是 verifier 實作,也不是 live write 授權。", "approval_policy": "只有 OpenClaw owner 明確批准、Hermes redaction review 通過、NemoTron replay regression fixture 完成後,下一階段才能討論 runtime verifier implementation。", @@ -84,7 +84,7 @@ "status": "approval_required", "owner_agent": "openclaw", "required_evidence": "Telegram failure receipt 只允許摘要、風險、批准連結、evidence id 與下一步。", - "review_question": "失敗通知是否不含 raw payload、private reasoning、authorization header 或 secret value?", + "review_question": "失敗通知是否不含原始載荷、私有推理內容、授權標頭或機密明文?", "blocked_runtime_action": "telegram_send_or_receipt_write" }, { @@ -113,7 +113,7 @@ "status": "contract_ready", "owner_agent": "hermes", "trigger": "new evidence refs attached", - "operator_instruction": "檢查 evidence refs 僅含脫敏 id / hash / summary,不含 secret、raw tool output 或完整對話。", + "operator_instruction": "檢查 evidence refs 僅含脫敏 id / hash / summary,不含機密明文、原始工具輸出或未脫敏對話。", "blocked_runtime_action": "canonical_readback_query_execution" }, { @@ -151,7 +151,7 @@ "action_type": "collect_evidence", "status": "ready_for_owner", "owner_agent": "hermes", - "operator_instruction": "補 evidence id、hash、summary 與 owner refs;不得補 secret value 或 raw payload。", + "operator_instruction": "補 evidence id、hash、summary 與 owner refs;不得補機密明文或原始載荷。", "blocked_runtime_action": "canonical_readback_query_execution" }, { @@ -203,14 +203,14 @@ "failure policy" ], "forbidden_frontend_content": [ - "secret value", - "authorization header", - "raw tool output", - "private reasoning", - "raw Telegram payload", - "unredacted canonical payload" + "機密明文", + "授權標頭", + "原始工具輸出", + "私有推理內容", + "原始 Telegram 載荷", + "未脫敏 canonical 載荷" ], - "frontend_display_policy": "治理頁只顯示 evidence check、implementation lane、operator action、blocked runtime action 與 policy;不顯示 secret、authorization header、raw tool output、private reasoning、raw Telegram payload 或未脫敏 canonical payload。" + "frontend_display_policy": "治理頁只顯示 evidence check、implementation lane、operator action、blocked runtime action 與 policy;不顯示機密明文、授權標頭、原始工具輸出、私有推理內容、原始 Telegram 載荷或未脫敏 canonical 載荷。" }, "rollups": { "evidence_check_count": 5,