feat(governance): attach km archive history to dedupe groups

2026-05-20 10:20:01 +08:00
parent 9b0f68f6c4
commit edb6daef88
6 changed files with 255 additions and 40 deletions
--- a/apps/api/src/models/governance.py
+++ b/apps/api/src/models/governance.py
@@ -138,6 +138,7 @@ class KnowledgeReviewDraftDedupeGroup(BaseModel):
    owner_action: str
    writes_on_read: bool = False
    can_archive_without_owner_approval: bool = False
+    archive_history: list[DispatchItem] = Field(default_factory=list)


 class KnowledgeReviewDraftDedupeResponse(BaseModel):
--- a/apps/api/src/services/governance_query_service.py
+++ b/apps/api/src/services/governance_query_service.py
@@ -394,43 +394,7 @@ async def _query_dispatch_table(
    items: list[DispatchItem] = []
    for row in page_rows:
        decision_ctx: dict = (row.decision_context or {}) if hasattr(row, "decision_context") else {}
-        proposed_action = _extract_proposed_action(decision_ctx)
-
-        # playbook_trust: Track D 完成後改為 JOIN playbooks 表取 trust_score
-        # 現階段從 decision_context 取 mock 值
-        playbook_trust_raw = decision_ctx.get("playbook_trust")
-        try:
-            playbook_trust = float(playbook_trust_raw) if playbook_trust_raw is not None else None
-        except (TypeError, ValueError):
-            playbook_trust = None
-
-        items.append(DispatchItem(
-            id=str(row.id),
-            governance_event_id=str(row.governance_event_id),
-            event_type=str(row.event_type),
-            dispatch_status=str(row.dispatch_status),
-            executor_type=str(row.executor_type) if row.executor_type else None,
-            proposed_action=proposed_action,
-            playbook_id=str(row.playbook_id) if row.playbook_id else None,
-            playbook_trust=playbook_trust,
-            created_at=row.created_at,
-            dispatched_at=row.dispatched_at,
-            started_at=row.started_at,
-            completed_at=row.completed_at,
-            operator_note=row.operator_note,
-            decision_path=_extract_decision_path(decision_ctx),
-            workflow_stage=_extract_workflow_stage(decision_ctx, str(row.dispatch_status)),
-            workflow_steps=_extract_workflow_steps(decision_ctx),
-            next_action=_extract_next_action(decision_ctx),
-            lead_agent=_extract_lead_agent(decision_ctx),
-            support_agents=_extract_support_agents(decision_ctx),
-            human_owner=_extract_human_owner(decision_ctx),
-            kb_draft_entry_id=_extract_kb_draft_entry_id(decision_ctx),
-            worker_status=_extract_worker_status(decision_ctx),
-            dry_run_plan_fingerprint=_extract_dry_run_plan_fingerprint(decision_ctx),
-            archived_count=_extract_archived_count(decision_ctx),
-            stale_ratio_snapshot=_extract_stale_ratio_snapshot(decision_ctx),
-        ))
+        items.append(_to_dispatch_item(row, decision_ctx))

    return GovernanceQueueResponse(
        items=items,
@@ -441,6 +405,47 @@ async def _query_dispatch_table(
    )


+def _to_dispatch_item(row: Any, decision_ctx: dict) -> DispatchItem:
+    """把 governance_remediation_dispatch SQL row 轉成 Work Items read model。"""
+    proposed_action = _extract_proposed_action(decision_ctx)
+
+    # playbook_trust: Track D 完成後改為 JOIN playbooks 表取 trust_score
+    # 現階段從 decision_context 取 mock 值
+    playbook_trust_raw = decision_ctx.get("playbook_trust")
+    try:
+        playbook_trust = float(playbook_trust_raw) if playbook_trust_raw is not None else None
+    except (TypeError, ValueError):
+        playbook_trust = None
+
+    return DispatchItem(
+        id=str(row.id),
+        governance_event_id=str(row.governance_event_id),
+        event_type=str(row.event_type),
+        dispatch_status=str(row.dispatch_status),
+        executor_type=str(row.executor_type) if row.executor_type else None,
+        proposed_action=proposed_action,
+        playbook_id=str(row.playbook_id) if row.playbook_id else None,
+        playbook_trust=playbook_trust,
+        created_at=row.created_at,
+        dispatched_at=row.dispatched_at,
+        started_at=row.started_at,
+        completed_at=row.completed_at,
+        operator_note=row.operator_note,
+        decision_path=_extract_decision_path(decision_ctx),
+        workflow_stage=_extract_workflow_stage(decision_ctx, str(row.dispatch_status)),
+        workflow_steps=_extract_workflow_steps(decision_ctx),
+        next_action=_extract_next_action(decision_ctx),
+        lead_agent=_extract_lead_agent(decision_ctx),
+        support_agents=_extract_support_agents(decision_ctx),
+        human_owner=_extract_human_owner(decision_ctx),
+        kb_draft_entry_id=_extract_kb_draft_entry_id(decision_ctx),
+        worker_status=_extract_worker_status(decision_ctx),
+        dry_run_plan_fingerprint=_extract_dry_run_plan_fingerprint(decision_ctx),
+        archived_count=_extract_archived_count(decision_ctx),
+        stale_ratio_snapshot=_extract_stale_ratio_snapshot(decision_ctx),
+    )
+
+
 def _extract_proposed_action(decision_ctx: dict) -> str:
    """
    從 decision_context JSONB 抽取 proposed_action，≤120 字。
@@ -626,12 +631,14 @@ async def query_km_review_draft_dedupe(
 ) -> KnowledgeReviewDraftDedupeResponse:
    """產生 Hermes KM healthcheck review drafts 的 read-only 去重計畫。"""
    rows = await _load_km_healthcheck_review_drafts(limit=limit)
-    preferred = await _load_preferred_km_draft_ids_by_event([
+    event_ids = [
        event_id
        for row in rows
        if (event_id := _extract_governance_event_id_from_tags(row.get("tags")))
-    ])
-    groups = _build_km_review_draft_dedupe_groups(rows, preferred)
+    ]
+    preferred = await _load_preferred_km_draft_ids_by_event(event_ids)
+    archive_history = await _load_km_archive_history_by_event(event_ids)
+    groups = _build_km_review_draft_dedupe_groups(rows, preferred, archive_history)

    return KnowledgeReviewDraftDedupeResponse(
        total_review_drafts=len(rows),
@@ -720,6 +727,65 @@ async def _load_preferred_km_draft_ids_by_event(
    return preferred


+async def _load_km_archive_history_by_event(
+    event_ids: list[str],
+) -> dict[str, list[DispatchItem]]:
+    """讀取 KM duplicate archive / stale ratio recheck 的 terminal audit trail。"""
+    if not event_ids:
+        return {}
+
+    unique_event_ids = list(dict.fromkeys(event_ids))
+    sql = text("""
+        SELECT
+            d.id,
+            d.governance_event_id,
+            e.event_type,
+            d.dispatch_status,
+            d.executor_type,
+            d.decision_context,
+            d.playbook_id,
+            d.dispatched_at AS created_at,
+            d.dispatched_at,
+            d.started_at,
+            d.completed_at,
+            NULL::text AS operator_note
+        FROM governance_remediation_dispatch d
+        JOIN ai_governance_events e ON e.id = d.governance_event_id
+        WHERE d.governance_event_id IN :event_ids
+          AND d.executor_type IN (
+            'hermes_km_review_dedupe_owner_archive',
+            'hermes_km_stale_ratio_recheck'
+          )
+        ORDER BY
+            d.governance_event_id,
+            d.completed_at DESC NULLS LAST,
+            d.started_at DESC NULLS LAST,
+            d.dispatched_at DESC
+    """).bindparams(bindparam("event_ids", expanding=True))
+
+    try:
+        async with get_db_context() as db:
+            result = await db.execute(sql, {"event_ids": unique_event_ids})
+            rows = result.fetchall()
+    except ProgrammingError as exc:
+        logger.warning(
+            "km_review_dedupe_archive_history_table_not_ready",
+            error=str(exc),
+        )
+        return {}
+
+    history: dict[str, list[DispatchItem]] = {}
+    for row in rows:
+        event_id = str(row.governance_event_id)
+        bucket = history.setdefault(event_id, [])
+        if len(bucket) >= 3:
+            continue
+        decision_ctx: dict = (row.decision_context or {}) if hasattr(row, "decision_context") else {}
+        bucket.append(_to_dispatch_item(row, decision_ctx))
+
+    return history
+
+
 def _extract_governance_event_id_from_tags(tags: Any) -> str | None:
    if not isinstance(tags, list):
        return None
@@ -734,9 +800,11 @@ def _extract_governance_event_id_from_tags(tags: Any) -> str | None:
 def _build_km_review_draft_dedupe_groups(
    rows: list[dict[str, Any]],
    preferred_draft_ids_by_event: dict[str, str] | None = None,
+    archive_history_by_event: dict[str, list[DispatchItem]] | None = None,
 ) -> list[KnowledgeReviewDraftDedupeGroup]:
    """把 KM review drafts 依 governance_event tag 分組並產生 owner action。"""
    preferred_draft_ids_by_event = preferred_draft_ids_by_event or {}
+    archive_history_by_event = archive_history_by_event or {}
    grouped: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        event_id = _extract_governance_event_id_from_tags(row.get("tags"))
@@ -779,6 +847,7 @@ def _build_km_review_draft_dedupe_groups(
            owner_action="review_canonical_and_archive_duplicate_drafts",
            writes_on_read=False,
            can_archive_without_owner_approval=False,
+            archive_history=archive_history_by_event.get(event_id, []),
        ))

    return sorted(