feat(governance): trace km stale ratio rechecks

2026-05-20 00:52:14 +08:00
parent 5ac315c119
commit d283e65340
8 changed files with 416 additions and 7 deletions
--- a/apps/api/src/models/governance.py
+++ b/apps/api/src/models/governance.py
@@ -154,6 +154,14 @@ class KnowledgeReviewDraftArchiveRequest(BaseModel):
    dry_run: bool = False


+class KnowledgeReviewDraftStaleRatioSnapshot(BaseModel):  # point-in-time KM stale-ratio measurement attached to archive responses
+    stale_count: int  # number of entries counted as stale
+    total_count: int  # number of entries in the denominator
+    stale_ratio: float  # stale_count / total_count (the service in this commit rounds to 3 decimals and uses 0.0 when total is 0)
+    threshold: float  # limit the ratio is compared against (the service passes KM_STALE_RATIO)
+    stale_days: int  # age window in days behind the stale cutoff (the service passes KM_STALE_DAYS)
+
+
 class KnowledgeReviewDraftArchiveResponse(BaseModel):
    schema_version: str = "km_review_draft_archive_v1"
    governance_event_id: str
@@ -169,6 +177,14 @@ class KnowledgeReviewDraftArchiveResponse(BaseModel):
    writes_km: bool
    writes_governance_audit: bool
    audit_dispatch_id: str | None = None
+    stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None
+    stale_ratio_recheck_status: Literal[
+        "dry_run",
+        "completed",
+        "already_active",
+        "not_requested",
+    ] = "not_requested"
+    stale_ratio_recheck_dispatch_id: str | None = None
    next_action: str = "stale_ratio_recheck"
    generated_at: datetime

--- a/apps/api/src/services/governance_km_review_service.py
+++ b/apps/api/src/services/governance_km_review_service.py
@@ -13,10 +13,12 @@ Owner-approved operations for Hermes KM healthcheck review drafts.

 from __future__ import annotations

+from datetime import timedelta
 from typing import Any, Literal

 import structlog
-from sqlalchemy import select
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession

 from src.db.base import get_db_context
 from src.db.models import (
@@ -29,8 +31,10 @@ from src.models.governance import (
    KnowledgeReviewDraftArchiveRequest,
    KnowledgeReviewDraftArchiveResponse,
    KnowledgeReviewDraftDedupeGroup,
+    KnowledgeReviewDraftStaleRatioSnapshot,
 )
 from src.models.knowledge import EntryStatus, EntryType
+from src.services.governance_agent import KM_STALE_DAYS, KM_STALE_RATIO
 from src.services.governance_query_service import query_km_review_draft_dedupe
 from src.utils.timezone import now_taipei

@@ -105,9 +109,11 @@ async def archive_km_review_draft_duplicates(
            would_archive_entry_ids=duplicate_ids,
            writes_km=False,
            writes_governance_audit=False,
+            stale_ratio_snapshot=await _load_current_km_stale_ratio_snapshot(),
+            stale_ratio_recheck_status="dry_run",
        )

-    archived_ids, audit_dispatch_id = await _archive_duplicates_and_write_audit(
+    archive_result = await _archive_duplicates_and_write_audit(
        governance_event_id=governance_event_id,
        request=request,
        duplicate_ids=duplicate_ids,
@@ -118,10 +124,13 @@ async def archive_km_review_draft_duplicates(
        request=request,
        duplicate_ids=duplicate_ids,
        status="archived",
-        archived_entry_ids=archived_ids,
-        writes_km=bool(archived_ids),
+        archived_entry_ids=archive_result["archived_ids"],
+        writes_km=bool(archive_result["archived_ids"]),
        writes_governance_audit=True,
-        audit_dispatch_id=audit_dispatch_id,
+        audit_dispatch_id=archive_result["audit_dispatch_id"],
+        stale_ratio_snapshot=archive_result["stale_ratio_snapshot"],
+        stale_ratio_recheck_status=archive_result["recheck_status"],
+        stale_ratio_recheck_dispatch_id=archive_result["recheck_dispatch_id"],
    )


@@ -190,7 +199,7 @@ async def _archive_duplicates_and_write_audit(
    governance_event_id: str,
    request: KnowledgeReviewDraftArchiveRequest,
    duplicate_ids: list[str],
-) -> tuple[list[str], str]:
+) -> dict[str, Any]:
    """Soft-archive duplicate rows and append a terminal audit dispatch."""
    now = now_taipei()
    async with get_db_context() as db:
@@ -232,6 +241,16 @@ async def _archive_duplicates_and_write_audit(
            record.updated_at = now
            archived_ids.append(entry_id)

+        await db.flush()
+        stale_ratio_snapshot = await _compute_km_stale_ratio_snapshot(db)
+        recheck_dispatch_id, recheck_status = await _ensure_stale_ratio_recheck_dispatch(
+            db,
+            governance_event_id=governance_event_id,
+            request=request,
+            archived_ids=archived_ids,
+            stale_ratio_snapshot=stale_ratio_snapshot,
+        )
+
        audit = GovernanceRemediationDispatch(
            id=generate_uuid(),
            governance_event_id=governance_event_id,
@@ -241,6 +260,9 @@ async def _archive_duplicates_and_write_audit(
                governance_event_id=governance_event_id,
                request=request,
                archived_ids=archived_ids,
+                stale_ratio_snapshot=stale_ratio_snapshot,
+                recheck_dispatch_id=recheck_dispatch_id,
+                recheck_status=recheck_status,
            ),
            executor_type="hermes_km_review_dedupe_owner_archive",
            attempt_count=0,
@@ -259,8 +281,16 @@ async def _archive_duplicates_and_write_audit(
            canonical_entry_id=request.canonical_entry_id,
            duplicate_count=len(archived_ids),
            audit_dispatch_id=audit.id,
+            recheck_dispatch_id=recheck_dispatch_id,
+            recheck_status=recheck_status,
        )
-        return archived_ids, str(audit.id)
+        return {
+            "archived_ids": archived_ids,
+            "audit_dispatch_id": str(audit.id),
+            "stale_ratio_snapshot": stale_ratio_snapshot,
+            "recheck_dispatch_id": recheck_dispatch_id,
+            "recheck_status": recheck_status,
+        }


 def _is_archive_candidate(
@@ -303,6 +333,9 @@ def _build_archive_audit_context(
    governance_event_id: str,
    request: KnowledgeReviewDraftArchiveRequest,
    archived_ids: list[str],
+    stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None,
+    recheck_dispatch_id: str | None = None,
+    recheck_status: str = "not_requested",
 ) -> dict[str, Any]:
    return {
        "schema_version": "km_review_draft_archive_audit_v1",
@@ -327,6 +360,14 @@ def _build_archive_audit_context(
        "canonical_entry_id": request.canonical_entry_id,
        "archived_entry_ids": archived_ids,
        "archived_count": len(archived_ids),
+        "stale_ratio_snapshot": (
+            stale_ratio_snapshot.model_dump() if stale_ratio_snapshot else None
+        ),
+        "stale_ratio_recheck": {
+            "status": recheck_status,
+            "dispatch_id": recheck_dispatch_id,
+            "executor_type": "hermes_km_stale_ratio_recheck",
+        },
        "dry_run": request.dry_run,
        "owner_approved": request.owner_approved,
    }
@@ -344,6 +385,14 @@ def _build_archive_response(
    writes_km: bool,
    writes_governance_audit: bool,
    audit_dispatch_id: str | None = None,
+    stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot | None = None,
+    stale_ratio_recheck_status: Literal[
+        "dry_run",
+        "completed",
+        "already_active",
+        "not_requested",
+    ] = "not_requested",
+    stale_ratio_recheck_dispatch_id: str | None = None,
 ) -> KnowledgeReviewDraftArchiveResponse:
    return KnowledgeReviewDraftArchiveResponse(
        governance_event_id=governance_event_id,
@@ -359,9 +408,160 @@ def _build_archive_response(
        writes_km=writes_km,
        writes_governance_audit=writes_governance_audit,
        audit_dispatch_id=audit_dispatch_id,
+        stale_ratio_snapshot=stale_ratio_snapshot,
+        stale_ratio_recheck_status=stale_ratio_recheck_status,
+        stale_ratio_recheck_dispatch_id=stale_ratio_recheck_dispatch_id,
        generated_at=now_taipei(),
    )


 def _enum_value(value: Any) -> str:  # normalize an enum-like member or a plain value to str
    return str(value.value if hasattr(value, "value") else value)  # anything exposing .value is unwrapped before str()
+
+
+async def _load_current_km_stale_ratio_snapshot() -> KnowledgeReviewDraftStaleRatioSnapshot:  # snapshot helper for callers that hold no session
+    async with get_db_context() as db:  # opens a dedicated DB context just for the two count queries
+        return await _compute_km_stale_ratio_snapshot(db)
+
+
+async def _compute_km_stale_ratio_snapshot(
+    db: AsyncSession,
+) -> KnowledgeReviewDraftStaleRatioSnapshot:
+    """Count non-archived KM entries and their stale subset; meant to match GovernanceAgent.check_knowledge_degradation."""
+    stale_cutoff = now_taipei() - timedelta(days=KM_STALE_DAYS)  # entries last updated before this instant count as stale
+    total_result = await db.execute(  # denominator: every entry whose status is not ARCHIVED
+        select(func.count()).select_from(KnowledgeEntryRecord).where(
+            KnowledgeEntryRecord.status != EntryStatus.ARCHIVED,
+        )
+    )
+    total = int(total_result.scalar() or 0)  # a None scalar is treated as 0
+
+    stale_result = await db.execute(  # numerator: same status filter plus the age cutoff
+        select(func.count()).select_from(KnowledgeEntryRecord).where(
+            KnowledgeEntryRecord.status != EntryStatus.ARCHIVED,
+            KnowledgeEntryRecord.updated_at < stale_cutoff,
+        )
+    )
+    stale = int(stale_result.scalar() or 0)  # a None scalar is treated as 0
+    ratio = round(stale / total, 3) if total > 0 else 0.0  # 3-decimal ratio; 0.0 sidesteps division by zero when nothing is counted
+    return KnowledgeReviewDraftStaleRatioSnapshot(
+        stale_count=stale,
+        total_count=total,
+        stale_ratio=ratio,  # the rounded value is what gets stored and reported
+        threshold=KM_STALE_RATIO,
+        stale_days=KM_STALE_DAYS,
+    )
+
+
+async def _ensure_stale_ratio_recheck_dispatch(
+    db: AsyncSession,
+    *,
+    governance_event_id: str,
+    request: KnowledgeReviewDraftArchiveRequest,
+    archived_ids: list[str],
+    stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot,
+) -> tuple[str | None, Literal["completed", "already_active"]]:
+    """Record the post-archive recheck as a dispatch row, or return the id of an in-flight dispatch for the event."""
+    active_result = await db.execute(  # newest in-flight dispatch for this event, if any
+        select(GovernanceRemediationDispatch)
+        .where(
+            GovernanceRemediationDispatch.governance_event_id == governance_event_id,  # no executor_type filter: any executor's dispatch matches
+            GovernanceRemediationDispatch.dispatch_status.in_([
+                "pending",
+                "dispatched",
+                "executing",
+            ]),
+        )
+        .order_by(GovernanceRemediationDispatch.dispatched_at.desc())
+        .limit(1)
+    )
+    active = active_result.scalar_one_or_none()
+    if active is not None:
+        return str(active.id), "already_active"  # hand back the existing dispatch id; no new row is written
+
+    recheck = GovernanceRemediationDispatch(
+        id=generate_uuid(),
+        governance_event_id=governance_event_id,
+        event_type="knowledge_degradation",
+        dispatch_status="succeeded",  # inserted directly in a terminal state; the measurement is the snapshot passed in
+        decision_context=_build_stale_ratio_recheck_context(
+            governance_event_id=governance_event_id,
+            request=request,
+            archived_ids=archived_ids,
+            stale_ratio_snapshot=stale_ratio_snapshot,
+        ),
+        executor_type="hermes_km_stale_ratio_recheck",
+        attempt_count=0,
+        max_attempts=1,
+        dispatched_at=taipei_now(),  # NOTE(review): the imports visible in this diff only show now_taipei — confirm taipei_now is in scope
+        started_at=taipei_now(),  # separate clock read, so the three timestamps may differ slightly
+        completed_at=taipei_now(),
+        created_by=request.owner[:100],  # owner truncated to 100 chars — presumably the column width; TODO confirm
+    )
+    db.add(recheck)
+    await db.flush()  # flush only; no commit is issued in this function
+    return str(recheck.id), "completed"
+
+
+def _build_stale_ratio_recheck_context(  # builds the decision_context dict stored on the recheck dispatch row
+    *,
+    governance_event_id: str,
+    request: KnowledgeReviewDraftArchiveRequest,
+    archived_ids: list[str],
+    stale_ratio_snapshot: KnowledgeReviewDraftStaleRatioSnapshot,
+) -> dict[str, Any]:
+    return {
+        "version": "v1",
+        "trigger_source": "km_review_dedupe_archive",
+        "triggered_metric": "knowledge_degradation",
+        "metric_value": stale_ratio_snapshot.stale_ratio,  # ratio as stored on the snapshot
+        "threshold": stale_ratio_snapshot.threshold,
+        "suggested_action": "run_stale_ratio_recheck",
+        "next_action": "run_stale_ratio_recheck",
+        "decision_path": "owner_approved_recheck_after_archive",
+        "ownership": {
+            "lead_agent": "Hermes",
+            "support_agents": [
+                "OpenClaw：提供知識劣化風險脈絡，不直接批量改寫 KM。",  # English gloss: provides knowledge-degradation risk context; does not bulk-rewrite KM directly
+                "ElephantAlpha：read-only 稽核 stale ratio recheck 結果。",  # English gloss: read-only audit of the stale ratio recheck result
+            ],
+            "human_owner": "KM owner / SRE owner",
+        },
+        "workflow": {
+            "work_item_id": f"governance:knowledge_degradation:{governance_event_id}:stale_ratio_recheck",
+            "work_kind": "km_stale_ratio_recheck",
+            "current_stage": "stale_ratio_recheck",
+            "steps": [  # ordered stage names for the whole KM degradation workflow
+                "detected",
+                "draft_km_updates",
+                "waiting_owner_review",
+                "owner_approved_duplicate_archive",
+                "stale_ratio_recheck",
+                "km_governance_close_or_continue",
+            ],
+            "stage_by_dispatch_status": {  # dispatch_status value -> workflow stage label
+                "pending": "stale_ratio_recheck",
+                "dispatched": "stale_ratio_recheck",
+                "executing": "stale_ratio_recheck",
+                "succeeded": "km_governance_rechecked",
+                "failed": "needs_manual_km_triage",
+                "skipped": "needs_manual_km_triage",
+                "cancelled": "cancelled",
+            },
+            "next_action": "run_stale_ratio_recheck",
+            "writes_km_without_approval": False,
+            "writes_km": False,  # this payload always records the recheck itself as not writing KM
+            "source_archive_action": "review_canonical_and_archive_duplicate_drafts",
+            "canonical_entry_id": request.canonical_entry_id,
+            "archived_entry_ids": archived_ids,
+            "stale_ratio_snapshot": stale_ratio_snapshot.model_dump(),  # plain-dict copy of the snapshot model
+        },
+        "worker_result": {
+            "status": "stale_ratio_rechecked",
+            "canonical_entry_id": request.canonical_entry_id,
+            "archived_count": len(archived_ids),
+            "stale_ratio": stale_ratio_snapshot.stale_ratio,
+            "threshold": stale_ratio_snapshot.threshold,
+            "above_threshold": stale_ratio_snapshot.stale_ratio > stale_ratio_snapshot.threshold,  # strict >: a ratio equal to the threshold is not flagged
+        },
+    }