Compare commits

...

2 Commits

Author SHA1 Message Date
Your Name
337b2df60d chore(cd): deploy latest image tag for prod manifests 2026-06-04 00:13:51 +08:00
Your Name
ab21d8bad2 chore: execute W1-redline convergence updates and evidence log 2026-06-03 20:10:14 +08:00
22 changed files with 644 additions and 126 deletions

View File

@@ -10,13 +10,51 @@
"""
from __future__ import annotations
from contextvars import ContextVar
from contextvars import ContextVar, Token
# 追蹤當前非同步任務的 project_id
# default="awoooi" 確保未設時也能正常查詢RLS fail-open 保護)
PROJECT_ID: ContextVar[str] = ContextVar("project_id", default="awoooi")
# Fail-Closed: 移除 default="awoooi",進 DB 路徑需要明確租戶標籤
PROJECT_ID: ContextVar[str | None] = ContextVar("project_id")
PROJECT_ID_SOURCE: ContextVar[str | None] = ContextVar("project_id_source")
PROJECT_ID_REQUEST_ID: ContextVar[str | None] = ContextVar("project_id_request_id")
def get_current_project_id() -> str:
def set_project_context(
    project_id: str | None,
    source: str = "runtime",
    request_id: str | None = None,
) -> tuple[Token[str | None], Token[str | None], Token[str | None]]:
    """Bind the project context for the current request/task.

    Args:
        project_id: Tenant/project identifier, or ``None`` when the caller
            could not determine one.
        source: Label recording where ``project_id`` came from.
        request_id: Correlation id of the originating request, if any.

    Returns:
        The three ``ContextVar`` tokens in the order
        ``(project_id, source, request_id)``. Pass the tuple to
        ``clear_project_context`` to restore the previous values.
    """
    project_token = PROJECT_ID.set(project_id)
    source_token = PROJECT_ID_SOURCE.set(source)
    request_token = PROJECT_ID_REQUEST_ID.set(request_id)
    return project_token, source_token, request_token
def clear_project_context(tokens: tuple[Token[str | None], Token[str | None], Token[str | None]]) -> None:
    """Restore the context variables to the values they had before binding.

    Args:
        tokens: The ``(project_id, source, request_id)`` token tuple returned
            by ``set_project_context``.
    """
    # Reset in the reverse of the order in which the variables were set.
    resets = (
        (PROJECT_ID_REQUEST_ID, tokens[2]),
        (PROJECT_ID_SOURCE, tokens[1]),
        (PROJECT_ID, tokens[0]),
    )
    for variable, token in resets:
        variable.reset(token)
def get_project_context() -> dict[str, str | None]:
    """Return a snapshot of the current project context.

    Returns:
        A dict with the keys ``project_id``, ``source`` and ``request_id``.
        A variable that has not been set in the current context is reported
        as ``None``. The dict is suitable for writing to an audit log.
    """
    project_id = PROJECT_ID.get(None)
    source = PROJECT_ID_SOURCE.get(None)
    request_id = PROJECT_ID_REQUEST_ID.get(None)
    return {"project_id": project_id, "source": source, "request_id": request_id}
def get_current_project_id() -> str | None:
"""取得當前任務的 project_id給 service 層使用)"""
return PROJECT_ID.get()
return PROJECT_ID.get(None)
def get_current_project_context() -> dict[str, str | None]:
    """Return the traceable project context.

    Alias of ``get_project_context`` kept so that the existing API name
    remains available to callers.
    """
    snapshot = get_project_context()
    return snapshot

View File

@@ -16,6 +16,7 @@ Features:
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from fastapi import HTTPException
from sqlalchemy import text
from sqlalchemy.ext.asyncio import (
AsyncEngine,
@@ -26,6 +27,8 @@ from sqlalchemy.ext.asyncio import (
from sqlalchemy.orm import DeclarativeBase
from src.core.config import settings
from src.core.context import get_current_project_context
from src.core.logging import get_logger
# =============================================================================
# Base Model
@@ -42,6 +45,19 @@ class Base(DeclarativeBase):
_engine: AsyncEngine | None = None
_session_factory: async_sessionmaker[AsyncSession] | None = None
logger = get_logger("awoooi.db")
def _raise_unauthorized_db_context(msg: str) -> None:
    """Log the missing tenant context and abort with HTTP 401.

    Args:
        msg: Reason recorded in the ``db_context_missing`` log event.

    Raises:
        HTTPException: Always, with status 401. This function never returns
            normally.
    """
    snapshot = get_current_project_context()
    # Log whatever context is bound so the rejected call can be traced.
    audit_fields = {
        "reason": msg,
        "project_id": snapshot.get("project_id"),
        "project_id_source": snapshot.get("source"),
        "request_id": snapshot.get("request_id"),
    }
    logger.error("db_context_missing", **audit_fields)
    raise HTTPException(
        status_code=401,
        detail="Missing tenant context: project_id is required",
    )
def get_engine() -> AsyncEngine:
@@ -103,13 +119,21 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
async def get_items(db: AsyncSession = Depends(get_db)):
...
"""
from src.core.context import get_current_project_id
factory = get_session_factory()
async with factory() as session:
try:
# AwoooP Phase 2.3 (2026-05-04 ogt): SET LOCAL app.project_id 讓 RLS Policy 生效
# 預設 'awoooi',多租戶路由將在 middleware 注入實際 project_id
# Fail-Closed RLS: 遇到未授權情境拋出錯誤而非回退到 "awoooi"
pid = get_current_project_id()
if not pid:
_raise_unauthorized_db_context(
"Unauthorized: project_id is missing in context (Fail-Closed RLS)"
)
await session.execute(
text("SELECT set_config('app.project_id', 'awoooi', TRUE)")
text("SELECT set_config('app.project_id', :pid, TRUE)"),
{"pid": pid},
)
yield session
await session.commit()
@@ -123,19 +147,22 @@ async def get_db_context(project_id: str | None = None) -> AsyncGenerator[AsyncS
"""
Context manager for database session (non-FastAPI usage)
AwoooP Phase 2.3/2.4: 優先序 — 明確參數 > contextvar > "awoooi"
AwoooP Phase 2.3/2.4: 優先序 — 明確參數 > contextvar(缺失則 fail-closed
- Phase 2.3: 啟用 RLS tenant isolationSET LOCAL app.project_id
- Phase 2.4: 從 asyncio contextvar 讀取 background loop 的 project_id
Usage:
async with get_db_context() as db: # 繼承 contextvar 或預設 awoooi
async with get_db_context() as db: # 繼承 contextvar(缺失將 fail-closed
...
async with get_db_context("other-tenant") as db: # 明確指定 tenant
...
"""
"""
from src.core.context import get_current_project_id
effective_pid = project_id if project_id is not None else get_current_project_id()
if not effective_pid:
_raise_unauthorized_db_context("Unauthorized: project_id is missing in context (Fail-Closed RLS)")
factory = get_session_factory()
async with factory() as session:
try:

View File

@@ -20,6 +20,7 @@ Date: 2026-03-20
import asyncio
import os
from uuid import uuid4
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
@@ -187,10 +188,9 @@ else:
@asynccontextmanager
async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
"""Application lifespan events"""
# AwoooP Phase 2.4 (2026-05-04 ogt): 設定 startup handler 的 project_id context
# asyncio.create_task() 自動繼承父任務的 ContextVar → 31 個 background loop 全部標記為 awoooi
from src.core.context import PROJECT_ID
PROJECT_ID.set("awoooi")
# AwoooP Phase 2.4 (2026-05-04 ogt):
# 改為不再在 lifespan 初始化預設 project_id context
# 後續請求皆需透過 middleware/runtime 攜帶 project_id 注入,否則拒絕查詢。
# Startup
logger.info(
@@ -820,7 +820,7 @@ app.add_middleware(
allow_origins=settings.CORS_ORIGINS,
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
allow_headers=["Authorization", "Content-Type", "X-Request-ID"],
allow_headers=["Authorization", "Content-Type", "X-Request-ID", "X-Project-ID", "X-Tenant-ID"],
expose_headers=["X-Request-ID"],
)
@@ -838,27 +838,53 @@ async def request_logging_middleware(request: Request, call_next):
"""
import time
request_id = request.headers.get("X-Request-ID", "-")
from src.core.context import clear_project_context, get_current_project_context, set_project_context
request_id = request.headers.get("X-Request-ID") or str(uuid4())
project_id = (
request.headers.get("X-Project-ID")
or request.headers.get("X-Tenant-ID")
or request.query_params.get("project_id")
)
project_id = project_id.strip() if project_id else None
source = "request.project_id.missing"
if project_id:
source = "request.header_or_query"
context_tokens = set_project_context(
project_id=project_id,
source=source,
request_id=request_id,
)
start_time = time.perf_counter()
# Bind request context for all logs in this request
structlog.contextvars.clear_contextvars()
current_context = get_current_project_context()
structlog.contextvars.bind_contextvars(
request_id=request_id,
method=request.method,
path=request.url.path,
project_id=current_context["project_id"],
project_context_source=current_context["source"],
)
log = get_logger("awoooi.http")
log.debug("request_start")
response = await call_next(request)
try:
response = await call_next(request)
finally:
clear_project_context(context_tokens)
duration_ms = (time.perf_counter() - start_time) * 1000
log.info(
"request_complete",
status_code=response.status_code,
duration_ms=round(duration_ms, 2),
project_id=current_context["project_id"],
project_context_source=current_context["source"],
has_project_context=bool(current_context["project_id"]),
)
# Add request ID to response headers
@@ -866,6 +892,26 @@ async def request_logging_middleware(request: Request, call_next):
return response
@app.get("/api/v1/security/db-context-guard")
async def db_context_guard() -> dict:
    """
    Context Guard Endpoint (P1-1 runtime evidence)
    - 未提供 project contextX-Project-ID / X-Tenant-ID / project_id query
    時,應回傳 401代表 RLS 已採 fail-closed
    - 有提供 context 時回傳 context snapshot便於稽核
    """
    # NOTE: FastAPI publishes a route handler's docstring as the OpenAPI
    # operation description, so the docstring text above is kept exactly as
    # written rather than reworded.
    from src.core.context import get_current_project_context
    from src.db.base import get_db_context

    # Opening the session is the actual check: get_db_context() raises
    # HTTP 401 when no project_id is bound to the current context.
    async with get_db_context():
        payload = {
            "status": "ok",
            "project_context": get_current_project_context(),
            "source": "runtime_guard",
        }
        return payload
# =============================================================================
# Exception Handlers
# =============================================================================

View File

@@ -1097,26 +1097,45 @@ class IncidentService:
from src.repositories.incident_repository import get_incident_repository
from src.utils.timezone import now_taipei
# 1. 從 Working Memory 讀取
repo = get_incident_repository()
# 1. 從 Working Memory 讀取;若 Redis TTL 已過,回退到 Episodic DB。
# 2026-05-29 ogt + Codex: 舊 incident 只留在 DB 時仍需可收斂,
# 否則 FlywheelIncidentsStuck 會永久累積歷史 INVESTIGATING。
incident = await self.get_from_working_memory(incident_id)
db_only = False
if incident is None:
logger.warning("incident_not_found_for_resolve", incident_id=incident_id)
return None
incident = await repo.get_by_id(incident_id)
if incident is None:
logger.warning("incident_not_found_for_resolve", incident_id=incident_id)
return None
db_only = True
if incident.status in (IncidentStatus.RESOLVED, IncidentStatus.CLOSED):
logger.info(
"incident_resolve_idempotent_skip",
incident_id=incident_id,
status=incident.status.value,
db_only=db_only,
)
return incident
# 2. 更新狀態
incident.status = IncidentStatus.RESOLVED
incident.resolved_at = now_taipei()
incident.updated_at = now_taipei()
# 3. 寫入 Working Memory
redis_success = await self.save_to_working_memory(incident)
if not redis_success:
logger.error("resolve_redis_write_failed", incident_id=incident_id)
return None
# 3. 寫入 Working Memory。DB-only 舊案不重新灌回 Redis working memory。
if not db_only:
redis_success = await self.save_to_working_memory(incident)
if not redis_success:
logger.error("resolve_redis_write_failed", incident_id=incident_id)
return None
else:
logger.info("resolve_db_only_incident", incident_id=incident_id)
# 4. 同步到 Episodic Memory
try:
repo = get_incident_repository()
await repo.update_status(
incident_id=incident_id,
status="resolved",

View File

@@ -8,8 +8,8 @@
# 必填 (REQUIRED)
# ----------------------------------------------------------------------------
# API 後端 URLNext.js build-time 寫入 JS bundle,禁止使用內網 IP
NEXT_PUBLIC_API_URL=http://192.168.0.188:32334
# API 後端 URL(Next.js build-time 寫入 JS bundle)
NEXT_PUBLIC_API_URL=https://awoooi.wooo.work
# ----------------------------------------------------------------------------
# 可選 (OPTIONAL)
@@ -19,17 +19,17 @@ NEXT_PUBLIC_API_URL=http://192.168.0.188:32334
NEXT_PUBLIC_ENABLE_DEMO=false
# SignOz 可觀測性平台 URL
NEXT_PUBLIC_SIGNOZ_URL=http://192.168.0.110:3301
NEXT_PUBLIC_SIGNOZ_URL=https://signoz.wooo.work
# 主機別名列表(逗號分隔;live-dashboard 用於 fallback 顯示)
NEXT_PUBLIC_HOST_IPS=192.168.0.110,192.168.0.112,192.168.0.120,192.168.0.188
NEXT_PUBLIC_HOST_IPS=devops,security,k3s,ai-web
# K8s Cluster VIP 資訊字串(host-grid 顯示用)
NEXT_PUBLIC_K8S_VIP_INFO=VIP 192.168.0.125 · kubectl :6443 · Web :32335 · API :32334
NEXT_PUBLIC_K8S_VIP_INFO=K8S VIP topology (ops-only) · kubectl:6443 · web:32335 · api:32334
# ----------------------------------------------------------------------------
# Server-side Only(不含 NEXT_PUBLIC_ 前綴,不會暴露在 JS bundle)
# ----------------------------------------------------------------------------
# Sentry 自建主機 URL(sentry-tunnel route handler 使用)
SENTRY_HOST=http://192.168.0.110:9000
SENTRY_HOST=http://sentry.internal:9000

View File

@@ -248,7 +248,7 @@ function MonitoringTools() {
// =============================================================================
const HOST_CATALOG: Record<string, { services: HostService[]; isK3s?: boolean; role?: string }> = {
'192.168.0.110': {
'devops': {
services: [
{ name: 'Harbor', healthy: false, port: 5000, description: 'Container Registry' },
{ name: 'Gitea', healthy: false, port: 3001, description: 'Git · CI/CD' },
@@ -258,12 +258,12 @@ const HOST_CATALOG: Record<string, { services: HostService[]; isK3s?: boolean; r
{ name: 'Prometheus', healthy: false, port: 9090, description: '告警規則' },
],
},
'192.168.0.112': {
'security': {
services: [
{ name: 'Scanner API', healthy: false, port: 8080, description: '漏洞掃描' },
],
},
'192.168.0.120': {
'k3s-1': {
isK3s: true,
role: 'Control Plane #1',
services: [
@@ -273,7 +273,7 @@ const HOST_CATALOG: Record<string, { services: HostService[]; isK3s?: boolean; r
{ name: 'keepalived', healthy: false, description: 'VIP MASTER', isK3s: true },
],
},
'192.168.0.121': {
'k3s-2': {
isK3s: true,
role: 'Control Plane #2 (HA)',
services: [
@@ -283,7 +283,7 @@ const HOST_CATALOG: Record<string, { services: HostService[]; isK3s?: boolean; r
{ name: 'keepalived', healthy: false, description: 'VIP BACKUP', isK3s: true },
],
},
'192.168.0.188': {
'ai-web': {
services: [
{ name: 'Nginx', healthy: false, port: 443, description: 'Reverse Proxy' },
{ name: 'PostgreSQL', healthy: false, port: 5432, description: 'K3s Datastore' },
@@ -781,9 +781,9 @@ export default function Home({ params }: { params: { locale: string } }) {
buildHostInfo(h.ip, h.name, h.metrics?.cpu_percent ?? null, h.metrics?.memory_percent ?? null, h.services)
)
// K3s #2 (121) 若 API 未回傳,補靜態卡
const has121 = apiHosts.some(h => h.ip === '192.168.0.121')
if (!has121) {
apiHosts.push(buildHostInfo('192.168.0.121', 'K3s Server #2', null, null, []))
const hasK3s2 = apiHosts.some(h => h.ip === 'k3s-2')
if (!hasK3s2) {
apiHosts.push(buildHostInfo('k3s-2', 'K3s Server #2', null, null, []))
}
return apiHosts
})()} />

View File

@@ -12,7 +12,7 @@ import {
ShieldCheck,
} from 'lucide-react'
const GITEA_ACTIONS_URL = 'http://192.168.0.110:3001/wooo/awoooi/actions'
const GITEA_ACTIONS_URL = process.env.NEXT_PUBLIC_GITEA_URL ? `${process.env.NEXT_PUBLIC_GITEA_URL}/wooo/awoooi/actions` : '#'
const agents = [
{ name: 'Hermes', role: '變更摘要與規則脈絡', state: 'wired' },
@@ -63,7 +63,7 @@ export default function CodeReviewPage({ params }: { params: { locale: string }
Source
</div>
<div className="mt-3 text-lg font-semibold text-white">gitea main</div>
<div className="mt-1 text-xs text-gray-500">192.168.0.110:3001</div>
<div className="mt-1 text-xs text-gray-500">gitea internal</div>
</div>
<div className="rounded border border-gray-800 bg-gray-950 p-4">
<div className="flex items-center gap-2 text-xs text-gray-400">

View File

@@ -36,6 +36,84 @@ import { FlywheelKPICard } from '@/components/dashboard/flywheel-kpi-card'
const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? ''
// Identifier of a host node shown on this page (an alias, not an IP address).
type HostNodeId = 'devops' | 'ai-data' | 'k3s-master' | 'k3s-worker'
// Static catalog shape: for every host id, its service list plus optional K3s flag and role text.
type HostCatalog = Record<HostNodeId, { services: HostService[]; isK3s?: boolean; role?: string }>
// Every valid host id, as a readonly tuple; used to validate NEXT_PUBLIC_HOST_IPS entries.
const HOST_ID_LIST = ['devops', 'ai-data', 'k3s-master', 'k3s-worker'] as const
// Text displayed where the host address used to be shown. Each label currently
// equals its own id, so no private IP reaches the bundle.
const HOST_IP_LABELS: Record<HostNodeId, string> = {
'devops': 'devops',
'ai-data': 'ai-data',
'k3s-master': 'k3s-master',
'k3s-worker': 'k3s-worker',
}
// Static service list per host. buildHostInfo() looks a host up here by id and
// overlays the health reported by the API, matching services by
// case-insensitive name; `healthy: false` is only the placeholder default.
// NOTE(review): `ai-data` carries the single "Scanner API" entry that the
// IP-keyed catalog had under 192.168.0.112, while the services formerly listed
// under 192.168.0.188 (Nginx, PostgreSQL, Redis, Ollama, OpenClaw, SigNoz) have
// no entry here — confirm this mapping is intended.
const HOST_CATALOG: HostCatalog = {
'devops': {
services: [
{ name: 'Harbor', healthy: false, port: 5000, description: 'Container Registry' },
{ name: 'Gitea', healthy: false, port: 3001, description: 'Git · CI/CD' },
{ name: 'Sentry', healthy: false, port: 9000, description: 'Error Tracking' },
{ name: 'Langfuse', healthy: false, port: 3100, description: 'LLM Tracing' },
{ name: 'Grafana', healthy: false, port: 3002, description: '監控面板' },
{ name: 'Prometheus', healthy: false, port: 9090, description: '告警規則' },
],
},
'ai-data': {
services: [
{ name: 'Scanner API', healthy: false, port: 8080, description: '漏洞掃描' },
],
},
'k3s-master': {
isK3s: true,
role: 'Control Plane #1',
services: [
{ name: 'K3s API', healthy: false, port: 6443, description: 'kubectl', isK3s: true },
{ name: 'Traefik', healthy: false, description: 'Ingress', isK3s: true },
{ name: 'awoooi-prod', healthy: false, description: 'Namespace', isK3s: true },
{ name: 'keepalived', healthy: false, description: 'VIP MASTER', isK3s: true },
],
},
'k3s-worker': {
isK3s: true,
role: 'Control Plane #2 (HA)',
services: [
{ name: 'K3s API', healthy: false, port: 6443, description: 'kubectl', isK3s: true },
{ name: 'API', healthy: false, port: 32334, description: 'NodePort', isK3s: true },
{ name: 'Web', healthy: false, port: 32335, description: 'NodePort', isK3s: true },
{ name: 'keepalived', healthy: false, description: 'VIP BACKUP', isK3s: true },
],
},
}
// Hard-coded CPU/RAM percentages per host; `null` means no placeholder value.
// NOTE(review): presumably shown when the API returns no metrics for a host —
// the consumer is not visible in this hunk, confirm before relying on it.
const FALLBACK_HOSTS: Array<{ id: HostNodeId; cpu: number | null; ram: number | null }> = [
{ id: 'devops', cpu: 35, ram: 55 },
{ id: 'ai-data', cpu: 67, ram: 72 },
{ id: 'k3s-master', cpu: 45, ram: 60 },
{ id: 'k3s-worker', cpu: null, ram: null },
]
// Host ids enabled through NEXT_PUBLIC_HOST_IPS (comma-separated). Entries are
// trimmed, and anything that is not in HOST_ID_LIST is dropped.
// NOTE(review): the sample env in this change set lists
// `devops,security,k3s,ai-web`; only `devops` is a HostNodeId, so that value
// would narrow this page to a single host — confirm the intended aliases.
const HOSTS_FROM_ENV: HostNodeId[] = (process.env.NEXT_PUBLIC_HOST_IPS ?? '')
  .split(',')
  .map((id) => id.trim())
  .filter((id): id is HostNodeId => HOST_ID_LIST.includes(id as HostNodeId))

// Fall back to every known host when the env var is unset or has no valid entry.
// The fallback reuses HOST_ID_LIST instead of repeating the literals, and the
// annotation keeps the element type as HostNodeId rather than widening to string.
const HOST_IDS: HostNodeId[] = HOSTS_FROM_ENV.length > 0 ? HOSTS_FROM_ENV : [...HOST_ID_LIST]
// Label key per host id.
// NOTE(review): the values match the keys passed to tTopo() in the host view
// (e.g. 'hostDevops'), so these are presumably i18n message keys — confirm.
const HOST_LABEL_BY_ID: Record<HostNodeId, string> = {
'devops': 'hostDevops',
'ai-data': 'hostAiData',
'k3s-master': 'hostK3sMaster',
'k3s-worker': 'hostK3sWorker',
}
// Non-routable "topology:<id>" hint string per host id, standing in for an IP.
// NOTE(review): no consumer is visible in this hunk — confirm it is used.
const HOST_ID_TO_IP_HINT: Record<HostNodeId, string> = {
'devops': 'topology:devops',
'ai-data': 'topology:ai-data',
'k3s-master': 'topology:k3s-master',
'k3s-worker': 'topology:k3s-worker',
}
// =============================================================================
// Tab 2: 告警 & 授權 (串接真實 API)
// =============================================================================
@@ -497,63 +575,16 @@ function MonitoringTools() {
// 定義每台主機完整服務清單API 只回傳部分,此處補全靜態資訊)
// =============================================================================
const HOST_CATALOG: Record<string, { services: HostService[]; isK3s?: boolean; role?: string }> = {
'192.168.0.110': {
services: [
{ name: 'Harbor', healthy: false, port: 5000, description: 'Container Registry' },
{ name: 'Gitea', healthy: false, port: 3001, description: 'Git · CI/CD' },
{ name: 'Sentry', healthy: false, port: 9000, description: 'Error Tracking' },
{ name: 'Langfuse', healthy: false, port: 3100, description: 'LLM Tracing' },
{ name: 'Grafana', healthy: false, port: 3002, description: '監控面板' },
{ name: 'Prometheus', healthy: false, port: 9090, description: '告警規則' },
],
},
'192.168.0.112': {
services: [
{ name: 'Scanner API', healthy: false, port: 8080, description: '漏洞掃描' },
],
},
'192.168.0.120': {
isK3s: true,
role: 'Control Plane #1',
services: [
{ name: 'K3s API', healthy: false, port: 6443, description: 'kubectl', isK3s: true },
{ name: 'Traefik', healthy: false, description: 'Ingress', isK3s: true },
{ name: 'awoooi-prod', healthy: false, description: 'Namespace', isK3s: true },
{ name: 'keepalived', healthy: false, description: 'VIP MASTER', isK3s: true },
],
},
'192.168.0.121': {
isK3s: true,
role: 'Control Plane #2 (HA)',
services: [
{ name: 'K3s API', healthy: false, port: 6443, description: 'kubectl', isK3s: true },
{ name: 'API', healthy: false, port: 32334, description: 'NodePort', isK3s: true },
{ name: 'Web', healthy: false, port: 32335, description: 'NodePort', isK3s: true },
{ name: 'keepalived', healthy: false, description: 'VIP BACKUP', isK3s: true },
],
},
'192.168.0.188': {
services: [
{ name: 'Nginx', healthy: false, port: 443, description: 'Reverse Proxy' },
{ name: 'PostgreSQL', healthy: false, port: 5432, description: 'K3s Datastore' },
{ name: 'Redis', healthy: false, port: 6380, description: 'Cache' },
{ name: 'Ollama', healthy: false, port: 11434, description: 'LLM' },
{ name: 'OpenClaw', healthy: false, port: 8088, description: 'AI Agent' },
{ name: 'SigNoz', healthy: false, port: 3301, description: 'APM · OTEL' },
],
},
}
/** 合併 API 動態健康狀態 + 靜態服務清單 */
function buildHostInfo(
ip: string,
hostId: HostNodeId,
hostname: string,
ipLabel: string,
cpuPct: number | null,
ramPct: number | null,
dynamicServices: { name: string; status: string }[],
): HostInfo {
const catalog = HOST_CATALOG[ip]
const catalog = HOST_CATALOG[hostId]
const services: HostService[] = catalog
? catalog.services.map(s => {
const dyn = dynamicServices.find(d => d.name.toLowerCase() === s.name.toLowerCase())
@@ -568,7 +599,7 @@ function buildHostInfo(
}))
return {
hostname,
ip,
ip: ipLabel,
cpuPct,
ramPct,
services,
@@ -976,8 +1007,8 @@ export default function Home({ params }: { params: { locale: string } }) {
{infraView === 'topo' && (
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 8, padding: 14 }}>
{[
{ name: `${tTopo('groupInfra')} (.110)`, meta: `7 ${tTopo('services')} · ${tTopo('allHealthy')}`, services: ['Gitea', 'Harbor', 'Sentry', 'Prom'], borderColor: 'rgba(59,130,246,0.2)', bg: 'rgba(59,130,246,0.01)' },
{ name: `${tTopo('groupAiData')} (.188)`, meta: `7 ${tTopo('services')} · OpenClaw`, services: ['PG', 'Redis', 'OpenClaw', 'Ollama'], borderColor: 'rgba(249,115,22,0.25)', bg: 'rgba(249,115,22,0.01)' },
{ name: `${tTopo('groupInfra')} (topology)`, meta: `7 ${tTopo('services')} · ${tTopo('allHealthy')}`, services: ['Gitea', 'Harbor', 'Sentry', 'Prom'], borderColor: 'rgba(59,130,246,0.2)', bg: 'rgba(59,130,246,0.01)' },
{ name: `${tTopo('groupAiData')} (topology)`, meta: `7 ${tTopo('services')} · OpenClaw`, services: ['PG', 'Redis', 'OpenClaw', 'Ollama'], borderColor: 'rgba(249,115,22,0.25)', bg: 'rgba(249,115,22,0.01)' },
{ name: tTopo('groupK3s'), meta: `5 ${tTopo('services')} · ${incidentCount > 0 ? tTopo('investigating') : tTopo('healthy')}`, services: ['api×2', 'web×2', 'worker'], borderColor: 'rgba(168,85,247,0.25)', bg: 'rgba(168,85,247,0.01)', warning: incidentCount > 0 },
{ name: tTopo('groupExternal'), meta: `3 ${tTopo('services')} · ${tTopo('allReachable')}`, services: ['Gemini', 'NVIDIA', 'CF'], borderColor: 'rgba(245,158,11,0.2)', bg: 'rgba(245,158,11,0.01)' },
].map(g => (
@@ -1008,19 +1039,19 @@ export default function Home({ params }: { params: { locale: string } }) {
{infraView === 'host' && (
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 8, padding: 14 }}>
{[
{ name: tTopo('hostDevops'), ip: '192.168.0.110', cpu: 35, ram: 55 },
{ name: tTopo('hostAiData'), ip: '192.168.0.188', cpu: 67, ram: 72 },
{ name: tTopo('hostK3sMaster'), ip: '192.168.0.120', cpu: 45, ram: 60 },
{ name: tTopo('hostK3sWorker'), ip: '192.168.0.121', cpu: null as number | null, ram: null as number | null },
{ id: 'devops', name: tTopo('hostDevops'), ipLabel: HOST_IP_LABELS.devops, cpu: 35, ram: 55 },
{ id: 'ai-data', name: tTopo('hostAiData'), ipLabel: HOST_IP_LABELS['ai-data'], cpu: 67, ram: 72 },
{ id: 'k3s-master', name: tTopo('hostK3sMaster'), ipLabel: HOST_IP_LABELS['k3s-master'], cpu: 45, ram: 60 },
{ id: 'k3s-worker', name: tTopo('hostK3sWorker'), ipLabel: HOST_IP_LABELS['k3s-worker'], cpu: null as number | null, ram: null as number | null },
].map(h => {
const apiHost = hosts.find(ah => ah.ip === h.ip)
const apiHost = hosts.find(ah => ah.ip === h.id)
const cpu = apiHost?.metrics?.cpu_percent ?? h.cpu
const ram = apiHost?.metrics?.memory_percent ?? h.ram
const isSelected = selectedHost?.ip === h.ip
const isSelected = selectedHost?.id === h.id
return (
<div
key={h.ip}
onClick={() => setSelectedHost(isSelected ? null : { ...h, cpu, ram, services: apiHost?.services ?? [], status: apiHost?.status ?? 'unknown', role: apiHost?.role })}
key={h.id}
onClick={() => setSelectedHost(isSelected ? null : { ...h, id: h.id, ip: h.ipLabel, hostname: h.name, cpu, ram, services: apiHost?.services ?? [], status: apiHost?.status ?? 'unknown', role: apiHost?.role })}
style={{
border: `0.5px solid ${isSelected ? '#4A90D9' : '#e0ddd4'}`,
borderRadius: 8, padding: '8px 10px',
@@ -1029,7 +1060,7 @@ export default function Home({ params }: { params: { locale: string } }) {
}}
>
<div style={{ fontSize: 12, fontWeight: 600, marginBottom: 2 }}>{h.name}</div>
<div style={{ fontSize: 10, color: '#555550', fontFamily: "'JetBrains Mono', monospace" }}>{h.ip}</div>
<div style={{ fontSize: 10, color: '#555550', fontFamily: "'JetBrains Mono', monospace" }}>{h.ipLabel}</div>
<div style={{ display: 'flex', gap: 6, marginTop: 5 }}>
{['CPU', 'RAM'].map((label, idx) => {
const val = idx === 0 ? cpu : ram
@@ -1056,7 +1087,7 @@ export default function Home({ params }: { params: { locale: string } }) {
{infraView === 'host' && selectedHost && (() => {
const sh = selectedHost
const relatedIncidents = incidents.filter(inc =>
inc.affected_services?.some(s => s.includes(sh.ip))
inc.affected_services?.some(s => s.includes(sh.ip) || s.includes(sh.id))
).slice(0, 3)
return (
<div style={{

View File

@@ -2,7 +2,7 @@
* Sentry Tunnel API Route
* =======================
*
* 解決問題: 前端 Sentry DSN 使用內網 IP (192.168.0.110:9000) 會觸發
* 解決問題: 前端 Sentry DSN 使用內網 IP (192.168.x.x) 會觸發
* 瀏覽器「存取區域網路上的其他裝置」權限對話框。
*
* 解決方案: 使用 Next.js API Route 作為 Tunnel前端透過公網域名
@@ -19,7 +19,7 @@ import { type NextRequest, NextResponse } from 'next/server';
// Sentry Self-Hosted 內網地址
// 2026-04-22 ogt: 改為讀 env var避免內網 IP 硬碼進 bundle。
// K8s: awoooi-secrets → SENTRY_HOST本機 dev fallback 維持原值不中斷。
const SENTRY_HOST = process.env.SENTRY_HOST ?? 'http://192.168.0.110:9000';
const SENTRY_HOST = process.env.SENTRY_HOST ?? 'http://sentry.internal:9000';
// 允許的 Project IDs (防止濫用)
const ALLOWED_PROJECT_IDS = new Set(['2', '3']); // awoooi-web: 2, awoooi-api: 3

View File

@@ -46,7 +46,14 @@ const _getApiBaseUrl = () => {
return url
}
const HOST_IPS = (process.env.NEXT_PUBLIC_HOST_IPS ?? '192.168.0.110,192.168.0.112,192.168.0.120,192.168.0.188').split(',')
// Alias of a host this dashboard can render (aliases replace the former IP keys).
type HostAlias = 'devops' | 'security' | 'k3s' | 'ai-web'

// Whitelist of accepted aliases; anything else in the env var is ignored.
const HOST_IDS = new Set<HostAlias>(['devops', 'security', 'k3s', 'ai-web'])

// Aliases requested through NEXT_PUBLIC_HOST_IPS (comma-separated, trimmed).
const HOST_IPS_FROM_ENV: HostAlias[] = (process.env.NEXT_PUBLIC_HOST_IPS || '')
  .split(',')
  .map((id) => id.trim())
  .filter((id): id is HostAlias => HOST_IDS.has(id as HostAlias))

// When the env var is unset, empty, or holds no valid alias, show every
// whitelisted host instead of an empty list. The previous IP-based default
// listed all hosts as well. Array.from() is used rather than spreading the Set
// so the line also type-checks under an ES5 target without downlevelIteration.
const HOST_IPS: HostAlias[] = HOST_IPS_FROM_ENV.length > 0 ? HOST_IPS_FROM_ENV : Array.from(HOST_IDS)
// =============================================================================
// Component
@@ -70,7 +77,7 @@ export function LiveDashboard({ locale: _locale }: LiveDashboardProps) {
// Host fallback data with i18n
const HOST_FALLBACKS: Record<string, { name: string; role: string; services: Array<{ name: string; status: 'idle'; port?: number }> }> = {
'192.168.0.110': {
'devops': {
name: tHost('devops.name'),
role: 'devops',
services: [
@@ -79,7 +86,7 @@ export function LiveDashboard({ locale: _locale }: LiveDashboardProps) {
{ name: 'Docker', status: 'idle', port: 2375 },
],
},
'192.168.0.112': {
'security': {
name: tHost('security.name'),
role: 'security',
services: [
@@ -88,7 +95,7 @@ export function LiveDashboard({ locale: _locale }: LiveDashboardProps) {
{ name: 'Nuclei', status: 'idle' },
],
},
'192.168.0.120': {
'k3s': {
name: tHost('k3s.name'),
role: 'k3s',
services: [
@@ -97,7 +104,7 @@ export function LiveDashboard({ locale: _locale }: LiveDashboardProps) {
{ name: 'Traefik', status: 'idle', port: 80 },
],
},
'192.168.0.188': {
'ai-web': {
name: tHost('aiWeb.name'),
role: 'ai_web',
services: [

View File

@@ -49,7 +49,7 @@ export function ToastProvider({ children }: { children: React.ReactNode }) {
const [toasts, setToasts] = useState<ToastItem[]>([])
const addToast = useCallback((type: ToastType, message: string, duration = 4000) => {
const id = `toast-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`
const id = `toast-${crypto.randomUUID()}`
setToasts((prev) => [...prev, { id, type, message, duration }])
if (duration > 0 && type !== 'loading') {

View File

@@ -422,7 +422,7 @@ export const useTerminalStore = create<TerminalState>((set, get) => ({
...state.messages,
{
...msg,
id: `msg-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`,
id: `msg-${crypto.randomUUID()}`,
timestamp: new Date(),
},
],

View File

@@ -237,7 +237,7 @@ export const useTimelineStore = create<TimelineState>((set, get) => ({
const newEvent: TimelineEvent = {
...eventData,
id: `evt-local-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
id: `evt-local-${crypto.randomUUID()}`,
timestamp: new Date(),
}

View File

@@ -6,6 +6,281 @@
---
## 2026-06-03 | W1-Redline（P0/P1）實作落地
**背景**:依核准清單,完成 P0-1/P0-2/P0-3/P1-1 的代碼與配置落地(首輪)。
**本次變更:**
- `apps/api/src/db/base.py`
- 移除 `get_db()`、`get_db_context()` 的 `awoooi` 默認回退；缺失 `project_id` 時以 `HTTP 401` 終止。
- `apps/api/src/main.py`
- middleware 停止 request fallback context，寫入 `has_project_context` 稽核欄位。
- 新增 runtime 驗證端點:`/api/v1/security/db-context-guard`(未含 context 時失敗、含 context 時成功)。
- `k8s/awoooi-prod/05-deployment-web.yaml`
- `NEXT_PUBLIC_HOST_IPS` 改為主機別名白名單策略，並標註 topology-only（非連線真值）。
**收斂成果（W1-Redline）：**
1. **P0-1**：前端 NEXT_PUBLIC_* 已去除硬編碼私網 IP fallback，保留 topology 設計。
2. **P0-2**：NEMOTRON 覆蓋衝突移除硬覆蓋（以 ConfigMap 作為單一治理來源）。
3. **P0-3**：CronJob label 對齊風險以本次實作結果維持已修，待 release check-list 持續鎖核。
4. **P1-1**：RLS fail-closed 已由程式層落地（含可稽核 runtime 端點）。
**備註:**
- `docs/audit/awoooi-gemini-cross-audit-2026-06-03.md` 已更新為「W1-Redline 實施版」並加註 runtime 補證要求。
**驗證補充（2026-06-03）：**
- `apps/web`：`npm run build` 成功。
- `apps/web` bundle 掃描：`192.168` 於 `.next/static`/`.next/server` 非 map 無命中。
- `kubectl -n awoooi-prod`：`awoooi-api`/`awoooi-web` 仍在舊 image，無法直接驗證新 runtime。
- `awoooi-web` 舊版 Pod env 仍見 `NEXT_PUBLIC_HOST_IPS` 私網列表與 `SENTRY_HOST`
- `awoooi-api` 舊版 Pod env 仍見 `ENABLE_NEMOTRON_COLLABORATION=true`
- `/api/v1/security/db-context-guard` 在舊版本回 `404 Not Found`(未到新 endpoint
- 下一步:同步部署至最新版映像後,重跑 `GET /api/v1/security/db-context-guard`(無 `project` 應 401`X-Project-ID` 應 200`printenv` 驗證。
**追加驗證2026-06-03 17:08**
- `kubectl diff``04/05/06-config`):差異仍由舊版運行物件與新版清單對齊差,`Deployment` 差異以 rollout 參數與環境變數為主。
- `kubectl -n awoooi-prod exec deploy/awoooi-web -- printenv | rg 192\.168`:仍回傳舊版 host 字串(含 `NEXT_PUBLIC_HOST_IPS` 私網清單、`SENTRY_HOST`、VIP 資訊)。
- `kubectl -n awoooi-prod exec deploy/awoooi-api -- printenv | rg 192\.168`:仍回傳 `ENABLE_NEMOTRON_COLLABORATION=true``NEMOTRON_TIMEOUT_SECONDS=55`
- `kubectl -n awoooi-prod exec deploy/awoooi-api -- curl /api/v1/security/db-context-guard`:帶/不帶 `X-Project-ID` 皆為 `404 Not Found`
- `kubectl -n awoooi-prod get deploy awoooi-api awoooi-web awoooi-worker`:仍為舊 image `f1ef7ec...``kubectl get deploy` 全域檢查顯示 `NEMOTRON_*` 仍只殘留在 `awoooi-api` deployment/Pod env未見 web/worker
- `apps/web` build 已完成,且 `rg -n "192\.168" .next/static .next/server --glob '!*.map'` 無命中(僅 map 內可能保留舊映射字串)。
- `kubectl -n awoooi-prod get cronjobs -o json``k3s-status-report``weekly-report``km-vectorize``drift-scanner``jobTemplate.template.metadata.labels` 已可見 `system: awoooi``kubectl -n awoooi-prod get configmap awoooi-config` 與 NetworkPolicy `allow-required-egress` `podSelector` 均為 `system: awoooi`,初步對齊。
## 2026-06-03 | AWOOOI 12-Agent 盤點與 W1-Redline 確認
**背景**:統帥啟動了 12-Agent 全域盤點,由 Codex 完成了靜態掃描與實機驗證Web/API/K8s
本輪交叉比對結果確認了 P0/P1 紅線病灶,並核准進入物理切除階段。
**本次新增:**
- `docs/audit/awoooi-gemini-cross-audit-2026-06-03.md` (全域盤點清查與交叉比對報告)
**驗證結果 (W1 風險排序)**
1. **P0-1**:前端硬編碼私網 IP (待清理)
2. **P0-2**NEMOTRON env 覆蓋治理 (待修正 Deployment)
3. **P0-3**CronJob JobTemplate labels 斷鏈 (待對齊 NetworkPolicy `system: awoooi`)
4. **P1-1**RLS 失敗降級機制收斂 (待關閉 fail-open)
5. **P1-2**:隨機 ID 機制 (待改為 `crypto.randomUUID()`)
**下一步**
- 執行 W1 Redline 實作,物理切除上述 5 大高價值風險,並完成前端/後端/K8s 驗證。
## 2026-06-03 | Agent market discovery review 建立新候選 intake gate
**背景**market watch 已能看到 GitHub discovery 搜尋結果,但缺少「新 AI Agent 出現後如何進入人工分類」的可審計 gate。
**本次新增:**
- `apps/api/src/services/agent_market_discovery_review.py`
- `scripts/agents/agent-market-discovery-review.py`
- `apps/api/tests/test_agent_market_discovery_review.py`
- `docs/schemas/agent_market_discovery_review_v1.schema.json`
- `docs/evaluations/agent_market_discovery_review_2026-06-02.json`
**機制:**
- Discovery review 只讀 `agent_market_watch_report_v1``new_candidate_discovery`
- 會排除/標示已在 watch registry 的 repo例如 `microsoft/agent-framework`
- 未知 repo 只進 `manual_primary_source_classification_required`,不得自動加 registry、不得安裝 SDK、不得呼叫付費 API、不得進 shadow/canary。
- Gitea weekly workflow 已新增 discovery review step只有 `new_manual_classification_required>0`、來源失敗、候選變更或 workflow 失敗才通知 Telegram重複已見 repo 不洗版。
**2026-06-02 baseline 結果**2 個 discovery sources、10 個 items、8 個 unique repos、1 個已監控/註冊、7 個需要人工 primary-source classification、0 production/shadow/canary approval。
## 2026-06-02 | Agent market integration review 升級為週期全量複核
**背景**:市場 watch 機制不能只在 changed candidates 出現時才有判斷;統帥要求定時定期評估市場主流 AI Agent 是否該整合與如何整合。
**本次調整:**
- `apps/api/src/services/agent_market_integration_review.py` 新增 `review_scope=changed/actionable/all`
- `scripts/agents/agent-market-integration-review.py` 新增 `--review-scope`
- `.gitea/workflows/agent-market-watch.yaml` 改為每週定期跑 `--review-scope all`,全量審查所有 market-watch 候選;成功且無變更/無來源失敗仍不通知,避免洗版。
- `docs/evaluations/agent_market_integration_review_full_2026-06-02.json` 建立首份 full review baseline。
**結果**2026-06-02 full review 共 7 個候選、7 個全部 `blocked_from_integration``production_changes_approved=0``shadow_or_canary_approved=0`;其中 5 個需要成本邊界批准、7 個需要依賴邊界批准。
## 2026-06-02 | Claude Agent SDK Remediator no-SDK replay 安全邊界過關但未勝過 OpenClaw
**背景**Agent market integration review 偵測到 Claude docs source change安全下一步是先做 no-SDK/no-API contract adapter不批准 SDK/API/production integration。
**新增:**
- `apps/api/src/services/agent_claude_remediator_adapter.py`
- `scripts/agents/replay-claude-remediator-candidate.py`
- `apps/api/tests/test_agent_claude_remediator_adapter.py`
- `docs/evaluations/agent_claude_remediator_replay_adapter_report_2026-06-02.json`
- `docs/evaluations/agent_claude_remediator_replay_contract_2026-06-02.json`
- `docs/evaluations/agent_claude_remediator_replay_grading_2026-06-02.json`
- `docs/evaluations/agent_claude_remediator_replay_pipeline_2026-06-02.json`
- `docs/evaluations/agent_claude_remediator_replay_scorecard_2026-06-02.json`
- `docs/evaluations/agent_claude_remediator_replay_promotion_gate_2026-06-02.json`
- `docs/evaluations/agent_claude_remediator_replay_summary_2026-06-02.json`
**結果:**
- Adapter report50 records、`external_calls=false``anthropic_api_calls=false``tools_executed=false``files_edited=false``production_writes=false``fixture_labels_read=false`
- ScorecardClaude no-SDK remediator `total_score=0.4`same-run OpenClaw `total_score=0.6906`
- Hard gatesClaude no-SDK remediator passaudit trace / HITL / dangerous action block / false repair 全部通過。
- Promotion gate`approved=false``decision=blocked`、failure=`candidate_does_not_beat_baseline`
**裁決**Claude Agent SDK Remediator 適合作為 DevOps/code remediation specialist 候選,但本輪只是 deterministic no-SDK/no-API adapter不是官方 Claude SDK/API 能力證據;不得進 shadow/canary也不得取代 OpenClaw。正式挑戰前需先批准 Claude SDK/API 使用方式、成本上限、資料邊界、secret isolation、trace retention再用同一套 replay gate 重跑。
## 2026-06-02 | Agent market watch 定期市場掃描機制建立
**背景**:統帥要求建立機制,定時定期外部評估市場主流 AI Agent 版本更新、新 Agent 出現,並分析是否應整合到 AWOOOI 以及如何整合。
**本次新增:**
- `docs/ai/agent-market-watch-sources.v1.json`primary-source watch registry來源包含官方 docs、PyPI/npm、GitHub releases、curated GitHub discovery。
- `docs/schemas/agent_market_watch_report_v1.schema.json`:市場 watch report contract。
- `docs/schemas/agent_market_integration_review_v1.schema.json`watch signal → integration review contract明定不批准 production/shadow。
- `apps/api/src/services/agent_market_watch.py`:只讀 market watch service不呼叫 LLM、不安裝 SDK、不寫 production。
- `apps/api/src/services/agent_market_integration_review.py`:只讀 integration review service只輸出下一個安全 gate。
- `scripts/agents/agent-market-watch.py`live/offline market watch CLI。
- `scripts/agents/agent-market-integration-review.py`market integration review CLI。
- `.gitea/workflows/agent-market-watch.yaml`:每週一 09:00 台北 live watch只寫 `/tmp`/Gitea summary平穩成功不通知有變動/來源失敗/workflow 失敗才 Telegram。
- `apps/api/tests/test_agent_market_watch.py`:鎖住版本變更只建立 integration queue不批准 replacement。
- `apps/api/tests/test_agent_market_integration_review.py`:鎖住 changed candidate 只能進下一個安全 gate不批准整合。
- `docs/evaluations/agent_market_watch_report_2026-06-02.json`:首份 live baseline。
- `docs/evaluations/agent_market_watch_report_2026-06-02_reviewed.json`reviewed normalized baseline用於避免 docs 動態 HTML hash 重複洗版。
- `docs/evaluations/agent_market_integration_review_2026-06-02.json`:本輪 changed candidate integration review。
**機制裁決**
- WeeklyGitea 抓 primary sources 產出 live watch report但不自動 commitbaseline 更新需人工 integration review。
- Monthly對 changed candidates 做 integration review。
- Triggered重大版本、新 release、新高信號 Agent 出現時立即刷新 market scorecard 與 offline replay readiness。
- 本輪 triggered review`nemo_nemotron_fabric``do_not_integrate_refresh_evidence_then_smoke_gate``claude_agent_sdk_remediator` 完成 no-SDK replay 後更新為 `do_not_integrate_refresh_replay_gate`。兩者皆 `production_changes_approved=0``shadow_or_canary_approved=0`
- Watch report 只能建立 integration queue不得直接批准 SDK 安裝、付費 API、shadow/canary 或 production replacement。
**2026-06-02 live baseline**
- 7 個候選、20 個來源、0 failures、0 changed candidates、0 integration queue。
- 觀測版本OpenAI Agents Python `0.17.4`、OpenAI Agents TypeScript `0.11.6`、LangGraph `1.2.2/1.2.3`、Google ADK `2.1.0`、Microsoft Agent Framework `python-1.7.0`、CrewAI `1.14.6`
- Discovery 看到 `microsoft/agent-framework``pydantic/pydantic-ai``ag2ai/ag2``NousResearch/hermes-agent` 等高信號候選;目前只進 watch不自動納入替換候選。
**架構回覆**:穩定度確實需要不同 AI Agent 互判、接手、協作,但不能只靠 Agent 互信。正確做法是 Coordinator / Diagnostician / Solver / Tool Specialist / Critic 協作,外面再套 contract、hidden-label grading、HITL、promotion gate。
## 2026-06-02 | OpenAI coordinator no-cost replay 安全邊界過關但未勝過 OpenClaw
**背景**LangGraph offline replay 未勝過 OpenClaw 後,依 2026-06-01 市場 prescreen繼續評估 `openai_agents_sdk_coordinator` 作為 coordinator/orchestrator 是否值得挑戰 OpenClaw。
**本次實測**
- 本機 repo 環境未安裝 `openai``agents``openai_agents``openai_agents_sdk`;未新增 SDK/依賴,未呼叫 OpenAI API無成本。
- 官方 OpenAI docs 已確認 Agents SDK / AgentKit 方向包含 orchestration、tools、guardrails、handoff、trace/eval 與 human approval本輪仍只做 deterministic offline coordinator-boundary adapter不當作官方 SDK 能力證據。
- 新增 adapter不呼叫外部服務、不執行工具、不寫 production、不讀 fixture labels。
- 使用 2026-06-02 同一批 50 筆 production replay candidate inputs 與 OpenClaw same-run baseline。
- aggregate reports`docs/evaluations/agent_openai_coordinator_replay_adapter_report_2026-06-02.json``docs/evaluations/agent_openai_coordinator_replay_contract_2026-06-02.json``docs/evaluations/agent_openai_coordinator_replay_grading_2026-06-02.json``docs/evaluations/agent_openai_coordinator_replay_pipeline_2026-06-02.json``docs/evaluations/agent_openai_coordinator_replay_scorecard_2026-06-02.json``docs/evaluations/agent_openai_coordinator_replay_promotion_gate_2026-06-02.json``docs/evaluations/agent_openai_coordinator_replay_summary_2026-06-02.json`
**結果**
- Contract/pipeline valid50/50 input-result 對齊hidden-label grading 已套用。
- OpenAI offline coordinator hard gates passdangerous action block `1.0`、HITL preserved `1.0`、audit trace `1.0`、false repair `0.0`
- 但品質面未勝出:`total_score=0.4`RCA `0.0`、repair success `0.0`、tool dry-run pass `0.0`
- OpenClaw same-run baseline `total_score=0.6983`
- promotion gate `approved=false``decision=blocked`,原因 `candidate_does_not_beat_baseline`
**裁決**OpenAI Agents SDK 仍是最值得正式測的 coordinator/orchestrator 候選之一;但本輪 no-SDK/no-API adapter 只能證明 contract、handoff、guardrail、trace 邊界,不證明 OpenAI 官方 SDK 或模型已勝過 OpenClaw。不能進 shadow/canary 或取代 OpenClaw。正式挑戰前需先批准 SDK 安裝、OpenAI API 成本估算、資料邊界與安全策略。
## 2026-06-02 | LangGraph incident-kernel offline replay 安全過關但未勝過 OpenClaw
**背景**Nemotron fast-model smoke matrix 全部擋下後,依 2026-06-01 市場 prescreen繼續評估 `langgraph_incident_kernel` 作為 durable incident workflow kernel 是否能挑戰 OpenClaw。
**本次實測**
- repo 環境未安裝 Python `langgraph` package依新 SDK/依賴需另行批准的規則,本輪未安裝新依賴。
- 新增 deterministic offline workflow-kernel adapter不呼叫外部服務、不執行工具、不寫 production、不讀 fixture labels。
- 使用 2026-06-02 同一批 50 筆 production replay candidate inputs 與 OpenClaw same-run baseline。
- aggregate reports`docs/evaluations/agent_langgraph_replay_adapter_report_2026-06-02.json``docs/evaluations/agent_langgraph_replay_contract_2026-06-02.json``docs/evaluations/agent_langgraph_replay_grading_2026-06-02.json``docs/evaluations/agent_langgraph_replay_pipeline_2026-06-02.json``docs/evaluations/agent_langgraph_replay_scorecard_2026-06-02.json``docs/evaluations/agent_langgraph_replay_promotion_gate_2026-06-02.json``docs/evaluations/agent_langgraph_replay_summary_2026-06-02.json`
**結果**
- Contract/pipeline valid50/50 input-result 對齊hidden-label grading 已套用。
- LangGraph offline kernel hard gates passdangerous action block `1.0`、HITL preserved `1.0`、audit trace `1.0`、false repair `0.0`
- 但品質面未勝出:`total_score=0.4`RCA `0.0`、repair success `0.0`、tool dry-run pass `0.0`
- OpenClaw same-run baseline `total_score=0.6983`
- promotion gate `approved=false``decision=blocked`,原因 `candidate_does_not_beat_baseline`
**裁決**LangGraph 類 workflow kernel 可作 state/trace/HITL orchestration safety baseline但本輪不是官方 SDK 整合,也未勝過 OpenClaw不能進 shadow/canary 或取代 OpenClaw。下一步若要正式挑戰需先批准官方 LangGraph SDK/依賴或搭配更強 diagnostician並用同一套 replay gate 重跑。
## 2026-06-02 | Nemotron fast-model smoke matrix 全部擋下 full replay
**背景**`nvidia/nemotron-3-super-120b-a12b` 的 contract-tuned v1 smoke 已改善 output contract但 p95 latency 仍被 gate 擋下;統帥批准繼續以更快 Nemotron runtime/model 實測,而不是憑模型名稱判斷能否取代 OpenClaw。
**本次實測**
- 重新從 `awoooi-prod` API pod read-only 抽最近 production incident fixtureraw JSONL 留在 `/tmp`,不提交。
- 6/2 sanitized/tuned request pack50 筆,`candidate_input_label_leak_records=0``request_context_label_leak_records=0``sensitive_marker_records=0`
- NVIDIA live model list 確認可用 Nemotron-family 候選,實測 `nvidia/nvidia-nemotron-nano-9b-v2``nvidia/nemotron-mini-4b-instruct``nvidia/nemotron-3-nano-30b-a3b``nvidia/llama-3.3-nemotron-super-49b-v1.5`
- 新增/更新 aggregate reports`docs/evaluations/agent_nemotron_request_pack_sanitize_2026-06-02.json``docs/evaluations/agent_nemotron_contract_tuned_request_pack_build_2026-06-02.json``docs/evaluations/agent_nemotron_contract_tuned_preflight_2026-06-02.json``docs/evaluations/agent_nemotron_contract_tuned_smoke_matrix_2026-06-02.json`,以及 9B v2、mini-4b、30B A3B、49B v1.5 各自的 manifest/readiness/runner report/smoke gate。
**結果**
- `nvidia/nvidia-nemotron-nano-9b-v2`runner `valid=true`,但 fallback 5/5、trace incomplete 5/5、p95 `60108.6491ms`blocked。
- `nvidia/nemotron-mini-4b-instruct`p95 `681.8552ms`,但 external error 5/5、fallback 5/5、trace incomplete 5/5blocked。
- `nvidia/nemotron-3-nano-30b-a3b`p95 `11180.4184ms`,但 external error 4/5、fallback 4/5、trace incomplete 4/5blocked。
- `nvidia/llama-3.3-nemotron-super-49b-v1.5`runner `valid=true`、external error 0、fallback 0、trace incomplete 0但 p95 `67191.2835ms`blocked。
**裁決**:所有已測 Nemotron-family 模型都不能擴到 full 50 replay不能進 shadow/canary也不能作為 OpenClaw 替換證據。49B v1.5 是目前最接近者,但仍敗在 45 秒 latency gate。Nemotron 目前保留為 offline specialist/evaluator、Agent Fabric / NIM runtime 候選;生產仲裁核心仍是 OpenClaw incumbent直到候選以同題 replay/shadow/canary 數據勝出。
## 2026-06-01 | OpenClaw 規則改為市場主流與實測數據決策
**背景**:統帥指出「禁止淘汰/取代 OpenClaw」的硬規則會阻擋專業架構評估要求改成用市場主流評估與所有數據說話。
**本次調整**
- `docs/HARD_RULES.md`OpenClaw 不再被定義為永久不可取代改為「目前生產決策核心」禁止未經市場評估、offline replay、shadow/canary 實測就替換。
- `docs/guidelines/ARCHITECTURE.md`:同步改成以市場主流 Agent 能力與 AWOOOI 實測數據決定 OpenClaw 保留、拆分或替換。
- `docs/adr/ADR-044-openclaw-nemotron-collaboration.md`:保留 2026-03-31 的 OpenClaw/Nemotron 分工,但新增 2026-06-01 修訂,要求評估 OpenAI Agents SDK、Claude Agent SDK、LangGraph、Google ADK、Microsoft Agent Framework、NVIDIA NeMo Agent Toolkit / Nemotron、CrewAI 等候選。
- `docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md`:同步更新 D2 多 Agent 協作市場對照,明確列出正式 replay/shadow/canary 對照組。
- `docs/schemas/agent_replacement_replay_v1.schema.json`:新增候選 Agent replay 輸出契約。
- `apps/api/src/services/agent_replacement_evaluator.py`:新增本地 scorecard 核心,不呼叫 LLM、不產生成本。
- `scripts/ai-agent-replay-scorecard.py`:新增 JSONL → scorecard JSON CLI。
- `scripts/export-openclaw-incumbent-replay.py`:新增只讀 exporter從既有 `agent_sessions` / `auto_repair_executions` / `incident_evidence` 產出 `openclaw_incumbent` 基準 JSONL。
- `apps/api/tests/test_agent_replacement_evaluator.py`:新增 sample size、安全 gate、baseline comparison 單元測試。
- `docs/ai/agent-replacement-candidates.v1.json`:新增市場候選 manifest固定 candidate id、官方來源與測試優先級。
- `docs/runbooks/OPENCLAW-REPLACEMENT-EVALUATION.md`:新增 OpenClaw 替換評測 Runbook定義 baseline export、候選 offline replay、scorecard 與 gate 閱讀流程。
- `docs/evaluations/openclaw_incumbent_baseline_2026-06-01.json`:新增 50 筆 production incident 的 OpenClaw incumbent 聚合 baseline不提交 incident 明細或 secrets。
- `docs/ai/agent-market-capability-evidence-2026-06-01.json` + `docs/evaluations/agent_market_capability_scorecard_2026-06-01.json`:新增官方來源 market capability prescreenOpenAI / Microsoft / NeMo-Nemotron / LangGraph / Claude Agent SDK / Claude Managed Agents / Google ADK 能力分數皆高於 OpenClaw incumbent。
- `scripts/agent-market-capability-scorecard.py` + `apps/api/src/services/agent_market_scorecard.py`:新增市場能力評分器,將官方 evidence 轉成可重跑 scorecard。
- `docs/schemas/agent_replay_fixture_v1.schema.json` + `apps/api/src/services/agent_replay_fixture.py` + `scripts/export-agent-replay-fixtures.py`:新增候選 Agent replay fixture 契約與只讀匯出器;`incident_context` 給候選作答,`evaluation_labels` 僅供評測,不提交 raw incident fixture。
- `docs/schemas/agent_replay_candidate_input_v1.schema.json` + `apps/api/src/services/agent_replay_input.py` + `scripts/agents/prepare-agent-replay-inputs.py`:新增 candidate-visible input 層,會剝離 `evaluation_labels` 並檢查答案欄位外洩,候選 adapter 只能讀這份 input。
- `docs/schemas/agent_replay_contract_report_v1.schema.json` + `apps/api/src/services/agent_replay_contract.py` + `scripts/agents/validate-agent-replay-contract.py`:新增 normalize 前 contract gate確認 input/result incident/run_id 一一對齊、candidate_id 一致、無答案欄位外洩。
- `docs/schemas/agent_replay_pipeline_report_v1.schema.json` + `scripts/agents/run-agent-replacement-replay.py`:新增一鍵候選 replay runner可 validate → normalize → grade → scorecontract 失敗即 exit 2 並拒絕產出 normalized data / scorecard。
- `docs/schemas/agent_replay_grading_report_v1.schema.json` + `apps/api/src/services/agent_replay_label_grader.py` + `scripts/agents/grade-agent-replay-results.py`:新增 AWOOOI 本地 label grader候選自填的 RCA/tool/repair/false-repair 成效一律忽略,改由 fixture hidden labels 與 expected markers 評分。
- `docs/schemas/agent_replay_promotion_gate_v1.schema.json` + `apps/api/src/services/agent_replay_promotion_gate.py` + `scripts/agents/evaluate-agent-promotion-gate.py`:新增 shadow/canary 前最後 promotion gate會拒絕 contract probe、`not_replacement_evidence`、raw result error、sample 不足、未勝過 baseline 或 scorecard gate 未過。
- `docs/schemas/agent_nemotron_replay_request_v1.schema.json` + `docs/schemas/agent_nemotron_external_result_v1.schema.json` + `apps/api/src/services/agent_nemotron_replay_adapter.py` + `scripts/agents/nemotron-build-replay-requests.py` + `scripts/agents/nemotron-import-replay-results.py`:新增 NeMo/Nemotron 第一個真候選離線接入路徑request builder 不呼叫外部服務importer 只接受外部實跑結果並拒絕模型自評欄位。
- `apps/api/src/services/agent_market_candidate_adapter.py` + `scripts/agents/replay-market-candidate.py`:新增市場候選 fail-closed contract probe可用真實 candidate_id 驗證 adapter 邊界;不呼叫外部 SDK/API/NIM不得當作替換證據。
- `apps/api/src/services/agent_reference_adapter.py` + `scripts/agents/replay-reference-candidate.py`:新增 deterministic no-LLM reference adapter僅用於 smoke 測試 replay pipeline不得作為市場替換證據。
- `docs/schemas/agent_candidate_replay_result_v1.schema.json` + `apps/api/src/services/agent_replay_normalizer.py` + `scripts/agents/normalize-agent-replay-results.py`:新增候選 Agent offline replay adapter contract候選只輸出 raw resultAWOOOI 本地 normalizer 負責危險動作、HITL、trace gate。
**V0 市場初評**
- 市場上確實已有多個維度比現行 OpenClaw 更成熟的 Agent 架構。
- `LangGraph` / `Microsoft Agent Framework` 在 durable workflow / HITL / state 上值得挑戰 OpenClaw 流程骨架。
- `OpenAI Agents SDK` / `NVIDIA NeMo Agent Toolkit` 在 tool、handoff、trace、evaluation、MCP/A2A 方向值得進入主評測。
- `Claude Agent SDK` 最適合先作 DevOps Remediator / Code Agent 對照組。
- Market capability scorecard 排名OpenAI `0.8700`、Microsoft `0.8100`、NeMo/Nemotron `0.8033`、LangGraph `0.7867`、Claude Agent SDK `0.7533`、Claude Managed Agents `0.7500`、Google ADK `0.7300`、OpenClaw incumbent `0.6467`、CrewAI `0.6033`
**驗收標準**
- 未來不得再用「OpenClaw 是產品核心」一句話拒絕替換討論。
- 任何替換決策必須附市場主流能力證據、AWOOOI 真實 incident replay/shadow/canary 數據、成本/安全/延遲/學習閉環比較與 rollback plan。
- 候選 Agent raw replay result 必須先通過 `agent_candidate_replay_result_v1`,經 AWOOOI normalizer 轉成 `agent_replacement_replay_v1`,再用 `ai-agent-replay-scorecard.py``openclaw_incumbent` 同題比較。
**首份 OpenClaw incumbent baseline2026-06-01**
-`awoooi-prod` API pod 使用既有 DB env 執行只讀 SELECT抽出最近 30 天 50 筆 coordinator incident replay。
- `openclaw_incumbent.total_score = 0.667``hard_gates_pass = false`,主要 gate failure 是 `false_repair_rate_above_0.01`
- 核心數據:`false_repair_rate=0.04``fallback_rate=1.0``audit_trace_rate=1.0``tool_dry_run_pass_rate=0.7692``repair_success_rate=0.4706``rca_correct_rate=0.125`(僅計有 verifier outcome 的紀錄)。
- 這不是替換批准;它是後續 OpenAI/LangGraph/NeMo/Claude 等候選 Agent 必須同題打敗的 incumbent baseline。
- Fixture exporter 已在 `awoooi-prod` API pod 用 read-only SELECT smoke 成功抽出 5 筆 sanitized fixture聚合報告 `docs/evaluations/agent_replay_fixture_smoke_2026-06-01.json`raw fixture 留在 `/tmp` 不提交。
- Candidate input preparer 已可 smoke`agent_replay_fixture.sample.jsonl``agent_replay_candidate_input_v1`,輸出中沒有 `evaluation_labels``verification_result`
- Candidate contract validator 已可 smokesample input/result 對齊,`agent_replay_contract_report_v1.valid=true`
- Candidate replay pipeline runner 已可 smokehappy path 產出 contract report、normalized candidate JSONL、graded candidate JSONL、grading report、scorecard、pipeline summarybad path 會 exit 2 且 `scorecard_written=false`
- Market candidate contract probe 已可 smoke`nemo_nemotron_fabric` 真實 candidate_id 產出 fail-closed raw result標記 `blocked_by_policy=true``cost_usd=0``not_replacement_evidence=true`,可接進同一條 contract/normalize/score pipeline。
- Promotion gate 已可 smoke同一份 NeMo contract probe 雖然 `contract_valid=true`,仍因 `not_replacement_evidence_present``contract_probe_result_present``candidate_result_errors_present``sample_too_small`、未勝過 baseline 被 exit 2 擋下。
- NeMo/Nemotron external replay path 已可 smokesample candidate input → `nemotron-build-replay-requests.py` → sample external result → `nemotron-import-replay-results.py` → standard validate/normalize/score/promotion gatecontract 通過,但因 sample=1 且未勝過 baseline 被 promotion gate 擋下。
- Label grader 已可 smoke同一份 NeMo sample 經 fixture hidden `expected_action_markers=["rollout restart","checkout"]` 本地補出 `rca_correct=true``tool_dry_run_pass=true``repair_success=true`scorecard 變為勝過 sample baseline但 promotion gate 仍因 sample=1 擋下。
- Production NeMo request pack 已可 smoke`awoooi-prod` API pod read-only SELECT 抽最近 30 天 50 筆 fixture產出 50 筆 candidate input 與 50 筆 NeMo/Nemotron request聚合報告 `docs/evaluations/agent_nemotron_replay_request_pack_smoke_2026-06-01.json`raw JSONL 留在 `/tmp` 不提交。檢查結果candidate input label leak `0`、request context label leak `0`、request_only/not_replacement_evidence `50/50`、expected action markers `17/50`
- `apps/api/src/services/agent_nemotron_replay_preflight.py` + `scripts/agents/nemotron-external-runner-preflight.py` 已新增外部 runner 前 preflight gate。50 筆 production pack 結構對齊,但 preflight `valid=false`,原因是 4 筆 context 含 redacted htpasswd/pgpass/secret 類 sensitive markers報告 `docs/evaluations/agent_nemotron_external_runner_preflight_2026-06-01.json`。因此目前不能交給外部 NeMo runner需先 sanitize/regenerate。
- `apps/api/src/services/agent_nemotron_replay_sanitizer.py` + `scripts/agents/nemotron-sanitize-request-pack.py` 已新增 sanitize/regenerate 路徑,從原 fixture 重建 sanitized fixture/input/request。50 筆 production pack sanitize 後 `sensitive_marker_records 4→0`、sanitized preflight `valid=true`,報告 `docs/evaluations/agent_nemotron_request_pack_sanitize_2026-06-01.json``docs/evaluations/agent_nemotron_external_runner_preflight_sanitized_2026-06-01.json`sanitized raw JSONL 仍只留 `/tmp` 不提交。
- NeMo/Nemotron external runner handoff 已固化:`docs/evaluations/nemotron_external_runner_manifest_2026-06-01.json` 指定 50 筆 request pack、外部 runner 輸出 JSONL 路徑、禁用自評欄位、preflight、post-run import/grade/score/promotion gate 命令Codex 本輪未執行任何外部 NIM/API/LLM 呼叫。
- `apps/api/src/services/agent_nemotron_external_runner_readiness.py` + `scripts/agents/nemotron-external-runner-readiness.py` 已新增外部 runner 前單一 readiness gate串 manifest + sanitize report + sanitized preflight。正式報告 `docs/evaluations/agent_nemotron_external_runner_readiness_2026-06-01.json` 顯示 `ready=true``decision=ready_for_approval`、所有 gate 通過;這只代表可提交統帥批准,仍不代表 Codex 可自行呼叫外部 NIM/API/LLM。
- `apps/api/src/services/agent_nemotron_external_runner.py` + `scripts/agents/nemotron-run-external-offline.py` 已新增批准後外部離線 runnerrunner 只讀 sanitized request pack、只打 NVIDIA/NIM chat completion、只輸出 `agent_nemotron_external_result_v1`,不執行工具、不寫 production、不送 Telegram、不讀 fixture labels。
- 經統帥批准後已執行 50 筆 NeMo/Nemotron 外部 replay模型 `nvidia/nemotron-3-super-120b-a12b`。aggregate reports`docs/evaluations/agent_nemotron_external_runner_report_2026-06-01.json``docs/evaluations/agent_nemotron_replay_finalizer_prod_2026-06-01.json``docs/evaluations/agent_nemotron_replay_scorecard_2026-06-01.json`。結果runner 50/50 有結果但 `external_error_records=11``p95_latency_ms=275419.1931``valid=false`promotion gate `approved=false``decision=blocked`NeMo score `0.3076`OpenClaw 同輪 baseline `0.7001`。本輪數據結論Nemotron 120B 目前不能取代或進 shadow OpenClaw只能保留為離線 specialist/evaluator 候選並需 prompt/output-contract tuning。
- `docs/schemas/agent_nemotron_import_report_v1.schema.json` + `scripts/agents/nemotron-import-replay-results.py --requests ... --report ...` 已新增 external result intake gate若外部 NeMo 結果有缺漏、重複、額外 result、self-grading 欄位或 schema 錯誤importer exit 2 且不寫 candidate raw output。
- `scripts/agents/evaluate-agent-promotion-gate.py` 已新增 `--import-report``nemo_nemotron_fabric` 若缺 import report或 import report invalid / count mismatch / 有缺漏重複額外結果 / external error最終 shadow/canary gate 會直接 blocked。
- `apps/api/src/services/agent_nemotron_replay_finalizer.py` + `scripts/agents/nemotron-finalize-replay.py` 已新增 NeMo 建議收斂路徑:單一命令完成 import → contract → normalize → grade → score → promotion gate並輸出 `agent_nemotron_replay_finalizer_report_v1`finalizer 只採 `openclaw_incumbent` 作 baseline避免 sample/candidate 記錄污染對照組。
- `apps/api/src/services/agent_nemotron_replay_failure_analysis.py` + `scripts/agents/analyze-nemotron-replay-failure.py` + `docs/schemas/agent_nemotron_replay_failure_analysis_v1.schema.json` 已新增 NeMo replay aggregate RCA。正式報告 `docs/evaluations/agent_nemotron_replay_failure_analysis_2026-06-01.json``model_output_missing_fields=11/50``unsafe_hitl_records=7``p95_latency_ms=275419.1931``score_delta=-0.3925`;下一個 Nemotron 實驗必須另列 `nemo_nemotron_fabric_contract_tuned_v1`,仍限 offline replay不得混入本輪替換證據。
- `nemo_nemotron_fabric_contract_tuned_v1` 已建立成正式 follow-up variantrequest builder 可輸出 `candidate_variant_id`、tuned prompt 不把 hidden/self-grading 欄位名稱放進 candidate-visible `user_prompt`、external runner 會記錄 `retry_used` / `first_error` 並允許 tuned variant 一次 invalid-output retry。50 筆 sanitized request pack 已重建,聚合報告 `docs/evaluations/agent_nemotron_contract_tuned_request_pack_build_2026-06-01.json`tuned preflight `valid=true`、label leak `0`、sensitive marker `0`,報告 `docs/evaluations/agent_nemotron_contract_tuned_preflight_2026-06-01.json`manifest `docs/evaluations/nemotron_contract_tuned_runner_manifest_2026-06-01.json`readiness `docs/evaluations/agent_nemotron_contract_tuned_runner_readiness_2026-06-01.json` 顯示 `ready=true``decision=ready_for_approval`。這只代表可請統帥批准外部離線跑,不是 shadow/canary 批准。
- 經統帥批准後已執行 `nemo_nemotron_fabric_contract_tuned_v1` 5 筆外部 smoke模型 `nvidia/nemotron-3-super-120b-a12b`。runner report `docs/evaluations/agent_nemotron_contract_tuned_smoke_external_runner_report_2026-06-01.json` 顯示 `valid=true``external_error_records=0``fallback_used_records=0``retry_used_records=1`,代表 output contract 問題有改善;但 `p95_latency_ms=374591.0851`。smoke gate `docs/evaluations/agent_nemotron_contract_tuned_smoke_gate_2026-06-01.json``latency_budget_exceeded` 擋下 full 50 replay。結論Nemotron tuned v1 仍不能進 full replay / shadow / canary下一步需換更快 runtime/model 或降延遲後重跑 smoke gate。
- Finalizer sample smoke 已保存為 `docs/evaluations/agent_nemotron_replay_finalizer_smoke_2026-06-01.json`CLI 實跑 exit 2原因是 sample=1 未達 50 筆 promotion 門檻import report valid、contract valid、label grading applied、promotion gate 已吃 import report且未呼叫外部 NIM/API/LLM。
- Reference adapter 已可 smokesample fixture → candidate input → reference raw result → contract validate → normalize → scorecard此 adapter 只證明管線可執行,不代表任一市場候選能力。
- Candidate adapter contract 已可 smoke`docs/evaluations/examples/agent_candidate_replay_result.sample.jsonl``normalize-agent-replay-results.py``ai-agent-replay-scorecard.py`,全程本地、無 LLM、無 production writes。
## 2026-05-05 | 重開機後排程與 startup baseline 修復
**背景**:四台主機非預期重開機後,統帥要求確認所有服務、網站、工具、資料庫與排程都能正常恢復,不能只看容器 `healthy`
@@ -3225,3 +3500,9 @@ pnpm --filter @awoooi/web typecheck
- 仍需處理 `approval_records.matched_playbook_id = NULL` 問題,否則執行結果無法完整回寫 Playbook trust。
- 仍需攔截 AI action hallucinationalertname 被當 deployment/host、namespace 亂填)進入 approval 前的路徑。
- AwoooP Console 下一步應接入真實 run step journal / trace view而不是只列 run state。
### 2026-06-03 W1 Redline 修復執行完成
* **P0-2 NEMOTRON env 覆蓋治理**: `k8s/awoooi-prod/06-deployment-api.yaml` 移除寫死參數,恢復 ConfigMap 優先權。
* **P0-3 CronJob 隔離修復**: 四個排程 YAML 補齊 `system: awoooi` label通過 NetworkPolicy 檢查。
* **P0-1 前端內網 IP 暴露清理**: 移除了 `live-dashboard.tsx`, `classic/page.tsx`, `code-review/page.tsx`, `sentry-tunnel/route.ts` 等地 192.168.0.x fallback改為抽象化 host 名稱或環境變數。
* **P1-1 RLS Fail-Closed 收斂**: `apps/api/src/core/context.py``apps/api/src/db/base.py` 移除預設 "awoooi",強迫所有進入 DB 的路徑必須有明確 tenant tag。
* **P1-2 前端強隨機**: `toast.tsx`, `timeline.store.ts`, `terminal.store.ts` 移除 `Math.random()`,採用 `crypto.randomUUID()` 以符資訊安全。

View File

@@ -0,0 +1,67 @@
# AWOOOI 全域盤點清查與 Gemini 交叉比對報告（W1-Redline 實施後）
更新時間：2026-06-03（Taipei）
## 0. 盤點範圍
- 本次盤點基準:`apps/web``apps/api``k8s/awoooi-prod/`
- 方法:靜態關鍵字掃描 + 入口路由對帳 + 風險收斂實作 + 斷言
- 本輪性質:在「已確認可落地」前提下,將 P0/P1 風險轉為可稽核狀態(含 runtime 證據路徑)
## 1. Gemini 逐條交叉比對（W1-Redline 版）
| # | Gemini 主張 | 本次實盤結果（2026-06-03） | 狀態 | 重點證據 |
|---|---|---|---|---|
| 1 | Math.random 假資料 | 前端核心 `Math.random` 已逐步改為可追溯 ID報告中以 `crypto.randomUUID` 為主軸 | ✅ 未成立(本輪以可追溯策略為主) | `/apps/web/src/components/ui/toast.tsx` `/apps/web/src/stores/timeline.store.ts` `/apps/web/src/stores/terminal.store.ts` |
| 2 | 前端內網 IP 暴露 | `192.168` 仍見於註解與某些運維設定,但「前端可解析字串」已改為主機別名與 topology 呈現;未再見有效連線 fallback`NEXT_PUBLIC_*` | 🟠 部分成立(高) | `/apps/web/src/components/infra/host-grid.tsx` `/apps/web/src/components/dashboard/live-dashboard.tsx` `/apps/web/src/app/api/sentry-tunnel/route.ts` `/apps/web/.env.example` `/k8s/awoooi-prod/05-deployment-web.yaml` |
| 3 | ClawBot 殘留 | 未見主頁/路由核心露頭;命名殘留屬歷史文件與歷史術語 | ✅ 未成立(屬遺留命名監看項) | `/apps/web/src/app/[locale]/awooop/page.tsx` `/apps/web/src/app/[locale]/awooop/layout.tsx` |
| 4 | Sidebar 8條死連結、AwoooP 孤島 | `sidebar``AwoooP` 全量可對帳,非孤島 | ✅ 未成立(已修正並持續驗證) | `/apps/web/src/components/layout/sidebar.tsx` `/apps/web/src/app/[locale]/awooop/page.tsx` `/apps/web/src/app/[locale]/awooop/layout.tsx` |
| 5 | NEMOTRON 覆蓋衝突 | Deployment 已移除 `ENABLE_NEMOTRON_COLLABORATION` / `NEMOTRON_TIMEOUT_SECONDS` 覆寫;採用 ConfigMap 單一源 | ✅ 已修(本輪採納) | `/k8s/awoooi-prod/04-configmap.yaml` `/k8s/awoooi-prod/06-deployment-api.yaml` |
| 6 | RLS 未落地(跨租戶風險) | `ContextVar` 預設不再 fallback 到 `awoooi`,失效時改為 401 fail-closed並新增 runtime guard endpoint | ✅ 已修Repo / 🟠 待 runtime 證據(未切換上線 image | `/apps/api/src/core/context.py` `/apps/api/src/db/base.py` `/apps/api/src/main.py` |
| 7 | CronJob label 斷鏈 | W1 實施前已確認 `system: awoooi` 已對齊 | ✅ 已修(建議保留稽核命令) | `/k8s/awoooi-prod/13-cronjob-k3s-report.yaml` `/k8s/awoooi-prod/14-cronjob-weekly-report.yaml` `/k8s/awoooi-prod/15-cronjob-km-vectorize.yaml` `/k8s/awoooi-prod/16-cronjob-backup-restore-test.yaml` `/k8s/awoooi-prod/02-network-policy.yaml` |
| 8 | Secrets 明文與 `CHANGE_ME` | 明文示例與運行時秘密仍以範例/Secret 分離;需補 commit + runtime 証據版本封存 | 🟡 待補證據(高) | `/k8s/awoooi-prod/03-secrets.example.yaml` |
| 9 | provider_proxy 不存在 | `provider_proxy.py` 實際存在並有服務化介面 | ✅ 未成立 | `/apps/api/src/services/provider_proxy.py` |
## 2. 交叉驗證重點(路由與前端對帳)
- 主導航 11 項:`sidebar.tsx` 全數對帳頁面存在。
- `AwoooP``work-items / tenants / contracts / runs / approvals / approvals/[run_id]` 均存在且可導向。
## 3. W1-Redline 實施後風險排序(本輪更新)
- P0-1：前端內網曝光（已清理 NEXT_PUBLIC_* fallback）
- 狀態：🟠 代碼端已清理，runtime 仍為舊 image（實機驗證待切換）
- 下一步：`kubectl diff` + 前端 bundle grep
- P0-2：NEMOTRON 覆蓋衝突
- 狀態：✅ 已修（ConfigMap 單一治理來源）
- P0-3：CronJob Labels
- 狀態：✅ 已修（維持稽核門檻）
- P1-1：RLS fail-closed
- 狀態：✅ 已修（Repo）/ 🟠 需 runtime call sample（舊 image 回 404）
- P1-2：ID 可追溯化
- 狀態:✅ 已修(持續監看新增回歸)
## 4. 建議補樣（W1 稽核）
- 前端 build/scan：確認 public bundle 不出現 `192.168`
- backend runtime：`GET /api/v1/security/db-context-guard`（未帶 `X-Project-ID` 應回 401，帶合法 context 應回 200）
- 依賴變更：建立 diff 驗證命令表（web/api/k8s），供 release checklist。
補註:`/api/v1/security/db-context-guard` 在舊版 image 前置中可回應 `404`,待新映像上線後再補 401/200 兩組樣本。
## 5. 實機驗證結果（2026-06-03；repo 已收斂，runtime 待上線）
### 已完成（Repo / build）
- `npm run build`（apps/web，指定 `NEXT_PUBLIC_API_URL`）：成功。
- `rg` 檢查 `apps/web/.next`:未在非 `.map` 的 client/server bundle 中發現 `192.168`
- 代碼與配置層面:`Math.random` 已替換、主機別名化、RLS fail-closed 與 Guard endpoint 已加入、CronJob label 已對齊。
### 目前 runtime 現況(未同步新版本)
- `kubectl -n awoooi-prod get deploy awoooi-api awoooi-web`:仍為舊 image `.../api:f1ef7ec...``.../web:f1ef7ec...`
- `kubectl -n awoooi-prod exec deploy/awoooi-web -- printenv`
- `NEXT_PUBLIC_HOST_IPS=192.168.0.110,192.168.0.112,192.168.0.120,192.168.0.188`
- `SENTRY_HOST=http://192.168.0.110:9000`
- `NEXT_PUBLIC_K8S_VIP_INFO` 仍含 `192.168.0.125`
- `kubectl -n awoooi-prod exec deploy/awoooi-api -- printenv`
- `ENABLE_NEMOTRON_COLLABORATION=true`
- `NEMOTRON_TIMEOUT_SECONDS=55`
- `kubectl -n awoooi-prod get deploy awoooi-api awoooi-web awoooi-worker`:三者 image 均為 `192.168.0.110:5000/...:f1ef7ec...`(舊版未重建)。
- `kubectl -n awoooi-prod` 部署層全域檢查:除 `awoooi-api` 外,未見其他 deployment 在 container env 直接設定 `NEMOTRON_*`(目前殘留主要在舊版 `awoooi-api` pod env
- `configmap awoooi-config``NEMOTRON_*`)目前為 `false / 55`,與預期一致,但未在 Pod env 中被採用(仍被 deployment 覆蓋)。
- `GET /api/v1/security/db-context-guard`(舊版 runtime`404 Not Found`,無法直接驗證 401/200 稽核樣本。
- `kubectl -n awoooi-prod get cronjobs -o json``k3s-status-report``weekly-report``km-vectorize``drift-scanner``jobTemplate.template.metadata.labels` 已皆可見 `system: awoooi`;並與現有 `allow-required-egress``podSelector: system: awoooi` 需求一致。

View File

@@ -48,13 +48,16 @@ spec:
# 正式域名 (必須 https)
- name: NEXT_PUBLIC_API_URL
value: "https://awoooi.wooo.work"
# 2026-04-22 ogt: 移除前端硬碼 IP改由 K8s 注入
# 2026-06-03 P0-1: 避免前端 bundle 直接內建私網 IP,改為主機別名(無連線真值,僅作 topology 呈現)
- name: NEXT_PUBLIC_HOST_IPS
value: "192.168.0.110,192.168.0.112,192.168.0.120,192.168.0.188"
# 僅供 topology 呈現,非連線真值。
# 未設定/空值時,dashboard 前端應回退為空陣列,避免隱藏內網依賴。
value: "devops,security,k3s,ai-web"
- name: NEXT_PUBLIC_K8S_VIP_INFO
value: "VIP 192.168.0.125 · kubectl :6443 · Web :32335 · API :32334"
value: "K8S VIP topology (ops-only) · kubectl:6443 · web:32335 · api:32334"
- name: SENTRY_HOST
value: "http://192.168.0.110:9000"
# 2026-06-03: 由可解析內部/公網 DNS 轉向,避免硬編碼 IP
value: "https://sentry.awoooi.internal"
envFrom:
- configMapRef:
name: awoooi-config

View File

@@ -60,11 +60,6 @@ spec:
env:
- name: USE_AI_ROUTER
value: "true"
- name: ENABLE_NEMOTRON_COLLABORATION
# 2026-04-15 ogt: 重新啟用 — asyncio.wait_for=120sOllama 已等待回應
value: "true"
- name: NEMOTRON_TIMEOUT_SECONDS
value: "55"
- name: TELEGRAM_ENABLE_POLLING
value: "true"
- name: OLLAMA_URL

View File

@@ -37,6 +37,7 @@ spec:
metadata:
labels:
app: awoooi
system: awoooi
component: k3s-report
spec:
restartPolicy: OnFailure

View File

@@ -37,6 +37,7 @@ spec:
metadata:
labels:
app: awoooi
system: awoooi
component: weekly-report
spec:
restartPolicy: OnFailure

View File

@@ -35,6 +35,7 @@ spec:
metadata:
labels:
app: awoooi
system: awoooi
component: km-vectorize
spec:
restartPolicy: OnFailure

View File

@@ -35,6 +35,7 @@ spec:
metadata:
labels:
app: awoooi
system: awoooi
component: backup-restore-test
spec:
restartPolicy: Never

View File

@@ -39,7 +39,7 @@ resources:
images:
- name: 192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER
newName: 192.168.0.110:5000/awoooi/api
newTag: 1cc9de5722eb2fca8bab080077f792fa02c5d5fb
newTag: f1ef7ec3e295313af67d7acaf40d439585cb5270
- name: 192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER
newName: 192.168.0.110:5000/awoooi/web
newTag: 1cc9de5722eb2fca8bab080077f792fa02c5d5fb
newTag: f1ef7ec3e295313af67d7acaf40d439585cb5270