docs(security): 建立高價值配置控管清冊 [skip ci]

This commit is contained in:
Your Name
2026-06-11 11:28:36 +08:00
parent e427af3cb2
commit 6efd186750
17 changed files with 295 additions and 54 deletions

View File

@@ -39,15 +39,16 @@ async def _probe_grafana(client: httpx.AsyncClient) -> dict:
if r.status_code == 200:
data = r.json()
version = data.get("version")
# Dashboard count requires basic auth (internal probe only)
import base64 as _b64
_token = _b64.b64encode(b"admin:WoooTech2026").decode()
dash_r = await client.get(
f"{base}/api/search?type=dash-db",
headers={"Authorization": f"Basic {_token}"},
timeout=TIMEOUT,
)
dash_count = len(dash_r.json()) if dash_r.status_code == 200 and isinstance(dash_r.json(), list) else None
dash_count = None
grafana_api_key = settings.GRAFANA_API_KEY.strip()
if grafana_api_key and grafana_api_key != "CHANGE_ME":
dash_r = await client.get(
f"{base}/api/search?type=dash-db",
headers={"Authorization": f"Bearer {grafana_api_key}"},
timeout=TIMEOUT,
)
if dash_r.status_code == 200 and isinstance(dash_r.json(), list):
dash_count = len(dash_r.json())
return {
"name": "Grafana",
"status": "up",

View File

@@ -97,7 +97,7 @@ def extract_inner_command(ssh_cmd: str) -> str:
範例:
"ssh 192.168.1.1 'uptime'""uptime"
"ssh -o StrictHostKeyChecking=no host 'free -m'""free -m"
"ssh -o StrictHostKeyChecking=accept-new host 'free -m'""free -m"
Args:
ssh_cmd: 完整 SSH 指令字串

View File

@@ -29,7 +29,7 @@ class TestSSHRecognition:
def test_ssh_with_options(self):
p = parse_operation_from_action(
"ssh -o StrictHostKeyChecking=no wooo@192.168.0.188 'systemctl restart ollama'"
"ssh -o StrictHostKeyChecking=accept-new wooo@192.168.0.188 'systemctl restart ollama'"
)
assert p.operation_type == OperationType.SSH_HOST
assert p.resource_name == "192.168.0.188"

View File

@@ -8,11 +8,11 @@
| 欄位 | 值 |
|------|-----|
| **版本** | v2.2 |
| **版本** | v2.3 |
| **建立日期** | 2026-03-20 (台北) |
| **建立者** | Claude Code |
| **最後修改** | 2026-06-04 (台北) |
| **修改者** | Codex + ogt (新增 IwoooS 資安治理禁令) |
| **最後修改** | 2026-06-11 (台北) |
| **修改者** | Codex + ogt (新增高價值配置資安控管) |
### 變更紀錄
@@ -31,6 +31,7 @@
| v2.0 | 2026-04-16 | Claude Code + ogt | 新增 No Island Coding / 主動執行熔斷機制 / 自循環工作流 / 狀態機驗證鐵律 |
| v2.1 | 2026-05-06 | Codex + ogt | 🔴 文件語言鐵律:Markdown/ADR/LOGBOOK/Runbook/交接文件一律繁體中文 |
| v2.2 | 2026-06-04 | Codex + ogt | 🔴🔴🔴 IwoooS 資安治理禁令:只讀證據、低摩擦、不可誤讀 UI / AwoooP approval / runtime gate |
| v2.3 | 2026-06-11 | Codex + ogt | 🔴🔴🔴 高價值配置資安控管:Nginx、DNS / TLS、K8s、workflow、runner、secret、backup、AI provider、主機與產品 runtime config 必須有 source-of-truth、owner gate、diff、rollback 與驗證 |
---
@@ -63,9 +64,60 @@
| **🔴🔴 工作流節奏** | **每步都來回報** | **內部自循環,全局單次回報** | [→ 自循環工作流](#self-loop-workflow) |
| **🟡 狀態機驗證** | **不查中間狀態卡死** | **必驗 TTL + Cleanup + Fallback** | [→ State & Flow Validation](#state--flow-validation) |
| **🔴🔴🔴 IwoooS 資安治理** | **UI 可見 / AwoooP approval 當 runtime 授權** | **只讀證據 + owner response gate + 獨立人工批准** | [→ IwoooS Security Governance](#iwooos-security-governance) |
| **🔴🔴🔴 高價值配置** | **手改 Nginx / workflow / secret / runtime config 後直接 reload 或部署** | **source-of-truth + owner gate + diff + rollback + 驗證** | [→ High Value Config Control](#high-value-config-control) |
---
## 🔴🔴🔴 High Value Config Control
> 2026-06-11 統帥指示:所有重要配置都必須納入資安控管,尤其 Nginx 常被手動變更,不能只靠人記得不要亂改。
### 高價值配置範圍
以下配置一律視為高價值配置,不得以「只是小改設定」繞過資安流程:
1. Nginx / reverse proxy / public route / upstream / Basic Auth / admin path / WebSocket / `.well-known` / ACME challenge。
2. DNS / domain / TLS / certbot / certificate path / renewal script / HSTS / public HTTPS route。
3. K8s / ArgoCD / Kustomize / Deployment / ConfigMap / Secret metadata / RBAC / NetworkPolicy / CronJob / HPA / VPA / Velero。
4. Gitea workflow / runner label / deploy key / webhook / branch protection / CODEOWNERS / repository secret name。
5. Harbor / image tag / registry mirror / Dockerfile / docker-compose / package lockfile / supply-chain baseline。
6. Prometheus / Alertmanager / Grafana / SigNoz / Sentry / Langfuse / OTEL / alert route / receiver / silence policy。
7. PostgreSQL / Redis / MinIO / backup / restic / offsite escrow / cold-start / restore / retention policy。
8. SSH / sudoers / known_hosts / authorized_keys / systemd unit / firewall / WireGuard / NodePort / VIP。
9. AI provider / OpenClaw / Ollama / NemoTron / Hermes / Gemini / Claude / MCP / A2A / agent runtime / payout or treasury boundary。
10. AWOOOI、AwoooP、IwoooS、VibeWork、agent-bounty-protocol、StockPlatform、Tsenyang、Bitan、VTuber 等產品的 public / admin / API / callback / webhook / env runtime config。
### 絕對禁止
```text
❌ 未記錄 owner / affected scope / rollback owner 就修改高價值配置
❌ 直接 SSH 到主機手改 Nginx live conf 後 reload,除非已進 break-glass 並補 evidence
❌ 將 Nginx、workflow、docker-compose、K8s Secret、Grafana、Harbor、Gitea、MinIO、ArgoCD、Telegram、AI provider 的 secret value 寫入 repo、文件、LOGBOOK 或對話
❌ 關閉 SSH host key 驗證、使用未驗證 host key 或複製停用 host key 檢查的範例
❌ 把配置 diff 可見、AwoooP approval 或 IwoooS UI 卡片當成 runtime reload / deploy / secret rotation 授權
❌ 在未完成 `nginx -t`、route smoke、rollback ref 與跨產品通知前,reload public gateway
```
### 正確流程
1. 先確認 source-of-truth:Git / Ansible / K8s / compose / owner-managed secret store / live-only break-glass。
2. 變更前建立 owner response:owner role / team、decision、decision reason、affected scope、redacted evidence refs、followup owner、rollback owner。
3. 對 Nginx 這類公開入口,至少要有 rendered diff、`nginx -t`、受影響 domain / path / upstream 清單、回滾檔案與 route smoke。
4. 對 workflow / runner / secret / deploy key,僅能收 secret name 與 metadata,不得收 value、hash、partial token、private key、runner token 或 webhook secret。
5. 對 live drift,先記錄 drift evidence 與 owner decision,不得自動覆寫 live;緊急情況需 break-glass 記錄與事後補件。
6. 完成後更新 LOGBOOK,列出驗證、未執行項、完成度與仍維持 `0 / false` 的 gate。
### Nginx 最低控管線
```text
source-of-truth = infra/ansible/roles/nginx/templates/*.j2 + infra/ansible/playbooks/nginx-sync.yml
live path 188 = /etc/nginx/sites-enabled/all-sites.conf
live path 110 = /etc/nginx/sites-enabled/110-ollama-proxy.conf
required preflight = rendered diff + nginx -t + affected route list + rollback ref
required post-check = public route smoke + admin route smoke if affected + WebSocket / API smoke if affected
runtime gate = 0 until explicit maintenance window and rollback owner exist
```
## 🔴🔴🔴 IwoooS Security Governance
> 2026-06-04 統帥指示:IwoooS 初期資安不要拉太嚴格,先建立框架、只讀證據、低摩擦流程,再階段性收攏。

View File

@@ -1,3 +1,35 @@
## 2026-06-11IwoooS 高價值配置控管清冊與 P0 source-control 止血
**背景**:使用者要求所有重要配置都要納入資安控管,特別指出 Nginx 常被手動變更,必須建立資安機制。同時需完整盤點哪些配置要先納管、哪些既有規範不符合現在要求、哪些需要新增或調整。
**完成內容:**
- 新增 `docs/security/IWOOOS-CONFIG-CONTROL-INVENTORY.md`,將高價值配置分為 C0 / C1 / C2 / C3,並列出 Nginx、DNS / TLS、K8s / ArgoCD、Gitea workflow / runner / secret、backup、monitoring、AI provider、Kali、agent-bounty-protocol、VibeWork 與其他產品 route / admin / API / webhook 的控管優先序。
- `docs/HARD_RULES.md` 升級到 v2.3,新增「High Value Config Control」:高價值配置必須有 source-of-truth、owner gate、diff、rollback 與驗證;Nginx live conf 不得手改後直接 reload。
- Nginx 控管機制已明確定義:source-of-truth 為 `infra/ansible/roles/nginx/templates/*.j2` 與 `infra/ansible/playbooks/nginx-sync.yml`;live drift 只能先建立 evidence,不得自動覆寫;reload 需 `nginx -t`、route smoke、rollback owner 與跨產品通知。
- 清理 `docs/runbooks/SECRETS-MANAGEMENT.md` 中的可疑 Gitea token 範例,改為 owner-managed token env;文件不再保存 token value。
- 清理 `k8s/monitoring/docker-compose-110.yml` 中 Grafana admin 密碼常值,改由 `GRAFANA_ADMIN_PASSWORD` owner secret store 注入。
- 修正 `apps/api/src/api/v1/monitoring.py`,移除 Grafana Basic Auth 常值,改由 `settings.GRAFANA_API_KEY` Bearer token 控制;未設定時不送 Authorization header。
- 修正 `ops/monitoring/discover_docker.py`,移除關閉 SSH host key 驗證的參數,改為 `BatchMode=yes` 與 `accept-new`;後續可再升級 pinned known_hosts。
- 清理舊監控 / K3s 文件中的 MinIO credential 常值,改為 placeholder。
**本地驗證:**
- `/Users/ogt/.pyenv/shims/python3.11 -m py_compile apps/api/src/api/v1/monitoring.py ops/monitoring/discover_docker.py apps/api/src/services/ssh_command_whitelist.py` 通過。
- `DATABASE_URL='postgresql+asyncpg://test:test@127.0.0.1:5432/test' /Users/ogt/.pyenv/shims/python3.11 -m pytest apps/api/tests/test_operation_parser_ssh.py` 通過,`9 passed`;此 DSN 為本機非機密測試值。
- `python3 scripts/security/security-mirror-progress-guard.py --root .` 通過。
- `python3 scripts/security/source-control-owner-response-guard.py --root .` 通過。
- `node scripts/ci/check-gitea-step-env-secrets.js` 通過。
- `python3 scripts/ops/doc-secrets-sanity-check.py docs .gitea` 通過,`scanned_files=635`
- `git diff --check` 通過。
- 高風險字串掃描通過:關閉 SSH host key 驗證的逐字參數、舊 Gitea token、Grafana 密碼常值、舊 MinIO credential、Prometheus inline bearer token 均未命中。
**完成度與邊界:**
- 重要配置範圍盤點:`100%`
- Nginx 控管機制定義:`100%`
- 本波 source-control P0 止血:`100%`
- live Nginx drift detector:`0%`,尚未 SSH、Ansible check-mode、live hash 或 reload。
- Nginx reload / restart、DNS 修改、TLS renew、ArgoCD sync、kubectl、SSH 主機修改、workflow 修改、runner 啟用、secret rotation、active scan、agent-bounty runtime、payout / withdrawal、deploy 或 runtime execution,本階段全部未執行。
- IwoooS 整體仍維持 `64%`;active runtime gate 仍為 `0`;owner response received / accepted 仍為 `0 / false`。
## 2026-06-11OpenClaw / Hermes / NemoTron 佈建布局第一波
**背景**:統帥要求將 OpenClaw、Hermes、NemoTron 依各自專長安排到所有主機、套件、工具、服務、專案、網站前後台,並納入主動學習、互相溝通、持續成長與 Telegram Bot 告警鏈路。本波先建立可驗證的只讀佈建布局,避免把尚未批准的 runtime deploy、SDK/API、Telegram 發送或主機操作誤當已授權。
@@ -40,7 +72,7 @@
- `python3 scripts/security/security-mirror-progress-guard.py --root .` 通過。
- `python3 scripts/security/source-control-owner-response-guard.py --root .` 通過。
- `git diff --check` 通過。
- 高危字串掃描通過:`fSCLMBhtpRxhbRxw``Minio_Velero_2026`、MinIO JWT 前綴、`StrictHostKeyChecking=no``bearer_token:` 均未再命中 `.gitea``k8s``infra``scripts``docs/reference``docs/security`
- 高危字串掃描通過:舊 MinIO token、舊 MinIO credential、MinIO JWT 前綴、關閉 SSH host key 驗證的參數、Prometheus inline bearer token 均未再命中 `.gitea`、`k8s`、`infra`、`scripts`、`docs/reference`、`docs/security`。
**完成度與邊界:**
- 即時資安 scope 重新確認:`100%`

View File

@@ -226,7 +226,7 @@ Alertmanager 重複 delivery、修復失敗後告警仍 firing、以及 provider
待觀察問題(已知、可接受):
- **M1**: 測試使用 AsyncMock(豁免原因:`_ssh_execute` 是真實 subprocess,無法在 CI 無 SSH 環境直接測試;已透過 E2E 驗收補充覆蓋)
- **M2**: `StrictHostKeyChecking=no`(內網環境可接受,未來加 known_hosts ConfigMap
- **M2**: 短期使用 `accept-new`,後續改為 pinned known_hosts ConfigMap
---

View File

@@ -41,10 +41,10 @@ claude
### 步驟 3: 配置 Bot Token
```bash
# 在 Claude 對話中執行 (使用 AWOOOI 的 Bot Token)
# 注意:使用 Claude Code 專用 Bot不要用 OpenClaw Bot
# Token 只能從 owner 受控密碼庫注入,不得寫入文件、commit 或對話。
# 注意:使用 Claude Code 專用 Bot,不要用 OpenClaw Bot
# Claude Code Bot: @wooowooowooobot
/telegram:configure 8075645931:AAH-EGKMo8ZC4QJs-Nc1_0s92xHrGdQvdpg
/telegram:configure <TELEGRAM_BOT_TOKEN_FROM_OWNER_SECRET_STORE>
```
### 步驟 4: 以 Channels 模式啟動

View File

@@ -286,8 +286,8 @@ docker run -d \
--name minio-exporter \
--network momo-pro-network \
-e MINIO_URL=http://minio:9000 \
-e MINIO_ACCESS_KEY=minio_admin \
-e MINIO_SECRET_KEY=Minio_Velero_2026! \
-e MINIO_ACCESS_KEY=<MINIO_ACCESS_KEY> \
-e MINIO_SECRET_KEY=<MINIO_SECRET_KEY> \
-p 9290:9290 \
bitnami/minio-exporter:latest

View File

@@ -53,11 +53,12 @@ CD Pipeline 每次部署自動注入到 K8s禁止手動 patch。
# http://192.168.0.110:3001/wooo/awoooi/settings/secrets
# 方法二: Gitea API
GITEA_TOKEN="e6c9fecb1f0148939493ae0fa30407d28c91279d"
# Token 只能從受控密碼庫或 owner 臨時 shell 注入,不得寫入文件、commit 或對話。
: "${OWNER_GITEA_TOKEN:?請由 owner 從受控密碼庫注入 Gitea token}"
curl -X PUT "http://192.168.0.110:3001/api/v1/repos/wooo/awoooi/actions/secrets/NVIDIA_API_KEY" \
-H "Authorization: token $GITEA_TOKEN" \
-H "Authorization: token $OWNER_GITEA_TOKEN" \
-H "Content-Type: application/json" \
-d '{"data": "nvapi-新的key值"}'
-d '{"data": "<新的 secret value僅在受控終端輸入>"}'
```
更新後,**推一個空 commit 觸發 CD 重新注入**

View File

@@ -0,0 +1,146 @@
# IwoooS 高價值配置控管清冊
| 項目 | 內容 |
|------|------|
| 日期 | 2026-06-11 |
| 狀態 | `inventory_and_control_policy_ready` |
| 範圍 | AWOOOI / IwoooS 全產品重要配置 |
| 本階段模式 | source-control 修補 + 只讀盤點,不做 live reload / restart / sync |
| runtime gate | `0` |
## 0. 核心結論
目前 IwoooS 的資安範圍不能只看程式碼漏洞,必須把「能改變公開入口、部署、憑證、告警、資料、備份、AI provider、agent 行為與跨產品路由」的配置全部納入控管。
本次盤點後,配置控管分為四級:
| 等級 | 定義 | 處理速度 | 例子 |
|------|------|----------|------|
| C0 | 立即影響公開入口、權限、secret、部署或遠端執行 | 立即納管,先止血再補 owner gate | Nginx public gateway、TLS、secret、workflow、runner、K8s prod、ArgoCD、backup credential |
| C1 | 會影響監控、資料、供應鏈、AI provider 或主機維護 | 近程納管,建立 drift 與維護窗口 | Prometheus、Alertmanager、Docker Compose、PostgreSQL、Redis、MinIO、Ollama、Kali、WireGuard |
| C2 | 產品 runtime、admin、API、webhook、frontend build 或跨產品 route | 隨產品變更納管 | AWOOOI、AwoooP、IwoooS、VibeWork、agent-bounty-protocol、StockPlatform、Tsenyang、Bitan、VTuber |
| C3 | 文件、runbook、template、snapshot 與證據索引 | 持續納管,避免範例變成可複製風險 | `SERVICE-ENDPOINTS.md`、DR runbook、owner response template |
## 1. 目前已不符合新要求的項目
| 優先 | 項目 | 現況 | 風險 | 本階段處置 |
|------|------|------|------|------------|
| P0 | Nginx public gateway | 已有 Ansible source-of-truth,但缺少資安等級、owner gate、drift evidence、reload 前後驗證與跨產品通知規範 | 手改 live conf 會讓公開網站、admin route、TLS、API、WebSocket 或 ACME 被改壞,且不易追責 | 已新增高價值配置 Hard Rule 與本清冊;live drift detector 尚未實作 |
| P0 | `docs/runbooks/SECRETS-MANAGEMENT.md` Gitea token 範例 | 文件內存在可疑 token 範例 | 可能造成 Gitea API 權限外洩或複製貼上事故 | 已改為 owner-managed token env,不保存 value |
| P0 | `k8s/monitoring/docker-compose-110.yml` Grafana admin 密碼 | compose 內有固定密碼常值 | 若被當作 live 密碼或複製使用,會造成監控後台弱控管 | 已改為 `GRAFANA_ADMIN_PASSWORD` owner secret store 注入 |
| P0 | `ops/monitoring/discover_docker.py` SSH host key 驗證 | 仍使用關閉 host key 驗證的參數 | MITM 或錯誤主機信任風險 | 已改為 `BatchMode=yes` + `accept-new`;後續升級 pinned known_hosts |
| P0 | `apps/api/src/api/v1/monitoring.py` Grafana 探測認證 | 程式碼內有 Grafana Basic Auth 常值 | API 程式碼保存 credential,且會被複製到後續部署 | 已改為 `settings.GRAFANA_API_KEY` Bearer token;未設定時不送 Authorization header |
| P1 | Nginx 188 / 110 live conf drift | repo 有 templates,但尚未自動比較 live hash / rendered hash | 手改後 repo 不知道,下一次 Ansible 可能覆蓋或保留錯誤路由 | 新增為 P0 後續任務:只讀 drift detector |
| P1 | DNS / TLS / certbot | 多產品共用 188 / 110 public gateway,憑證路徑與 renewal 仍分散在 runbook / template | 憑證過期、錯誤 cert path、ACME challenge 被覆蓋會造成公開服務中斷 | 納入 C0,需建立 domain / cert / renewal 清冊 |
| P1 | workflow / runner / deploy key / secret name | 已有 Gitea / GitHub readiness 盤點,但尚未把配置變更和 IwoooS 高價值配置共用 gate 合併 | workflow 或 runner 改錯會直接影響部署與 secret 注入 | 納入 C0,維持只讀 owner response,不收 secret value |
| P1 | Docker Compose / systemd live config | 110 / 188 多服務由 compose、systemd 與 recovery scripts 管理 | restart policy、port、volume、env 改動會影響 Harbor、Sentry、Langfuse、Gitea、agent-bounty-protocol | 納入 C1,先做只讀 inventory |
| P1 | AI provider / Ollama proxy | 110 Nginx proxy、GCP-A/B、111 fallback、API provider route 多處配置 | provider route drift 會造成成本、可用性、資料外送與模型品質風險 | 納入 C1,任何切換仍需 dry-run / benchmark / owner gate |
| P1 | agent-bounty-protocol runtime / treasury / A2A / MCP | 已納入只讀範圍,但尚未有 production host、compose、domain、TLS、rollback owner 完整資料 | 外部 agent、claim / submit、payout 或 webhook 若未控管,風險高於一般網站 | 納入 C2,仍不改該 repo、不讀 `.env`、不部署 |
## 2. Nginx 控管機制
Nginx 是目前必須最先資安控管的配置,原因是它同時控制公開 domain、TLS、admin route、API / WebSocket、ACME challenge、跨產品 upstream 與內網曝光邊界。
### 2.1 Source of truth
| 主機 | repo source-of-truth | live path | 涵蓋 |
|------|----------------------|-----------|------|
| `192.168.0.188` | `infra/ansible/roles/nginx/templates/188-all-sites.conf.j2` | `/etc/nginx/sites-enabled/all-sites.conf` | `aiops.wooo.work`、`gitlab.wooo.work`、`signoz.wooo.work`、`www.tsenyang.com`、`tsenyang.com`、`stock.wooo.work`、`mo.wooo.work`、`bitan.wooo.work`、`vtuber.wooo.work` |
| `192.168.0.188` | `infra/ansible/roles/nginx/templates/188-internal-tools-https.conf.j2` | live path 需 owner 確認 | `gitea.wooo.work`、`sentry.wooo.work`、`langfuse.wooo.work`、`harbor.wooo.work`、`registry.wooo.work`、`signoz.wooo.work`、`stock.wooo.work` |
| `192.168.0.110` | `infra/ansible/roles/nginx/templates/110-ollama-proxy.conf.j2` | `/etc/nginx/sites-enabled/110-ollama-proxy.conf` | Ollama GCP-A `11435`、GCP-B `11436`、local fallback `11437` |
| 部署入口 | `infra/ansible/playbooks/nginx-sync.yml` | Ansible apply | `nginx -t`、backup、reload handler |
| 回滾 SOP | `docs/runbooks/disaster-recovery/DR-Nginx.md` | Runbook | 語法錯誤、Git rollback、188 失效接管 |
### 2.2 必要 gate
| 階段 | 必要資料 | 未滿足時 |
|------|----------|----------|
| 變更前 | owner role / team、affected domains、affected paths、upstream、TLS / ACME 影響、rollback owner、maintenance window | 不可 reload不可部署 |
| diff | repo diff、rendered diff、live drift evidence refs | 只可進入 owner review |
| preflight | `nginx -t`、port conflict check、certificate path check | 不可 reload |
| post-check | public route smoke、API / WebSocket smoke、admin route smoke、ACME path smoke、錯誤率觀察 | 不可宣稱完成 |
| rollback | 前一份 live backup、Git revert ref、rollback owner、停止條件 | 不可進 production window |
### 2.3 Drift 原則
1. 偵測到 live Nginx 與 repo template 不一致時,只建立 evidence不自動覆寫 live。
2. drift 必須標記受影響 domain、upstream、TLS、admin route、ACME path 與風險等級。
3. 若 drift 是緊急手改,需補 break-glass owner response、時間、原因、回滾條件與後續 source-of-truth patch。
4. 若 drift 是未授權變更,列為 P0 config drift不得等到下一次部署才處理。
5. IwoooS UI 可顯示 drift但不能因此提高 runtime gate。
## 3. 需要優先納管的配置總清單
| 優先 | 配置 | 代表 repo 路徑 | live / owner 來源 | 必要控管 |
|------|------|----------------|-------------------|----------|
| P0 | Nginx public gateway | `infra/ansible/roles/nginx/templates/*.j2`、`infra/ansible/playbooks/nginx-sync.yml`、`ops/nginx/*` | 188 / 110 live Nginx | source-of-truth、drift detector、owner gate、`nginx -t`、route smoke、rollback |
| P0 | DNS / TLS / certbot | Nginx templates、`docs/runbooks/REGISTRY-CERTBOT-188.md`、TLS alert rules | DNS provider、Let's Encrypt、188 / 110 | domain inventory、cert path、renewal check、ACME path smoke |
| P0 | K8s production manifests | `k8s/awoooi-prod/*`、`k8s/argocd/awoooi-prod-app.yaml` | ArgoCD / K3s | GitOps diff、ArgoCD health / sync readback、rollback revision、no manual kubectl unless approved |
| P0 | K8s Secret metadata | `k8s/awoooi-prod/03-secrets.example.yaml`、secret templates、workflow injection | Gitea Secrets / K8s Secret names | secret name parity only、no value collection、rotation owner |
| P0 | Gitea workflows | `.gitea/workflows/*.yaml` | Gitea Actions | self-hosted runner, secret reference guard, deployment verification, no write action without owner |
| P0 | Runner / deploy key / webhook / branch protection | `ops/runner/*`、source-control snapshots | Gitea / GitHub owner metadata | labels、key names、webhook names、ruleset metadata only;no token / key value |
| P0 | Public admin / API route config | Nginx templates、`apps/web/src/lib/config.ts`、`apps/api/src/core/config.py` | Product owner + runtime owner | auth boundary、CORS、public URL、admin path smoke、frontend internal IP ban |
| P0 | Backup / restore credential | `scripts/backup/*`、`k8s/velero/*`、DR runbooks | MinIO / restic / offsite escrow | credential value absent、restore drill gate、escrow owner、retention policy |
| P0 | agent-bounty-protocol treasury / MCP / A2A | `docs/security/AGENT-BOUNTY-IWOOOS-ONBOARDING-HANDOFF.md` | agent-bounty owner response | no payout / claim / submit / daemon / webhook until explicit runtime approval |
| P1 | Prometheus / Alertmanager | `k8s/monitoring/*`、`ops/alertmanager/alertmanager.yml`、`ops/monitoring/*` | 110 monitoring stack | rule diff、receiver diff、reload gate、failure-only notification policy |
| P1 | Grafana / SigNoz / Sentry / Langfuse | `ops/grafana/*`、`ops/signoz/*`、`ops/sentry-self-hosted/*`、`infra/langfuse/*` | 110 compose / public gateway | admin secret externalized、public route, backup, smoke, upgrade window |
| P1 | Harbor / registry | Nginx templates、backup scripts、CD workflows | 110 Harbor / registry domains | robot account owner、image tag immutability、scan policy、TLS |
| P1 | PostgreSQL / Redis / MinIO | app config、backup scripts、monitoring config | 188 / 110 / K3s | no plaintext DSN, access boundary, backup, restore, metrics auth |
| P1 | Docker Compose / systemd | `docker-compose.yml`、`ops/*/docker-compose.yml`、`scripts/reboot-recovery/*.service` | 110 / 188 / agent-bounty hosts | port / volume / env diff、restart window、rollback owner |
| P1 | SSH / sudoers / known_hosts | Ansible inventory、ops scripts、runner scripts | host owners | pinned or accept-new policy、no host key disable、target whitelist |
| P1 | Firewall / WireGuard / NodePort / VIP | K8s service / network policy、Kali / wg-easy docs | network owner | ingress / egress matrix、no unreviewed port exposure |
| P1 | AI provider / model routing | `apps/api/src/services/ai_providers/*`、Ollama runbooks、Nginx proxy | AI owner | dry-run、benchmark、cost / privacy review、fallback order gate |
| P1 | Kali 112 scanner config | `docs/security/KALI-112-MAINTENANCE-WINDOW-DRAFT.md`、Kali snapshots | Kali owner | maintenance window、no active scan、no `/execute`、hardening dry-run |
| P2 | AWOOOI / AwoooP / IwoooS frontend runtime config | `apps/web/next.config.js`、`apps/web/src/lib/config.ts`、i18n | web owner | NEXT_PUBLIC public-domain only、no internal transcript, desktop/mobile smoke |
| P2 | VibeWork product boundary | VibeWork owner docs / future evidence refs | VibeWork owner | independent product boundary、repo / deploy / admin / backup scope |
| P2 | StockPlatform / Tsenyang / Bitan / VTuber routes | Nginx templates、product runbooks | product owner | domain / admin / API / backup / owner matrix |
| P2 | Package / supply-chain baselines | `pnpm-lock.yaml`、`package.json`、Dockerfiles、inventory snapshots | repo owner | lockfile drift, CVE / license policy, image digest evidence |
| P3 | Runbook / endpoint docs / snapshots | `docs/reference/*`、`docs/runbooks/*`、`docs/security/*.snapshot.json` | doc owner | no secret value, stale endpoint flag, owner-reviewed evidence refs |
## 4. 新增規範
1. 高價值配置必須先分級:C0 / C1 / C2 / C3。
2. 所有 C0 配置變更必須有 source-of-truth、owner gate、diff、rollback owner 與驗證點。
3. Nginx live drift 不得自動覆蓋,只能先形成 P0 evidence 與 owner decision。
4. 文件與 runbook 的範例不得包含可用 token、password、private key、runner token、webhook secret、cookie、authorization header 或 partial credential。
5. SSH 類工具不得關閉 host key 驗證;短期可用 `accept-new`,長期要升級 pinned known_hosts。
6. Grafana / Harbor / MinIO / ArgoCD / Gitea / Telegram / AI provider 等管理面密碼只能由 owner secret store 注入。
7. agent-bounty-protocol、VibeWork 與其他產品的 route / admin / webhook / payout / deploy config 必須放入 IwoooS 控管,但不能混用 AWOOOI runtime approval。
## 5. 需要調整的既有規範
| 規範 | 目前狀態 | 調整方向 |
|------|----------|----------|
| IwoooS 初期低摩擦 | 原本偏只讀框架 | 保留只讀框架,但 P0 即時危害可先做 source-control 止血 |
| Nginx DR runbook | 已寫禁止直接手改 live conf | 補 owner gate、drift detector、跨產品通知、post-check |
| Secrets 管理手冊 | 有 secret 來源與 CD 注入說明 | 去除可用 token 範例,補「metadata only」與 owner secret store |
| Gitea / GitHub readiness | 已有 repo / workflow / secret name 盤點 | 與高價值配置分級合併;workflow 變更仍需獨立批准 |
| Deployment verification | 偏重 Pod / health | 加入 Nginx / DNS / TLS / public route / admin route smoke |
| AI provider governance | 已有 dry-run / benchmark 邊界 | 加入 Nginx Ollama proxy、GCP fallback、成本與資料外送控管 |
| Frontend i18n / internal IP | 已有 NEXT_PUBLIC 禁令 | 擴大到 public route / Sentry tunnel / admin path / product domain 一起驗證 |
## 6. 階段完成度
| 工作 | 完成度 | 說明 |
|------|--------|------|
| 重要配置範圍盤點 | `100%` | 已建立 C0-C3 分級與總清單 |
| Nginx 控管機制定義 | `100%` | 已定義 source-of-truth、live path、gate、drift 原則 |
| source-control P0 止血 | `100%` | 已清掉本波掃到的 token 範例、Grafana 密碼常值與 SSH host key 關閉 |
| live Nginx drift detector | `0%` | 尚未 SSH / Ansible check-mode / live hash,需 owner 與維護窗口規則 |
| live Nginx reload / restart | `0%` | 未授權,未執行 |
| DNS / TLS live validation | `0%` | 本階段未跑 live probe;若下一階段改前端或 route,需 desktop / mobile / route smoke |
| cross-product owner response | `0%` | 尚未收到 VibeWork、agent-bounty-protocol、StockPlatform 等 owner acceptance |
## 7. 下一階段優先順序
1. P0:建立 Nginx 只讀 drift detector 草案,輸出 repo-rendered hash、live hash、affected domain / upstream / TLS / admin route,不自動覆寫。
2. P0:補 DNS / TLS / certbot domain inventory,先只讀,不 renew、不 reload。
3. P0:把 workflow / runner / secret name owner response 與高價值配置 C0 gate 串成同一個 IwoooS 狀態。
4. P1:盤點 110 / 188 Docker Compose 與 systemd live config,標記 Harbor、Sentry、Langfuse、Gitea、agent-bounty-protocol 影響面。
5. P1:盤點 Prometheus / Alertmanager / Grafana / SigNoz / Sentry 設定 drift 與 secret externalization。
6. P1:補 Kali 112、111、168 維護窗口 owner 欄位,仍不做 upgrade / restart / scan。
7. P2:`/zh-TW/iwooos` 配置控管摘要產品化,但不得顯示內部工作對話、token、secret 或可執行按鈕。
## 8. 邊界
本清冊完成不代表 Nginx reload、DNS 修改、TLS renew、ArgoCD sync、kubectl、SSH 主機修改、workflow 修改、runner 啟用、secret rotation、active scan、agent-bounty runtime、payout、withdrawal、deploy 或任何 runtime execution 已授權。

View File

@@ -1406,7 +1406,7 @@ PUBKEY=$(cat /tmp/awoooi_repair_bot.pub)
ssh wooo@192.168.0.110 "echo 'command=\"/home/wooo/bin/repair-bot-110.sh\",no-port-forwarding,no-X11-forwarding,no-agent-forwarding ${PUBKEY}' >> ~/.ssh/authorized_keys"
# 驗證
ssh -i /tmp/awoooi_repair_bot -o StrictHostKeyChecking=no wooo@192.168.0.110 "health"
ssh -i /tmp/awoooi_repair_bot -o StrictHostKeyChecking=accept-new wooo@192.168.0.110 "health"
```
預期輸出:`REPAIR_BOT_HEALTHY:110`
@@ -1418,7 +1418,7 @@ PUBKEY=$(cat /tmp/awoooi_repair_bot.pub)
ssh ollama@192.168.0.188 "echo 'command=\"/home/ollama/bin/repair-bot-188.sh\",no-port-forwarding,no-X11-forwarding,no-agent-forwarding ${PUBKEY}' >> ~/.ssh/authorized_keys"
# 驗證
ssh -i /tmp/awoooi_repair_bot -o StrictHostKeyChecking=no ollama@192.168.0.188 "health"
ssh -i /tmp/awoooi_repair_bot -o StrictHostKeyChecking=accept-new ollama@192.168.0.188 "health"
```
預期輸出:`REPAIR_BOT_HEALTHY:188`
@@ -1835,7 +1835,7 @@ class HostRepairAgent:
proc = await asyncio.create_subprocess_exec(
"ssh",
"-i", key_path,
"-o", "StrictHostKeyChecking=no",
"-o", "StrictHostKeyChecking=accept-new",
"-o", "ConnectTimeout=10",
"-o", "BatchMode=yes",
f"{user}@{host}",
@@ -2255,7 +2255,7 @@ Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>"
- [ ] **Step 15.1: 驗證 SSH 連線 (從 API Pod)**
```bash
ssh wooo@192.168.0.120 "kubectl exec -n awoooi-prod deploy/awoooi-api -- ssh -i /etc/repair-ssh/id_ed25519 -o StrictHostKeyChecking=no -o BatchMode=yes wooo@192.168.0.110 health"
ssh wooo@192.168.0.120 "kubectl exec -n awoooi-prod deploy/awoooi-api -- ssh -i /etc/repair-ssh/id_ed25519 -o StrictHostKeyChecking=accept-new -o BatchMode=yes wooo@192.168.0.110 health"
```
預期:`REPAIR_BOT_HEALTHY:110`
@@ -2349,7 +2349,7 @@ check "Alert chain E2E" "curl -s -X POST http://192.168.0.121:32334/api/v1/webho
# Sprint 3
check "SSH key mounted in API pod" "ssh wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-api -- ls /etc/repair-ssh/id_ed25519'" "id_ed25519"
check "SSH health 110" "ssh wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-api -- ssh -i /etc/repair-ssh/id_ed25519 -o StrictHostKeyChecking=no -o BatchMode=yes wooo@192.168.0.110 health'" "REPAIR_BOT_HEALTHY"
check "SSH health 110" "ssh wooo@192.168.0.120 'kubectl exec -n awoooi-prod deploy/awoooi-api -- ssh -i /etc/repair-ssh/id_ed25519 -o StrictHostKeyChecking=accept-new -o BatchMode=yes wooo@192.168.0.110 health'" "REPAIR_BOT_HEALTHY"
echo ""
echo "=== 結果: ${PASS} 通過, ${FAIL} 失敗 ==="

View File

@@ -227,7 +227,7 @@ git commit -m "feat(api): URI scheme 解析器 + Shell Injection 防護 (Sprint
# 安全說明:
# - known_hosts 存 K8s Secret掛載至 /etc/repair-ssh/known_hosts
# - SSH 命令使用 -o UserKnownHostsFile=/etc/repair-ssh/known_hosts
# - 移除 -o StrictHostKeyChecking=no (安全漏洞)
# - 移除關閉 SSH host key 驗證的參數 (安全漏洞)
apiVersion: v1
kind: Secret
metadata:
@@ -545,14 +545,14 @@ def validate_ansible_playbook(playbook_name: str) -> None:
)
```
- [ ] **Step 5: 修正 `_ssh_execute` — 移除 StrictHostKeyChecking=no,改用 known_hosts**
- [ ] **Step 5: 修正 `_ssh_execute` — 移除關閉 SSH host key 驗證的參數,改用 known_hosts**
將現有的 `_ssh_execute` 方法中的 SSH 呼叫從:
```python
"ssh",
"-i", key_path,
"-o", "StrictHostKeyChecking=no",
"-o", "StrictHostKeyChecking=accept-new",
"-o", "BatchMode=yes",
"-o", f"ConnectTimeout={SSH_TIMEOUT}",
```

View File

@@ -323,34 +323,34 @@ git commit -m "feat(events): pydantic v2 event schema with 7 types"
from aider_watch.redactor import redact
def test_openrouter_key_redacted():
s = "failed with sk-or-v1-8ad9d715327496e71e30d1e50cc00903a1ece23f0"
assert "sk-or-v1-8ad9" not in redact(s)
s = "failed with sk-or-example-redaction-test-token"
assert "sk-or-example" not in redact(s)
assert "<redacted:openrouter>" in redact(s)
def test_github_token_redacted():
assert "<redacted:github>" in redact("ghp_abcdef0123456789ABCDEFghijklmnopqrst")
assert "<redacted:github>" in redact("ghp_example_redaction_test_token")
def test_google_api_key_redacted():
assert "<redacted:google>" in redact("AIzaSyABCDEFGHIJKLMNOPQRSTUVWXYZ1234567")
assert "<redacted:google>" in redact("AIzaSy-example-redaction-test-key")
def test_openai_key_redacted():
assert "<redacted:openai>" in redact("sk-abcdEFGH1234567890abcdEFGH1234567890abcdEFGH12")
assert "<redacted:openai>" in redact("sk-example-redaction-test-token")
def test_anthropic_key_redacted():
assert "<redacted:anthropic>" in redact("sk-ant-api03-abcDEF_123-xyz")
def test_telegram_bot_token_redacted():
assert "<redacted:telegram>" in redact("111222333:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
assert "<redacted:telegram>" in redact("<TELEGRAM_BOT_TOKEN_SAMPLE>")
def test_clean_text_passthrough():
s = "hello world, nothing secret here"
assert redact(s) == s
def test_dict_recursive():
d = {"msg": "token=ghp_abcdef0123456789ABCDEFghijklmnopqrst",
"nested": {"tg": "111222333:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"}}
d = {"msg": "token=ghp_example_redaction_test_token",
"nested": {"tg": "<TELEGRAM_BOT_TOKEN_SAMPLE>"}}
out = redact(d)
assert "ghp_abcdef" not in str(out)
assert "ghp_example" not in str(out)
assert "<redacted:github>" in out["msg"]
assert "<redacted:telegram>" in out["nested"]["tg"]
```
@@ -522,10 +522,10 @@ def test_format_session_end():
def test_format_redacts_secrets():
ev = ErrorEvent(ts=_ts(), session_id="s1",
payload={"kind": "api_auth", "message": "bad key sk-or-v1-abcdef0123456789ABCDEFghijklmnopqrstuv",
payload={"kind": "api_auth", "message": "bad key sk-or-example-redaction-test-token",
"context_50chars": ""})
m = format_event(ev)
assert "sk-or-v1-abcdef" not in m
assert "sk-or-example" not in m
assert "<redacted:" in m
```
@@ -1861,7 +1861,7 @@ git commit -m "feat(install): one-shot installer"
cat > ~/.aider-watch.env <<'EOF'
# aider-watch secrets | 2026-04-19 @ Asia/Taipei
AIDER_WATCH_DATABASE_URL=postgresql://aider_watch:<Task6產生的PW>@192.168.0.188:5432/aider_watch
AIDER_WATCH_TELEGRAM_TOKEN=111222333:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AIDER_WATCH_TELEGRAM_TOKEN=<TELEGRAM_BOT_TOKEN_FROM_OWNER_SECRET_STORE>
AIDER_WATCH_TELEGRAM_CHAT_ID=5619078117
AIDER_WATCH_HOSTNAME=ogt-mac
EOF

View File

@@ -176,7 +176,7 @@ from apps.api.src.utils.secret_redactor import redact
def test_openrouter_key_redacted():
assert "<redacted:openrouter>" in redact("sk-or-v1-abcdef0123456789ABCDEFghijklmnopqrstuv")
assert "<redacted:openrouter>" in redact("sk-or-example-redaction-test-token")
def test_anthropic_key_redacted():
@@ -184,15 +184,15 @@ def test_anthropic_key_redacted():
def test_github_token_redacted():
assert "<redacted:github>" in redact("ghp_abcdef0123456789ABCDEFghijklmnopqrst")
assert "<redacted:github>" in redact("ghp_example_redaction_test_token")
def test_google_key_redacted():
assert "<redacted:google>" in redact("AIzaSyABCDEFGHIJKLMNOPQRSTUVWXYZ1234567")
assert "<redacted:google>" in redact("AIzaSy-example-redaction-test-key")
def test_telegram_bot_token_redacted():
assert "<redacted:telegram>" in redact("111222333:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
assert "<redacted:telegram>" in redact("<TELEGRAM_BOT_TOKEN_SAMPLE>")
def test_aws_key_redacted():
@@ -204,9 +204,9 @@ def test_clean_passthrough():
def test_nested_dict():
d = {"a": "ghp_abcdef0123456789ABCDEFghijklmnopqrst", "b": {"c": "AIzaSyABCDEFGHIJKLMNOPQRSTUVWXYZ1234567"}}
d = {"a": "ghp_example_redaction_test_token", "b": {"c": "AIzaSy-example-redaction-test-key"}}
out = redact(d)
assert "ghp_abc" not in str(out)
assert "ghp_example" not in str(out)
assert "AIzaSy" not in str(out)
```

View File

@@ -68,7 +68,7 @@
### #1 Velero MinIO 密碼明文已進 git history
- **位置**`k8s/velero/01-credentials.yaml:13-14`commit `eea6e3ac`
- **內容**`aws_access_key_id=minio_admin` / `aws_secret_access_key=Minio_Velero_2026!`
- **內容**`aws_access_key_id=<MINIO_ACCESS_KEY>` / `aws_secret_access_key=<MINIO_SECRET_KEY>`;舊版曾保存明文,已改用 placeholder 記錄
- **後果**:拿到 git repo含 GitHub mirror即可刪/竄改所有 Velero 備份 → DR 全崩
- **修復**:① 立即輪換 MinIO root + Velero key`.gitignore``*-credentials.yaml`;③ `git filter-repo` 擦歷史;④ 改 SealedSecret/ExternalSecret

View File

@@ -71,7 +71,7 @@ services:
- "3002:3000"
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=WoooTech2026
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:?set_GRAFANA_ADMIN_PASSWORD_from_owner_secret_store}
- GF_SERVER_ROOT_URL=http://192.168.0.110:3002
- TZ=Asia/Taipei
volumes:

View File

@@ -62,8 +62,17 @@ def run_ssh_command(host: str, user: str, command: str) -> tuple[bool, str]:
"""執行 SSH 命令"""
try:
result = subprocess.run(
["ssh", "-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no",
f"{user}@{host}", command],
[
"ssh",
"-o",
"BatchMode=yes",
"-o",
"ConnectTimeout=5",
"-o",
"StrictHostKeyChecking=accept-new",
f"{user}@{host}",
command,
],
capture_output=True,
text=True,
timeout=30