From 4e82acc0f50193047415a31748703ece2e9a0492 Mon Sep 17 00:00:00 2001 From: OoO Date: Mon, 4 May 2026 09:02:07 +0800 Subject: [PATCH] =?UTF-8?q?feat(p10)+docs(adr):=20MCP=20=E8=87=AA=E5=BB=BA?= =?UTF-8?q?=20Stack=20docker-compose=20+=20ADR-031?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operation Ollama-First v5.0 / Phase 10 + Phase 12 收尾 docker-compose.mcp.yml — 4+3 容器 MCP stack - postgres-mcp (port 3001): Claude 直連 momo_pro DB read-only RBAC - mcp-omnisearch (3003): Tavily 主 + Exa 備(取代 Gemini Grounding) 避開 Brave(2026-02 取消免費 tier) - firecrawl-self (3002): 自建爬蟲,SPA 反爬蟲 - filesystem-mcp (3004): 跨主機檔案 read-only 護欄 #2 落地(Owen v5.0 鐵律 / ADR-033): firecrawl-self mem_limit:2g + cpus:1.5 PLAYWRIGHT_BROWSER_POOL_MAX=3 chrome-reaper sidecar 每小時清 Chrome zombies 安全設計: - 全部 127.0.0.1 暴露(不外網) - read-only volume mount(filesystem 只能讀) - postgres-mcp RBAC mcp_readonly role 限 SELECT 6 熱表 - API key 全走 env var 不寫死 ADR-031 — MCP 自建 Stack 治理決策 - 取代 Gemini Grounding 唯一通路(多供應商策略) - 預期 70%+ grounding 流量走免費 Tavily - 188 主機資源 +4-5GB RAM 可控 - Migration Plan:6 步驟(含 Tavily/Exa key 申請 + mcp_readonly role 預建) 啟用前置(待統帥): 1. .env 加 TAVILY_API_KEY / EXA_API_KEY / MCP_POSTGRES_PASSWORD / FIRECRAWL_AUTH_KEY 2. momo-db 建 mcp_readonly role + GRANT SELECT 3. 
ssh wooo@110 → ssh ollama@188 → docker compose -f docker-compose.mcp.yml up -d

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 docker-compose.mcp.yml                    | 207 +++++++++++++++++++++++++
 docs/adr/ADR-031-mcp-self-hosted-stack.md | 165 ++++++++++++++++++++
 docs/adr/README.md                        |   1 +
 3 files changed, 373 insertions(+)
 create mode 100644 docker-compose.mcp.yml
 create mode 100644 docs/adr/ADR-031-mcp-self-hosted-stack.md

diff --git a/docker-compose.mcp.yml b/docker-compose.mcp.yml
new file mode 100644
index 0000000..05374b9
--- /dev/null
+++ b/docker-compose.mcp.yml
@@ -0,0 +1,207 @@
+# =============================================================================
+# Operation Ollama-First v5.0 / Phase 10 — self-hosted MCP stack
+# =============================================================================
+# Deploy target: host 188, alongside the existing momo-pro containers
+# Start with:    docker compose -f docker-compose.mcp.yml up -d
+# Prerequisites:
+#   1. .env sets TAVILY_API_KEY (Tavily, 1000 free credits/month)
+#   2. .env sets EXA_API_KEY (Exa, fallback provider)
+#   3. .env sets MCP_POSTGRES_PASSWORD and FIRECRAWL_AUTH_KEY (the latter is
+#      mandatory: compose aborts while it is unset or empty)
+#   4. momo-db already has the mcp_readonly role with SELECT-only grants
+#   5. The 188 firewall lets the 172.x docker bridge reach the internet
+#      (firecrawl fetches web pages)
+#
+# The 4 MCP servers follow the ADR-031 (Phase 10) spec:
+#   - postgres-mcp:    Claude connects straight to the momo_pro DB (read-only RBAC)
+#   - mcp-omnisearch:  replaces Gemini Grounding; Tavily primary + Exa fallback
+#   - firecrawl-self:  self-hosted crawler (Owen v5.0 guardrail #2: 2g memory
+#                      limit + chrome-reaper)
+#   - filesystem-mcp:  cross-host file access
+#
+# Guardrails (ADR-033):
+#   #2 Firecrawl 2g memory limit + chrome-reaper sidecar + 1.8GB alert
+#
+# Post-deploy acceptance checks (for the commander):
+#   curl http://localhost:3001/health  # postgres-mcp
+#   curl http://localhost:3002/health  # firecrawl
+#   curl http://localhost:3003/health  # omnisearch
+#   curl http://localhost:3004/health  # filesystem
+#
+# NOTE(review): every image uses the :latest tag, so a redeploy can silently
+# pull a different version — pin tags or digests once the stack is validated.
+# NOTE(review): the wget healthchecks and the curl checks above presume each
+# image ships wget and serves GET /health on that port — confirm per image.
+# =============================================================================
+
+services:
+
+  # ─────────────────────────────────────────────────────────────────────────
+  # postgres-mcp: direct, read-only access to the momo_pro DB for Claude
+  # ─────────────────────────────────────────────────────────────────────────
+  postgres-mcp:
+    image: mcp/postgres:latest
+    container_name: momo-mcp-postgres
+    restart: unless-stopped
+    init: true
+    ports:
+      - "127.0.0.1:3001:3000"  # localhost only, so the DB bridge is never exposed externally
+    environment:
+      - POSTGRES_HOST=momo-db
+      - POSTGRES_PORT=5432
+      - POSTGRES_USER=mcp_readonly  # read-only role that must already exist in momo-db
+      - POSTGRES_PASSWORD=${MCP_POSTGRES_PASSWORD}
+      # RBAC: limit SELECT to the hot tables
+      - ALLOWED_TABLES=ai_insights,ai_calls,mcp_calls,daily_sales_snapshot,competitor_prices,products
+    networks:
+      - momo-shared
+    deploy:
+      resources:
+        limits:
+          memory: 256m
+    healthcheck:
+      test: ["CMD", "wget", "-qO-", "http://localhost:3000/health"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+
+  # ─────────────────────────────────────────────────────────────────────────
+  # mcp-omnisearch: unified search (Tavily primary + Exa fallback)
+  # ─────────────────────────────────────────────────────────────────────────
+  mcp-omnisearch:
+    image: ghcr.io/spences10/mcp-omnisearch:latest
+    container_name: momo-mcp-omnisearch
+    restart: unless-stopped
+    init: true
+    ports:
+      - "127.0.0.1:3003:3000"
+    environment:
+      # Brave is left out on purpose (free tier withdrawn 2026-02)
+      - TAVILY_API_KEY=${TAVILY_API_KEY}  # primary, 1000 free credits/month
+      - EXA_API_KEY=${EXA_API_KEY}  # fallback, 1000 free credits/month
+      - SEARCH_PROVIDER_ORDER=tavily,exa  # fallback order
+    deploy:
+      resources:
+        limits:
+          memory: 512m
+    healthcheck:
+      test: ["CMD", "wget", "-qO-", "http://localhost:3000/health"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+
+  # ─────────────────────────────────────────────────────────────────────────
+  # firecrawl-self: self-hosted crawler (Owen v5.0 guardrail #2)
+  # ─────────────────────────────────────────────────────────────────────────
+  firecrawl-self:
+    image: firecrawl/firecrawl:latest
+    container_name: momo-mcp-firecrawl
+    restart: unless-stopped
+    init: true
+    ports:
+      - "127.0.0.1:3002:3002"
+    environment:
+      - REDIS_URL=redis://firecrawl-redis:6379
+      - PLAYWRIGHT_MICROSERVICE_URL=http://firecrawl-playwright:3000
+      - PLAYWRIGHT_BROWSER_POOL_MAX=3  # guardrail #2: browser pool cap
+      - SCRAPE_TIMEOUT_MS=30000
+      # No built-in fallback: a default committed here would be a shared, publicly
+      # known key. ":?" makes compose abort when the variable is unset or empty.
+      - BULL_AUTH_KEY=${FIRECRAWL_AUTH_KEY:?FIRECRAWL_AUTH_KEY must be set in .env}
+    depends_on:
+      - firecrawl-redis
+      - firecrawl-playwright
+    deploy:
+      resources:
+        limits:
+          memory: 2g  # Owen v5.0 guardrail #2: hard cap
+          cpus: '1.5'
+    healthcheck:
+      test: ["CMD", "wget", "-qO-", "http://localhost:3002/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+
+  firecrawl-redis:
+    image: redis:7-alpine
+    container_name: momo-mcp-firecrawl-redis
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          memory: 128m
+
+  firecrawl-playwright:
+    image: firecrawl/playwright:latest
+    container_name: momo-mcp-firecrawl-playwright
+    restart: unless-stopped
+    # Same as the other services: an init process runs as PID 1 and reaps
+    # exited child processes, so dead Chrome children do not pile up as defunct entries.
+    init: true
+    deploy:
+      resources:
+        limits:
+          memory: 1.5g
+          cpus: '1.0'
+
+  # ─────────────────────────────────────────────────────────────────────────
+  # chrome-reaper: guardrail #2 — clears leftover Chrome processes every hour
+  # ─────────────────────────────────────────────────────────────────────────
+  chrome-reaper:
+    image: alpine:3
+    container_name: momo-mcp-chrome-reaper
+    restart: unless-stopped
+    # List form passes the script to sh as-is. "$$" is compose's escape for a
+    # literal "$", so $(date) is expanded by the container shell rather than
+    # being treated as compose variable interpolation.
+    command:
+      - sh
+      - -c
+      - |
+        # Stop if docker-cli cannot be installed; the restart policy retries.
+        apk add --no-cache docker-cli || exit 1
+        while true; do
+          echo "[reaper] $$(date) cleaning Chrome zombies..."
+          # pkill matching nothing is expected, so its status is ignored; a
+          # failing docker exec is reported in the log.
+          docker exec momo-mcp-firecrawl-playwright \
+            sh -c "pkill -f \"chrome.*--type=zygote\" 2>/dev/null || true;
+                   pkill -f \"chrome.*--type=renderer\" 2>/dev/null || true" \
+            || echo "[reaper] docker exec into momo-mcp-firecrawl-playwright failed"
+          sleep 3600
+        done
+    volumes:
+      # NOTE(review): ":ro" only protects the socket file itself. Anything that
+      # can open the socket can still send write requests to the Docker API, so
+      # this container has full control of the host daemon. Consider fronting
+      # it with a socket proxy restricted to exec.
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+
+  # ─────────────────────────────────────────────────────────────────────────
+  # filesystem-mcp: cross-host file access (local paths only)
+  # ─────────────────────────────────────────────────────────────────────────
+  filesystem-mcp:
+    image: mcp/filesystem:latest
+    container_name: momo-mcp-filesystem
+    restart: unless-stopped
+    init: true
+    ports:
+      - "127.0.0.1:3004:3000"
+    environment:
+      - ALLOWED_PATHS=/data,/logs  # restrict the accessible paths
+    volumes:
+      - ./data:/data:ro  # read-only so the LLM cannot modify files
+      - ./logs:/logs:ro
+    deploy:
+      resources:
+        limits:
+          memory: 128m
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Shares the existing momo-pro network so postgres-mcp can reach momo-db
+# ─────────────────────────────────────────────────────────────────────────────
+networks:
+  momo-shared:
+    external: true
+    name: momo-pro_default  # default network of the existing docker-compose.yml
diff --git a/docs/adr/ADR-031-mcp-self-hosted-stack.md b/docs/adr/ADR-031-mcp-self-hosted-stack.md
new file mode 100644
index 0000000..b4046dd
--- /dev/null
+++ b/docs/adr/ADR-031-mcp-self-hosted-stack.md
@@ -0,0 +1,165 @@
+# ADR-031: MCP 自建 Stack — postgres + omnisearch + firecrawl + filesystem
+
+- **Status**: Accepted (待 188 deploy 後 Active)
+- **Date**: 2026-05-04
+- **Decision Maker**: 統帥
+- **Author**: Operation Ollama-First v5.0 / Phase 10
+- **Related**: ADR-028(LLM 路由統一準則)、ADR-032(RAG 自主學習)、ADR-033(三護欄)
+
+---
+
+## Context
+
+戰役 v4.0 階段就提案 MCP 自建(取代 Gemini Grounding 唯一聯網能力),但 Phase 10 因 hook 阻擋 SSH 188 deploy,先完成本地 docker-compose.mcp.yml + ADR 設計,待統帥手動 `ssh wooo@110 → ssh ollama@188 → docker compose -f docker-compose.mcp.yml up -d` 啟用。
+
+**為何要自建 MCP?**
+- mcp_collector_service.py 目前 100% 走 Gemini 2.0 Flash Grounding(鎖定 7 場景之一)
+- 若 Gemini API 配額爆 / 政策變更 → 即時情報唯一通路斷
+- 多供應商策略(ADR-030)需要 Tavily / Exa 作為 grounding 備援
+- Claude Code 直連 momo_pro DB(read-only RBAC)能加速統帥日常 SQL 查詢
+
+---
+
+## Decision — 4 + 3 容器架構
+
+### 4 個 MCP server(核心)
+
+| Server | port | 用途 | 取代誰 |
+|---|---|---|---|
+| `postgres-mcp` | 127.0.0.1:3001 | Claude 直連 momo_pro DB(read-only) | 統帥手動 SQL |
+| `mcp-omnisearch` | 127.0.0.1:3003 | Tavily + Exa 統一搜尋 | Gemini Grounding |
+| `firecrawl-self` | 127.0.0.1:3002 | 自建爬蟲(含護欄 #2) | 部分自寫 BeautifulSoup |
+| `filesystem-mcp` | 127.0.0.1:3004 | 跨主機檔案操作(read-only) | SSH 跳板手動 cat |
+
+### 3 個輔助容器
+
+| Container | 用途 |
+|---|---|
+| `firecrawl-redis` | Firecrawl job queue |
+| `firecrawl-playwright` | Browser pool(mem 1.5g)|
+| `chrome-reaper` | 每小時清 Chrome 殘留(護欄 #2)|
+
+### 護欄 #2 落地(Owen v5.0 鐵律)
+
+```yaml
+firecrawl-self:
+
deploy: + resources: + limits: + memory: 2g # ⭐ 硬上限 + cpus: '1.5' + environment: + - PLAYWRIGHT_BROWSER_POOL_MAX=3 + - SCRAPE_TIMEOUT_MS=30000 + healthcheck: + interval: 30s + start_period: 60s + +chrome-reaper: + command: 每小時 pkill chrome zombie processes +``` + +### 安全設計 + +- **僅 127.0.0.1 暴露**:避免外網直連 DB / 爬蟲服務 +- **read-only volume mount**:filesystem-mcp 只能讀 +- **postgres-mcp RBAC**:mcp_readonly role 限 SELECT 到熱表(ai_insights / ai_calls / mcp_calls / daily_sales_snapshot / competitor_prices / products) +- **Tavily/Exa API key 走 env**:不寫死 docker-compose + +### 取代 Gemini Grounding 的 fallback 鏈(mcp_collector_service.py 改造) + +``` +舊: + Gemini 2.0 Grounding → Gemini 1.5 → Ollama → 靜態 + +新: + mcp-omnisearch (Tavily) → omnisearch (Exa) → + 全失敗 → Gemini 2.0 Grounding (保留為 L4) + → Gemini 1.5 → Ollama → 靜態 +``` + +預期 70%+ 流量走免費 Tavily,省 ~70% Gemini Grounding 成本。 + +--- + +## Alternatives Considered + +| 方案 | 否決理由 | +|---|---| +| **A. 維持 Gemini Grounding 唯一** | 單供應商風險(已是 ADR-030 否決理由)| +| **B. 用 Brave Search API** | 2026-02 取消免費 tier(A2 web research 紅燈)| +| **C. 純 Tavily 不要 Firecrawl** | Firecrawl 對 SPA 動態頁更強(蝦皮等 JS-heavy 站)| +| **D. Firecrawl 不限資源** | 188 主機跑 5+ project,OOM 連鎖(reference_188_multi_project)| +| **E. 用 SaaS Firecrawl Cloud** | 成本(自建免費)+ 資料外流風險 | + +--- + +## Consequences + +### 正面(5) +1. **Gemini Grounding 多供應商**:Tavily 主 + Exa 備援,月省 ~70% grounding 成本 +2. **Claude 直連 DB**:統帥日常 SQL 查詢可走 MCP 介面(read-only 安全) +3. **爬蟲自主性**:Firecrawl 取代部分自寫爬蟲,SPA 反爬蟲更強 +4. **零外部 SaaS 依賴**:全部自建在 188(Tavily/Exa 是 API 不是 SaaS) +5. **Owen 護欄 #2 落地**:mem_limit + chrome-reaper 防 OOM + +### 負面(3) +1. **188 主機資源占用 +4-5GB RAM**(Firecrawl 2g + Playwright 1.5g + 其他) +2. **Tavily/Exa API key 維護**:申請 + 月配額追蹤 +3. **mcp_collector_service.py 重構工作量**:~200 行改動 + +### 風險(3) +1. **Firecrawl OOM 連鎖**:mem_limit 2g 觸發 OOM kill → mitigate by healthcheck + restart +2. **Tavily 免費額度(1000/月)爆**:mitigate by Exa 備援 + Gemini Grounding L4 +3. 
**postgres-mcp RBAC 設置失誤**:mitigate by mcp_readonly role 預先建立 + only SELECT + +--- + +## Verification + +### V1:健康檢查 +```bash +curl http://localhost:3001/health # postgres-mcp +curl http://localhost:3002/health # firecrawl +curl http://localhost:3003/health # omnisearch +curl http://localhost:3004/health # filesystem +# 全部期待 200 OK +``` + +### V2:Firecrawl 資源 +```bash +ssh ollama@192.168.0.188 'docker stats momo-mcp-firecrawl --no-stream' +# 期望 < 1.8GB(mem_limit 2GB 90%) +``` + +### V3:Tavily 配額 +```sql +SELECT COUNT(*) FROM mcp_calls +WHERE server = 'omnisearch' AND tool = 'tavily_search' + AND called_at > date_trunc('month', NOW()); +-- 期望 < 1000(免費額度上限) +``` + +--- + +## Migration Plan + +| 步驟 | 工作 | 狀態 | +|---|---|---| +| 10.1 | docker-compose.mcp.yml 寫完 | ✅ 本 commit | +| 10.2 | ADR-031 撰寫 | ✅ 本 commit | +| 10.3 | 統帥申請 Tavily + Exa API key | ⏳ 待 | +| 10.4 | momo-db 建 mcp_readonly role + GRANT SELECT | ⏳ 待 | +| 10.5 | 188 deploy: docker compose -f docker-compose.mcp.yml up -d | ⏳ 待 | +| 10.6 | mcp_collector_service.py 改用 mcp-omnisearch(取代 Gemini Grounding 主路徑)| ⏳ Phase 10.5 | +| 10.7 | 健康檢查 + Firecrawl mem 監控告警 | ⏳ Phase 10.5 | + +--- + +## References + +- `docker-compose.mcp.yml`(本 commit) +- ADR-028(LLM 路由)/ ADR-030(多供應商)/ ADR-033(護欄) +- `services/mcp_collector_service.py`(將改造) +- A2 web research 報告:`docs/phase0_research_report_20260503.md` +- mcp-omnisearch GitHub:https://github.com/spences10/mcp-omnisearch diff --git a/docs/adr/README.md b/docs/adr/README.md index 8774d24..3a08672 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -52,6 +52,7 @@ | [028](ADR-028-llm-routing-unified-principles.md) | LLM 路由統一準則 — Ollama-First 五大支柱(補述 ADR-027) | Accepted | 2026-05-03 | | [029](ADR-029-hermes-first-twin-tower.md) | Hermes-First 雙塔分工(戰術主塔 / 戰略副塔,Gemini 月支出 -23%) | Accepted | 2026-05-03 | | [030](ADR-030-frontier-multi-vendor-strategy.md) | Frontier 多供應商策略(Anthropic + Google + OpenRouter;Phase 7 Code Review 升 Claude Opus 4.7) | Accepted | 2026-05-03 | +| 
[031](ADR-031-mcp-self-hosted-stack.md) | MCP 自建 Stack(postgres + omnisearch + firecrawl + filesystem;含 Owen 護欄 #2 Firecrawl 2g 限制) | Accepted | 2026-05-04 | | [032](ADR-032-rag-autonomous-learning-loop.md) | RAG 自主學習迴圈 — Distiller + PromotionGate + 反饋環(Phase 11) | Accepted | 2026-05-03 | | [033](ADR-033-rag-three-guardrails.md) | RAG 治理三護欄 — Promotion Gate / Firecrawl 資源 / BGE-M3 一致性(Owen v5.0 鐵律) | Accepted | 2026-05-03 |