fix: align deploy health checks with live endpoint
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
This commit is contained in:
@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.676"
|
||||
SYSTEM_VERSION = "V10.677"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -14,8 +14,8 @@ SERVICE=$(echo "$QUERY_STRING" | sed -n 's/.*service=\([^&]*\).*/\1/p')
|
||||
|
||||
# 定義服務健康檢查 URL
|
||||
declare -A HEALTH_URLS=(
|
||||
["momo-uat"]="https://mo.wooo.work/health"
|
||||
["momo-gcp"]="https://momo.wooo.work/health"
|
||||
["momo-live"]="https://mo.wooo.work/health"
|
||||
["momo-prod"]="https://mo.wooo.work/health"
|
||||
["gitlab"]="http://127.0.0.1:8929/"
|
||||
["registry"]="http://127.0.0.1:5002/v2/"
|
||||
["n8n"]="http://127.0.0.1:5678/"
|
||||
|
||||
@@ -27,9 +27,9 @@ check_service() {
|
||||
echo '{"services": {'
|
||||
|
||||
# 核心服務
|
||||
check_service "momo-uat" "https://mo.wooo.work/health"
|
||||
check_service "momo-live" "https://mo.wooo.work/health"
|
||||
echo ","
|
||||
check_service "momo-gcp" "https://momo.wooo.work/health"
|
||||
check_service "momo-prod" "https://mo.wooo.work/health"
|
||||
echo ","
|
||||
|
||||
# 開發工具
|
||||
|
||||
@@ -119,7 +119,7 @@ scrape_configs:
|
||||
module: [http_2xx]
|
||||
static_configs:
|
||||
- targets:
|
||||
- https://momo.wooo.work/health
|
||||
- https://mo.wooo.work/health
|
||||
labels:
|
||||
env: 'prod'
|
||||
probe_type: 'http'
|
||||
@@ -268,24 +268,6 @@ scrape_configs:
|
||||
- target_label: __address__
|
||||
replacement: blackbox-exporter:9115
|
||||
|
||||
- job_name: 'blackbox-dns-momo'
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [dns_check_momo]
|
||||
static_configs:
|
||||
- targets:
|
||||
- 8.8.8.8 # Google DNS - momo.wooo.work
|
||||
labels:
|
||||
domain: 'momo.wooo.work'
|
||||
probe_type: 'dns'
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: blackbox-exporter:9115
|
||||
|
||||
# ===========================================================================
|
||||
# 監控系統自身
|
||||
# ===========================================================================
|
||||
|
||||
@@ -753,3 +753,4 @@ POSTGRES_HOST=momo-db
|
||||
| 2026-06-25 | 共用成長流程列手機版不可溢出畫面 | V10.674 起全站 `momo-growth-rail` 在手機寬度改為換行呈現,避免「評估 / 分析 / 建議 / 解法 / 治理」流程 chip 超出視覺邊界。 |
|
||||
| 2026-06-25 | 匯入、缺貨、設定與通知模板頁不可外露 SQL / 資料表 / 模板代碼 | V10.675 起匯入 job API 對前台回傳白話處置訊息,保留 raw error 於 DB/log;自動匯入失敗通知不再顯示 `psycopg2`、`daily_sales_snapshot`、`snapshot_date` 等內部字串;缺貨首頁、系統設定與通知模板列表改用營運語言,並補上逾時匯入任務重置與取消 API。 |
|
||||
| 2026-06-25 | 觀測台入口與通知預覽不可用工程主語干擾營運判讀 | V10.676 起觀測台導覽統一使用「AI 分工矩陣」,通知模板列表會把 K8s/Pod/資料庫/CI Pipeline 等內部詞轉成服務健康、資料連線與部署流程;主機健康事件與自癒劇本改顯示任務/問題/處置提醒,不直接露 `unknown_task`、`scheduler_task_failure`、`CODE_FIX` 等 raw code。 |
|
||||
| 2026-06-25 | 部署監控不得用退役正式域名判定失敗 | V10.677 起 CI/CD 狀態 API 與 active blackbox 監控預設以 `PUBLIC_URL` / `PROD_BASE_URL` 對齊現行正式入口 `https://mo.wooo.work/health`,不再把 `momo.wooo.work` timeout 判成正式部署失敗;Webcrumbs loader fallback 也改為資訊級降級訊號,避免健康頁與 log 產生假紅燈。 |
|
||||
|
||||
@@ -11,6 +11,7 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
cicd_bp = Blueprint('cicd', __name__)
|
||||
cicd_log = logging.getLogger('cicd_routes')
|
||||
@@ -97,14 +98,41 @@ if not GITLAB_ENABLED:
|
||||
)
|
||||
|
||||
# 環境配置
|
||||
def _normalize_base_url(value, fallback='https://mo.wooo.work'):
|
||||
candidate = (value or '').strip().rstrip('/') or fallback
|
||||
parsed = urlparse(candidate)
|
||||
if parsed.scheme not in {'http', 'https'} or not parsed.netloc:
|
||||
return fallback.rstrip('/')
|
||||
return candidate
|
||||
|
||||
|
||||
def _health_endpoint_for(base_url):
|
||||
return f"{base_url.rstrip('/')}/health"
|
||||
|
||||
|
||||
PUBLIC_BASE_URL = _normalize_base_url(os.getenv('PUBLIC_URL'), 'https://mo.wooo.work')
|
||||
CICD_UAT_BASE_URL = _normalize_base_url(
|
||||
os.getenv('CICD_UAT_BASE_URL')
|
||||
or os.getenv('MOMO_BASE_URL')
|
||||
or PUBLIC_BASE_URL,
|
||||
PUBLIC_BASE_URL,
|
||||
)
|
||||
CICD_PROD_BASE_URL = _normalize_base_url(
|
||||
os.getenv('CICD_PROD_BASE_URL')
|
||||
or os.getenv('PROD_BASE_URL')
|
||||
or PUBLIC_BASE_URL,
|
||||
PUBLIC_BASE_URL,
|
||||
)
|
||||
|
||||
|
||||
ENVIRONMENTS = {
|
||||
'uat': {
|
||||
'name': 'UAT',
|
||||
'label': '測試環境',
|
||||
'name': 'LIVE',
|
||||
'label': '線上入口',
|
||||
'color': '#3498db',
|
||||
'icon': '🟦',
|
||||
'url': 'https://mo.wooo.work',
|
||||
'health_endpoint': 'https://mo.wooo.work/health',
|
||||
'url': CICD_UAT_BASE_URL,
|
||||
'health_endpoint': _health_endpoint_for(CICD_UAT_BASE_URL),
|
||||
'runtime_host': '192.168.0.188'
|
||||
},
|
||||
'prod': {
|
||||
@@ -112,12 +140,21 @@ ENVIRONMENTS = {
|
||||
'label': '正式環境',
|
||||
'color': '#e74c3c',
|
||||
'icon': '🟥',
|
||||
'url': 'https://momo.wooo.work',
|
||||
'health_endpoint': 'https://momo.wooo.work/health',
|
||||
'url': CICD_PROD_BASE_URL,
|
||||
'health_endpoint': _health_endpoint_for(CICD_PROD_BASE_URL),
|
||||
'runtime_host': '192.168.0.188'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _public_health_error(exc):
|
||||
text = str(exc or '').lower()
|
||||
if 'timeout' in text or 'timed out' in text:
|
||||
return '健康檢查逾時,請確認正式入口、Nginx 與 188 應用容器狀態。'
|
||||
if 'connection' in text or 'refused' in text or 'max retries' in text:
|
||||
return '健康檢查無法連線,請確認正式入口、Nginx 與 188 應用容器狀態。'
|
||||
return '健康檢查暫時無法完成,請稍後重試或查看部署診斷。'
|
||||
|
||||
# =============================================================================
|
||||
# 部署監控頁面
|
||||
# =============================================================================
|
||||
@@ -171,7 +208,7 @@ def get_cicd_status():
|
||||
'type': 'environment',
|
||||
'environment': env_id,
|
||||
'message': f"{env_status.get('name')} 環境異常",
|
||||
'error': env_status.get('error'),
|
||||
'error': env_status.get('display_error') or env_status.get('error'),
|
||||
'severity': 'critical',
|
||||
'auto_fixable': True,
|
||||
'fix_action': 'diagnose'
|
||||
@@ -259,7 +296,7 @@ def get_pipeline_detail(pipeline_id):
|
||||
except Exception as e:
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'error': str(e)
|
||||
'error': '部署監控暫時無法完成,請稍後重試或查看服務健康狀態。'
|
||||
}), 500
|
||||
|
||||
|
||||
@@ -448,7 +485,7 @@ def run_diagnosis(env):
|
||||
diagnosis['checks'].append({
|
||||
'name': '健康端點',
|
||||
'status': 'failed',
|
||||
'error': str(e)
|
||||
'error': _public_health_error(e)
|
||||
})
|
||||
|
||||
# EwoooC 已撤除舊叢集 runtime,這裡只保留現行 Docker Compose 狀態說明。
|
||||
@@ -473,7 +510,7 @@ def run_diagnosis(env):
|
||||
diagnosis['checks'].append({
|
||||
'name': 'Registry',
|
||||
'status': 'failed',
|
||||
'error': str(e)
|
||||
'error': '映像倉庫健康檢查暫時無法完成,請查看 Registry 服務狀態。'
|
||||
})
|
||||
|
||||
# 生成總結
|
||||
@@ -501,7 +538,7 @@ def run_diagnosis(env):
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
diagnosis['error'] = str(e)
|
||||
diagnosis['error'] = '部署診斷暫時無法完成,請稍後重試。'
|
||||
|
||||
return diagnosis
|
||||
|
||||
@@ -877,7 +914,13 @@ def get_environment_status(env_id):
|
||||
status['response_time'] = round(response_time, 2)
|
||||
status['last_check'] = datetime.now().isoformat()
|
||||
except Exception as e:
|
||||
status['error'] = str(e)
|
||||
cicd_log.warning(
|
||||
"[CI/CD] Health check failed env=%s url=%s error=%s",
|
||||
env_id,
|
||||
env_config.get('health_endpoint'),
|
||||
e,
|
||||
)
|
||||
status['error'] = _public_health_error(e)
|
||||
|
||||
status['runtime_note'] = 'Docker Compose on 192.168.0.188; legacy cluster probes disabled.'
|
||||
|
||||
|
||||
@@ -217,15 +217,20 @@ def webcrumbs_asset_proxy(asset_path):
|
||||
try:
|
||||
upstream_response = requests.get(upstream_url, timeout=(2, 8))
|
||||
except requests.RequestException as exc:
|
||||
sys_log.warning(f"[Webcrumbs] Asset proxy failed: {normalized_path} -> {exc}")
|
||||
if normalized_path == WEBCRUMBS_COMPATIBLE_LOADER_PATH:
|
||||
sys_log.info("[Webcrumbs] Loader upstream unavailable; serving local fallback")
|
||||
return _webcrumbs_fallback_loader_response('upstream-unavailable')
|
||||
sys_log.warning(f"[Webcrumbs] Asset proxy failed: {normalized_path} -> {exc}")
|
||||
return Response('Webcrumbs asset upstream unavailable', status=502, mimetype='text/plain')
|
||||
|
||||
if upstream_response.status_code >= 400:
|
||||
sys_log.warning(f"[Webcrumbs] Asset proxy returned {upstream_response.status_code}: {normalized_path}")
|
||||
if normalized_path == WEBCRUMBS_COMPATIBLE_LOADER_PATH:
|
||||
sys_log.info(
|
||||
"[Webcrumbs] Loader upstream returned %s; serving local fallback",
|
||||
upstream_response.status_code,
|
||||
)
|
||||
return _webcrumbs_fallback_loader_response(f'upstream-{upstream_response.status_code}')
|
||||
sys_log.warning(f"[Webcrumbs] Asset proxy returned {upstream_response.status_code}: {normalized_path}")
|
||||
return Response('Webcrumbs asset upstream returned error', status=upstream_response.status_code, mimetype='text/plain')
|
||||
|
||||
content_type = upstream_response.headers.get('Content-Type')
|
||||
|
||||
@@ -97,7 +97,7 @@ compare_health() {
|
||||
log "💓 比對服務健康狀態..."
|
||||
|
||||
UAT_HEALTH=$(curl -s -o /dev/null -w '%{http_code}' "https://mo.wooo.work/health" 2>/dev/null)
|
||||
GCP_HEALTH=$(curl -s -o /dev/null -w '%{http_code}' "https://momo.wooo.work/health" 2>/dev/null)
|
||||
GCP_HEALTH=$(curl -s -o /dev/null -w '%{http_code}' "https://mo.wooo.work/health" 2>/dev/null)
|
||||
|
||||
log " UAT: ${UAT_HEALTH}"
|
||||
log " GCP: ${GCP_HEALTH}"
|
||||
@@ -228,7 +228,7 @@ main() {
|
||||
|
||||
<b>檢查時間:</b> $(date '+%Y-%m-%d %H:%M:%S')
|
||||
<b>UAT:</b> https://mo.wooo.work
|
||||
<b>GCP:</b> https://momo.wooo.work
|
||||
<b>正式入口:</b> https://mo.wooo.work
|
||||
|
||||
所有配置、版本、服務狀態一致。"
|
||||
fi
|
||||
|
||||
@@ -34,8 +34,7 @@ NC='\033[0m'
|
||||
|
||||
declare -A DOMAINS=(
|
||||
# 核心業務 - 最高優先級
|
||||
["https://mo.wooo.work/health"]="200|MOMO App UAT|kubectl rollout restart deployment/momo-app -n momo|60"
|
||||
["https://momo.wooo.work/health"]="200|MOMO App GCP|gcloud compute ssh momo-pro-gcp --zone=asia-east1-b --command='sudo kubectl rollout restart deployment/momo-app -n momo'|60"
|
||||
["https://mo.wooo.work/health"]="200|MOMO Pro 正式入口|ssh ollama@192.168.0.188 'cd /home/ollama/momo-pro && docker compose up -d --no-deps --force-recreate momo-app'|60"
|
||||
|
||||
# CI/CD 工具 - 高優先級(GitLab 需要更長啟動時間)
|
||||
["http://192.168.0.110:8929/"]="200|GitLab|docker restart wooo-gitlab|120"
|
||||
|
||||
@@ -718,7 +718,7 @@
|
||||
<div class="issue-detail">
|
||||
${issue.type === 'job' ? `<span class="badge bg-secondary me-1">${displayStageName(issue.stage)}</span>` : ''}
|
||||
${issue.type === 'runtime' ? `<span class="badge bg-info me-1">${issue.environment?.toUpperCase()}</span>` : ''}
|
||||
${issue.error ? `<br><code>${escapeHtml(issue.error.substring(0, 100))}</code>` : ''}
|
||||
${issue.error ? `<br><span class="text-muted">${escapeHtml(issue.error.substring(0, 120))}</span>` : ''}
|
||||
</div>
|
||||
${issue.fix_suggestion ? `<div class="issue-suggestion">💡 ${escapeHtml(issue.fix_suggestion)}</div>` : ''}
|
||||
${issue.error_log ? `<div class="error-log-preview">${escapeHtml(issue.error_log.substring(0, 300))}</div>` : ''}
|
||||
|
||||
@@ -85,5 +85,5 @@ def test_compose_prometheus_blackbox_targets_health_only():
|
||||
assert "- http://momo-pro-system:80/health" in uat_block
|
||||
assert "- https://mo.wooo.work\n" not in uat_block
|
||||
assert "- http://192.168.0.110:5001\n" not in uat_block
|
||||
assert "- https://momo.wooo.work/health" in prod_block
|
||||
assert "- https://momo.wooo.work\n" not in prod_block
|
||||
assert "- https://mo.wooo.work/health" in prod_block
|
||||
assert "- https://momo.wooo.work" not in prod_block
|
||||
|
||||
@@ -9,3 +9,4 @@ def test_webcrumbs_loader_has_safe_fallback_response():
|
||||
assert "status=200, mimetype='application/javascript'" in source
|
||||
assert "X-Webcrumbs-Fallback" in source
|
||||
assert "upstream-unavailable" in source
|
||||
assert "Loader upstream unavailable; serving local fallback" in source
|
||||
|
||||
Reference in New Issue
Block a user