fix: align deploy health checks with live endpoint
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
This commit is contained in:
@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
|||||||
# ==========================================
|
# ==========================================
|
||||||
# 系統版本與路徑
|
# 系統版本與路徑
|
||||||
# ==========================================
|
# ==========================================
|
||||||
SYSTEM_VERSION = "V10.676"
|
SYSTEM_VERSION = "V10.677"
|
||||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||||
public_url = PUBLIC_URL # 用於模板顯示
|
public_url = PUBLIC_URL # 用於模板顯示
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ SERVICE=$(echo "$QUERY_STRING" | sed -n 's/.*service=\([^&]*\).*/\1/p')
|
|||||||
|
|
||||||
# 定義服務健康檢查 URL
|
# 定義服務健康檢查 URL
|
||||||
declare -A HEALTH_URLS=(
|
declare -A HEALTH_URLS=(
|
||||||
["momo-uat"]="https://mo.wooo.work/health"
|
["momo-live"]="https://mo.wooo.work/health"
|
||||||
["momo-gcp"]="https://momo.wooo.work/health"
|
["momo-prod"]="https://mo.wooo.work/health"
|
||||||
["gitlab"]="http://127.0.0.1:8929/"
|
["gitlab"]="http://127.0.0.1:8929/"
|
||||||
["registry"]="http://127.0.0.1:5002/v2/"
|
["registry"]="http://127.0.0.1:5002/v2/"
|
||||||
["n8n"]="http://127.0.0.1:5678/"
|
["n8n"]="http://127.0.0.1:5678/"
|
||||||
|
|||||||
@@ -27,9 +27,9 @@ check_service() {
|
|||||||
echo '{"services": {'
|
echo '{"services": {'
|
||||||
|
|
||||||
# 核心服務
|
# 核心服務
|
||||||
check_service "momo-uat" "https://mo.wooo.work/health"
|
check_service "momo-live" "https://mo.wooo.work/health"
|
||||||
echo ","
|
echo ","
|
||||||
check_service "momo-gcp" "https://momo.wooo.work/health"
|
check_service "momo-prod" "https://mo.wooo.work/health"
|
||||||
echo ","
|
echo ","
|
||||||
|
|
||||||
# 開發工具
|
# 開發工具
|
||||||
|
|||||||
@@ -119,7 +119,7 @@ scrape_configs:
|
|||||||
module: [http_2xx]
|
module: [http_2xx]
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets:
|
- targets:
|
||||||
- https://momo.wooo.work/health
|
- https://mo.wooo.work/health
|
||||||
labels:
|
labels:
|
||||||
env: 'prod'
|
env: 'prod'
|
||||||
probe_type: 'http'
|
probe_type: 'http'
|
||||||
@@ -268,24 +268,6 @@ scrape_configs:
|
|||||||
- target_label: __address__
|
- target_label: __address__
|
||||||
replacement: blackbox-exporter:9115
|
replacement: blackbox-exporter:9115
|
||||||
|
|
||||||
- job_name: 'blackbox-dns-momo'
|
|
||||||
metrics_path: /probe
|
|
||||||
params:
|
|
||||||
module: [dns_check_momo]
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- 8.8.8.8 # Google DNS - momo.wooo.work
|
|
||||||
labels:
|
|
||||||
domain: 'momo.wooo.work'
|
|
||||||
probe_type: 'dns'
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__address__]
|
|
||||||
target_label: __param_target
|
|
||||||
- source_labels: [__param_target]
|
|
||||||
target_label: instance
|
|
||||||
- target_label: __address__
|
|
||||||
replacement: blackbox-exporter:9115
|
|
||||||
|
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
# 監控系統自身
|
# 監控系統自身
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
|
|||||||
@@ -753,3 +753,4 @@ POSTGRES_HOST=momo-db
|
|||||||
| 2026-06-25 | 共用成長流程列手機版不可溢出畫面 | V10.674 起全站 `momo-growth-rail` 在手機寬度改為換行呈現,避免「評估 / 分析 / 建議 / 解法 / 治理」流程 chip 超出視覺邊界。 |
|
| 2026-06-25 | 共用成長流程列手機版不可溢出畫面 | V10.674 起全站 `momo-growth-rail` 在手機寬度改為換行呈現,避免「評估 / 分析 / 建議 / 解法 / 治理」流程 chip 超出視覺邊界。 |
|
||||||
| 2026-06-25 | 匯入、缺貨、設定與通知模板頁不可外露 SQL / 資料表 / 模板代碼 | V10.675 起匯入 job API 對前台回傳白話處置訊息,保留 raw error 於 DB/log;自動匯入失敗通知不再顯示 `psycopg2`、`daily_sales_snapshot`、`snapshot_date` 等內部字串;缺貨首頁、系統設定與通知模板列表改用營運語言,並補上逾時匯入任務重置與取消 API。 |
|
| 2026-06-25 | 匯入、缺貨、設定與通知模板頁不可外露 SQL / 資料表 / 模板代碼 | V10.675 起匯入 job API 對前台回傳白話處置訊息,保留 raw error 於 DB/log;自動匯入失敗通知不再顯示 `psycopg2`、`daily_sales_snapshot`、`snapshot_date` 等內部字串;缺貨首頁、系統設定與通知模板列表改用營運語言,並補上逾時匯入任務重置與取消 API。 |
|
||||||
| 2026-06-25 | 觀測台入口與通知預覽不可用工程主語干擾營運判讀 | V10.676 起觀測台導覽統一使用「AI 分工矩陣」,通知模板列表會把 K8s/Pod/資料庫/CI Pipeline 等內部詞轉成服務健康、資料連線與部署流程;主機健康事件與自癒劇本改顯示任務/問題/處置提醒,不直接露 `unknown_task`、`scheduler_task_failure`、`CODE_FIX` 等 raw code。 |
|
| 2026-06-25 | 觀測台入口與通知預覽不可用工程主語干擾營運判讀 | V10.676 起觀測台導覽統一使用「AI 分工矩陣」,通知模板列表會把 K8s/Pod/資料庫/CI Pipeline 等內部詞轉成服務健康、資料連線與部署流程;主機健康事件與自癒劇本改顯示任務/問題/處置提醒,不直接露 `unknown_task`、`scheduler_task_failure`、`CODE_FIX` 等 raw code。 |
|
||||||
|
| 2026-06-25 | 部署監控不得用退役正式域名判定失敗 | V10.677 起 CI/CD 狀態 API 與 active blackbox 監控預設以 `PUBLIC_URL` / `PROD_BASE_URL` 對齊現行正式入口 `https://mo.wooo.work/health`,不再把 `momo.wooo.work` timeout 判成正式部署失敗;Webcrumbs loader fallback 也改為資訊級降級訊號,避免健康頁與 log 產生假紅燈。 |
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
cicd_bp = Blueprint('cicd', __name__)
|
cicd_bp = Blueprint('cicd', __name__)
|
||||||
cicd_log = logging.getLogger('cicd_routes')
|
cicd_log = logging.getLogger('cicd_routes')
|
||||||
@@ -97,14 +98,41 @@ if not GITLAB_ENABLED:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# 環境配置
|
# 環境配置
|
||||||
|
def _normalize_base_url(value, fallback='https://mo.wooo.work'):
|
||||||
|
candidate = (value or '').strip().rstrip('/') or fallback
|
||||||
|
parsed = urlparse(candidate)
|
||||||
|
if parsed.scheme not in {'http', 'https'} or not parsed.netloc:
|
||||||
|
return fallback.rstrip('/')
|
||||||
|
return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def _health_endpoint_for(base_url):
|
||||||
|
return f"{base_url.rstrip('/')}/health"
|
||||||
|
|
||||||
|
|
||||||
|
# Public entry point of the application, taken from PUBLIC_URL.
PUBLIC_BASE_URL = _normalize_base_url(os.getenv('PUBLIC_URL'), 'https://mo.wooo.work')

# Base URL for the 'uat' environment entry. Precedence:
# CICD_UAT_BASE_URL, then MOMO_BASE_URL, then the public entry point.
CICD_UAT_BASE_URL = _normalize_base_url(
    os.getenv('CICD_UAT_BASE_URL') or os.getenv('MOMO_BASE_URL') or PUBLIC_BASE_URL,
    PUBLIC_BASE_URL,
)

# Base URL for the 'prod' environment entry. Precedence:
# CICD_PROD_BASE_URL, then PROD_BASE_URL, then the public entry point.
CICD_PROD_BASE_URL = _normalize_base_url(
    os.getenv('CICD_PROD_BASE_URL') or os.getenv('PROD_BASE_URL') or PUBLIC_BASE_URL,
    PUBLIC_BASE_URL,
)
|
||||||
|
|
||||||
|
|
||||||
ENVIRONMENTS = {
|
ENVIRONMENTS = {
|
||||||
'uat': {
|
'uat': {
|
||||||
'name': 'UAT',
|
'name': 'LIVE',
|
||||||
'label': '測試環境',
|
'label': '線上入口',
|
||||||
'color': '#3498db',
|
'color': '#3498db',
|
||||||
'icon': '🟦',
|
'icon': '🟦',
|
||||||
'url': 'https://mo.wooo.work',
|
'url': CICD_UAT_BASE_URL,
|
||||||
'health_endpoint': 'https://mo.wooo.work/health',
|
'health_endpoint': _health_endpoint_for(CICD_UAT_BASE_URL),
|
||||||
'runtime_host': '192.168.0.188'
|
'runtime_host': '192.168.0.188'
|
||||||
},
|
},
|
||||||
'prod': {
|
'prod': {
|
||||||
@@ -112,12 +140,21 @@ ENVIRONMENTS = {
|
|||||||
'label': '正式環境',
|
'label': '正式環境',
|
||||||
'color': '#e74c3c',
|
'color': '#e74c3c',
|
||||||
'icon': '🟥',
|
'icon': '🟥',
|
||||||
'url': 'https://momo.wooo.work',
|
'url': CICD_PROD_BASE_URL,
|
||||||
'health_endpoint': 'https://momo.wooo.work/health',
|
'health_endpoint': _health_endpoint_for(CICD_PROD_BASE_URL),
|
||||||
'runtime_host': '192.168.0.188'
|
'runtime_host': '192.168.0.188'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _public_health_error(exc):
|
||||||
|
text = str(exc or '').lower()
|
||||||
|
if 'timeout' in text or 'timed out' in text:
|
||||||
|
return '健康檢查逾時,請確認正式入口、Nginx 與 188 應用容器狀態。'
|
||||||
|
if 'connection' in text or 'refused' in text or 'max retries' in text:
|
||||||
|
return '健康檢查無法連線,請確認正式入口、Nginx 與 188 應用容器狀態。'
|
||||||
|
return '健康檢查暫時無法完成,請稍後重試或查看部署診斷。'
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# 部署監控頁面
|
# 部署監控頁面
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -171,7 +208,7 @@ def get_cicd_status():
|
|||||||
'type': 'environment',
|
'type': 'environment',
|
||||||
'environment': env_id,
|
'environment': env_id,
|
||||||
'message': f"{env_status.get('name')} 環境異常",
|
'message': f"{env_status.get('name')} 環境異常",
|
||||||
'error': env_status.get('error'),
|
'error': env_status.get('display_error') or env_status.get('error'),
|
||||||
'severity': 'critical',
|
'severity': 'critical',
|
||||||
'auto_fixable': True,
|
'auto_fixable': True,
|
||||||
'fix_action': 'diagnose'
|
'fix_action': 'diagnose'
|
||||||
@@ -259,7 +296,7 @@ def get_pipeline_detail(pipeline_id):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': str(e)
|
'error': '部署監控暫時無法完成,請稍後重試或查看服務健康狀態。'
|
||||||
}), 500
|
}), 500
|
||||||
|
|
||||||
|
|
||||||
@@ -448,7 +485,7 @@ def run_diagnosis(env):
|
|||||||
diagnosis['checks'].append({
|
diagnosis['checks'].append({
|
||||||
'name': '健康端點',
|
'name': '健康端點',
|
||||||
'status': 'failed',
|
'status': 'failed',
|
||||||
'error': str(e)
|
'error': _public_health_error(e)
|
||||||
})
|
})
|
||||||
|
|
||||||
# EwoooC 已撤除舊叢集 runtime,這裡只保留現行 Docker Compose 狀態說明。
|
# EwoooC 已撤除舊叢集 runtime,這裡只保留現行 Docker Compose 狀態說明。
|
||||||
@@ -473,7 +510,7 @@ def run_diagnosis(env):
|
|||||||
diagnosis['checks'].append({
|
diagnosis['checks'].append({
|
||||||
'name': 'Registry',
|
'name': 'Registry',
|
||||||
'status': 'failed',
|
'status': 'failed',
|
||||||
'error': str(e)
|
'error': '映像倉庫健康檢查暫時無法完成,請查看 Registry 服務狀態。'
|
||||||
})
|
})
|
||||||
|
|
||||||
# 生成總結
|
# 生成總結
|
||||||
@@ -501,7 +538,7 @@ def run_diagnosis(env):
|
|||||||
})
|
})
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
diagnosis['error'] = str(e)
|
diagnosis['error'] = '部署診斷暫時無法完成,請稍後重試。'
|
||||||
|
|
||||||
return diagnosis
|
return diagnosis
|
||||||
|
|
||||||
@@ -877,7 +914,13 @@ def get_environment_status(env_id):
|
|||||||
status['response_time'] = round(response_time, 2)
|
status['response_time'] = round(response_time, 2)
|
||||||
status['last_check'] = datetime.now().isoformat()
|
status['last_check'] = datetime.now().isoformat()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
status['error'] = str(e)
|
cicd_log.warning(
|
||||||
|
"[CI/CD] Health check failed env=%s url=%s error=%s",
|
||||||
|
env_id,
|
||||||
|
env_config.get('health_endpoint'),
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
status['error'] = _public_health_error(e)
|
||||||
|
|
||||||
status['runtime_note'] = 'Docker Compose on 192.168.0.188; legacy cluster probes disabled.'
|
status['runtime_note'] = 'Docker Compose on 192.168.0.188; legacy cluster probes disabled.'
|
||||||
|
|
||||||
|
|||||||
@@ -217,15 +217,20 @@ def webcrumbs_asset_proxy(asset_path):
|
|||||||
try:
|
try:
|
||||||
upstream_response = requests.get(upstream_url, timeout=(2, 8))
|
upstream_response = requests.get(upstream_url, timeout=(2, 8))
|
||||||
except requests.RequestException as exc:
|
except requests.RequestException as exc:
|
||||||
sys_log.warning(f"[Webcrumbs] Asset proxy failed: {normalized_path} -> {exc}")
|
|
||||||
if normalized_path == WEBCRUMBS_COMPATIBLE_LOADER_PATH:
|
if normalized_path == WEBCRUMBS_COMPATIBLE_LOADER_PATH:
|
||||||
|
sys_log.info("[Webcrumbs] Loader upstream unavailable; serving local fallback")
|
||||||
return _webcrumbs_fallback_loader_response('upstream-unavailable')
|
return _webcrumbs_fallback_loader_response('upstream-unavailable')
|
||||||
|
sys_log.warning(f"[Webcrumbs] Asset proxy failed: {normalized_path} -> {exc}")
|
||||||
return Response('Webcrumbs asset upstream unavailable', status=502, mimetype='text/plain')
|
return Response('Webcrumbs asset upstream unavailable', status=502, mimetype='text/plain')
|
||||||
|
|
||||||
if upstream_response.status_code >= 400:
|
if upstream_response.status_code >= 400:
|
||||||
sys_log.warning(f"[Webcrumbs] Asset proxy returned {upstream_response.status_code}: {normalized_path}")
|
|
||||||
if normalized_path == WEBCRUMBS_COMPATIBLE_LOADER_PATH:
|
if normalized_path == WEBCRUMBS_COMPATIBLE_LOADER_PATH:
|
||||||
|
sys_log.info(
|
||||||
|
"[Webcrumbs] Loader upstream returned %s; serving local fallback",
|
||||||
|
upstream_response.status_code,
|
||||||
|
)
|
||||||
return _webcrumbs_fallback_loader_response(f'upstream-{upstream_response.status_code}')
|
return _webcrumbs_fallback_loader_response(f'upstream-{upstream_response.status_code}')
|
||||||
|
sys_log.warning(f"[Webcrumbs] Asset proxy returned {upstream_response.status_code}: {normalized_path}")
|
||||||
return Response('Webcrumbs asset upstream returned error', status=upstream_response.status_code, mimetype='text/plain')
|
return Response('Webcrumbs asset upstream returned error', status=upstream_response.status_code, mimetype='text/plain')
|
||||||
|
|
||||||
content_type = upstream_response.headers.get('Content-Type')
|
content_type = upstream_response.headers.get('Content-Type')
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ compare_health() {
|
|||||||
log "💓 比對服務健康狀態..."
|
log "💓 比對服務健康狀態..."
|
||||||
|
|
||||||
UAT_HEALTH=$(curl -s -o /dev/null -w '%{http_code}' "https://mo.wooo.work/health" 2>/dev/null)
|
UAT_HEALTH=$(curl -s -o /dev/null -w '%{http_code}' "https://mo.wooo.work/health" 2>/dev/null)
|
||||||
GCP_HEALTH=$(curl -s -o /dev/null -w '%{http_code}' "https://momo.wooo.work/health" 2>/dev/null)
|
GCP_HEALTH=$(curl -s -o /dev/null -w '%{http_code}' "https://mo.wooo.work/health" 2>/dev/null)
|
||||||
|
|
||||||
log " UAT: ${UAT_HEALTH}"
|
log " UAT: ${UAT_HEALTH}"
|
||||||
log " GCP: ${GCP_HEALTH}"
|
log " GCP: ${GCP_HEALTH}"
|
||||||
@@ -228,7 +228,7 @@ main() {
|
|||||||
|
|
||||||
<b>檢查時間:</b> $(date '+%Y-%m-%d %H:%M:%S')
|
<b>檢查時間:</b> $(date '+%Y-%m-%d %H:%M:%S')
|
||||||
<b>UAT:</b> https://mo.wooo.work
|
<b>UAT:</b> https://mo.wooo.work
|
||||||
<b>GCP:</b> https://momo.wooo.work
|
<b>正式入口:</b> https://mo.wooo.work
|
||||||
|
|
||||||
所有配置、版本、服務狀態一致。"
|
所有配置、版本、服務狀態一致。"
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -34,8 +34,7 @@ NC='\033[0m'
|
|||||||
|
|
||||||
declare -A DOMAINS=(
|
declare -A DOMAINS=(
|
||||||
# 核心業務 - 最高優先級
|
# 核心業務 - 最高優先級
|
||||||
["https://mo.wooo.work/health"]="200|MOMO App UAT|kubectl rollout restart deployment/momo-app -n momo|60"
|
["https://mo.wooo.work/health"]="200|MOMO Pro 正式入口|ssh ollama@192.168.0.188 'cd /home/ollama/momo-pro && docker compose up -d --no-deps --force-recreate momo-app'|60"
|
||||||
["https://momo.wooo.work/health"]="200|MOMO App GCP|gcloud compute ssh momo-pro-gcp --zone=asia-east1-b --command='sudo kubectl rollout restart deployment/momo-app -n momo'|60"
|
|
||||||
|
|
||||||
# CI/CD 工具 - 高優先級(GitLab 需要更長啟動時間)
|
# CI/CD 工具 - 高優先級(GitLab 需要更長啟動時間)
|
||||||
["http://192.168.0.110:8929/"]="200|GitLab|docker restart wooo-gitlab|120"
|
["http://192.168.0.110:8929/"]="200|GitLab|docker restart wooo-gitlab|120"
|
||||||
|
|||||||
@@ -718,7 +718,7 @@
|
|||||||
<div class="issue-detail">
|
<div class="issue-detail">
|
||||||
${issue.type === 'job' ? `<span class="badge bg-secondary me-1">${displayStageName(issue.stage)}</span>` : ''}
|
${issue.type === 'job' ? `<span class="badge bg-secondary me-1">${displayStageName(issue.stage)}</span>` : ''}
|
||||||
${issue.type === 'runtime' ? `<span class="badge bg-info me-1">${issue.environment?.toUpperCase()}</span>` : ''}
|
${issue.type === 'runtime' ? `<span class="badge bg-info me-1">${issue.environment?.toUpperCase()}</span>` : ''}
|
||||||
${issue.error ? `<br><code>${escapeHtml(issue.error.substring(0, 100))}</code>` : ''}
|
${issue.error ? `<br><span class="text-muted">${escapeHtml(issue.error.substring(0, 120))}</span>` : ''}
|
||||||
</div>
|
</div>
|
||||||
${issue.fix_suggestion ? `<div class="issue-suggestion">💡 ${escapeHtml(issue.fix_suggestion)}</div>` : ''}
|
${issue.fix_suggestion ? `<div class="issue-suggestion">💡 ${escapeHtml(issue.fix_suggestion)}</div>` : ''}
|
||||||
${issue.error_log ? `<div class="error-log-preview">${escapeHtml(issue.error_log.substring(0, 300))}</div>` : ''}
|
${issue.error_log ? `<div class="error-log-preview">${escapeHtml(issue.error_log.substring(0, 300))}</div>` : ''}
|
||||||
|
|||||||
@@ -85,5 +85,5 @@ def test_compose_prometheus_blackbox_targets_health_only():
|
|||||||
assert "- http://momo-pro-system:80/health" in uat_block
|
assert "- http://momo-pro-system:80/health" in uat_block
|
||||||
assert "- https://mo.wooo.work\n" not in uat_block
|
assert "- https://mo.wooo.work\n" not in uat_block
|
||||||
assert "- http://192.168.0.110:5001\n" not in uat_block
|
assert "- http://192.168.0.110:5001\n" not in uat_block
|
||||||
assert "- https://momo.wooo.work/health" in prod_block
|
assert "- https://mo.wooo.work/health" in prod_block
|
||||||
assert "- https://momo.wooo.work\n" not in prod_block
|
assert "- https://momo.wooo.work" not in prod_block
|
||||||
|
|||||||
@@ -9,3 +9,4 @@ def test_webcrumbs_loader_has_safe_fallback_response():
|
|||||||
assert "status=200, mimetype='application/javascript'" in source
|
assert "status=200, mimetype='application/javascript'" in source
|
||||||
assert "X-Webcrumbs-Fallback" in source
|
assert "X-Webcrumbs-Fallback" in source
|
||||||
assert "upstream-unavailable" in source
|
assert "upstream-unavailable" in source
|
||||||
|
assert "Loader upstream unavailable; serving local fallback" in source
|
||||||
|
|||||||
Reference in New Issue
Block a user