# =============================================================================
# AWOOOI Nightly LLM Tests (2026-03-26)
# =============================================================================
# 🤖 Full LLM test suite - runs daily (CPU inference takes ~45 minutes)
#
# Test contents:
#   - test_model_regression.py: model regression tests (4 cases)
#   - test_prompt_validation.py: prompt quality validation (5 cases)
#
# Triggers:
#   - Daily at 00:00 UTC (08:00 Taipei)
#   - Manual dispatch
name: Nightly LLM Tests

# Triggers: a nightly schedule plus a manual dispatch that can override the
# per-test timeout.
on:
  schedule:
    - cron: '0 0 * * *'  # Daily at 00:00 UTC (08:00 Taipei)
  workflow_dispatch:
    inputs:
      timeout:
        description: 'Timeout per test (seconds)'
        required: false
        default: '300'
# All runs share one concurrency group, so at most one run is active at a
# time; starting a new run cancels the one already in progress.
concurrency:
  group: nightly-llm
  cancel-in-progress: true
# Workflow-level environment, visible to every step both as ${{ env.NAME }}
# and as a regular shell variable.
env:
  # Quoted so the version stays a string instead of being read as a float.
  PYTHON_VERSION: '3.11'
  # Ollama HTTP endpoint on a private (192.168.x.x) address.
  # NOTE(review): presumably reachable only from the self-hosted runner - confirm.
  OLLAMA_URL: 'http://192.168.0.188:11434'
  # Model tag reported in the summary step; quoted because it contains ':'.
  OLLAMA_MODEL: 'qwen2.5:7b-instruct'
jobs:
  # Single job: verifies the Ollama server is reachable, then runs the model
  # regression suite followed by the prompt validation suite.
  llm-regression:
    name: LLM Regression Tests
    runs-on: [self-hosted, harbor, k8s]
    timeout-minutes: 60  # Hard cap of one hour for the whole job
    steps:
      # 2026-03-29 Claude Code: fix for the _diag/pages file conflict.
      # Removes and recreates <runner root>/_diag/pages, where the runner root
      # is computed as two directory levels above $RUNNER_TEMP. Best-effort:
      # errors are silenced and never fail the step.
      - name: "Clean Runner Diagnostics"
        run: |
          RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
          rm -rf "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true
          mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v3

      # Fail fast when Ollama is unavailable instead of letting every test
      # time out. The tag list is fetched once and reused for the listing.
      #   -f  treat HTTP 4xx/5xx responses as failures
      #   -sS stay quiet but still print the error when the request fails
      #   --max-time bounds the whole request, not just the connect phase
      - name: Check Ollama
        run: |
          echo "🔗 檢查 Ollama 服務..."
          if tags_json=$(curl -fsS --connect-timeout 10 --max-time 30 "$OLLAMA_URL/api/tags"); then
            echo "✅ Ollama 可用"
            printf '%s' "$tags_json" | python3 -c "import sys,json; [print(f' - {m[\"name\"]}') for m in json.load(sys.stdin).get('models',[])]"
          else
            echo "❌ Ollama 無法連線"
            exit 1
          fi

      # Installs dependencies with `uv sync`, then runs the regression suite.
      - name: Model Regression Tests
        working-directory: apps/api
        env:
          PYTHONPATH: ${{ github.workspace }}/apps/api
          # Passed through the environment rather than expanded inside the
          # script, so a manually supplied value cannot inject shell commands.
          # Scheduled runs have no inputs and fall back to 300 seconds.
          PYTEST_TIMEOUT: ${{ github.event.inputs.timeout || '300' }}
        run: |
          echo "🧪 執行模型回歸測試..."
          echo " CPU 模式: 預估 ~15-20 分鐘"
          uv sync
          uv run pytest tests/test_model_regression.py -v --tb=short \
            --timeout="$PYTEST_TIMEOUT"

      # Relies on the environment already synced by the previous step.
      - name: Prompt Validation Tests
        working-directory: apps/api
        env:
          PYTHONPATH: ${{ github.workspace }}/apps/api
          # Same injection-safe hand-off of the timeout as in the step above.
          PYTEST_TIMEOUT: ${{ github.event.inputs.timeout || '300' }}
        run: |
          echo "📝 執行 Prompt 品質驗證..."
          echo " CPU 模式: 預估 ~20-25 分鐘"
          uv run pytest tests/test_prompt_validation.py -v --tb=short \
            --timeout="$PYTEST_TIMEOUT"

      # Always runs, including after a failed step, so the log records which
      # endpoint and model the run was configured with.
      - name: Summary
        if: always()
        run: |
          echo "📊 Nightly LLM 測試完成"
          echo " Ollama: $OLLAMA_URL"
          echo " Model: $OLLAMA_MODEL"
          echo " Mode: CPU (no GPU)"