Files
awoooi/scripts/ops/seed-repair-playbooks.py
2026-04-05 12:23:55 +08:00

153 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
scripts/ops/seed-repair-playbooks.py
建立 Sprint 3 Host Repair Playbooks
2026-04-05 Claude Code: Sprint 3 Host Auto-Repair
用法: python3 scripts/ops/seed-repair-playbooks.py
需要: AWOOOI API 可訪問 (http://192.168.0.121:32334)
"""
import json
import os
import urllib.request
import urllib.error
# Base URL of the AWOOOI API; the API_BASE environment variable overrides the default.
API_BASE = os.getenv("API_BASE", "http://192.168.0.121:32334")
def _host_repair_playbook(
    *, name, description, alert, service, layer, keywords, step_description, tags
):
    """Build one single-step "restart over SSH" playbook definition.

    Every seeded playbook has the same shape: a symptom pattern keyed on one
    alert name and one service, and a single LOW-risk ``ssh_command`` step
    whose command is ``"<layer>/<service>"``.
    """
    symptom_pattern = {
        "alert_names": [alert],
        "affected_services": [service],
        "keywords": keywords,
        "label_patterns": {"layer": layer, "component": service},
    }
    restart_step = {
        "step_number": 1,
        "action_type": "ssh_command",
        "command": f"{layer}/{service}",
        "description": step_description,
        "risk_level": "LOW",
    }
    return {
        "name": name,
        "description": description,
        "symptom_pattern": symptom_pattern,
        "repair_steps": [restart_step],
        "tags": tags,
    }


# ssh_command format: "layer/component" -- per the original note, this string
# is parsed by auto_repair_service._execute_step.
PLAYBOOKS = [
    _host_repair_playbook(
        name="sentry-down-repair",
        description="Sentry (110) 離線自動修復",
        alert="SentryDown",
        service="sentry",
        layer="docker-110",
        keywords=["SentryDown", "sentry", "9000"],
        step_description="SSH 到 110docker compose up -d Sentry",
        tags=["sentry", "docker-110", "auto-repair"],
    ),
    _host_repair_playbook(
        name="harbor-down-repair",
        description="Harbor Registry (110) 離線自動修復",
        alert="HarborDown",
        service="harbor",
        layer="docker-110",
        keywords=["HarborDown", "harbor", "5000", "ImagePullBackOff"],
        step_description="SSH 到 110docker compose up -d Harbor",
        tags=["harbor", "docker-110", "auto-repair", "registry"],
    ),
    _host_repair_playbook(
        name="gitea-down-repair",
        description="Gitea (110) 離線自動修復",
        alert="GiteaDown",
        service="gitea",
        layer="docker-110",
        keywords=["GiteaDown", "gitea", "3001"],
        step_description="SSH 到 110docker compose up -d Gitea",
        tags=["gitea", "docker-110", "auto-repair"],
    ),
    _host_repair_playbook(
        name="alertmanager-down-repair",
        description="Alertmanager (110) 離線自動修復",
        alert="AlertmanagerDown",
        service="alertmanager",
        layer="docker-110",
        keywords=["AlertmanagerDown", "alertmanager", "9093"],
        step_description="SSH 到 110docker compose up -d monitoring (含 Alertmanager)",
        tags=["alertmanager", "docker-110", "auto-repair", "critical-infra"],
    ),
    _host_repair_playbook(
        name="openclaw-down-repair",
        description="OpenClaw (188) 離線自動修復",
        alert="OpenClawDown",
        service="openclaw",
        layer="docker-188",
        keywords=["OpenClawDown", "openclaw", "8088"],
        step_description="SSH 到 188docker compose up -d OpenClaw",
        tags=["openclaw", "docker-188", "auto-repair"],
    ),
]
def create_playbook(playbook_data: dict) -> bool:
    """Create one playbook by POSTing it to the playbooks API endpoint.

    Args:
        playbook_data: JSON-serialisable playbook definition. Its ``name``
            entry is used in the progress messages.

    Returns:
        True when the API answered with a success status, or when it reported
        that the playbook already exists (HTTP 409 or an "already exists"
        body). False on any other HTTP error or transport failure.
    """
    # Only used for progress output; a missing name must not crash the
    # error-reporting paths below.
    name = playbook_data.get("name", "?")
    # Tolerate an API_BASE override ending in "/" so the URL has no "//".
    url = API_BASE.rstrip("/") + "/api/v1/playbooks/"
    req = urllib.request.Request(
        url,
        data=json.dumps(playbook_data).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # The error body may not be valid UTF-8; never let decoding raise
        # from inside the handler.
        body = e.read().decode("utf-8", errors="replace")
        if "already exists" in body or e.code == 409:
            print(f" -- Already exists: {name}")
            return True
        print(f" ER Failed: {name} -- HTTP {e.code}: {body[:100]}")
        return False
    except Exception as e:
        # Deliberate best-effort boundary: report and move on to the next one.
        print(f" ER Error: {name} -- {e}")
        return False

    # urlopen raises HTTPError for non-success statuses, so reaching this
    # point means the request was accepted. Extracting the id is best-effort:
    # an empty, non-JSON or non-object body must not turn success into failure.
    playbook_id = "?"
    try:
        result = json.loads(raw)
    except ValueError:
        result = None
    if isinstance(result, dict):
        playbook_id = result.get("playbook_id", "?")
    print(f" OK Created: {name} (id: {playbook_id})")
    return True
if __name__ == "__main__":
    # Seed every playbook in order, then report how many were created or
    # already present.
    print("=== 建立 Host Repair Playbooks ===")
    outcomes = [create_playbook(entry) for entry in PLAYBOOKS]
    succeeded = sum(1 for ok in outcomes if ok)
    print(f"\n結果: {succeeded}/{len(PLAYBOOKS)} playbooks 建立成功")