Files
awoooi/scripts/cron_km_vectorize.py
Your Name 8ddb80d63d
All checks were successful
Code Review / ai-code-review (push) Successful in 26s
CD Pipeline / tests (push) Successful in 1m57s
CD Pipeline / build-and-deploy (push) Successful in 6m1s
CD Pipeline / post-deploy-checks (push) Successful in 38s
fix(k8s): pass project context to km vectorize
2026-06-14 08:09:39 +08:00

52 lines
1.6 KiB
Python

#!/usr/bin/env python3
"""
KM Vectorize CronJob 入口腳本 — ADR-073 Phase 4-3
每日由 K8s CronJob 呼叫,對新增 KM 條目執行向量化,
確保 RAG 查詢可存取最新知識(飛輪「學習固化」節點)。
2026-04-12 ogt (ADR-073 Phase 4-3, P2-1 重構)
"""
import asyncio
import os
import sys
import httpx
def _project_headers() -> dict[str, str]:
    """Build the project-context headers for internal API calls (fail-closed RLS).

    The project id is taken from the ``KM_PROJECT_ID`` environment variable
    with surrounding whitespace removed. When the variable is unset or blank,
    ``"awoooi"`` is used instead.

    Returns:
        A single-entry mapping: ``{"X-Project-ID": <project id>}``.
    """
    fallback = "awoooi"
    configured = os.environ.get("KM_PROJECT_ID", fallback)
    # An all-whitespace value strips to "" (falsy), so `or` picks the fallback.
    return {"X-Project-ID": configured.strip() or fallback}
async def main() -> int:
    """Trigger the KM ``embed-all`` endpoint and map the outcome to an exit code.

    The API base URL is read from ``INTERNAL_API_URL`` (defaulting to the
    in-cluster service address). Trailing slashes are dropped so the joined
    path never contains ``//``. The request carries the headers built by
    ``_project_headers()``.

    The HTTP status and the first 200 characters of the response body are
    printed to stdout; every failure is reported on stderr.

    Returns:
        0 when the endpoint answers below HTTP 400 and reports no failed rows
        (a missing ``failed`` key counts as zero).
        1 when the request cannot be completed, the status is 400 or above,
        the body is not a JSON object with an integer-like ``failed`` value,
        or ``failed`` is greater than zero.
    """
    api_base = os.environ.get(
        "INTERNAL_API_URL",
        "http://awoooi-api-svc.awoooi-prod.svc.cluster.local:8000",
    ).rstrip("/")
    url = f"{api_base}/api/v1/knowledge/embed-all"

    # Client-wide timeout of 1800 seconds (30 minutes).
    async with httpx.AsyncClient(timeout=1800) as client:
        # Only the network call can raise httpx.RequestError, so keep the
        # try body down to that single statement.
        try:
            resp = await client.post(url, headers=_project_headers())
        except httpx.RequestError as exc:
            print(f"ERROR: request failed — {exc}", file=sys.stderr)
            return 1

        print(f"embed-all: {resp.status_code} {resp.text[:200]}")
        if resp.status_code >= 400:
            print(f"ERROR: embed-all returned {resp.status_code}", file=sys.stderr)
            return 1

        # A non-error reply with a non-JSON or unexpectedly shaped body would
        # otherwise escape as a raw traceback; report it like any other
        # failure instead. JSON decode errors are ValueError subclasses.
        try:
            result = resp.json()
            if not isinstance(result, dict):
                raise TypeError(
                    f"expected a JSON object, got {type(result).__name__}"
                )
            failed = int(result.get("failed", 0))
        except (TypeError, ValueError) as exc:
            print(
                f"ERROR: embed-all returned an unusable body — {exc}",
                file=sys.stderr,
            )
            return 1

        if failed > 0:
            print(f"ERROR: embed-all failed rows: {result}", file=sys.stderr)
            return 1
        return 0
if __name__ == "__main__":
    # Run the async entry point to completion and use its return value
    # as the process exit status.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)