# Workflow trigger events that fire automatically from branch activity.
AUTO_BRANCH_EVENTS = {"push", "pull_request", "pull_request_target"}
# Runner labels that auto-branch events must not target (see ``inspect``).
INCIDENT_RUNNER_LABELS = {"awoooi-ubuntu", "awoooi-host"}
# Runner labels rejected outright as too generic (see ``label_is_generic``).
GENERIC_LABELS = {"ubuntu-latest", "self-hosted"}

# The one workflow that may be exempted from the incident-runner rule, and the
# service unit file whose contents must accompany that exemption. Both paths
# are relative to the repository root.
CONTROLLED_CD_WORKFLOW = Path(".gitea/workflows/cd.yaml")
CONTROLLED_CD_SERVICE = Path("ops/runner/awoooi-cd-lane-drain.service")
# The exemption covers push only; any other auto-branch event on the
# controlled workflow must still be reported.
CONTROLLED_CD_AUTO_EVENTS = frozenset({"push"})
# Text fragments that must all be present in the controlled workflow file.
CONTROLLED_CD_WORKFLOW_SNIPPETS = (
    "push:",
    "branches:",
    "- main",
    'HOST_WEB_BUILD_PRESSURE_WARN_ONLY: "1"',
    'DOCKER_BUILD_LOCK_WARN_ONLY: "1"',
    "scripts/ci/wait-host-web-build-pressure.sh",
    "contains(github.event.head_commit.message, '[skip ci]')",
    "contains(github.event.head_commit.message, 'cancel-stale-cd')",
)
# Text fragments that must all be present in the controlled service unit file.
CONTROLLED_CD_SERVICE_SNIPPETS = (
    "Environment=AWOOOI_CD_LANE_CONTROLLED=1",
    "CPUQuota=300%",
    "MemoryMax=10G",
    "TasksMax=1024",
    "NoNewPrivileges=true",
)


def contains_all(path: Path, snippets: tuple[str, ...]) -> bool:
    """Return True when *path* is UTF-8 text containing every snippet.

    The check fails closed: a path that is missing, is a directory, cannot be
    read, or does not decode as UTF-8 yields False instead of raising, so the
    guard reports a violation rather than crashing.

    NOTE(review): matching is a plain substring test, so a snippet that only
    appears inside a comment (or as part of a longer token such as
    ``- main-staging``) also satisfies the check.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return False
    return all(snippet in text for snippet in snippets)


def controlled_cd_push_allowed(root: Path, workflow: "WorkflowInfo") -> bool:
    """Return True when *workflow* qualifies for the controlled CD push exemption.

    All of the following must hold:

    * the workflow file is ``CONTROLLED_CD_WORKFLOW`` relative to *root*;
    * ``push`` is the only auto-branch event it declares;
    * the workflow file contains every ``CONTROLLED_CD_WORKFLOW_SNIPPETS`` entry;
    * the service unit under *root* contains every
      ``CONTROLLED_CD_SERVICE_SNIPPETS`` entry.

    The caller uses this result to skip the incident-runner violation for the
    whole workflow, so a workflow that also declares ``pull_request`` or
    ``pull_request_target`` must not qualify.
    """
    try:
        rel_path = workflow.path.relative_to(root)
    except ValueError:
        # The workflow does not live under ``root`` at all.
        return False
    if rel_path != CONTROLLED_CD_WORKFLOW:
        return False
    if (workflow.events & AUTO_BRANCH_EVENTS) != CONTROLLED_CD_AUTO_EVENTS:
        return False
    if not contains_all(workflow.path, CONTROLLED_CD_WORKFLOW_SNIPPETS):
        return False
    return contains_all(root / CONTROLLED_CD_SERVICE, CONTROLLED_CD_SERVICE_SNIPPETS)


def label_is_generic(label: str) -> bool:
    """Return True for a label in ``GENERIC_LABELS`` or starting with ``ubuntu-``."""
    return label in GENERIC_LABELS or label.startswith("ubuntu-")
# Reason string the guard attaches when an auto-branch event targets an
# incident runner label.
INCIDENT_RUNNER_REASON = "auto_branch_event_targets_110_incident_runner"

# Workflow text containing every snippet the guard requires of the controlled
# CD workflow. Shared by the tests that differ only in whether the service
# unit file is present.
CONTROLLED_CD_WORKFLOW_BODY = """
on:
  push:
    branches:
      - main
  workflow_dispatch:
env:
  HOST_WEB_BUILD_PRESSURE_WARN_ONLY: "1"
  DOCKER_BUILD_LOCK_WARN_ONLY: "1"
jobs:
  deploy:
    if: ${{ github.event_name != 'push' || (!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, 'cancel-stale-cd')) }}
    runs-on: awoooi-host
    steps:
      - run: bash scripts/ci/wait-host-web-build-pressure.sh
"""


def load_guard_module():
    """Load ``guard-gitea-runner-pressure.py`` from this directory as a module.

    The script's file name contains hyphens, so it cannot be imported with a
    normal ``import`` statement and is loaded from its path instead.
    """
    module_path = Path(__file__).with_name("guard-gitea-runner-pressure.py")
    spec = importlib.util.spec_from_file_location("guard_gitea_runner_pressure", module_path)
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    # Register before executing, as the importlib recipe does.
    sys.modules[spec.name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind for later tests.
        sys.modules.pop(spec.name, None)
        raise
    return module


def write_workflow(root: Path, body: str) -> None:
    """Write *body* to ``.gitea/workflows/cd.yaml`` under *root*."""
    workflow_dir = root / ".gitea" / "workflows"
    # exist_ok lets a test rewrite the workflow or pre-create the directory.
    workflow_dir.mkdir(parents=True, exist_ok=True)
    (workflow_dir / "cd.yaml").write_text(body, encoding="utf-8")


def write_controlled_service(root: Path) -> None:
    """Write a service unit under *root* containing the five controlled-lane settings."""
    service_dir = root / "ops" / "runner"
    service_dir.mkdir(parents=True, exist_ok=True)
    (service_dir / "awoooi-cd-lane-drain.service").write_text(
        "\n".join(
            (
                "Environment=AWOOOI_CD_LANE_CONTROLLED=1",
                "CPUQuota=300%",
                "MemoryMax=10G",
                "TasksMax=1024",
                "NoNewPrivileges=true",
            )
        ),
        encoding="utf-8",
    )


def _violation_reasons(guard, root: Path) -> list:
    """Run the guard's ``inspect`` over *root* and return each violation's reason."""
    _, violations = guard.inspect(root, root / ".gitea" / "workflows")
    return [item.reason for item in violations]


def test_blocks_auto_push_to_incident_runner(tmp_path: Path) -> None:
    """A plain push workflow on an incident runner is reported."""
    guard = load_guard_module()
    write_workflow(
        tmp_path,
        """
on:
  push:
    branches:
      - main
jobs:
  deploy:
    runs-on: awoooi-host
""",
    )

    assert _violation_reasons(guard, tmp_path) == [INCIDENT_RUNNER_REASON]


def test_allows_controlled_cd_push_contract(tmp_path: Path) -> None:
    """With both the workflow and service contracts present, nothing is reported."""
    guard = load_guard_module()
    write_workflow(tmp_path, CONTROLLED_CD_WORKFLOW_BODY)
    write_controlled_service(tmp_path)

    assert _violation_reasons(guard, tmp_path) == []


def test_blocks_controlled_cd_push_without_service_contract(tmp_path: Path) -> None:
    """The workflow contract alone, without the service unit, is still reported."""
    guard = load_guard_module()
    write_workflow(tmp_path, CONTROLLED_CD_WORKFLOW_BODY)

    assert _violation_reasons(guard, tmp_path) == [INCIDENT_RUNNER_REASON]