Files
awoooi/scripts/security/package-supply-chain-baseline.py

375 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""產生 AWOOOI package / Docker 供應鏈 repo-only baseline。
本工具只掃描 repo 內的 manifest、lockfile、Dockerfile 與 docker-compose
檔案,不安裝套件、不連外、不跑 CVE scan、不讀 secret、不修改 workflow 或
runtime。輸出用於 IwoooS 供應鏈治理的低摩擦證據基線。
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
try:
import tomllib
except ModuleNotFoundError: # pragma: no cover - Python 3.10 fallback
tomllib = None # type: ignore[assignment]
# Fixed UTC+8 offset; build_snapshot uses it for the default ``generated_at``.
TAIPEI = timezone(timedelta(hours=8))
# Path components that exclude a file from the scan (checked by should_skip).
IGNORED_DIRS = {
".git",
".next",
".turbo",
"__pycache__",
"node_modules",
"test-results",
}
# Exact file names treated as Node / Python project manifests.
PACKAGE_JSON_NAMES = {"package.json"}
PYPROJECT_NAMES = {"pyproject.toml"}
# File names such as requirements.txt, requirements-dev.txt, requirements_test.txt.
REQUIREMENTS_PATTERN = re.compile(r"requirements(?:[-_.a-zA-Z0-9]*)?\.txt$")
# Paths whose last component is "Dockerfile" or "Dockerfile.<suffix>".
DOCKERFILE_PATTERN = re.compile(r"(?:^|/)Dockerfile(?:\.[A-Za-z0-9_.-]+)?$")
# Paths whose last component starts with "docker-compose" or "compose" and
# ends in .yml / .yaml.
COMPOSE_PATTERN = re.compile(r"(?:^|/)(?:docker-compose|compose)(?:[A-Za-z0-9_.-]*)?\.ya?ml$")
# "FROM [--platform=...] <image>" line; the image reference is captured as "image".
FROM_PATTERN = re.compile(r"^\s*FROM\s+(?:--platform=\S+\s+)?(?P<image>\S+)", re.IGNORECASE)
# Trailing "AS <alias>" part of a FROM line (the build-stage name).
FROM_ALIAS_PATTERN = re.compile(r"\s+AS\s+(?P<alias>[A-Za-z0-9_.-]+)\s*$", re.IGNORECASE)
# "COPY --from=<source>" line where --from is the first flag after COPY.
COPY_FROM_PATTERN = re.compile(r"^\s*COPY\s+--from=(?P<image>\S+)", re.IGNORECASE)
# "image: <ref>" line; optional quotes are skipped and the capture stops at
# whitespace, a quote, or "#".
IMAGE_PATTERN = re.compile(r"^\s*image\s*:\s*[\"']?(?P<image>[^\"'#\s]+)", re.IGNORECASE)
# File names recognised as dependency lockfiles.
LOCKFILE_NAMES = {
"pnpm-lock.yaml",
"package-lock.json",
"yarn.lock",
"poetry.lock",
"uv.lock",
"Pipfile.lock",
}
# Static block emitted in every snapshot under "execution_boundaries": every
# *_authorized / *_allowed flag is False and both counters are 0, while
# "not_authorization" is True.
EXECUTION_BOUNDARIES = {
"package_install_authorized": False,
"dependency_upgrade_authorized": False,
"lockfile_rewrite_authorized": False,
"npm_audit_authorized": False,
"pip_audit_authorized": False,
"cve_scan_authorized": False,
"docker_build_authorized": False,
"docker_pull_authorized": False,
"docker_push_authorized": False,
"image_tag_change_authorized": False,
"image_digest_pin_change_authorized": False,
"registry_login_authorized": False,
"secret_value_collection_allowed": False,
"workflow_modification_authorized": False,
"production_deploy_authorized": False,
"runtime_gate_count": 0,
"action_button_count": 0,
"not_authorization": True,
}
def should_skip(path: Path) -> bool:
    """Report whether any component of *path* is listed in ``IGNORED_DIRS``.

    Every element of ``path.parts`` is checked, the final component included.
    """
    return not IGNORED_DIRS.isdisjoint(path.parts)
def git_commit(root: Path) -> str:
    """Return the abbreviated HEAD commit (``--short=8``) of the repo at *root*.

    Returns "unknown" when git cannot be started or exits with a non-zero
    status; git's stderr output is discarded.
    """
    command = ["git", "rev-parse", "--short=8", "HEAD"]
    try:
        raw = subprocess.check_output(
            command,
            cwd=root,
            text=True,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError):
        return "unknown"
    return raw.strip()
def read_json(path: Path) -> dict[str, Any]:
    """Parse the UTF-8 encoded JSON document at *path* and return the result."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
def package_manager_from_root(root: Path) -> str:
    """Describe the package manager declared at the repository root.

    Returns the ``packageManager`` value from ``root/package.json`` when it is
    a string; otherwise "pnpm-lock-present" if ``root/pnpm-lock.yaml`` exists;
    otherwise "unknown". Without a root ``package.json`` the result is always
    "unknown", and the lockfile is not consulted.
    """
    manifest = root / "package.json"
    if manifest.exists():
        declared = read_json(manifest).get("packageManager")
        if isinstance(declared, str):
            return declared
        if (root / "pnpm-lock.yaml").exists():
            return "pnpm-lock-present"
    return "unknown"
def scan_package_json(root: Path, path: Path) -> dict[str, Any]:
    """Summarize one ``package.json`` manifest.

    The result holds the manifest path relative to *root*, its ``name``
    ("(unnamed)" when absent), ``private`` flag, ``packageManager`` value, the
    number of entries across the four dependency tables, and whether a
    non-empty ``scripts`` table exists.
    """
    manifest = read_json(path)
    rel_path = path.relative_to(root).as_posix()

    dependency_count = 0
    for section in ("dependencies", "devDependencies", "optionalDependencies", "peerDependencies"):
        table = manifest.get(section)
        # Sections that are missing or not JSON objects contribute nothing.
        if isinstance(table, dict):
            dependency_count += len(table)

    scripts = manifest.get("scripts")
    return {
        "path": rel_path,
        "name": manifest.get("name", "(unnamed)"),
        "private": manifest.get("private", False),
        "package_manager": manifest.get("packageManager"),
        "dependency_count": dependency_count,
        "has_scripts": isinstance(scripts, dict) and bool(scripts),
    }
def scan_pyproject(root: Path, path: Path) -> dict[str, Any]:
text = path.read_text(encoding="utf-8")
if tomllib is None:
name_match = re.search(r"(?m)^\s*name\s*=\s*[\"'](?P<name>[^\"']+)", text)
return {
"path": path.relative_to(root).as_posix(),
"name": name_match.group("name") if name_match else "(unnamed)",
"dependency_count": len(re.findall(r"(?m)^\s*[\"'][^\"']+[\"']\s*,?\s*$", text)),
"has_build_system": "[build-system]" in text,
}
data = tomllib.loads(text)
project = data.get("project", {})
poetry = data.get("tool", {}).get("poetry", {})
name = project.get("name") or poetry.get("name") or "(unnamed)"
dependencies = project.get("dependencies", [])
optional = project.get("optional-dependencies", {})
poetry_deps = poetry.get("dependencies", {})
dependency_count = 0
if isinstance(dependencies, list):
dependency_count += len(dependencies)
if isinstance(optional, dict):
dependency_count += sum(len(value) for value in optional.values() if isinstance(value, list))
if isinstance(poetry_deps, dict):
dependency_count += len(poetry_deps)
return {
"path": path.relative_to(root).as_posix(),
"name": name,
"dependency_count": dependency_count,
"has_build_system": "build-system" in data,
}
def scan_requirements(root: Path, path: Path) -> dict[str, Any]:
    """Count pinned and unpinned entries in a pip requirements file.

    Args:
        root: Repository root; the reported path is relative to it.
        path: Location of the requirements file.

    Returns:
        A dict with ``path``, ``entry_count``, ``pinned_entry_count`` and
        ``unpinned_entry_count``.

    Parsing follows the pip requirements-file conventions closely enough for
    counting:

    * a trailing backslash joins a line with the next one, so ``--hash=…``
      continuation lines belong to their requirement instead of being counted
      as separate unpinned entries;
    * ``#`` at the start of a line or after whitespace begins a comment;
    * option lines (``-r``, ``-c``, ``--index-url``, …) are not requirements
      and are skipped; editable installs (``-e`` / ``--editable``) are kept
      and count as unpinned;
    * an entry is pinned when ``==`` appears in the requirement itself, not
      in an environment marker after ``;`` or in per-requirement options.
    """
    text = path.read_text(encoding="utf-8")
    logical_lines = text.replace("\\\n", " ").splitlines()

    entries: list[str] = []
    for raw in logical_lines:
        line = re.sub(r"(^|\s)#.*$", "", raw).strip()
        if not line:
            continue
        if line.startswith("-"):
            option = line.split(None, 1)[0].split("=", 1)[0]
            if option not in ("-e", "--editable"):
                continue
        entries.append(line)

    pinned = [
        line
        for line in entries
        # Only the specifier part counts: drop the marker and trailing options.
        if "==" in line.split(";", 1)[0].split(" --", 1)[0]
    ]
    return {
        "path": path.relative_to(root).as_posix(),
        "entry_count": len(entries),
        "pinned_entry_count": len(pinned),
        "unpinned_entry_count": len(entries) - len(pinned),
    }
def _copy_from_source(line: str) -> str | None:
    """Return the ``--from=`` value of a COPY instruction line, or None.

    ``--from`` is accepted anywhere among the leading ``--flag`` tokens, so
    ``COPY --chown=app:app --from=image /src /dst`` is recognised as well.
    """
    direct = COPY_FROM_PATTERN.match(line)
    if direct:
        return direct.group("image")
    tokens = line.split()
    if not tokens or tokens[0].upper() != "COPY":
        return None
    for token in tokens[1:]:
        if not token.startswith("--"):
            break  # flags end at the first source path
        flag, _, value = token.partition("=")
        if flag.lower() == "--from" and value:
            return value
    return None


def scan_dockerfile(root: Path, path: Path) -> dict[str, Any]:
    """Inventory the external images referenced by one Dockerfile.

    Args:
        root: Repository root; the reported path is relative to it.
        path: Location of the Dockerfile.

    Returns:
        A dict with ``path``, the ``FROM`` images and ``COPY --from`` images
        (lists plus counts), and for each list the number of references that
        contain ``@`` (treated as digest-pinned).

    References that are not external images are left out so they cannot be
    reported as "not digest-pinned":

    * a ``FROM`` / ``COPY --from`` value naming a stage alias declared by an
      earlier ``FROM … AS <alias>`` line;
    * ``FROM scratch``, the reserved empty base that has no digest;
    * ``COPY --from=<N>`` where ``<N>`` is a numeric build-stage index.
    """
    images: list[str] = []
    copy_from_images: list[str] = []
    stage_aliases: set[str] = set()

    for line in path.read_text(encoding="utf-8").splitlines():
        match = FROM_PATTERN.match(line)
        if match:
            image = match.group("image")
            if image not in stage_aliases and image != "scratch":
                images.append(image)
            # Register the alias even for skipped bases so later references
            # to this stage are recognised.
            alias_match = FROM_ALIAS_PATTERN.search(line)
            if alias_match:
                stage_aliases.add(alias_match.group("alias"))
            continue

        source = _copy_from_source(line)
        if source is None:
            continue
        if source in stage_aliases or source.isdigit():
            continue
        copy_from_images.append(source)

    return {
        "path": path.relative_to(root).as_posix(),
        "from_images": images,
        "from_image_count": len(images),
        "digest_pinned_from_image_count": sum(1 for image in images if "@" in image),
        "copy_from_images": copy_from_images,
        "copy_from_image_count": len(copy_from_images),
        "digest_pinned_copy_from_image_count": sum(1 for image in copy_from_images if "@" in image),
    }
def scan_compose(root: Path, path: Path) -> dict[str, Any]:
    """Collect the ``image:`` references of one compose file.

    The file is read line by line (it is not parsed as YAML). The result holds
    the path relative to *root*, the captured references, their count, and how
    many contain ``@`` (treated as digest-pinned).
    """
    text = path.read_text(encoding="utf-8")
    hits = (IMAGE_PATTERN.match(row) for row in text.splitlines())
    image_refs = [hit.group("image") for hit in hits if hit]
    pinned_refs = [ref for ref in image_refs if "@" in ref]
    return {
        "path": path.relative_to(root).as_posix(),
        "image_refs": image_refs,
        "image_ref_count": len(image_refs),
        "digest_pinned_image_ref_count": len(pinned_refs),
    }
def iter_repo_files(root: Path) -> list[Path]:
    """Return every regular file under *root* outside ignored paths, sorted.

    A file is excluded when any component of its path relative to *root* is in
    ``IGNORED_DIRS``. Ignored directories are pruned during the walk, so large
    trees such as ``node_modules`` or ``.git`` are never traversed.
    """
    import os  # local import: ``os`` is not in the file's import block

    files: list[Path] = []
    for current, dirnames, filenames in os.walk(root):
        # In-place edit so os.walk (top-down) skips the ignored subtrees.
        dirnames[:] = [name for name in dirnames if name not in IGNORED_DIRS]
        base = Path(current)
        for name in filenames:
            # Parent components were already filtered by the pruning above,
            # so only the final component still needs checking.
            if name in IGNORED_DIRS:
                continue
            candidate = base / name
            if candidate.is_file():
                files.append(candidate)
    return sorted(files)
def build_snapshot(root: Path, generated_at: str | None = None) -> dict[str, Any]:
    """Assemble the repo-only supply-chain baseline for the tree under *root*.

    Args:
        root: Repository root to scan.
        generated_at: Timestamp string to record; when omitted or empty, the
            current time in the ``TAIPEI`` offset is used (second precision).

    Returns:
        A JSON-serializable dict containing the per-file scan results
        (package.json, pyproject.toml, requirements files, Dockerfiles,
        compose files, lockfiles), aggregated counts under ``summary``, the
        detected ``gaps``, and static policy/interpretation fields.

    The ``execution_boundaries`` value is a copy of ``EXECUTION_BOUNDARIES``,
    so mutating a returned snapshot cannot alter the module-level constant.
    """
    generated_at = generated_at or datetime.now(TAIPEI).isoformat(timespec="seconds")
    files = iter_repo_files(root)

    # Per-file scans, selected by file name or repo-relative path.
    package_json = [scan_package_json(root, path) for path in files if path.name in PACKAGE_JSON_NAMES]
    pyprojects = [scan_pyproject(root, path) for path in files if path.name in PYPROJECT_NAMES]
    requirements = [scan_requirements(root, path) for path in files if REQUIREMENTS_PATTERN.fullmatch(path.name)]
    dockerfiles = [
        scan_dockerfile(root, path)
        for path in files
        if DOCKERFILE_PATTERN.search(path.relative_to(root).as_posix())
    ]
    compose_files = [
        scan_compose(root, path)
        for path in files
        if COMPOSE_PATTERN.search(path.relative_to(root).as_posix())
    ]
    lockfiles = [
        path.relative_to(root).as_posix()
        for path in files
        if path.name in LOCKFILE_NAMES
    ]

    # Aggregated counters.
    docker_base_image_count = sum(item["from_image_count"] for item in dockerfiles)
    docker_base_digest_count = sum(item["digest_pinned_from_image_count"] for item in dockerfiles)
    docker_copy_from_image_count = sum(item["copy_from_image_count"] for item in dockerfiles)
    docker_copy_from_digest_count = sum(item["digest_pinned_copy_from_image_count"] for item in dockerfiles)
    compose_image_count = sum(item["image_ref_count"] for item in compose_files)
    compose_digest_count = sum(item["digest_pinned_image_ref_count"] for item in compose_files)
    requirements_entry_count = sum(item["entry_count"] for item in requirements)
    requirements_unpinned_count = sum(item["unpinned_entry_count"] for item in requirements)

    python_lock_names = ("poetry.lock", "uv.lock", "Pipfile.lock")
    python_lockfile_count = sum(1 for path in lockfiles if path.endswith(python_lock_names))
    # Only a pnpm lockfile at the repository root counts as present: lockfile
    # entries are repo-relative paths, and this is an exact comparison.
    pnpm_lock_present = "pnpm-lock.yaml" in lockfiles

    gaps = []
    if not pnpm_lock_present:
        gaps.append("pnpm_lock_missing")
    if any(path.endswith(("package-lock.json", "yarn.lock")) for path in lockfiles):
        gaps.append("unexpected_node_lockfile_present")
    if pyprojects and not python_lockfile_count:
        gaps.append("python_lockfile_absent")
    if docker_base_image_count and docker_base_digest_count < docker_base_image_count:
        gaps.append("docker_base_images_not_all_digest_pinned")
    if docker_copy_from_image_count and docker_copy_from_digest_count < docker_copy_from_image_count:
        gaps.append("docker_copy_from_images_not_all_digest_pinned")
    if compose_image_count and compose_digest_count < compose_image_count:
        gaps.append("compose_images_not_all_digest_pinned")
    if requirements_unpinned_count:
        gaps.append("requirements_unpinned_entries_present")

    return {
        "schema_version": "package_supply_chain_baseline_v1",
        "status": "repo_only_inventory_ready_needs_owner_policy",
        "mode": "repo_snapshot_only_no_install_no_network_no_cve_scan",
        "generated_at": generated_at,
        "git_commit": git_commit(root),
        "package_manager": package_manager_from_root(root),
        "summary": {
            "package_json_count": len(package_json),
            "pyproject_count": len(pyprojects),
            "requirements_file_count": len(requirements),
            "requirements_entry_count": requirements_entry_count,
            "requirements_unpinned_entry_count": requirements_unpinned_count,
            "lockfile_count": len(lockfiles),
            "pnpm_lock_present": pnpm_lock_present,
            "npm_lock_present": any(path.endswith("package-lock.json") for path in lockfiles),
            "yarn_lock_present": any(path.endswith("yarn.lock") for path in lockfiles),
            "python_lockfile_count": python_lockfile_count,
            "dockerfile_count": len(dockerfiles),
            "docker_base_image_count": docker_base_image_count,
            "docker_base_digest_pinned_count": docker_base_digest_count,
            "docker_copy_from_image_count": docker_copy_from_image_count,
            "docker_copy_from_digest_pinned_count": docker_copy_from_digest_count,
            "compose_file_count": len(compose_files),
            "compose_image_ref_count": compose_image_count,
            "compose_digest_pinned_image_ref_count": compose_digest_count,
            "gap_count": len(gaps),
            "owner_response_received_count": 0,
            "owner_response_accepted_count": 0,
            "runtime_gate_count": 0,
            "action_button_count": 0,
        },
        "package_json_manifests": package_json,
        "pyproject_manifests": pyprojects,
        "requirements_files": requirements,
        "lockfiles": lockfiles,
        "dockerfiles": dockerfiles,
        "compose_files": compose_files,
        "gaps": gaps,
        "next_owner_evidence_fields": [
            "package_manager_policy",
            "lockfile_owner",
            "python_lock_policy",
            "docker_base_image_policy",
            "compose_image_policy",
            "registry_owner",
            "cve_scan_window",
            "rollback_owner",
        ],
        # Copy, not alias: callers may mutate the snapshot freely.
        "execution_boundaries": dict(EXECUTION_BOUNDARIES),
        "operator_interpretation": [
            "此 baseline 只代表 repo 供應鏈來源盤點,不代表 CVE / license / SBOM 已驗收。",
            "Docker image 未全數 digest pinning 是 policy gap不在本輪自動改 image tag。",
            "Python lockfile 缺口是 owner policy gap不在本輪自動產生 lockfile。",
            "不得把此 snapshot 當成 install、upgrade、docker pull、registry login 或 deploy 授權。",
        ],
    }
def write_json(path: Path, data: dict[str, Any]) -> None:
    """Write *data* to *path* as 2-space indented UTF-8 JSON plus a newline.

    Missing parent directories are created first. Non-ASCII characters are
    written as-is rather than escaped.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serialize before opening so a failure cannot leave a truncated file.
    payload = json.dumps(data, ensure_ascii=False, indent=2) + "\n"
    with path.open("w", encoding="utf-8") as handle:
        handle.write(payload)
def main() -> None:
    """Command-line entry point: build the snapshot, optionally save it, report.

    ``--root`` defaults to the directory two levels above this script's
    directory. A relative ``--output`` path is resolved against the repository
    root. A one-line summary is always printed to stdout.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--root",
        type=Path,
        default=Path(__file__).resolve().parents[2],
        help="Repository root. Defaults to the current script's repository.",
    )
    cli.add_argument("--generated-at", help="Override generated_at for committed snapshots.")
    cli.add_argument("--output", type=Path, help="Write snapshot JSON to this path.")
    options = cli.parse_args()

    repo_root = options.root.resolve()
    snapshot = build_snapshot(repo_root, generated_at=options.generated_at)

    if options.output:
        destination = options.output if options.output.is_absolute() else repo_root / options.output
        write_json(destination, snapshot)

    counts = snapshot["summary"]
    # (label printed, key in the summary dict), in output order.
    report_fields = (
        ("package_json", "package_json_count"),
        ("pyproject", "pyproject_count"),
        ("requirements", "requirements_file_count"),
        ("dockerfiles", "dockerfile_count"),
        ("compose", "compose_file_count"),
        ("gaps", "gap_count"),
        ("runtime_gate", "runtime_gate_count"),
    )
    rendered = " ".join(f"{label}={counts[key]}" for label, key in report_fields)
    print("PACKAGE_SUPPLY_CHAIN_BASELINE_OK " + rendered)


if __name__ == "__main__":
    main()