#!/usr/bin/env python3
"""產生 AWOOOI package / Docker 供應鏈 repo-only baseline。

本工具只掃描 repo 內的 manifest、lockfile、Dockerfile 與 docker-compose 檔案,不安裝套件、不連外、不跑 CVE scan、不讀 secret、不修改 workflow 或 runtime。輸出用於 IwoooS 供應鏈治理的低摩擦證據基線。
"""
# NOTE: the module docstring above is also the argparse ``--help`` description
# (see main()), so it is runtime text and is kept in its original language.
# English summary: build a repo-only package / Docker supply-chain baseline by
# scanning manifests, lockfiles, Dockerfiles and docker-compose files inside
# the repository. Nothing is installed, no network access or CVE scan happens,
# no secrets are read, and no workflow or runtime is modified.

from __future__ import annotations

import argparse
import json
import os
import re
import subprocess
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any

try:
    import tomllib
except ModuleNotFoundError:  # pragma: no cover - Python 3.10 fallback
    tomllib = None  # type: ignore[assignment]

# Fixed UTC+8 offset used for the default ``generated_at`` timestamp.
TAIPEI = timezone(timedelta(hours=8))

# Any path containing one of these components is excluded from the scan.
IGNORED_DIRS = {
    ".git",
    ".next",
    ".turbo",
    "__pycache__",
    "node_modules",
    "test-results",
}
PACKAGE_JSON_NAMES = {"package.json"}
PYPROJECT_NAMES = {"pyproject.toml"}
REQUIREMENTS_PATTERN = re.compile(r"requirements(?:[-_.a-zA-Z0-9]*)?\.txt$")
DOCKERFILE_PATTERN = re.compile(r"(?:^|/)Dockerfile(?:\.[A-Za-z0-9_.-]+)?$")
COMPOSE_PATTERN = re.compile(r"(?:^|/)(?:docker-compose|compose)(?:[A-Za-z0-9_.-]*)?\.ya?ml$")
# The named groups below are read via ``match.group("image")`` /
# ``.group("alias")`` in the scanners, so the names are part of the contract.
FROM_PATTERN = re.compile(r"^\s*FROM\s+(?:--platform=\S+\s+)?(?P<image>\S+)", re.IGNORECASE)
FROM_ALIAS_PATTERN = re.compile(r"\s+AS\s+(?P<alias>[A-Za-z0-9_.-]+)\s*$", re.IGNORECASE)
# Other COPY flags (e.g. ``--chown=``, ``--link``) may precede ``--from=``.
COPY_FROM_PATTERN = re.compile(
    r"^\s*COPY\s+(?:--(?!from=)\S+\s+)*--from=(?P<image>\S+)",
    re.IGNORECASE,
)
IMAGE_PATTERN = re.compile(r"^\s*image\s*:\s*[\"']?(?P<image>[^\"'#\s]+)", re.IGNORECASE)
LOCKFILE_NAMES = {
    "pnpm-lock.yaml",
    "package-lock.json",
    "yarn.lock",
    "poetry.lock",
    "uv.lock",
    "Pipfile.lock",
}
# Lockfile name groups used when deriving gaps and summary counters.
PYTHON_LOCKFILE_NAMES = ("poetry.lock", "uv.lock", "Pipfile.lock")
UNEXPECTED_NODE_LOCKFILE_NAMES = ("package-lock.json", "yarn.lock")

# Static declaration embedded in every snapshot: nothing is authorized.
EXECUTION_BOUNDARIES = {
    "package_install_authorized": False,
    "dependency_upgrade_authorized": False,
    "lockfile_rewrite_authorized": False,
    "npm_audit_authorized": False,
    "pip_audit_authorized": False,
    "cve_scan_authorized": False,
    "docker_build_authorized": False,
    "docker_pull_authorized": False,
    "docker_push_authorized": False,
    "image_tag_change_authorized": False,
    "image_digest_pin_change_authorized": False,
    "registry_login_authorized": False,
    "secret_value_collection_allowed": False,
    "workflow_modification_authorized": False,
    "production_deploy_authorized": False,
    "runtime_gate_count": 0,
    "action_button_count": 0,
    "not_authorization": True,
}


def should_skip(path: Path) -> bool:
    """Return True when any component of *path* is listed in IGNORED_DIRS."""
    return any(part in IGNORED_DIRS for part in path.parts)


def git_commit(root: Path) -> str:
    """Return the 8-character short hash of HEAD for the repository at *root*.

    Returns ``"unknown"`` when ``git`` cannot be executed or exits non-zero
    (for example when *root* is not inside a git work tree).
    """
    try:
        return subprocess.check_output(
            ["git", "rev-parse", "--short=8", "HEAD"],
            cwd=root,
            text=True,
            stderr=subprocess.DEVNULL,
        ).strip()
    except (OSError, subprocess.CalledProcessError):
        return "unknown"


def read_json(path: Path) -> dict[str, Any]:
    """Parse the UTF-8 JSON document stored at *path* and return it."""
    return json.loads(path.read_text(encoding="utf-8"))


def package_manager_from_root(root: Path) -> str:
    """Describe the package manager declared at the repository root.

    Returns the root ``package.json``'s ``packageManager`` string when present,
    ``"pnpm-lock-present"`` when only a root ``pnpm-lock.yaml`` exists next to
    a ``package.json``, and ``"unknown"`` otherwise.
    """
    package_json = root / "package.json"
    if not package_json.exists():
        return "unknown"
    declared = read_json(package_json).get("packageManager")
    if isinstance(declared, str):
        return declared
    if (root / "pnpm-lock.yaml").exists():
        return "pnpm-lock-present"
    return "unknown"


def scan_package_json(root: Path, path: Path) -> dict[str, Any]:
    """Summarize one ``package.json`` manifest.

    The dependency count is the total number of keys across the
    ``dependencies``, ``devDependencies``, ``optionalDependencies`` and
    ``peerDependencies`` tables; sections that are not objects are ignored.
    """
    data = read_json(path)
    dependency_keys = ["dependencies", "devDependencies", "optionalDependencies", "peerDependencies"]
    dependency_count = sum(
        len(data[key]) for key in dependency_keys if isinstance(data.get(key), dict)
    )
    scripts = data.get("scripts")
    return {
        "path": path.relative_to(root).as_posix(),
        "name": data.get("name", "(unnamed)"),
        "private": data.get("private", False),
        "package_manager": data.get("packageManager"),
        "dependency_count": dependency_count,
        "has_scripts": isinstance(scripts, dict) and bool(scripts),
    }


def scan_pyproject(root: Path, path: Path) -> dict[str, Any]:
    """Summarize one ``pyproject.toml`` manifest.

    With ``tomllib`` available the file is parsed properly and dependencies
    are counted from ``project.dependencies``, ``project.optional-dependencies``
    and ``tool.poetry.dependencies``. Without it, a regex heuristic is used:
    the first ``name = "..."`` assignment supplies the name and every line
    consisting solely of a quoted string counts as one dependency.
    """
    text = path.read_text(encoding="utf-8")
    rel = path.relative_to(root).as_posix()

    if tomllib is None:
        name_match = re.search(r"(?m)^\s*name\s*=\s*[\"'](?P<name>[^\"']+)", text)
        return {
            "path": rel,
            "name": name_match.group("name") if name_match else "(unnamed)",
            "dependency_count": len(re.findall(r"(?m)^\s*[\"'][^\"']+[\"']\s*,?\s*$", text)),
            "has_build_system": "[build-system]" in text,
        }

    data = tomllib.loads(text)
    project = data.get("project", {})
    poetry = data.get("tool", {}).get("poetry", {})
    name = project.get("name") or poetry.get("name") or "(unnamed)"
    dependencies = project.get("dependencies", [])
    optional = project.get("optional-dependencies", {})
    poetry_deps = poetry.get("dependencies", {})

    dependency_count = 0
    if isinstance(dependencies, list):
        dependency_count += len(dependencies)
    if isinstance(optional, dict):
        dependency_count += sum(len(value) for value in optional.values() if isinstance(value, list))
    if isinstance(poetry_deps, dict):
        dependency_count += len(poetry_deps)

    return {
        "path": rel,
        "name": name,
        "dependency_count": dependency_count,
        "has_build_system": "build-system" in data,
    }


def _is_requirement_entry(line: str) -> bool:
    """Return True when a stripped requirements line names a dependency.

    Blank lines and comments are not entries. Option lines (starting with
    ``-``) are not entries either -- this covers ``-r``/``--requirement``,
    ``-c``, index options and ``--hash=`` continuation lines -- except for
    editable installs (``-e`` / ``--editable``), which do pull in a package.
    """
    if not line or line.startswith("#"):
        return False
    if line.startswith("-"):
        return line.startswith(("-e", "--editable"))
    return True


def scan_requirements(root: Path, path: Path) -> dict[str, Any]:
    """Count total, pinned and unpinned entries in a requirements file.

    An entry counts as pinned when its line contains ``==``.
    """
    stripped_lines = (
        raw.strip() for raw in path.read_text(encoding="utf-8").splitlines()
    )
    entries = [line for line in stripped_lines if _is_requirement_entry(line)]
    pinned_count = sum(1 for line in entries if "==" in line)
    return {
        "path": path.relative_to(root).as_posix(),
        "entry_count": len(entries),
        "pinned_entry_count": pinned_count,
        "unpinned_entry_count": len(entries) - pinned_count,
    }


def scan_dockerfile(root: Path, path: Path) -> dict[str, Any]:
    """Collect external image references from one Dockerfile.

    ``FROM`` and ``COPY --from=`` targets that name a stage alias declared
    earlier in the file are not counted, and neither are numeric
    ``COPY --from=<index>`` stage references. An image counts as digest-pinned
    when its reference contains ``@``. The file is scanned line by line, so
    instructions split across continuation lines are not joined.
    """
    images: list[str] = []
    copy_from_images: list[str] = []
    stage_aliases: set[str] = set()

    for line in path.read_text(encoding="utf-8").splitlines():
        from_match = FROM_PATTERN.match(line)
        if from_match:
            image = from_match.group("image")
            if image not in stage_aliases:
                images.append(image)
            alias_match = FROM_ALIAS_PATTERN.search(line)
            if alias_match:
                stage_aliases.add(alias_match.group("alias"))
            continue

        copy_match = COPY_FROM_PATTERN.match(line)
        if copy_match:
            source = copy_match.group("image")
            # A bare integer is a build-stage index, not an image reference.
            if source not in stage_aliases and not source.isdigit():
                copy_from_images.append(source)

    return {
        "path": path.relative_to(root).as_posix(),
        "from_images": images,
        "from_image_count": len(images),
        "digest_pinned_from_image_count": sum(1 for image in images if "@" in image),
        "copy_from_images": copy_from_images,
        "copy_from_image_count": len(copy_from_images),
        "digest_pinned_copy_from_image_count": sum(1 for image in copy_from_images if "@" in image),
    }


def scan_compose(root: Path, path: Path) -> dict[str, Any]:
    """Collect ``image:`` references from one compose file.

    Uses a line-based regex rather than a YAML parser; a reference counts as
    digest-pinned when it contains ``@``.
    """
    images = [
        match.group("image")
        for match in map(IMAGE_PATTERN.match, path.read_text(encoding="utf-8").splitlines())
        if match
    ]
    return {
        "path": path.relative_to(root).as_posix(),
        "image_refs": images,
        "image_ref_count": len(images),
        "digest_pinned_image_ref_count": sum(1 for image in images if "@" in image),
    }


def iter_repo_files(root: Path) -> list[Path]:
    """Return the sorted list of regular files under *root*.

    Anything whose path relative to *root* contains an IGNORED_DIRS component
    is excluded. Ignored directories are pruned during the walk so large trees
    such as ``node_modules`` are never traversed.
    """
    files: list[Path] = []
    for dirpath, dirnames, filenames in os.walk(root):
        # In-place pruning tells os.walk not to descend into these directories.
        dirnames[:] = [name for name in dirnames if name not in IGNORED_DIRS]
        for filename in filenames:
            if filename in IGNORED_DIRS:
                continue
            candidate = Path(dirpath) / filename
            # os.walk also lists broken symlinks and special files here.
            if candidate.is_file():
                files.append(candidate)
    return sorted(files)


def build_snapshot(root: Path, generated_at: str | None = None) -> dict[str, Any]:
    """Build the supply-chain baseline snapshot for the repository at *root*.

    Args:
        root: Repository root directory to scan.
        generated_at: Timestamp string to embed; defaults to the current time
            in the TAIPEI zone, ISO-8601 with second precision.

    Returns:
        A JSON-serializable dict holding per-file scan results, aggregate
        summary counters, the derived ``gaps`` list and static policy metadata.
    """
    generated_at = generated_at or datetime.now(TAIPEI).isoformat(timespec="seconds")
    files = iter_repo_files(root)
    relative_paths = {path: path.relative_to(root).as_posix() for path in files}

    package_json = [scan_package_json(root, path) for path in files if path.name in PACKAGE_JSON_NAMES]
    pyprojects = [scan_pyproject(root, path) for path in files if path.name in PYPROJECT_NAMES]
    requirements = [
        scan_requirements(root, path) for path in files if REQUIREMENTS_PATTERN.fullmatch(path.name)
    ]
    dockerfiles = [
        scan_dockerfile(root, path) for path in files if DOCKERFILE_PATTERN.search(relative_paths[path])
    ]
    compose_files = [
        scan_compose(root, path) for path in files if COMPOSE_PATTERN.search(relative_paths[path])
    ]
    lockfiles = [relative_paths[path] for path in files if path.name in LOCKFILE_NAMES]

    docker_base_image_count = sum(item["from_image_count"] for item in dockerfiles)
    docker_base_digest_count = sum(item["digest_pinned_from_image_count"] for item in dockerfiles)
    docker_copy_from_image_count = sum(item["copy_from_image_count"] for item in dockerfiles)
    docker_copy_from_digest_count = sum(
        item["digest_pinned_copy_from_image_count"] for item in dockerfiles
    )
    compose_image_count = sum(item["image_ref_count"] for item in compose_files)
    compose_digest_count = sum(item["digest_pinned_image_ref_count"] for item in compose_files)
    requirements_entry_count = sum(item["entry_count"] for item in requirements)
    requirements_unpinned_count = sum(item["unpinned_entry_count"] for item in requirements)

    # ``lockfiles`` holds root-relative paths, so this membership test only
    # recognizes a pnpm lockfile located directly at the repository root.
    pnpm_lock_present = "pnpm-lock.yaml" in lockfiles
    python_lockfile_count = sum(1 for path in lockfiles if path.endswith(PYTHON_LOCKFILE_NAMES))

    gaps = []
    if not pnpm_lock_present:
        gaps.append("pnpm_lock_missing")
    if any(path.endswith(UNEXPECTED_NODE_LOCKFILE_NAMES) for path in lockfiles):
        gaps.append("unexpected_node_lockfile_present")
    if pyprojects and not python_lockfile_count:
        gaps.append("python_lockfile_absent")
    if docker_base_image_count and docker_base_digest_count < docker_base_image_count:
        gaps.append("docker_base_images_not_all_digest_pinned")
    if docker_copy_from_image_count and docker_copy_from_digest_count < docker_copy_from_image_count:
        gaps.append("docker_copy_from_images_not_all_digest_pinned")
    if compose_image_count and compose_digest_count < compose_image_count:
        gaps.append("compose_images_not_all_digest_pinned")
    if requirements_unpinned_count:
        gaps.append("requirements_unpinned_entries_present")

    return {
        "schema_version": "package_supply_chain_baseline_v1",
        "status": "repo_only_inventory_ready_needs_owner_policy",
        "mode": "repo_snapshot_only_no_install_no_network_no_cve_scan",
        "generated_at": generated_at,
        "git_commit": git_commit(root),
        "package_manager": package_manager_from_root(root),
        "summary": {
            "package_json_count": len(package_json),
            "pyproject_count": len(pyprojects),
            "requirements_file_count": len(requirements),
            "requirements_entry_count": requirements_entry_count,
            "requirements_unpinned_entry_count": requirements_unpinned_count,
            "lockfile_count": len(lockfiles),
            "pnpm_lock_present": pnpm_lock_present,
            "npm_lock_present": any(path.endswith("package-lock.json") for path in lockfiles),
            "yarn_lock_present": any(path.endswith("yarn.lock") for path in lockfiles),
            "python_lockfile_count": python_lockfile_count,
            "dockerfile_count": len(dockerfiles),
            "docker_base_image_count": docker_base_image_count,
            "docker_base_digest_pinned_count": docker_base_digest_count,
            "docker_copy_from_image_count": docker_copy_from_image_count,
            "docker_copy_from_digest_pinned_count": docker_copy_from_digest_count,
            "compose_file_count": len(compose_files),
            "compose_image_ref_count": compose_image_count,
            "compose_digest_pinned_image_ref_count": compose_digest_count,
            "gap_count": len(gaps),
            "owner_response_received_count": 0,
            "owner_response_accepted_count": 0,
            "runtime_gate_count": 0,
            "action_button_count": 0,
        },
        "package_json_manifests": package_json,
        "pyproject_manifests": pyprojects,
        "requirements_files": requirements,
        "lockfiles": lockfiles,
        "dockerfiles": dockerfiles,
        "compose_files": compose_files,
        "gaps": gaps,
        "next_owner_evidence_fields": [
            "package_manager_policy",
            "lockfile_owner",
            "python_lock_policy",
            "docker_base_image_policy",
            "compose_image_policy",
            "registry_owner",
            "cve_scan_window",
            "rollback_owner",
        ],
        # Copy so that callers mutating the snapshot cannot alter the constant.
        "execution_boundaries": dict(EXECUTION_BOUNDARIES),
        "operator_interpretation": [
            "此 baseline 只代表 repo 供應鏈來源盤點,不代表 CVE / license / SBOM 已驗收。",
            "Docker image 未全數 digest pinning 是 policy gap,不在本輪自動改 image tag。",
            "Python lockfile 缺口是 owner policy gap,不在本輪自動產生 lockfile。",
            "不得把此 snapshot 當成 install、upgrade、docker pull、registry login 或 deploy 授權。",
        ],
    }


def write_json(path: Path, data: dict[str, Any]) -> None:
    """Write *data* to *path* as indented UTF-8 JSON with a trailing newline.

    Missing parent directories are created; non-ASCII text is written as-is.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")


def main() -> None:
    """CLI entry point: build the snapshot, optionally write it, print a summary."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--root",
        default=Path(__file__).resolve().parents[2],
        type=Path,
        help="Repository root. Defaults to the current script's repository.",
    )
    parser.add_argument("--generated-at", help="Override generated_at for committed snapshots.")
    parser.add_argument("--output", type=Path, help="Write snapshot JSON to this path.")
    args = parser.parse_args()

    root = args.root.resolve()
    snapshot = build_snapshot(root, generated_at=args.generated_at)

    if args.output:
        output = args.output
        # A relative --output is resolved against the repository root.
        if not output.is_absolute():
            output = root / output
        write_json(output, snapshot)

    summary = snapshot["summary"]
    print(
        "PACKAGE_SUPPLY_CHAIN_BASELINE_OK "
        f"package_json={summary['package_json_count']} "
        f"pyproject={summary['pyproject_count']} "
        f"requirements={summary['requirements_file_count']} "
        f"dockerfiles={summary['dockerfile_count']} "
        f"compose={summary['compose_file_count']} "
        f"gaps={summary['gap_count']} "
        f"runtime_gate={summary['runtime_gate_count']}"
    )


if __name__ == "__main__":
    main()