375 lines
15 KiB
Python
375 lines
15 KiB
Python
#!/usr/bin/env python3
|
||
"""產生 AWOOOI package / Docker 供應鏈 repo-only baseline。
|
||
|
||
本工具只掃描 repo 內的 manifest、lockfile、Dockerfile 與 docker-compose
|
||
檔案,不安裝套件、不連外、不跑 CVE scan、不讀 secret、不修改 workflow 或
|
||
runtime。輸出用於 IwoooS 供應鏈治理的低摩擦證據基線。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import re
|
||
import subprocess
|
||
from datetime import datetime, timedelta, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
try:
|
||
import tomllib
|
||
except ModuleNotFoundError: # pragma: no cover - Python 3.10 fallback
|
||
tomllib = None # type: ignore[assignment]
|
||
|
||
|
||
# Fixed UTC+8 offset; build_snapshot() uses it for the default generated_at
# timestamp when the caller does not supply one.
TAIPEI = timezone(timedelta(hours=8))

# Path component names that exclude a file from the scan: should_skip() rejects
# any repo-relative path containing one of these components.
IGNORED_DIRS = {
    ".git",
    ".next",
    ".turbo",
    "__pycache__",
    "node_modules",
    "test-results",
}
|
||
|
||
# Exact file names treated as Node / Python project manifests.
PACKAGE_JSON_NAMES = {"package.json"}
PYPROJECT_NAMES = {"pyproject.toml"}

# Applied with fullmatch() to a bare file name, e.g. requirements.txt or
# requirements-dev.txt.
REQUIREMENTS_PATTERN = re.compile(r"requirements(?:[-_.a-zA-Z0-9]*)?\.txt$")

# Searched against repo-relative POSIX paths: Dockerfile or Dockerfile.<suffix>
# as the final path component.
DOCKERFILE_PATTERN = re.compile(r"(?:^|/)Dockerfile(?:\.[A-Za-z0-9_.-]+)?$")

# Searched against repo-relative POSIX paths: docker-compose*.yml/.yaml or
# compose*.yml/.yaml as the final path component.
COMPOSE_PATTERN = re.compile(r"(?:^|/)(?:docker-compose|compose)(?:[A-Za-z0-9_.-]*)?\.ya?ml$")

# FROM instruction; an optional --platform flag is skipped before the image.
FROM_PATTERN = re.compile(r"^\s*FROM\s+(?:--platform=\S+\s+)?(?P<image>\S+)", re.IGNORECASE)

# Trailing "AS <alias>" of a FROM instruction (build-stage name).
FROM_ALIAS_PATTERN = re.compile(r"\s+AS\s+(?P<alias>[A-Za-z0-9_.-]+)\s*$", re.IGNORECASE)

# COPY instruction carrying a --from=<source> flag.  COPY accepts several flags
# in any order (--chown, --chmod, --link, ...), so any number of other
# "--flag" tokens may precede --from; previously only a leading --from matched.
COPY_FROM_PATTERN = re.compile(r"^\s*COPY\s+(?:--\S+\s+)*--from=(?P<image>\S+)", re.IGNORECASE)

# "image: <ref>" line of a compose file; optional surrounding quotes are
# dropped and the reference stops at whitespace, a quote or "#".
IMAGE_PATTERN = re.compile(r"^\s*image\s*:\s*[\"']?(?P<image>[^\"'#\s]+)", re.IGNORECASE)
|
||
|
||
# File names that build_snapshot() records as lockfiles (Node and Python
# ecosystems); matching is by exact file name at any depth of the repo.
LOCKFILE_NAMES = {
    "pnpm-lock.yaml",
    "package-lock.json",
    "yarn.lock",
    "poetry.lock",
    "uv.lock",
    "Pipfile.lock",
}
|
||
|
||
# Embedded as-is in every snapshot under "execution_boundaries".  Every
# *_authorized / *_allowed flag is False and both counters are 0: the snapshot
# is an inventory only and is not to be read as permission for any of the
# listed actions ("not_authorization" is True).
EXECUTION_BOUNDARIES = {
    "package_install_authorized": False,
    "dependency_upgrade_authorized": False,
    "lockfile_rewrite_authorized": False,
    "npm_audit_authorized": False,
    "pip_audit_authorized": False,
    "cve_scan_authorized": False,
    "docker_build_authorized": False,
    "docker_pull_authorized": False,
    "docker_push_authorized": False,
    "image_tag_change_authorized": False,
    "image_digest_pin_change_authorized": False,
    "registry_login_authorized": False,
    "secret_value_collection_allowed": False,
    "workflow_modification_authorized": False,
    "production_deploy_authorized": False,
    "runtime_gate_count": 0,
    "action_button_count": 0,
    "not_authorization": True,
}
|
||
|
||
|
||
def should_skip(path: Path) -> bool:
    """Return True when any component of *path* is listed in ``IGNORED_DIRS``.

    Every part of the path is checked, including the final component.
    """
    for component in path.parts:
        if component in IGNORED_DIRS:
            return True
    return False
|
||
|
||
|
||
def git_commit(root: Path) -> str:
    """Return the abbreviated HEAD commit of the repository at *root*.

    Runs ``git rev-parse --short=8 HEAD`` with *root* as working directory and
    git's stderr discarded.

    Returns:
        The stripped command output, or ``"unknown"`` when git cannot be
        started (``OSError``) or exits with a non-zero status.
    """
    command = ["git", "rev-parse", "--short=8", "HEAD"]
    try:
        raw_output = subprocess.check_output(
            command,
            cwd=root,
            text=True,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError):
        return "unknown"
    return raw_output.strip()
|
||
|
||
|
||
def read_json(path: Path) -> dict[str, Any]:
    """Decode the UTF-8 JSON document stored at *path* and return the result."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
||
|
||
|
||
def package_manager_from_root(root: Path) -> str:
    """Describe the package manager declared at the repository root.

    Returns:
        The string value of ``packageManager`` in ``<root>/package.json`` when
        present; otherwise ``"pnpm-lock-present"`` when that manifest exists
        next to a ``pnpm-lock.yaml``; otherwise ``"unknown"`` (also when there
        is no root ``package.json`` at all).
    """
    manifest = root / "package.json"
    if manifest.exists():
        declared = read_json(manifest).get("packageManager")
        if isinstance(declared, str):
            return declared
        if (root / "pnpm-lock.yaml").exists():
            return "pnpm-lock-present"
    return "unknown"
|
||
|
||
|
||
def scan_package_json(root: Path, path: Path) -> dict[str, Any]:
    """Summarise one ``package.json`` manifest.

    Args:
        root: Repository root; the reported path is relative to it.
        path: Location of the manifest to read.

    Returns:
        A dict with the POSIX-style relative path, the package name
        (``"(unnamed)"`` when absent), the ``private`` and ``packageManager``
        values, the number of entries across the four dependency sections,
        and whether a non-empty ``scripts`` object exists.
    """
    manifest = read_json(path)
    relative_path = path.relative_to(root).as_posix()

    # Sections that are missing or not JSON objects contribute nothing.
    dependency_count = 0
    for section in ("dependencies", "devDependencies", "optionalDependencies", "peerDependencies"):
        declared = manifest.get(section)
        if isinstance(declared, dict):
            dependency_count += len(declared)

    scripts = manifest.get("scripts")
    return {
        "path": relative_path,
        "name": manifest.get("name", "(unnamed)"),
        "private": manifest.get("private", False),
        "package_manager": manifest.get("packageManager"),
        "dependency_count": dependency_count,
        "has_scripts": isinstance(scripts, dict) and bool(scripts),
    }
|
||
|
||
|
||
def scan_pyproject(root: Path, path: Path) -> dict[str, Any]:
    """Summarise one ``pyproject.toml`` manifest.

    With ``tomllib`` available the file is parsed and dependencies are counted
    from ``[project].dependencies``, the lists under
    ``[project.optional-dependencies]`` and the ``[tool.poetry.dependencies]``
    table.  Without ``tomllib`` a regex approximation is used: the first
    ``name = "..."`` assignment and every line consisting solely of one quoted
    string.

    Args:
        root: Repository root; the reported path is relative to it.
        path: Location of the manifest to read.

    Returns:
        A dict with ``path``, ``name`` (``"(unnamed)"`` when absent),
        ``dependency_count`` and ``has_build_system``.
    """
    source = path.read_text(encoding="utf-8")

    if tomllib is not None:
        parsed = tomllib.loads(source)
        project_table = parsed.get("project", {})
        poetry_table = parsed.get("tool", {}).get("poetry", {})
        name = project_table.get("name") or poetry_table.get("name") or "(unnamed)"
        required = project_table.get("dependencies", [])
        extras = project_table.get("optional-dependencies", {})
        poetry_required = poetry_table.get("dependencies", {})

        # Values of an unexpected type are ignored rather than counted.
        partial_counts: list[int] = []
        if isinstance(required, list):
            partial_counts.append(len(required))
        if isinstance(extras, dict):
            partial_counts.extend(len(group) for group in extras.values() if isinstance(group, list))
        if isinstance(poetry_required, dict):
            partial_counts.append(len(poetry_required))

        return {
            "path": path.relative_to(root).as_posix(),
            "name": name,
            "dependency_count": sum(partial_counts),
            "has_build_system": "build-system" in parsed,
        }

    # Fallback for interpreters without tomllib: text heuristics only.
    found_name = re.search(r"(?m)^\s*name\s*=\s*[\"'](?P<name>[^\"']+)", source)
    return {
        "path": path.relative_to(root).as_posix(),
        "name": found_name.group("name") if found_name else "(unnamed)",
        "dependency_count": len(re.findall(r"(?m)^\s*[\"'][^\"']+[\"']\s*,?\s*$", source)),
        "has_build_system": "[build-system]" in source,
    }
|
||
|
||
|
||
def scan_requirements(root: Path, path: Path) -> dict[str, Any]:
    """Count pinned and unpinned requirement entries in a requirements file.

    Blank lines and comments are ignored.  pip option lines (anything starting
    with ``-``, such as ``-r``, ``-c``, ``--index-url`` or a ``--hash=...``
    continuation line) are not requirements and are skipped, so they no longer
    inflate the unpinned count.  Editable installs (``-e`` / ``--editable``)
    are still counted because they do name a dependency.  An entry is
    considered pinned when it contains ``==`` after inline comments have been
    removed.

    Args:
        root: Repository root; the reported path is relative to it.
        path: Location of the requirements file to read.

    Returns:
        A dict with ``path``, ``entry_count``, ``pinned_entry_count`` and
        ``unpinned_entry_count``.
    """
    entries: list[str] = []
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        # pip starts a comment at "#" when it opens the line or follows
        # whitespace; a "#" inside a URL fragment (no whitespace) is kept.
        line = re.sub(r"(^|\s+)#.*$", "", raw_line).strip()
        if not line:
            continue
        if line.startswith("-") and not line.startswith(("-e", "--editable")):
            continue
        entries.append(line)

    pinned_count = sum(1 for entry in entries if "==" in entry)
    return {
        "path": path.relative_to(root).as_posix(),
        "entry_count": len(entries),
        "pinned_entry_count": pinned_count,
        "unpinned_entry_count": len(entries) - pinned_count,
    }
|
||
|
||
|
||
def scan_dockerfile(root: Path, path: Path) -> dict[str, Any]:
    """List the external images a Dockerfile pulls via FROM and COPY --from.

    The file is read line by line.  A ``FROM`` whose image is the alias of an
    earlier stage is not counted as a base image.  A ``COPY --from=`` source
    is counted only when it is an external image: names of earlier stages and
    numeric stage indices (``--from=0``) refer to stages of the same build and
    are skipped.  An image is treated as digest-pinned when its reference
    contains ``@``.

    Args:
        root: Repository root; the reported path is relative to it.
        path: Location of the Dockerfile to read.

    Returns:
        A dict with ``path``, the ``from_images`` / ``copy_from_images`` lists
        and their total and digest-pinned counts.
    """
    base_images: list[str] = []
    copy_sources: list[str] = []
    stage_names: set[str] = set()

    for line in path.read_text(encoding="utf-8").splitlines():
        from_match = FROM_PATTERN.match(line)
        if from_match:
            image = from_match.group("image")
            if image not in stage_names:
                base_images.append(image)
            # Register the alias after the check so a stage cannot shadow its
            # own base image on the same line.
            alias_match = FROM_ALIAS_PATTERN.search(line)
            if alias_match:
                stage_names.add(alias_match.group("alias"))
            continue

        copy_match = COPY_FROM_PATTERN.match(line)
        if copy_match:
            source = copy_match.group("image")
            # A purely numeric source is a build-stage index, not an image.
            if source not in stage_names and not source.isdigit():
                copy_sources.append(source)

    return {
        "path": path.relative_to(root).as_posix(),
        "from_images": base_images,
        "from_image_count": len(base_images),
        "digest_pinned_from_image_count": sum(1 for image in base_images if "@" in image),
        "copy_from_images": copy_sources,
        "copy_from_image_count": len(copy_sources),
        "digest_pinned_copy_from_image_count": sum(1 for image in copy_sources if "@" in image),
    }
|
||
|
||
|
||
def scan_compose(root: Path, path: Path) -> dict[str, Any]:
    """Collect the ``image:`` references declared in a compose file.

    The file is matched line by line with ``IMAGE_PATTERN``; it is not parsed
    as YAML.  A reference counts as digest-pinned when it contains ``@``.

    Args:
        root: Repository root; the reported path is relative to it.
        path: Location of the compose file to read.

    Returns:
        A dict with ``path``, ``image_refs``, ``image_ref_count`` and
        ``digest_pinned_image_ref_count``.
    """
    content = path.read_text(encoding="utf-8")
    candidates = (IMAGE_PATTERN.match(row) for row in content.splitlines())
    image_refs = [found.group("image") for found in candidates if found]
    pinned_refs = [ref for ref in image_refs if "@" in ref]
    return {
        "path": path.relative_to(root).as_posix(),
        "image_refs": image_refs,
        "image_ref_count": len(image_refs),
        "digest_pinned_image_ref_count": len(pinned_refs),
    }
|
||
|
||
|
||
def iter_repo_files(root: Path) -> list[Path]:
    """Return every regular file under *root* that is not skipped, sorted.

    A file is dropped when ``should_skip`` rejects its path relative to
    *root*.
    """
    return sorted(
        candidate
        for candidate in root.rglob("*")
        if candidate.is_file() and not should_skip(candidate.relative_to(root))
    )
|
||
|
||
|
||
def build_snapshot(root: Path, generated_at: str | None = None) -> dict[str, Any]:
    """Assemble the repo-only supply-chain baseline for *root*.

    Lists the repository files, scans every package.json, pyproject.toml,
    requirements file, Dockerfile and compose file, records known lockfiles,
    and derives summary counters plus the list of detected gaps.

    Args:
        root: Repository root directory to inventory.
        generated_at: Timestamp string to record.  When empty or ``None`` the
            current time in the ``TAIPEI`` zone is used, formatted as ISO 8601
            with second precision.

    Returns:
        The snapshot dict: schema/status/mode markers, ``generated_at``, the
        git commit, the root package manager, a ``summary`` of counters, the
        per-file scan results, ``gaps``, the owner evidence field names, the
        shared ``EXECUTION_BOUNDARIES`` mapping and operator notes.
    """
    if not generated_at:
        generated_at = datetime.now(TAIPEI).isoformat(timespec="seconds")

    repo_files = iter_repo_files(root)

    def relative(entry: Path) -> str:
        # Repo-relative POSIX path, the form all path patterns expect.
        return entry.relative_to(root).as_posix()

    def total(records: list[dict[str, Any]], field: str) -> int:
        # Sum one numeric field across a list of scan results.
        return sum(record[field] for record in records)

    package_json = [scan_package_json(root, entry) for entry in repo_files if entry.name in PACKAGE_JSON_NAMES]
    pyprojects = [scan_pyproject(root, entry) for entry in repo_files if entry.name in PYPROJECT_NAMES]
    requirements = [
        scan_requirements(root, entry) for entry in repo_files if REQUIREMENTS_PATTERN.fullmatch(entry.name)
    ]
    dockerfiles = [scan_dockerfile(root, entry) for entry in repo_files if DOCKERFILE_PATTERN.search(relative(entry))]
    compose_files = [scan_compose(root, entry) for entry in repo_files if COMPOSE_PATTERN.search(relative(entry))]
    lockfiles = [relative(entry) for entry in repo_files if entry.name in LOCKFILE_NAMES]

    docker_base_image_count = total(dockerfiles, "from_image_count")
    docker_base_digest_count = total(dockerfiles, "digest_pinned_from_image_count")
    docker_copy_from_image_count = total(dockerfiles, "copy_from_image_count")
    docker_copy_from_digest_count = total(dockerfiles, "digest_pinned_copy_from_image_count")
    compose_image_count = total(compose_files, "image_ref_count")
    compose_digest_count = total(compose_files, "digest_pinned_image_ref_count")
    requirements_entry_count = total(requirements, "entry_count")
    requirements_unpinned_count = total(requirements, "unpinned_entry_count")

    # The pnpm lock is only looked for at the repository root; the other
    # lockfile checks accept a match at any depth.
    pnpm_lock_present = "pnpm-lock.yaml" in lockfiles
    npm_lock_present = any(name.endswith("package-lock.json") for name in lockfiles)
    yarn_lock_present = any(name.endswith("yarn.lock") for name in lockfiles)
    python_lock_names = ("poetry.lock", "uv.lock", "Pipfile.lock")
    python_lockfile_count = sum(1 for name in lockfiles if name.endswith(python_lock_names))

    # (gap label, triggered) pairs, kept in the order gaps are reported.
    gap_checks = [
        ("pnpm_lock_missing", not pnpm_lock_present),
        ("unexpected_node_lockfile_present", npm_lock_present or yarn_lock_present),
        ("python_lockfile_absent", bool(pyprojects) and python_lockfile_count == 0),
        (
            "docker_base_images_not_all_digest_pinned",
            bool(docker_base_image_count) and docker_base_digest_count < docker_base_image_count,
        ),
        (
            "docker_copy_from_images_not_all_digest_pinned",
            bool(docker_copy_from_image_count) and docker_copy_from_digest_count < docker_copy_from_image_count,
        ),
        (
            "compose_images_not_all_digest_pinned",
            bool(compose_image_count) and compose_digest_count < compose_image_count,
        ),
        ("requirements_unpinned_entries_present", bool(requirements_unpinned_count)),
    ]
    gaps = [label for label, triggered in gap_checks if triggered]

    summary = {
        "package_json_count": len(package_json),
        "pyproject_count": len(pyprojects),
        "requirements_file_count": len(requirements),
        "requirements_entry_count": requirements_entry_count,
        "requirements_unpinned_entry_count": requirements_unpinned_count,
        "lockfile_count": len(lockfiles),
        "pnpm_lock_present": pnpm_lock_present,
        "npm_lock_present": npm_lock_present,
        "yarn_lock_present": yarn_lock_present,
        "python_lockfile_count": python_lockfile_count,
        "dockerfile_count": len(dockerfiles),
        "docker_base_image_count": docker_base_image_count,
        "docker_base_digest_pinned_count": docker_base_digest_count,
        "docker_copy_from_image_count": docker_copy_from_image_count,
        "docker_copy_from_digest_pinned_count": docker_copy_from_digest_count,
        "compose_file_count": len(compose_files),
        "compose_image_ref_count": compose_image_count,
        "compose_digest_pinned_image_ref_count": compose_digest_count,
        "gap_count": len(gaps),
        "owner_response_received_count": 0,
        "owner_response_accepted_count": 0,
        "runtime_gate_count": 0,
        "action_button_count": 0,
    }

    return {
        "schema_version": "package_supply_chain_baseline_v1",
        "status": "repo_only_inventory_ready_needs_owner_policy",
        "mode": "repo_snapshot_only_no_install_no_network_no_cve_scan",
        "generated_at": generated_at,
        "git_commit": git_commit(root),
        "package_manager": package_manager_from_root(root),
        "summary": summary,
        "package_json_manifests": package_json,
        "pyproject_manifests": pyprojects,
        "requirements_files": requirements,
        "lockfiles": lockfiles,
        "dockerfiles": dockerfiles,
        "compose_files": compose_files,
        "gaps": gaps,
        "next_owner_evidence_fields": [
            "package_manager_policy",
            "lockfile_owner",
            "python_lock_policy",
            "docker_base_image_policy",
            "compose_image_policy",
            "registry_owner",
            "cve_scan_window",
            "rollback_owner",
        ],
        "execution_boundaries": EXECUTION_BOUNDARIES,
        "operator_interpretation": [
            "此 baseline 只代表 repo 供應鏈來源盤點,不代表 CVE / license / SBOM 已驗收。",
            "Docker image 未全數 digest pinning 是 policy gap,不在本輪自動改 image tag。",
            "Python lockfile 缺口是 owner policy gap,不在本輪自動產生 lockfile。",
            "不得把此 snapshot 當成 install、upgrade、docker pull、registry login 或 deploy 授權。",
        ],
    }
|
||
|
||
|
||
def write_json(path: Path, data: dict[str, Any]) -> None:
    """Write *data* to *path* as indented UTF-8 JSON with a trailing newline.

    Missing parent directories are created first.  Non-ASCII characters are
    written as-is rather than escaped.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(data, ensure_ascii=False, indent=2)
    path.write_text(f"{rendered}\n", encoding="utf-8")
|
||
|
||
|
||
def main() -> None:
    """Command-line entry point: build the snapshot and print a summary line.

    Options:
        --root: Repository root (default: two directory levels above the
            directory containing this script).
        --generated-at: Value to record as ``generated_at`` instead of the
            current time.
        --output: When given, the snapshot JSON is written there; a relative
            path is resolved against the repository root.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    default_root = Path(__file__).resolve().parents[2]
    parser.add_argument(
        "--root",
        default=default_root,
        type=Path,
        help="Repository root. Defaults to the current script's repository.",
    )
    parser.add_argument("--generated-at", help="Override generated_at for committed snapshots.")
    parser.add_argument("--output", type=Path, help="Write snapshot JSON to this path.")
    options = parser.parse_args()

    root = options.root.resolve()
    snapshot = build_snapshot(root, generated_at=options.generated_at)

    if options.output:
        destination = options.output if options.output.is_absolute() else root / options.output
        write_json(destination, snapshot)

    summary = snapshot["summary"]
    status_line = (
        "PACKAGE_SUPPLY_CHAIN_BASELINE_OK "
        f"package_json={summary['package_json_count']} "
        f"pyproject={summary['pyproject_count']} "
        f"requirements={summary['requirements_file_count']} "
        f"dockerfiles={summary['dockerfile_count']} "
        f"compose={summary['compose_file_count']} "
        f"gaps={summary['gap_count']} "
        f"runtime_gate={summary['runtime_gate_count']}"
    )
    print(status_line)


if __name__ == "__main__":
    main()
|