#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Optional execution wrapper around the browse.sh CLI.

The production crawler still relies on the existing Python/API client;
browse.sh is only used for dynamic-page diagnostics, selector exploration and
network tracing, so that the external CLI never becomes a hard dependency of
the scheduler.
"""

from __future__ import annotations

import locale
import os
import shutil
import subprocess
from dataclasses import dataclass
from typing import Mapping, Sequence

# Environment variable that may point at an explicit browse CLI executable.
BROWSE_SH_CLI_ENV = "BROWSE_SH_CLI"
# Default wall-clock limit (seconds) for a regular CLI invocation.
DEFAULT_TIMEOUT_SECONDS = 90
# Wall-clock limit (seconds) for the `--version` availability probe.
VERSION_PROBE_TIMEOUT_SECONDS = 8


@dataclass(frozen=True)
class BrowseShAvailability:
    """Outcome of probing the CLI with ``--version``."""

    # True only when the probe ran and exited with status 0.
    available: bool
    # The exact argv used for the probe (empty when no CLI path was resolved).
    command: tuple[str, ...]
    # Explanation of why the CLI is unavailable; empty on success.
    reason: str = ""
    # Text printed by the probe (stdout, falling back to stderr).
    version: str = ""

    def as_dict(self) -> dict:
        """Return a plain-dict view with ``command`` converted to a list."""
        return {
            "available": self.available,
            "command": list(self.command),
            "reason": self.reason,
            "version": self.version,
        }


@dataclass(frozen=True)
class BrowseShResult:
    """Outcome of a single CLI invocation made through ``BrowseShTool.run``."""

    # True only when the process ran and exited with status 0.
    ok: bool
    # The exact argv that was (or would have been) executed.
    command: tuple[str, ...]
    stdout: str = ""
    stderr: str = ""
    # Exit status; None when the process never completed.
    returncode: int | None = None
    # True when the invocation was aborted by the timeout.
    timed_out: bool = False
    # Non-empty when the CLI could not be found, probed or started.
    unavailable_reason: str = ""

    def as_dict(self) -> dict:
        """Return a plain-dict view with ``command`` converted to a list."""
        return {
            "ok": self.ok,
            "command": list(self.command),
            "stdout": self.stdout,
            "stderr": self.stderr,
            "returncode": self.returncode,
            "timed_out": self.timed_out,
            "unavailable_reason": self.unavailable_reason,
        }


class BrowseShTool:
    """Minimal safe wrapper around the browse CLI.

    Failures to find, start or finish the CLI are reported through the
    returned result objects instead of being raised.
    """

    def __init__(
        self,
        cli_path: str | None = None,
        timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
        env: Mapping[str, str] | None = None,
    ) -> None:
        """Store the configuration.

        Args:
            cli_path: Explicit executable path; takes precedence over the
                ``BROWSE_SH_CLI`` environment variable and ``PATH`` lookup.
            timeout_seconds: Default timeout for :meth:`run`.
            env: Extra environment variables layered over ``os.environ``
                for every child process.
        """
        self.cli_path = cli_path
        self.timeout_seconds = timeout_seconds
        # Copy so later mutation of the caller's mapping cannot affect us.
        self.env = dict(env or {})

    def resolve_cli_path(self) -> str | None:
        """Return the CLI path: explicit override, env var, then ``PATH``.

        An override is returned as-is without checking that it exists; a
        missing file is reported when the command is actually executed.
        """
        override = self.cli_path or os.getenv(BROWSE_SH_CLI_ENV)
        if override:
            return override
        return shutil.which("browse")

    def build_command(self, args: Sequence[str]) -> tuple[str, ...]:
        """Return the argv tuple for ``args``, or ``()`` if no CLI is found.

        Every argument is converted with ``str`` so callers may pass numbers.
        """
        cli_path = self.resolve_cli_path()
        if not cli_path:
            return ()
        return (cli_path, *(str(arg) for arg in args))

    @staticmethod
    def _decode_output(data: str | bytes | None) -> str:
        """Normalise captured process output to ``str``.

        ``subprocess.TimeoutExpired`` carries captured output as ``bytes``
        even when the process was started with ``text=True``, and ``None``
        when nothing was captured; both cases are mapped to text here.
        """
        if data is None:
            return ""
        if isinstance(data, bytes):
            # Same encoding text mode uses; never fail on undecodable bytes.
            return data.decode(locale.getpreferredencoding(False), errors="replace")
        return data

    def _execute(
        self, command: Sequence[str], timeout: float
    ) -> subprocess.CompletedProcess[str]:
        """Run ``command`` in text mode, capturing output, with merged env."""
        return subprocess.run(
            command,
            capture_output=True,
            check=False,
            env={**os.environ, **self.env},
            text=True,
            timeout=timeout,
        )

    def availability(self) -> BrowseShAvailability:
        """Probe the CLI with ``--version`` and report whether it is usable."""
        command = self.build_command(("--version",))
        if not command:
            return BrowseShAvailability(
                available=False,
                command=(),
                reason="browse CLI 未安裝;請先安裝並確認 PATH 可找到 browse。",
            )
        try:
            completed = self._execute(command, VERSION_PROBE_TIMEOUT_SECONDS)
        except FileNotFoundError:
            return BrowseShAvailability(False, command, "browse CLI 路徑不存在。")
        except subprocess.TimeoutExpired:
            return BrowseShAvailability(False, command, "browse --version 執行逾時。")
        except OSError as exc:
            return BrowseShAvailability(False, command, f"browse CLI 無法啟動:{exc}")

        stdout = (completed.stdout or "").strip()
        stderr = (completed.stderr or "").strip()
        if completed.returncode != 0:
            reason = stderr or stdout or f"browse --version 回傳 {completed.returncode}"
            return BrowseShAvailability(False, command, reason)
        # Some CLIs print their version to stderr, so fall back to it.
        return BrowseShAvailability(True, command, version=stdout or stderr)

    def run(
        self,
        args: Sequence[str],
        timeout_seconds: float | None = None,
        require_available: bool = True,
    ) -> BrowseShResult:
        """Execute the CLI with ``args`` and capture the outcome.

        Args:
            args: Arguments placed after the CLI executable.
            timeout_seconds: Per-call timeout; ``None`` or ``0`` falls back
                to the instance default.
            require_available: When true, run the ``--version`` probe first
                and return early if it fails.

        Returns:
            A ``BrowseShResult``; this method does not raise for a missing
            CLI, start-up failures or timeouts.
        """
        command = self.build_command(args)
        if not command:
            return BrowseShResult(
                ok=False,
                command=(),
                unavailable_reason="browse CLI 未安裝;此工具只會略過,不影響正式爬蟲。",
            )

        if require_available:
            availability = self.availability()
            if not availability.available:
                return BrowseShResult(
                    ok=False,
                    command=command,
                    unavailable_reason=availability.reason,
                )

        try:
            completed = self._execute(command, timeout_seconds or self.timeout_seconds)
        except subprocess.TimeoutExpired as exc:
            # Partial output on timeout may be bytes despite text=True.
            return BrowseShResult(
                ok=False,
                command=command,
                stdout=self._decode_output(exc.stdout),
                stderr=self._decode_output(exc.stderr),
                timed_out=True,
            )
        except OSError as exc:
            return BrowseShResult(
                ok=False,
                command=command,
                stderr=str(exc),
                unavailable_reason=str(exc),
            )

        return BrowseShResult(
            ok=completed.returncode == 0,
            command=command,
            stdout=completed.stdout or "",
            stderr=completed.stderr or "",
            returncode=completed.returncode,
        )

    def run_skill(
        self,
        skill_name: str,
        *skill_args: str,
        timeout_seconds: float | None = None,
    ) -> BrowseShResult:
        """Run ``skill_name`` followed by ``skill_args`` via :meth:`run`."""
        return self.run((skill_name, *skill_args), timeout_seconds=timeout_seconds)