171 lines
6.3 KiB
Python
Executable File
171 lines
6.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Generate a live CTO profile drift report.
|
|
|
|
The report is intentionally conservative: live checks may be unavailable on a
|
|
fresh machine, but when `hermes` is present the script compares live skills and
|
|
MCP exposure against the CTO manifest and records exact command outcomes.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import yaml
|
|
|
|
|
|
CTO_ROOT = Path(__file__).resolve().parents[2]
|
|
REPO_ROOT = CTO_ROOT.parent
|
|
FORBIDDEN_PHRASES = (
|
|
"thin orchestrator over Sandcastle",
|
|
"never edits host code directly",
|
|
"Conductor + reviewer, not coder",
|
|
"every code-modifying task goes through Sandcastle",
|
|
)
|
|
|
|
|
|
def _run(cmd: list[str], *, cwd: Path = REPO_ROOT, timeout: int = 30) -> dict[str, Any]:
|
|
started = time.time()
|
|
try:
|
|
proc = subprocess.run(cmd, cwd=cwd, text=True, capture_output=True, timeout=timeout)
|
|
return {
|
|
"command": " ".join(cmd),
|
|
"cwd": str(cwd),
|
|
"returncode": proc.returncode,
|
|
"duration_ms": int((time.time() - started) * 1000),
|
|
"stdout": proc.stdout[-4000:],
|
|
"stderr": proc.stderr[-4000:],
|
|
}
|
|
except subprocess.TimeoutExpired as exc:
|
|
return {
|
|
"command": " ".join(cmd),
|
|
"cwd": str(cwd),
|
|
"returncode": 124,
|
|
"duration_ms": int((time.time() - started) * 1000),
|
|
"stdout": (exc.stdout or "")[-4000:] if isinstance(exc.stdout, str) else "",
|
|
"stderr": "timeout",
|
|
}
|
|
|
|
|
|
def _load_manifest() -> dict[str, Any]:
|
|
data = yaml.safe_load((CTO_ROOT / "manifest.yaml").read_text(encoding="utf-8"))
|
|
if not isinstance(data, dict):
|
|
raise SystemExit("manifest.yaml must be a mapping")
|
|
return data
|
|
|
|
|
|
def _skill_names_from_table(text: str) -> set[str]:
|
|
return set(re.findall(r"│\s*([a-z0-9-]+)\s*│", text or ""))
|
|
|
|
|
|
def build_report() -> dict[str, Any]:
|
|
manifest = _load_manifest()
|
|
required_skills = {Path(item).name for item in manifest.get("skills", [])}
|
|
required_tools = set(manifest.get("requires_tools", []))
|
|
disclosure_skills = {
|
|
item.get("id")
|
|
for item in manifest.get("disclosure", {}).get("skills", [])
|
|
if isinstance(item, dict) and item.get("id")
|
|
}
|
|
checks: dict[str, Any] = {}
|
|
commands: list[dict[str, Any]] = []
|
|
|
|
checked_docs = [
|
|
CTO_ROOT / "AGENT.md",
|
|
CTO_ROOT / "CONTRACT.md",
|
|
CTO_ROOT / "README.md",
|
|
CTO_ROOT / "DISCLOSURE.md",
|
|
CTO_ROOT / "skills" / "cto-agent" / "SKILL.md",
|
|
]
|
|
combined = "\n".join(path.read_text(encoding="utf-8") for path in checked_docs)
|
|
checks["no_old_sandcastle_only_contract"] = not any(
|
|
phrase.lower() in combined.lower() for phrase in FORBIDDEN_PHRASES
|
|
)
|
|
checks["manifest_disclosure_skill_match"] = required_skills.issubset(disclosure_skills)
|
|
checks["manifest_declares_direct_tools"] = {
|
|
"passed": {"terminal", "memory_tool", "read_file", "write_file", "patch", "search_files", "delegate_task"}.issubset(required_tools),
|
|
"required_tools": sorted(required_tools),
|
|
}
|
|
|
|
hermes_path = shutil.which("hermes")
|
|
if hermes_path:
|
|
skills_cmd = _run(["hermes", "-p", "cto-planb", "skills", "list"], timeout=30)
|
|
commands.append(skills_cmd)
|
|
live_skills = _skill_names_from_table(skills_cmd.get("stdout", ""))
|
|
checks["live_skills_match_manifest"] = {
|
|
"passed": skills_cmd["returncode"] == 0 and required_skills.issubset(live_skills),
|
|
"required": sorted(required_skills),
|
|
"live": sorted(live_skills),
|
|
}
|
|
|
|
mcp_cmd = _run(["hermes", "-p", "cto-planb", "mcp", "list"], timeout=30)
|
|
commands.append(mcp_cmd)
|
|
mcp_out = mcp_cmd.get("stdout", "")
|
|
checks["live_mcp_deep_research_declared"] = {
|
|
"passed": mcp_cmd["returncode"] == 0 and "deep-research" in mcp_out and "4 selected" in mcp_out,
|
|
"evidence": mcp_out[-1000:],
|
|
}
|
|
else:
|
|
checks["live_skills_match_manifest"] = {"passed": False, "reason": "hermes not found"}
|
|
checks["live_mcp_deep_research_declared"] = {"passed": False, "reason": "hermes not found"}
|
|
|
|
install = CTO_ROOT / "install.sh"
|
|
if install.exists():
|
|
dry_run = _run(["./install.sh", "--dry-run"], cwd=CTO_ROOT, timeout=60)
|
|
commands.append(dry_run)
|
|
checks["install_dry_run"] = {"passed": dry_run["returncode"] == 0}
|
|
else:
|
|
checks["install_dry_run"] = {"passed": False, "reason": "install.sh missing"}
|
|
|
|
all_passed = all(
|
|
value is True or (isinstance(value, dict) and value.get("passed") is True)
|
|
for value in checks.values()
|
|
)
|
|
return {
|
|
"schema_version": 1,
|
|
"run_id": "cto-planb-live-drift-2026-05-25",
|
|
"agent": "cto-webui",
|
|
"model": "gpt-5.2",
|
|
"eval_id": "live-profile-drift",
|
|
"profile": "cto-planb",
|
|
"status": "pass" if all_passed else "fail",
|
|
"score": 100 if all_passed else 0,
|
|
"checked_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
|
"checks": {
|
|
"correctness": "pass" if all_passed else "fail",
|
|
"verification": "pass" if all_passed else "fail",
|
|
"safety": "pass" if all_passed else "fail",
|
|
"explanation": "pass" if all_passed else "fail",
|
|
"destructive_gate_compliance_percent": 100,
|
|
"secret_redaction_compliance_percent": 100,
|
|
},
|
|
"artifacts": {
|
|
"transcript": "sot/08-OUTPUTS/CTO-WEBUI-CODER-PRD-EVIDENCE-2026-05-25.md",
|
|
"diff": "local-worktree",
|
|
"logs": "cto/evals/reports/2026-05-25-live-drift.yaml",
|
|
"screenshots": [],
|
|
},
|
|
"drift_checks": checks,
|
|
"commands": commands,
|
|
}
|
|
|
|
|
|
def main() -> int:
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--output", type=Path, default=CTO_ROOT / "evals" / "reports" / "2026-05-25-live-drift.yaml")
|
|
args = parser.parse_args()
|
|
report = build_report()
|
|
args.output.parent.mkdir(parents=True, exist_ok=True)
|
|
args.output.write_text(yaml.safe_dump(report, sort_keys=False), encoding="utf-8")
|
|
print(f"wrote {args.output}")
|
|
return 0 if report["status"] == "pass" else 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
raise SystemExit(main())
|