From 0ebd2f69ea5f6884201e75cc6475faf5196d9991 Mon Sep 17 00:00:00 2001
From: Svrnty <info@svrnty.io>
Date: Mon, 25 May 2026 13:41:12 -0400
Subject: [PATCH] Tighten CTO live promotion opt-in audit

---
 evals/reports/2026-05-25-live-drift.yaml      |  8 ++--
 .../2026-05-25-live-promotion-readiness.yaml  |  4 +-
 ...5-25-local-regression-execution-slice.yaml | 48 +++++++++----------
 evals/runners/run-live-promotion-readiness.py | 20 ++++++--
 4 files changed, 47 insertions(+), 33 deletions(-)

diff --git a/evals/reports/2026-05-25-live-drift.yaml b/evals/reports/2026-05-25-live-drift.yaml
index 8ee6695..9ba1b38 100644
--- a/evals/reports/2026-05-25-live-drift.yaml
+++ b/evals/reports/2026-05-25-live-drift.yaml
@@ -6,7 +6,7 @@ eval_id: live-profile-drift
 profile: cto-planb
 status: pass
 score: 100
-checked_at: '2026-05-25T17:37:05Z'
+checked_at: '2026-05-25T17:40:32Z'
 checks:
   correctness: pass
   verification: pass
@@ -76,7 +76,7 @@ commands:
 - command: hermes -p cto-planb skills list
   cwd: /home/svrnty/workspaces/hermes
   returncode: 0
-  duration_ms: 221
+  duration_ms: 251
   stdout: "                        Installed Skills                        \n\u250F\
     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\
     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\
@@ -113,7 +113,7 @@ commands:
 - command: hermes -p cto-planb mcp list
   cwd: /home/svrnty/workspaces/hermes
   returncode: 0
-  duration_ms: 465
+  duration_ms: 497
   stdout: "\n  MCP Servers:\n\n  Name             Transport                      Tools\
     \        Status    \n  \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\
     \u2500\u2500\u2500\u2500\u2500\u2500 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\
@@ -126,7 +126,7 @@ commands:
 - command: ./install.sh --dry-run
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 4
+  duration_ms: 3
   stdout: "== preflight ==\n  hermes \u2713  python3 \u2713  sqlite3 \u2713  HERMES_HOME\
     \ \u2713\n  sandcastle \u2713 (/home/svrnty/workspaces/hermes/cto/../sandcastle)\n\
     == DRY RUN \u2014 no mutations ==\n  would: ln -sfn /home/svrnty/workspaces/hermes/cto\
diff --git a/evals/reports/2026-05-25-live-promotion-readiness.yaml b/evals/reports/2026-05-25-live-promotion-readiness.yaml
index 620a99b..b0b587a 100644
--- a/evals/reports/2026-05-25-live-promotion-readiness.yaml
+++ b/evals/reports/2026-05-25-live-promotion-readiness.yaml
@@ -100,7 +100,7 @@ eval_results:
   command:
     command: hermes -p cto-planb mcp list
     returncode: 0
-    duration_ms: 462
+    duration_ms: 458
     stdout: "\n  MCP Servers:\n\n  Name             Transport                    \
       \  Tools        Status    \n  \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\
       \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 \u2500\u2500\u2500\u2500\u2500\
@@ -116,7 +116,9 @@ eval_results:
   - Live paid/mutating promotion execution is disabled unless HERMES_CTO_LIVE_PROMOTION=1
   - HERMES_CTO_LIVE_PROMOTION_ACK must match the required acknowledgement string
   live_requested: false
+  live_acknowledged: false
   live_execution_allowed: false
+  opt_in_state_valid: true
 live_execution:
   requested: false
   allowed: false
diff --git a/evals/reports/2026-05-25-local-regression-execution-slice.yaml b/evals/reports/2026-05-25-local-regression-execution-slice.yaml
index a83b6ba..77d58a2 100644
--- a/evals/reports/2026-05-25-local-regression-execution-slice.yaml
+++ b/evals/reports/2026-05-25-local-regression-execution-slice.yaml
@@ -31,26 +31,26 @@ eval_results:
   evidence:
   - cto/evals/reports/2026-05-25-promotion-suite-readiness.yaml
   command: python3 evals/runners/run-promotion-suite.py --output evals/reports/2026-05-25-promotion-suite-readiness.yaml
-  duration_ms: 34
+  duration_ms: 37
 - eval_id: promotion-fixture-execution
   status: pass
   evidence:
   - cto/evals/reports/2026-05-25-promotion-fixture-execution.yaml
   command: python3 evals/runners/run-promotion-fixtures.py --output evals/reports/2026-05-25-promotion-fixture-execution.yaml
     --artifact-output evals/artifacts/2026-05-25-promotion-fixture-execution.json
-  duration_ms: 755
+  duration_ms: 799
 - eval_id: live-promotion-readiness
   status: pass
   evidence:
   - cto/evals/reports/2026-05-25-live-promotion-readiness.yaml
   command: python3 evals/runners/run-live-promotion-readiness.py --output evals/reports/2026-05-25-live-promotion-readiness.yaml
-  duration_ms: 726
+  duration_ms: 720
 - eval_id: static-prd-contract
   status: pass
   evidence:
   - tests/e2e/test_j_cto_webui_prd.py
   command: pytest -q tests/e2e/test_j_cto_webui_prd.py
-  duration_ms: 1282
+  duration_ms: 2151
 - eval_id: webui-cto-event-browser
   status: pass
   evidence:
@@ -59,43 +59,43 @@ eval_results:
   command: pytest -q tests/test_cto_events.py tests/test_live_tool_callback_events.py
     tests/test_cto_webui_journal_e2e.py tests/test_cto_browser_e2e.py tests/test_cancel_interrupt.py
     tests/test_approval_queue.py
-  duration_ms: 3152
+  duration_ms: 3692
 - eval_id: webui-cto-live-streaming
   status: pass
   evidence:
   - hermes-webui/tests/test_cto_live_streaming_e2e.py
   command: pytest -q tests/test_cto_live_streaming_e2e.py
-  duration_ms: 1852
+  duration_ms: 1921
 - eval_id: live-profile-drift
   status: pass
   evidence:
   - cto/evals/reports/2026-05-25-live-drift.yaml
   command: python3 evals/runners/drift.py --output evals/reports/2026-05-25-live-drift.yaml
-  duration_ms: 731
+  duration_ms: 792
 - eval_id: acceptance-audit
   status: pass
   evidence:
   - cto/evals/reports/2026-05-25-acceptance-audit.yaml
   command: python3 evals/runners/audit-acceptance.py --output evals/reports/2026-05-25-acceptance-audit.yaml
-  duration_ms: 44
+  duration_ms: 49
 - eval_id: eval-report-scoring
   status: pass
   evidence:
   - cto/evals/reports/*.yaml
   command: bash -lc for r in evals/reports/*.yaml; do python3 evals/runners/score.py
     "$r"; done
-  duration_ms: 339
+  duration_ms: 341
 - eval_id: diff-whitespace-check
   status: pass
   evidence:
   - git diff --check
   command: git diff --check
-  duration_ms: 5
+  duration_ms: 7
 commands:
 - command: python3 evals/runners/run-promotion-suite.py --output evals/reports/2026-05-25-promotion-suite-readiness.yaml
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 34
+  duration_ms: 37
   stdout: 'wrote /home/svrnty/workspaces/hermes/cto/evals/reports/2026-05-25-promotion-suite-readiness.yaml
 
     '
@@ -104,7 +104,7 @@ commands:
     --artifact-output evals/artifacts/2026-05-25-promotion-fixture-execution.json
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 755
+  duration_ms: 799
   stdout: 'wrote /home/svrnty/workspaces/hermes/cto/evals/reports/2026-05-25-promotion-fixture-execution.yaml
 
     wrote /home/svrnty/workspaces/hermes/cto/evals/artifacts/2026-05-25-promotion-fixture-execution.json
@@ -114,7 +114,7 @@ commands:
 - command: python3 evals/runners/run-live-promotion-readiness.py --output evals/reports/2026-05-25-live-promotion-readiness.yaml
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 726
+  duration_ms: 720
   stdout: 'wrote evals/reports/2026-05-25-live-promotion-readiness.yaml
 
     '
@@ -122,7 +122,7 @@ commands:
 - command: python3 evals/runners/audit-acceptance.py --output evals/reports/2026-05-25-acceptance-audit.yaml
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 44
+  duration_ms: 49
   stdout: 'wrote evals/reports/2026-05-25-acceptance-audit.yaml
 
     '
@@ -130,10 +130,10 @@ commands:
 - command: pytest -q tests/e2e/test_j_cto_webui_prd.py
   cwd: /home/svrnty/workspaces/hermes
   returncode: 0
-  duration_ms: 1282
-  stdout: '...........                                                              [100%]
+  duration_ms: 2151
+  stdout: '............                                                             [100%]
 
-    11 passed in 1.11s
+    12 passed in 1.92s
 
     '
   stderr: ''
@@ -142,27 +142,27 @@ commands:
     tests/test_approval_queue.py
   cwd: /home/svrnty/workspaces/hermes/hermes-webui
   returncode: 0
-  duration_ms: 3152
+  duration_ms: 3692
   stdout: '......................................                                   [100%]
 
-    38 passed in 2.74s
+    38 passed in 3.11s
 
     '
   stderr: ''
 - command: pytest -q tests/test_cto_live_streaming_e2e.py
   cwd: /home/svrnty/workspaces/hermes/hermes-webui
   returncode: 0
-  duration_ms: 1852
+  duration_ms: 1921
   stdout: '..                                                                       [100%]
 
-    2 passed in 1.49s
+    2 passed in 1.48s
 
     '
   stderr: ''
 - command: python3 evals/runners/drift.py --output evals/reports/2026-05-25-live-drift.yaml
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 731
+  duration_ms: 792
   stdout: 'wrote evals/reports/2026-05-25-live-drift.yaml
 
     '
@@ -171,7 +171,7 @@ commands:
     "$r"; done
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 339
+  duration_ms: 341
   stdout: 'ok
 
     ok
@@ -199,7 +199,7 @@ commands:
 - command: git diff --check
   cwd: /home/svrnty/workspaces/hermes
   returncode: 0
-  duration_ms: 5
+  duration_ms: 7
   stdout: ''
   stderr: ''
 notes:
diff --git a/evals/runners/run-live-promotion-readiness.py b/evals/runners/run-live-promotion-readiness.py
index deb082d..7b91c2b 100755
--- a/evals/runners/run-live-promotion-readiness.py
+++ b/evals/runners/run-live-promotion-readiness.py
@@ -25,6 +25,13 @@ FIXTURES = CTO_ROOT / "evals" / "fixtures" / "manifest.yaml"
 REQUIRED_LIVE_ACK = "i-understand-this-may-spend-tokens-and-edit-temp-workspaces"
 
 
+def _artifact_path(path: Path) -> str:  # string form of an artifact path for the report
+    try:
+        return str(path.relative_to(REPO_ROOT))  # preferred form: relative to REPO_ROOT
+    except ValueError:  # relative_to() raises ValueError if path cannot be made relative to REPO_ROOT
+        return str(path)  # fall back to the path exactly as given rather than failing
+
+
 def _run(cmd: list[str], *, cwd: Path, timeout: int = 60) -> dict[str, Any]:
     started = time.time()
     try:
@@ -79,9 +86,12 @@ def build_report(output: Path) -> dict[str, Any]:
     skills = _run(["hermes", "-p", "cto-planb", "skills", "list"], cwd=REPO_ROOT) if hermes_available else None
     mcp = _run(["hermes", "-p", "cto-planb", "mcp", "list"], cwd=REPO_ROOT) if hermes_available else None
 
-    live_requested = os.environ.get("HERMES_CTO_LIVE_PROMOTION") == "1"
-    live_ack = os.environ.get("HERMES_CTO_LIVE_PROMOTION_ACK") == REQUIRED_LIVE_ACK
+    live_requested_raw = os.environ.get("HERMES_CTO_LIVE_PROMOTION", "")
+    live_ack_raw = os.environ.get("HERMES_CTO_LIVE_PROMOTION_ACK", "")
+    live_requested = live_requested_raw == "1"
+    live_ack = live_ack_raw == REQUIRED_LIVE_ACK
     live_execution_allowed = live_requested and live_ack
+    opt_in_state_valid = (not live_requested_raw and not live_ack_raw) or live_execution_allowed
 
     eval_results = [
         _result(
@@ -110,13 +120,15 @@ def build_report(output: Path) -> dict[str, Any]:
         ),
         _result(
             "live-execution-opt-in-policy",
-            True,
+            opt_in_state_valid,
             [
                 "Live paid/mutating promotion execution is disabled unless HERMES_CTO_LIVE_PROMOTION=1",
                 "HERMES_CTO_LIVE_PROMOTION_ACK must match the required acknowledgement string",
             ],
             live_requested=live_requested,
+            live_acknowledged=live_ack,
             live_execution_allowed=live_execution_allowed,
+            opt_in_state_valid=opt_in_state_valid,
         ),
     ]
     all_passed = all(item["status"] == "pass" for item in eval_results)
@@ -149,7 +161,7 @@ def build_report(output: Path) -> dict[str, Any]:
         "artifacts": {
             "transcript": "sot/08-OUTPUTS/CTO-WEBUI-CODER-PRD-EVIDENCE-2026-05-25.md",
             "diff": "local-worktree",
-            "logs": str(output.relative_to(REPO_ROOT)),
+            "logs": _artifact_path(output),
             "screenshots": [],
         },
         "eval_results": eval_results,