From 8246411b7b4f78ea87824feac7fd195322c469b2 Mon Sep 17 00:00:00 2001
From: Svrnty <info@svrnty.io>
Date: Mon, 25 May 2026 13:27:29 -0400
Subject: [PATCH] Harden CTO sandcastle provider gate

---
 evals/reports/2026-05-25-live-drift.yaml      |  8 ++--
 .../2026-05-25-live-promotion-readiness.yaml  |  4 +-
 ...5-25-local-regression-execution-slice.yaml | 44 +++++++++----------
 lib/cto-worker.sh                             | 12 +++++
 4 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/evals/reports/2026-05-25-live-drift.yaml b/evals/reports/2026-05-25-live-drift.yaml
index 6ac2906..cf3032e 100644
--- a/evals/reports/2026-05-25-live-drift.yaml
+++ b/evals/reports/2026-05-25-live-drift.yaml
@@ -6,7 +6,7 @@ eval_id: live-profile-drift
 profile: cto-planb
 status: pass
 score: 100
-checked_at: '2026-05-25T17:21:42Z'
+checked_at: '2026-05-25T17:27:03Z'
 checks:
   correctness: pass
   verification: pass
@@ -76,7 +76,7 @@ commands:
 - command: hermes -p cto-planb skills list
   cwd: /home/svrnty/workspaces/hermes
   returncode: 0
-  duration_ms: 214
+  duration_ms: 203
   stdout: "                        Installed Skills                        \n\u250F\
     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\
     \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\
@@ -113,7 +113,7 @@ commands:
 - command: hermes -p cto-planb mcp list
   cwd: /home/svrnty/workspaces/hermes
   returncode: 0
-  duration_ms: 430
+  duration_ms: 401
   stdout: "\n  MCP Servers:\n\n  Name             Transport                      Tools\
     \        Status    \n  \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\
     \u2500\u2500\u2500\u2500\u2500\u2500 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\
@@ -126,7 +126,7 @@ commands:
 - command: ./install.sh --dry-run
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 3
+  duration_ms: 2
   stdout: "== preflight ==\n  hermes \u2713  python3 \u2713  sqlite3 \u2713  HERMES_HOME\
     \ \u2713\n  sandcastle \u2713 (/home/svrnty/workspaces/hermes/cto/../sandcastle)\n\
     == DRY RUN \u2014 no mutations ==\n  would: ln -sfn /home/svrnty/workspaces/hermes/cto\
diff --git a/evals/reports/2026-05-25-live-promotion-readiness.yaml b/evals/reports/2026-05-25-live-promotion-readiness.yaml
index 913d419..ee5a978 100644
--- a/evals/reports/2026-05-25-live-promotion-readiness.yaml
+++ b/evals/reports/2026-05-25-live-promotion-readiness.yaml
@@ -59,7 +59,7 @@ eval_results:
   command:
     command: hermes -p cto-planb skills list
     returncode: 0
-    duration_ms: 215
+    duration_ms: 229
     stdout: "                        Installed Skills                        \n\u250F\
       \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\
       \u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2533\u2501\
@@ -100,7 +100,7 @@ eval_results:
   command:
     command: hermes -p cto-planb mcp list
     returncode: 0
-    duration_ms: 435
+    duration_ms: 450
     stdout: "\n  MCP Servers:\n\n  Name             Transport                    \
       \  Tools        Status    \n  \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\
       \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 \u2500\u2500\u2500\u2500\u2500\
diff --git a/evals/reports/2026-05-25-local-regression-execution-slice.yaml b/evals/reports/2026-05-25-local-regression-execution-slice.yaml
index 084119c..df32a08 100644
--- a/evals/reports/2026-05-25-local-regression-execution-slice.yaml
+++ b/evals/reports/2026-05-25-local-regression-execution-slice.yaml
@@ -31,26 +31,26 @@ eval_results:
   evidence:
   - cto/evals/reports/2026-05-25-promotion-suite-readiness.yaml
   command: python3 evals/runners/run-promotion-suite.py --output evals/reports/2026-05-25-promotion-suite-readiness.yaml
-  duration_ms: 35
+  duration_ms: 39
 - eval_id: promotion-fixture-execution
   status: pass
   evidence:
   - cto/evals/reports/2026-05-25-promotion-fixture-execution.yaml
   command: python3 evals/runners/run-promotion-fixtures.py --output evals/reports/2026-05-25-promotion-fixture-execution.yaml
     --artifact-output evals/artifacts/2026-05-25-promotion-fixture-execution.json
-  duration_ms: 741
+  duration_ms: 780
 - eval_id: live-promotion-readiness
   status: pass
   evidence:
   - cto/evals/reports/2026-05-25-live-promotion-readiness.yaml
   command: python3 evals/runners/run-live-promotion-readiness.py --output evals/reports/2026-05-25-live-promotion-readiness.yaml
-  duration_ms: 687
+  duration_ms: 717
 - eval_id: static-prd-contract
   status: pass
   evidence:
   - tests/e2e/test_j_cto_webui_prd.py
   command: pytest -q tests/e2e/test_j_cto_webui_prd.py
-  duration_ms: 1180
+  duration_ms: 1227
 - eval_id: webui-cto-event-browser
   status: pass
   evidence:
@@ -59,37 +59,37 @@ eval_results:
   command: pytest -q tests/test_cto_events.py tests/test_live_tool_callback_events.py
     tests/test_cto_webui_journal_e2e.py tests/test_cto_browser_e2e.py tests/test_cancel_interrupt.py
     tests/test_approval_queue.py
-  duration_ms: 3186
+  duration_ms: 3273
 - eval_id: webui-cto-live-streaming
   status: pass
   evidence:
   - hermes-webui/tests/test_cto_live_streaming_e2e.py
   command: pytest -q tests/test_cto_live_streaming_e2e.py
-  duration_ms: 2097
+  duration_ms: 1831
 - eval_id: live-profile-drift
   status: pass
   evidence:
   - cto/evals/reports/2026-05-25-live-drift.yaml
   command: python3 evals/runners/drift.py --output evals/reports/2026-05-25-live-drift.yaml
-  duration_ms: 690
+  duration_ms: 649
 - eval_id: eval-report-scoring
   status: pass
   evidence:
   - cto/evals/reports/*.yaml
   command: bash -lc for r in evals/reports/*.yaml; do python3 evals/runners/score.py
     "$r"; done
-  duration_ms: 291
+  duration_ms: 294
 - eval_id: diff-whitespace-check
   status: pass
   evidence:
   - git diff --check
   command: git diff --check
-  duration_ms: 7
+  duration_ms: 6
 commands:
 - command: python3 evals/runners/run-promotion-suite.py --output evals/reports/2026-05-25-promotion-suite-readiness.yaml
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 35
+  duration_ms: 39
   stdout: 'wrote /home/svrnty/workspaces/hermes/cto/evals/reports/2026-05-25-promotion-suite-readiness.yaml
 
     '
@@ -98,7 +98,7 @@ commands:
     --artifact-output evals/artifacts/2026-05-25-promotion-fixture-execution.json
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 741
+  duration_ms: 780
   stdout: 'wrote /home/svrnty/workspaces/hermes/cto/evals/reports/2026-05-25-promotion-fixture-execution.yaml
 
     wrote /home/svrnty/workspaces/hermes/cto/evals/artifacts/2026-05-25-promotion-fixture-execution.json
@@ -108,7 +108,7 @@ commands:
 - command: python3 evals/runners/run-live-promotion-readiness.py --output evals/reports/2026-05-25-live-promotion-readiness.yaml
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 687
+  duration_ms: 717
   stdout: 'wrote evals/reports/2026-05-25-live-promotion-readiness.yaml
 
     '
@@ -116,10 +116,10 @@ commands:
 - command: pytest -q tests/e2e/test_j_cto_webui_prd.py
   cwd: /home/svrnty/workspaces/hermes
   returncode: 0
-  duration_ms: 1180
+  duration_ms: 1227
   stdout: '..........                                                               [100%]
 
-    10 passed in 1.00s
+    10 passed in 1.05s
 
     '
   stderr: ''
@@ -128,27 +128,27 @@ commands:
     tests/test_approval_queue.py
   cwd: /home/svrnty/workspaces/hermes/hermes-webui
   returncode: 0
-  duration_ms: 3186
+  duration_ms: 3273
   stdout: '......................................                                   [100%]
 
-    38 passed in 2.72s
+    38 passed in 2.78s
 
     '
   stderr: ''
 - command: pytest -q tests/test_cto_live_streaming_e2e.py
   cwd: /home/svrnty/workspaces/hermes/hermes-webui
   returncode: 0
-  duration_ms: 2097
-  stdout: '.                                                                        [100%]
+  duration_ms: 1831
+  stdout: '..                                                                       [100%]
 
-    1 passed in 1.77s
+    2 passed in 1.49s
 
     '
   stderr: ''
 - command: python3 evals/runners/drift.py --output evals/reports/2026-05-25-live-drift.yaml
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 690
+  duration_ms: 649
   stdout: 'wrote evals/reports/2026-05-25-live-drift.yaml
 
     '
@@ -157,7 +157,7 @@ commands:
     "$r"; done
   cwd: /home/svrnty/workspaces/hermes/cto
   returncode: 0
-  duration_ms: 291
+  duration_ms: 294
   stdout: 'ok
 
     ok
@@ -183,7 +183,7 @@ commands:
 - command: git diff --check
   cwd: /home/svrnty/workspaces/hermes
   returncode: 0
-  duration_ms: 7
+  duration_ms: 6
   stdout: ''
   stderr: ''
 notes:
diff --git a/lib/cto-worker.sh b/lib/cto-worker.sh
index e6dc7aa..f178934 100755
--- a/lib/cto-worker.sh
+++ b/lib/cto-worker.sh
@@ -36,6 +36,18 @@ cmd_sandcastle() {
   [ -d "$target" ] || { echo "ERROR: target repo $target not found" >&2; return 1; }
   [ -f "$prompt_file" ] || { echo "ERROR: prompt file $prompt_file not found" >&2; return 1; }
 
+  case "$provider" in
+    docker|podman) ;;
+    noSandbox|nosandbox|head)
+      echo "BLOCK: unsafe sandcastle provider/strategy requires JP approval: $provider" >&2
+      return 1
+      ;;
+    *)
+      echo "BLOCK: unsupported sandcastle provider: $provider" >&2
+      return 1
+      ;;
+  esac
+
   # Hard rule: never run against read-only workspace siblings.
   case "$(basename "$target")" in
     hermes-agent|hermes-webui|marketingskills|sandcastle)