cto/evals/runners/run-webui-cto.sh
2026-05-25 12:57:33 -04:00

15 lines
546 B
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail

# Local regression entrypoint for the deterministic CTO WebUI slice.
#
# Runs the current direct WebUI CTO proof slice, asks the regression runner to
# write its eval report to a fixed path, then passes that same report to the
# scorer. It deliberately makes no claim of Codex comparative parity.
#
# Takes no arguments. Exits non-zero as soon as any step fails (set -e).

main() {
  # The repository root is three directories above this script's own location.
  local repo_root
  repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"

  # Report path is relative to the cto/ directory; the runner receives it via
  # --output and the scorer receives it as its argument, so keep it in one place.
  local -r report_path="evals/reports/2026-05-25-local-regression-execution-slice.yaml"

  # Both Python entrypoints are addressed relative to cto/.
  cd "${repo_root}/cto"

  python3 evals/runners/run-local-regression.py --output "${report_path}"
  python3 evals/runners/score.py "${report_path}"
}

main "$@"