diff --git a/evals/reports/2026-05-25-live-drift.yaml b/evals/reports/2026-05-25-live-drift.yaml index 37ca9c6..465d6c6 100644 --- a/evals/reports/2026-05-25-live-drift.yaml +++ b/evals/reports/2026-05-25-live-drift.yaml @@ -6,7 +6,7 @@ eval_id: live-profile-drift profile: cto-planb status: pass score: 100 -checked_at: '2026-05-25T16:56:06Z' +checked_at: '2026-05-25T17:07:15Z' checks: correctness: pass verification: pass @@ -113,7 +113,7 @@ commands: - command: hermes -p cto-planb mcp list cwd: /home/svrnty/workspaces/hermes returncode: 0 - duration_ms: 401 + duration_ms: 440 stdout: "\n MCP Servers:\n\n Name Transport Tools\ \ Status \n \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\ \u2500\u2500\u2500\u2500\u2500\u2500 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\ @@ -126,7 +126,7 @@ commands: - command: ./install.sh --dry-run cwd: /home/svrnty/workspaces/hermes/cto returncode: 0 - duration_ms: 2 + duration_ms: 3 stdout: "== preflight ==\n hermes \u2713 python3 \u2713 sqlite3 \u2713 HERMES_HOME\ \ \u2713\n sandcastle \u2713 (/home/svrnty/workspaces/hermes/cto/../sandcastle)\n\ == DRY RUN \u2014 no mutations ==\n would: ln -sfn /home/svrnty/workspaces/hermes/cto\ diff --git a/evals/reports/2026-05-25-local-regression-execution-slice.yaml b/evals/reports/2026-05-25-local-regression-execution-slice.yaml index 3b23f08..cc7580e 100644 --- a/evals/reports/2026-05-25-local-regression-execution-slice.yaml +++ b/evals/reports/2026-05-25-local-regression-execution-slice.yaml @@ -38,45 +38,45 @@ eval_results: - cto/evals/reports/2026-05-25-promotion-fixture-execution.yaml command: python3 evals/runners/run-promotion-fixtures.py --output evals/reports/2026-05-25-promotion-fixture-execution.yaml --artifact-output evals/artifacts/2026-05-25-promotion-fixture-execution.json - duration_ms: 710 + duration_ms: 750 - eval_id: static-prd-contract status: pass evidence: - tests/e2e/test_j_cto_webui_prd.py command: pytest -q tests/e2e/test_j_cto_webui_prd.py - duration_ms: 1143 + duration_ms: 1223 - eval_id: webui-cto-event-browser status: pass evidence: - hermes-webui/tests/test_cto_browser_e2e.py command: pytest -q tests/test_cto_events.py tests/test_live_tool_callback_events.py tests/test_cto_webui_journal_e2e.py tests/test_cto_browser_e2e.py - duration_ms: 2592 + duration_ms: 3006 - eval_id: webui-cto-live-streaming status: pass evidence: - hermes-webui/tests/test_cto_live_streaming_e2e.py command: pytest -q tests/test_cto_live_streaming_e2e.py - duration_ms: 1786 + duration_ms: 2195 - eval_id: live-profile-drift status: pass evidence: - cto/evals/reports/2026-05-25-live-drift.yaml command: python3 evals/runners/drift.py --output evals/reports/2026-05-25-live-drift.yaml - duration_ms: 658 + duration_ms: 706 - eval_id: eval-report-scoring status: pass evidence: - cto/evals/reports/*.yaml command: bash -lc for r in evals/reports/*.yaml; do python3 evals/runners/score.py "$r"; done - duration_ms: 260 + duration_ms: 275 - eval_id: diff-whitespace-check status: pass evidence: - git diff --check command: git diff --check - duration_ms: 5 + duration_ms: 7 commands: - command: python3 evals/runners/run-promotion-suite.py --output evals/reports/2026-05-25-promotion-suite-readiness.yaml cwd: /home/svrnty/workspaces/hermes/cto @@ -90,7 +90,7 @@ commands: --artifact-output evals/artifacts/2026-05-25-promotion-fixture-execution.json cwd: /home/svrnty/workspaces/hermes/cto returncode: 0 - duration_ms: 710 + duration_ms: 750 stdout: 'wrote /home/svrnty/workspaces/hermes/cto/evals/reports/2026-05-25-promotion-fixture-execution.yaml wrote /home/svrnty/workspaces/hermes/cto/evals/artifacts/2026-05-25-promotion-fixture-execution.json @@ -100,10 +100,10 @@ commands: - command: pytest -q tests/e2e/test_j_cto_webui_prd.py cwd: /home/svrnty/workspaces/hermes returncode: 0 - duration_ms: 1143 + duration_ms: 1223 stdout: '.......... [100%] - 10 passed in 0.95s + 10 passed in 1.05s ' stderr: '' @@ -111,27 +111,27 @@ commands: tests/test_cto_webui_journal_e2e.py tests/test_cto_browser_e2e.py cwd: /home/svrnty/workspaces/hermes/hermes-webui returncode: 0 - duration_ms: 2592 - stdout: '.............. [100%] + duration_ms: 3006 + stdout: '............... [100%] - 14 passed in 2.32s + 15 passed in 2.71s ' stderr: '' - command: pytest -q tests/test_cto_live_streaming_e2e.py cwd: /home/svrnty/workspaces/hermes/hermes-webui returncode: 0 - duration_ms: 1786 + duration_ms: 2195 stdout: '. [100%] - 1 passed in 1.46s + 1 passed in 1.79s ' stderr: '' - command: python3 evals/runners/drift.py --output evals/reports/2026-05-25-live-drift.yaml cwd: /home/svrnty/workspaces/hermes/cto returncode: 0 - duration_ms: 658 + duration_ms: 706 stdout: 'wrote evals/reports/2026-05-25-live-drift.yaml ' @@ -140,7 +140,7 @@ commands: "$r"; done cwd: /home/svrnty/workspaces/hermes/cto returncode: 0 - duration_ms: 260 + duration_ms: 275 stdout: 'ok ok @@ -164,7 +164,7 @@ commands: - command: git diff --check cwd: /home/svrnty/workspaces/hermes returncode: 0 - duration_ms: 5 + duration_ms: 7 stdout: '' stderr: '' notes: