From b685254d42c07522983c7535a02c773515513f58 Mon Sep 17 00:00:00 2001 From: Svrnty Date: Sun, 31 May 2026 18:44:11 -0400 Subject: [PATCH] Add CTO harness evidence interface contract --- README.md | 5 +- WORKBOARD.yaml | 4 +- ...CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md | 163 ++++++++++++++++++ tools/validate_cto_child.py | 38 ++++ 4 files changed, 207 insertions(+), 3 deletions(-) create mode 100644 sot/03-PROTOCOLS/CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md diff --git a/README.md b/README.md index ddafb4a..f327cc6 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,10 @@ This workspace is registered as a child-local planning workspace. Registration d | |-- 00-START/ | | `-- CTO-WORKSPACE-INTENT.md | `-- 03-PROTOCOLS/ -| `-- CTO-CASE-BACKEND-BRIEF.md +| |-- CTO-CASE-BACKEND-BRIEF.md +| |-- CTO-CASE-CANDIDATE-BACKEND-PRD.md +| |-- CTO-CASE-CANDIDATE-BACKEND-ISSUES.md +| `-- CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md `-- tools/ `-- validate_cto_child.py ``` diff --git a/WORKBOARD.yaml b/WORKBOARD.yaml index 291e566..9ed39e5 100644 --- a/WORKBOARD.yaml +++ b/WORKBOARD.yaml @@ -16,8 +16,8 @@ items: owner: jp - id: CTO-WORK-004 title: Harness Evidence Interface Contract - status: candidate - source: sot/03-PROTOCOLS/CTO-CASE-CANDIDATE-BACKEND-ISSUES.md + status: validated + source: sot/03-PROTOCOLS/CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md owner: jp - id: CTO-WORK-005 title: Case Source Admission Record diff --git a/sot/03-PROTOCOLS/CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md b/sot/03-PROTOCOLS/CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md new file mode 100644 index 0000000..60dd6c2 --- /dev/null +++ b/sot/03-PROTOCOLS/CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md @@ -0,0 +1,163 @@ +--- +name: cto-harness-evidence-interface-contract +tier: local +status: draft +owner: jp +source: sot/03-PROTOCOLS/CTO-CASE-CANDIDATE-BACKEND-PRD.md +created: 2026-05-31 +last_reviewed: 2026-05-31 +lifecycle_classification: planning +core_promotion_status: not-promoted +description: Child-local 
contract for the stable CTO Harness evidence interface required before Case adapter execution. +--- + +# CTO Harness Evidence Interface Contract + +Local planning SOT only. Not a Core Protocol. Not active Core authority. + +## Purpose + +Define the stable evidence interface that any future Case adapter must satisfy before execution work starts. + +This contract makes the CTO Harness the owner of backend proof. Case may supply logs and results, but the CTO Harness must normalize, validate, and record evidence before any result can be trusted. + +## Non-Authority Notice + +This contract does not authorize Runtime behavior, WebUI Product behavior, Case execution, real-repo mutation, merge, deploy, push, close, vendor-source mutation, external developer repository mutation, or Core promotion. + +## Interface Boundary + +The Harness Evidence Interface is the only accepted proof surface for backend comparison. + +Backend internals stay behind the harness seam. Case lifecycle phases, reviewer comments, raw provider logs, and backend-specific run metadata may be stored as raw artifacts, but callers must not need them to decide whether a run passed. + +## Required Artifact Set + +Every accepted backend run must produce these artifacts under one run artifact directory: + +| Artifact | Required | Purpose | +| --- | --- | --- | +| `report.json` | yes | Machine-readable summary and evidence index. | +| `report.md` | yes | Human-readable summary. | +| `events.normalized.jsonl` | yes | Replayable normalized event stream. | +| `patch.diff` | yes | Exact source diff, even when empty for a fail-closed no-diff case. | +| `test.log` | yes | Verification output or explicit missing-test blocker. | +| `trace.jsonl` | yes | Harness-level trace of major actions and checks. | +| `backend/` | yes | Backend-specific raw logs and metadata. | + +Artifacts must live outside tracked source by default. 
For Hermes CTO harness parity, the expected runtime root pattern is: + +```text +~/.hermes/profiles/cto-planb/harness-runs/<run_id>/ +``` + +## Required `report.json` Fields + +`report.json` must include: + +| Field | Meaning | +| --- | --- | +| `case_id` | Case or task identifier. | +| `run_id` | Unique run identifier. | +| `status` | `pass`, `fail`, or `blocked`. | +| `backend` | Backend adapter name. | +| `run_started_at` | UTC timestamp captured before backend work starts. | +| `run_finished_at` | UTC timestamp captured after harness checks finish. | +| `backend_exit_code` | Raw backend process exit code when applicable. | +| `artifacts_dir` | Run artifact directory. | +| `changed_files` | Files changed by the run. | +| `allowed_writes_passed` | Whether changed files stayed within allowed writes. | +| `approval_status` | `not-required`, `requested`, `granted`, or `denied`. | +| `verification` | Verification commands and pass/fail results. | +| `blockers` | Human-readable fail-closed blockers. | +| `artifact_paths` | Paths for required artifacts. | +| `artifact_digests` | SHA-256 digests for required artifacts. | +| `freshness` | Proof that artifacts were written or checked after `run_started_at`. | + +Additional fields are allowed. Callers must not require additional fields unless a later governed contract updates this file. + +## Digest Manifest + +`artifact_digests` must include SHA-256 digests for: + +- `report.json`; +- `events.normalized.jsonl`; +- `patch.diff`; +- `test.log`; +- `trace.jsonl`; +- each raw backend log required by the adapter contract. + +Digest proof is not sufficient by itself. `freshness` must also show that each required artifact was written or checked after `run_started_at`. + +## Required Normalized Events + +Every accepted run must emit these normalized events in order where applicable: + +1. `run.started` +2. `task.contract.created` +3. `plan.updated` +4. `approval.requested` +5. `approval.granted` or `approval.denied` +6. 
`patch.applied` +7. `git.diff.checked` +8. `verification.completed` +9. `run.completed` + +Approval events are required only for mutation modes that need human approval. If approval is denied, the run must fail closed and emit `run.completed` with a failed or blocked status. + +## Fail-Closed Semantics + +The harness must fail closed for: + +- no diff when a diff is required; +- disallowed file change; +- failed tests; +- missing test command; +- missing required event; +- backend reviewer reject; +- approval denied; +- timeout; +- dirty starting tree; +- dirty ending tree; +- artifact write failure; +- provider unavailable. + +Each fail-closed result must record: + +- blocker reason in `report.json`; +- normalized event when the failure class has an event; +- report status of `fail` or `blocked`; +- nonzero exit behavior where the adapter contract marks the failure executable. + +## Approval Semantics + +Case may recommend. Case must not approve itself. + +The only accepted human approval signal is a CTO Harness or Hermes/operator approval event: + +- `approval.requested` +- `approval.granted` +- `approval.denied` + +Each approval event must include actor, scope, mutation mode, timestamp, and expiry when expiry is applicable. + +No merge, push, deploy, close, or real-repo mutation is allowed without explicit task-contract permission. + +## Adapter Acceptance Rule + +A future Case adapter is not accepted until: + +- `case` is registered as a gated engine; +- the harness accepts `--engine case` without a parallel runner path; +- the adapter is default-denied unless explicitly enabled; +- the adapter writes the required artifact set; +- the adapter satisfies required `report.json` fields; +- the adapter emits required normalized events; +- the adapter records digest and freshness proof; +- the adapter fails closed for the required failure classes. + +## Validation Expectations + +Current validation is planning-only and checks this contract exists. 
+ +Later adapter validation must add executable checks for artifact shape, event order, allowed writes, approval events, digest fields, freshness proof, and fail-closed fixture behavior. diff --git a/tools/validate_cto_child.py b/tools/validate_cto_child.py index b592675..67cd714 100644 --- a/tools/validate_cto_child.py +++ b/tools/validate_cto_child.py @@ -18,6 +18,7 @@ REQUIRED_FILES = [ "sot/03-PROTOCOLS/CTO-CASE-BACKEND-BRIEF.md", "sot/03-PROTOCOLS/CTO-CASE-CANDIDATE-BACKEND-PRD.md", "sot/03-PROTOCOLS/CTO-CASE-CANDIDATE-BACKEND-ISSUES.md", + "sot/03-PROTOCOLS/CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md", ] REQUIRED_BRIEF_PHRASES = [ @@ -56,6 +57,29 @@ REQUIRED_ISSUE_IDS = [ "CTO-WORK-008", ] +REQUIRED_EVIDENCE_INTERFACE_PHRASES = [ + "Local planning SOT only. Not a Core Protocol. Not active Core authority.", + "This contract does not authorize Runtime behavior", + "The Harness Evidence Interface is the only accepted proof surface for backend comparison.", + "report.json", + "events.normalized.jsonl", + "patch.diff", + "test.log", + "trace.jsonl", + "artifact_digests", + "SHA-256", + "run_started_at", + "run_finished_at", + "backend_exit_code", + "allowed_writes_passed", + "approval.requested", + "approval.granted", + "approval.denied", + "Case may recommend. 
Case must not approve itself.", + "No merge, push, deploy, close, or real-repo mutation is allowed without explicit task-contract permission.", + "fail closed", +] + def main() -> int: checked: list[str] = [] @@ -103,6 +127,16 @@ def main() -> int: if issue_id not in text: errors.append(f"missing_issue_id:{issue_id}") + evidence_interface = ROOT / "sot/03-PROTOCOLS/CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md" + if evidence_interface.is_file(): + text = evidence_interface.read_text(encoding="utf-8") + if "core_promotion_status: not-promoted" not in text: + errors.append("evidence_interface_missing_not_promoted_frontmatter") + for phrase in REQUIRED_EVIDENCE_INTERFACE_PHRASES: + checked.append(f"evidence_interface_phrase:{phrase}") + if phrase not in text: + errors.append(f"missing_evidence_interface_phrase:{phrase}") + board = ROOT / "WORKBOARD.yaml" if board.is_file(): text = board.read_text(encoding="utf-8") @@ -110,6 +144,10 @@ def main() -> int: checked.append(f"workboard_id:{issue_id}") if issue_id not in text: errors.append(f"missing_workboard_id:{issue_id}") + if "CTO-HARNESS-EVIDENCE-INTERFACE-CONTRACT.md" not in text: + errors.append("workboard_missing_evidence_interface_contract_source") + if "CTO-WORK-004" in text and "status: validated" not in text: + errors.append("workboard_cto_work_004_not_validated") payload = { "ok": not errors,