From 7753006073c13122ab9b62ab389af317e19755c7 Mon Sep 17 00:00:00 2001 From: Svrnty Date: Thu, 4 Jun 2026 13:09:59 -0400 Subject: [PATCH] CC: Add Codex retention dry-run packet --- .../CTO-CODEX-RETENTION-DRY-RUN-PACKET.md | 72 +++++++++++ WORKBOARD.yaml | 5 + tools/report_codex_retention_pressure.py | 122 ++++++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 .sot/03-PROTOCOLS/CTO-CODEX-RETENTION-DRY-RUN-PACKET.md create mode 100644 tools/report_codex_retention_pressure.py diff --git a/.sot/03-PROTOCOLS/CTO-CODEX-RETENTION-DRY-RUN-PACKET.md b/.sot/03-PROTOCOLS/CTO-CODEX-RETENTION-DRY-RUN-PACKET.md new file mode 100644 index 0000000..72c1510 --- /dev/null +++ b/.sot/03-PROTOCOLS/CTO-CODEX-RETENTION-DRY-RUN-PACKET.md @@ -0,0 +1,72 @@ +--- +name: cto-codex-retention-dry-run-packet +tier: T1 +status: validated +owner: jp +source: CTO-WORK-093 +last_reviewed: 2026-06-04 +description: Local CTO dry-run packet for Codex retention pressure. +--- + +# CTO Codex Retention Dry-Run Packet + +Local planning SOT only. Not a Core Protocol. Not active Core authority. + +## Claim + +Codex retention pressure is measurable without reading raw transcripts or mutating `~/.codex`. + +## Context + +Core S410 already reduced Core-owned successful Validator stdout. Remaining pressure is external Codex state: session JSONL files and Codex SQLite logs. CTO records the operator-facing dry-run boundary because Codex is an admitted model provider surface, not Cortex OS authority. + +Measured state on 2026-06-04: + +| Surface | Value | +| --- | --- | +| `~/.codex` | 5.4G | +| `~/.codex/sessions` | 2.3G | +| session JSONL files | 1,038 | +| JSONL files over 10MB | 51 | +| JSONL files over 50MB | 3 | +| `logs_2.sqlite` | 2.7G | +| `logs_2.sqlite-wal` | 268M | +| log rows | 1,140,357 | +| archived threads | 0 | +| active threads | 1,038 | + +Largest log families are responses websocket, `codex_otel.log_only`, and `codex_otel.trace_safe`. 
+ +## Dry-Run Tool + +`python3 tools/report_codex_retention_pressure.py` emits metadata-only JSON: + +- storage bytes; +- session JSONL counts and size bands; +- thread archive counts; +- log row count and top log targets; +- blocked mutation list. + +It does not read transcript bodies, update SQLite, delete files, vacuum databases, archive threads, mutate Core, start Runtime, read secrets, change Codex config, or claim product readiness. + +## Policy + +Prevention default: use `codex exec --ephemeral` for disposable non-interactive worker runs. + +Blocked without explicit operator approval: + +- direct `threads.archived` updates; +- session JSONL deletion; +- logs table deletion; +- SQLite vacuum or checkpoint; +- raw transcript import into Core; +- broad cleanup of `~/.codex`. + +## Decision + +Use the dry-run report as the next operator gate. If cleanup is approved later, execute it as backup-first, archive-first, delete-last work. Core remains out of raw transcript retention authority. + +## New Issues + +- must-fix: existing Codex state has 1,038 active threads and no archived threads. +- follow-up: check whether Codex 0.137.0 adds native retention commands before custom cleanup. 
#!/usr/bin/env python3
"""Report Codex retention pressure without reading transcript bodies.

The report is built from file sizes (``stat`` only; no session file is
opened) and from aggregate SQL queries issued over read-only SQLite
connections.  It is printed as JSON on stdout.
"""

from __future__ import annotations

import argparse
import json
import os
import sqlite3
import stat
from contextlib import closing
from pathlib import Path
from typing import Any

_MIB = 1024 * 1024


def _regular_file_size(path: Path) -> int | None:
    """Return the size of *path* in bytes if it is a regular file.

    Returns ``None`` when *path* is not a regular file or cannot be
    stat'ed.  A single ``stat`` call is used instead of a check followed by
    a second call, so an entry that disappears during a scan, or a dangling
    symlink, is skipped rather than raising.
    """
    try:
        info = path.stat()
    except OSError:
        return None
    return info.st_size if stat.S_ISREG(info.st_mode) else None


def _open_read_only(db_path: Path) -> sqlite3.Connection:
    """Open *db_path* through a ``mode=ro`` SQLite URI.

    ``Path.as_uri`` percent-encodes the path, so characters such as ``#``
    or ``?`` in a directory name are not mistaken for URI syntax.
    """
    return sqlite3.connect(f"{db_path.resolve().as_uri()}?mode=ro", uri=True)


def bytes_on_disk(path: Path) -> int:
    """Return the total ``st_size`` of *path*.

    A regular file reports its own size; a directory reports the sum over
    every regular file beneath it; anything else (including a missing path)
    reports 0.  Sizes are apparent sizes from ``stat``, not allocated blocks.
    """
    own_size = _regular_file_size(path)
    if own_size is not None:
        return own_size
    if not path.is_dir():
        return 0
    return sum(_regular_file_size(item) or 0 for item in path.rglob("*"))


def count_session_files(sessions_dir: Path) -> dict[str, Any]:
    """Summarise the ``*.jsonl`` files found recursively under *sessions_dir*.

    Returns a dict with the file count, the total bytes, and the number of
    files at or above the 10 MiB and 50 MiB thresholds.  Only regular files
    are counted; entries that cannot be stat'ed are skipped.
    """
    sizes: list[int] = []
    if sessions_dir.is_dir():
        for candidate in sessions_dir.rglob("*.jsonl"):
            size = _regular_file_size(candidate)
            if size is not None:
                sizes.append(size)
    return {
        "jsonl_file_count": len(sizes),
        "bytes": sum(sizes),
        # Thresholds are inclusive, matching the original comparison.
        "files_over_10mb": sum(size >= 10 * _MIB for size in sizes),
        "files_over_50mb": sum(size >= 50 * _MIB for size in sizes),
    }


def query_one(db_path: Path, sql: str) -> list[Any]:
    """Run *sql* against *db_path* and return the first row as a list.

    Returns ``[]`` when the database file does not exist or the query
    yields no row.  ``sqlite3`` errors (for example a missing table)
    propagate to the caller.
    """
    if not db_path.exists():
        return []
    # ``with connection`` only scopes a transaction; ``closing`` is what
    # actually releases the handle.
    with closing(_open_read_only(db_path)) as conn:
        return list(conn.execute(sql).fetchone() or [])


def query_all(db_path: Path, sql: str) -> list[list[Any]]:
    """Run *sql* against *db_path* and return every row as a list.

    Returns ``[]`` when the database file does not exist.  ``sqlite3``
    errors propagate to the caller.
    """
    if not db_path.exists():
        return []
    with closing(_open_read_only(db_path)) as conn:
        return [list(row) for row in conn.execute(sql).fetchall()]


def build_report(codex_home: Path) -> dict[str, Any]:
    """Assemble the retention-pressure report for *codex_home*.

    Reads ``sessions/``, ``state_5.sqlite``, ``logs_2.sqlite`` and
    ``logs_2.sqlite-wal`` beneath *codex_home*.  Missing files or
    directories contribute zero or empty values.
    """
    sessions_dir = codex_home / "sessions"
    state_db = codex_home / "state_5.sqlite"
    logs_db = codex_home / "logs_2.sqlite"
    logs_wal = codex_home / "logs_2.sqlite-wal"
    thread_rows = query_all(
        state_db,
        "select archived, count(*) from threads group by archived order by archived",
    )
    log_summary = query_one(
        logs_db,
        "select count(*), coalesce(sum(estimated_bytes), 0) from logs",
    )
    top_log_targets = query_all(
        logs_db,
        """
        select target, count(*) as rows, coalesce(sum(estimated_bytes), 0) as bytes
        from logs
        group by target
        order by bytes desc
        limit 8
        """,
    )
    return {
        "schema_version": "cto-codex-retention-pressure.v1",
        "codex_home": str(codex_home),
        "metadata_only": True,
        "raw_transcript_bodies_read": False,
        "mutation_performed": False,
        "storage": {
            "codex_home_bytes": bytes_on_disk(codex_home),
            "sessions_bytes": bytes_on_disk(sessions_dir),
            "logs_sqlite_bytes": bytes_on_disk(logs_db),
            "logs_sqlite_wal_bytes": bytes_on_disk(logs_wal),
        },
        "sessions": count_session_files(sessions_dir),
        "threads_by_archive_state": [
            {"archived": bool(row[0]), "thread_count": row[1]} for row in thread_rows
        ],
        "logs": {
            "row_count": log_summary[0] if log_summary else 0,
            "estimated_bytes": log_summary[1] if len(log_summary) > 1 else 0,
            "top_targets": [
                {"target": row[0], "rows": row[1], "estimated_bytes": row[2]}
                for row in top_log_targets
            ],
        },
        "safe_default": "Use `codex exec --ephemeral` for disposable non-interactive runs.",
        "blocked_without_approval": [
            "direct `threads.archived` updates",
            "session JSONL deletion",
            "logs table deletion",
            "SQLite vacuum or checkpoint",
            "raw transcript import into Cortex OS Core",
        ],
    }


def main() -> int:
    """Parse ``--codex-home``, print the report as JSON, and return 0."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--codex-home",
        default=os.environ.get("CODEX_HOME", str(Path.home() / ".codex")),
        help="Codex home directory; default CODEX_HOME or ~/.codex.",
    )
    args = parser.parse_args()
    print(json.dumps(build_report(Path(args.codex_home).expanduser()), indent=2, sort_keys=True))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())