CC: Add Codex retention dry-run packet

This commit is contained in:
Svrnty
2026-06-04 13:09:59 -04:00
parent 46b7296cb2
commit 7753006073
3 changed files with 199 additions and 0 deletions
@@ -0,0 +1,72 @@
---
name: cto-codex-retention-dry-run-packet
tier: T1
status: validated
owner: jp
source: CTO-WORK-093
last_reviewed: 2026-06-04
description: Local CTO dry-run packet for Codex retention pressure.
---
# CTO Codex Retention Dry-Run Packet
Local planning SOT only. Not a Core Protocol. Not active Core authority.
## Claim
Codex retention pressure is measurable without reading raw transcripts or mutating `~/.codex`.
## Context
Core S410 already reduced Core-owned successful Validator stdout. Remaining pressure is external Codex state: session JSONL files and Codex SQLite logs. CTO records the operator-facing dry-run boundary because Codex is an admitted model provider surface, not Cortex OS authority.
Measured state on 2026-06-04:
| Surface | Value |
| --- | --- |
| `~/.codex` | 5.4G |
| `~/.codex/sessions` | 2.3G |
| session JSONL files | 1,038 |
| JSONL files over 10MB | 51 |
| JSONL files over 50MB | 3 |
| `logs_2.sqlite` | 2.7G |
| `logs_2.sqlite-wal` | 268M |
| log rows | 1,140,357 |
| archived threads | 0 |
| active threads | 1,038 |
Largest log families are responses websocket, `codex_otel.log_only`, and `codex_otel.trace_safe`.
## Dry-Run Tool
`python3 tools/report_codex_retention_pressure.py` emits metadata-only JSON:
- storage bytes;
- session JSONL counts and size bands (files of at least 10 MiB and at least 50 MiB);
- thread archive counts;
- log row count and top log targets;
- blocked mutation list.
It does not read transcript bodies, update SQLite, delete files, vacuum databases, archive threads, mutate Core, start Runtime, read secrets, change Codex config, or claim product readiness.
## Policy
Prevention default: use `codex exec --ephemeral` for disposable non-interactive worker runs.
Blocked without explicit operator approval:
- direct `threads.archived` updates;
- session JSONL deletion;
- logs table deletion;
- SQLite vacuum or checkpoint;
- raw transcript import into Core;
- broad cleanup of `~/.codex`.
## Decision
Use the dry-run report as the next operator gate. If cleanup is approved later, execute it as backup-first, archive-first, delete-last work. Core remains out of raw transcript retention authority.
## New Issues
- must-fix: existing Codex state has 1,038 active threads and no archived threads.
- follow-up: check whether Codex 0.137.0 adds native retention commands before custom cleanup.
+5
View File
@@ -461,3 +461,8 @@ items:
status: validated
source: .sot/03-PROTOCOLS/CTO-CORE-ROUTE-ADMISSION-GUARD-ISSUES.md
owner: ""
- id: CTO-WORK-093
title: Codex Retention Dry-Run Packet
status: validated
source: .sot/03-PROTOCOLS/CTO-CODEX-RETENTION-DRY-RUN-PACKET.md
owner: ""
+122
View File
@@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""Report Codex retention pressure without reading transcript bodies."""
from __future__ import annotations
import argparse
import json
import os
import sqlite3
from pathlib import Path
from typing import Any
def bytes_on_disk(path: Path) -> int:
    """Return the summed ``st_size`` of *path*.

    A missing path yields 0, a regular file yields its own size, and a
    directory yields the total size of every regular file found by a
    recursive walk. Only ``stat`` metadata is consulted; no file is opened.

    Entries that disappear or become unreadable between discovery and
    ``stat`` are skipped instead of aborting the scan, because the tree may
    be written to by another process while it is being measured.
    """
    try:
        if not path.exists():
            return 0
        if path.is_file():
            return path.stat().st_size
    except OSError:
        # The path vanished between the existence check and the stat call.
        return 0

    total = 0
    for item in path.rglob("*"):
        try:
            if item.is_file():
                total += item.stat().st_size
        except OSError:
            # Removed or rotated mid-walk; it no longer occupies space.
            continue
    return total
def count_session_files(sessions_dir: Path) -> dict[str, Any]:
    """Summarize ``*.jsonl`` files under *sessions_dir* using metadata only.

    Returns a dict with:
      - ``jsonl_file_count``: number of regular files matching ``*.jsonl``
        anywhere below *sessions_dir*;
      - ``bytes``: sum of their ``st_size`` values;
      - ``files_over_10mb`` / ``files_over_50mb``: how many are at least
        10 MiB / 50 MiB (both thresholds are inclusive).

    A missing directory yields all-zero counts. Directories whose name ends
    in ``.jsonl`` are not counted, and entries that vanish between discovery
    and ``stat`` are skipped rather than failing the whole report.
    """
    sizes: list[int] = []
    if sessions_dir.exists():
        for candidate in sessions_dir.rglob("*.jsonl"):
            try:
                # The glob matches on name only, so filter to regular files.
                if candidate.is_file():
                    sizes.append(candidate.stat().st_size)
            except OSError:
                # Removed while scanning; nothing left to count.
                continue

    mib = 1024 * 1024
    return {
        "jsonl_file_count": len(sizes),
        "bytes": sum(sizes),
        "files_over_10mb": sum(size >= 10 * mib for size in sizes),
        "files_over_50mb": sum(size >= 50 * mib for size in sizes),
    }
def query_one(db_path: Path, sql: str) -> list[Any]:
    """Run *sql* against *db_path* read-only and return the first row.

    Returns the first result row as a list, or an empty list when the
    database file does not exist or the query yields no rows. ``sqlite3``
    errors (for example a missing table) propagate to the caller.

    The database is opened through a ``mode=ro`` URI. The path is converted
    with ``Path.as_uri()`` so characters such as ``#``, ``?`` or ``%`` are
    percent-encoded; pasted in raw, they would cut the URI short and could
    drop the ``mode=ro`` parameter.
    """
    if not db_path.exists():
        return []
    uri = f"{db_path.absolute().as_uri()}?mode=ro"
    conn = sqlite3.connect(uri, uri=True)
    try:
        row = conn.execute(sql).fetchone()
    finally:
        # A sqlite3 connection used as a context manager only commits or
        # rolls back; it must be closed explicitly to release the handle.
        conn.close()
    return list(row) if row is not None else []
def query_all(db_path: Path, sql: str) -> list[list[Any]]:
    """Run *sql* against *db_path* read-only and return every row.

    Returns each result row as a list, or an empty list when the database
    file does not exist or the query yields no rows. ``sqlite3`` errors
    (for example a missing table) propagate to the caller.

    The database is opened through a ``mode=ro`` URI. The path is converted
    with ``Path.as_uri()`` so characters such as ``#``, ``?`` or ``%`` are
    percent-encoded; pasted in raw, they would cut the URI short and could
    drop the ``mode=ro`` parameter.
    """
    if not db_path.exists():
        return []
    uri = f"{db_path.absolute().as_uri()}?mode=ro"
    conn = sqlite3.connect(uri, uri=True)
    try:
        rows = conn.execute(sql).fetchall()
    finally:
        # A sqlite3 connection used as a context manager only commits or
        # rolls back; it must be closed explicitly to release the handle.
        conn.close()
    return [list(row) for row in rows]
def build_report(codex_home: Path) -> dict[str, Any]:
    """Assemble the metadata-only retention pressure report for *codex_home*.

    The report combines:
      - byte totals for the whole home, ``sessions/``, ``logs_2.sqlite`` and
        ``logs_2.sqlite-wal``;
      - session JSONL counts and size bands;
      - thread counts grouped by the ``archived`` flag in ``state_5.sqlite``;
      - the log row count, summed ``estimated_bytes`` and the eight largest
        log targets from ``logs_2.sqlite``;
      - the safe default and the list of mutations blocked without approval.

    Missing files and databases contribute zeros or empty lists. Errors
    raised by the underlying queries are not caught here.

    Returns a JSON-serializable dict.
    """
    sessions_dir = codex_home / "sessions"
    state_db = codex_home / "state_5.sqlite"
    logs_db = codex_home / "logs_2.sqlite"
    logs_wal = codex_home / "logs_2.sqlite-wal"

    # One row per distinct value of the archived flag.
    thread_rows = query_all(
        state_db,
        "select archived, count(*) from threads group by archived order by archived",
    )
    # Single aggregate row: total rows and summed estimated bytes.
    log_summary = query_one(
        logs_db,
        "select count(*), coalesce(sum(estimated_bytes), 0) from logs",
    )
    # Heaviest log targets by estimated bytes, capped at eight.
    top_log_targets = query_all(
        logs_db,
        """
        select target, count(*) as rows, coalesce(sum(estimated_bytes), 0) as bytes
        from logs
        group by target
        order by bytes desc
        limit 8
        """,
    )

    return {
        "schema_version": "cto-codex-retention-pressure.v1",
        "codex_home": str(codex_home),
        "metadata_only": True,
        "raw_transcript_bodies_read": False,
        "mutation_performed": False,
        "storage": {
            "codex_home_bytes": bytes_on_disk(codex_home),
            "sessions_bytes": bytes_on_disk(sessions_dir),
            "logs_sqlite_bytes": bytes_on_disk(logs_db),
            "logs_sqlite_wal_bytes": bytes_on_disk(logs_wal),
        },
        "sessions": count_session_files(sessions_dir),
        "threads_by_archive_state": [
            {"archived": bool(row[0]), "thread_count": row[1]} for row in thread_rows
        ],
        "logs": {
            # query_one returns [] when the logs database is absent.
            "row_count": log_summary[0] if log_summary else 0,
            "estimated_bytes": log_summary[1] if len(log_summary) > 1 else 0,
            "top_targets": [
                {"target": row[0], "rows": row[1], "estimated_bytes": row[2]}
                for row in top_log_targets
            ],
        },
        "safe_default": "Use `codex exec --ephemeral` for disposable non-interactive runs.",
        "blocked_without_approval": [
            "direct `threads.archived` updates",
            "session JSONL deletion",
            "logs table deletion",
            "SQLite vacuum or checkpoint",
            "raw transcript import into Cortex OS Core",
            # Listed in the packet's policy section; kept in sync here.
            "broad cleanup of `~/.codex`",
        ],
    }
def main() -> int:
    """Parse the command line, print the report as JSON, and return 0.

    ``--codex-home`` selects the directory to inspect. When it is omitted
    the ``CODEX_HOME`` environment variable is used, falling back to
    ``~/.codex``. A leading ``~`` in the chosen value is expanded.
    """
    fallback_home = os.environ.get("CODEX_HOME", str(Path.home() / ".codex"))

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--codex-home",
        default=fallback_home,
        help="Codex home directory; default CODEX_HOME or ~/.codex.",
    )
    options = cli.parse_args()

    home = Path(options.codex_home).expanduser()
    report = build_report(home)
    print(json.dumps(report, indent=2, sort_keys=True))
    return 0


# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())