CC: Add Codex retention dry-run packet
This commit is contained in:
@@ -0,0 +1,72 @@
|
||||
---
|
||||
name: cto-codex-retention-dry-run-packet
|
||||
tier: T1
|
||||
status: validated
|
||||
owner: jp
|
||||
source: CTO-WORK-093
|
||||
last_reviewed: 2026-06-04
|
||||
description: Local CTO dry-run packet for Codex retention pressure.
|
||||
---
|
||||
|
||||
# CTO Codex Retention Dry-Run Packet
|
||||
|
||||
Local planning SOT only. Not a Core Protocol. Not active Core authority.
|
||||
|
||||
## Claim
|
||||
|
||||
Codex retention pressure is measurable without reading raw transcripts or mutating `~/.codex`.
|
||||
|
||||
## Context
|
||||
|
||||
Core S410 already reduced Core-owned successful Validator stdout. Remaining pressure is external Codex state: session JSONL files and Codex SQLite logs. CTO records the operator-facing dry-run boundary because Codex is an admitted model provider surface, not Cortex OS authority.
|
||||
|
||||
Measured state on 2026-06-04:
|
||||
|
||||
| Surface | Value |
|
||||
| --- | --- |
|
||||
| `~/.codex` | 5.4G |
|
||||
| `~/.codex/sessions` | 2.3G |
|
||||
| session JSONL files | 1,038 |
|
||||
| JSONL files over 10MB | 51 |
|
||||
| JSONL files over 50MB | 3 |
|
||||
| `logs_2.sqlite` | 2.7G |
|
||||
| `logs_2.sqlite-wal` | 268M |
|
||||
| log rows | 1,140,357 |
|
||||
| archived threads | 0 |
|
||||
| active threads | 1,038 |
|
||||
|
||||
The largest log families are the responses websocket, `codex_otel.log_only`, and `codex_otel.trace_safe`.
|
||||
|
||||
## Dry-Run Tool
|
||||
|
||||
`python3 tools/report_codex_retention_pressure.py` emits metadata-only JSON:
|
||||
|
||||
- storage bytes;
|
||||
- session JSONL counts and size bands;
|
||||
- thread archive counts;
|
||||
- log row count and top log targets;
|
||||
- blocked mutation list.
|
||||
|
||||
It does not read transcript bodies, update SQLite, delete files, vacuum databases, archive threads, mutate Core, start Runtime, read secrets, change Codex config, or claim product readiness.
|
||||
|
||||
## Policy
|
||||
|
||||
Prevention default: use `codex exec --ephemeral` for disposable non-interactive worker runs.
|
||||
|
||||
Blocked without explicit operator approval:
|
||||
|
||||
- direct `threads.archived` updates;
|
||||
- session JSONL deletion;
|
||||
- logs table deletion;
|
||||
- SQLite vacuum or checkpoint;
|
||||
- raw transcript import into Core;
|
||||
- broad cleanup of `~/.codex`.
|
||||
|
||||
## Decision
|
||||
|
||||
Use the dry-run report as the next operator gate. If cleanup is approved later, execute it as backup-first, archive-first, delete-last work. Core remains out of raw transcript retention authority.
|
||||
|
||||
## New Issues
|
||||
|
||||
- must-fix: existing Codex state has 1,038 active threads and no archived threads.
|
||||
- follow-up: check whether Codex 0.137.0 adds native retention commands before custom cleanup.
|
||||
@@ -461,3 +461,8 @@ items:
|
||||
status: validated
|
||||
source: .sot/03-PROTOCOLS/CTO-CORE-ROUTE-ADMISSION-GUARD-ISSUES.md
|
||||
owner: ""
|
||||
- id: CTO-WORK-093
|
||||
title: Codex Retention Dry-Run Packet
|
||||
status: validated
|
||||
source: .sot/03-PROTOCOLS/CTO-CODEX-RETENTION-DRY-RUN-PACKET.md
|
||||
owner: ""
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Report Codex retention pressure without reading transcript bodies."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def bytes_on_disk(path: Path) -> int:
    """Return the apparent size in bytes of a file or a directory tree.

    A regular file contributes its ``st_size``.  A directory contributes the
    sum of ``st_size`` over every regular file found beneath it.  A missing
    path, or one that is neither a file nor a directory, yields 0.

    The measured tree may be written to while it is being walked, so an
    entry that disappears or becomes unreadable between discovery and
    ``stat`` is counted as 0 bytes instead of aborting the whole report.

    Args:
        path: File or directory to measure.

    Returns:
        Total size in bytes (logical file sizes, not allocated blocks).
    """

    def _size_of(candidate: Path) -> int:
        # is_file() and stat() are separate calls; the entry can vanish in
        # between, so any OSError is treated as "nothing to count".
        try:
            return candidate.stat().st_size if candidate.is_file() else 0
        except OSError:
            return 0

    if not path.exists():
        return 0
    if path.is_file():
        return _size_of(path)
    return sum(_size_of(item) for item in path.rglob("*"))
|
||||
|
||||
|
||||
def count_session_files(sessions_dir: Path) -> dict[str, Any]:
    """Summarise ``*.jsonl`` files under ``sessions_dir`` by count and size.

    Only ``stat`` metadata is consulted; file contents are never opened.
    A missing ``sessions_dir`` produces an all-zero summary.  The size bands
    are inclusive: a file of exactly 10 MiB counts toward
    ``files_over_10mb`` (likewise 50 MiB for ``files_over_50mb``).

    Args:
        sessions_dir: Directory searched recursively for ``*.jsonl`` files.

    Returns:
        Dict with ``jsonl_file_count``, ``bytes``, ``files_over_10mb`` and
        ``files_over_50mb``.
    """
    ten_mib = 10 * 1024 * 1024
    fifty_mib = 50 * 1024 * 1024

    file_count = 0
    total_bytes = 0
    at_least_10 = 0
    at_least_50 = 0

    if sessions_dir.exists():
        for session_file in sessions_dir.rglob("*.jsonl"):
            size = session_file.stat().st_size
            file_count += 1
            total_bytes += size
            if size >= ten_mib:
                at_least_10 += 1
            if size >= fifty_mib:
                at_least_50 += 1

    return {
        "jsonl_file_count": file_count,
        "bytes": total_bytes,
        "files_over_10mb": at_least_10,
        "files_over_50mb": at_least_50,
    }
|
||||
|
||||
|
||||
def query_one(db_path: Path, sql: str) -> list[Any]:
    """Run ``sql`` on a read-only SQLite connection and return the first row.

    Args:
        db_path: SQLite database file.
        sql: Statement to execute; it is passed to SQLite unchanged.

    Returns:
        The first result row as a list, or ``[]`` when the database file
        does not exist or the statement yields no rows.

    Raises:
        sqlite3.Error: Propagated unchanged if the database cannot be
            opened or the statement fails.
    """
    if not db_path.exists():
        return []
    # as_uri() percent-encodes characters such as "?", "#" and "%" that would
    # otherwise be parsed as URI syntax and silently change the target file.
    uri = f"{db_path.absolute().as_uri()}?mode=ro"
    conn = sqlite3.connect(uri, uri=True)
    try:
        row = conn.execute(sql).fetchone()
    finally:
        # "with connection:" only scopes a transaction; it never closes the
        # handle, so close it explicitly to release the database file.
        conn.close()
    return list(row or [])
|
||||
|
||||
|
||||
def query_all(db_path: Path, sql: str) -> list[list[Any]]:
    """Run ``sql`` on a read-only SQLite connection and return every row.

    Args:
        db_path: SQLite database file.
        sql: Statement to execute; it is passed to SQLite unchanged.

    Returns:
        All result rows, each converted to a list, or ``[]`` when the
        database file does not exist.

    Raises:
        sqlite3.Error: Propagated unchanged if the database cannot be
            opened or the statement fails.
    """
    if not db_path.exists():
        return []
    # as_uri() percent-encodes characters such as "?", "#" and "%" that would
    # otherwise be parsed as URI syntax and silently change the target file.
    uri = f"{db_path.absolute().as_uri()}?mode=ro"
    conn = sqlite3.connect(uri, uri=True)
    try:
        rows = conn.execute(sql).fetchall()
    finally:
        # "with connection:" only scopes a transaction; it never closes the
        # handle, so close it explicitly to release the database file.
        conn.close()
    return [list(row) for row in rows]
|
||||
|
||||
|
||||
def build_report(codex_home: Path) -> dict[str, Any]:
    """Assemble the metadata-only retention-pressure report for ``codex_home``.

    The report combines:

    * on-disk sizes of ``codex_home``, ``sessions/``, ``logs_2.sqlite`` and
      ``logs_2.sqlite-wal``;
    * count and size bands of session ``*.jsonl`` files;
    * thread counts grouped by the ``archived`` column of the ``threads``
      table in ``state_5.sqlite``;
    * row count, summed ``estimated_bytes`` and the eight largest ``target``
      groups of the ``logs`` table in ``logs_2.sqlite``.

    The ``metadata_only``, ``raw_transcript_bodies_read`` and
    ``mutation_performed`` fields are fixed declarations, not measurements.

    Args:
        codex_home: Root of the Codex state directory.

    Returns:
        A JSON-serialisable dict with schema version
        ``cto-codex-retention-pressure.v1``.
    """
    sessions_dir = codex_home / "sessions"
    state_db = codex_home / "state_5.sqlite"
    logs_db = codex_home / "logs_2.sqlite"
    logs_wal = codex_home / "logs_2.sqlite-wal"
    thread_rows = query_all(
        state_db,
        "select archived, count(*) from threads group by archived order by archived",
    )
    log_summary = query_one(
        logs_db,
        "select count(*), coalesce(sum(estimated_bytes), 0) from logs",
    )
    top_log_targets = query_all(
        logs_db,
        """
        select target, count(*) as rows, coalesce(sum(estimated_bytes), 0) as bytes
        from logs
        group by target
        order by bytes desc
        limit 8
        """,
    )
    return {
        "schema_version": "cto-codex-retention-pressure.v1",
        "codex_home": str(codex_home),
        "metadata_only": True,
        "raw_transcript_bodies_read": False,
        "mutation_performed": False,
        "storage": {
            "codex_home_bytes": bytes_on_disk(codex_home),
            "sessions_bytes": bytes_on_disk(sessions_dir),
            "logs_sqlite_bytes": bytes_on_disk(logs_db),
            "logs_sqlite_wal_bytes": bytes_on_disk(logs_wal),
        },
        "sessions": count_session_files(sessions_dir),
        "threads_by_archive_state": [
            {"archived": bool(row[0]), "thread_count": row[1]} for row in thread_rows
        ],
        "logs": {
            # query_one returns [] when the logs database is absent, so fall
            # back to zero rather than indexing an empty list.
            "row_count": log_summary[0] if log_summary else 0,
            "estimated_bytes": log_summary[1] if len(log_summary) > 1 else 0,
            "top_targets": [
                {"target": row[0], "rows": row[1], "estimated_bytes": row[2]}
                for row in top_log_targets
            ],
        },
        "safe_default": "Use `codex exec --ephemeral` for disposable non-interactive runs.",
        # Keep this list in step with the "Blocked without explicit operator
        # approval" policy list in the dry-run packet document.
        "blocked_without_approval": [
            "direct `threads.archived` updates",
            "session JSONL deletion",
            "logs table deletion",
            "SQLite vacuum or checkpoint",
            "raw transcript import into Cortex OS Core",
            "broad cleanup of `~/.codex`",
        ],
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: print the retention report as JSON.

    ``--codex-home`` selects the directory to inspect.  When omitted, it
    defaults to the ``CODEX_HOME`` environment variable, or ``~/.codex`` if
    that is unset.  The report is written to stdout with two-space
    indentation and sorted keys.

    Returns:
        Always 0.
    """
    default_home = os.environ.get("CODEX_HOME", str(Path.home() / ".codex"))

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--codex-home",
        default=default_home,
        help="Codex home directory; default CODEX_HOME or ~/.codex.",
    )
    options = cli.parse_args()

    # expanduser() lets a literal "~" from the flag or CODEX_HOME resolve.
    codex_home = Path(options.codex_home).expanduser()
    report = build_report(codex_home)
    print(json.dumps(report, indent=2, sort_keys=True))
    return 0
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script; the integer
# returned by main() is passed to SystemExit as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user