#!/usr/bin/env python3
"""Report Codex retention pressure without reading transcript bodies."""

from __future__ import annotations

import argparse
import json
import os
import sqlite3
from contextlib import closing
from pathlib import Path
from typing import Any

_MIB = 1024 * 1024


def _stat_size(path: Path) -> int | None:
    """Return the size of *path* in bytes, or ``None`` if it cannot be stat'ed.

    Files under the scanned directory may be removed or rotated between
    discovery and ``stat()``; such files are reported as ``None`` instead of
    aborting the whole report.
    """
    try:
        return path.stat().st_size
    except OSError:
        return None


def bytes_on_disk(path: Path) -> int:
    """Return the number of bytes occupied by *path*.

    Args:
        path: A file or directory. Directories are walked recursively and the
            sizes of all regular files below them are summed.

    Returns:
        ``0`` when *path* does not exist, the file size for a file, or the
        recursive total for a directory. Entries that cannot be stat'ed
        (e.g. removed mid-scan) contribute ``0``.
    """
    if not path.exists():
        return 0
    if path.is_file():
        return _stat_size(path) or 0
    return sum(
        _stat_size(item) or 0 for item in path.rglob("*") if item.is_file()
    )


def count_session_files(sessions_dir: Path) -> dict[str, Any]:
    """Summarize the ``*.jsonl`` entries below *sessions_dir* by size only.

    Only file metadata is inspected; file contents are never opened.

    Returns:
        A dict with the number of ``*.jsonl`` entries, their total size, and
        how many are at least 10 MiB and at least 50 MiB. All values are zero
        when the directory does not exist. Entries that disappear before
        their size can be read are left out of every figure.
    """
    files = list(sessions_dir.rglob("*.jsonl")) if sessions_dir.exists() else []
    sizes = [
        size for size in (_stat_size(path) for path in files) if size is not None
    ]
    return {
        "jsonl_file_count": len(sizes),
        "bytes": sum(sizes),
        "files_over_10mb": sum(size >= 10 * _MIB for size in sizes),
        "files_over_50mb": sum(size >= 50 * _MIB for size in sizes),
    }


def _open_readonly(db_path: Path) -> sqlite3.Connection:
    """Open *db_path* as a read-only SQLite connection.

    The URI is built with ``Path.as_uri()`` so that characters which are
    significant in a URI (``?``, ``#``, ``%``, spaces) inside the path are
    percent-encoded rather than being parsed as query or fragment delimiters.
    """
    uri = f"{db_path.resolve().as_uri()}?mode=ro"
    return sqlite3.connect(uri, uri=True)


def query_one(db_path: Path, sql: str) -> list[Any]:
    """Run *sql* read-only against *db_path* and return the first row.

    Returns:
        The first result row as a list, or ``[]`` when the database file does
        not exist or the query yields no rows.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    if not db_path.exists():
        return []
    # ``closing`` is required: using a sqlite3 connection directly as a
    # context manager only scopes the transaction and leaves it open.
    with closing(_open_readonly(db_path)) as conn:
        return list(conn.execute(sql).fetchone() or [])


def query_all(db_path: Path, sql: str) -> list[list[Any]]:
    """Run *sql* read-only against *db_path* and return every row.

    Returns:
        Each result row as a list, or ``[]`` when the database file does not
        exist.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    if not db_path.exists():
        return []
    # See query_one: the connection must be closed explicitly.
    with closing(_open_readonly(db_path)) as conn:
        return [list(row) for row in conn.execute(sql).fetchall()]


def build_report(codex_home: Path) -> dict[str, Any]:
    """Build the retention-pressure report for the directory *codex_home*.

    The report combines on-disk sizes, ``*.jsonl`` session file counts, the
    thread counts per ``archived`` value from ``state_5.sqlite`` and row and
    byte totals from the ``logs`` table of ``logs_2.sqlite``. Databases are
    opened read-only and nothing is written.

    Returns:
        A JSON-serializable dict. Missing files or databases yield zero or
        empty values rather than errors.

    Raises:
        sqlite3.Error: If an existing database cannot be queried.
    """
    sessions_dir = codex_home / "sessions"
    state_db = codex_home / "state_5.sqlite"
    logs_db = codex_home / "logs_2.sqlite"
    logs_wal = codex_home / "logs_2.sqlite-wal"

    thread_rows = query_all(
        state_db,
        "select archived, count(*) from threads group by archived order by archived",
    )
    log_summary = query_one(
        logs_db,
        "select count(*), coalesce(sum(estimated_bytes), 0) from logs",
    )
    top_log_targets = query_all(
        logs_db,
        """
        select target, count(*) as rows, coalesce(sum(estimated_bytes), 0) as bytes
        from logs
        group by target
        order by bytes desc
        limit 8
        """,
    )

    return {
        "schema_version": "cto-codex-retention-pressure.v1",
        "codex_home": str(codex_home),
        "metadata_only": True,
        "raw_transcript_bodies_read": False,
        "mutation_performed": False,
        "storage": {
            "codex_home_bytes": bytes_on_disk(codex_home),
            "sessions_bytes": bytes_on_disk(sessions_dir),
            "logs_sqlite_bytes": bytes_on_disk(logs_db),
            "logs_sqlite_wal_bytes": bytes_on_disk(logs_wal),
        },
        "sessions": count_session_files(sessions_dir),
        "threads_by_archive_state": [
            {"archived": bool(row[0]), "thread_count": row[1]}
            for row in thread_rows
        ],
        "logs": {
            # query_one returns [] for a missing database, hence the guards.
            "row_count": log_summary[0] if log_summary else 0,
            "estimated_bytes": log_summary[1] if len(log_summary) > 1 else 0,
            "top_targets": [
                {"target": row[0], "rows": row[1], "estimated_bytes": row[2]}
                for row in top_log_targets
            ],
        },
        "safe_default": "Use `codex exec --ephemeral` for disposable non-interactive runs.",
        "blocked_without_approval": [
            "direct `threads.archived` updates",
            "session JSONL deletion",
            "logs table deletion",
            "SQLite vacuum or checkpoint",
            "raw transcript import into Cortex OS Core",
        ],
    }


def main() -> int:
    """Parse command-line arguments and print the report as JSON on stdout.

    Returns:
        The process exit status, always ``0`` on normal completion.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--codex-home",
        default=os.environ.get("CODEX_HOME", str(Path.home() / ".codex")),
        help="Codex home directory; default CODEX_HOME or ~/.codex.",
    )
    args = parser.parse_args()
    report = build_report(Path(args.codex_home).expanduser())
    print(json.dumps(report, indent=2, sort_keys=True))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())