123 lines
3.9 KiB
Python
123 lines
3.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Report Codex retention pressure without reading transcript bodies."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
def bytes_on_disk(path: Path) -> int:
    """Return the total size in bytes of ``path`` using ``stat`` metadata only.

    * A regular file contributes its own ``st_size``.
    * A directory contributes the sum of ``st_size`` for every regular file
      found beneath it, recursively.
    * A path that does not exist (or is neither a file nor a directory)
      contributes ``0``.

    File contents are never opened. The measurement is best-effort: entries
    that disappear or become unreadable while the tree is being walked are
    skipped rather than aborting the caller, because the directory being
    measured may be in active use by another process.
    """
    try:
        if path.is_file():
            return path.stat().st_size
        if not path.is_dir():
            # Missing path, dangling symlink, socket, etc.
            return 0
    except OSError:
        # The path vanished or became unreadable between the checks above.
        return 0

    total = 0
    try:
        for item in path.rglob("*"):
            try:
                if item.is_file():
                    total += item.stat().st_size
            except OSError:
                # File removed or permission denied after it was listed.
                continue
    except OSError:
        # A directory vanished mid-walk; report what was measured so far.
        pass
    return total
|
|
|
|
|
|
def count_session_files(sessions_dir: Path) -> dict[str, Any]:
    """Summarise the ``*.jsonl`` files under ``sessions_dir`` by size.

    Only ``stat`` metadata is read; no file is opened.

    Returns a dict with:

    * ``jsonl_file_count`` - number of regular ``*.jsonl`` files found
      recursively.
    * ``bytes`` - sum of their sizes.
    * ``files_over_10mb`` / ``files_over_50mb`` - how many are at least
      10 MiB / 50 MiB (the comparison is inclusive).

    A missing directory yields all zeros. Directories that merely have a
    ``.jsonl`` suffix are not counted, and files that vanish or become
    unreadable during the walk are skipped so a live sessions directory
    cannot abort the report.
    """
    ten_mib = 10 * 1024 * 1024
    fifty_mib = 50 * 1024 * 1024

    sizes: list[int] = []
    try:
        for candidate in sessions_dir.rglob("*.jsonl"):
            try:
                if candidate.is_file():
                    sizes.append(candidate.stat().st_size)
            except OSError:
                # Removed or unreadable after being listed; ignore it.
                continue
    except OSError:
        # The directory itself vanished mid-walk; keep what was collected.
        pass

    return {
        "jsonl_file_count": len(sizes),
        "bytes": sum(sizes),
        "files_over_10mb": sum(size >= ten_mib for size in sizes),
        "files_over_50mb": sum(size >= fifty_mib for size in sizes),
    }
|
|
|
|
|
|
def query_one(db_path: Path, sql: str) -> list[Any]:
    """Run ``sql`` against a SQLite database read-only and return the first row.

    Args:
        db_path: Location of the SQLite database file.
        sql: A single SQL statement to execute.

    Returns:
        The first result row as a list, or ``[]`` when the database file does
        not exist or the statement yields no rows.

    Raises:
        sqlite3.Error: If the database cannot be opened or the statement
            fails (for example the table is missing, or the statement tries
            to write to the read-only connection).
    """
    if not db_path.exists():
        return []
    # Build the URI with as_uri() so characters such as '#', '?', '%' or
    # spaces in the path are percent-encoded instead of being parsed as URI
    # syntax; as_uri() requires an absolute path.
    uri = f"{db_path.absolute().as_uri()}?mode=ro"
    conn = sqlite3.connect(uri, uri=True)
    try:
        row = conn.execute(sql).fetchone()
    finally:
        # sqlite3's own context manager only commits/rolls back; it does not
        # close the connection, so close it explicitly.
        conn.close()
    return list(row) if row is not None else []
|
|
|
|
|
|
def query_all(db_path: Path, sql: str) -> list[list[Any]]:
    """Run ``sql`` against a SQLite database read-only and return every row.

    Args:
        db_path: Location of the SQLite database file.
        sql: A single SQL statement to execute.

    Returns:
        All result rows, each converted to a list, or ``[]`` when the
        database file does not exist or the statement yields no rows.

    Raises:
        sqlite3.Error: If the database cannot be opened or the statement
            fails (for example the table is missing, or the statement tries
            to write to the read-only connection).
    """
    if not db_path.exists():
        return []
    # Build the URI with as_uri() so characters such as '#', '?', '%' or
    # spaces in the path are percent-encoded instead of being parsed as URI
    # syntax; as_uri() requires an absolute path.
    uri = f"{db_path.absolute().as_uri()}?mode=ro"
    conn = sqlite3.connect(uri, uri=True)
    try:
        rows = conn.execute(sql).fetchall()
    finally:
        # sqlite3's own context manager only commits/rolls back; it does not
        # close the connection, so close it explicitly.
        conn.close()
    return [list(row) for row in rows]
|
|
|
|
|
|
def build_report(codex_home: Path) -> dict[str, Any]:
    """Assemble the retention-pressure report for ``codex_home``.

    The report combines three kinds of information:

    * aggregate counts read from ``state_5.sqlite`` (threads grouped by
      their ``archived`` flag) and ``logs_2.sqlite`` (row count, summed
      ``estimated_bytes``, and the eight targets with the largest summed
      ``estimated_bytes``);
    * on-disk sizes of the home directory, the ``sessions`` directory, the
      logs database and its ``-wal`` file;
    * a size summary of the session ``*.jsonl`` files.

    The remaining entries are fixed flags and advisory strings. The result
    is a plain dict of JSON-serialisable values.
    """
    sessions_root = codex_home / "sessions"
    state_db_path = codex_home / "state_5.sqlite"
    logs_db_path = codex_home / "logs_2.sqlite"
    logs_wal_path = codex_home / "logs_2.sqlite-wal"

    # Database aggregates first, in the same order as they appear in the report.
    archive_counts = query_all(
        state_db_path,
        "select archived, count(*) from threads group by archived order by archived",
    )
    log_totals = query_one(
        logs_db_path,
        "select count(*), coalesce(sum(estimated_bytes), 0) from logs",
    )
    heaviest_targets = query_all(
        logs_db_path,
        """
        select target, count(*) as rows, coalesce(sum(estimated_bytes), 0) as bytes
        from logs
        group by target
        order by bytes desc
        limit 8
        """,
    )

    storage_section = {
        "codex_home_bytes": bytes_on_disk(codex_home),
        "sessions_bytes": bytes_on_disk(sessions_root),
        "logs_sqlite_bytes": bytes_on_disk(logs_db_path),
        "logs_sqlite_wal_bytes": bytes_on_disk(logs_wal_path),
    }
    sessions_section = count_session_files(sessions_root)

    threads_section = []
    for archived_flag, thread_count in ((row[0], row[1]) for row in archive_counts):
        threads_section.append(
            {"archived": bool(archived_flag), "thread_count": thread_count}
        )

    # query_one returns [] when the logs database is absent, so fall back to 0.
    log_row_count = log_totals[0] if log_totals else 0
    log_estimated_bytes = log_totals[1] if len(log_totals) > 1 else 0
    logs_section = {
        "row_count": log_row_count,
        "estimated_bytes": log_estimated_bytes,
        "top_targets": [
            {"target": entry[0], "rows": entry[1], "estimated_bytes": entry[2]}
            for entry in heaviest_targets
        ],
    }

    report: dict[str, Any] = {
        "schema_version": "cto-codex-retention-pressure.v1",
        "codex_home": str(codex_home),
        "metadata_only": True,
        "raw_transcript_bodies_read": False,
        "mutation_performed": False,
        "storage": storage_section,
        "sessions": sessions_section,
        "threads_by_archive_state": threads_section,
        "logs": logs_section,
        "safe_default": "Use `codex exec --ephemeral` for disposable non-interactive runs.",
        "blocked_without_approval": [
            "direct `threads.archived` updates",
            "session JSONL deletion",
            "logs table deletion",
            "SQLite vacuum or checkpoint",
            "raw transcript import into Cortex OS Core",
        ],
    }
    return report
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point: print the report as JSON and return 0.

    The Codex home directory comes from ``--codex-home``; when the option is
    omitted it falls back to the ``CODEX_HOME`` environment variable and
    then to ``~/.codex``. A leading ``~`` in the chosen value is expanded.
    The report is written to stdout as indented JSON with sorted keys.
    """
    fallback_home = str(Path.home() / ".codex")

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--codex-home",
        default=os.environ.get("CODEX_HOME", fallback_home),
        help="Codex home directory; default CODEX_HOME or ~/.codex.",
    )
    options = cli.parse_args()

    home = Path(options.codex_home).expanduser()
    report = build_report(home)
    print(json.dumps(report, indent=2, sort_keys=True))
    return 0
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script; the integer
# returned by main() is passed to SystemExit and becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|