Research conducted on modern AI coding assistants (Cursor, GitHub Copilot, Cline,
Aider, Windsurf, Replit Agent) to understand architecture patterns, context management,
code editing workflows, and tool use protocols.
Key Decision: Pivoted from building a full CLI (40-50h) to a validation-driven, MCP-first
approach (10-15h). Build 5 core CODEX MCP tools that work with ANY coding assistant,
validate adoption over 2-4 weeks, then decide on a full CLI if demand is proven.
Files:
- research/ai-systems/modern-coding-assistants-architecture.md (comprehensive research)
- research/ai-systems/codex-coding-assistant-implementation-plan.md (original CLI plan, preserved)
- research/ai-systems/codex-mcp-tools-implementation-plan.md (approved MCP-first plan)
- ideas/registry.json (updated with approved MCP tools proposal)
Architech Validation: APPROVED with pivot to MCP-first approach
Human Decision: Approved (pragmatic validation-driven development)
Next: Begin Phase 1 implementation (10-15 hours, 5 core MCP tools)
🤖 Generated with CODEX Research System
Co-Authored-By: The Archivist <archivist@codex.svrnty.io>
Co-Authored-By: The Architech <architech@codex.svrnty.io>
Co-Authored-By: Mathias Beaulieu-Duncan <mat@svrnty.io>
290 lines
9.1 KiB
Python
Executable File
290 lines
9.1 KiB
Python
Executable File
#!/usr/bin/env python3
"""
End-to-End Test Script for CODEX MCP Server

Tests all 6 tools via JSON-RPC 2.0 protocol over stdin/stdout
"""

import json
import subprocess
import sys
import time
from pathlib import Path

# ANSI colors used to highlight console output; NC resets the terminal colour.
GREEN = '\033[0;32m'
RED = '\033[0;31m'
YELLOW = '\033[1;33m'
BLUE = '\033[0;34m'
NC = '\033[0m'

# The server project is located relative to this script's own directory.
PROJECT_DIR = Path(__file__).parent
SERVER_PROJECT = PROJECT_DIR / "samples" / "CodexMcpServer"
class MCPTester:
    """Run JSON-RPC requests against the CODEX MCP server and tally outcomes.

    Every test launches the server with ``dotnet run``, writes a single
    JSON-RPC request to its stdin and inspects the last JSON object that
    appears on its stdout.
    """

    def __init__(self):
        self.passed = 0    # number of tests that passed
        self.failed = 0    # number of tests that failed
        self.results = []  # (name, passed, message) tuples in execution order

    def print_header(self, text):
        """Print ``text`` framed by two 50-character rules in blue."""
        rule = f"{BLUE}{'=' * 50}{NC}"
        print(f"\n{rule}")
        print(f"{BLUE}{text}{NC}")
        print(f"{rule}\n")

    @staticmethod
    def _extract_response(stdout):
        """Return the last line of ``stdout`` that decodes to a JSON object.

        Lines are scanned from the end so that anything printed before the
        response (for example build output) is skipped.  JSON values that
        are not objects (a bare number, string or list) are ignored because
        a JSON-RPC response is always an object.  Returns ``None`` when no
        line qualifies.
        """
        for line in reversed(stdout.strip().split('\n')):
            try:
                candidate = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(candidate, dict):
                return candidate
        return None

    @staticmethod
    def _tool_call(request_id, params):
        """Build a ``tools/call`` JSON-RPC request with the given id and params."""
        return {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": "tools/call",
            "params": params,
        }

    def _fail(self, name, console_message, reason, detail=None):
        """Report a failed test on the console, record it and return False."""
        print(f"{RED}✗ FAILED: {console_message}{NC}")
        if detail is not None:
            print(detail)
        self.failed += 1
        self.results.append((name, False, reason))
        print()
        return False

    def test_request(self, name, request_obj, expect_error=False, timeout=5):
        """Send a JSON-RPC request and validate response

        Args:
            name: Human-readable test name used in output and in the results.
            request_obj: JSON-serialisable request written to the server's
                stdin as a single line.
            expect_error: When true, the response must contain an "error" key.
            timeout: Seconds to wait for the server process to finish.

        Returns:
            True when the test passed, False otherwise.  The outcome is also
            added to ``self.passed`` / ``self.failed`` and ``self.results``.
        """
        print(f"{YELLOW}Test: {name}{NC}")
        request_json = json.dumps(request_obj)
        print(f"Request: {request_json}")

        try:
            # subprocess.run kills and reaps the child itself when the
            # timeout expires, so no process or pipe is left behind.
            completed = subprocess.run(
                ["dotnet", "run", "--project", str(SERVER_PROJECT)],
                input=request_json + "\n",
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            return self._fail(name, "Server timeout", "Timeout")
        except Exception as e:
            # Harness boundary: any launch problem (e.g. dotnet not being
            # installed) is recorded as a failed test instead of aborting
            # the remaining tests.
            return self._fail(name, str(e), str(e))

        response_json = self._extract_response(completed.stdout)
        if response_json is None:
            return self._fail(
                name,
                "No valid JSON response",
                "No JSON response",
                detail=f"stdout: {completed.stdout[:200]}",
            )

        # Validate JSON-RPC structure
        if "jsonrpc" not in response_json:
            return self._fail(name, "Invalid JSON-RPC response", "Invalid structure")

        # NOTE(review): a response carrying "error" is still accepted when
        # expect_error is False.  The summary text says tools return errors
        # while the CODEX API is offline, so this looks intentional - confirm.
        if expect_error and "error" not in response_json:
            return self._fail(name, "Expected error response", "Expected error")

        print(f"{GREEN}✓ PASSED{NC}")
        print(f"Response: {json.dumps(response_json, indent=2)[:300]}")
        self.passed += 1
        self.results.append((name, True, "OK"))
        print()
        return True

    def run_tests(self):
        """Build the server, run every test case and print the summary.

        Exits the process with status 1 if the build fails; otherwise
        ``print_summary`` decides the exit status.
        """
        self.print_header("CODEX MCP Server - E2E Tests")

        # Build server
        print(f"{BLUE}Building server...{NC}")
        build = subprocess.run(
            ["dotnet", "build", str(SERVER_PROJECT)],
            capture_output=True,
            text=True,
        )
        if build.returncode != 0:
            print(f"{RED}✗ Build failed{NC}")
            # The build tool may report diagnostics on either stream, so
            # show both rather than stderr alone.
            if build.stdout:
                print(build.stdout)
            print(build.stderr)
            sys.exit(1)
        print(f"{GREEN}✓ Build successful{NC}\n")

        # (header, test name, request, expect_error) in execution order.
        cases = [
            (
                "Test 1: List Tools",
                "List all available tools",
                {"jsonrpc": "2.0", "id": "1", "method": "tools/list"},
                False,
            ),
            (
                "Test 2: Search CODEX Tool",
                "Call search_codex tool",
                self._tool_call(
                    "2", {"name": "search_codex", "arguments": {"query": "test"}}
                ),
                False,
            ),
            (
                "Test 3: Get Document Tool",
                "Call get_document tool",
                self._tool_call(
                    "3", {"name": "get_document", "arguments": {"id": "test123"}}
                ),
                False,
            ),
            (
                "Test 4: List Documents Tool",
                "Call list_documents tool",
                self._tool_call(
                    "4",
                    {"name": "list_documents", "arguments": {"page": 1, "pageSize": 10}},
                ),
                False,
            ),
            (
                "Test 5: Search By Tag Tool",
                "Call search_by_tag tool",
                self._tool_call(
                    "5", {"name": "search_by_tag", "arguments": {"tag": "architecture"}}
                ),
                False,
            ),
            (
                "Test 6: Get Document Sections Tool",
                "Call get_document_sections tool",
                self._tool_call(
                    "6", {"name": "get_document_sections", "arguments": {"id": "doc123"}}
                ),
                False,
            ),
            (
                "Test 7: List Tags Tool",
                "Call list_tags tool",
                self._tool_call("7", {"name": "list_tags"}),
                False,
            ),
            (
                "Test 8: Error Handling (Unknown Method)",
                "Call unknown method",
                {"jsonrpc": "2.0", "id": "8", "method": "unknown/method"},
                True,
            ),
            (
                "Test 9: Error Handling (Unknown Tool)",
                "Call nonexistent tool",
                self._tool_call("9", {"name": "nonexistent_tool", "arguments": {}}),
                True,
            ),
            (
                "Test 10: Error Handling (Missing Tool Name)",
                "Call tool without name parameter",
                self._tool_call("10", {"arguments": {}}),
                True,
            ),
        ]
        for header, name, request, expect_error in cases:
            self.print_header(header)
            self.test_request(name, request, expect_error=expect_error)

        # Print summary
        self.print_summary()

    def print_summary(self):
        """Print pass/fail totals and exit with 0 on success, 1 on any failure."""
        self.print_header("Test Summary")

        print(f"{GREEN}Passed: {self.passed}{NC}")
        print(f"{RED}Failed: {self.failed}{NC}")
        print(f"Total: {self.passed + self.failed}\n")

        if self.failed > 0:
            print(f"{YELLOW}Failed Tests:{NC}")
            for name, passed, msg in self.results:
                if not passed:
                    print(f"  - {name}: {msg}")
            print()

        if self.failed == 0:
            print(f"{GREEN}✓ All tests passed!{NC}\n")
            print(f"{BLUE}Next Steps:{NC}")
            print("1. Ensure CODEX API is running at http://localhost:5050")
            print("2. Configure Claude Desktop with this MCP server")
            print("3. Ask Claude to search your CODEX knowledge base!")
            sys.exit(0)
        else:
            print(f"{RED}✗ Some tests failed{NC}\n")
            print(f"{YELLOW}Note:{NC}")
            print("- Tools will return errors if CODEX API is not running")
            print("- This is expected behavior - the MCP server is working correctly")
            print("- JSON-RPC protocol validation passed if tests show valid responses")
            sys.exit(1)
if __name__ == "__main__":
    # Run the full suite when executed as a script.  run_tests() ends in
    # print_summary(), which exits with status 0 if every test passed and
    # 1 otherwise (a failed build also exits with 1).
    tester = MCPTester()
    tester.run_tests()