Skip to content

Commit ae74182

Browse files
committed
feat: implement real-time model performance tracking and advisory system
1 parent f70b6a3 commit ae74182

9 files changed

Lines changed: 578 additions & 3 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@ env/
1717
htmlcov/
1818
.mypy_cache/
1919
.ruff_cache/
20+
.routesmith/
2021
.DS_Store

CHANGELOG.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,20 @@
22

33
All notable changes to this project will be documented in this file.
44

5-
## [0.1.4] - Unreleased
5+
## [0.1.5] - Unreleased
6+
7+
### Added
8+
- Real-time model performance tracking with rolling-window persistence.
9+
- `PerformanceTracker` class records per-model task outcomes, durations, and success rates.
10+
- `routesmith stats` CLI command to view model performance tables and advisories.
11+
- Performance advisory automatically injected into run results when models underperform.
12+
- `PerformanceTracker` exported from the public API.
13+
14+
### Fixed
15+
- Gemini CLI adapter no longer returns a hardcoded fallback model when no environment is detected.
16+
- `SkillConfig.routing_preference` now normalizes input strings (case-insensitive, dash/space tolerant).
17+
18+
## [0.1.4] - 2026-05-07
619

720
### Added
821
- GitHub Release badge and direct release asset links in the README.

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,14 @@ Start it with `routesmith serve-stdio`.
145145

146146
This lets IDE extensions and agents call routesmith as a tool.
147147

148+
## Performance Tracking
149+
150+
routesmith records per-model task outcomes (duration, success/failure, capability class) across runs. Data is stored in `.routesmith/performance.json` and accumulates over time within a project.
151+
152+
View stats with `routesmith stats`. Filter to a specific model with `routesmith stats --model claude-sonnet-4-6`. Clear tracked data with `routesmith stats --clear`.
153+
154+
When a model's historical success rate drops below 70% or average latency exceeds 5 seconds, routesmith injects performance advisory messages into run results automatically.
155+
148156
## Install Configs for Hosts
149157

150158
Generate host-specific configuration files:
@@ -197,7 +205,7 @@ Development setup:
197205
- [x] Cost-aware routing
198206
- [x] Python policy plugins
199207
- [x] Gemini CLI host adapter
200-
- [ ] Real-time model performance tracking
208+
- [x] Real-time model performance tracking
201209
- [ ] Additional host adapters
202210

203211
## License

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "routesmith"
7-
version = "0.1.4"
7+
version = "0.1.5"
88
description = "Auto-route coding agent tasks to the best model in your IDE. Python library + MCP server for Claude Code, Codex, Gemini CLI, Copilot, Cursor, and Aider."
99
readme = "README.md"
1010
license = "MIT"

src/routesmith/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
)
1919
from routesmith.executor import Executor
2020
from routesmith.planner import Planner
21+
from routesmith.performance import PerformanceTracker
2122
from routesmith.policy_plugins import BasePolicyPlugin, PolicyPluginContext, PolicyPluginResult
2223
from routesmith.router import Router
2324
from routesmith.hosts.detector import detect_host, get_host_capabilities
@@ -64,6 +65,7 @@ def _read_version_from_pyproject() -> str:
6465
"PolicyPluginResult",
6566
"Executor",
6667
"Planner",
68+
"PerformanceTracker",
6769
"Router",
6870
]
6971

src/routesmith/cli.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,59 @@ def serve_stdio_cmd() -> None:
238238
run_stdio_server()
239239

240240

241+
@app.command("stats")
242+
def stats_cmd(
243+
model: str | None = typer.Option(None, "--model", "-m", help="Filter stats by model name."),
244+
clear: bool = typer.Option(False, "--clear", help="Clear all tracked performance data."),
245+
) -> None:
246+
"""Show real-time model performance statistics."""
247+
from routesmith.performance import PerformanceTracker
248+
249+
tracker = PerformanceTracker()
250+
251+
if clear:
252+
tracker.clear()
253+
console.print("[green]Performance data cleared.[/green]")
254+
return
255+
256+
stats = tracker.get_model_stats(model=model)
257+
if not stats:
258+
console.print("[dim]No performance data recorded yet. Run some tasks first.[/dim]")
259+
return
260+
261+
table = Table(title="Model Performance")
262+
table.add_column("Model", style="cyan")
263+
table.add_column("Tasks", justify="right")
264+
table.add_column("Success", justify="right", style="green")
265+
table.add_column("Fail", justify="right", style="red")
266+
table.add_column("Rate", justify="right")
267+
table.add_column("Avg (ms)", justify="right")
268+
table.add_column("Min (ms)", justify="right")
269+
table.add_column("Max (ms)", justify="right")
270+
271+
for s in stats:
272+
rate_color = "green" if s.success_rate >= 0.9 else ("yellow" if s.success_rate >= 0.7 else "red")
273+
table.add_row(
274+
s.model,
275+
str(s.total_tasks),
276+
str(s.successes),
277+
str(s.failures),
278+
f"[{rate_color}]{s.success_rate:.0%}[/{rate_color}]",
279+
f"{s.avg_duration_ms:.1f}",
280+
f"{s.min_duration_ms:.1f}",
281+
f"{s.max_duration_ms:.1f}",
282+
)
283+
284+
console.print(table)
285+
286+
# Show advisory if any
287+
advisory = tracker.get_performance_advisory()
288+
if advisory:
289+
console.print()
290+
for msg in advisory:
291+
console.print(f" [yellow]⚠[/yellow] {msg}")
292+
293+
241294
# Install sub-commands
242295
@install_app.callback(invoke_without_command=True)
243296
def install_default(

src/routesmith/executor.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from routesmith.config import load_config
99
from routesmith.hosts.detector import detect_host, get_host_adapter
1010
from routesmith.metrics import RouteMetrics, compute_metrics
11+
from routesmith.performance import PerformanceTracker
1112
from routesmith.planner import Planner
1213
from routesmith.review import review_plan, review_results
1314
from routesmith.router import Router
@@ -27,6 +28,7 @@ class Executor:
2728
def __init__(self, config: SkillConfig | None = None) -> None:
2829
self.config = config or load_config()
2930
self.planner = Planner()
31+
self.performance_tracker = PerformanceTracker()
3032

3133
def explain(self, prompt: str) -> RoutePlan:
3234
"""Explain the route plan without executing."""
@@ -117,6 +119,14 @@ def run(
117119
metrics=metrics.model_dump(),
118120
)
119121

122+
# Record performance data
123+
self.performance_tracker.record_run(plan, task_results)
124+
125+
# Inject performance advisory
126+
perf_advisory = self.performance_tracker.get_performance_advisory()
127+
if perf_advisory:
128+
result.advisory.extend(perf_advisory)
129+
120130
# Persist route if configured
121131
if self.config.save_routes:
122132
save_route(plan, task_results, metrics, self.config.routes_dir)

0 commit comments

Comments
 (0)