Skip to content

Commit dced977

Browse files
authored
feat: per-op byte cap for around:/grep_around: (#241) (#262)
* feat: per-op byte cap for around:/grep_around: (#241) A large :N context on a file of long (minified) lines could dump hundreds of KB in one op, blowing the caller's context budget (~340KB observed). Add _cap_context_window: cap output at 16KB (configurable via builtin-ops.<op>.max_bytes / SUPERTOOL_<OP>_MAX_BYTES), truncate at a line boundary, append a footer pointing at the narrower tools (smaller :N, between:). Applied to around: (single-file + dir-aggregate) and the op_grep context branch (grep_around: and grep:-with-context). 6 TDD tests (red proven by neutralizing the cap). Docs: configuration.md table, operations/search.md cap section, .supertool[.example].json entries. Co-Authored-By: Max <noreply> * fix: address #262 review — dir-aggregate cap, boundary, grep:context docs - Remove the per-file cap from _around_one_file; cap only at op_around's single-file and dir-aggregate returns. Fixes double-capping (per-file + aggregate both 16KB) and interleaved truncation footers in dir fan-out. - _cap_context_window: nl >= 0 (was > 0) so a newline at offset 0 still trims; consistent byte-accounting for the dropped count. - Tests: +dir-aggregate cap (single footer), +single-giant-line no-newline. - Docs: note grep:PATTERN:PATH:LIMIT:CONTEXT shares the grep_around cap; align .supertool.json descriptions. Co-Authored-By: Max <noreply>
1 parent 07da665 commit dced977

7 files changed

Lines changed: 146 additions & 11 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ supertool
1919
.claude/worktrees/
2020

2121
.max/
22+
.max-ci.log

.supertool.example.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,15 @@
7878
},
7979
"around": {
8080
"syntax": "around:PATTERN:PATH[:N]",
81-
"description": "First match + N lines ctx (def 10). 1-shot grep+read",
82-
"example": "around:def main:src/app/Module.py:15"
81+
"description": "First match + N lines ctx (def 10). 1-shot grep+read. Output capped ~16KB (max_bytes) — truncates at line boundary",
82+
"example": "around:def main:src/app/Module.py:15",
83+
"max_bytes": 16000
8384
},
8485
"grep_around": {
8586
"syntax": "grep_around:PATTERN:PATH[:N[:LIMIT]]",
86-
"description": "Every match + N ctx lines (def 3, limit 10). Bulk usage scan",
87-
"example": "grep_around:CommandDriveCreateFile:src/:5"
87+
"description": "Every match + N ctx lines (def 3, limit 10). Bulk usage scan. Output capped ~16KB (max_bytes) — truncates at line boundary",
88+
"example": "grep_around:CommandDriveCreateFile:src/:5",
89+
"max_bytes": 16000
8890
},
8991
"glob": {
9092
"syntax": "glob:PATTERN",

.supertool.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,13 @@
5252
},
5353
"around": {
5454
"syntax": "around:PATTERN:PATH[:N]",
55-
"description": "First match + N lines ctx (def 10). 1-shot grep+read",
55+
"description": "First match + N lines ctx (def 10). 1-shot grep+read. Output capped ~16KB (max_bytes) — truncates at line boundary",
5656
"example": "around:def dispatch:supertool.py:15",
5757
"hint": true
5858
},
5959
"grep_around": {
6060
"syntax": "grep_around:PATTERN:PATH[:N[:LIMIT]]",
61-
"description": "Every match + N ctx lines (def 3, limit 10). Bulk usage scan",
61+
"description": "Every match + N ctx lines (def 3, limit 10). Bulk usage scan. Output capped ~16KB (max_bytes) — truncates at line boundary",
6262
"example": "grep_around:def dispatch:supertool.py:5",
6363
"hint": true
6464
},

docs/configuration.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ Entries document built-in operations (`syntax`, `description`, `example`). Set `
9191
| `read` | `max_bytes` | 20000 | Max bytes per read (truncates at cap) |
9292
| `grep` | `max_results` | 10 | Default result limit when not specified in the op |
9393
| `grep` | `extensions` | `[]` (all files) | Restrict grep to these file patterns (e.g. `["*.py", "*.js"]`). Empty = search all files |
94+
| `around` | `max_bytes` | 16000 | Max bytes for an `around:` context window (truncates at a line boundary) |
95+
| `grep_around` | `max_bytes` | 16000 | Max bytes for a `grep_around:` (and `grep:`-with-context) window (truncates at a line boundary) |
9496
| `glob` | `max_results` | 50 | Max files returned |
9597

9698
Example — increase read cap and restrict grep to PHP/XML:

docs/operations/search.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,17 @@ Pattern-based ops for finding content across files or zooming into a known locat
99
| `grep` | `grep:PATTERN:PATH` or `grep:PATTERN:PATH:LIMIT` | 10 results default, code + doc extensions only. **Auto-reads** full file if PATH is a concrete file < 20KB with a match. |
1010
| `grep` (context) | `grep:PATTERN:PATH:LIMIT:CONTEXT` | Show CONTEXT lines before/after each match (like `grep -C`). Match lines: `path:lineno:content`. Context lines: `path-lineno-content`. Non-adjacent groups separated by `--`. |
1111
| `grep` (count) | `grep:PATTERN:PATH:LIMIT:CONTEXT:count` | Return match counts per file instead of content. Output: `filepath:COUNT` per line. |
12-
| `grep_around` | `grep_around:PATTERN:PATH` or `grep_around:PATTERN:PATH:N:LIMIT` | Every match across files with N lines context (default N=3, LIMIT=10). Alias for `grep:PATTERN:PATH:LIMIT:CONTEXT` with sane defaults — useful for "show me how everyone uses this". |
13-
| `around` | `around:PATTERN:PATH` or `around:PATTERN:PATH:N` | Show N lines (default 10) before and after the **first** match of PATTERN in a single file. Uses line-numbered output like `read`. |
12+
| `grep_around` | `grep_around:PATTERN:PATH` or `grep_around:PATTERN:PATH:N:LIMIT` | Every match across files with N lines context (default N=3, LIMIT=10). Alias for `grep:PATTERN:PATH:LIMIT:CONTEXT` with sane defaults — useful for "show me how everyone uses this". Output capped at ~16KB (see below). |
13+
| `around` | `around:PATTERN:PATH` or `around:PATTERN:PATH:N` | Show N lines (default 10) before and after the **first** match of PATTERN in a single file. Uses line-numbered output like `read`. Output capped at ~16KB (see below). |
1414
| `around_line` | `around_line:PATH:LINE` or `around_line:PATH:LINE:N` | Show N lines (default 10) of context around a specific line number. Target line marked with ``. |
1515
| `between` | `between:SYMBOL:PATH` or `between:re:START:END:PATH` | Return a chunk of a file. **Symbol mode (default):** full body of a named function/method/class via tree-sitter (PHP, Python, JS, TS, Go, Rust, Java, Ruby — symbols with `::` like PHP `Foo::bar` work). **Pattern mode (`re:` prefix):** inclusive line slice from first line matching START regex to first line after matching END regex (language-agnostic). |
1616

17+
## Output cap
18+
19+
A large `:N` context on a file of long (e.g. minified) lines can over-fetch — one op can dump hundreds of KB and blow your context budget. `around:` and `grep_around:` cap their output at **~16KB**, truncating at a line boundary with a footer that points at the narrower tools (smaller `:N`, or `between:` for a whole symbol). Tune via `builtin-ops.around.max_bytes` / `builtin-ops.grep_around.max_bytes` in `.supertool.json` (or `SUPERTOOL_AROUND_MAX_BYTES` / `SUPERTOOL_GREP_AROUND_MAX_BYTES`). See [configuration.md](../configuration.md#builtin-ops).
20+
21+
`grep:` with an explicit `CONTEXT` argument (`grep:PATTERN:PATH:LIMIT:CONTEXT`) shares the `grep_around:` code path, so it is capped under the same `grep_around.max_bytes` budget. Plain `grep:` (no context) is unaffected — it has its own `LIMIT`/`max_results` bound.
22+
1723
## Common patterns
1824

1925
Find all usages of a function across a codebase, with 2 lines of context:

supertool.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ def _safe_relpath(path: str, start: str = ".") -> str:
151151
# Override via ops.batch.max_ops in .supertool.json for one-off bulk runs.
152152
MAX_BATCH_OPS = 1000
153153
MAX_READ_BYTES = 20000 # ~20KB cap — prevents Claude Code "Output too large"
154+
MAX_AROUND_BYTES = 16000 # per-op cap for around:/grep_around: context windows (#241)
154155
CHAR_WINDOW_CHARS = 1000 # head/tail peek window for minified single-line files
155156
MINIFIED_LINE_CHARS = 5000 # a single line this long means line-based view is useless
156157
MAX_GREP_RESULTS = 10
@@ -1181,7 +1182,7 @@ def op_grep(pattern: str, path: str = ".", limit: int = 0,
11811182
else:
11821183
out.append(f" {lineno}-{content}\n")
11831184
out.append("\n")
1184-
return "".join(out)
1185+
return _cap_context_window("".join(out), "grep_around")
11851186

11861187
hits = _grep_recursive(pattern, path, limit, excl)
11871188
count = len(hits)
@@ -1211,6 +1212,32 @@ def op_grep(pattern: str, path: str = ".", limit: int = 0,
12111212
_AROUND_DIR_MAX_FILES = 20
12121213

12131214

1215+
def _cap_context_window(text: str, op_name: str) -> str:
    """Cap a context-window op's output at a byte budget (#241).

    around:/grep_around: with a large :N on a file of long (e.g. minified)
    lines can emit hundreds of KB in one op, blowing the caller's context.
    Truncate at the last line boundary within the cap and append a footer
    that points at the narrower tools. Configurable via
    builtin-ops.<op>.max_bytes or SUPERTOOL_<OP>_MAX_BYTES.

    Args:
        text: The fully rendered op output.
        op_name: Op whose ``max_bytes`` setting applies (e.g. "around",
            "grep_around"); MAX_AROUND_BYTES is the fallback.

    Returns:
        ``text`` unchanged when its UTF-8 size fits the cap; otherwise the
        retained prefix followed by a one-line truncation footer. The footer
        itself is not counted against the cap.
    """
    import codecs

    cap = _get_op_int(op_name, "max_bytes", MAX_AROUND_BYTES)
    encoded = text.encode("utf-8", errors="surrogateescape")
    if len(encoded) <= cap:
        return text
    # Clamp: a negative cap would otherwise slice from the END of the buffer
    # (encoded[:-n]) and keep almost everything instead of nothing.
    window = encoded[:max(cap, 0)]
    # Look for the line boundary on the bytes so the cut is exact and the
    # retained prefix round-trips losslessly (surrogateescape both ways).
    nl = window.rfind(b"\n")
    if nl >= 0:
        clipped = window[:nl + 1].decode("utf-8", errors="surrogateescape")
    else:
        # A single line longer than the cap — nothing to trim to, pass the
        # partial through (the footer still flags it). The incremental
        # decoder with final=False withholds a multi-byte character split by
        # the cut without discarding escaped raw bytes earlier in the window.
        decoder = codecs.getincrementaldecoder("utf-8")(
            errors="surrogateescape")
        clipped = decoder.decode(window, final=False)
    # Same error handler as the initial encode, so the count matches what
    # was actually left out.
    dropped = len(encoded) - len(
        clipped.encode("utf-8", errors="surrogateescape"))
    return (clipped +
            f"… truncated (~{dropped} more bytes) — narrow context (:N) "
            f"or use between: for the whole symbol\n")
1239+
1240+
12141241
def _around_one_file(regex: "re.Pattern[str]", path: str, n: int) -> str:
12151242
"""Render the first match of regex in file at path with n lines context.
12161243
@@ -1300,15 +1327,15 @@ def op_around(pattern: str, path: str, n: int = 10) -> str:
13001327
if len(hits) >= _AROUND_DIR_MAX_FILES:
13011328
header += f", capped at {_AROUND_DIR_MAX_FILES}"
13021329
header += f", scanned {scanned})\n"
1303-
return header + "".join(hits)
1330+
return _cap_context_window(header + "".join(hits), "around")
13041331

13051332
if not os.path.isfile(path):
13061333
return f"ERROR: file not found: {path}\n"
13071334

13081335
rendered = _around_one_file(regex, path, n)
13091336
if not rendered:
13101337
return f"(no match for {pattern!r} in {path})\n\n"
1311-
return rendered
1338+
return _cap_context_window(rendered, "around")
13121339

13131340

13141341
def op_between_symbol(symbol: str, path: str) -> str:

tests/test_around_byte_cap_241.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""Regression tests for #241 — around:/grep_around: per-op byte cap.
2+
3+
A large context (:N) on a file of long (minified) lines could dump hundreds
4+
of KB in a single op, blowing the caller's context budget. The ops now cap
5+
their output at a byte budget, truncating at a line boundary and appending a
6+
footer that points at the narrower tools (smaller :N, between:).
7+
"""
8+
from __future__ import annotations
9+
10+
from pathlib import Path
11+
12+
import supertool
13+
14+
15+
# A long single line so a few of them blow past the 16KB cap fast.
16+
LONG = "x" * 4000
17+
18+
19+
def _make_long_file(tmp_path: Path, n_lines: int = 40) -> Path:
    """Create ``big.js`` under tmp_path and return its path.

    Each of the n_lines rows embeds LONG between a ``line<i>`` prefix and a
    ``TARGET<i>`` suffix, and the file ends with a trailing newline.
    """
    rows = [f"line{i} {LONG} TARGET{i}" for i in range(n_lines)]
    target = tmp_path / "big.js"
    target.write_text("\n".join(rows) + "\n")
    return target
24+
25+
26+
def test_around_caps_large_window(tmp_path: Path) -> None:
    """A 40-line window over long lines is held to the default 16000-byte cap."""
    f = _make_long_file(tmp_path)
    out = supertool.op_around("TARGET20", str(f), 40)
    # Measure once, in bytes, so the failure message reports the same
    # quantity the assertion checks (the footer has multi-byte characters).
    size = len(out.encode("utf-8"))
    # The extra 200 bytes leave room for the footer appended after the cap.
    assert size <= 16000 + 200, f"not capped: {size} bytes"
    assert "truncated" in out
    assert "between:" in out
32+
33+
34+
def test_around_truncates_at_line_boundary(tmp_path: Path) -> None:
    """The text kept ahead of the footer ends on a complete line."""
    big = _make_long_file(tmp_path)
    result = supertool.op_around("TARGET20", str(big), 40)
    # index() raises ValueError when the footer is absent, failing the test.
    kept = result[:result.index("… truncated")]
    assert kept.endswith("\n"), "truncation cut mid-line"
41+
42+
43+
def test_around_small_file_not_capped(tmp_path: Path) -> None:
    """Output far below the cap comes back without a truncation footer."""
    small = tmp_path / "small.py"
    small.write_text("a\nb\nTARGET\nd\ne\n")
    result = supertool.op_around("TARGET", str(small), 2)
    assert "truncated" not in result
    assert "TARGET" in result
49+
50+
51+
def test_grep_around_caps_large_window(tmp_path: Path) -> None:
    """op_grep with context > 0 (the grep_around route) is byte-capped too."""
    f = _make_long_file(tmp_path)
    # grep_around routes through op_grep with context > 0.
    out = supertool.op_grep("TARGET", str(f), limit=50, context=40)
    # Report the byte size the assertion actually checks, not len(out) chars.
    size = len(out.encode("utf-8"))
    assert size <= 16000 + 200, f"not capped: {size} bytes"
    assert "truncated" in out
57+
58+
59+
def test_grep_no_context_not_capped_by_window(tmp_path: Path) -> None:
    """Plain grep (context=0) takes a different branch — no window footer."""
    small = tmp_path / "small.py"
    small.write_text("alpha\nTARGET here\nbeta\n")
    result = supertool.op_grep("TARGET", str(small), limit=10, context=0)
    assert "truncated (~" not in result
    assert "TARGET" in result
66+
67+
68+
def test_around_env_override(tmp_path: Path, monkeypatch) -> None:
    """SUPERTOOL_AROUND_MAX_BYTES lowers the around: cap below the default."""
    f = _make_long_file(tmp_path)
    monkeypatch.setenv("SUPERTOOL_AROUND_MAX_BYTES", "4000")
    out = supertool.op_around("TARGET20", str(f), 40)
    # Report the byte size the assertion actually checks, not len(out) chars.
    size = len(out.encode("utf-8"))
    assert size <= 4000 + 200, f"env cap ignored: {size} bytes"
    assert "truncated" in out
74+
75+
76+
def test_around_dir_aggregate_capped(tmp_path: Path) -> None:
    """Dir fan-out caps the TOTAL op output, not just each file (#241 review)."""
    d = tmp_path / "pkg"
    d.mkdir()
    for i in range(10):
        (d / f"f{i}.js").write_text(f"head{i}\n{LONG} TARGET\n{LONG}\n")
    out = supertool.op_around("TARGET", str(d), 20)
    # Report the byte size the assertion actually checks, not len(out) chars.
    size = len(out.encode("utf-8"))
    assert size <= 16000 + 200, f"dir aggregate not capped: {size} bytes"
    assert "truncated" in out
    # Exactly one truncation footer — the per-file cap was removed, so footers
    # don't appear interleaved between files.
    assert out.count("… truncated") == 1
88+
89+
90+
def test_around_single_giant_line_no_newline(tmp_path: Path) -> None:
    """A single line longer than the cap: pass the partial through + footer,
    never crash (nl == -1 branch)."""
    f = tmp_path / "min.js"
    f.write_text("TARGET " + "y" * 40000)  # one line, no trailing context lines
    out = supertool.op_around("TARGET", str(f), 0)
    # Report the byte size the assertion actually checks, not len(out) chars.
    size = len(out.encode("utf-8"))
    assert size <= 16000 + 200, f"giant line not capped: {size} bytes"
    assert "truncated" in out

0 commit comments

Comments
 (0)