Skip to content

Commit 213f1d9

Browse files
committed
- Remove the per-file cap from _around_one_file; cap only at op_around's single-file and dir-aggregate returns. Fixes double-capping (per-file + aggregate both 16KB) and interleaved truncation footers in dir fan-out.
- _cap_context_window: nl >= 0 (was > 0) so a newline at offset 0 still trims; consistent byte-accounting for the dropped count.
- Tests: +dir-aggregate cap (single footer), +single-giant-line no-newline.
- Docs: note grep:PATTERN:PATH:LIMIT:CONTEXT shares the grep_around cap; align .supertool.json descriptions.

Co-Authored-By: Max <noreply>
1 parent 12d2409 commit 213f1d9

4 files changed

Lines changed: 35 additions & 6 deletions

File tree

.supertool.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,13 @@
5252
},
5353
"around": {
5454
"syntax": "around:PATTERN:PATH[:N]",
55-
"description": "First match + N lines ctx (def 10). 1-shot grep+read. Output capped ~16KB (max_bytes)",
55+
"description": "First match + N lines ctx (def 10). 1-shot grep+read. Output capped ~16KB (max_bytes) — truncates at line boundary",
5656
"example": "around:def dispatch:supertool.py:15",
5757
"hint": true
5858
},
5959
"grep_around": {
6060
"syntax": "grep_around:PATTERN:PATH[:N[:LIMIT]]",
61-
"description": "Every match + N ctx lines (def 3, limit 10). Bulk usage scan. Output capped ~16KB (max_bytes)",
61+
"description": "Every match + N ctx lines (def 3, limit 10). Bulk usage scan. Output capped ~16KB (max_bytes) — truncates at line boundary",
6262
"example": "grep_around:def dispatch:supertool.py:5",
6363
"hint": true
6464
},

docs/operations/search.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Pattern-based ops for finding content across files or zooming into a known locat
1818

1919
A large `:N` context on a file of long (e.g. minified) lines can over-fetch — one op can dump hundreds of KB and blow your context budget. `around:` and `grep_around:` cap their output at **~16KB**, truncating at a line boundary with a footer that points at the narrower tools (smaller `:N`, or `between:` for a whole symbol). Tune via `builtin-ops.around.max_bytes` / `builtin-ops.grep_around.max_bytes` in `.supertool.json` (or `SUPERTOOL_AROUND_MAX_BYTES` / `SUPERTOOL_GREP_AROUND_MAX_BYTES`). See [configuration.md](../configuration.md#builtin-ops).
2020

21+
`grep:` with an explicit `CONTEXT` argument (`grep:PATTERN:PATH:LIMIT:CONTEXT`) shares the `grep_around:` code path, so it is capped under the same `grep_around.max_bytes` budget. Plain `grep:` (no context) is unaffected — it has its own `LIMIT`/`max_results` bound.
22+
2123
## Common patterns
2224

2325
Find all usages of a function across a codebase, with 2 lines of context:

supertool.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,10 +1225,13 @@ def _cap_context_window(text: str, op_name: str) -> str:
12251225
if len(encoded) <= cap:
12261226
return text
12271227
clipped = encoded[:cap].decode("utf-8", errors="ignore")
1228+
# Truncate at the last line boundary so we never cut mid-line. nl == -1
1229+
# means a single line longer than the cap — nothing to trim to, pass the
1230+
# partial through (the footer still flags it).
12281231
nl = clipped.rfind("\n")
1229-
if nl > 0:
1232+
if nl >= 0:
12301233
clipped = clipped[:nl + 1]
1231-
dropped = len(encoded) - len(clipped.encode("utf-8", errors="surrogateescape"))
1234+
dropped = len(encoded) - len(clipped.encode("utf-8", errors="ignore"))
12321235
return (clipped +
12331236
f"… truncated (~{dropped} more bytes) — narrow context (:N) "
12341237
f"or use between: for the whole symbol\n")
@@ -1272,7 +1275,7 @@ def _around_one_file(regex: "re.Pattern[str]", path: str, n: int) -> str:
12721275
marker = "→" if i == match_lineno else " "
12731276
out.append(f"{i + 1:>6}{marker}{lines[i]}")
12741277
out.append("\n")
1275-
return _cap_context_window("".join(out), "around")
1278+
return "".join(out)
12761279

12771280

12781281
def op_around(pattern: str, path: str, n: int = 10) -> str:
@@ -1331,7 +1334,7 @@ def op_around(pattern: str, path: str, n: int = 10) -> str:
13311334
rendered = _around_one_file(regex, path, n)
13321335
if not rendered:
13331336
return f"(no match for {pattern!r} in {path})\n\n"
1334-
return rendered
1337+
return _cap_context_window(rendered, "around")
13351338

13361339

13371340
def op_between_symbol(symbol: str, path: str) -> str:

tests/test_around_byte_cap_241.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,27 @@ def test_around_env_override(tmp_path: Path, monkeypatch) -> None:
7171
out = supertool.op_around("TARGET20", str(f), 40)
7272
assert len(out.encode()) <= 4000 + 200, f"env cap ignored: {len(out)} bytes"
7373
assert "truncated" in out
74+
75+
76+
def test_around_dir_aggregate_capped(tmp_path: Path) -> None:
77+
"""Dir fan-out caps the TOTAL op output, not just each file (#241 review)."""
78+
d = tmp_path / "pkg"
79+
d.mkdir()
80+
for i in range(10):
81+
(d / f"f{i}.js").write_text(f"head{i}\n{LONG} TARGET\n{LONG}\n")
82+
out = supertool.op_around("TARGET", str(d), 20)
83+
assert len(out.encode()) <= 16000 + 200, f"dir aggregate not capped: {len(out)}"
84+
assert "truncated" in out
85+
# Exactly one truncation footer — the per-file cap was removed, so footers
86+
# don't appear interleaved between files.
87+
assert out.count("… truncated") == 1
88+
89+
90+
def test_around_single_giant_line_no_newline(tmp_path: Path) -> None:
91+
"""A single line longer than the cap: pass the partial through + footer,
92+
never crash (nl == -1 branch)."""
93+
f = tmp_path / "min.js"
94+
f.write_text("TARGET " + "y" * 40000) # one line, no trailing context lines
95+
out = supertool.op_around("TARGET", str(f), 0)
96+
assert len(out.encode()) <= 16000 + 200, f"giant line not capped: {len(out)}"
97+
assert "truncated" in out

0 commit comments

Comments
 (0)