gorecodes · gorecodes · May 23, 2026 · May 23, 2026
diff --git a/README.md b/README.md
@@ -433,8 +433,8 @@ Overlay add is disabled by default. To enable it, set `ARBOR_ENABLE_OVERLAY_ADD=
 - **Installed packages** — filter installed packages, open package details, inspect metadata, USE state, and runtime dependencies
 - **Search packages** — search the Portage tree and jump to the selected package
 - **USE flags** — inspect global USE state, package-specific overrides, installed build state, and mismatch indicators
-- **Install / Uninstall** — pretend first, stream live output, resume running jobs, and require approval before the real root action starts
-- **Autounmask flow** — for masked install targets, Arbor can write accepted keywords to `/etc/portage/package.accept_keywords`
+- **Install / Uninstall** — pretend first, stream live output, resume running jobs, and require approval before the real root action starts. After a successful pretend, a **build-time ETA badge** is shown using local `emerge.log` history. The badge colour indicates confidence: green means all packages have been built on this machine before (reliable), yellow means some packages fall back to category averages (partial), grey means no local history exists and the figure is a rough estimate only. A legend is always visible alongside the badge.
+- **Autounmask flow** — for masked or USE-constrained install targets, Arbor detects both keyword masks and required USE flag changes from the pretend output, then writes the necessary entries to `/etc/portage/package.accept_keywords/arbor-accepted` and `/etc/portage/package.use/arbor-accepted` respectively, before re-running the pretend automatically
 - **etc-update review** — after successful installs, pending `._cfg*` files can be reviewed and resolved in the UI
 - **Maintenance** — sync, check `@world`, update `@world`, run preserved-rebuild, and depclean with approval on privileged steps
 - **Overlays** — list configured overlays, sync them, remove them, and optionally add new ones with explicit danger acknowledgement plus approval

diff --git a/backend/arbor/emerge_log.py b/backend/arbor/emerge_log.py
@@ -1,5 +1,6 @@
 """
-emerge_log.py — Parse /var/log/emerge.log to compute per-category compile times.
+emerge_log.py — Parse /var/log/emerge.log to compute per-category compile times
+and per-CP ETA estimates.
 
 Runs entirely in the arbor web process (no root needed — emerge.log is 644).
 Results are cached in memory and invalidated automatically when the file changes.
@@ -17,9 +18,26 @@
 _RE_START = re.compile(r"^(\d+):\s+>>> emerge \(\d+ of \d+\) (\S+) to /")
 _RE_END   = re.compile(r"^(\d+):\s+::: completed emerge \(\d+ of \d+\) (\S+) to /")
 
-# Module-level cache: (mtime_at_last_read, result).
-# Written only by _get_cached(); safe for single-process async use.
+# Module-level caches: (mtime_at_last_read, result).
+# Written only by their respective _get_*_cached(); safe for single-process async use.
 _cache: tuple[float, dict[str, int]] | None = None
+_cp_cache: tuple[float, dict[str, list[int]]] | None = None
+
+# Keep at most this many build times per CP (most recent wins).
+_CP_MAX_SAMPLES = 5
+
+
+def _atom_to_cp(atom: str) -> str:
+    """Reduce a versioned atom (cat/pkg-ver) to its cat/pkg key.
+
+    Portage's ``cpv_getkey`` is used when it can be imported and returns a
+    truthy key.  Otherwise a regex fallback strips a trailing version
+    (``-1.2.3``, optional letter, ``_alpha``/``_beta``/``_pre``/``_rc``/``_p``
+    suffixes, ``-rN`` revision).  Input the regex cannot match is returned
+    unchanged.
+    """
+    try:
+        from portage.versions import cpv_getkey
+        cp = cpv_getkey(atom)
+        if cp:
+            return cp
+    except Exception:
+        # Best effort: fall through to the regex when portage is unavailable
+        # or fails on this input.
+        pass
+    # The name is matched lazily and the optional version must be followed by
+    # a character that cannot continue a name, so hyphen-digit runs inside a
+    # name (e.g. "font-adobe-100dpi") are kept while the real version suffix
+    # is dropped.
+    m = re.match(
+        r'^([a-zA-Z0-9+_][a-zA-Z0-9+_./-]*/[a-zA-Z0-9+_][a-zA-Z0-9+_.-]*?)'
+        r'(?:-\d+(?:\.\d+)*[a-z]?(?:_(?:alpha|beta|pre|rc|p)\d*)*(?:-r\d+)?)?'
+        r'(?![a-zA-Z0-9+_.-])',
+        atom,
+    )
+    return m.group(1) if m else atom
 
 
 def _parse_emerge_log(path: Path = EMERGE_LOG) -> dict[str, int]:
@@ -64,6 +82,41 @@ def _parse_emerge_log(path: Path = EMERGE_LOG) -> dict[str, int]:
     return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))
 
 
+def _parse_emerge_log_per_cp(path: Path = EMERGE_LOG) -> dict[str, list[int]]:
+    """Collect recent build durations per cat/pkg from emerge.log.
+
+    Each ">>> emerge" start line is paired with the later "::: completed
+    emerge" line for the same atom; the elapsed seconds are stored under the
+    atom's cat/pkg key.  Only the newest ``_CP_MAX_SAMPLES`` durations are
+    kept per key.  If the log is missing or unreadable, whatever was
+    collected up to that point is returned.
+    """
+    started_at: dict[str, int] = {}
+    durations: dict[str, list[int]] = {}
+
+    try:
+        with path.open("r", errors="replace", buffering=65536) as log:
+            for entry in log:
+                begin = _RE_START.match(entry)
+                if begin is not None:
+                    # Remember when this atom started building.
+                    started_at[begin.group(2)] = int(begin.group(1))
+                    continue
+
+                finish = _RE_END.match(entry)
+                if finish is None:
+                    continue
+
+                finished_ts, atom = int(finish.group(1)), finish.group(2)
+                began_ts = started_at.pop(atom, None)
+                # Ignore completions with no recorded start and non-positive spans.
+                if began_ts is None or finished_ts - began_ts <= 0:
+                    continue
+
+                samples = durations.setdefault(_atom_to_cp(atom), [])
+                samples.append(finished_ts - began_ts)
+                if len(samples) > _CP_MAX_SAMPLES:
+                    # Drop the oldest sample so the most recent ones win.
+                    del samples[0]
+    except (FileNotFoundError, PermissionError):
+        pass
+
+    return durations
+
+
 def _get_cached() -> dict[str, int]:
     """
     Return cached result if emerge.log hasn't changed since last read,
@@ -84,7 +137,88 @@ def _get_cached() -> dict[str, int]:
     return result
 
 
+def _get_cp_cached() -> dict[str, list[int]]:
+    """Return per-CP build times, re-parsing only when emerge.log changes.
+
+    The log's current mtime is compared with the one stored in ``_cp_cache``;
+    on a match the cached mapping is returned as-is.  If the log cannot be
+    stat'ed (missing or not permitted) an empty dict is returned and the
+    cache is left untouched.
+    """
+    global _cp_cache
+
+    try:
+        current_mtime = EMERGE_LOG.stat().st_mtime
+    except (FileNotFoundError, PermissionError):
+        return {}
+
+    cached = _cp_cache
+    if cached is None or cached[0] != current_mtime:
+        # First call or the log changed: parse again and remember the mtime.
+        cached = (current_mtime, _parse_emerge_log_per_cp())
+        _cp_cache = cached
+    return cached[1]
+
+
+def estimate_eta(atoms: list[str]) -> dict:
+    """
+    Estimate the total build time for a list of CPV atoms (e.g. from a pretend run).
+
+    Returns ``{"total_seconds": int, "rough": bool, "items": [...]}`` where each
+    item is ``{"cp", "seconds", "confidence"}``.  ``rough`` becomes True as soon
+    as one item is not "exact".
+
+    Confidence levels (per item):
+      "exact"    — mean of this CP's recorded build times
+      "category" — CP unseen; mean of the per-CP means in its category
+      "global"   — category unseen; mean of every recorded build time
+      "unknown"  — no history at all; counted as 0 seconds
+    """
+    history = _get_cp_cached()
+
+    # First fallback: per-category running [sum of per-CP means, CP count].
+    cat_totals: dict[str, list] = {}
+    for known_cp, samples in history.items():
+        bucket = cat_totals.setdefault(known_cp.split("/")[0], [0.0, 0])
+        bucket[0] += sum(samples) / len(samples)
+        bucket[1] += 1
+    cat_avgs: dict[str, float] = {
+        name: acc / count for name, (acc, count) in cat_totals.items()
+    }
+
+    # Second fallback: mean over every individual sample.
+    sample_count = sum(len(samples) for samples in history.values())
+    sample_total = sum(sum(samples) for samples in history.values())
+    global_avg = sample_total / sample_count if sample_count else 0.0
+
+    items = []
+    rough = False
+
+    for atom in atoms:
+        cp = _atom_to_cp(atom)
+        samples = history.get(cp)
+
+        if samples is not None:
+            secs, confidence = round(sum(samples) / len(samples)), "exact"
+        else:
+            # Anything below "exact" marks the whole estimate as rough.
+            rough = True
+            cat_avg = cat_avgs.get(cp.split("/")[0])
+            if cat_avg is not None:
+                secs, confidence = round(cat_avg), "category"
+            elif global_avg:
+                secs, confidence = round(global_avg), "global"
+            else:
+                secs, confidence = 0, "unknown"
+
+        items.append({"cp": cp, "seconds": secs, "confidence": confidence})
+
+    total = sum(item["seconds"] for item in items)
+    return {"total_seconds": total, "rough": rough, "items": items}
+
+
 async def compile_time_by_category() -> dict[str, int]:
     """Async entry point — offloads the blocking file read to a thread pool."""
     loop = asyncio.get_running_loop()
     return await loop.run_in_executor(None, _get_cached)
+
+
+async def compile_time_estimate(atoms: list[str]) -> dict:
+    """Async wrapper: run ``estimate_eta(atoms)`` in the loop's default executor."""
+    return await asyncio.get_running_loop().run_in_executor(None, estimate_eta, atoms)
diff --git a/backend/arbor/main.py b/backend/arbor/main.py
@@ -37,7 +37,7 @@
     verify_csrf_tokens,
 )
 from .daemon_client import query, query_all, query_one
-from .emerge_log import compile_time_by_category
+from .emerge_log import compile_time_by_category, compile_time_estimate
 from .local_auth import dummy_password_hash, find_user_by_username, has_local_users, mark_login_success, record_login_failure, verify_password
 from .login_throttle import login_retry_after, register_login_failure, register_login_success
 from .session import clear_session_cookie, create_session, record_step_up, revoke_all_sessions, revoke_session, set_session_cookie, session_cookie_name
@@ -983,6 +983,22 @@ async def analytics_compile_time(auth: Auth):
     return await compile_time_by_category()
 
 
+@app.post("/api/analytics/eta-estimate")
+async def analytics_eta_estimate(auth: Auth, request: Request):
+    """
+    Build-time estimate for the CPV atoms listed under "atoms" in the JSON body.
+
+    Responds with 400 when "atoms" is not a list.  Non-string entries are
+    dropped and at most the first 100 remaining strings are passed on.
+    Read-only: no privilege required beyond authentication.
+    """
+    payload = await _json_object_body(request)
+    if isinstance(payload, JSONResponse):
+        # The body helper returned a ready-made response; hand it back as-is.
+        return payload
+
+    raw_atoms = payload.get("atoms", [])
+    if isinstance(raw_atoms, list):
+        wanted = [entry for entry in raw_atoms if isinstance(entry, str)]
+        return await compile_time_estimate(wanted[:100])
+    return JSONResponse(status_code=400, content={"error": "atoms must be a list"})
+
+
 @app.post("/api/emerge/etc-update/resolve")
 async def etc_update_resolve(auth: Auth, request: Request):
     require_min_role("owner")

diff --git a/backend/daemon/main.py b/backend/daemon/main.py
@@ -2239,14 +2239,15 @@ async def cmd_emerge_pretend(args):
             yield {"line": line}
         await proc.wait()
         full = "\n".join(lines)
-        # Only flag needs_unmask when emerge actually failed due to masking
+        # Only flag needs_unmask when emerge actually failed due to masking or USE changes
         needs_unmask = proc.returncode != 0 and any(s in full for s in [
             "autounmask-write",
             "package.accept_keywords",
             "package.license",
             "package.unmask",
             "missing keyword",
             "masked by: ~",
+            "USE changes are necessary",
         ])
         yield {"done": True, "returncode": proc.returncode, "needs_unmask": needs_unmask}
     finally:
@@ -2257,6 +2258,68 @@ async def cmd_emerge_pretend(args):
     r"-\s+([\w.+@/-]+(?:-[\d][\w.+@-]*)?)::\S+\s+\(masked by:\s+(~[\w-]+|missing)\s+keyword"
 )
 
+# A single USE flag token: an optional leading "-", then letters, digits,
+# "_" and "-" only.  Used to validate each whitespace-separated flag.
+_USE_FLAG_TOKEN_RE = re.compile(r'^-?[a-zA-Z0-9_][a-zA-Z0-9_-]*$')
+# Matches a USE-change line emitted by emerge --autounmask=y, e.g.:
+#   >=media-libs/libvpx-1.16.0 postproc
+#   =dev-libs/openssl-3.4.0:0/3 -bindist tls-heartbeat
+# Group 1 is the atom (optional operator, category/package, optional version
+# and ":slot" part); group 2 is the remainder of the line, i.e. the raw flag
+# text, which still has to be split and checked token by token.
+_USE_CHANGE_LINE_RE = re.compile(
+    r'^([<>=~!]?=?[a-z][a-z0-9+._-]*/[a-zA-Z0-9+._-][a-zA-Z0-9+._/-]*'
+    r'(?:-\d[\w.+@-]*)?(?::[\w.+/-]+)?)\s+(-?[a-zA-Z0-9_][a-zA-Z0-9_\s+=-]*)$'
+)
+
+
+def _parse_use_changes(text: str) -> list[tuple[str, str]]:
+    """Collect (atom, flags_str) pairs from USE-change sections of emerge output.
+
+    A section opens on any line containing "USE changes are necessary" and
+    closes on a line starting with "The following" or "!".  Inside a section,
+    blank lines, "#" comments and "(see ..." hints are ignored.  Flag tokens
+    that fail ``_USE_FLAG_TOKEN_RE`` are dropped; a line left without any
+    valid flag is skipped entirely.
+    """
+    found: list[tuple[str, str]] = []
+    collecting = False
+    for raw in text.splitlines():
+        candidate = raw.strip()
+        if "USE changes are necessary" in candidate:
+            collecting = True
+            continue
+        if not collecting or not candidate or candidate.startswith(("#", "(see")):
+            continue
+        # Inside a section, these prefixes mark the start of something else.
+        if candidate.startswith(("The following", "!")):
+            collecting = False
+            continue
+        hit = _USE_CHANGE_LINE_RE.match(candidate)
+        if hit is None:
+            continue
+        atom_text = hit.group(1).strip()
+        # Keep only the tokens that look like real USE flags.
+        accepted = " ".join(
+            token
+            for token in hit.group(2).strip().split()
+            if _USE_FLAG_TOKEN_RE.match(token)
+        )
+        if accepted:
+            found.append((atom_text, accepted))
+    return found
+
+
+def _write_use_flags(entries: list[tuple[str, str]]) -> tuple[str, list[str], list[str]]:
+    """Append [(atom, flags_str), ...] to package.use/arbor-accepted.
+
+    When /etc/portage/package.use is a directory the entries go to the
+    "arbor-accepted" file inside it; otherwise package.use itself is the
+    target.  Atoms failing ``_valid_atom`` are rejected.  An entry is skipped
+    when an identical line is already present; the comparison is per whole
+    line, so "=cat/pkg-1 foo" is not mistaken for a duplicate of
+    ">=cat/pkg-1 foo".
+
+    Returns (path, list_of_written_lines, list_of_rejected).
+    """
+    use_path = Path("/etc/portage/package.use")
+    target = use_path / "arbor-accepted" if use_path.is_dir() else use_path
+    target.parent.mkdir(parents=True, exist_ok=True)
+    existing = target.read_text() if target.exists() else ""
+    # Compare whole lines: a substring test would treat an entry as present
+    # whenever it is merely the tail of a longer existing line.
+    known = {ln.strip() for ln in existing.splitlines()}
+    # If the file lacks a final newline, start on a fresh line so the first
+    # appended comment is not glued onto the last existing entry.
+    needs_newline = bool(existing) and not existing.endswith("\n")
+    written: list[str] = []
+    rejected: list[str] = []
+    with open(target, "a") as f:
+        for atom, flags in entries:
+            if not _valid_atom(atom):
+                rejected.append(f"{atom!r} {flags!r}")
+                continue
+            entry = f"{atom} {flags}"
+            if entry in known:
+                continue
+            if needs_newline:
+                f.write("\n")
+                needs_newline = False
+            f.write(f"# Added by arbor\n{entry}\n")
+            known.add(entry)
+            written.append(entry)
+    return str(target), written, rejected
+
 
 async def cmd_emerge_autounmask(args):
     """Scan masked deps and write keyword entries to package.accept_keywords/arbor-accepted."""
@@ -2299,29 +2362,40 @@ async def cmd_emerge_autounmask(args):
             stderr=asyncio.subprocess.STDOUT,
             env=_EMERGE_ENV,
         )
+        unmask_lines = []
         async for raw in proc2.stdout:
-            yield {"line": _ANSI.sub("", raw.decode(errors="replace").rstrip())}
+            line = _ANSI.sub("", raw.decode(errors="replace").rstrip())
+            unmask_lines.append(line)
+            yield {"line": line}
         await proc2.wait()
+        unmask_full = "\n".join(unmask_lines)
 
-        # Step 3 — parse the plain-pretend output for "masked by" lines and write
-        # keyword entries to our own file under /etc/portage/package.accept_keywords.
-        # We never touch any other portage config file: USE/license/mask changes
-        # the user must apply manually.
-        entries = []
+        # Step 3 — write keyword entries for masked-by-keyword packages.
+        kw_entries = []
         for m in _MASKED_RE.finditer(scan_full):
             cpv_raw, kw_raw = m.group(1), m.group(2)
             kw = "**" if kw_raw == "missing" else kw_raw
-            entries.append((_normalize_atom(cpv_raw), kw))
-        entries.append((atom, "**"))  # always accept the main atom
+            kw_entries.append((_normalize_atom(cpv_raw), kw))
+        kw_entries.append((atom, "**"))  # always accept the main atom
 
-        kw_file, written, rejected = await in_thread(_write_keywords, entries)
-        if written:
-            for w in written:
+        kw_file, kw_written, kw_rejected = await in_thread(_write_keywords, kw_entries)
+        if kw_written:
+            for w in kw_written:
                 yield {"line": f"-- wrote '{w}' → {kw_file}"}
         else:
             yield {"line": f"-- no new keyword entries needed in {kw_file}"}
-        for r in rejected:
-            yield {"line": f"-- rejected invalid entry: {r}"}
+        for r in kw_rejected:
+            yield {"line": f"-- rejected invalid keyword entry: {r}"}
+
+        # Step 4 — write USE flag changes required by the autounmask output.
+        use_entries = _parse_use_changes(unmask_full)
+        if use_entries:
+            use_file, use_written, use_rejected = await in_thread(_write_use_flags, use_entries)
+            if use_written:
+                for w in use_written:
+                    yield {"line": f"-- wrote USE '{w}' → {use_file}"}
+            for r in use_rejected:
+                yield {"line": f"-- rejected invalid USE entry: {r}"}
 
         yield {"done": True, "returncode": 0}
     finally: