Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,8 @@ Overlay add is disabled by default. To enable it, set `ARBOR_ENABLE_OVERLAY_ADD=
- **Installed packages** — filter installed packages, open package details, inspect metadata, USE state, and runtime dependencies
- **Search packages** — search the Portage tree and jump to the selected package
- **USE flags** — inspect global USE state, package-specific overrides, installed build state, and mismatch indicators
- **Install / Uninstall** — pretend first, stream live output, resume running jobs, and require approval before the real root action starts
- **Autounmask flow** — for masked install targets, Arbor can write accepted keywords to `/etc/portage/package.accept_keywords`
- **Install / Uninstall** — pretend first, stream live output, resume running jobs, and require approval before the real root action starts. After a successful pretend, a **build-time ETA badge** is shown using local `emerge.log` history. The badge colour indicates confidence: green means all packages have been built on this machine before (reliable), yellow means some packages fall back to category averages (partial), grey means no local history exists and the figure is a rough estimate only. A legend is always visible alongside the badge.
- **Autounmask flow** — for masked or USE-constrained install targets, Arbor detects both keyword masks and required USE flag changes from the pretend output, then writes the necessary entries to `/etc/portage/package.accept_keywords/arbor-accepted` and `/etc/portage/package.use/arbor-accepted` respectively, before re-running the pretend automatically
- **etc-update review** — after successful installs, pending `._cfg*` files can be reviewed and resolved in the UI
- **Maintenance** — sync, check `@world`, update `@world`, run preserved-rebuild, and depclean with approval on privileged steps
- **Overlays** — list configured overlays, sync them, remove them, and optionally add new ones with explicit danger acknowledgement plus approval
Expand Down
140 changes: 137 additions & 3 deletions backend/arbor/emerge_log.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
emerge_log.py — Parse /var/log/emerge.log to compute per-category compile times.
emerge_log.py — Parse /var/log/emerge.log to compute per-category compile times
and per-CP ETA estimates.

Runs entirely in the arbor web process (no root needed — emerge.log is 644).
Results are cached in memory and invalidated automatically when the file changes.
Expand All @@ -17,9 +18,26 @@
# Start of a merge, e.g. "<timestamp>:  >>> emerge (3 of 7) cat/pkg-1.0 to /".
# Group 1 is the leading integer timestamp, group 2 the atom being merged.
_RE_START = re.compile(r"^(\d+):\s+>>> emerge \(\d+ of \d+\) (\S+) to /")
# Completion of a merge, e.g. "<timestamp>:  ::: completed emerge (3 of 7) cat/pkg-1.0 to /".
# Same two groups as _RE_START, so a start/end pair can be matched up by atom.
_RE_END = re.compile(r"^(\d+):\s+::: completed emerge \(\d+ of \d+\) (\S+) to /")

# Module-level cache: (mtime_at_last_read, result).
# Written only by _get_cached(); safe for single-process async use.
# Module-level caches, each a (mtime_at_last_read, result) pair, or None until first read.
# Each is written only by its own accessor: _cache by _get_cached(), _cp_cache by
# _get_cp_cached(); safe for single-process async use.
# NOTE(review): the async entry points call these accessors through
# run_in_executor, i.e. from worker threads — confirm that an occasional
# duplicate parse under concurrent requests is acceptable.
_cache: tuple[float, dict[str, int]] | None = None
_cp_cache: tuple[float, dict[str, list[int]]] | None = None

# Keep at most this many build times per CP (most recent wins).
_CP_MAX_SAMPLES = 5


def _atom_to_cp(atom: str) -> str:
"""Strip version from cat/pkg-ver → cat/pkg using portage, with regex fallback."""
try:
from portage.versions import cpv_getkey
cp = cpv_getkey(atom)
if cp:
return cp
except Exception:
pass
m = re.match(r'^([a-zA-Z0-9+_][a-zA-Z0-9+_./-]*/[a-zA-Z0-9+_][a-zA-Z0-9+_.-]*)', atom)
return m.group(1) if m else atom


def _parse_emerge_log(path: Path = EMERGE_LOG) -> dict[str, int]:
Expand Down Expand Up @@ -64,6 +82,41 @@ def _parse_emerge_log(path: Path = EMERGE_LOG) -> dict[str, int]:
return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))


def _parse_emerge_log_per_cp(path: Path = EMERGE_LOG) -> dict[str, list[int]]:
    """Scan emerge.log and collect recent build durations for every CP.

    A start line records the timestamp for its atom; the matching completion
    line turns that into a duration. Completions without a recorded start, and
    non-positive durations, are dropped. Only the newest _CP_MAX_SAMPLES
    durations are kept per CP, oldest first.

    A missing or unreadable log yields whatever was collected so far
    (normally an empty dict).
    """
    started_at: dict[str, int] = {}
    history: dict[str, list[int]] = {}

    try:
        with path.open("r", errors="replace", buffering=65536) as log:
            for entry in log:
                begin = _RE_START.match(entry)
                if begin is not None:
                    started_at[begin.group(2)] = int(begin.group(1))
                    continue

                finish = _RE_END.match(entry)
                if finish is None:
                    continue

                pkg = finish.group(2)
                began = started_at.pop(pkg, None)
                if began is None:
                    continue

                elapsed = int(finish.group(1)) - began
                if elapsed <= 0:
                    continue

                samples = history.setdefault(_atom_to_cp(pkg), [])
                samples.append(elapsed)
                # Evict the oldest sample once the window is full.
                if len(samples) > _CP_MAX_SAMPLES:
                    del samples[0]
    except (FileNotFoundError, PermissionError):
        pass

    return history


def _get_cached() -> dict[str, int]:
"""
Return cached result if emerge.log hasn't changed since last read,
Expand All @@ -84,7 +137,88 @@ def _get_cached() -> dict[str, int]:
return result


def _get_cp_cached() -> dict[str, list[int]]:
    """Return the per-CP build-time history, re-parsing only when emerge.log changed.

    The log's mtime is compared with the one stored next to the cached
    result; on a mismatch (or an empty cache) the log is parsed again and the
    cache replaced. If the log cannot be stat'ed, an empty dict is returned
    and the cache is left untouched.
    """
    global _cp_cache

    try:
        current_mtime = EMERGE_LOG.stat().st_mtime
    except (FileNotFoundError, PermissionError):
        return {}

    cached = _cp_cache
    if cached is None or cached[0] != current_mtime:
        cached = (current_mtime, _parse_emerge_log_per_cp())
        _cp_cache = cached
    return cached[1]


def estimate_eta(atoms: list[str]) -> dict:
    """
    Estimate the total build time for a list of CPV atoms (e.g. from a pretend run).

    Every atom is reduced to its CP and looked up in the per-CP history.

    Confidence levels (per item):
        "exact"    — mean of this CP's own recorded build times
        "category" — CP not in history; mean of the per-CP means of its category
        "global"   — category not in history either; mean of all recorded samples
        "unknown"  — history is empty; the item counts as 0 seconds

    Returns {"total_seconds": int, "rough": bool, "items": [...]}, where each
    item is {"cp", "seconds", "confidence"} and "rough" is True as soon as any
    item is not "exact".
    """
    history = _get_cp_cached()

    # First fallback: per category, running sum and count of the per-CP means.
    cat_stats: dict[str, tuple[float, int]] = {}
    for known_cp, samples in history.items():
        category = known_cp.split("/")[0]
        running, seen = cat_stats.get(category, (0.0, 0))
        cat_stats[category] = (running + sum(samples) / len(samples), seen + 1)
    cat_avgs: dict[str, float] = {
        category: running / seen for category, (running, seen) in cat_stats.items()
    }

    # Second fallback: mean over every individual sample.
    sample_total = 0
    sample_count = 0
    for samples in history.values():
        sample_total += sum(samples)
        sample_count += len(samples)
    global_avg = sample_total / sample_count if sample_count else 0.0

    items = []
    for atom in atoms:
        cp = _atom_to_cp(atom)
        own = history.get(cp)

        if own is not None:
            secs, confidence = round(sum(own) / len(own)), "exact"
        else:
            category = cp.split("/")[0]
            if category in cat_avgs:
                secs, confidence = round(cat_avgs[category]), "category"
            elif global_avg:
                secs, confidence = round(global_avg), "global"
            else:
                secs, confidence = 0, "unknown"

        items.append({"cp": cp, "seconds": secs, "confidence": confidence})

    return {
        "total_seconds": sum(item["seconds"] for item in items),
        "rough": any(item["confidence"] != "exact" for item in items),
        "items": items,
    }


async def compile_time_by_category() -> dict[str, int]:
    """Async wrapper: run the blocking, cached log read on the default executor."""
    return await asyncio.get_running_loop().run_in_executor(None, _get_cached)


async def compile_time_estimate(atoms: list[str]) -> dict:
    """Async wrapper: compute the ETA for *atoms* on the default executor."""
    return await asyncio.get_running_loop().run_in_executor(
        None, estimate_eta, atoms
    )
18 changes: 17 additions & 1 deletion backend/arbor/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
verify_csrf_tokens,
)
from .daemon_client import query, query_all, query_one
from .emerge_log import compile_time_by_category
from .emerge_log import compile_time_by_category, compile_time_estimate
from .local_auth import dummy_password_hash, find_user_by_username, has_local_users, mark_login_success, record_login_failure, verify_password
from .login_throttle import login_retry_after, register_login_failure, register_login_success
from .session import clear_session_cookie, create_session, record_step_up, revoke_all_sessions, revoke_session, set_session_cookie, session_cookie_name
Expand Down Expand Up @@ -983,6 +983,22 @@ async def analytics_compile_time(auth: Auth):
return await compile_time_by_category()


@app.post("/api/analytics/eta-estimate")
async def analytics_eta_estimate(auth: Auth, request: Request):
    """
    Return a build-time estimate for the CPV atoms posted as {"atoms": [...]}.

    Read-only: no privilege required beyond authentication.
    Non-string entries are ignored and at most the first 100 strings are used;
    a non-list "atoms" value is answered with HTTP 400.
    """
    body = await _json_object_body(request)
    if isinstance(body, JSONResponse):
        # The body helper already produced an error response; pass it through.
        return body

    raw_atoms = body.get("atoms", [])
    if not isinstance(raw_atoms, list):
        return JSONResponse(status_code=400, content={"error": "atoms must be a list"})

    usable = [str(item) for item in raw_atoms if isinstance(item, str)]
    return await compile_time_estimate(usable[:100])


@app.post("/api/emerge/etc-update/resolve")
async def etc_update_resolve(auth: Auth, request: Request):
require_min_role("owner")
Expand Down
102 changes: 88 additions & 14 deletions backend/daemon/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,14 +2239,15 @@ async def cmd_emerge_pretend(args):
yield {"line": line}
await proc.wait()
full = "\n".join(lines)
# Only flag needs_unmask when emerge actually failed due to masking
# Only flag needs_unmask when emerge actually failed due to masking or USE changes
needs_unmask = proc.returncode != 0 and any(s in full for s in [
"autounmask-write",
"package.accept_keywords",
"package.license",
"package.unmask",
"missing keyword",
"masked by: ~",
"USE changes are necessary",
])
yield {"done": True, "returncode": proc.returncode, "needs_unmask": needs_unmask}
finally:
Expand All @@ -2257,6 +2258,68 @@ async def cmd_emerge_pretend(args):
r"-\s+([\w.+@/-]+(?:-[\d][\w.+@-]*)?)::\S+\s+\(masked by:\s+(~[\w-]+|missing)\s+keyword"
)

# Matches a USE-change line emitted by emerge --autounmask=y, e.g.:
# >=media-libs/libvpx-1.16.0 postproc
# =dev-libs/openssl-3.4.0:0/3 -bindist tls-heartbeat
_USE_FLAG_TOKEN_RE = re.compile(r'^-?[a-zA-Z0-9_][a-zA-Z0-9_-]*$')
_USE_CHANGE_LINE_RE = re.compile(
r'^([<>=~!]?=?[a-z][a-z0-9+._-]*/[a-zA-Z0-9+._-][a-zA-Z0-9+._/-]*'
r'(?:-\d[\w.+@-]*)?(?::[\w.+/-]+)?)\s+(-?[a-zA-Z0-9_][a-zA-Z0-9_\s+=-]*)$'
)


def _parse_use_changes(text: str) -> list[tuple[str, str]]:
"""Extract (atom, flags_str) pairs from the USE-change block in autounmask output."""
entries: list[tuple[str, str]] = []
in_block = False
for line in text.splitlines():
stripped = line.strip()
if "USE changes are necessary" in stripped:
in_block = True
continue
if not in_block:
continue
if not stripped or stripped.startswith("#") or stripped.startswith("(see"):
continue
# A non-comment non-empty line outside a USE block signals a new section.
if stripped.startswith("The following") or stripped.startswith("!"):
in_block = False
continue
m = _USE_CHANGE_LINE_RE.match(stripped)
if not m:
continue
atom_raw, flags_raw = m.group(1).strip(), m.group(2).strip()
# Validate each flag token.
flags = [f for f in flags_raw.split() if _USE_FLAG_TOKEN_RE.match(f)]
if not flags:
continue
entries.append((atom_raw, " ".join(flags)))
return entries


def _write_use_flags(entries: list[tuple[str, str]]) -> tuple[str, list[str], list[str]]:
    """Append [(atom, flags_str), ...] to package.use/arbor-accepted.

    When /etc/portage/package.use is a directory the entries go to the
    "arbor-accepted" file inside it; otherwise package.use itself is treated
    as the target file.

    Entries are de-duplicated against whole existing lines (whitespace
    normalised), so a commented-out or differently-prefixed line that merely
    contains the same text does not suppress a new entry. The file is only
    opened for writing when there is something new to add.

    Returns (path, list_of_written_lines, list_of_rejected).
    """
    use_path = Path("/etc/portage/package.use")
    target = use_path / "arbor-accepted" if use_path.is_dir() else use_path
    target.parent.mkdir(parents=True, exist_ok=True)
    existing = target.read_text() if target.exists() else ""

    # Exact-line lookup; a substring test would let "# =cat/pkg foo" or
    # ">=cat/pkg foo" hide a new "=cat/pkg foo" entry.
    known = {" ".join(row.split()) for row in existing.splitlines()}

    written: list[str] = []
    rejected: list[str] = []
    pending: list[str] = []
    for atom, flags in entries:
        if not _valid_atom(atom):
            rejected.append(f"{atom!r} {flags!r}")
            continue
        entry = f"{atom} {flags}"
        if " ".join(entry.split()) in known:
            continue
        known.add(" ".join(entry.split()))
        pending.append(f"# Added by arbor\n{entry}\n")
        written.append(entry)

    if pending:
        with open(target, "a") as f:
            # Do not glue the first new line onto an unterminated last line.
            if existing and not existing.endswith("\n"):
                f.write("\n")
            f.writelines(pending)

    return str(target), written, rejected


async def cmd_emerge_autounmask(args):
"""Scan masked deps and write keyword entries to package.accept_keywords/arbor-accepted."""
Expand Down Expand Up @@ -2299,29 +2362,40 @@ async def cmd_emerge_autounmask(args):
stderr=asyncio.subprocess.STDOUT,
env=_EMERGE_ENV,
)
unmask_lines = []
async for raw in proc2.stdout:
yield {"line": _ANSI.sub("", raw.decode(errors="replace").rstrip())}
line = _ANSI.sub("", raw.decode(errors="replace").rstrip())
unmask_lines.append(line)
yield {"line": line}
await proc2.wait()
unmask_full = "\n".join(unmask_lines)

# Step 3 — parse the plain-pretend output for "masked by" lines and write
# keyword entries to our own file under /etc/portage/package.accept_keywords.
# We never touch any other portage config file: USE/license/mask changes
# the user must apply manually.
entries = []
# Step 3 — write keyword entries for masked-by-keyword packages.
kw_entries = []
for m in _MASKED_RE.finditer(scan_full):
cpv_raw, kw_raw = m.group(1), m.group(2)
kw = "**" if kw_raw == "missing" else kw_raw
entries.append((_normalize_atom(cpv_raw), kw))
entries.append((atom, "**")) # always accept the main atom
kw_entries.append((_normalize_atom(cpv_raw), kw))
kw_entries.append((atom, "**")) # always accept the main atom

kw_file, written, rejected = await in_thread(_write_keywords, entries)
if written:
for w in written:
kw_file, kw_written, kw_rejected = await in_thread(_write_keywords, kw_entries)
if kw_written:
for w in kw_written:
yield {"line": f"-- wrote '{w}' → {kw_file}"}
else:
yield {"line": f"-- no new keyword entries needed in {kw_file}"}
for r in rejected:
yield {"line": f"-- rejected invalid entry: {r}"}
for r in kw_rejected:
yield {"line": f"-- rejected invalid keyword entry: {r}"}

# Step 4 — write USE flag changes required by the autounmask output.
use_entries = _parse_use_changes(unmask_full)
if use_entries:
use_file, use_written, use_rejected = await in_thread(_write_use_flags, use_entries)
if use_written:
for w in use_written:
yield {"line": f"-- wrote USE '{w}' → {use_file}"}
for r in use_rejected:
yield {"line": f"-- rejected invalid USE entry: {r}"}

yield {"done": True, "returncode": 0}
finally:
Expand Down
Loading
Loading