Skip to content

Commit 0f71b6f

Browse files
tcconnallytcconnallyclaude
authored
harden(#433): keyfile 0600-at-creation, remember input bounds, watcher symlink reject, doctor freshness (#434)
* harden(#433): keyfile 0600-at-creation, remember input bounds, watcher symlink reject, doctor freshness Defense-in-depth items from the 2026-07-03 CoS deep dive (v2.14.0). No data-loss or default-build behavior change. M1 — encryption keyfile permission race (main.rs keygen): the secret key was written, then chmod'd 0600 in a second step, leaving a brief world-readable window. Create the file with mode(0o600) via OpenOptions on Unix so the permission is set at inode creation; keep the explicit set_permissions as a defense-in-depth retighten for a pre-existing looser path. L — remember input bounds (tools.rs handle_remember): cap category (256), key (1024), and body_json (4 MiB). category/key are indexed, hashed for identity, and fed to FTS, so an unbounded key was a DoS-via-huge-key vector. Caps sit far above any legitimate use. L — file-watcher symlink reject (connectors/file_watcher.rs): scan_dir used is_dir()/is_file(), which follow symlinks — a symlink planted in a watched dir could escape the configured root despite SECURITY.md's "paths canonicalized" claim. Skip entries whose own file_type() is a symlink, before any recursion or read. N4 — doctor freshness/liveness (main.rs run_doctor): add a "last write N days ago" line (WARN past 14d) read from plaintext timestamp columns via a read-only connection (no encryption key needed), so a stale vault (harvest stopped) is visible instead of silently reported healthy. Not included here (tracked separately in #433): * M2 (bind workspace_hash into the audit-chain hash) needs a chain-format version bump + migration — it changes verify semantics for every existing chain, so it's not a drop-in defense-in-depth patch. * M3 (purge redaction workspace scoping) is already addressed by #417/#426: the journal-redaction JOIN is workspace-scoped (JRN_MATCH), with the residual default-workspace over-redaction documented and intentional (GDPR: never under-redact). * L dedup alloc bound: decode is already a bounded streaming decoder (SigDecoder, bounded by cand_count) — no unbounded pre-alloc remains. Tests: remember rejects oversized key/category + accepts normal; watcher ingests regular files and (unix) skips a symlink escaping the root. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(#433): write_all needs &[u8], not &String (cfg(unix) keygen path) The #[cfg(unix)] keygen branch is only compiled on Unix, so the Windows MSVC check that validated this PR never saw it. write_all takes &[u8]; generate_key() returns a String, so pass key.as_bytes(). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: tcconnally <hermes@perseus.observer> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent df33c7f commit 0f71b6f

3 files changed

Lines changed: 202 additions & 2 deletions

File tree

src/connectors/file_watcher.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,18 @@ impl FileWatcher {
8686
let entry = entry.map_err(|e| format!("Dir entry error: {}", e))?;
8787
let path = entry.path();
8888

89+
// #433 L: reject symlinked entries. `is_dir()`/`is_file()` follow
90+
// symlinks, so a symlink planted inside a watched directory could
91+
// point outside the configured root (SECURITY.md claims watched
92+
// paths are canonicalized). `entry.file_type()` reports the link
93+
// itself without traversing it, so we can skip it before any
94+
// recursion or read.
95+
match entry.file_type() {
96+
Ok(ft) if ft.is_symlink() => continue,
97+
Ok(_) => {}
98+
Err(_) => continue,
99+
}
100+
89101
if path.is_dir() {
90102
// Skip hidden dirs and common non-content dirs
91103
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
@@ -159,3 +171,57 @@ impl Connector for FileWatcher {
159171
&self.last_sync
160172
}
161173
}
174+
175+
#[cfg(test)]
176+
mod tests {
177+
use super::*;
178+
179+
fn cfg_for(dir: &std::path::Path) -> FileWatcherConfig {
180+
FileWatcherConfig {
181+
enabled: true,
182+
paths: vec![dir.to_string_lossy().to_string()],
183+
extensions: vec![".md".to_string()],
184+
debounce_ms: 0,
185+
}
186+
}
187+
188+
#[test]
189+
fn scan_ingests_regular_files() {
190+
let dir = std::env::temp_dir().join(format!("pv-fw-reg-{}", uuid::Uuid::new_v4()));
191+
std::fs::create_dir_all(&dir).unwrap();
192+
std::fs::write(dir.join("note.md"), "hello").unwrap();
193+
let fw = FileWatcher::new(cfg_for(&dir));
194+
let docs = fw.fetch().expect("fetch");
195+
assert_eq!(docs.len(), 1);
196+
assert!(docs[0].body_json.contains("hello"));
197+
let _ = std::fs::remove_dir_all(&dir);
198+
}
199+
200+
// #433 L: a symlink planted in a watched dir must be skipped, not followed
201+
// out of the configured root.
202+
#[cfg(unix)]
203+
#[test]
204+
fn scan_skips_symlinked_files() {
205+
use std::os::unix::fs::symlink;
206+
let root = std::env::temp_dir().join(format!("pv-fw-link-{}", uuid::Uuid::new_v4()));
207+
let outside = std::env::temp_dir().join(format!("pv-fw-out-{}", uuid::Uuid::new_v4()));
208+
std::fs::create_dir_all(&root).unwrap();
209+
std::fs::create_dir_all(&outside).unwrap();
210+
// Secret target lives OUTSIDE the watched root.
211+
let secret = outside.join("secret.md");
212+
std::fs::write(&secret, "TOP SECRET").unwrap();
213+
// Symlink inside the watched root points at it.
214+
symlink(&secret, root.join("link.md")).unwrap();
215+
// A real file that must still be ingested.
216+
std::fs::write(root.join("real.md"), "ok").unwrap();
217+
218+
let fw = FileWatcher::new(cfg_for(&root));
219+
let docs = fw.fetch().expect("fetch");
220+
assert_eq!(docs.len(), 1, "only the real file, not the symlink");
221+
assert!(docs[0].body_json.contains("ok"));
222+
assert!(!docs.iter().any(|d| d.body_json.contains("TOP SECRET")));
223+
224+
let _ = std::fs::remove_dir_all(&root);
225+
let _ = std::fs::remove_dir_all(&outside);
226+
}
227+
}

src/main.rs

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,32 @@ fn print_json<T: serde::Serialize>(value: &T) {
786786
/// #272: `perseus-vault doctor` — validate the local install + config and report
787787
/// which MCP clients Perseus Vault works with. ASCII-only output (cross-platform
788788
/// console safe).
789+
/// #433 N4: age in days since the most recent entity/journal write, or `None`
790+
/// when the DB is empty or unreadable. Uses a read-only connection and
791+
/// plaintext timestamp columns, so it needs no encryption key.
792+
fn latest_write_age_days(db_path: &str) -> Option<f64> {
793+
let conn = rusqlite::Connection::open_with_flags(
794+
db_path,
795+
rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY,
796+
)
797+
.ok()?;
798+
let max_of = |sql: &str| -> Option<i64> {
799+
conn.query_row(sql, [], |r| r.get::<_, Option<i64>>(0))
800+
.ok()
801+
.flatten()
802+
};
803+
let ent =
804+
max_of("SELECT MAX(COALESCE(recorded_at_unix_ms, created_at_unix_ms)) FROM entities");
805+
let jrn = max_of("SELECT MAX(created_at_unix_ms) FROM journal");
806+
let latest = [ent, jrn].into_iter().flatten().max()?;
807+
let now = std::time::SystemTime::now()
808+
.duration_since(std::time::UNIX_EPOCH)
809+
.ok()?
810+
.as_millis() as i64;
811+
let age_ms = (now - latest).max(0);
812+
Some(age_ms as f64 / (1000.0 * 60.0 * 60.0 * 24.0))
813+
}
814+
789815
fn run_doctor(db_path: &str) {
790816
println!("perseus-vault doctor — v{}", env!("CARGO_PKG_VERSION"));
791817
match std::env::current_exe() {
@@ -802,6 +828,22 @@ fn run_doctor(db_path: &str) {
802828
};
803829
println!(" database: {} ({})", db_path, db_status);
804830

831+
// #433 N4: freshness/liveness — surface a stale vault instead of silently
832+
// reporting "healthy" while the harvest/writer has quietly stopped. Reads
833+
// the most recent write timestamp from plaintext columns, so it needs no
834+
// encryption key.
835+
if dbp.exists() {
836+
const STALE_AFTER_DAYS: f64 = 14.0;
837+
match latest_write_age_days(db_path) {
838+
Some(days) if days > STALE_AFTER_DAYS => println!(
839+
" freshness: [WARN] last write {:.1} days ago (> {:.0} days) — is the harvest/writer running?",
840+
days, STALE_AFTER_DAYS
841+
),
842+
Some(days) => println!(" freshness: last write {:.1} days ago", days),
843+
None => println!(" freshness: (no writes recorded yet)"),
844+
}
845+
}
846+
805847
println!("\nMCP stdio config (identical for every client below):");
806848
println!(" command: perseus-vault");
807849
println!(" args: [\"serve\", \"--db\", \"{}\"]", db_path);
@@ -1214,9 +1256,34 @@ fn main() {
12141256
}
12151257

12161258
let key = crate::encryption::EncryptionManager::generate_key();
1217-
match std::fs::write(&expanded, &key) {
1259+
// #433 M1: create the key file with 0600 *at creation time* so the
1260+
// secret is never briefly world-readable in the window between the
1261+
// write and a follow-up chmod. On Unix, OpenOptions::mode applies
1262+
// the permission when the inode is created (umask can only remove
1263+
// bits, never widen past 0600).
1264+
let write_result: std::io::Result<()> = {
1265+
#[cfg(unix)]
1266+
{
1267+
use std::io::Write;
1268+
use std::os::unix::fs::OpenOptionsExt;
1269+
std::fs::OpenOptions::new()
1270+
.write(true)
1271+
.create(true)
1272+
.truncate(true)
1273+
.mode(0o600)
1274+
.open(&expanded)
1275+
.and_then(|mut f| f.write_all(key.as_bytes()))
1276+
}
1277+
#[cfg(not(unix))]
1278+
{
1279+
std::fs::write(&expanded, &key)
1280+
}
1281+
};
1282+
match write_result {
12181283
Ok(_) => {
1219-
// Set restrictive permissions (owner read-only)
1284+
// Defense-in-depth: if the path already existed with looser
1285+
// perms, create+truncate does not retighten it, so re-assert
1286+
// 0600 explicitly.
12201287
#[cfg(unix)]
12211288
{
12221289
use std::os::unix::fs::PermissionsExt;

src/tools.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,34 @@ pub fn handle_remember(db: &Database, args: Value) -> Result<String, String> {
477477
return Err(format!("body_json is not valid JSON: {}", e));
478478
}
479479

480+
// #433 L: bound input sizes. category/key are indexed, hashed for identity
481+
// (category, key, workspace_hash), and fed to FTS — an unbounded key is a
482+
// DoS-via-huge-key vector. These caps sit far above any legitimate use.
483+
const MAX_CATEGORY_LEN: usize = 256;
484+
const MAX_KEY_LEN: usize = 1024;
485+
const MAX_BODY_LEN: usize = 4 * 1024 * 1024; // 4 MiB
486+
if a.category.len() > MAX_CATEGORY_LEN {
487+
return Err(format!(
488+
"category too long: {} bytes (max {})",
489+
a.category.len(),
490+
MAX_CATEGORY_LEN
491+
));
492+
}
493+
if a.key.len() > MAX_KEY_LEN {
494+
return Err(format!(
495+
"key too long: {} bytes (max {})",
496+
a.key.len(),
497+
MAX_KEY_LEN
498+
));
499+
}
500+
if a.body_json.len() > MAX_BODY_LEN {
501+
return Err(format!(
502+
"body_json too long: {} bytes (max {})",
503+
a.body_json.len(),
504+
MAX_BODY_LEN
505+
));
506+
}
507+
480508
// Merge recall_when into body_json if provided
481509
let body = if a.recall_when.is_empty() {
482510
a.body_json
@@ -4607,6 +4635,45 @@ mod tests {
46074635
);
46084636
let _ = std::fs::remove_file(&path);
46094637
}
4638+
4639+
// ─── #433 L: input length bounds on remember ─────────────────
4640+
4641+
#[test]
4642+
fn remember_rejects_oversized_key() {
4643+
let (db, path) = temp_db();
4644+
let huge_key = "k".repeat(2000); // > MAX_KEY_LEN (1024)
4645+
let err = handle_remember(
4646+
&db,
4647+
json!({"category": "facts", "key": huge_key, "body_json": "{}"}),
4648+
)
4649+
.expect_err("oversized key must be rejected");
4650+
assert!(err.contains("key too long"), "unexpected error: {err}");
4651+
let _ = std::fs::remove_file(&path);
4652+
}
4653+
4654+
#[test]
4655+
fn remember_rejects_oversized_category() {
4656+
let (db, path) = temp_db();
4657+
let huge_cat = "c".repeat(500); // > MAX_CATEGORY_LEN (256)
4658+
let err = handle_remember(
4659+
&db,
4660+
json!({"category": huge_cat, "key": "k", "body_json": "{}"}),
4661+
)
4662+
.expect_err("oversized category must be rejected");
4663+
assert!(err.contains("category too long"), "unexpected error: {err}");
4664+
let _ = std::fs::remove_file(&path);
4665+
}
4666+
4667+
#[test]
4668+
fn remember_accepts_normal_sizes() {
4669+
let (db, path) = temp_db();
4670+
handle_remember(
4671+
&db,
4672+
json!({"category": "facts", "key": "normal-key", "body_json": "{\"a\":1}"}),
4673+
)
4674+
.expect("normally-sized remember must succeed");
4675+
let _ = std::fs::remove_file(&path);
4676+
}
46104677
}
46114678

46124679

0 commit comments

Comments
 (0)