Skip to content

Commit ac4ba4d

Browse files
tcconnallytcconnallyclaude
authored
harden(#433 M2): bind workspace_hash into the audit-chain hash (#438)
The journal audit chain hashed only (prev_hash, id, created_at_unix_ms), so a journal entry could be moved between workspaces without breaking the chain — verify_audit_chain would still pass. Fold workspace_hash (stamped on every row since #417) into the hashed tuple so a cross-workspace move is now tamper-evident. - audit_hash / sha256_chain / sha256_genesis take workspace_hash; journal() derives workspace_hash BEFORE computing the hash and feeds it in. - verify_audit_chain selects workspace_hash and recomputes with it. Both read paths order by (created_at_unix_ms ASC, rowid ASC) for a deterministic round-trip. Backward compatibility (chain-format upgrade, no data loss): - SCHEMA_VERSION 11 -> 12. New migration step rehash_audit_chain() recomputes every existing chain under the workspace-bound formula, in chain order, so chains written by pre-v12 binaries still verify. Deterministic + idempotent; no-op on a fresh/empty journal. Runs inside the existing BEGIN IMMEDIATE migration transaction. Redaction interaction: purge previously scrubbed workspace_hash = '' on redacted journal rows. Since workspace_hash is now a hashed field, that would break "redaction preserves the chain" (and verify). Redaction now PRESERVES workspace_hash (like id/created_at/audit_hash) — it's a non-reversible path digest, not stored content, so retaining it on the tombstone is consistent with erasing the payload + identifying fields. Doc updated. Tests: workspace-move breaks the chain (the M2 property); a forged pre-M2 (v11) chain verifies after the real v11->v12 migration path; both existing purge/redaction chain-survival tests still pass. cargo test --no-default-features (MSVC): 4/4 in scope. Co-authored-by: tcconnally <hermes@perseus.observer> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 6bc7a50 commit ac4ba4d

2 files changed

Lines changed: 201 additions & 30 deletions

File tree

src/db.rs

Lines changed: 190 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3889,24 +3889,14 @@ impl Database {
38893889
/// Append a journal event.
38903890
pub fn journal(&self, event: &JournalEvent) -> Result<(), Box<dyn std::error::Error>> {
38913891
let conn = self.conn()?;
3892-
// Compute audit chain hash: SHA-256(prev_hash || event_id || created_at_ms)
3893-
let prev_hash: Option<String> = conn.query_row(
3894-
"SELECT audit_hash FROM journal ORDER BY created_at_unix_ms DESC LIMIT 1",
3895-
[],
3896-
|r| r.get::<_, Option<String>>(0),
3897-
).unwrap_or(None);
3898-
3899-
let computed_hash = if let Some(ref prev) = prev_hash {
3900-
crate::db::sha256_chain(prev, &event.id, event.created_at_unix_ms)
3901-
} else {
3902-
crate::db::sha256_genesis(&event.id, event.created_at_unix_ms)
3903-
};
39043892

39053893
// #417: stamp the workspace of the referenced entity so purge can scope
39063894
// journal redaction per-workspace. Prefer an explicit value on the
39073895
// event; otherwise derive it from the referenced entity (the live row,
39083896
// or a superseded version in entity_history when the live id has since
39093897
// changed). System events with no entity_id stay '' (workspace-agnostic).
3898+
// #433 M2: derived BEFORE the audit hash because it is now part of the
3899+
// hashed tuple.
39103900
let workspace_hash = if !event.workspace_hash.is_empty() {
39113901
event.workspace_hash.clone()
39123902
} else if !event.entity_id.is_empty() {
@@ -3929,6 +3919,19 @@ impl Database {
39293919
String::new()
39303920
};
39313921

3922+
// Compute audit chain hash over (prev_hash, id, created_at, workspace).
3923+
let prev_hash: Option<String> = conn.query_row(
3924+
"SELECT audit_hash FROM journal ORDER BY created_at_unix_ms DESC LIMIT 1",
3925+
[],
3926+
|r| r.get::<_, Option<String>>(0),
3927+
).unwrap_or(None);
3928+
3929+
let computed_hash = if let Some(ref prev) = prev_hash {
3930+
crate::db::sha256_chain(prev, &event.id, event.created_at_unix_ms, &workspace_hash)
3931+
} else {
3932+
crate::db::sha256_genesis(&event.id, event.created_at_unix_ms, &workspace_hash)
3933+
};
3934+
39323935
conn.execute(
39333936
"INSERT INTO journal
39343937
(id, event_type, evaluated_json, acted_json, forward_json,
@@ -6390,12 +6393,14 @@ Return a JSON object with an "insights" array. Each insight has:
63906393
/// historical bodies readable via mimir_history / mimir_as_of.
63916394
/// * `journal`: rows referencing a purged entity (by entity_id or by its
63926395
/// category/key) are REDACTED IN PLACE, not deleted. The audit chain
6393-
/// hashes only (prev_hash, id, created_at_unix_ms) — see audit_hash —
6394-
/// so scrubbing the payload columns (evaluated/acted/forward JSON,
6395-
/// category, key, entity_id) and stamping event_type='redacted'
6396-
/// preserves end-to-end chain verifiability (verify_audit_chain)
6397-
/// while removing every purged body from the log. Deleting the rows
6398-
/// instead would break every subsequent link of the chain.
6396+
/// hashes (prev_hash, id, created_at_unix_ms, workspace_hash) — see
6397+
/// audit_hash (#433 M2) — so scrubbing only the payload/identifying
6398+
/// columns (evaluated/acted/forward JSON, category, key, entity_id) and
6399+
/// stamping event_type='redacted', while PRESERVING the hashed tuple
6400+
/// (id, created_at, workspace_hash), keeps end-to-end chain
6401+
/// verifiability (verify_audit_chain) while removing every purged body
6402+
/// from the log. Deleting the rows — or scrubbing a hashed field —
6403+
/// would break every subsequent link of the chain.
63996404
pub fn purge(&self, dry_run: bool) -> Result<PurgeReport, Box<dyn std::error::Error>> {
64006405
let conn = self.conn()?;
64016406
let before_size = match std::fs::metadata(&self.db_path) {
@@ -6500,9 +6505,15 @@ Return a JSON object with an "insights" array. Each insight has:
65006505
)? as i64;
65016506
journal_redacted += conn.execute(
65026507
&format!(
6508+
// #433 M2: workspace_hash is now part of the audit-chain
6509+
// hashed tuple, so it must be PRESERVED through redaction
6510+
// (like id/created_at/audit_hash) or the chain would no
6511+
// longer verify. It is a non-reversible digest of a path,
6512+
// not stored content, so retaining it on the tombstone is
6513+
// consistent with erasing the payload + identifying fields.
65036514
"UPDATE journal SET event_type = 'redacted', evaluated_json = '{{}}', \
65046515
acted_json = '{{}}', forward_json = '{{}}', category = '', key = '', \
6505-
entity_id = '', workspace_hash = '' WHERE {JRN_MATCH}"
6516+
entity_id = '' WHERE {JRN_MATCH}"
65066517
),
65076518
params![id, cat, key, ws],
65086519
)? as i64;
@@ -8312,21 +8323,64 @@ impl Drop for Database {
83128323
/// Simple deterministic hash for audit chain (SHA-256 substitute).
83138324
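/// Uses Rust's stdlib `DefaultHasher` (SipHash today) — fast but not cryptographic, and its algorithm is unspecified and may change between Rust releases, so persisted hashes are only guaranteed reproducible with the same toolchain.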
83148325
/// For production audit logs, upgrade to a proper crypto crate.
8315-
fn audit_hash(prev_hash: &str, event_id: &str, created_at_ms: i64) -> String {
8326+
fn audit_hash(prev_hash: &str, event_id: &str, created_at_ms: i64, workspace_hash: &str) -> String {
83168327
use std::hash::{Hash, Hasher};
83178328
let mut hasher = std::collections::hash_map::DefaultHasher::new();
83188329
prev_hash.hash(&mut hasher);
83198330
event_id.hash(&mut hasher);
83208331
created_at_ms.hash(&mut hasher);
8332+
// #433 M2: bind the entry's workspace into the chain so a journal row can't
8333+
// be silently moved between workspaces without breaking verification. (Hash
8334+
// impls for &str append a 0xFF terminator byte, which never occurs in UTF-8, so adjacent string fields cannot run together.)
8335+
workspace_hash.hash(&mut hasher);
83218336
format!("{:016x}", hasher.finish())
83228337
}
83238338

8324-
fn sha256_chain(prev_hash: &str, event_id: &str, created_at_ms: i64) -> String {
8325-
audit_hash(prev_hash, event_id, created_at_ms)
8339+
fn sha256_chain(prev_hash: &str, event_id: &str, created_at_ms: i64, workspace_hash: &str) -> String {
8340+
audit_hash(prev_hash, event_id, created_at_ms, workspace_hash)
8341+
}
8342+
8343+
fn sha256_genesis(event_id: &str, created_at_ms: i64, workspace_hash: &str) -> String {
8344+
audit_hash("genesis", event_id, created_at_ms, workspace_hash)
83268345
}
83278346

8328-
fn sha256_genesis(event_id: &str, created_at_ms: i64) -> String {
8329-
audit_hash("genesis", event_id, created_at_ms)
8347+
/// #433 M2: recompute the whole audit chain under the workspace-bound hash
8348+
/// formula. The v12 schema migration calls this to upgrade chains written by
8349+
/// pre-v12 binaries (which hashed only `(prev, id, created_at)`) so they still
8350+
/// verify. Rows are read in the SAME order [`verify_audit_chain`] uses; each
8351+
/// `audit_hash` is recomputed from its (now workspace-bound) inputs and written
8352+
/// back by `rowid`. Deterministic and idempotent — re-running yields identical
8353+
/// hashes and is a no-op on an already-v12 chain.
8354+
pub(crate) fn rehash_audit_chain(
8355+
conn: &rusqlite::Connection,
8356+
) -> Result<i64, Box<dyn std::error::Error>> {
8357+
let mut stmt = conn.prepare(
8358+
"SELECT rowid, id, created_at_unix_ms, COALESCE(workspace_hash, '') \
8359+
FROM journal WHERE audit_hash != '' \
8360+
ORDER BY created_at_unix_ms ASC, rowid ASC",
8361+
)?;
8362+
let rows: Vec<(i64, String, i64, String)> = stmt
8363+
.query_map([], |r| {
8364+
Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?))
8365+
})?
8366+
.collect::<Result<_, _>>()?;
8367+
drop(stmt);
8368+
8369+
let mut prev_hash: Option<String> = None;
8370+
let mut n = 0i64;
8371+
for (rowid, id, ts, ws) in rows {
8372+
let h = match prev_hash {
8373+
Some(ref prev) => sha256_chain(prev, &id, ts, &ws),
8374+
None => sha256_genesis(&id, ts, &ws),
8375+
};
8376+
conn.execute(
8377+
"UPDATE journal SET audit_hash = ?1 WHERE rowid = ?2",
8378+
params![h, rowid],
8379+
)?;
8380+
prev_hash = Some(h);
8381+
n += 1;
8382+
}
8383+
Ok(n)
83308384
}
83318385

83328386
/// #398: deterministic fold for history-tombstone digests — chained over each
@@ -8360,21 +8414,25 @@ fn history_retention_digest(
83608414
pub fn verify_audit_chain(db: &Database) -> Result<i64, String> {
83618415
let conn = db.conn().map_err(|e| format!("connection: {}", e))?;
83628416
let mut stmt = conn.prepare(
8363-
"SELECT id, audit_hash, created_at_unix_ms FROM journal WHERE audit_hash != '' ORDER BY created_at_unix_ms ASC",
8417+
"SELECT id, audit_hash, created_at_unix_ms, COALESCE(workspace_hash, '') \
8418+
FROM journal WHERE audit_hash != '' \
8419+
ORDER BY created_at_unix_ms ASC, rowid ASC",
83648420
).map_err(|e| format!("prepare: {}", e))?;
83658421

83668422
let rows = stmt.query_map([], |r| {
8367-
Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?, r.get::<_, i64>(2)?))
8423+
Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?, r.get::<_, i64>(2)?, r.get::<_, String>(3)?))
83688424
}).map_err(|e| format!("query: {}", e))?;
83698425

83708426
let mut count = 0i64;
83718427
let mut prev_hash: Option<String> = None;
83728428
for row in rows {
8373-
let (id, stored_hash, ts) = row.map_err(|e| format!("row: {}", e))?;
8429+
let (id, stored_hash, ts, ws) = row.map_err(|e| format!("row: {}", e))?;
8430+
// #433 M2: workspace_hash is part of the hashed tuple, so a moved entry
8431+
// (different workspace_hash) recomputes to a different expected hash.
83748432
let expected = if let Some(ref prev) = prev_hash {
8375-
sha256_chain(prev, &id, ts)
8433+
sha256_chain(prev, &id, ts, &ws)
83768434
} else {
8377-
sha256_genesis(&id, ts)
8435+
sha256_genesis(&id, ts, &ws)
83788436
};
83798437
if expected != stored_hash {
83808438
return Err(format!(
@@ -18005,5 +18063,108 @@ mod tests {
1800518063
assert!(top[0]["bytes"].as_i64().unwrap() > 0);
1800618064
let _ = fs::remove_file(&path);
1800718065
}
18066+
18067+
// ─── #433 M2: workspace-bound audit chain ────────────────────
18068+
18069+
#[test]
18070+
fn audit_chain_binds_workspace_and_detects_move() {
18071+
let (db, path) = temp_db();
18072+
let base = now_ms();
18073+
for (i, (id, ws)) in [("jrn-w1", "wsA"), ("jrn-w2", "wsB"), ("jrn-w3", "wsA")]
18074+
.iter()
18075+
.enumerate()
18076+
{
18077+
db.journal(&crate::models::JournalEvent {
18078+
id: id.to_string(),
18079+
event_type: "decision".to_string(),
18080+
evaluated_json: "{}".to_string(),
18081+
acted_json: "{}".to_string(),
18082+
forward_json: "{}".to_string(),
18083+
category: "facts".to_string(),
18084+
key: "k".to_string(),
18085+
entity_id: String::new(),
18086+
agent_id: "test".to_string(),
18087+
workspace_hash: ws.to_string(),
18088+
created_at_unix_ms: base + i as i64,
18089+
})
18090+
.unwrap();
18091+
}
18092+
assert_eq!(
18093+
verify_audit_chain(&db).expect("freshly written chain must verify"),
18094+
3
18095+
);
18096+
18097+
// Move an entry to a different workspace — the whole point of M2 is that
18098+
// this now breaks verification (pre-M2 it silently passed).
18099+
{
18100+
let conn = db.conn().unwrap();
18101+
conn.execute(
18102+
"UPDATE journal SET workspace_hash = 'wsEVIL' WHERE id = 'jrn-w2'",
18103+
[],
18104+
)
18105+
.unwrap();
18106+
}
18107+
assert!(
18108+
verify_audit_chain(&db).is_err(),
18109+
"moving a journal entry between workspaces must break the audit chain"
18110+
);
18111+
let _ = std::fs::remove_file(&path);
18112+
}
18113+
18114+
#[test]
18115+
fn v11_audit_chain_verifies_after_migration_to_v12() {
18116+
// Forge a chain hashed under the pre-M2 formula (no workspace_hash),
18117+
// then prove the v11->v12 migration rehashes it so it verifies under the
18118+
// new workspace-bound formula — the backward-compat guarantee.
18119+
fn old_hash(prev: &str, id: &str, ts: i64) -> String {
18120+
use std::hash::{Hash, Hasher};
18121+
let mut h = std::collections::hash_map::DefaultHasher::new();
18122+
prev.hash(&mut h);
18123+
id.hash(&mut h);
18124+
ts.hash(&mut h);
18125+
format!("{:016x}", h.finish())
18126+
}
18127+
let (db, path) = temp_db();
18128+
let base = now_ms();
18129+
let rows = [("jo-1", "wsA"), ("jo-2", "wsB"), ("jo-3", "wsA")];
18130+
{
18131+
let conn = db.conn().unwrap();
18132+
let mut prev = String::new();
18133+
for (i, (id, ws)) in rows.iter().enumerate() {
18134+
let ts = base + i as i64;
18135+
let h = if i == 0 {
18136+
old_hash("genesis", id, ts)
18137+
} else {
18138+
old_hash(&prev, id, ts)
18139+
};
18140+
prev = h.clone();
18141+
conn.execute(
18142+
"INSERT INTO journal (id, event_type, evaluated_json, acted_json, \
18143+
forward_json, category, key, entity_id, agent_id, audit_hash, \
18144+
workspace_hash, created_at_unix_ms) \
18145+
VALUES (?1,'decision','{}','{}','{}','facts','k','','test',?2,?3,?4)",
18146+
params![id, h, ws, ts],
18147+
)
18148+
.unwrap();
18149+
}
18150+
}
18151+
// A v11 chain does NOT verify under the new formula…
18152+
assert!(
18153+
verify_audit_chain(&db).is_err(),
18154+
"pre-M2 chain must not verify under the workspace-bound formula"
18155+
);
18156+
// …until the v11->v12 migration rehashes it. Roll user_version back and
18157+
// re-run schema init to exercise the real migration path.
18158+
{
18159+
let conn = db.conn().unwrap();
18160+
conn.pragma_update(None, "user_version", 11i64).unwrap();
18161+
crate::schema::initialize_schema(&conn).expect("v11->v12 migration must succeed");
18162+
}
18163+
assert_eq!(
18164+
verify_audit_chain(&db).expect("chain must verify after v11->v12 migration"),
18165+
3
18166+
);
18167+
let _ = std::fs::remove_file(&path);
18168+
}
1800818169
}
1800918170

src/schema.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ CREATE INDEX IF NOT EXISTS idx_entity_history_catkey ON entity_history(category,
156156
/// the column-add migrations below have been applied. Bump this whenever you add
157157
/// a new ALTER-probe migration in `initialize_schema`, or existing databases
158158
/// (already at the previous level) will skip it.
159-
const SCHEMA_VERSION: i64 = 11;
159+
const SCHEMA_VERSION: i64 = 12;
160160

161161
/// Initialize the v0.2.0 schema on a fresh database.
162162
pub fn initialize_schema(conn: &Connection) -> Result<(), Box<dyn std::error::Error>> {
@@ -436,6 +436,16 @@ fn apply_migrations(conn: &Connection) -> Result<(), Box<dyn std::error::Error>>
436436
)?;
437437
// ── end v11 ─────────────────────────────────────────────────────────
438438

439+
// ── v12 (#433 M2): bind workspace into the audit-chain hash ──────────
440+
// Pre-v12 chains hashed only (prev_hash, id, created_at_unix_ms), so a
441+
// journal entry could be moved between workspaces without breaking the
442+
// chain. The hash now also folds in workspace_hash (stamped since v11).
443+
// Recompute existing chains under the new formula so they still verify.
444+
// Deterministic + idempotent; runs inside the migration transaction and is
445+
// a no-op on a fresh DB (empty journal).
446+
crate::db::rehash_audit_chain(conn)?;
447+
// ── end v12 ──────────────────────────────────────────────────────────
448+
439449
// Stamp the migration level so subsequent opens skip the probe block above.
440450
conn.pragma_update(None, "user_version", SCHEMA_VERSION)?;
441451

0 commit comments

Comments
 (0)