Skip to content

Commit f74a5e6

Browse files
tcconnallyclaude
andcommitted
fix: workspace-scoped entity identity — share/federate copy instead of clobbering (#339)
remember()'s identity was (category, key): a cross-workspace write with a colliding key took the UPDATE path and overwrote the other workspace's row in place. mimir_share's "clone into target workspace" was therefore a destructive MOVE of the source entity (source workspace lost it, the fresh mem-* id was discarded, retrieval stats clobbered), and mimir_federate's vault_import had the same failure mode for every remapped entity. Changes: - Identity is now (category, key, workspace_hash): remember()'s existence lookup includes workspace_hash, and the schema's unique index becomes idx_entities_category_key_ws (SCHEMA_VERSION 4). The index is created in the gated migration block after the workspace_hash ALTER — on a legacy DB the ungated DDL runs before that column exists. Create-then-drop so uniqueness is never unenforced; safe on populated DBs because the old constraint was strictly tighter. - get_entity(category, key) picks deterministically when the same key now exists in several workspaces (global '' first, then lexicographic) instead of whichever row SQLite visited. - forget()'s FTS cleanup uses IN, not a scalar subquery — it archives every workspace's row for (category, key), so it must clean FTS for all of them. Single-workspace vaults (workspace_hash = '' everywhere) behave identically. Tests: remember_identity_is_workspace_scoped_so_share_copies_instead_of_moving, migrates_unique_index_to_workspace_scoped_identity. Suite: 161 passed / 0 failed. Fixes #339 Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent f132a13 commit f74a5e6

2 files changed

Lines changed: 147 additions & 10 deletions

File tree

src/db.rs

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,10 +1495,16 @@ impl Database {
14951495
entity.body_json.clone()
14961496
};
14971497

1498+
// Identity is (category, key, workspace_hash) — #339. Matching on
1499+
// (category, key) alone made a cross-workspace write with a colliding
1500+
// key take the UPDATE path and overwrite the other workspace's row in
1501+
// place: mimir_share's "clone into target workspace" was actually a
1502+
// destructive MOVE of the source entity. Single-workspace vaults
1503+
// (workspace_hash = "" everywhere) are unaffected.
14981504
let existing_id: Option<String> = conn
14991505
.query_row(
1500-
"SELECT id FROM entities WHERE category = ?1 AND key = ?2",
1501-
params![entity.category, entity.key],
1506+
"SELECT id FROM entities WHERE category = ?1 AND key = ?2 AND workspace_hash = ?3",
1507+
params![entity.category, entity.key, entity.workspace_hash],
15021508
|r| r.get(0),
15031509
)
15041510
.ok();
@@ -2420,8 +2426,14 @@ impl Database {
24202426
created_at_unix_ms, last_accessed_unix_ms, NULL as embedding,
24212427
always_on, certainty, workspace_hash, agent_id, visibility,
24222428
follow_count, miss_count, follow_rate, efficacy_status
2423-
FROM entities WHERE category = ?1 AND key = ?2 LIMIT 1",
2429+
FROM entities WHERE category = ?1 AND key = ?2
2430+
ORDER BY workspace_hash ASC, id ASC LIMIT 1",
24242431
)?;
2432+
// With workspace-scoped identity (#339) the same (category, key) can
2433+
// legitimately exist in several workspaces. Callers without a
2434+
// workspace in hand get a DETERMINISTIC pick: the global ('') row
2435+
// first, then the lexicographically-first workspace — not whichever
2436+
// row SQLite happened to visit.
24252437

24262438
let mut rows = stmt.query_map(params![category, key], |row| {
24272439
entity_from_row(row, self.encryption.as_ref())
@@ -2450,10 +2462,13 @@ impl Database {
24502462
WHERE category = ?3 AND key = ?4 AND archived = 0",
24512463
params![reason, now_ms(), category, key],
24522464
)?;
2453-
// Clean FTS5 index for archived entity
2465+
// Clean FTS5 index for archived entity/entities. IN, not `=`: forget
2466+
// archives every row matching (category, key) — which since #339 can
2467+
// be one per workspace — so the FTS cleanup must cover all of them,
2468+
// not just the single rowid the old scalar subquery returned.
24542469
if affected > 0 {
24552470
let _ = tx.execute(
2456-
"DELETE FROM entities_fts WHERE rowid = (SELECT rowid FROM entities WHERE category = ?1 AND key = ?2)",
2471+
"DELETE FROM entities_fts WHERE rowid IN (SELECT rowid FROM entities WHERE category = ?1 AND key = ?2 AND archived = 1)",
24572472
params![category, key],
24582473
);
24592474
}
@@ -6966,6 +6981,52 @@ mod tests {
69666981
let _ = fs::remove_file(&path);
69676982
}
69686983

6984+
#[test]
6985+
fn remember_identity_is_workspace_scoped_so_share_copies_instead_of_moving() {
6986+
// #339: identity was (category, key), so handle_share's clone into a
6987+
// target workspace matched the SOURCE row and updated it in place —
6988+
// a destructive move (source workspace lost the entity, fresh id
6989+
// discarded, stats clobbered). Identity is now
6990+
// (category, key, workspace_hash).
6991+
let (db, path) = temp_db();
6992+
6993+
let mut a = make_entity("id-a", "note", "shared-key", r#"{"n":1}"#);
6994+
a.workspace_hash = "ws-alpha".to_string();
6995+
db.remember(&a).unwrap();
6996+
6997+
// Simulate handle_share's clone into another workspace: same
6998+
// (category, key), different workspace, fresh id.
6999+
let mut clone = a.clone();
7000+
clone.workspace_hash = "ws-beta".to_string();
7001+
clone.id = "mem-fresh".to_string();
7002+
let (id, action) = db.remember(&clone).unwrap();
7003+
assert_eq!(action, "created", "cross-workspace clone must INSERT, not update the source");
7004+
assert_eq!(id, "mem-fresh");
7005+
7006+
// Source untouched in its home workspace; copy exists in the target.
7007+
let src = db.get_entity_by_id_public("id-a").unwrap().unwrap();
7008+
assert_eq!(src.workspace_hash, "ws-alpha", "source must not move");
7009+
let cp = db.get_entity_by_id_public("mem-fresh").unwrap().unwrap();
7010+
assert_eq!(cp.workspace_hash, "ws-beta");
7011+
7012+
// Same-workspace re-remember still takes the idempotent update path.
7013+
let (id2, action2) = db.remember(&a).unwrap();
7014+
assert_eq!(id2, "id-a");
7015+
assert_eq!(action2, "updated");
7016+
7017+
// get_entity without a workspace in hand picks deterministically
7018+
// (lexicographically-first workspace when no global '' row exists).
7019+
let picked = db.get_entity("note", "shared-key").unwrap().unwrap();
7020+
assert_eq!(picked.workspace_hash, "ws-alpha");
7021+
7022+
// forget archives every workspace's copy and cleans FTS for all.
7023+
assert!(db.forget("note", "shared-key", "test cleanup").unwrap());
7024+
assert!(db.get_entity_by_id_public("id-a").unwrap().unwrap().archived);
7025+
assert!(db.get_entity_by_id_public("mem-fresh").unwrap().unwrap().archived);
7026+
7027+
let _ = fs::remove_file(&path);
7028+
}
7029+
69697030
#[test]
69707031
fn context_scopes_to_workspace_including_always_on() {
69717032
// context() feeds mimir_context and `prepare` — a scoped call must not

src/schema.rs

Lines changed: 81 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@ CREATE TABLE IF NOT EXISTS entities (
5353
efficacy_status TEXT DEFAULT 'unverified' -- 'unverified' | 'useful' | 'dead'
5454
);
5555
56-
CREATE UNIQUE INDEX IF NOT EXISTS idx_entities_category_key ON entities(category, key);
56+
-- Identity index: (category, key, workspace_hash) — #339. Created in
57+
-- initialize_schema's gated block, NOT here: on a legacy DB this ungated DDL
58+
-- runs before the ALTER that adds workspace_hash, so an index referencing the
59+
-- column here would fail the whole batch.
5760
5861
-- Recall ranking index: lets the browse path (WHERE archived=0 [+ residual
5962
-- filters] ORDER BY retrieval_count DESC, last_accessed_unix_ms DESC LIMIT k)
@@ -89,9 +92,9 @@ CREATE TABLE IF NOT EXISTS state (
8992
);
9093
9194
-- Superseded fact versions (v2.4.0 — bi-temporal facts). When a remember()
92-
-- overwrites an existing (category,key) with new content, the prior row is
93-
-- snapshotted here with invalidated_at set, so live reads stay one-row-per-key
94-
-- (entities + its UNIQUE(category,key) are untouched) while history is kept for
95+
-- overwrites an existing (category,key,workspace_hash) with new content, the prior
96+
-- row is snapshotted here with invalidated_at set, so live reads stay one-row-per-key
97+
-- (entities + its UNIQUE(category,key,workspace_hash) are untouched) while history is kept for
9598
-- as-of / time-travel queries. A version was live during
9699
-- [recorded_at_unix_ms, invalidated_at_unix_ms). superseded_by points at the
97100
-- live entity id that replaced it. body_json carries the same encryption as
@@ -137,7 +140,7 @@ CREATE INDEX IF NOT EXISTS idx_entity_history_catkey ON entity_history(category,
137140
/// the column-add migrations below have been applied. Bump this whenever you add
138141
/// a new ALTER-probe migration in `initialize_schema`, or existing databases
139142
/// (already at the previous level) will skip it.
140-
const SCHEMA_VERSION: i64 = 3;
143+
const SCHEMA_VERSION: i64 = 4;
141144

142145
/// Initialize the v0.2.0 schema on a fresh database.
143146
pub fn initialize_schema(conn: &Connection) -> Result<(), Box<dyn std::error::Error>> {
@@ -262,6 +265,20 @@ pub fn initialize_schema(conn: &Connection) -> Result<(), Box<dyn std::error::Er
262265
ON entities(invalidated_at_unix_ms);",
263266
)?;
264267

268+
// v4 (#339): identity becomes (category, key, workspace_hash). A plain
269+
// (category, key) uniqueness made cross-workspace key collisions
270+
// unstorable, which is what forced mimir_share's "copy into workspace" to
271+
// clobber the source row. Created here (after the workspace_hash ALTER,
272+
// like idx_entities_invalidated) rather than in the ungated DDL. Safe on
273+
// a populated DB — the old constraint was strictly tighter, so no
274+
// existing rows can collide. Create-then-drop, so uniqueness is never
275+
// unenforced.
276+
conn.execute_batch(
277+
"CREATE UNIQUE INDEX IF NOT EXISTS idx_entities_category_key_ws \
278+
ON entities(category, key, workspace_hash); \
279+
DROP INDEX IF EXISTS idx_entities_category_key;",
280+
)?;
281+
265282
// Stamp the migration level so subsequent opens skip the probe block above.
266283
conn.pragma_update(None, "user_version", SCHEMA_VERSION)?;
267284

@@ -604,6 +621,65 @@ mod tests {
604621
assert_eq!(v, SCHEMA_VERSION);
605622
}
606623

624+
#[test]
625+
fn migrates_unique_index_to_workspace_scoped_identity() {
626+
// v4 (#339): a v3-era DB with the two-column unique index and existing
627+
// rows must come out with the three-column index, the old index
628+
// dropped, and cross-workspace key collisions storable.
629+
let (conn, _path) = temp_db();
630+
initialize_schema(&conn).expect("fresh init");
631+
// Rewind to the v3 state: old index back, new index gone, version 3.
632+
conn.execute_batch(
633+
"DROP INDEX IF EXISTS idx_entities_category_key_ws;
634+
CREATE UNIQUE INDEX idx_entities_category_key ON entities(category, key);
635+
PRAGMA user_version = 3;",
636+
)
637+
.unwrap();
638+
conn.execute(
639+
"INSERT INTO entities (id, category, key, body_json, workspace_hash, created_at_unix_ms, last_accessed_unix_ms)
640+
VALUES ('mig-a', 'note', 'k', '{}', 'ws-alpha', 0, 0)",
641+
[],
642+
)
643+
.unwrap();
644+
645+
initialize_schema(&conn).expect("v3 -> v4 migration");
646+
647+
let old_idx: i64 = conn
648+
.query_row(
649+
"SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_entities_category_key'",
650+
[],
651+
|r| r.get(0),
652+
)
653+
.unwrap();
654+
assert_eq!(old_idx, 0, "old two-column unique index must be dropped");
655+
let new_idx: i64 = conn
656+
.query_row(
657+
"SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_entities_category_key_ws'",
658+
[],
659+
|r| r.get(0),
660+
)
661+
.unwrap();
662+
assert_eq!(new_idx, 1, "workspace-scoped unique index must exist");
663+
664+
// Same (category, key) in a different workspace now inserts…
665+
conn.execute(
666+
"INSERT INTO entities (id, category, key, body_json, workspace_hash, created_at_unix_ms, last_accessed_unix_ms)
667+
VALUES ('mig-b', 'note', 'k', '{}', 'ws-beta', 0, 0)",
668+
[],
669+
)
670+
.expect("cross-workspace key collision must be storable after v4");
671+
// …while a true duplicate in the SAME workspace is still rejected.
672+
assert!(
673+
conn.execute(
674+
"INSERT INTO entities (id, category, key, body_json, workspace_hash, created_at_unix_ms, last_accessed_unix_ms)
675+
VALUES ('mig-c', 'note', 'k', '{}', 'ws-alpha', 0, 0)",
676+
[],
677+
)
678+
.is_err(),
679+
"same-workspace duplicate must still violate uniqueness"
680+
);
681+
}
682+
607683
#[test]
608684
fn adds_bitemporal_columns_and_backfills_recorded_at() {
609685
// A legacy DB (no bi-temporal columns) with one row predating the migration.

0 commit comments

Comments
 (0)