Skip to content

Commit 98556c4

Browse files
tcconnally and claude authored
feat(schema): bi-temporal fact columns + migration (D1) (#249)
First step toward bi-temporal facts (#247 follow-on): give entities two time axes plus a supersession link so a fact can be retired without deleting history.

This PR is purely additive plumbing — schema + migration only, no read/write behavior change. The write-path supersession (D2) and as-of recall (D3) build on these columns.

Adds to entities:
- valid_from_unix_ms / valid_to_unix_ms — valid time (when the fact is true in the world)
- recorded_at_unix_ms / invalidated_at_unix_ms — transaction time (when Mimir knew it / retired it)
- supersedes / superseded_by — supersession links

Migration (user_version 1 -> 2): gated ALTER probes add each column on existing DBs, recorded_at_unix_ms is backfilled to created_at_unix_ms, and idx_entities_invalidated is created after the column exists (it can't live in the ungated DDL, which runs before the ALTERs on a migrating DB).

All NULL/'' = "valid since creation, currently true, never superseded" — existing rows keep their exact meaning.

Tests: legacy-DB migration adds all six columns + backfills recorded_at and leaves rows live/unbounded; fresh DB has the columns and the live-fact index. Full suite: 75 passing.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 33f3513 commit 98556c4

1 file changed

Lines changed: 127 additions & 2 deletions

File tree

src/schema.rs

Lines changed: 127 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,17 @@ CREATE TABLE IF NOT EXISTS entities (
3030
certainty REAL DEFAULT 0.5,
3131
workspace_hash TEXT DEFAULT '',
3232
agent_id TEXT DEFAULT '',
33-
visibility TEXT DEFAULT 'workspace'
33+
visibility TEXT DEFAULT 'workspace',
34+
-- Bi-temporal facts (v2.4.0). Two time axes plus a supersession link, so a
35+
-- fact can be retired without deleting history. All NULL/'' here means
36+
-- \"valid since creation, currently true, never superseded\" — the behavior
37+
-- before bi-temporal support, so existing rows need no interpretation change.
38+
valid_from_unix_ms INTEGER, -- when the fact became true in the world (NULL = since creation)
39+
valid_to_unix_ms INTEGER, -- when it stopped being true (NULL = still true)
40+
recorded_at_unix_ms INTEGER, -- transaction time: when Mimir first knew it (backfilled = created_at)
41+
invalidated_at_unix_ms INTEGER, -- transaction time: when Mimir retired it (NULL = live)
42+
supersedes TEXT DEFAULT '', -- id of the entity this one replaced
43+
superseded_by TEXT DEFAULT '' -- id of the entity that replaced this one
3444
);
3545
3646
CREATE UNIQUE INDEX IF NOT EXISTS idx_entities_category_key ON entities(category, key);
@@ -73,7 +83,7 @@ CREATE TABLE IF NOT EXISTS state (
7383
/// the column-add migrations below have been applied. Bump this whenever you add
7484
/// a new ALTER-probe migration in `initialize_schema`, or existing databases
7585
/// (already at the previous level) will skip it.
76-
const SCHEMA_VERSION: i64 = 1;
86+
const SCHEMA_VERSION: i64 = 2;
7787

7888
/// Initialize the v0.2.0 schema on a fresh database.
7989
pub fn initialize_schema(conn: &Connection) -> Result<(), Box<dyn std::error::Error>> {
@@ -146,6 +156,42 @@ pub fn initialize_schema(conn: &Connection) -> Result<(), Box<dyn std::error::Er
146156
conn.execute_batch("ALTER TABLE entities ADD COLUMN visibility TEXT DEFAULT 'workspace';")?;
147157
}
148158

159+
// Add bi-temporal columns (v2.4.0 — bi-temporal facts). Valid time
160+
// (valid_from/valid_to), transaction time (recorded_at/invalidated_at), and
161+
// supersession links. All additive; existing rows keep their meaning.
162+
if conn.prepare("SELECT valid_from_unix_ms FROM entities LIMIT 1").is_err() {
163+
conn.execute_batch("ALTER TABLE entities ADD COLUMN valid_from_unix_ms INTEGER;")?;
164+
}
165+
if conn.prepare("SELECT valid_to_unix_ms FROM entities LIMIT 1").is_err() {
166+
conn.execute_batch("ALTER TABLE entities ADD COLUMN valid_to_unix_ms INTEGER;")?;
167+
}
168+
if conn.prepare("SELECT recorded_at_unix_ms FROM entities LIMIT 1").is_err() {
169+
conn.execute_batch("ALTER TABLE entities ADD COLUMN recorded_at_unix_ms INTEGER;")?;
170+
}
171+
if conn.prepare("SELECT invalidated_at_unix_ms FROM entities LIMIT 1").is_err() {
172+
conn.execute_batch("ALTER TABLE entities ADD COLUMN invalidated_at_unix_ms INTEGER;")?;
173+
}
174+
if conn.prepare("SELECT supersedes FROM entities LIMIT 1").is_err() {
175+
conn.execute_batch("ALTER TABLE entities ADD COLUMN supersedes TEXT DEFAULT '';")?;
176+
}
177+
if conn.prepare("SELECT superseded_by FROM entities LIMIT 1").is_err() {
178+
conn.execute_batch("ALTER TABLE entities ADD COLUMN superseded_by TEXT DEFAULT '';")?;
179+
}
180+
// Backfill transaction time for pre-existing rows: a fact's recorded_at is
181+
// when Mimir first stored it, i.e. its created_at. (No-op on a fresh DB.)
182+
conn.execute_batch(
183+
"UPDATE entities SET recorded_at_unix_ms = created_at_unix_ms \
184+
WHERE recorded_at_unix_ms IS NULL;",
185+
)?;
186+
187+
// Live-fact filter index. Created here (not in the ungated DDL) because it
188+
// references invalidated_at_unix_ms, which on a migrating DB only exists
189+
// after the ALTER above. NULL = live; recall will exclude non-NULL rows.
190+
conn.execute_batch(
191+
"CREATE INDEX IF NOT EXISTS idx_entities_invalidated \
192+
ON entities(invalidated_at_unix_ms);",
193+
)?;
194+
149195
// Stamp the migration level so subsequent opens skip the probe block above.
150196
conn.pragma_update(None, "user_version", SCHEMA_VERSION)?;
151197

@@ -488,6 +534,85 @@ mod tests {
488534
assert_eq!(v, SCHEMA_VERSION);
489535
}
490536

537+
// Migration test: builds a legacy database by hand (an `entities` table without
// any bi-temporal columns, plus one pre-existing row), runs `initialize_schema`,
// and asserts that all six columns were added, `recorded_at_unix_ms` was
// backfilled from `created_at_unix_ms`, `invalidated_at_unix_ms` and
// `valid_from_unix_ms` stay NULL, and `user_version` equals `SCHEMA_VERSION`.
#[test]
538+
fn adds_bitemporal_columns_and_backfills_recorded_at() {
539+
// A legacy DB (no bi-temporal columns) with one row predating the migration.
540+
let (conn, _path) = temp_db();
541+
// NOTE(review): the legacy `journal` table is presumably created because
// `initialize_schema` also touches it during migration — confirm.
conn.execute_batch(
542+
"CREATE TABLE entities (
543+
id TEXT PRIMARY KEY, category TEXT NOT NULL DEFAULT 'general', key TEXT NOT NULL,
544+
body_json TEXT NOT NULL DEFAULT '{}', archived INTEGER DEFAULT 0,
545+
retrieval_count INTEGER DEFAULT 0,
546+
created_at_unix_ms INTEGER NOT NULL, last_accessed_unix_ms INTEGER NOT NULL
547+
);
548+
CREATE TABLE journal (
549+
id TEXT PRIMARY KEY, entity_id TEXT DEFAULT '',
550+
created_at_unix_ms INTEGER NOT NULL
551+
);",
552+
)
553+
.unwrap();
554+
// created_at_unix_ms = 111 is the value the backfill assertion below expects
// to find in recorded_at_unix_ms after the migration.
conn.execute(
555+
"INSERT INTO entities (id, category, key, body_json, created_at_unix_ms, last_accessed_unix_ms)
556+
VALUES ('e1', 'general', 'k', '{}', 111, 222)",
557+
[],
558+
)
559+
.unwrap();
560+
// Guard against a vacuous pass: the column must be absent before migrating.
assert!(
561+
conn.prepare("SELECT recorded_at_unix_ms FROM entities LIMIT 1").is_err(),
562+
"precondition: legacy table lacks the bi-temporal columns"
563+
);
564+
565+
initialize_schema(&conn).expect("migrate legacy db to bi-temporal schema");
566+
567+
// All six bi-temporal columns must now exist.
568+
for col in [
569+
"valid_from_unix_ms",
570+
"valid_to_unix_ms",
571+
"recorded_at_unix_ms",
572+
"invalidated_at_unix_ms",
573+
"supersedes",
574+
"superseded_by",
575+
] {
576+
assert!(
577+
conn.prepare(&format!("SELECT {col} FROM entities LIMIT 1")).is_ok(),
578+
"column {col} must be added during migration"
579+
);
580+
}
581+
582+
// recorded_at backfilled to created_at; the row is live (not invalidated)
583+
// and unbounded in valid time — i.e. unchanged in meaning.
584+
let recorded: i64 = conn
585+
.query_row("SELECT recorded_at_unix_ms FROM entities WHERE id='e1'", [], |r| r.get(0))
586+
.unwrap();
587+
assert_eq!(recorded, 111, "recorded_at must backfill to created_at");
588+
let invalidated: Option<i64> = conn
589+
.query_row("SELECT invalidated_at_unix_ms FROM entities WHERE id='e1'", [], |r| r.get(0))
590+
.unwrap();
591+
assert_eq!(invalidated, None, "existing rows must be live (not invalidated)");
592+
let valid_from: Option<i64> = conn
593+
.query_row("SELECT valid_from_unix_ms FROM entities WHERE id='e1'", [], |r| r.get(0))
594+
.unwrap();
595+
assert_eq!(valid_from, None, "existing rows must be valid since creation");
596+
597+
// The migration must also stamp the new schema level.
let v: i64 = conn.query_row("PRAGMA user_version", [], |r| r.get(0)).unwrap();
598+
assert_eq!(v, SCHEMA_VERSION);
599+
}
600+
601+
#[test]
602+
fn fresh_db_has_bitemporal_columns_and_live_index() {
603+
let (conn, _path) = temp_db();
604+
initialize_schema(&conn).expect("init schema");
605+
assert!(conn.prepare("SELECT invalidated_at_unix_ms FROM entities LIMIT 1").is_ok());
606+
let idx: i64 = conn
607+
.query_row(
608+
"SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_entities_invalidated'",
609+
[],
610+
|r| r.get(0),
611+
)
612+
.unwrap();
613+
assert_eq!(idx, 1, "idx_entities_invalidated should be created on a fresh DB");
614+
}
615+
491616
#[test]
492617
fn creates_recall_ranking_index() {
493618
let (conn, _path) = temp_db();

0 commit comments

Comments
 (0)