Skip to content

Commit c88af7b

Browse files
tcconnallyclaude
andauthored
perf: gate schema migration probes behind PRAGMA user_version (#208) (#216)
initialize_schema ran up to eight throwaway `SELECT col ... LIMIT 1` probes on every Database::open to detect missing columns — and open is called several times per process, so on a fully-migrated DB this is pure wasted work every boot. Gate the probe block on PRAGMA user_version: read it after the (still-ungated, all-IF-NOT-EXISTS) DDL, return early when already at SCHEMA_VERSION, and stamp the version once the column-add migrations have run. The DDL stays ungated so it still back-fills newer objects (e.g. idx_entities_recall) on older databases. Pre-existing databases sit at user_version=0, so they pay the probe pass exactly once more (idempotent — columns already exist, no ALTERs fire) and are then stamped; every subsequent open skips the block. A documented SCHEMA_VERSION constant flags that adding a future probe requires a bump. Addresses #208 item 4 (item 2, ONNX session cache, remains blocked by #212). Tests: stamps_user_version_and_gates_migration_probes (fresh init stamps the version; re-init is a data-preserving no-op) and migrates_pre_versioned_db_ missing_a_column (a user_version=0 legacy DB missing `visibility` still gets the column added, then stamped). 41 passed / 0 failed (MSVC), build warning-free. Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent c9088e5 commit c88af7b

1 file changed

Lines changed: 88 additions & 0 deletions

File tree

src/schema.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,27 @@ CREATE TABLE IF NOT EXISTS state (
6969
);
7070
";
7171

72+
/// Current schema migration level, stamped into `PRAGMA user_version` once all
73+
/// the column-add migrations below have been applied. Bump this whenever you add
74+
/// a new ALTER-probe migration in `initialize_schema`, or existing databases
75+
/// (already at the previous level) will skip it.
76+
const SCHEMA_VERSION: i64 = 1;
77+
7278
/// Initialize the v0.2.0 schema on a fresh database.
7379
pub fn initialize_schema(conn: &Connection) -> Result<(), Box<dyn std::error::Error>> {
80+
// DDL is all IF NOT EXISTS, so it stays ungated: it both creates a fresh DB
81+
// and back-fills newer objects (e.g. idx_entities_recall) on older ones.
7482
conn.execute_batch(DDL_V0_2_0)?;
7583

84+
// The column-add migrations below each prepare a throwaway `SELECT col LIMIT 1`
85+
// probe. `open` runs several times per process, so on a fully-migrated DB this
86+
// is pure wasted work. Gate it on PRAGMA user_version: run once when behind,
87+
// then stamp current and skip on every subsequent open. (#208)
88+
let user_version: i64 = conn.query_row("PRAGMA user_version", [], |r| r.get(0))?;
89+
if user_version >= SCHEMA_VERSION {
90+
return Ok(());
91+
}
92+
7693
// Add embedding column if it doesn't exist (migration from v0.2.0)
7794
let has_embedding: bool = conn
7895
.prepare("SELECT embedding FROM entities LIMIT 1")
@@ -129,6 +146,9 @@ pub fn initialize_schema(conn: &Connection) -> Result<(), Box<dyn std::error::Er
129146
conn.execute_batch("ALTER TABLE entities ADD COLUMN visibility TEXT DEFAULT 'workspace';")?;
130147
}
131148

149+
// Stamp the migration level so subsequent opens skip the probe block above.
150+
conn.pragma_update(None, "user_version", SCHEMA_VERSION)?;
151+
132152
Ok(())
133153
}
134154

@@ -400,6 +420,74 @@ mod tests {
400420
assert!(is_v0_2_0(&conn).unwrap());
401421
}
402422

423+
#[test]
424+
fn stamps_user_version_and_gates_migration_probes() {
425+
// Fresh init stamps the current schema version.
426+
let (conn, _path) = temp_db();
427+
initialize_schema(&conn).expect("init schema");
428+
let v: i64 = conn
429+
.query_row("PRAGMA user_version", [], |r| r.get(0))
430+
.unwrap();
431+
assert_eq!(v, SCHEMA_VERSION, "fresh init must stamp the schema version");
432+
433+
// Re-running on an already-current DB is a no-op that preserves data and
434+
// leaves the version untouched (the probe block is skipped).
435+
conn.execute(
436+
"INSERT INTO entities (id, category, key, body_json, created_at_unix_ms, last_accessed_unix_ms)
437+
VALUES ('v-test', 'insight', 'k', '{}', 0, 0)",
438+
[],
439+
)
440+
.unwrap();
441+
initialize_schema(&conn).expect("re-init should be a no-op");
442+
let v2: i64 = conn
443+
.query_row("PRAGMA user_version", [], |r| r.get(0))
444+
.unwrap();
445+
assert_eq!(v2, SCHEMA_VERSION);
446+
let count: i64 = conn
447+
.query_row("SELECT COUNT(*) FROM entities WHERE id='v-test'", [], |r| r.get(0))
448+
.unwrap();
449+
assert_eq!(count, 1, "re-init must not drop data");
450+
}
451+
452+
#[test]
453+
fn migrates_pre_versioned_db_missing_a_column() {
454+
// Simulate a legacy DB at user_version=0 that predates the visibility
455+
// column: the gate must still run the probes and add the column, then
456+
// stamp the version so later opens skip.
457+
let (conn, _path) = temp_db();
458+
// Base v0.2.0 columns the DDL's indexes reference, but WITHOUT the
459+
// later ALTER-added columns (embedding/always_on/certainty/
460+
// workspace_hash/agent_id/visibility, journal agent_id/audit_hash).
461+
conn.execute_batch(
462+
"CREATE TABLE entities (
463+
id TEXT PRIMARY KEY, category TEXT NOT NULL DEFAULT 'general', key TEXT NOT NULL,
464+
body_json TEXT NOT NULL DEFAULT '{}', archived INTEGER DEFAULT 0,
465+
retrieval_count INTEGER DEFAULT 0,
466+
created_at_unix_ms INTEGER NOT NULL, last_accessed_unix_ms INTEGER NOT NULL
467+
);
468+
CREATE TABLE journal (
469+
id TEXT PRIMARY KEY, entity_id TEXT DEFAULT '',
470+
created_at_unix_ms INTEGER NOT NULL
471+
);",
472+
)
473+
.unwrap();
474+
assert!(
475+
conn.prepare("SELECT visibility FROM entities LIMIT 1").is_err(),
476+
"precondition: legacy table lacks visibility"
477+
);
478+
479+
initialize_schema(&conn).expect("migrate legacy db");
480+
481+
assert!(
482+
conn.prepare("SELECT visibility FROM entities LIMIT 1").is_ok(),
483+
"visibility column must be added during gated migration"
484+
);
485+
let v: i64 = conn
486+
.query_row("PRAGMA user_version", [], |r| r.get(0))
487+
.unwrap();
488+
assert_eq!(v, SCHEMA_VERSION);
489+
}
490+
403491
#[test]
404492
fn creates_recall_ranking_index() {
405493
let (conn, _path) = temp_db();

0 commit comments

Comments
 (0)