Skip to content

Commit b6d9f53

Browse files
committed
fix: resolve 10 Pass 2 code review issues (#38 through #47)
- #38 (HIGH): Wrap decay_tick in explicit transaction to avoid per-statement fsync - #39 (MEDIUM): Add skip_side_effects flag — context() no longer triggers recall writes - #40 (MEDIUM): Skip FTS5 when include_archived=true — archived entities have no FTS5 entries - #41 (MEDIUM): Fix vault_import frontmatter parser — line-based, handles --- in body - #42 (LOW): traverse_chain returns proper Err instead of error-as-data Ok(json) - #43 (LOW): _traverse_links logs DB errors instead of silently skipping - #44 (LOW): Extract entity_from_row() helper — eliminates 100 lines of duplication - #45 (LOW): Add max_nodes param to traverse for breadth limiting (default 100) - #46 (LOW): Enforce 64KB per-field limit on journal entries - #47 (LOW): vault_export uses whitelist charset sanitization Closes #38, Closes #39, Closes #40, Closes #41, Closes #42, Closes #43, Closes #44, Closes #45, Closes #46, Closes #47
1 parent 342f43b commit b6d9f53

4 files changed

Lines changed: 142 additions & 119 deletions

File tree

src/db.rs

Lines changed: 114 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ impl Database {
110110
let mut auto_archived = 0i64;
111111
let now_val = now;
112112

113+
// Wrap in explicit transaction to avoid per-statement fsync
114+
let _ = self.conn.execute_batch("BEGIN");
115+
113116
for row in rows {
114117
let (id, last_access) = row?;
115118
let new_decay = Self::compute_decay(last_access, now_val);
@@ -134,6 +137,8 @@ impl Database {
134137
}
135138
}
136139

140+
let _ = self.conn.execute_batch("COMMIT");
141+
137142
Ok(DecayReport {
138143
entities_checked: total,
139144
entities_updated: updated,
@@ -396,10 +401,15 @@ impl Database {
396401
param_values.push(Box::new(format!("%{}%", word.replace('\'', "''"))));
397402
}
398403

399-
conditions.push(format!(
400-
"((rowid IN (SELECT rowid FROM entities_fts WHERE entities_fts MATCH ?1)) OR {})",
401-
like_clauses.join(" OR ")
402-
));
404+
// When include_archived, skip FTS5 — archived entities have no FTS5 entries
405+
if params.include_archived {
406+
conditions.push(like_clauses.join(" OR "));
407+
} else {
408+
conditions.push(format!(
409+
"((rowid IN (SELECT rowid FROM entities_fts WHERE entities_fts MATCH ?1)) OR {})",
410+
like_clauses.join(" OR ")
411+
));
412+
}
403413
}
404414
}
405415

@@ -471,51 +481,24 @@ impl Database {
471481

472482
let mut stmt = self.conn.prepare(&sql)?;
473483
let rows = stmt.query_map(param_refs.as_slice(), |row| {
474-
let tags_str: String = row.get::<_, String>(6).unwrap_or_else(|_| "[]".to_string());
475-
let links_str: String = row
476-
.get::<_, String>(13)
477-
.unwrap_or_else(|_| "[]".to_string());
478-
479-
let tags: Vec<String> = serde_json::from_str(&tags_str).unwrap_or_default();
480-
let links: Vec<MemoryLink> = serde_json::from_str(&links_str).unwrap_or_default();
481-
let archived: i32 = row.get(11).unwrap_or(0);
482-
let verified: i32 = row.get(14).unwrap_or(0);
483-
484-
Ok(Entity {
485-
id: row.get(0)?,
486-
category: row.get(1)?,
487-
key: row.get(2)?,
488-
body_json: row.get(3)?,
489-
status: row.get(4)?,
490-
entity_type: row.get(5)?,
491-
tags,
492-
decay_score: row.get(7)?,
493-
retrieval_count: row.get(8)?,
494-
layer: row.get(9)?,
495-
topic_path: row.get(10)?,
496-
archived: archived != 0,
497-
archive_reason: row.get(12)?,
498-
links,
499-
verified: verified != 0,
500-
source: row.get(15)?,
501-
created_at_unix_ms: row.get(16)?,
502-
last_accessed_unix_ms: row.get(17)?,
503-
})
484+
entity_from_row(row)
504485
})?;
505486

506487
let mut items = Vec::new();
507488
for row in rows {
508489
let entity = row?;
509490
// Update retrieval count, recency, decay boost, and layer
510-
let new_count = entity.retrieval_count + 1;
511-
let boosted_decay = Self::boost_decay(entity.decay_score);
512-
let new_layer = Self::compute_layer(new_count);
513-
let _ = self.conn.execute(
514-
"UPDATE entities SET retrieval_count = ?1,
515-
last_accessed_unix_ms = ?2, decay_score = ?3, layer = ?4
516-
WHERE id = ?5",
517-
params![new_count, now_ms(), boosted_decay, new_layer, entity.id],
518-
);
491+
if !params.skip_side_effects {
492+
let new_count = entity.retrieval_count + 1;
493+
let boosted_decay = Self::boost_decay(entity.decay_score);
494+
let new_layer = Self::compute_layer(new_count);
495+
let _ = self.conn.execute(
496+
"UPDATE entities SET retrieval_count = ?1,
497+
last_accessed_unix_ms = ?2, decay_score = ?3, layer = ?4
498+
WHERE id = ?5",
499+
params![new_count, now_ms(), boosted_decay, new_layer, entity.id],
500+
);
501+
}
519502
items.push(entity);
520503
}
521504

@@ -538,35 +521,7 @@ impl Database {
538521
)?;
539522

540523
let mut rows = stmt.query_map(params![category, key], |row| {
541-
let tags_str: String = row.get::<_, String>(6).unwrap_or_else(|_| "[]".to_string());
542-
let links_str: String = row
543-
.get::<_, String>(13)
544-
.unwrap_or_else(|_| "[]".to_string());
545-
let tags: Vec<String> = serde_json::from_str(&tags_str).unwrap_or_default();
546-
let links: Vec<MemoryLink> = serde_json::from_str(&links_str).unwrap_or_default();
547-
let archived: i32 = row.get(11).unwrap_or(0);
548-
let verified: i32 = row.get(14).unwrap_or(0);
549-
550-
Ok(Entity {
551-
id: row.get(0)?,
552-
category: row.get(1)?,
553-
key: row.get(2)?,
554-
body_json: row.get(3)?,
555-
status: row.get(4)?,
556-
entity_type: row.get(5)?,
557-
tags,
558-
decay_score: row.get(7)?,
559-
retrieval_count: row.get(8)?,
560-
layer: row.get(9)?,
561-
topic_path: row.get(10)?,
562-
archived: archived != 0,
563-
archive_reason: row.get(12)?,
564-
links,
565-
verified: verified != 0,
566-
source: row.get(15)?,
567-
created_at_unix_ms: row.get(16)?,
568-
last_accessed_unix_ms: row.get(17)?,
569-
})
524+
entity_from_row(row)
570525
})?;
571526

572527
if let Some(row) = rows.next() {
@@ -908,11 +863,10 @@ impl Database {
908863
category: &str,
909864
key: &str,
910865
max_depth: i64,
866+
max_nodes: i64,
911867
) -> Result<serde_json::Value, Box<dyn std::error::Error>> {
912-
let root = match self.get_entity(category, key)? {
913-
Some(e) => e,
914-
None => return Ok(serde_json::json!({"error": "entity not found"})),
915-
};
868+
let root = self.get_entity(category, key)?
869+
.ok_or_else(|| format!("entity not found: {}/{}", category, key))?;
916870

917871
// Get root links
918872
let links_json: String = self
@@ -933,7 +887,7 @@ impl Database {
933887
let mut traversed = Vec::new();
934888

935889
visited.insert(root.id.clone());
936-
self._traverse_links(&root.id, &mut traversed, &mut visited, max_depth, 0);
890+
self._traverse_links(&root.id, &mut traversed, &mut visited, max_depth, max_nodes, 0);
937891

938892
let chain = serde_json::json!({
939893
"entity": {
@@ -955,9 +909,10 @@ impl Database {
955909
traversed: &mut Vec<serde_json::Value>,
956910
visited: &mut std::collections::HashSet<String>,
957911
max_depth: i64,
912+
max_nodes: i64,
958913
current_depth: i64,
959914
) {
960-
if current_depth >= max_depth {
915+
if current_depth >= max_depth || traversed.len() as i64 >= max_nodes {
961916
return;
962917
}
963918

@@ -977,7 +932,8 @@ impl Database {
977932
continue;
978933
}
979934

980-
if let Ok(Some(entity)) = self.get_entity_by_id(&link.target_id) {
935+
match self.get_entity_by_id(&link.target_id) {
936+
Ok(Some(entity)) => {
981937
visited.insert(link.target_id.clone());
982938

983939
// Get this entity's outgoing links
@@ -1007,7 +963,14 @@ impl Database {
1007963

1008964
traversed.push(node.clone());
1009965

1010-
self._traverse_links(&entity.id, traversed, visited, max_depth, current_depth + 1);
966+
self._traverse_links(&entity.id, traversed, visited, max_depth, max_nodes, current_depth + 1);
967+
}
968+
Ok(None) => {
969+
// Dangling link — target entity no longer exists
970+
}
971+
Err(e) => {
972+
eprintln!("mimir: traverse error reading entity {}: {}", link.target_id, e);
973+
}
1011974
}
1012975
}
1013976
}
@@ -1022,34 +985,7 @@ impl Database {
1022985
FROM entities WHERE id = ?1",
1023986
)?;
1024987
let mut rows = stmt.query_map(params![id], |row| {
1025-
let tags_str: String = row.get::<_, String>(6).unwrap_or_else(|_| "[]".to_string());
1026-
let links_str: String = row
1027-
.get::<_, String>(13)
1028-
.unwrap_or_else(|_| "[]".to_string());
1029-
let tags: Vec<String> = serde_json::from_str(&tags_str).unwrap_or_default();
1030-
let links: Vec<MemoryLink> = serde_json::from_str(&links_str).unwrap_or_default();
1031-
let archived: i32 = row.get(11).unwrap_or(0);
1032-
let verified: i32 = row.get(14).unwrap_or(0);
1033-
Ok(Entity {
1034-
id: row.get(0)?,
1035-
category: row.get(1)?,
1036-
key: row.get(2)?,
1037-
body_json: row.get(3)?,
1038-
status: row.get(4)?,
1039-
entity_type: row.get(5)?,
1040-
tags,
1041-
decay_score: row.get(7)?,
1042-
retrieval_count: row.get(8)?,
1043-
layer: row.get(9)?,
1044-
topic_path: row.get(10)?,
1045-
archived: archived != 0,
1046-
archive_reason: row.get(12)?,
1047-
links,
1048-
verified: verified != 0,
1049-
source: row.get(15)?,
1050-
created_at_unix_ms: row.get(16)?,
1051-
last_accessed_unix_ms: row.get(17)?,
1052-
})
988+
entity_from_row(row)
1053989
})?;
1054990
if let Some(row) = rows.next() {
1055991
Ok(Some(row?))
@@ -1228,10 +1164,10 @@ impl Database {
12281164
created,
12291165
accessed,
12301166
) = row?;
1231-
// Sanitize id for filesystem: replace path separators
1167+
// Sanitize id for filesystem: only alphanumeric, hyphen, underscore
12321168
let safe_id: String = id
12331169
.chars()
1234-
.map(|c| if c == '/' || c == '\\' { '_' } else { c })
1170+
.map(|c| if c.is_alphanumeric() || c == '-' || c == '_' { c } else { '_' })
12351171
.collect();
12361172
let filename = format!("{}.md", safe_id);
12371173
let filepath = vault.join(&filename);
@@ -1325,15 +1261,41 @@ last_accessed: {}
13251261
}
13261262
};
13271263

1328-
// Parse YAML frontmatter (simple — between --- markers)
1329-
let parts: Vec<&str> = content.splitn(3, "---").collect();
1330-
if parts.len() < 3 {
1331-
errors.push(format!("{}: no frontmatter", path.display()));
1264+
// Parse YAML frontmatter: find opening and closing --- on their own lines
1265+
let mut lines = content.lines().peekable();
1266+
// Skip leading blank lines
1267+
while let Some(line) = lines.peek() {
1268+
if line.trim().is_empty() {
1269+
lines.next();
1270+
} else {
1271+
break;
1272+
}
1273+
}
1274+
// Find opening ---
1275+
match lines.next() {
1276+
Some(line) if line.trim() == "---" => {}
1277+
_ => {
1278+
errors.push(format!("{}: no frontmatter", path.display()));
1279+
continue;
1280+
}
1281+
}
1282+
// Read frontmatter lines until closing ---
1283+
let mut fm_lines = Vec::new();
1284+
let mut found_close = false;
1285+
for line in lines.by_ref() {
1286+
if line.trim() == "---" {
1287+
found_close = true;
1288+
break;
1289+
}
1290+
fm_lines.push(line);
1291+
}
1292+
if !found_close {
1293+
errors.push(format!("{}: unclosed frontmatter", path.display()));
13321294
continue;
13331295
}
1334-
1335-
let fm = parts[1];
1336-
let body = parts[2].trim().to_string();
1296+
let fm = fm_lines.join("\n");
1297+
// Remaining lines are the body
1298+
let body: String = lines.collect::<Vec<_>>().join("\n").trim().to_string();
13371299

13381300
// Extract fields from frontmatter
13391301
let get_fm = |key: &str| -> String {
@@ -1449,9 +1411,10 @@ last_accessed: {}
14491411
let mut all_entities = Vec::new();
14501412

14511413
if categories.is_empty() {
1452-
// No filter — get top entities overall
1414+
// No filter — get top entities overall (read-only, no side effects)
14531415
let params = RecallParams {
14541416
limit,
1417+
skip_side_effects: true,
14551418
..RecallParams::default()
14561419
};
14571420
all_entities = self.recall(&params)?;
@@ -1460,6 +1423,7 @@ last_accessed: {}
14601423
let params = RecallParams {
14611424
category: Some(cat.clone()),
14621425
limit,
1426+
skip_side_effects: true,
14631427
..RecallParams::default()
14641428
};
14651429
let mut batch = self.recall(&params)?;
@@ -1507,6 +1471,38 @@ fn truncate_str(s: &str, max_len: usize) -> String {
15071471
}
15081472
}
15091473

1474+
/// Extract an Entity from a SQLite row (shared across recall, get_entity, get_entity_by_id).
1475+
fn entity_from_row(row: &rusqlite::Row) -> rusqlite::Result<crate::models::Entity> {
1476+
use crate::models::{Entity, MemoryLink};
1477+
let tags_str: String = row.get::<_, String>(6).unwrap_or_else(|_| "[]".to_string());
1478+
let links_str: String = row.get::<_, String>(13).unwrap_or_else(|_| "[]".to_string());
1479+
let tags: Vec<String> = serde_json::from_str(&tags_str).unwrap_or_default();
1480+
let links: Vec<MemoryLink> = serde_json::from_str(&links_str).unwrap_or_default();
1481+
let archived: i32 = row.get(11).unwrap_or(0);
1482+
let verified: i32 = row.get(14).unwrap_or(0);
1483+
1484+
Ok(Entity {
1485+
id: row.get(0)?,
1486+
category: row.get(1)?,
1487+
key: row.get(2)?,
1488+
body_json: row.get(3)?,
1489+
status: row.get(4)?,
1490+
entity_type: row.get(5)?,
1491+
tags,
1492+
decay_score: row.get(7)?,
1493+
retrieval_count: row.get(8)?,
1494+
layer: row.get(9)?,
1495+
topic_path: row.get(10)?,
1496+
archived: archived != 0,
1497+
archive_reason: row.get(12)?,
1498+
links,
1499+
verified: verified != 0,
1500+
source: row.get(15)?,
1501+
created_at_unix_ms: row.get(16)?,
1502+
last_accessed_unix_ms: row.get(17)?,
1503+
})
1504+
}
1505+
15101506
#[cfg(test)]
15111507
mod tests {
15121508
use super::*;

src/mcp.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,11 @@ fn list_tools(id: Option<Value>) -> JsonRpcResponse {
971971
"type": "integer",
972972
"default": 3,
973973
"description": "Maximum traversal depth from the starting entity"
974+
},
975+
"max_nodes": {
976+
"type": "integer",
977+
"default": 100,
978+
"description": "Maximum total nodes to traverse before stopping"
974979
}
975980
},
976981
"required": [

src/models.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ pub struct RecallParams {
134134
pub min_decay: f64,
135135
pub topic_path: Option<String>,
136136
pub include_archived: bool,
137+
pub skip_side_effects: bool,
137138
}
138139

139140
impl Default for RecallParams {
@@ -146,6 +147,7 @@ impl Default for RecallParams {
146147
min_decay: 0.0,
147148
topic_path: None,
148149
include_archived: false,
150+
skip_side_effects: false,
149151
}
150152
}
151153
}

0 commit comments

Comments
 (0)