Skip to content

Commit 9df6fad

Browse files
authored
Implement issues #185 (autocohere), #186 (supersede), #187 (maintenance) (#188)
#185 mimir_autocohere: Full atomic grooming pass combining cohere, decay, and compact.
#186 mimir_supersede: Creates a supersedes relationship and deprecates the old entity.
#187 mimir_maintenance: Dedup, orphan detection, vacuum, and reindex, with dry_run support.
- Added AutocohereArgs, SupersedeArgs, MaintenanceArgs deserialization structs
- Added handle_autocohere, handle_supersede, handle_maintenance handlers
- Added db.rs functions: file_size_bytes, update_entity_status, deduplicate_entities, detect_orphan_journal_entries, detect_orphan_links, vacuum
- Registered all 3 new tools in mcp.rs with full input/output schemas
- Refactored CohereArgs into an explicit struct with field mapping to CohereParams
1 parent 1ec37d4 commit 9df6fad

3 files changed

Lines changed: 526 additions & 8 deletions

File tree

src/db.rs

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1772,6 +1772,13 @@ impl Database {
17721772
schema::gather_stats(&self.conn, &self.db_path)
17731773
}
17741774

1775+
/// Get database file size in bytes.
1776+
pub fn file_size_bytes(&self) -> Result<u64, Box<dyn std::error::Error>> {
1777+
let path = std::path::Path::new(&self.db_path);
1778+
let metadata = std::fs::metadata(path)?;
1779+
Ok(metadata.len())
1780+
}
1781+
17751782
/// Migrate from v0.1.x database.
17761783
pub fn migrate_from_v0_1(
17771784
&self,
@@ -1919,7 +1926,141 @@ impl Database {
19191926
}
19201927
}
19211928

1922-
/// Get entity by ID (internal helper).
1929+
/// Update an entity's status (e.g., to "deprecated").
1930+
pub fn update_entity_status(
1931+
&self,
1932+
id: &str,
1933+
status: &str,
1934+
reason: &str,
1935+
) -> Result<(), Box<dyn std::error::Error>> {
1936+
self.conn.execute(
1937+
"UPDATE entities SET status = ?1, archive_reason = ?2, last_accessed_unix_ms = ?3 WHERE id = ?4",
1938+
params![status, reason, now_ms(), id],
1939+
)?;
1940+
Ok(())
1941+
}
1942+
1943+
/// Find entities with identical (category, key) and merge/archive duplicates, keeping the newest.
1944+
/// Returns the number of entities archived.
1945+
pub fn deduplicate_entities(&self, dry_run: bool) -> Result<i64, Box<dyn std::error::Error>> {
1946+
let mut archived_count = 0i64;
1947+
1948+
// Find duplicate (category, key) pairs, keeping the newest `created_at_unix_ms`.
1949+
let mut stmt = self.conn.prepare(
1950+
"SELECT T1.id, T1.category, T1.key FROM entities AS T1 JOIN (
1951+
SELECT category, key, MAX(created_at_unix_ms) as max_created_at
1952+
FROM entities
1953+
GROUP BY category, key
1954+
HAVING COUNT(*) > 1
1955+
) AS T2 ON T1.category = T2.category AND T1.key = T2.key
1956+
WHERE T1.created_at_unix_ms < T2.max_created_at AND T1.archived = 0"
1957+
)?;
1958+
1959+
let rows = stmt.query_map([], |row| {
1960+
Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?))
1961+
})?;
1962+
1963+
let mut ids_to_archive = Vec::new();
1964+
for row in rows {
1965+
let (id, category, key) = row?;
1966+
ids_to_archive.push(id);
1967+
eprintln!(
1968+
"mimir: deduplicate_entities: found duplicate {}/{} (will archive oldest)",
1969+
category, key
1970+
);
1971+
}
1972+
1973+
if !dry_run && !ids_to_archive.is_empty() {
1974+
let placeholders = ids_to_archive
1975+
.iter()
1976+
.map(|_| "?")
1977+
.collect::<Vec<_>>()
1978+
.join(", ");
1979+
1980+
let tx = self.conn.unchecked_transaction()?;
1981+
let now = now_ms();
1982+
1983+
// Archive duplicates
1984+
let update_sql = format!(
1985+
"UPDATE entities SET archived = 1, archive_reason = 'deduplicate', last_accessed_unix_ms = ?1 WHERE id IN ({})",
1986+
placeholders
1987+
);
1988+
1989+
// Build the params list: first the timestamp, then all IDs
1990+
let mut param_refs: Vec<&dyn rusqlite::types::ToSql> = Vec::new();
1991+
let now_box: Box<dyn rusqlite::types::ToSql> = Box::new(now);
1992+
param_refs.push(now_box.as_ref());
1993+
let id_boxes: Vec<Box<dyn rusqlite::types::ToSql>> = ids_to_archive.iter().map(|s| Box::new(s.clone()) as Box<dyn rusqlite::types::ToSql>).collect();
1994+
for b in &id_boxes {
1995+
param_refs.push(b.as_ref());
1996+
}
1997+
archived_count = tx.execute(&update_sql, param_refs.as_slice())? as i64;
1998+
1999+
// Clean FTS5 index for archived entities
2000+
let delete_sql = format!(
2001+
"DELETE FROM entities_fts WHERE rowid IN (SELECT rowid FROM entities WHERE id IN ({}) )",
2002+
placeholders
2003+
);
2004+
let id_param_refs: Vec<&dyn rusqlite::types::ToSql> = id_boxes.iter().map(|b| b.as_ref()).collect();
2005+
tx.execute(&delete_sql, id_param_refs.as_slice())?;
2006+
tx.commit()?;
2007+
}
2008+
2009+
Ok(archived_count)
2010+
}
2011+
2012+
/// Detect journal entries pointing to archived/deleted entities.
2013+
/// Returns the number of orphan journal entries found.
2014+
pub fn detect_orphan_journal_entries(&self) -> Result<i64, Box<dyn std::error::Error>> {
2015+
let count: i64 = self.conn.query_row(
2016+
"SELECT COUNT(*) FROM journal WHERE entity_id IS NOT NULL AND entity_id != '' AND entity_id NOT IN (SELECT id FROM entities)",
2017+
[],
2018+
|r| r.get(0),
2019+
)?;
2020+
Ok(count)
2021+
}
2022+
2023+
/// Detect links pointing to archived/deleted entities.
2024+
/// Returns the number of orphan links found.
2025+
pub fn detect_orphan_links(&self) -> Result<i64, Box<dyn std::error::Error>> {
2026+
let mut orphan_count = 0i64;
2027+
let mut stmt = self.conn.prepare(
2028+
"SELECT id, links FROM entities WHERE links != '[]'"
2029+
)?;
2030+
let rows = stmt.query_map([], |row| {
2031+
Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2032+
})?;
2033+
2034+
for row in rows {
2035+
let (_entity_id, links_json) = row?;
2036+
let mut links: Vec<MemoryLink> = serde_json::from_str(&links_json).unwrap_or_default();
2037+
let original_len = links.len();
2038+
2039+
links.retain(|link| {
2040+
let target_exists: bool = self.conn.query_row(
2041+
"SELECT COUNT(*) FROM entities WHERE id = ?1",
2042+
params![&link.target_id],
2043+
|r| r.get(0),
2044+
).unwrap_or(0) > 0;
2045+
target_exists
2046+
});
2047+
2048+
if links.len() < original_len {
2049+
orphan_count += (original_len - links.len()) as i64;
2050+
// For dry_run, we just count. For actual run, we would update the entity.
2051+
// In this read-only detection function, we don't update.
2052+
}
2053+
}
2054+
Ok(orphan_count)
2055+
}
2056+
2057+
/// Run SQLite VACUUM command to reclaim space.
2058+
pub fn vacuum(&self) -> Result<(), Box<dyn std::error::Error>> {
2059+
self.conn.execute_batch("VACUUM")?;
2060+
Ok(())
2061+
}
2062+
2063+
/// Get a single entity by ID (internal helper).
19232064
fn get_entity_by_id(&self, id: &str) -> Result<Option<Entity>, Box<dyn std::error::Error>> {
19242065
let mut stmt = self.conn.prepare(
19252066
"SELECT id, category, key, body_json, status, type, tags,

src/mcp.rs

Lines changed: 142 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1853,10 +1853,145 @@ fn list_tools(id: Option<Value>) -> JsonRpcResponse {
18531853
"created_at_unix_ms": {"type": "integer"}
18541854
}
18551855
},
1856-
"annotations": {
1857-
"destructiveHint": true
1856+
\"annotations\": {
1857+
\"destructiveHint\": true
1858+
}
1859+
},
1860+
{
1861+
\"name\": \"mimir_autocohere\",
1862+
\"description\": \"Run a full atomic grooming pass: cohere (promote, link, archive), then decay (recalculate Ebbinghaus decay), then compact (archive below threshold). Returns a summary report. Use dry_run=true to preview without changes.\",
1863+
\"inputSchema\": {
1864+
\"type\": \"object\",
1865+
\"properties\": {
1866+
\"dry_run\": {
1867+
\"type\": \"boolean\",
1868+
\"description\": \"If true, preview changes without writing\",
1869+
\"default\": false
1870+
}
1871+
}
1872+
},
1873+
\"outputSchema\": {
1874+
\"type\": \"object\",
1875+
\"properties\": {
1876+
\"promoted_entities\": {\"type\": \"integer\", \"description\": \"Entities promoted during cohere\"},
1877+
\"links_created\": {\"type\": \"integer\", \"description\": \"Auto-links created during cohere\"},
1878+
\"archived_entities\": {\"type\": \"integer\", \"description\": \"Entities archived (cohere + compact)\"},
1879+
\"decay_updates\": {\"type\": \"integer\", \"description\": \"Entities whose decay score was updated\"},
1880+
\"compact_archived_count\": {\"type\": \"integer\", \"description\": \"Entities archived during compact step\"},
1881+
\"db_size_delta_bytes\": {\"type\": \"integer\", \"description\": \"Change in SQLite file size in bytes\"},
1882+
\"dry_run\": {\"type\": \"boolean\"}
1883+
}
1884+
},
1885+
\"annotations\": {
1886+
\"destructiveHint\": true
1887+
}
1888+
},
1889+
{
1890+
\"name\": \"mimir_supersede\",
1891+
\"description\": \"Create a 'supersedes' relationship from a new fact to an old one, setting the old entity's status to 'deprecated'. Use this when a newer entity makes an older one obsolete.\",
1892+
\"inputSchema\": {
1893+
\"type\": \"object\",
1894+
\"properties\": {
1895+
\"from_category\": {
1896+
\"type\": \"string\",
1897+
\"description\": \"Category of the OLD entity being superseded\"
1898+
},
1899+
\"from_key\": {
1900+
\"type\": \"string\",
1901+
\"description\": \"Key of the OLD entity being superseded\"
1902+
},
1903+
\"to_category\": {
1904+
\"type\": \"string\",
1905+
\"description\": \"Category of the NEW entity that supersedes\"
1906+
},
1907+
\"to_key\": {
1908+
\"type\": \"string\",
1909+
\"description\": \"Key of the NEW entity that supersedes\"
1910+
},
1911+
\"reason\": {
1912+
\"type\": \"string\",
1913+
\"description\": \"Reason for superseding (recorded in archive_reason)\",
1914+
\"default\": \"\"
1915+
},
1916+
\"relationship\": {
1917+
\"type\": \"string\",
1918+
\"description\": \"Link relationship type (default: 'supersedes')\",
1919+
\"default\": \"supersedes\"
1920+
}
1921+
},
1922+
\"required\": [\"from_category\", \"from_key\", \"to_category\", \"to_key\"]
1923+
},
1924+
\"outputSchema\": {
1925+
\"type\": \"object\",
1926+
\"properties\": {
1927+
\"from_entity_id\": {\"type\": \"string\", \"description\": \"ID of the old (superseded) entity\"},
1928+
\"from_entity_category\": {\"type\": \"string\"},
1929+
\"from_entity_key\": {\"type\": \"string\"},
1930+
\"to_entity_id\": {\"type\": \"string\", \"description\": \"ID of the new (superseding) entity\"},
1931+
\"to_entity_category\": {\"type\": \"string\"},
1932+
\"to_entity_key\": {\"type\": \"string\"},
1933+
\"relationship\": {\"type\": \"string\"},
1934+
\"status_updated\": {\"type\": \"string\", \"description\": \"New status of the old entity (always 'deprecated')\"}
1935+
}
1936+
},
1937+
\"annotations\": {
1938+
\"destructiveHint\": true
18581939
}
1859-
}]"###
1940+
},
1941+
{
1942+
\"name\": \"mimir_maintenance\",
1943+
\"description\": \"Database maintenance operations: deduplicate entities with identical (category, key), detect orphan journal entries and links, vacuum (reclaim disk space), reindex FTS5. Set dry_run=true to preview. Use 'all' to run everything.\",
1944+
\"inputSchema\": {
1945+
\"type\": \"object\",
1946+
\"properties\": {
1947+
\"dedup\": {
1948+
\"type\": \"boolean\",
1949+
\"description\": \"Find duplicate (category, key) entities and archive the oldest\",
1950+
\"default\": false
1951+
},
1952+
\"orphans\": {
1953+
\"type\": \"boolean\",
1954+
\"description\": \"Detect journal entries and links pointing to non-existent entities\",
1955+
\"default\": false
1956+
},
1957+
\"vacuum\": {
1958+
\"type\": \"boolean\",
1959+
\"description\": \"Run SQLite VACUUM to reclaim disk space\",
1960+
\"default\": false
1961+
},
1962+
\"reindex\": {
1963+
\"type\": \"boolean\",
1964+
\"description\": \"Rebuild the FTS5 search index from entities table\",
1965+
\"default\": false
1966+
},
1967+
\"all\": {
1968+
\"type\": \"boolean\",
1969+
\"description\": \"Run all maintenance operations (dedup, orphans, vacuum, reindex)\",
1970+
\"default\": false
1971+
},
1972+
\"dry_run\": {
1973+
\"type\": \"boolean\",
1974+
\"description\": \"If true, preview changes without writing\",
1975+
\"default\": false
1976+
}
1977+
}
1978+
},
1979+
\"outputSchema\": {
1980+
\"type\": \"object\",
1981+
\"properties\": {
1982+
\"dedup_archived\": {\"type\": \"integer\", \"description\": \"Number of duplicate entities archived\"},
1983+
\"orphan_journal_entries_found\": {\"type\": \"integer\", \"description\": \"Orphan journal entries detected\"},
1984+
\"orphan_links_found\": {\"type\": \"integer\", \"description\": \"Orphan links detected\"},
1985+
\"vacuum_reclaimed_bytes\": {\"type\": \"integer\", \"description\": \"Disk space reclaimed by VACUUM\"},
1986+
\"reindex_rows_affected\": {\"type\": \"integer\", \"description\": \"Rows reindexed into FTS5\"},
1987+
\"dry_run\": {\"type\": \"boolean\"},
1988+
\"errors\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}, \"description\": \"Errors encountered during maintenance\"}
1989+
}
1990+
},
1991+
\"annotations\": {
1992+
\"destructiveHint\": true
1993+
}
1994+
}]\"###
18601995
).expect("tools JSON must be valid");
18611996

18621997
JsonRpcResponse {
@@ -1927,6 +2062,10 @@ fn call_tool(name: &str, db: &Database, args: Value, _id: Option<Value>) -> Stri
19272062
"mimir_synthesize" => tools::handle_synthesize(db, args).map_err(|e| e.to_string()),
19282063
"mimir_bench" => tools::handle_bench(db, args).map_err(|e| e.to_string()),
19292064

2065+
"mimir_autocohere" => tools::handle_autocohere(db, args).map_err(|e| e.to_string()),
2066+
"mimir_supersede" => tools::handle_supersede(db, args).map_err(|e| e.to_string()),
2067+
"mimir_maintenance" => tools::handle_maintenance(db, args).map_err(|e| e.to_string()),
2068+
19302069
_ => Err(format!("Unknown tool: {}", name)),
19312070
};
19322071

0 commit comments

Comments
 (0)