Skip to content

Commit 9ebccf6

Browse files
tcconnally and claude
authored
feat(recall): keep conversation out of the shared recall/context surface by default (#298) (#302)
Raw auto-captured `conversation` turns match almost any broad query and, under the retrieval-frequency value model (#298), dominate recall and bury curated facts — degrading every Perseus render that pulls Mimir context (perseus#525). Exclude a configurable set of free-form categories (default: `conversation`) from recall whenever no explicit `category` filter is given. An explicit `category=conversation` is the opt-in to see them; operators can change or disable the list via the MIMIR_EXCLUDE_CATEGORIES env var (comma-separated, empty = disabled). Applied to both the FTS5 path and the hybrid keyword arm, so it covers the default mimir_recall and the mimir_context pre-load. Adds recall_excludes_conversation_by_default_but_returns_it_when_requested. Scope: the pure-vector (mode=dense) arm is not yet filtered — follow-up. The durable other half is the value-model fix (#298: demotion + dampened boost). Note: not locally compiled — this Windows box lacks the C toolchain a build dependency needs; relying on CI for build + test. Co-authored-by: tcconnally <hermes@perseus.observer> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 3e423e9 commit 9ebccf6

1 file changed

Lines changed: 86 additions & 0 deletions

File tree

src/db.rs

Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,22 @@ impl Default for LlmConfig {
9797
}
9898
}
9999

100+
/// Categories kept out of the shared recall/context ranking surface unless a
/// caller asks for them explicitly (by `category`).
///
/// Default: `conversation` — raw auto-captured turns otherwise dominate broad
/// recall and bury curated facts (#298/#525). Override the list via the
/// `MIMIR_EXCLUDE_CATEGORIES` env var (comma-separated); setting it to an empty
/// value disables the exclusion entirely.
///
/// Returns the category names to exclude, in the order they were listed. The
/// variable is read on every call, so a change to the environment takes effect
/// on the next call.
fn excluded_recall_categories() -> Vec<String> {
    match std::env::var("MIMIR_EXCLUDE_CATEGORIES") {
        Ok(raw) => parse_excluded_recall_categories(&raw),
        // Unset, or set to something that is not valid Unicode: fall back to
        // the built-in default rather than silently disabling the exclusion.
        Err(_) => vec!["conversation".to_string()],
    }
}

/// Split a comma-separated category list into trimmed, non-empty names.
///
/// Repeated names are dropped (first occurrence wins, order preserved) so the
/// callers that emit one SQL predicate per entry do not emit duplicates. An
/// input with no usable names (empty, or only commas/whitespace) yields an
/// empty list, which callers treat as "exclusion disabled".
fn parse_excluded_recall_categories(raw: &str) -> Vec<String> {
    let mut categories: Vec<String> = Vec::new();
    for name in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) {
        // The list is operator-supplied and tiny, so a linear scan is cheaper
        // and simpler than building a set.
        if !categories.iter().any(|seen| seen == name) {
            categories.push(name.to_string());
        }
    }
    categories
}
115+
100116
impl Database {
101117
/// Open a database at `path`, initializing the v0.2.0 schema if needed.
102118
pub fn open(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
@@ -1658,6 +1674,17 @@ impl Database {
16581674
}
16591675
}
16601676

1677+
// #298/#525: when no explicit category was requested, keep free-form
1678+
// high-volume categories (default: conversation) out of the ranking
1679+
// surface so raw auto-captured turns don't bury curated facts. An
1680+
// explicit category filter (above) is the opt-in to see them.
1681+
if params.category.as_deref().map_or(true, |c| c.is_empty()) {
1682+
for cat in excluded_recall_categories() {
1683+
conditions.push(format!("category != ?{}", param_values.len() + 1));
1684+
param_values.push(Box::new(cat));
1685+
}
1686+
}
1687+
16611688
// Filter by type
16621689
if let Some(ref t) = params.entity_type {
16631690
if !t.is_empty() {
@@ -1930,6 +1957,14 @@ impl Database {
19301957
param_values.push(Box::new(cat.clone()));
19311958
}
19321959
}
1960+
// #298/#525: mirror the FTS path — exclude free-form categories from the
1961+
// hybrid keyword arm too when no explicit category was requested.
1962+
if params.category.as_deref().map_or(true, |c| c.is_empty()) {
1963+
for cat in excluded_recall_categories() {
1964+
conditions.push(format!("e.category != ?{}", param_values.len() + 1));
1965+
param_values.push(Box::new(cat));
1966+
}
1967+
}
19331968
if let Some(ref t) = params.entity_type {
19341969
if !t.is_empty() {
19351970
conditions.push(format!("e.type = ?{}", param_values.len() + 1));
@@ -4736,6 +4771,57 @@ mod tests {
47364771
let _ = fs::remove_file(&path);
47374772
}
47384773

4774+
#[test]
4775+
fn recall_excludes_conversation_by_default_but_returns_it_when_requested() {
4776+
// #298/#525: broad recall keeps free-form conversation out of the ranking
4777+
// surface so raw turns don't bury curated facts; an explicit category
4778+
// filter opts back in.
4779+
let (db, path) = temp_db();
4780+
db.remember(&make_entity(
4781+
"e-conv",
4782+
"conversation",
4783+
"turn-z",
4784+
r#"{"note":"chatter about widgets"}"#,
4785+
))
4786+
.unwrap();
4787+
db.remember(&make_entity(
4788+
"e-dec",
4789+
"decision",
4790+
"widget-choice",
4791+
r#"{"note":"we chose widgets"}"#,
4792+
))
4793+
.unwrap();
4794+
4795+
let broad = RecallParams {
4796+
query: "widgets".to_string(),
4797+
limit: 10,
4798+
..RecallParams::default()
4799+
};
4800+
let hits = db.recall(&broad).unwrap();
4801+
assert!(
4802+
hits.iter().all(|e| e.category != "conversation"),
4803+
"conversation must be excluded from default recall"
4804+
);
4805+
assert!(
4806+
hits.iter().any(|e| e.category == "decision"),
4807+
"curated decision should still surface in default recall"
4808+
);
4809+
4810+
let explicit = RecallParams {
4811+
query: "widgets".to_string(),
4812+
category: Some("conversation".to_string()),
4813+
limit: 10,
4814+
..RecallParams::default()
4815+
};
4816+
let hits2 = db.recall(&explicit).unwrap();
4817+
assert!(
4818+
hits2.iter().any(|e| e.category == "conversation"),
4819+
"explicit category=conversation must return conversation entities"
4820+
);
4821+
4822+
let _ = fs::remove_file(&path);
4823+
}
4824+
47394825
#[test]
47404826
fn decay_tick_floors_verified_and_never_archives_them() {
47414827
// #298: a verified curated fact and an unverified turn, equally stale.

0 commit comments

Comments
 (0)