-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmemroos.eval.yaml
More file actions
73 lines (60 loc) · 1.84 KB
/
Copy pathmemroos.eval.yaml
File metadata and controls
73 lines (60 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Settings for the judge model: provider, model id, model family and the
# version tag of the prompt template.
judge_model:
  provider: anthropic
  model: claude-haiku-4-5-20251001
  model_family: anthropic
  prompt_template_version: v1
# Paths to golden-set JSONL files: one default plus one entry per role.
# NOTE(review): paths are relative; the base directory they resolve against
# is not visible here - confirm with the loader.
golden_sets:
  default: ./golden-sets/business-ops-50.jsonl
  per_role:
    sales: ./golden-sets/sales-50.jsonl
    support: ./golden-sets/support-50.jsonl
    finance: ./golden-sets/finance-50.jsonl
    ops: ./golden-sets/ops-50.jsonl
# Scorer names grouped into three tiers (l1, l2, l3). Lists are written in
# block style to keep lines short; the parsed values are unchanged.
scorers:
  l1_capability:
    - tool_call_schema
    - json_valid
    - on_task
    - memory_recall_l1
  l2_quality:
    - rubric_5pt_faithful
    - rubric_5pt_useful
    - rubric_5pt_policy
    - memory_recall_l2
    - trajectory_multi_step
  l3_outcome:
    - completion_rate
    - escalation_rate
    - ttr_p50
    - operator_approval
    - cost_per_task
# Weights for l1, l2 and l3; the three values sum to 1.0.
# NOTE(review): presumably these map to the l1_/l2_/l3_ scorer tiers -
# confirm against the consumer.
weights:
  l1: 0.25
  l2: 0.5
  l3: 0.25
# Named alternative weightings; each preset's l1 + l2 + l3 sums to 1.0.
weight_presets:
  outcome-weighted: { l1: 0.1, l2: 0.4, l3: 0.5 }
  quality-weighted: { l1: 0.2, l2: 0.6, l3: 0.2 }
  compliance-weighted: { l1: 0.4, l2: 0.4, l3: 0.2 }
# No preset is selected (explicit null).
# NOTE(review): presumably a preset name from weight_presets goes here, with
# null falling back to `weights` - confirm against the consumer.
active_preset: null
# Drift-guard settings: an agreement floor of 0.85 and a boolean flag tying
# judge rotation to a rebaseline.
drift_guard:
  golden_agreement_floor: 0.85
  judge_rotation_requires_rebaseline: true
# "seal" settings: a threshold, an auto-apply switch (off) and the list of
# proposal types (currently only noop_test).
seal:
  reflection_threshold: 0.6
  auto_apply: false
  proposal_types: [noop_test]
# NOTE(review): with no entries nested beneath it, this key parses as null,
# not as an empty mapping - confirm the consumer accepts null here.
agents:
  # per-agent overrides can be added here
# business_ops settings: a poll interval in seconds and the name of the
# correlation-id field.
business_ops:
  poll_interval_seconds: 300
  correlation_id_field: memroos_correlation_id
# Finance settings: disabled by default, three display labels and a
# dedicated golden set.
# NOTE(review): this copy of the file lost its indentation. `finance` is kept
# at its current level here, but it may belong under `business_ops` - confirm
# against the consumer's schema.
finance:
  enabled: false
  transaction_label: transaction
  reconciliation_label: reconciliation
  exception_label: exception
  golden_set: ./golden-sets/finance-reconciliation.jsonl
# Compliance settings: data-residency switch (off), audit retention in days
# and the list of host names treated as local.
# NOTE(review): this copy of the file lost its indentation. `compliance` is
# kept at its current level here, but it may belong under `business_ops` -
# confirm against the consumer's schema.
compliance:
  data_residency_enabled: false
  audit_retention_days: 365
  allowed_local_hosts: [localhost, 127.0.0.1, ::1, host.docker.internal, ollama, vllm]
# List of enabled adapter names.
# NOTE(review): the parent of this key cannot be determined from this copy
# (it may belong under `business_ops` or `compliance`) - confirm before
# relying on its position.
adapters_enabled: [hubspot, intercom, quickbooks, bank_reconciliation]
# "cove" settings: disabled by default, at most 4 verification questions,
# with parallel verification switched on.
cove:
  enabled: false
  max_verification_questions: 4
  parallel_verification: true
# Per-company settings; only a `default` entry is defined. Its l3 sub-weights
# sum to 1.0.
# NOTE(review): four of these keys match names in scorers.l3_outcome exactly,
# but `operator_approval_rate` is listed there as `operator_approval` -
# confirm which spelling the consumer expects.
companies:
  default:
    l3_sub_weights:
      completion_rate: 0.35
      escalation_rate: 0.25
      ttr_p50: 0.2
      operator_approval_rate: 0.1
      cost_per_task: 0.1