Skip to content

Commit 7805bd7

Browse files
authored
[amazon_rose_forest] add hierarchical clustering (#71)
1 parent 43764f1 commit 7805bd7

5 files changed

Lines changed: 130 additions & 18 deletions

File tree

src/core/hierarchical.rs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
use crate::core::vector::Vector;
2+
3+
#[derive(Debug, Clone)]
4+
pub struct Cluster {
5+
pub centroid: Vector,
6+
pub members: Vec<Vector>,
7+
}
8+
9+
impl Cluster {
10+
fn new(vector: Vector) -> Self {
11+
Self {
12+
centroid: vector.clone(),
13+
members: vec![vector],
14+
}
15+
}
16+
17+
fn recompute_centroid(&mut self) {
18+
if self.members.is_empty() {
19+
return;
20+
}
21+
let dimensions = self.members[0].dimensions;
22+
let mut sums = vec![0.0f32; dimensions];
23+
for v in &self.members {
24+
for (i, val) in v.values.iter().enumerate() {
25+
sums[i] += val;
26+
}
27+
}
28+
let len_inv = 1.0 / self.members.len() as f32;
29+
for sum in &mut sums {
30+
*sum *= len_inv;
31+
}
32+
self.centroid = Vector::new(sums);
33+
}
34+
}
35+
36+
/// Perform a simple agglomerative clustering using Euclidean distance.
37+
/// Clusters are merged until the closest pair has distance greater than
38+
/// `threshold`.
39+
pub fn cluster_vectors(vectors: &[Vector], threshold: f32) -> Vec<Cluster> {
40+
let mut clusters: Vec<Cluster> = vectors.iter().cloned().map(Cluster::new).collect();
41+
if clusters.is_empty() {
42+
return clusters;
43+
}
44+
loop {
45+
let mut best_dist = f32::MAX;
46+
let mut best_pair: Option<(usize, usize)> = None;
47+
for i in 0..clusters.len() {
48+
for j in (i + 1)..clusters.len() {
49+
let dist = clusters[i]
50+
.centroid
51+
.euclidean_distance(&clusters[j].centroid);
52+
if dist < best_dist {
53+
best_dist = dist;
54+
best_pair = Some((i, j));
55+
}
56+
}
57+
}
58+
match best_pair {
59+
Some((i, j)) if best_dist <= threshold => {
60+
let mut members = clusters[i].members.clone();
61+
members.extend(clusters[j].members.clone());
62+
clusters[i].members = members;
63+
clusters[i].recompute_centroid();
64+
clusters.remove(j);
65+
}
66+
_ => break,
67+
}
68+
}
69+
clusters
70+
}
71+
72+
#[cfg(test)]
73+
mod tests {
74+
use super::*;
75+
76+
#[test]
77+
fn test_basic_clustering() {
78+
let v1 = Vector::new(vec![0.0, 0.0]);
79+
let v2 = Vector::new(vec![0.1, -0.1]);
80+
let v3 = Vector::new(vec![5.0, 5.0]);
81+
let v4 = Vector::new(vec![5.2, 4.9]);
82+
let clusters = cluster_vectors(&[v1, v2, v3, v4], 0.5);
83+
assert_eq!(clusters.len(), 2);
84+
}
85+
86+
#[test]
87+
fn test_single_cluster_when_threshold_large() {
88+
let vectors = vec![
89+
Vector::new(vec![0.0, 0.0]),
90+
Vector::new(vec![1.0, 0.0]),
91+
Vector::new(vec![0.0, 1.0]),
92+
];
93+
let clusters = cluster_vectors(&vectors, 10.0);
94+
assert_eq!(clusters.len(), 1);
95+
assert_eq!(clusters[0].members.len(), 3);
96+
}
97+
98+
#[test]
99+
fn test_no_merge_when_threshold_small() {
100+
let vectors = vec![Vector::new(vec![0.0, 0.0]), Vector::new(vec![1.0, 1.0])];
101+
let clusters = cluster_vectors(&vectors, 0.1);
102+
assert_eq!(clusters.len(), 2);
103+
}
104+
}

src/core/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod centroid;
22
pub mod centroid_crdt;
3+
pub mod hierarchical;
34
pub mod metrics;
45
pub mod vector;

src/lib.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
1+
pub mod ad4m;
2+
pub mod code_analysis;
13
pub mod consciousness;
24
pub mod core;
35
pub mod darwin;
6+
pub mod evaluation;
47
pub mod governance;
8+
pub mod hypothesis;
59
pub mod intelligence;
10+
pub mod llm;
611
pub mod nerv;
712
pub mod network;
813
pub mod server;
914
pub mod sharding;
1015
pub mod utils;
11-
pub mod llm;
12-
pub mod code_analysis;
13-
pub mod hypothesis;
14-
pub mod evaluation;
15-
pub mod ad4m;
16-
1716

1817
// Export common types for easier access
1918
pub use crate::consciousness::ad4m_bridge::Ad4mBridge;
2019
pub use crate::consciousness::introspection::Introspection;
2120
pub use crate::consciousness::swarm::Swarm;
2221
pub use crate::core::centroid::Centroid;
2322
pub use crate::core::centroid_crdt::CentroidCRDT;
23+
pub use crate::core::hierarchical::{cluster_vectors, Cluster};
2424
pub use crate::core::vector::Vector;
2525
pub use crate::darwin::self_improvement::SelfImprovementEngine;
2626
pub use crate::governance::dao::Dao;

src/nerv/replication.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,11 @@ impl ReplicationManager {
5151
info!("Removed peer {} from replication manager", peer_id);
5252
}
5353

54-
<<<<<<< Updated upstream
5554
pub async fn start_replication(
5655
self: Arc<Self>,
5756
shard_id: Uuid,
5857
target_node: &str,
5958
) -> Result<Uuid> {
60-
=======
61-
pub async fn start_replication(&self, shard_id: Uuid, target_node: &str) -> Result<Uuid> {
62-
>>>>>>> Stashed changes
6359
// Verify target node is in peers
6460
if !self.peers.read().await.contains(target_node) {
6561
return Err(anyhow!("Target node {} is not a known peer", target_node));
@@ -83,11 +79,7 @@ impl ReplicationManager {
8379

8480
// Spawn task to handle replication
8581
let task_id_clone = task_id;
86-
<<<<<<< Updated upstream
8782
let self_clone = Arc::clone(&self);
88-
=======
89-
let self_clone = Arc::new(self.clone());
90-
>>>>>>> Stashed changes
9183

9284
tokio::spawn(async move {
9385
if let Err(e) = self_clone.execute_replication(task_id_clone).await {
@@ -167,9 +159,6 @@ impl ReplicationManager {
167159
}
168160
}
169161

170-
<<<<<<< Updated upstream
171-
// Support cloning for the manager to allow sharing between threads
172-
=======
173162
// Support cloning for the manager to allow sharing between threads
174163
impl Clone for ReplicationManager {
175164
fn clone(&self) -> Self {
@@ -183,4 +172,3 @@ impl Clone for ReplicationManager {
183172
}
184173
}
185174
}
186-
>>>>>>> Stashed changes

src/sharding/manager.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,3 +467,22 @@ impl ShardManager {
467467
Ok(distribution)
468468
}
469469
}
470+
471+
// Support cloning for the manager to allow sharing between threads
472+
impl Clone for ShardManager {
473+
fn clone(&self) -> Self {
474+
// Note: This creates a new instance with the same node_id and metrics
475+
// but empty collections. The collections are meant to be
476+
// accessed through the original instance's RwLocks.
477+
Self {
478+
metrics: self.metrics.clone(),
479+
node_id: self.node_id.clone(),
480+
shards: RwLock::new(HashMap::new()),
481+
shard_assignments: RwLock::new(HashMap::new()),
482+
migrations: RwLock::new(HashMap::new()),
483+
indices: RwLock::new(HashMap::new()),
484+
shard_loads: RwLock::new(HashMap::new()),
485+
}
486+
}
487+
}
488+

0 commit comments

Comments
 (0)