-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimhash_test.go
More file actions
122 lines (93 loc) · 2.53 KB
/
Copy pathsimhash_test.go
File metadata and controls
122 lines (93 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package simhash
import (
"testing"
)
// TestSimhash_AddFeature adds a single feature ("test" with weight 1) to a
// fresh Simhash and verifies that exactly one feature was recorded and that
// its stored value and weight match what was passed in.
func TestSimhash_AddFeature(t *testing.T) {
	sh := NewSimhash()

	if err := sh.AddFeature("test", 1); err != nil {
		t.Fatalf("expected no error, got %v", err)
	}

	// Guard the index accesses below: stop right away if the count is off.
	if count := len(sh.features); count != 1 {
		t.Fatalf("expected 1 feature, got %d", count)
	}

	if got := string(sh.features[0].value); got != "test" {
		t.Fatalf("expected feature value 'test', got %s", got)
	}

	if w := sh.features[0].weight; w != 1 {
		t.Fatalf("expected feature weight 1, got %d", w)
	}
}
// TestSimhash_GenerateToken adds two weighted features to a fresh Simhash and
// verifies that GenerateToken returns a non-empty token.
//
// Errors from AddFeature are checked so that a setup failure is reported as
// such instead of surfacing later as a confusing token failure.
func TestSimhash_GenerateToken(t *testing.T) {
	s := NewSimhash()

	if err := s.AddFeature("test", 1); err != nil {
		t.Fatalf("AddFeature(%q, %d): unexpected error: %v", "test", 1, err)
	}
	if err := s.AddFeature("example", 2); err != nil {
		t.Fatalf("AddFeature(%q, %d): unexpected error: %v", "example", 2, err)
	}

	token := s.GenerateToken()
	if len(token) == 0 {
		t.Fatal("expected a non-empty token")
	}
}
// TestHammingDistance calls HammingDistance on two fixed 64-bit values and
// expects the result 2.
func TestHammingDistance(t *testing.T) {
	// 0b101010 XOR 0b111000 == 0b010010, which has two set bits.
	const want = 2

	var x, y uint64 = 0b101010, 0b111000

	if got := HammingDistance(x, y); got != want {
		t.Fatalf("expected Hamming distance %d, got %d", want, got)
	}
}
// TestComputeSimilarityWithLowDifference builds two Simhash instances that
// share the feature "hello" at weight 5 and differ only in a weight-1 feature
// ("world" vs "golang"), then verifies that ComputeSimilarity on their tokens
// yields a score within [0, 100] that is at least 90.
func TestComputeSimilarityWithLowDifference(t *testing.T) {
	const minSimilarity = 90.0

	s1 := NewSimhash()
	if err := s1.AddFeature("hello", 5); err != nil {
		t.Fatalf("s1.AddFeature(%q, %d): unexpected error: %v", "hello", 5, err)
	}
	if err := s1.AddFeature("world", 1); err != nil {
		t.Fatalf("s1.AddFeature(%q, %d): unexpected error: %v", "world", 1, err)
	}

	s2 := NewSimhash()
	if err := s2.AddFeature("hello", 5); err != nil {
		t.Fatalf("s2.AddFeature(%q, %d): unexpected error: %v", "hello", 5, err)
	}
	if err := s2.AddFeature("golang", 1); err != nil {
		t.Fatalf("s2.AddFeature(%q, %d): unexpected error: %v", "golang", 1, err)
	}

	similarity := ComputeSimilarity(s1.GenerateToken(), s2.GenerateToken())

	// Validate the range first; otherwise the threshold check below would
	// hide an out-of-range (negative) score behind a less specific message.
	if similarity > 100.0 || similarity < 0.00 {
		t.Fatalf("expected similarity between 0 and 100, got %f", similarity)
	}
	if similarity < minSimilarity {
		t.Fatalf("expected similarity of at least %f, got %f", minSimilarity, similarity)
	}
}
// TestComputeSimilarityWithHighDifference builds two Simhash instances that
// share the feature "hello" at weight 1 and differ in a weight-5 feature
// ("world" vs "golang"), then verifies that ComputeSimilarity on their tokens
// yields a score within [0, 100] that is at most 60.
func TestComputeSimilarityWithHighDifference(t *testing.T) {
	const maxSimilarity = 60.0

	s1 := NewSimhash()
	if err := s1.AddFeature("hello", 1); err != nil {
		t.Fatalf("s1.AddFeature(%q, %d): unexpected error: %v", "hello", 1, err)
	}
	if err := s1.AddFeature("world", 5); err != nil {
		t.Fatalf("s1.AddFeature(%q, %d): unexpected error: %v", "world", 5, err)
	}

	s2 := NewSimhash()
	if err := s2.AddFeature("hello", 1); err != nil {
		t.Fatalf("s2.AddFeature(%q, %d): unexpected error: %v", "hello", 1, err)
	}
	if err := s2.AddFeature("golang", 5); err != nil {
		t.Fatalf("s2.AddFeature(%q, %d): unexpected error: %v", "golang", 5, err)
	}

	similarity := ComputeSimilarity(s1.GenerateToken(), s2.GenerateToken())

	// Validate the range first; otherwise the threshold check below would
	// hide an out-of-range (> 100) score behind a less specific message.
	if similarity > 100.0 || similarity < 0.00 {
		t.Fatalf("expected similarity between 0 and 100, got %f", similarity)
	}
	if similarity > maxSimilarity {
		t.Fatalf("expected similarity of at most %f, got %f", maxSimilarity, similarity)
	}
}
// TestToBytes is a table-driven test for toBytes covering a string, an
// integer, and a floating-point input.
//
// Failures are reported with t.Errorf and include the offending input, so one
// failing case neither hides the remaining cases nor leaves the reader
// guessing which row broke.
func TestToBytes(t *testing.T) {
	tests := []struct {
		input    any
		expected []byte
	}{
		// A string is expected to come back as its raw bytes.
		{"test", []byte("test")},
		// The expected bytes are 42 followed by seven zero bytes, i.e. an
		// 8-byte little-endian integer.
		// NOTE(review): this presumes toBytes widens int to 64 bits — confirm.
		{42, []byte{42, 0, 0, 0, 0, 0, 0, 0}},
		// The expected bytes are the IEEE-754 float64 encoding of 42.2 in
		// little-endian order.
		{42.2, []byte{154, 153, 153, 153, 153, 25, 69, 64}},
	}

	for _, test := range tests {
		output, err := toBytes(test.input)
		if err != nil {
			t.Errorf("toBytes(%#v): expected no error, got %v", test.input, err)
			continue
		}
		if string(output) != string(test.expected) {
			t.Errorf("toBytes(%#v): expected %v, got %v", test.input, test.expected, output)
		}
	}
}