Skip to content

Commit 32aaecb

Browse files
Add v3 reference format matching
1 parent ab5da1b commit 32aaecb

2 files changed

Lines changed: 326 additions & 26 deletions

File tree

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
from pathlib import Path
2+
import sys
3+
4+
import pandas as pd
5+
6+
7+
REPO_ROOT = Path(__file__).resolve().parents[1]
8+
sys.path.insert(0, str(REPO_ROOT))
9+
10+
from versions.v3.src.matching.deterministic_rules import ( # noqa: E402
11+
find_deterministic_matches,
12+
find_exact_matches,
13+
find_reference_format_matches,
14+
)
15+
16+
17+
def test_reference_format_match_when_raw_reference_differs_but_normalized_matches():
    """Rows differing only in raw reference formatting link at the reference-format stage.

    The bank and ledger rows agree on account, currency, direction, amount,
    date, counterparty and ``normalized_reference`` ("REF001"); only
    ``raw_reference`` differs ("ref-001" vs "REF 001"). The exact stage is
    expected to return nothing and the reference-format stage exactly one link.
    """
    # Fields carried identically by the bank row and the ledger row.
    common_fields = {
        "account_id": ["ACC1"],
        "currency": ["CAD"],
        "direction": ["credit"],
        "amount_numeric": [100.00],
        "canonical_date": ["2026-03-12"],
    }
    bank_frame = pd.DataFrame(
        {
            "source_row_id": [2],
            "bank_transaction_id": ["B001"],
            **common_fields,
            "raw_reference": ["ref-001"],
            "normalized_reference": ["REF001"],
            "counterparty": ["Client A"],
        }
    )
    ledger_frame = pd.DataFrame(
        {
            "source_row_id": [10],
            "ledger_transaction_id": ["L001"],
            **common_fields,
            "raw_reference": ["REF 001"],
            "normalized_reference": ["REF001"],
            "counterparty": ["Client A"],
        }
    )

    exact_links = find_exact_matches(bank_frame, ledger_frame, run_id="test_run")
    # The exact-stage output is handed to the reference-format stage as its
    # already-established links.
    format_links = find_reference_format_matches(
        canonical_bank=bank_frame,
        canonical_ledger=ledger_frame,
        run_id="test_run",
        existing_links=exact_links,
    )

    assert exact_links.empty
    assert len(format_links) == 1

    # Expected values of the single link row, checked column by column.
    expected_link = {
        "match_type": "REFERENCE_FORMAT_MATCH",
        "stage_detected": "deterministic_reference_format",
        "confidence_score": 0.92,
        "bank_source_row_id": 2,
        "ledger_source_row_id": 10,
    }
    for column, expected_value in expected_link.items():
        assert format_links.loc[0, column] == expected_value
63+
64+
65+
def test_reference_format_does_not_match_when_normalized_reference_differs():
    """No reference-format link is produced when normalized references disagree.

    The two rows are identical on account, currency, direction, amount, date
    and counterparty, but the bank row normalizes to "REF001" and the ledger
    row to "REF999". The reference-format stage is expected to return an
    empty result.
    """
    # Fields carried identically by the bank row and the ledger row.
    common_fields = {
        "account_id": ["ACC1"],
        "currency": ["CAD"],
        "direction": ["credit"],
        "amount_numeric": [100.00],
        "canonical_date": ["2026-03-12"],
    }
    bank_frame = pd.DataFrame(
        {
            "source_row_id": [2],
            "bank_transaction_id": ["B001"],
            **common_fields,
            "raw_reference": ["ref-001"],
            "normalized_reference": ["REF001"],
            "counterparty": ["Client A"],
        }
    )
    ledger_frame = pd.DataFrame(
        {
            "source_row_id": [10],
            "ledger_transaction_id": ["L001"],
            **common_fields,
            "raw_reference": ["REF 999"],
            "normalized_reference": ["REF999"],
            "counterparty": ["Client A"],
        }
    )

    # No existing_links are supplied here; the stage is called on its own.
    links = find_reference_format_matches(
        canonical_bank=bank_frame,
        canonical_ledger=ledger_frame,
        run_id="test_run",
    )

    assert links.empty
103+
104+
105+
def test_deterministic_matching_runs_reference_format_before_timing():
    """The combined deterministic entry point labels this pair REFERENCE_FORMAT_MATCH.

    The bank and ledger rows differ only in ``raw_reference`` ("ref-001" vs
    "REF 001") and share ``normalized_reference`` "REF001". The pipeline is
    expected to return exactly one link whose ``match_type`` is
    "REFERENCE_FORMAT_MATCH".

    NOTE(review): the test name implies a later timing stage could otherwise
    claim this pair; that stage is not visible from this file -- confirm
    against ``find_deterministic_matches``.
    """
    # Fields carried identically by the bank row and the ledger row.
    common_fields = {
        "account_id": ["ACC1"],
        "currency": ["CAD"],
        "direction": ["credit"],
        "amount_numeric": [100.00],
        "canonical_date": ["2026-03-12"],
    }
    bank_frame = pd.DataFrame(
        {
            "source_row_id": [2],
            "bank_transaction_id": ["B001"],
            **common_fields,
            "raw_reference": ["ref-001"],
            "normalized_reference": ["REF001"],
            "counterparty": ["Client A"],
        }
    )
    ledger_frame = pd.DataFrame(
        {
            "source_row_id": [10],
            "ledger_transaction_id": ["L001"],
            **common_fields,
            "raw_reference": ["REF 001"],
            "normalized_reference": ["REF001"],
            "counterparty": ["Client A"],
        }
    )

    links = find_deterministic_matches(
        canonical_bank=bank_frame,
        canonical_ledger=ledger_frame,
        run_id="test_run",
    )

    assert len(links) == 1
    assert links.loc[0, "match_type"] == "REFERENCE_FORMAT_MATCH"

0 commit comments

Comments
 (0)