33StatsPAI now separates API lifecycle from numerical validation evidence:
44``stability='stable'`` means the public signature is locked, while
55``validation_status='certified'`` / ``'validated'`` carries the
6- parity-evidence signal. This script keeps the old risk visible by
7- counting stable API entries that still lack a parity-test reference in
6+ validation-evidence signal. This script keeps the old risk visible by
7+ counting stable API entries that still have neither registry-attached
8+ validation evidence nor a parity-test reference in
89``tests/reference_parity/`` + ``tests/external_parity/``.
910
1011The catch: until v1.13 every newly-registered function was *implicitly*
1112``stable`` (the field's default), so the catalogue's ~970 stable
1213entries currently mix two populations:
1314
14- * **Parity-test backed** — at least one test in
15+ * **Validation-backed** — the registry marks the function
16+ ``certified`` / ``validated`` or at least one test in
1517 ``tests/reference_parity/`` or ``tests/external_parity/`` exercises
1618 the function with R / Stata / paper-replication numbers.
1719* **API-stable but unbacked** — the public API is stable, but no
6062#: a parity test or registry validation evidence before --check fails.
6163#: Bumped when we deliberately add hand-written entries faster than
6264#: backing evidence. Lower it over time as the audit gets cleaned up.
63- UNBACKED_HANDWRITTEN_FLOOR = 220
65+ UNBACKED_HANDWRITTEN_FLOOR = 190
6466
6567#: Regex matching ``sp.<name>(`` references in test source. Used to
6668#: attribute parity coverage to public ``sp.*`` symbols.
@@ -125,6 +127,7 @@ def _registry_specs():
125127def collect () -> dict :
126128 registry , hand_written = _registry_specs ()
127129 backed , sources = _backed_functions ()
130+ evidence_sources : Dict [str , List [str ]] = {k : list (v ) for k , v in sources .items ()}
128131
129132 stable_handwritten : List [str ] = []
130133 stable_auto : List [str ] = []
@@ -144,12 +147,22 @@ def collect() -> dict:
144147 continue
145148 # spec.stability == "stable"
146149 is_hand = name in hand_written
150+ registry_backed = spec .validation_status in {"certified" , "validated" }
151+ if registry_backed :
152+ notes = list (getattr (spec , "validation_notes" , []) or [])
153+ if not notes :
154+ notes = [f"registry validation_status={ spec .validation_status } " ]
155+ evidence_sources .setdefault (name , [])
156+ for note in notes :
157+ if note not in evidence_sources [name ]:
158+ evidence_sources [name ].append (note )
159+ is_backed = name in backed or registry_backed
147160 if is_hand :
148161 stable_handwritten .append (name )
149- (backed_handwritten if name in backed else unbacked_handwritten ).append (name )
162+ (backed_handwritten if is_backed else unbacked_handwritten ).append (name )
150163 else :
151164 stable_auto .append (name )
152- (backed_auto if name in backed else unbacked_auto ).append (name )
165+ (backed_auto if is_backed else unbacked_auto ).append (name )
153166
154167 return {
155168 "totals" : {
@@ -170,6 +183,11 @@ def collect() -> dict:
170183 for _ in p .rglob ("test_*.py" )
171184 ),
172185 "symbols_referenced_in_parity_tests" : len (backed ),
186+ "registry_validated_symbols" : sum (
187+ 1 for spec in registry .values ()
188+ if spec .stability == "stable"
189+ and spec .validation_status in {"certified" , "validated" }
190+ ),
173191 },
174192 "lists" : {
175193 "unbacked_handwritten" : sorted (unbacked_handwritten ),
@@ -178,7 +196,7 @@ def collect() -> dict:
178196 "deprecated" : sorted (deprecated ),
179197 },
180198 "sources" : {
181- name : srcs for name , srcs in sources .items ()
199+ name : srcs for name , srcs in evidence_sources .items ()
182200 # Only carry backed-handwritten sources in the JSON payload —
183201 # auto-registered specs aren't the focus of this audit.
184202 if name in set (backed_handwritten )
@@ -206,7 +224,7 @@ def render_report(stats: dict, *, show_unbacked: bool = False) -> str:
206224 lines .append (f" experimental : { t ['experimental' ]} " )
207225 lines .append (f" deprecated : { t ['deprecated' ]} " )
208226 lines .append ("" )
209- lines .append ("Parity coverage (sp.<name> referenced in parity tests) " )
227+ lines .append ("Validation coverage" )
210228 lines .append ("-" * 50 )
211229 lines .append (
212230 f" parity test files : "
@@ -216,6 +234,10 @@ def render_report(stats: dict, *, show_unbacked: bool = False) -> str:
216234 f" distinct sp.* symbols referenced : "
217235 f"{ p ['symbols_referenced_in_parity_tests' ]} "
218236 )
237+ lines .append (
238+ f" registry certified/validated : "
239+ f"{ p ['registry_validated_symbols' ]} "
240+ )
219241 lines .append (
220242 f" stable hand-written, BACKED : "
221243 f"{ p ['backed_handwritten' ]} "
@@ -238,9 +260,9 @@ def render_report(stats: dict, *, show_unbacked: bool = False) -> str:
238260 lines .append ("-" * 50 )
239261 lines .append (
240262 "* UNBACKED hand-written: a maintainer wrote a stable public "
241- "API, but this audit found no parity-test reference. Add a "
242- "test, attach validation evidence, or mark immature APIs "
243- "experimental."
263+ "API, but this audit found no registry validation evidence and "
264+ "no parity- test reference. Add evidence, add a test, or mark "
265+ "immature APIs experimental."
244266 )
245267 lines .append (
246268 "* UNBACKED auto-registered: classified as stable by default. "
@@ -263,14 +285,15 @@ def check_drift(stats: dict) -> int:
263285 if n > floor :
264286 print (
265287 f"FAIL: { n } hand-written stable API entries lack parity tests "
266- f"(floor: { floor } ). Either add tests, attach validation "
267- f"evidence, or downgrade immature APIs to experimental." ,
288+ f"or registry validation evidence (floor: { floor } ). Either "
289+ f"add evidence, add tests, or downgrade immature APIs to "
290+ f"experimental." ,
268291 file = sys .stderr ,
269292 )
270293 return 1
271294 print (
272295 f"OK: { n } hand-written stable API entries lack parity tests "
273- f"(floor: { floor } )."
296+ f"or registry validation evidence (floor: { floor } )."
274297 )
275298 return 0
276299
0 commit comments