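# NOTE(review): the lines below look like web-page residue (navigation text
# and a pull-request title) captured with this file; they are not workflow
# content and are commented out so the document parses as YAML.
# Skip to content
#
# test(rlasso): hdm parity for rlasso_effects (multi-target) + scope note #292
#
# test(rlasso): hdm parity for rlasso_effects (multi-target) + scope note
#
# test(rlasso): hdm parity for rlasso_effects (multi-target) + scope note #292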

name: Citation Audit
# Enforces CLAUDE.md §10 "zero-hallucination" red line. Four gates run
# in dependency order — Gate N only runs if Gate 0..N-1 passed:
#
# 0. pytest on the auditor test suites themselves — if a regex / stopword
# tweak broke the auditors, all downstream verdicts are meaningless.
#
# 1. tools/audit_bib_duplicates.py — paper.bib must have no duplicate
# bib keys (biblatex error), no duplicate DOIs, no duplicate arXiv
# ids (same paper under two keys = citation drift).
#
# 2. tools/audit_bib_coverage.py — every ``[@bibkey]`` in src/ / docs/ /
# paper.md must resolve to a paper.bib entry (dangling = hard fail,
# would render as ``???`` in pandoc). Orphans (unused bib entries)
# are reported but do not fail CI — they're valuable methodological
# references that just haven't been wired back to docstrings yet.
#
# 3. tools/audit_citations.py — every arXiv / NBER / DOI reference in
# src/ and docs/ must match primary-source metadata (authors /
# year / title). Fails on MISMATCH or UNRESOLVED under --strict.
#
# The Gate 3 citation report (audit_report.md) is uploaded as the
# `citation-audit-report` artifact so reviewers can see the full verdict.
#
# Security: no untrusted PR inputs flow into `run:` commands — only the
# repo's own scripts under tools/ are executed.
# Triggers: pull requests targeting main, pushes to main, and manual runs.
# Both event filters carry the same path list, so the audit only runs when
# cited sources, the bibliography, the auditors, their tests, or this
# workflow file change. Keep the two lists in sync when editing.
on:
  pull_request:
    branches: [main]
    paths:
      - 'src/**'
      - 'docs/**'
      - 'paper.bib'
      - 'paper.md'
      - 'tools/audit_citations.py'
      - 'tools/audit_bib_duplicates.py'
      - 'tools/audit_bib_coverage.py'
      - 'tools/suggest_bibkey_backfills.py'
      - 'tests/test_audit_citations.py'
      - 'tests/test_audit_bib_duplicates.py'
      - 'tests/test_audit_bib_coverage.py'
      - 'tests/test_suggest_bibkey_backfills.py'
      - 'tools/audit_retractions.py'
      - 'tests/test_audit_retractions.py'
      - '.github/workflows/citation-audit.yml'
  push:
    branches: [main]
    paths:
      - 'src/**'
      - 'docs/**'
      - 'paper.bib'
      - 'paper.md'
      - 'tools/audit_citations.py'
      - 'tools/audit_bib_duplicates.py'
      - 'tools/audit_bib_coverage.py'
      - 'tools/suggest_bibkey_backfills.py'
      - 'tests/test_audit_citations.py'
      - 'tests/test_audit_bib_duplicates.py'
      - 'tests/test_audit_bib_coverage.py'
      - 'tests/test_suggest_bibkey_backfills.py'
      - 'tools/audit_retractions.py'
      - 'tests/test_audit_retractions.py'
      - '.github/workflows/citation-audit.yml'
  # Bare key on purpose: manual runs take no inputs.
  workflow_dispatch:
# Opt into Node 24 runtime early for all JavaScript actions.
# GitHub's documented migration path (blog post 2025-09-19) before the
# hard cutover on 2026-06-02. Keeps existing action versions intact.
# Security: pure runner config, no user input.
env:
  # Quoted so the value stays the string 'true' rather than a YAML boolean.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'
# One audit per ref: a newer run for the same ref cancels the one still in
# progress, so only the latest commit's verdict is reported.
concurrency:
  group: citation-audit-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Single job running Gates 0-3 as sequential steps. A failing step skips
  # every later step except the two marked `if: always()`.
  audit:
    name: Audit arXiv / NBER / DOI citations
    runs-on: ubuntu-latest
    timeout-minutes: 10
    # Least privilege: the steps below only read the checked-out repository.
    # Artifact upload and the step summary do not need extra token scopes.
    permissions:
      contents: read
    steps:
      - name: Check out repository
        uses: actions/checkout@v5

      - name: Set up Python 3.13
        uses: actions/setup-python@v6
        with:
          # Quoted so the version is a string, not a YAML float.
          python-version: '3.13'

      - name: Install certifi + pytest
        # certifi for the citation auditor's HTTPS calls; pytest to run
        # the auditor test suite that pins the §10 regex regressions.
        run: python -m pip install --quiet certifi pytest

      - name: Gate 0 — auditor test suite
        # Fast (<2s) stdlib-only tests guarding DOI regex / Q-network
        # stopword / parse_bib brace-balance / [@bibkey] extractor
        # regressions. Runs BEFORE the live auditors so a broken auditor
        # can't silently pass.
        #
        # -o 'addopts=' overrides the project-wide pyproject.toml addopts
        # (which pull in --cov=statspai for local dev); the auditor tests
        # are stdlib-only and don't need the full coverage toolchain.
        #
        # --noconftest skips tests/conftest.py, which pre-imports
        # ``scipy.optimize`` to stabilise the PyO3 type registry for the
        # full pytest session. The auditor tests are stdlib-only and
        # never touch scipy, and this Gate-0 image deliberately doesn't
        # install scipy to keep the image small.
        run: |
          python -m pytest \
            tests/test_audit_citations.py \
            tests/test_audit_bib_duplicates.py \
            tests/test_audit_bib_coverage.py \
            tests/test_suggest_bibkey_backfills.py \
            tests/test_audit_retractions.py \
            --noconftest -o 'addopts=' -q

      - name: Gate 1 — paper.bib duplicate auditor (strict)
        id: bib_audit
        # Stdlib only, no network. A broken bibliography invalidates
        # downstream checks.
        run: python tools/audit_bib_duplicates.py --strict

      - name: Gate 2 — paper.bib coverage auditor (strict-dangling)
        id: coverage_audit
        # Stdlib only. Every ``[@bibkey]`` in src/ / docs/ / paper.md
        # must resolve to a paper.bib entry — a dangling ref would
        # render as ``???`` in pandoc's paper.md build.
        #
        # Orphans are deliberately NOT gated: paper.bib carries
        # methodological references from JOSS submission that haven't
        # been wired back to docstrings yet. Those will be retired as
        # the bibkey graph is backfilled.
        run: python tools/audit_bib_coverage.py --strict-dangling --hide-orphans

      - name: Gate 3 — citation auditor (strict)
        id: citation_audit
        # Live verification against arXiv / NBER / Crossref. --strict:
        # unresolved IDs fail alongside mismatches, so a typo that
        # breaks primary-source lookup is caught early.
        #
        # Exit-code contract (tools/audit_citations.py main()):
        #   0 — clean.
        #   1 — mismatch, or a GENUINE unresolved id (source reachable
        #       but the id is absent) → a real §10 zero-hallucination
        #       failure. Blocks the merge.
        #   2 — soft failure: the ONLY unresolved ids were transient
        #       upstream errors (arXiv / Crossref 429 rate-limit on the
        #       shared runner IP, or a network blip). NOT a bad citation,
        #       so it must not block a merge — we surface it as a warning
        #       and pass. The auditor already retries 429/5xx with
        #       back-off before giving up.
        #
        # `set +e` lets the script capture the auditor's exit code and
        # map it itself instead of aborting on the first non-zero status.
        run: |
          set +e
          python tools/audit_citations.py --strict --out audit_report.md
          code=$?
          if [ "$code" -eq 2 ]; then
            echo "::warning title=Citation audit soft failure::Auditor could not reach arXiv/Crossref (rate limit / network); no mismatch detected — treating as a soft pass (exit 2)."
            exit 0
          fi
          exit "$code"

      - name: Upload citation audit report
        # always(): publish the report even when Gate 3 failed, which is
        # when reviewers need it most.
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: citation-audit-report
          path: audit_report.md
          retention-days: 30

      - name: Summarise to GitHub step summary
        # always(): write a summary whether or not the gates passed.
        if: always()
        # Copies line 3 of the report into the summary (presumably the
        # one-line verdict — confirm against tools/audit_citations.py).
        # The artifact is mentioned only when the report file exists, so
        # the summary never points at an artifact that was not produced.
        run: |
          {
            echo "### Citation Audit"
            echo ""
            if [ -f audit_report.md ]; then
              sed -n '3p' audit_report.md
              echo ""
              echo "Full citation report is attached as the \`citation-audit-report\` artifact."
            else
              echo "No citation report was produced (audit_report.md is missing), so no artifact is attached."
            fi
          } >> "$GITHUB_STEP_SUMMARY"