Add agent false-pass gate proof (#36)

yangfei222666-9 · web-flow · commit 7533cae41de1 · 2026-06-24T21:04:20.000+08:00
Co-authored-by: yangfei222666-9 <261852489+yangfei222666-9@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -32,6 +32,7 @@
 This is the main TaijiOS repository.
 
 - Reviewer start page: [docs/START_HERE_FOR_REVIEWERS.md](docs/START_HERE_FOR_REVIEWERS.md) — 5-minute public review path, exact verdicts, and no-overclaim boundaries.
+- Agent Reliability False-Pass Gate: `python scripts/check_false_pass_gate.py --self-test examples/false_pass_gate/fixtures` — local proof that unsupported "done" claims and missing `cannot_claim` boundaries are blocked before success language is accepted.
 - SpaceXAI proof packet: [docs/SPACE_X_AI_PROOF_PACKET.md](docs/SPACE_X_AI_PROOF_PACKET.md) — human-review evidence for an evidence-first AI agent runtime; not SpaceX endorsement, not production readiness, and not real hardware control.
 - 权威边界文档: [Product Spine](docs/architecture/PRODUCT_SPINE_AUTHORITY.md), [Provider Gate](docs/provider/PROVIDER_BOUNDARY_GATE.md), [Direct LLM Caller](docs/provider/DIRECT_LLM_CALLER_BOUNDARY.md), [Multi-Model Gate](docs/provider/MULTI_MODEL_ARCHITECTURE_GATE.md), [Runtime Matrix](docs/runtime/RUNTIME_MATURITY_MATRIX.md), [HSDL](docs/design/HSDL_CANONICAL_SPEC_v0.1.md), [小九通天录](xiaojiu_tongtianlu/BOUNDARY.md), [Life Systems](life_systems/BIOSECURITY_BOUNDARY.md) — docs-only review gates; not repo-level PASS, runtime readiness, or provider readiness.
 - Machine-readable proof index: [docs/proof_index.json](docs/proof_index.json)
diff --git a/docs/START_HERE_FOR_REVIEWERS.md b/docs/START_HERE_FOR_REVIEWERS.md
@@ -23,6 +23,7 @@ The core claim is narrow: agent progress should require parseable evidence befor
 ```bash
 pip install -e .
 bash scripts/replay_public_demo.sh
+python scripts/check_false_pass_gate.py --self-test examples/false_pass_gate/fixtures
 ```
 
 The replay script writes to a temporary output directory by default so review
@@ -42,6 +43,25 @@ Then inspect:
 - `docs/SPACE_X_AI_PROOF_PACKET.md`
 - `AUDIT_EVIDENCE.md`
 
+## Agent Reliability: False-Pass Gate
+
+The False-Pass Gate checks whether an AI-agent success claim has passing evidence
+and explicit `cannot_claim` boundaries. It is intentionally local and synthetic:
+
+```bash
+python scripts/check_false_pass_gate.py --self-test examples/false_pass_gate/fixtures
+```
+
+Expected final line of local output (one `case_id=...` result line per fixture is printed before it):
+
+```text
+self_test=PASS cases=3
+```
+
+This gate can support `LOCAL_VALIDATED` after the self-test and pytest pass. It
+does not prove remote CI, public adoption, production readiness, provider/API
+readiness, or recruiting validation.
+
 ## Verdict Semantics
 
 - `PASS`: a named local check passed with parseable evidence.
@@ -64,3 +84,4 @@ Do not claim:
 - Provider/API readiness unless a scoped live probe verifies it.
 - Release evidence `PASS` while `AUDIT_EVIDENCE.md` remains template-only.
 - Local demo `PASS` as repo-wide `PASS`.
+- Agent "done", "ready", or "complete" claims without passing evidence and explicit `cannot_claim` boundaries.
diff --git a/docs/proof_index.json b/docs/proof_index.json
@@ -37,6 +37,13 @@
       "command": "bash scripts/replay_public_demo.sh --runs 100",
       "verdict": "PENDING",
       "limitation": "The 100-run and 1,000-run batches are plans until executed and recorded."
+    },
+    {
+      "claim": "The Agent Reliability False-Pass Gate blocks unsupported AI-agent done/readiness claims and missing cannot_claim boundaries.",
+      "evidence_file": "scripts/check_false_pass_gate.py and examples/false_pass_gate/fixtures/",
+      "command": "python scripts/check_false_pass_gate.py --self-test examples/false_pass_gate/fixtures",
+      "verdict": "LOCAL_VALIDATED when self-test and pytest pass",
+      "limitation": "Synthetic local fixtures only; this does not prove remote CI, public proof update, production readiness, provider/API readiness, recruiting validation, or canonical truth."
     }
   ],
   "blocked_claims": [
@@ -45,6 +52,7 @@
     "hardware_control",
     "provider_api_readiness",
     "release_evidence_pass",
-    "trading_or_order_authority"
+    "trading_or_order_authority",
+    "false_pass_gate_without_evidence"
   ]
 }
diff --git a/examples/false_pass_gate/fixtures/fail_missing_cannot_claim.json b/examples/false_pass_gate/fixtures/fail_missing_cannot_claim.json
@@ -0,0 +1,14 @@
+{
+  "case_id": "fail_missing_cannot_claim",
+  "agent_claim": "Local validation passed.",
+  "success_claims": ["local_validated"],
+  "evidence": [
+    {
+      "type": "command",
+      "command": "python -m pytest tests/test_false_pass_gate.py -q",
+      "status": "PASS"
+    }
+  ],
+  "cannot_claim": [],
+  "expected_verdict": "BLOCKED"
+}
diff --git a/examples/false_pass_gate/fixtures/fail_unsupported_done.json b/examples/false_pass_gate/fixtures/fail_unsupported_done.json
@@ -0,0 +1,11 @@
+{
+  "case_id": "fail_unsupported_done",
+  "agent_claim": "Done. Everything is ready.",
+  "success_claims": ["done", "ready"],
+  "evidence": [],
+  "cannot_claim": [
+    "remote_ci_pass",
+    "production_readiness"
+  ],
+  "expected_verdict": "BLOCKED"
+}
diff --git a/examples/false_pass_gate/fixtures/pass_with_evidence.json b/examples/false_pass_gate/fixtures/pass_with_evidence.json
@@ -0,0 +1,18 @@
+{
+  "case_id": "pass_with_evidence",
+  "agent_claim": "Local demo is complete.",
+  "success_claims": ["done"],
+  "evidence": [
+    {
+      "type": "command",
+      "command": "python examples/quickstart_minimal.py",
+      "status": "PASS"
+    }
+  ],
+  "cannot_claim": [
+    "remote_ci_pass",
+    "production_readiness",
+    "public_adoption"
+  ],
+  "expected_verdict": "PASS"
+}
diff --git a/scripts/check_false_pass_gate.py b/scripts/check_false_pass_gate.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""Block unsupported AI-agent success claims before they become evidence."""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+from typing import Any
+
+
+# Lower-case, underscore-separated words that mark a claim as a success claim.
+# _is_success_claim matches them against normalized claim text both as exact
+# values and as substrings of longer text.
+SUCCESS_TOKENS = {
+    "done",
+    "ready",
+    "complete",
+    "completed",
+    "success",
+    "succeeded",
+    "pass",
+    "passed",
+    "local_validated",
+}
+
+
+def _as_list(value: Any) -> list[Any]:
+    """Coerce *value* to a list.
+
+    ``None`` becomes an empty list, a list is returned as-is (the same object,
+    not a copy), and any other value is wrapped in a one-element list.
+    """
+    if isinstance(value, list):
+        return value
+    return [] if value is None else [value]
+
+
+def _text_tokens(case: dict[str, Any]) -> set[str]:
+    """Return the normalized claim strings found in *case*.
+
+    Collects every entry of ``success_claims`` plus the ``agent_claim`` text
+    (when truthy), converts each to ``str``, lower-cases it, and replaces
+    ``-`` with ``_``. The input *case* is not modified.
+    """
+    # Copy first: _as_list returns the caller's own list when given a list, and
+    # appending to it would grow case["success_claims"] on every evaluation.
+    values = list(_as_list(case.get("success_claims")))
+    agent_claim = case.get("agent_claim")
+    if agent_claim:
+        values.append(agent_claim)
+    return {str(value).lower().replace("-", "_") for value in values}
+
+
+def _is_success_claim(case: dict[str, Any]) -> bool:
+    """Return True when any normalized claim string contains a success word.
+
+    Matching is by substring, which also covers exact equality, so a success
+    word embedded in a longer word or sentence counts as a match.
+    """
+    for token in _text_tokens(case):
+        for word in SUCCESS_TOKENS:
+            if word in token:
+                return True
+    return False
+
+
+def _has_passing_evidence(case: dict[str, Any]) -> bool:
+    """Return True when *case* has at least one usable passing evidence item.
+
+    An item qualifies when it is a dict whose ``status`` upper-cases to
+    ``PASS``, whose ``type`` is truthy, and which has a truthy value under at
+    least one of ``command``, ``file``, ``artifact``, ``url`` or ``value``.
+    """
+    pointer_keys = ("command", "file", "artifact", "url", "value")
+    for entry in _as_list(case.get("evidence")):
+        if not isinstance(entry, dict):
+            continue
+        if str(entry.get("status", "")).upper() != "PASS":
+            continue
+        if not entry.get("type"):
+            continue
+        if any(entry.get(key) for key in pointer_keys):
+            return True
+    return False
+
+
+def evaluate_case(case: dict[str, Any]) -> tuple[str, list[str]]:
+    """Return the gate verdict and reason codes for one case.
+
+    The verdict is ``BLOCKED`` when the case makes a success claim without
+    passing evidence (``missing_passing_evidence``) and/or declares no usable
+    ``cannot_claim`` boundary (``missing_cannot_claim``). Otherwise it is
+    ``PASS`` with the single reason ``evidence_and_boundaries_present``.
+    """
+    reasons: list[str] = []
+
+    if _is_success_claim(case) and not _has_passing_evidence(case):
+        reasons.append("missing_passing_evidence")
+
+    # Placeholders such as "", [""], [None] or {} are not boundaries; counting
+    # them would let a case pass without stating anything it cannot claim.
+    boundaries = [
+        entry
+        for entry in _as_list(case.get("cannot_claim"))
+        if (entry.strip() if isinstance(entry, str) else entry)
+    ]
+    if not boundaries:
+        reasons.append("missing_cannot_claim")
+
+    if reasons:
+        return "BLOCKED", reasons
+    return "PASS", ["evidence_and_boundaries_present"]
+
+
+def load_case(path: Path) -> dict[str, Any]:
+    """Read *path* as UTF-8 text and return the parsed JSON document."""
+    raw_text = path.read_text(encoding="utf-8")
+    return json.loads(raw_text)
+
+
+def format_case_result(path: Path, actual: str, expected: str | None, reasons: list[str]) -> str:
+    """Build the one-line ``key=value`` report for a single case.
+
+    The line carries ``case_id``, ``actual`` and comma-joined ``reasons``;
+    ``expected`` is appended only when it is truthy. The case id falls back to
+    the file stem when the JSON has no ``case_id`` key.
+    """
+    # The case file is read again here only to recover its declared case_id.
+    case_id = load_case(path).get("case_id", path.stem)
+    reason_text = ",".join(reasons)
+    line = f"case_id={case_id} actual={actual} reasons={reason_text}"
+    if expected:
+        line += f" expected={expected}"
+    return line
+
+
+def run_case(path: Path) -> int:
+    """Evaluate one case file, print its report line, and return an exit code.
+
+    Returns 0 when the gate verdict is ``PASS`` and 1 otherwise; the file's
+    ``expected_verdict`` is only echoed in the report, not compared.
+    """
+    case = load_case(path)
+    verdict, reasons = evaluate_case(case)
+    report = format_case_result(path, verdict, case.get("expected_verdict"), reasons)
+    print(report)
+    if verdict == "PASS":
+        return 0
+    return 1
+
+
+def run_self_test(fixtures_dir: Path) -> int:
+    """Evaluate every ``*.json`` fixture in *fixtures_dir* against its expectation.
+
+    Prints one report line per fixture, then a ``self_test=PASS`` or
+    ``self_test=FAIL`` summary. Returns 0 only when at least one fixture was
+    found and every fixture's verdict equals its upper-cased
+    ``expected_verdict``; returns 1 otherwise.
+    """
+    paths = sorted(fixtures_dir.glob("*.json"))
+
+    # A missing or empty directory yields no paths. Reporting PASS for zero
+    # cases would itself be a false pass, so fail explicitly.
+    if not paths:
+        print("self_test=FAIL cases=0 failures=no_fixtures_found")
+        return 1
+
+    failures: list[str] = []
+    for path in paths:
+        case = load_case(path)
+        actual, reasons = evaluate_case(case)
+        expected = str(case.get("expected_verdict", "")).upper()
+        print(format_case_result(path, actual, expected, reasons))
+        if actual != expected:
+            failures.append(path.name)
+
+    if failures:
+        print(f"self_test=FAIL cases={len(paths)} failures={','.join(failures)}")
+        return 1
+
+    print(f"self_test=PASS cases={len(paths)}")
+    return 0
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command line; exactly one of ``--case`` or ``--self-test`` is required."""
+    parser = argparse.ArgumentParser(description="Check false-pass evidence gates.")
+    modes = parser.add_mutually_exclusive_group(required=True)
+    option_help = (
+        ("--case", "Evaluate one JSON case file."),
+        ("--self-test", "Evaluate all JSON fixtures in a directory."),
+    )
+    for flag, help_text in option_help:
+        modes.add_argument(flag, type=Path, help=help_text)
+    return parser.parse_args()
+
+
+def main() -> int:
+    """Dispatch to single-case or self-test mode and return its exit code."""
+    args = parse_args()
+    if args.case is not None:
+        return run_case(args.case)
+    return run_self_test(args.self_test)
+
+
+# When run as a script, exit the process with main()'s return code.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_false_pass_gate.py b/tests/test_false_pass_gate.py
@@ -0,0 +1,58 @@
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+
+# Paths are resolved relative to this test file: the repository root is the
+# parent of the directory containing it.
+PROJECT_ROOT = Path(__file__).parent.parent
+SCRIPT = PROJECT_ROOT / "scripts" / "check_false_pass_gate.py"
+FIXTURES = PROJECT_ROOT / "examples" / "false_pass_gate" / "fixtures"
+
+
+def run_gate(*args: str) -> subprocess.CompletedProcess[str]:
+    """Run the gate script with *args* from the project root and capture its text output.
+
+    Uses the current interpreter and a 10-second timeout.
+    """
+    command = [sys.executable, str(SCRIPT)]
+    command.extend(args)
+    return subprocess.run(
+        command,
+        cwd=PROJECT_ROOT,
+        text=True,
+        capture_output=True,
+        timeout=10,
+    )
+
+
+def test_self_test_fixture_suite_passes():
+    """The bundled fixture directory self-tests cleanly and reports three cases."""
+    completed = run_gate("--self-test", str(FIXTURES))
+    output = completed.stdout
+
+    assert completed.returncode == 0, completed.stderr
+    for marker in ("self_test=PASS", "cases=3"):
+        assert marker in output
+
+
+def test_unsupported_done_claim_is_blocked():
+    """A "done"/"ready" claim with no evidence exits 1 and reports BLOCKED."""
+    fixture = FIXTURES / "fail_unsupported_done.json"
+    completed = run_gate("--case", str(fixture))
+    output = completed.stdout
+
+    assert completed.returncode == 1
+    assert "case_id=fail_unsupported_done" in output
+    assert "actual=BLOCKED" in output
+    assert "missing_passing_evidence" in output
+
+
+def test_case_with_passing_evidence_and_boundaries_passes():
+    """A claim backed by passing evidence and cannot_claim boundaries exits 0 with PASS."""
+    fixture = FIXTURES / "pass_with_evidence.json"
+    completed = run_gate("--case", str(fixture))
+    output = completed.stdout
+
+    assert completed.returncode == 0, completed.stderr
+    assert "case_id=pass_with_evidence" in output
+    assert "actual=PASS" in output
+
+
+def test_proof_index_records_false_pass_gate_claim_and_limits():
+    """docs/proof_index.json lists the False-Pass Gate claim, its verdict and its limits."""
+    # Decode explicitly as UTF-8, as the gate script does for its JSON inputs;
+    # the locale default encoding is not UTF-8 on every platform.
+    index_path = PROJECT_ROOT / "docs" / "proof_index.json"
+    proof_index = json.loads(index_path.read_text(encoding="utf-8"))
+
+    matching_claims = [
+        claim
+        for claim in proof_index["claims"]
+        if "False-Pass Gate" in claim["claim"]
+    ]
+    assert matching_claims
+    assert matching_claims[0]["verdict"] == "LOCAL_VALIDATED when self-test and pytest pass"
+    assert "remote CI" in matching_claims[0]["limitation"]
+    assert "false_pass_gate_without_evidence" in proof_index["blocked_claims"]