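# Daily Provider Check #52
# NOTE(review): this header appears to be page chrome copied along with the
# workflow (navigation text and the run title), not workflow source — confirm
# before committing.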

name: Daily Provider Check

# Zero-quota sweep, run once a day. For every unique (provider, key) pair the
# backend fetches the provider's /v1/models catalog and confirms two things:
#   1. the key is not rejected (401/403), and
#   2. each model_id configured in ModelSpec is still listed upstream.
# No LLM inference is performed, so transient upstream 429/503 throttling
# (OpenRouter :free global rate-limit, Gemini high demand, Cerebras traffic
# spikes) does NOT turn this check red.
#
# Real inference probing (one prompt per slot) is done elsewhere: in the
# post-deploy integration test of deploy-backend.yml, and in a manual
# workflow_dispatch against /providers/full when a deep sweep is wanted.
on:
  schedule:
    # Every day at 10:00 UTC.
    - cron: '0 10 * * *'
  workflow_dispatch:
    inputs:
      # Optional override of the backend to probe on manual runs.
      base_url:
        description: 'Backend base URL to probe'
        default: 'https://deeppin.duckdns.org'
        required: false
jobs:
  # Single job: call the backend's /health/providers/keys endpoint, print the
  # raw response, then print a categorized summary. The job fails when the
  # request fails, the reply is not HTTP 200, the body is not the expected
  # JSON, or any result is reported as not ok.
  probe:
    runs-on: ubuntu-latest
    # No step in this job uses the repository token, so grant it nothing.
    permissions: {}
    # The only slow operation is one curl call capped at 60s; keep the job
    # from lingering until the 360-minute default if a runner hangs.
    timeout-minutes: 5
    steps:
      - name: Probe /health/providers/keys
        env:
          # Passed via the environment (not interpolated into the script) so a
          # dispatch-supplied value is never parsed as shell code.
          BASE_URL: ${{ inputs.base_url || 'https://deeppin.duckdns.org' }}
        run: |
          set -euo pipefail
          echo "Probing $BASE_URL/health/providers/keys ..."
          # Zero-quota check: one /v1/models GET per unique (provider, key).
          # Pure network I/O, so 60s is plenty.
          # -S keeps curl's own error message visible despite -s; the explicit
          # `if !` reports transport failures instead of dying silently.
          if ! http_status=$(curl -sS -o response.json -w "%{http_code}" \
            --max-time 60 \
            "$BASE_URL/health/providers/keys"); then
            echo "::error::Request to $BASE_URL/health/providers/keys failed (curl error, see above)"
            exit 1
          fi
          echo "HTTP status: $http_status"
          echo "=== Full response ==="
          # Pretty-print the body; if it is not JSON, show the raw start of it
          # so the log explains what the server actually returned.
          if ! python3 -m json.tool response.json; then
            echo "::error::Response body is not valid JSON (HTTP $http_status)"
            echo "--- raw body (first 2000 bytes) ---"
            head -c 2000 response.json
            echo
            exit 1
          fi
          echo "====================="
          # Group failures by cause so oncall can tell at a glance whether
          # a key is dead, a configured model got deprecated upstream, or
          # something else went wrong.
          HTTP_STATUS="$http_status" python3 - <<'PY'
          import json, os, sys

          http_status = os.environ.get("HTTP_STATUS", "?")

          with open("response.json") as f:
              data = json.load(f)

          # An error payload (valid JSON but without "results") must produce a
          # clear message rather than a KeyError traceback.
          if not isinstance(data, dict) or not isinstance(data.get("results"), list):
              print(f"::error::Unexpected response shape (HTTP {http_status}): no 'results' list")
              sys.exit(1)

          print(
              f"\nSummary: total={data.get('total', '?')} "
              f"ok={data.get('ok', '?')} failed={data.get('failed', '?')}"
          )

          # A result without an "ok" field is treated as a failure.
          failures = [r for r in data["results"] if not r.get("ok")]

          if failures:
              print("\nFailures by category:")
              for r in failures:
                  prov = r.get("provider", "?")
                  key = r.get("key", "?")
                  if r.get("key_valid") is False:
                      err = (r.get("error") or "").replace("\n", " ")[:200]
                      print(f"  [KEY INVALID] {prov} [{key}]: {err}")
                  elif r.get("missing_models"):
                      miss = ", ".join(map(str, r["missing_models"]))
                      print(f"  [MODEL DRIFT] {prov} [{key}]: missing {miss}")
                  else:
                      err = (r.get("error") or "").replace("\n", " ")[:200]
                      status = r.get("status_code", "?")
                      print(f"  [OTHER {status}] {prov} [{key}]: {err}")
              # Exit only after every failure has been listed.
              sys.exit(1)

          # The status was previously echoed but never acted on; a non-200
          # reply is a failure even when no individual result is flagged.
          if http_status != "200":
              print(f"::error::Endpoint returned HTTP {http_status} (expected 200)")
              sys.exit(1)

          print("\nAll keys valid, no model drift.")
          PY