Skip to content

Commit dd8a864

Browse files
author
aratea-bot
committed
chore(auto): daily run 2026-06-23 (learning capture + finalize + capture + manifest)
1 parent aa0390d commit dd8a864

52 files changed

Lines changed: 26058 additions & 2923 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

dashboard/public/predictor_manifest.json

Lines changed: 2558 additions & 2558 deletions
Large diffs are not rendered by default.

predictor/data/ledger/paper_bets.csv

Lines changed: 317 additions & 301 deletions
Large diffs are not rendered by default.

predictor/data/predictions/forward_20260623T200521Z.json

Lines changed: 19636 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"generated_at": "20260623T203319Z",
3+
"n_capture_files": 19,
4+
"n_unique_markets": 2442,
5+
"n_resolved": 2178,
6+
"summary": {
7+
"climatology": {
8+
"n": 2178,
9+
"base_rate": 0.16666666666666666,
10+
"accuracy_at_0.5": 0.738751147842057,
11+
"brier_score": 0.169194909908179,
12+
"brier_baseline_constant": 0.13888888888888892,
13+
"brier_skill_score": -0.21820335133888857,
14+
"log_loss": 0.5431247893030308,
15+
"top1": {
16+
"n_events": 363,
17+
"top1_correct": 63,
18+
"top1_accuracy": 0.17355371900826447
19+
}
20+
},
21+
"forecast_blend": {
22+
"n": 2178,
23+
"base_rate": 0.16666666666666666,
24+
"accuracy_at_0.5": 0.8319559228650137,
25+
"brier_score": 0.14187192249583958,
26+
"brier_baseline_constant": 0.13888888888888892,
27+
"brier_skill_score": -0.021477841970044764,
28+
"log_loss": 0.4574195889553987,
29+
"top1": {
30+
"n_events": 363,
31+
"top1_correct": 81,
32+
"top1_accuracy": 0.2231404958677686
33+
}
34+
},
35+
"ensemble": {
36+
"n": 2178,
37+
"base_rate": 0.16666666666666666,
38+
"accuracy_at_0.5": 0.8370064279155188,
39+
"brier_score": 0.12284976925728895,
40+
"brier_baseline_constant": 0.13888888888888892,
41+
"brier_skill_score": 0.1154816613475198,
42+
"log_loss": 0.38803798190507144,
43+
"top1": {
44+
"n_events": 363,
45+
"top1_correct": 128,
46+
"top1_accuracy": 0.3526170798898072
47+
}
48+
},
49+
"kalshi_mid": {
50+
"n": 2178,
51+
"base_rate": 0.16666666666666666,
52+
"accuracy_at_0.5": 0.8847566574839302,
53+
"brier_score": 0.07969688934802571,
54+
"brier_baseline_constant": 0.13888888888888892,
55+
"brier_skill_score": 0.426182396694215,
56+
"log_loss": 0.25411937967654497
57+
}
58+
}
59+
}

predictor/runs/286/POST_RUN.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
**Run 286 — résolu YES · Multi-model A/B**
2+
3+
Event : Lowest temperature in New York City on Jun 22, 2026?
4+
Bin cible : `KXLOWTNYC-26JUN22-B67.5` · Outcome : YES · Low observée (bin gagnant) : 67-68°F
5+
6+
Modèles en course (⭐ = best Brier sur ce run) :
7+
- `vendor_ensemble` (champion) — p_yes=0.460, Brier=0.2919, P&L réel=$+80.62
8+
- `learned_v2` (challenger) — p_yes=0.797, Brier=0.0411, P&L théorique=$+80.62 ⭐
9+
- `kalshi_mid_baseline` (baseline) — p_yes=0.305, Brier=0.4830, P&L théorique=$+80.62
10+
11+
Verdict run 286 : Challenger `learned_v2` ahead this run.
12+
13+
Champion actuel : `vendor_ensemble` (la ligne réelle du ledger paper_bets.csv = celle de ce modèle).
14+
Challengers et baselines : positions shadow, P&L théorique, pas d'exposition réelle.
15+
16+
Compteur Phase 1 : voir `dashboard/public/predictor_manifest.json` après rebuild.
17+
18+
Règle de promotion : un challenger n'est pas promoté sur un seul win. Il faut N>=10 résolus avec rolling-mean Brier strictement inférieur ET sign test 1-sided p<0.10. Cf. `predictor/runs_learning/CHAMPION.json`.
19+
20+
Log complet : https://github.com/Elladriel80/aratea/blob/main/predictor/runs/286/report.json

predictor/runs/286/report.json

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,71 @@
124124
}
125125
],
126126
"resolution": {
127-
"outcome": null,
127+
"outcome": "yes",
128128
"observed_value_f": null,
129-
"ts_utc": null,
130-
"pnl_usd": null
129+
"observed_range_f": [
130+
67.0,
131+
68.0
132+
],
133+
"winning_bin_ticker": "KXLOWTNYC-26JUN22-B67.5",
134+
"ts_utc": "2026-06-23T20:33:19Z",
135+
"champion_pnl_usd": 80.62,
136+
"champion_payout_usd": 116.0,
137+
"champion_cost_usd": 35.38,
138+
"champion_won": true
131139
}
132140
}
133141
],
134-
"scoring": null,
142+
"scoring": {
143+
"outcome": "yes",
144+
"by_model": {
145+
"vendor_ensemble": {
146+
"role": "champion",
147+
"method": "ensemble",
148+
"p_yes": 0.4597,
149+
"brier": 0.2919,
150+
"won": true,
151+
"pnl_usd": 80.62,
152+
"pnl_type": "actual"
153+
},
154+
"learned_v2": {
155+
"role": "challenger",
156+
"method": "learned_v2",
157+
"p_yes": 0.7973,
158+
"brier": 0.0411,
159+
"won": true,
160+
"pnl_usd": 80.62,
161+
"pnl_type": "theoretical"
162+
},
163+
"kalshi_mid_baseline": {
164+
"role": "baseline",
165+
"method": "kalshi_mid",
166+
"p_yes": 0.305,
167+
"brier": 0.483,
168+
"won": true,
169+
"pnl_usd": 80.62,
170+
"pnl_type": "theoretical"
171+
}
172+
},
173+
"ranking_by_brier": [
174+
{
175+
"model": "learned_v2",
176+
"brier": 0.0411
177+
},
178+
{
179+
"model": "vendor_ensemble",
180+
"brier": 0.2919
181+
},
182+
{
183+
"model": "kalshi_mid_baseline",
184+
"brier": 0.483
185+
}
186+
],
187+
"best_brier_model": "learned_v2",
188+
"champion_at_time_of_run": "vendor_ensemble",
189+
"champion_is_best": false,
190+
"n_datapoints": 1,
191+
"note": "Run 286 multi-model trade. Single point \u2014 directional only, not statistically significant. The champion promotion rule requires N>=10 with sign test p<0.10."
192+
},
135193
"notes": "Auto-captured by daily_auto.py on 2026-06-21T20:05:28Z. Selection rule: top-3 median bin(s) per event with |edge_vs_mid|>=0.05, spread<=0.08. Champion vendor_ensemble edge=+0.155, side=YES, Kelly capped size=$35.49 (bankroll=$1774.55, portfolio_heat_after_register=2.0%)."
136194
}

predictor/runs/287/POST_RUN.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
**Run 287 — résolu NO · Multi-model A/B**
2+
3+
Event : Lowest temperature in New York City on Jun 22, 2026?
4+
Bin cible : `KXLOWTNYC-26JUN22-B65.5` · Outcome : NO · Low observée (bin gagnant) : 67-68°F
5+
6+
Modèles en course (⭐ = best Brier sur ce run) :
7+
- `vendor_ensemble` (champion) — p_yes=0.223, Brier=0.0496, P&L réel=$+18.63 ⭐
8+
- `learned_v2` (challenger) — p_yes=0.256, Brier=0.0655, P&L théorique=$+18.63
9+
- `kalshi_mid_baseline` (baseline) — p_yes=0.345, Brier=0.1190, P&L théorique=$+18.63
10+
11+
Verdict run 287 : Champion best ✓.
12+
13+
Champion actuel : `vendor_ensemble` (la ligne réelle du ledger paper_bets.csv = celle de ce modèle).
14+
Challengers et baselines : positions shadow, P&L théorique, pas d'exposition réelle.
15+
16+
Compteur Phase 1 : voir `dashboard/public/predictor_manifest.json` après rebuild.
17+
18+
Règle de promotion : un challenger n'est pas promoté sur un seul win. Il faut N>=10 résolus avec rolling-mean Brier strictement inférieur ET sign test 1-sided p<0.10. Cf. `predictor/runs_learning/CHAMPION.json`.
19+
20+
Log complet : https://github.com/Elladriel80/aratea/blob/main/predictor/runs/287/report.json

predictor/runs/287/report.json

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,71 @@
124124
}
125125
],
126126
"resolution": {
127-
"outcome": null,
127+
"outcome": "no",
128128
"observed_value_f": null,
129-
"ts_utc": null,
130-
"pnl_usd": null
129+
"observed_range_f": [
130+
67.0,
131+
68.0
132+
],
133+
"winning_bin_ticker": "KXLOWTNYC-26JUN22-B67.5",
134+
"ts_utc": "2026-06-23T20:33:19Z",
135+
"champion_pnl_usd": 18.63,
136+
"champion_payout_usd": 54.0,
137+
"champion_cost_usd": 35.37,
138+
"champion_won": true
131139
}
132140
}
133141
],
134-
"scoring": null,
142+
"scoring": {
143+
"outcome": "no",
144+
"by_model": {
145+
"vendor_ensemble": {
146+
"role": "champion",
147+
"method": "ensemble",
148+
"p_yes": 0.2228,
149+
"brier": 0.0496,
150+
"won": true,
151+
"pnl_usd": 18.63,
152+
"pnl_type": "actual"
153+
},
154+
"learned_v2": {
155+
"role": "challenger",
156+
"method": "learned_v2",
157+
"p_yes": 0.256,
158+
"brier": 0.0655,
159+
"won": true,
160+
"pnl_usd": 18.63,
161+
"pnl_type": "theoretical"
162+
},
163+
"kalshi_mid_baseline": {
164+
"role": "baseline",
165+
"method": "kalshi_mid",
166+
"p_yes": 0.345,
167+
"brier": 0.119,
168+
"won": true,
169+
"pnl_usd": 18.63,
170+
"pnl_type": "theoretical"
171+
}
172+
},
173+
"ranking_by_brier": [
174+
{
175+
"model": "vendor_ensemble",
176+
"brier": 0.0496
177+
},
178+
{
179+
"model": "learned_v2",
180+
"brier": 0.0655
181+
},
182+
{
183+
"model": "kalshi_mid_baseline",
184+
"brier": 0.119
185+
}
186+
],
187+
"best_brier_model": "vendor_ensemble",
188+
"champion_at_time_of_run": "vendor_ensemble",
189+
"champion_is_best": true,
190+
"n_datapoints": 1,
191+
"note": "Run 287 multi-model trade. Single point \u2014 directional only, not statistically significant. The champion promotion rule requires N>=10 with sign test p<0.10."
192+
},
135193
"notes": "Auto-captured by daily_auto.py on 2026-06-21T20:05:28Z. Selection rule: top-3 median bin(s) per event with |edge_vs_mid|>=0.05, spread<=0.08. Champion vendor_ensemble edge=-0.122, side=NO, Kelly capped size=$35.49 (bankroll=$1774.55, portfolio_heat_after_register=4.0%)."
136194
}

predictor/runs/288/POST_RUN.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
**Run 288 — résolu NO · Multi-model A/B**
2+
3+
Event : Lowest temperature in New York City on Jun 22, 2026?
4+
Bin cible : `KXLOWTNYC-26JUN22-B63.5` · Outcome : NO · Low observée (bin gagnant) : 67-68°F
5+
6+
Modèles en course (⭐ = best Brier sur ce run) :
7+
- `vendor_ensemble` (champion) — p_yes=0.035, Brier=0.0012, P&L réel=$+4.60 ⭐
8+
- `learned_v2` (challenger) — p_yes=0.068, Brier=0.0046, P&L théorique=$+4.60
9+
- `kalshi_mid_baseline` (baseline) — p_yes=0.115, Brier=0.0132, P&L théorique=$+4.60
10+
11+
Verdict run 288 : Champion best ✓.
12+
13+
Champion actuel : `vendor_ensemble` (la ligne réelle du ledger paper_bets.csv = celle de ce modèle).
14+
Challengers et baselines : positions shadow, P&L théorique, pas d'exposition réelle.
15+
16+
Compteur Phase 1 : voir `dashboard/public/predictor_manifest.json` après rebuild.
17+
18+
Règle de promotion : un challenger n'est pas promoté sur un seul win. Il faut N>=10 résolus avec rolling-mean Brier strictement inférieur ET sign test 1-sided p<0.10. Cf. `predictor/runs_learning/CHAMPION.json`.
19+
20+
Log complet : https://github.com/Elladriel80/aratea/blob/main/predictor/runs/288/report.json

predictor/runs/288/report.json

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,71 @@
124124
}
125125
],
126126
"resolution": {
127-
"outcome": null,
127+
"outcome": "no",
128128
"observed_value_f": null,
129-
"ts_utc": null,
130-
"pnl_usd": null
129+
"observed_range_f": [
130+
67.0,
131+
68.0
132+
],
133+
"winning_bin_ticker": "KXLOWTNYC-26JUN22-B67.5",
134+
"ts_utc": "2026-06-23T20:33:20Z",
135+
"champion_pnl_usd": 4.6,
136+
"champion_payout_usd": 40.0,
137+
"champion_cost_usd": 35.4,
138+
"champion_won": true
131139
}
132140
}
133141
],
134-
"scoring": null,
142+
"scoring": {
143+
"outcome": "no",
144+
"by_model": {
145+
"vendor_ensemble": {
146+
"role": "champion",
147+
"method": "ensemble",
148+
"p_yes": 0.0348,
149+
"brier": 0.0012,
150+
"won": true,
151+
"pnl_usd": 4.6,
152+
"pnl_type": "actual"
153+
},
154+
"learned_v2": {
155+
"role": "challenger",
156+
"method": "learned_v2",
157+
"p_yes": 0.0679,
158+
"brier": 0.0046,
159+
"won": true,
160+
"pnl_usd": 4.6,
161+
"pnl_type": "theoretical"
162+
},
163+
"kalshi_mid_baseline": {
164+
"role": "baseline",
165+
"method": "kalshi_mid",
166+
"p_yes": 0.115,
167+
"brier": 0.0132,
168+
"won": true,
169+
"pnl_usd": 4.6,
170+
"pnl_type": "theoretical"
171+
}
172+
},
173+
"ranking_by_brier": [
174+
{
175+
"model": "vendor_ensemble",
176+
"brier": 0.0012
177+
},
178+
{
179+
"model": "learned_v2",
180+
"brier": 0.0046
181+
},
182+
{
183+
"model": "kalshi_mid_baseline",
184+
"brier": 0.0132
185+
}
186+
],
187+
"best_brier_model": "vendor_ensemble",
188+
"champion_at_time_of_run": "vendor_ensemble",
189+
"champion_is_best": true,
190+
"n_datapoints": 1,
191+
"note": "Run 288 multi-model trade. Single point \u2014 directional only, not statistically significant. The champion promotion rule requires N>=10 with sign test p<0.10."
192+
},
135193
"notes": "Auto-captured by daily_auto.py on 2026-06-21T20:05:28Z. Selection rule: top-3 median bin(s) per event with |edge_vs_mid|>=0.05, spread<=0.08. Champion vendor_ensemble edge=-0.080, side=NO, Kelly capped size=$35.49 (bankroll=$1774.55, portfolio_heat_after_register=6.0%)."
136194
}

0 commit comments

Comments
 (0)