Office365SelfHostedLLMConnector/TokenUsageEvent.cs at main · elohcrypto/Office365SelfHostedLLMConnector · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;

namespace Office365SelfHostedLLMConnector;

// Append-only event log of LLM token usage. One row per assistant turn, written
// AFTER the chat persists successfully (same code path as Messages). Decoupled
// from the messages table so admin analytics queries (GROUP BY day, user, model)
// don't have to scan a wide STI table. Kept narrow on purpose — every column is
// hit by at least one admin query.
//
// What we don't store yet: cost (would need per-model pricing config), tool-call
// counts (would need parsing the assistant message), and image-gen tokens
// (different units — slice 2 if we add OpenAI image billing).
[Table("token_usage_events")]
public class TokenUsageEvent
{
    // Row identifier. Defaults to a fresh Guid.NewGuid() at construction, so a
    // new event has a non-empty id without the caller having to supply one.
    [Column("id")]
    public Guid Id { get; set; } = Guid.NewGuid();

    // Who consumed the tokens. Required — anonymous /chat doesn't write events.
    [Column("user_id")]
    public Guid UserId { get; set; }

    // Nullable so future non-conversation events (e.g. standalone image gen) can
    // be logged without inventing a fake conversation row. Today the persisted
    // chat endpoint always sets it.
    [Column("conversation_id")]
    public Guid? ConversationId { get; set; }

    // FK to the assistant message that produced these tokens. Lets us join back
    // when displaying "which message used the most tokens?" in the admin UI.
    [Column("message_id")]
    public Guid? MessageId { get; set; }

    // Which model was billed. Comes from ChatService.ModelId at request time, so
    // a future model switch is visible per-row instead of being lost in totals.
    // `required`: there is no sensible default, so every creator must set it.
    [Column("model_id")]
    [MaxLength(128)]
    public required string ModelId { get; set; }

    // 'word' | 'excel' | 'powerpoint' | 'outlook' | 'unknown' — captured from the
    // taskpane so admin can see "PPT users burn 3x the tokens." Free-form string,
    // no CHECK constraint, since new Office hosts get added by Microsoft.
    [Column("host")]
    [MaxLength(32)]
    public string? Host { get; set; }

    // 'chat' for now. Reserved values: 'image', 'embedding' — slot for future
    // billing surfaces without another schema change.
    //
    // Deliberately NOT `required`: a required member must be assigned in every
    // object initializer, which would make the "chat" default below unreachable
    // and force each call site to repeat it. Callers that do set Kind explicitly
    // are unaffected.
    [Column("kind")]
    [MaxLength(16)]
    public string Kind { get; set; } = "chat";

    // Tokens counted for the prompt (input) side of the turn. May be an
    // estimate — see IsEstimated.
    [Column("prompt_tokens")]
    public int PromptTokens { get; set; }

    // Tokens counted for the completion (output) side of the turn. May be an
    // estimate — see IsEstimated.
    [Column("completion_tokens")]
    public int CompletionTokens { get; set; }

    // Stored even though derivable, because the upstream gateway sometimes
    // returns a `total` that doesn't equal prompt+completion (cached-input
    // discounts, reasoning tokens). Trust upstream's number.
    [Column("total_tokens")]
    public int TotalTokens { get; set; }

    // True when the prompt/completion counts above were computed by our local
    // chars/4 heuristic because the upstream proxy stripped the streaming
    // usage chunk. False when they came straight from the LLM. Lets admin
    // queries say "X% of measured tokens were estimates" instead of treating
    // them as equal-fidelity data, and lets the UI render "≈" in the badge.
    [Column("is_estimated")]
    public bool IsEstimated { get; set; }

    // UTC always (defaults to DateTime.UtcNow at construction). Admin date-range
    // filters bucket by date_trunc('day', created_at).
    [Column("created_at")]
    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
}