-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathImageService.cs
More file actions
182 lines (159 loc) · 8.04 KB
/
Copy pathImageService.cs
File metadata and controls
182 lines (159 loc) · 8.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
using System.Text;
using System.Text.Json;
using ImageMagick;
[assembly: System.Runtime.CompilerServices.InternalsVisibleTo("Office365SelfHostedLLMConnector.Tests")]
namespace Office365SelfHostedLLMConnector;
// Image generation proxy. Calls upstream /v1/images/generations (OpenAI-compatible),
// returns base64 image data to the taskpane, which feeds it straight into
// PowerPoint.js' addImage — no intermediate storage.
//
// Upstream confirmed via probe (see ADR-0007 once written):
// POST /v1/images/generations with model=grok-imagine-image returns
// { "created": ..., "data": [ { "b64_json": "<base64-jpeg>" } ] }
//
// Models routable through the proxy's allowlist:
// grok-imagine-image (default, ~5s)
// grok-imagine-image-quality (higher fidelity, slower; reserved for "make it sharp")
//
// Transparent backgrounds (probed 2026-05-22):
// The upstream `background:"transparent"` param is silently ignored — grok always
// returns opaque JPEG (no alpha channel exists in JPEG anyway). For icon-style
// prompts we (a) append a "white background, flat vector" hint that nudges the
// model toward a uniform background, then (b) chroma-key near-white pixels to
// alpha and re-encode as PNG via ImageMagick. Works only for icon/logo prompts
// — photos would lose any genuinely-white objects (clouds, paper, snow).
//
// Shares the UA-rewriting HttpClient with ChatService so the proxy's WAF
// doesn't 403 us on the way out.
public sealed class ImageService
{
    // Size sent upstream when the caller does not specify one.
    private const string DefaultSize = "1024x1024";

    private readonly string _baseUrl;
    private readonly string _apiKey;
    private readonly string _defaultModel;
    private readonly HttpClient _httpClient;

    // baseUrl:      upstream API root; trailing '/' characters are trimmed so the
    //               endpoint path can be appended with exactly one separator.
    // apiKey:       sent as a Bearer token on every request.
    // defaultModel: used whenever GenerateAsync receives a null/blank model.
    // httpClient:   supplied by the caller; this class never disposes it.
    public ImageService(string baseUrl, string apiKey, string defaultModel, HttpClient httpClient)
    {
        _baseUrl = baseUrl.TrimEnd('/');
        _apiKey = apiKey;
        _defaultModel = defaultModel;
        _httpClient = httpClient;
    }

    // Words that signal the user wants a small, slide-friendly graphic rather than
    // a full photo. When any appear in the prompt we (1) append a clean-background
    // hint AND (2) post-process the returned JPEG to chroma-key its background
    // to alpha. We do BOTH because (1) alone leaves a visible white rectangle on
    // colored slides; (2) alone gives noisy edges if the upstream chose a busy bg.
    private static readonly string[] IconKeywords =
    {
        "icon", "logo", "illustration", "symbol", "emblem",
        "badge", "pictogram", "glyph", "sticker",
    };

    private const string IconStyleSuffix =
        ", on a solid white background, flat vector icon style, centered, no shadows";

    // Pixels within this percentage of pure white become transparent. JPEG
    // compression smears "white" into a range of 245-255, so anything tighter
    // leaves a fringe. Loose enough (~12%) clips the edge antialiasing too,
    // giving a hard-edged but clean cutout — appropriate for flat icons.
    private const double WhiteFuzzPercent = 12.0;

    // Requests a single image from the upstream images/generations endpoint and
    // returns it base64-encoded.
    //
    // prompt: user prompt; may be rewritten by RewriteForIcon before sending.
    // model:  upstream model name; null/blank falls back to the configured default.
    // size:   "WxH" string forwarded upstream; null/blank falls back to 1024x1024.
    // ct:     cancels the HTTP call and the body read.
    //
    // Returns "image/jpeg" with the upstream payload untouched for ordinary prompts,
    // or "image/png" with the white background keyed out for icon-style prompts
    // (falling back to the original JPEG if the cutout step fails).
    //
    // Throws ArgumentNullException when prompt is null.
    // Throws ImageGenerationException carrying the upstream status and body on a
    // non-success HTTP response, or status 502 when a success response does not
    // contain a usable data[0].b64_json string.
    public async Task<ImageGenerateResult> GenerateAsync(
        string prompt, string? model, string? size, CancellationToken ct = default)
    {
        // Fail with a descriptive error instead of a NullReferenceException at the log line below.
        ArgumentNullException.ThrowIfNull(prompt);

        var chosen = string.IsNullOrWhiteSpace(model) ? _defaultModel : model;
        var chosenSize = string.IsNullOrWhiteSpace(size) ? DefaultSize : size;
        var (effectivePrompt, isIcon) = RewriteForIcon(prompt);

        var body = new
        {
            model = chosen,
            prompt = effectivePrompt,
            n = 1,
            size = chosenSize,
        };

        using var req = new HttpRequestMessage(HttpMethod.Post, $"{_baseUrl}/images/generations")
        {
            Content = new StringContent(JsonSerializer.Serialize(body), Encoding.UTF8, "application/json"),
        };
        req.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", _apiKey);

        var sw = System.Diagnostics.Stopwatch.StartNew();
        Console.WriteLine($"[image] generate model={chosen} size={chosenSize} prompt={effectivePrompt.Length}ch (orig={prompt.Length}ch) icon={isIcon} head=\"{Truncate(prompt, 100)}\"");

        using var resp = await _httpClient.SendAsync(req, ct);
        var raw = await resp.Content.ReadAsStringAsync(ct);
        if (!resp.IsSuccessStatusCode)
        {
            Console.WriteLine($"[image] FAIL http={(int)resp.StatusCode} body={Truncate(raw, 300)} elapsed={sw.ElapsedMilliseconds}ms");
            throw new ImageGenerationException((int)resp.StatusCode, raw);
        }

        var b64 = ExtractBase64Image(raw);

        if (!isIcon)
        {
            Console.WriteLine($"[image] OK jpeg b64len={b64.Length} elapsed={sw.ElapsedMilliseconds}ms");
            return new ImageGenerateResult(b64, "image/jpeg", chosen);
        }

        // Icon path: decode JPEG, chroma-key near-white → alpha, re-encode PNG.
        // Failures here are non-fatal: we still have a usable JPEG to return.
        try
        {
            var pngB64 = RemoveWhiteBackgroundToPng(b64);
            Console.WriteLine($"[image] OK png(cutout) b64len={pngB64.Length} (jpeg was {b64.Length}) elapsed={sw.ElapsedMilliseconds}ms");
            return new ImageGenerateResult(pngB64, "image/png", chosen);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"[image] WARN cutout failed ({ex.GetType().Name}: {ex.Message}); falling back to opaque JPEG");
            return new ImageGenerateResult(b64, "image/jpeg", chosen);
        }
    }

    // Pulls data[0].b64_json out of a success response body.
    // Every shape problem (non-JSON body, wrong value kinds, missing/empty fields)
    // is reported as ImageGenerationException(502, ...) so callers only ever see
    // one exception type for a bad upstream payload, rather than JsonException or
    // InvalidOperationException escaping from System.Text.Json.
    private static string ExtractBase64Image(string raw)
    {
        JsonDocument doc;
        try
        {
            doc = JsonDocument.Parse(raw);
        }
        catch (JsonException ex)
        {
            Console.WriteLine($"[image] FAIL upstream body is not valid JSON ({ex.Message}) body={Truncate(raw, 300)}");
            throw new ImageGenerationException(502, "upstream returned invalid JSON");
        }

        using (doc)
        {
            var root = doc.RootElement;

            // TryGetProperty / GetArrayLength throw InvalidOperationException on the
            // wrong ValueKind, so the kind is checked before each access.
            if (root.ValueKind != JsonValueKind.Object || !root.TryGetProperty("data", out var data))
                throw new ImageGenerationException(502, "upstream returned no data array");
            if (data.ValueKind != JsonValueKind.Array || data.GetArrayLength() == 0)
                throw new ImageGenerationException(502, "upstream returned no data array");

            var first = data[0];
            if (first.ValueKind != JsonValueKind.Object || !first.TryGetProperty("b64_json", out var b64Prop))
                throw new ImageGenerationException(502, "upstream missing b64_json field");

            if (b64Prop.ValueKind == JsonValueKind.Null)
                throw new ImageGenerationException(502, "b64_json was null");
            if (b64Prop.ValueKind != JsonValueKind.String)
                throw new ImageGenerationException(502, "b64_json was not a string");

            // GetString() copies the value, so it stays valid after the document is disposed.
            var b64 = b64Prop.GetString();
            if (string.IsNullOrEmpty(b64))
                throw new ImageGenerationException(502, "b64_json was empty");
            return b64;
        }
    }

    // Shortens s to at most max UTF-16 code units and appends "..." when anything
    // was cut. Backs off by one unit if the cut would land between the two halves
    // of a surrogate pair, so the log never contains a lone surrogate.
    private static string Truncate(string s, int max)
    {
        if (s.Length <= max) return s;
        var cut = max > 0 && char.IsHighSurrogate(s[max - 1]) ? max - 1 : max;
        return s[..cut] + "...";
    }

    // Case-insensitive whole-prompt scan with three-tier decision:
    //   1. "transparent" in prompt → cutout, leave prompt alone (LLM/user explicit)
    //   2. "background" in prompt (without "transparent") → user wants a REAL
    //      background; skip cutout entirely to avoid keying out near-white pixels
    //      that belong to the requested scene
    //   3. icon keyword (icon/logo/…) → append our white-background suffix AND cutout
    //   else → passthrough (probably a photo)
    //
    // Returns the prompt to send upstream and whether the result should be cut out.
    // A null/blank prompt is returned unchanged with isIcon = false.
    //
    // NOTE(review): matching is by substring, so "silicon" or "analogous" also hit
    // the "icon"/"logo" keywords. Left as-is because tests may depend on it — confirm
    // whether word-boundary matching is wanted.
    internal static (string prompt, bool isIcon) RewriteForIcon(string prompt)
    {
        if (string.IsNullOrWhiteSpace(prompt)) return (prompt, false);

        if (prompt.Contains("transparent", StringComparison.OrdinalIgnoreCase)) return (prompt, true);
        if (prompt.Contains("background", StringComparison.OrdinalIgnoreCase)) return (prompt, false);

        foreach (var kw in IconKeywords)
        {
            // Strip trailing '.'/' ' so the suffix (which starts with ", ") reads naturally.
            if (prompt.Contains(kw, StringComparison.OrdinalIgnoreCase))
                return (prompt.TrimEnd('.', ' ') + IconStyleSuffix, true);
        }

        return (prompt, false);
    }

    // Chroma-key near-white pixels to alpha and emit a 32-bit RGBA PNG.
    // Trim() crops the now-transparent borders so PowerPoint doesn't get a
    // 1024×1024 sprite where 800×800 is empty alpha.
    //
    // jpegB64: base64 image bytes as returned by upstream.
    // Returns the processed image as base64 PNG. Exceptions from base64 decoding
    // or from ImageMagick propagate; GenerateAsync catches them and falls back to
    // the original JPEG.
    private static string RemoveWhiteBackgroundToPng(string jpegB64)
    {
        var bytes = Convert.FromBase64String(jpegB64);
        using var image = new MagickImage(bytes);
        image.ColorFuzz = new Percentage(WhiteFuzzPercent);
        image.Alpha(AlphaOption.Set);
        image.Transparent(MagickColors.White);
        image.Trim();
        image.ResetPage();
        image.Format = MagickFormat.Png32;
        return Convert.ToBase64String(image.ToByteArray());
    }
}
// Result of a successful image generation.
//   B64Json  — the image bytes, base64-encoded.
//   MimeType — media type of those bytes; ImageService produces "image/jpeg" or "image/png".
//   Model    — the model name that was sent upstream for this request.
public sealed record ImageGenerateResult(
    string B64Json,
    string MimeType,
    string Model);
// Raised when an image cannot be produced. ImageService throws it with the
// upstream HTTP status and raw response body on a non-success response, and
// with status 502 plus a short description when the upstream payload is malformed.
public sealed class ImageGenerationException : Exception
{
    // statusCode: HTTP-style status describing the failure; also embedded in Message.
    // body:       upstream response body or a short diagnostic; NOT included in Message.
    public ImageGenerationException(int statusCode, string body)
        : base(DescribeFailure(statusCode))
    {
        this.Body = body;
        this.StatusCode = statusCode;
    }

    // Status code supplied by the thrower.
    public int StatusCode { get; }

    // Detail text supplied by the thrower.
    public string Body { get; }

    // Builds the exception message for the given status code.
    private static string DescribeFailure(int statusCode)
    {
        return $"image generation failed: HTTP {statusCode}";
    }
}