-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpkg.go
More file actions
636 lines (594 loc) · 17.6 KB
/
Copy pathpkg.go
File metadata and controls
636 lines (594 loc) · 17.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
package main
import (
"context"
"fmt"
"html"
"log"
"net/http"
"net/url"
"regexp"
"sort"
"strings"
"sync"
"time"
"github.com/mymmrac/telego"
th "github.com/mymmrac/telego/telegohandler"
)
// overlay identifies one GitHub-hosted ebuild repository that /pkg searches.
type overlay struct {
name string // short display name; also the package-cache key and the heading shown in results
repo string // GitHub owner/name
branch string // branch whose tree is fetched from the API and linked to in results
}
// userAgent is the shared outbound User-Agent for all HTTP requests this bot makes.
// configurePkg replaces it when the user_agent config value is non-empty.
var userAgent = "gentoo-zh-verify-bot"
// overlays lists the overlays searched by /pkg. It is populated by configurePkg at
// startup (defaulting to gentoo-zh + guru) and also fixes the order results are rendered in.
var overlays []overlay
// configurePkg applies the package-search settings from cfg to the package-level state:
// a non-empty UserAgent replaces the default, and cfg.Overlays (when present) replaces
// the built-in overlay list. An overlay entry without a branch uses "master"; one
// without a name is displayed under its repo.
func configurePkg(cfg *Config) {
	if ua := cfg.UserAgent; ua != "" {
		userAgent = ua
	}

	if len(cfg.Overlays) == 0 {
		// Nothing configured: fall back to the two built-in overlays.
		overlays = []overlay{
			{name: "gentoo-zh", repo: "microcai/gentoo-zh", branch: "master"},
			{name: "guru", repo: "gentoo/guru", branch: "master"},
		}
		return
	}

	configured := make([]overlay, 0, len(cfg.Overlays))
	for _, src := range cfg.Overlays {
		entry := overlay{name: src.Name, repo: src.Repo, branch: src.Branch}
		if entry.branch == "" {
			entry.branch = "master"
		}
		if entry.name == "" {
			entry.name = entry.repo
		}
		configured = append(configured, entry)
	}
	overlays = configured
}
// Tunables for the /pkg caches.
const (
	// pkgCacheTTL is how long a fully successful overlay refresh counts as fresh.
	pkgCacheTTL = 6 * time.Hour

	// verCacheTTL is how long an official-tree version lookup is reused.
	verCacheTTL = 6 * time.Hour

	// maxHitsPerSource caps how many atoms are listed per source (the official tree
	// or a single overlay).
	maxHitsPerSource = 8

	// pkgCacheMax bounds caches keyed by user-supplied atoms, such as the version
	// cache: at the limit the cache is dropped wholesale rather than grown without
	// bound. It is far above the realistic query universe, so it essentially never
	// triggers in normal use.
	pkgCacheMax = 2000

	// pkgRetryFloor throttles refresh retries after a failure, which avoids GitHub
	// rate-limit storms.
	pkgRetryFloor = 3 * time.Minute
)
// pkgCache holds, per overlay, a map of "category/package" atom -> latest version string.
// Every field below mu is read and written only while mu is held.
type pkgCache struct {
mu sync.Mutex
pkgs map[string]map[string]string // overlay name -> atom -> newest version found in that overlay
fetched time.Time // when a refresh last completed with every overlay succeeding
lastAttempt time.Time // when the most recent refresh attempt started (used to throttle retries)
refreshing bool // true while a refresh is in flight, so concurrent callers skip it
}
// pkgC is the package-level overlay cache used by /pkg; its map starts empty and is
// filled by refresh.
var pkgC = &pkgCache{pkgs: map[string]map[string]string{}}
// isPkgPath reports whether p looks like a Gentoo "category/package" path: exactly one
// slash, a non-empty category and package name, and a category that is either "virtual"
// or hyphenated (e.g. "app-editors"). Well-known non-package top-level directories of an
// ebuild repository are rejected explicitly.
func isPkgPath(p string) bool {
	i := strings.IndexByte(p, '/')
	if i < 1 {
		return false // no slash at all, or an empty category
	}
	cat, pkg := p[:i], p[i+1:]
	// A trailing slash ("dev-libs/") names a category, not a package; a second slash
	// means p points below the package directory.
	if pkg == "" || strings.Contains(pkg, "/") {
		return false
	}
	switch cat {
	case "metadata", "profiles", "eclass", "licenses", "scripts", ".github", ".gitlab":
		return false
	}
	return cat == "virtual" || strings.Contains(cat, "-")
}
// splitVer breaks a Gentoo version into comparable tokens, cutting at '.', '-' and '_'
// (so "1.2.3_rc1-r2" becomes "1", "2", "3", "rc1", "r2"). The separators themselves are
// dropped, which is why the comparison classifies tokens by how they start.
func splitVer(v string) []string {
	return strings.FieldsFunc(v, func(r rune) bool { return r == '.' || r == '-' || r == '_' })
}

// verLess reports whether version a is older than b (best-effort; good enough to pick "latest").
//
// Tokens are compared position by position with cmpVerToken. When every shared position
// is equal, the version carrying an extra trailing token is the older one if that token
// is a pre-release suffix (_alpha/_beta/_pre/_rc) and the newer one otherwise (a patch
// _p, a revision -r, or a further numeric component).
func verLess(a, b string) bool {
	as, bs := splitVer(a), splitVer(b)
	n := len(as)
	if len(bs) < n {
		n = len(bs)
	}
	for i := 0; i < n; i++ {
		if c := cmpVerToken(as[i], bs[i]); c != 0 {
			return c < 0
		}
	}
	if len(as) == len(bs) {
		return false
	}

	aLonger := len(as) > len(bs)
	var extra string // the first token that only the longer side has
	if aLonger {
		extra = as[n]
	} else {
		extra = bs[n]
	}
	if suffixWeight(extra) < 0 { // longer side is a pre-release => it is the OLDER one
		return aLonger
	}
	return !aLonger // longer side adds a patch/revision/component => it is the NEWER one
}

// cmpVerToken compares the tokens found at the same position in two versions.
// Returns -1, 0 or 1.
//
// A numeric component outranks any suffix in the same position (1.0.1 is newer than
// 1.0_rc1, 1.0_p1 and 1.0-r1). Two suffixes of different kinds are ordered by
// suffixWeight rather than by spelling, so _p1 sorts after both _pre1 and -r1.
// Anything else falls through to the natural ordering of cmpToken.
func cmpVerToken(a, b string) int {
	isNum := func(s string) bool { return s != "" && s[0] >= '0' && s[0] <= '9' }
	aNum, bNum := isNum(a), isNum(b)
	switch {
	case aNum && bNum:
		return cmpToken(a, b)
	case aNum:
		return 1
	case bNum:
		return -1
	}
	if wa, wb := suffixWeight(a), suffixWeight(b); wa != wb {
		if wa < wb {
			return -1
		}
		return 1
	}
	return cmpToken(a, b)
}

// suffixWeight ranks a Gentoo version suffix token relative to the bare release (0):
// negative for pre-releases (_alpha < _beta < _pre < _rc), positive for everything newer.
// Among the positive ranks a patch (_pN) is 2 and any other token (a revision -rN, an
// extra numeric component, or something unrecognised) is 1, because a patch level takes
// precedence over a revision.
func suffixWeight(tok string) int {
	switch {
	case strings.HasPrefix(tok, "alpha"):
		return -4
	case strings.HasPrefix(tok, "beta"):
		return -3
	case strings.HasPrefix(tok, "pre"):
		return -2
	case strings.HasPrefix(tok, "rc"):
		return -1
	case tok == "p" || (len(tok) > 1 && tok[0] == 'p' && tok[1] >= '0' && tok[1] <= '9'):
		return 2
	default:
		return 1
	}
}

// cmpToken compares two version tokens with natural ordering: digit runs compare
// numerically (so "r10" > "r2", not the string order where "r10" < "r2"), other bytes
// compare byte-wise. Returns -1, 0 or 1.
func cmpToken(a, b string) int {
	ai, bi := 0, 0
	isDigit := func(c byte) bool { return c >= '0' && c <= '9' }
	for ai < len(a) && bi < len(b) {
		if isDigit(a[ai]) && isDigit(b[bi]) {
			// Consume the whole digit run on both sides and compare it as a number.
			aj, bj := ai, bi
			for aj < len(a) && isDigit(a[aj]) {
				aj++
			}
			for bj < len(b) && isDigit(b[bj]) {
				bj++
			}
			if c := cmpNum(a[ai:aj], b[bi:bj]); c != 0 {
				return c
			}
			ai, bi = aj, bj
			continue
		}
		if a[ai] != b[bi] {
			if a[ai] < b[bi] {
				return -1
			}
			return 1
		}
		ai++
		bi++
	}
	switch { // the token with more left is "greater" (e.g. "r" < "r2")
	case len(a)-ai < len(b)-bi:
		return -1
	case len(a)-ai > len(b)-bi:
		return 1
	default:
		return 0
	}
}

// cmpNum compares two digit strings as numbers, without integer overflow: leading zeros
// are ignored, then the longer string is the larger number, and equal-length strings
// compare lexically. Returns -1, 0 or 1.
func cmpNum(a, b string) int {
	a, b = strings.TrimLeft(a, "0"), strings.TrimLeft(b, "0")
	switch {
	case len(a) != len(b):
		if len(a) < len(b) {
			return -1
		}
		return 1
	case a < b:
		return -1
	case a > b:
		return 1
	default:
		return 0
	}
}
// ebuildAtomVer extracts ("cat/pkg", "version") from an ebuild blob path "cat/pkg/pkg-VER.ebuild".
//
// The third result is false when path does not end in ".ebuild", has no directory part,
// or its file name is not "<pkg>-<version>.ebuild" for the name of the directory it sits
// in. The last check keeps a stray or mis-named ebuild from contributing a bogus
// "version" that still contains a package name.
func ebuildAtomVer(path string) (string, string, bool) {
	if !strings.HasSuffix(path, ".ebuild") {
		return "", "", false
	}
	slash := strings.LastIndexByte(path, '/')
	if slash < 0 {
		return "", "", false
	}
	dir, file := path[:slash], path[slash+1:] // "cat/pkg", "pkg-VER.ebuild"
	pkg := dir[strings.LastIndexByte(dir, '/')+1:]

	stem := strings.TrimSuffix(file, ".ebuild")
	prefix := pkg + "-"
	if !strings.HasPrefix(stem, prefix) {
		return "", "", false
	}
	ver := stem[len(prefix):]
	if ver == "" {
		return "", "", false
	}
	return dir, ver, true
}
// treeURL builds the github.com web URL that browses atom's directory on this
// overlay's branch.
func (o overlay) treeURL(atom string) string {
	return fmt.Sprintf("https://github.com/%s/tree/%s/%s", o.repo, o.branch, atom)
}
// fetchOverlay downloads one overlay's recursive git tree from the GitHub API and
// reduces it to atom -> newest version, considering only ".ebuild" blobs that sit in a
// "category/package" directory. The request carries a bearer token when githubToken is
// set. A truncated tree listing is logged and whatever was received is still returned.
func fetchOverlay(ctx context.Context, o overlay) (map[string]string, error) {
	var listing struct {
		Tree []struct {
			Path string `json:"path"`
			Type string `json:"type"`
		} `json:"tree"`
		Truncated bool `json:"truncated"`
	}

	headers := http.Header{"Accept": {"application/vnd.github+json"}}
	if githubToken != "" {
		headers.Set("Authorization", "Bearer "+githubToken)
	}
	endpoint := fmt.Sprintf("https://api.github.com/repos/%s/git/trees/%s?recursive=1", o.repo, o.branch)
	if err := httpGetJSON(ctx, endpoint, headers, &listing); err != nil {
		return nil, err
	}

	newest := make(map[string]string)
	for i := range listing.Tree {
		entry := &listing.Tree[i]
		if entry.Type != "blob" {
			continue
		}
		atom, ver, ok := ebuildAtomVer(entry.Path)
		if !ok || !isPkgPath(atom) {
			continue
		}
		// Keep what we have unless this ebuild is strictly newer.
		if prev, seen := newest[atom]; seen && !verLess(prev, ver) {
			continue
		}
		newest[atom] = ver
	}

	if listing.Truncated {
		log.Printf("pkg cache: %s tree truncated (%d entries)", o.repo, len(listing.Tree))
	}
	return newest, nil
}
// refresh re-fetches every configured overlay into the cache, unless the cache is still
// fresh, another refresh is already running, or the previous attempt started less than
// pkgRetryFloor ago. It runs synchronously in the caller; mu is released while the
// overlays are being fetched and only taken briefly to publish each result.
func (pc *pkgCache) refresh(ctx context.Context) {
pc.mu.Lock()
// fresh: something is cached and the last fully successful refresh is within the TTL
fresh := len(pc.pkgs) > 0 && time.Since(pc.fetched) < pkgCacheTTL
// throttle retries after a failure: don't re-attempt within pkgRetryFloor, so a
// failing overlay can't make every /pkg re-hit the GitHub API (rate-limit storm)
throttled := time.Since(pc.lastAttempt) < pkgRetryFloor
if fresh || pc.refreshing || throttled {
pc.mu.Unlock()
return
}
// claim the refresh and record when it started, both while still holding the lock
pc.refreshing = true
pc.lastAttempt = time.Now()
pc.mu.Unlock()
// always clear the in-flight flag on the way out, whatever the outcome
defer func() { pc.mu.Lock(); pc.refreshing = false; pc.mu.Unlock() }()
allOK := true
for _, o := range overlays {
m, err := fetchOverlay(ctx, o)
if err != nil {
// keep this overlay's previous entry (if any) and carry on with the others
log.Printf("pkg cache: %v", err)
allOK = false
continue
}
pc.mu.Lock()
pc.pkgs[o.name] = m
pc.mu.Unlock()
log.Printf("pkg cache: %s -> %d packages", o.name, len(m))
}
// only mark fresh when every overlay succeeded, so a transient failure on one
// doesn't freeze partial results for the whole TTL
if allOK {
pc.mu.Lock()
pc.fetched = time.Now()
pc.mu.Unlock()
}
}
// pn returns the part of atom after its first '/' (the package name of a
// "category/package" atom), or atom unchanged when it contains no slash.
func pn(atom string) string {
	if slash := strings.IndexByte(atom, '/'); slash >= 0 {
		return atom[slash+1:]
	}
	return atom
}
// search looks name up in every cached overlay, case-insensitively, and returns the
// matching atoms per overlay name. A query containing '/' is matched against the whole
// atom; otherwise only the package name is considered. Exact matches are listed before
// substring matches, each group sorted, and the list is cut to maxHitsPerSource.
// Overlays without a hit are left out of the result.
func (pc *pkgCache) search(name string) map[string][]string {
	needle := strings.ToLower(name)
	wholeAtom := strings.Contains(needle, "/")
	// key is the lower-cased text of an atom that the query is matched against.
	key := func(atom string) string {
		if wholeAtom {
			return strings.ToLower(atom)
		}
		return strings.ToLower(pn(atom))
	}

	found := make(map[string][]string)

	pc.mu.Lock()
	defer pc.mu.Unlock()

	for ovName, byAtom := range pc.pkgs {
		var exact, partial []string
		for atom := range byAtom {
			switch k := key(atom); {
			case k == needle:
				exact = append(exact, atom)
			case strings.Contains(k, needle):
				partial = append(partial, atom)
			}
		}
		sort.Strings(exact)
		sort.Strings(partial)

		hits := append(exact, partial...)
		if len(hits) == 0 {
			continue
		}
		if len(hits) > maxHitsPerSource {
			hits = hits[:maxHitsPerSource]
		}
		found[ovName] = hits
	}
	return found
}
// overlayVer returns the cached newest version of atom in overlay ov, or "" when
// either the overlay or the atom is not in the cache.
func (pc *pkgCache) overlayVer(ov, atom string) string {
	pc.mu.Lock()
	// An unknown overlay yields a nil inner map, and indexing a nil map gives "".
	ver := pc.pkgs[ov][atom]
	pc.mu.Unlock()
	return ver
}
// verInfo is one cached official-tree lookup: the amd64-stable version and the newest
// version of a package, plus when the lookup was made.
type verInfo struct {
stable, latest string // either may be "" when no such version was found
fetched time.Time // lookup time; entries older than verCacheTTL are not reused
}
// verC caches official-tree version lookups by "cat/pkg" atom. The map m is only
// accessed while mu is held.
var verC = struct {
mu sync.Mutex
m map[string]verInfo
}{m: map[string]verInfo{}}
// pkgVersionJSON is one entry of packages.gentoo.org's package "versions" array.
type pkgVersionJSON struct {
	Version  string   `json:"version"`
	Keywords []string `json:"keywords"`
}

// pickStableLatest scans versions (newest-first, as packages.gentoo.org returns them)
// for the newest non-live version (latest) and the newest amd64-stable version (stable).
// Either result is "" when no entry qualifies. Live ebuilds are skipped entirely,
// including branch live ebuilds such as "19.0.0.9999".
func pickStableLatest(versions []pkgVersionJSON) (stable, latest string) {
	for _, vv := range versions {
		if isLiveVersion(vv.Version) {
			continue
		}
		if latest == "" {
			latest = vv.Version
		}
		if stable == "" {
			for _, kw := range vv.Keywords {
				if kw == "amd64" { // a bare arch keyword (no "~") means stable
					stable = vv.Version
					break
				}
			}
		}
		if latest != "" && stable != "" {
			break
		}
	}
	return stable, latest
}

// isLiveVersion reports whether v follows the live-ebuild naming convention: some
// dot-separated component starts with "9999" (e.g. "9999", "99999999", "5.15.9999").
func isLiveVersion(v string) bool {
	for _, part := range strings.Split(v, ".") {
		if strings.HasPrefix(part, "9999") {
			return true
		}
	}
	return false
}
// pkgVersion returns (amd64-stable, newest) versions for a "cat/pkg" atom via
// packages.gentoo.org JSON. Successful lookups are cached for verCacheTTL; a failed
// request or an empty version list yields ("", "") and is not cached.
func pkgVersion(ctx context.Context, atom string) (string, string) {
	verC.mu.Lock()
	cached, hit := verC.m[atom]
	verC.mu.Unlock()
	if hit && time.Since(cached.fetched) < verCacheTTL {
		return cached.stable, cached.latest
	}

	// atom may come straight from a chat message, so escape each path segment rather
	// than splicing it into the URL verbatim. Well-formed atoms pass through unchanged.
	segs := strings.Split(atom, "/")
	for i, s := range segs {
		segs[i] = url.PathEscape(s)
	}
	endpoint := "https://packages.gentoo.org/packages/" + strings.Join(segs, "/") + ".json"

	var pj struct {
		Versions []pkgVersionJSON `json:"versions"`
	}
	if err := httpGetJSON(ctx, endpoint, nil, &pj); err != nil || len(pj.Versions) == 0 {
		return "", ""
	}
	stable, latest := pickStableLatest(pj.Versions)

	verC.mu.Lock()
	if len(verC.m) >= pkgCacheMax {
		// At the bound, drop everything instead of growing without limit.
		verC.m = map[string]verInfo{}
	}
	verC.m[atom] = verInfo{stable: stable, latest: latest, fetched: time.Now()}
	verC.mu.Unlock()
	return stable, latest
}
// pkgHrefRe matches "/packages/<category>/<package>" link targets and captures the
// "category/package" atom as group 1.
var pkgHrefRe = regexp.MustCompile(`/packages/([a-z][a-z0-9-]+/[A-Za-z0-9][A-Za-z0-9+_.\-]*)`)
// searchMainTree looks name up in the official tree via packages.gentoo.org and returns
// the matching atoms, or nil when nothing is found or the search request fails.
func searchMainTree(ctx context.Context, name string) []string {
	// A "category/package" query is an exact atom — resolve it directly via the
	// authoritative JSON (the search page doesn't match slashed queries well).
	if strings.Contains(name, "/") && isPkgPath(strings.ToLower(name)) {
		stable, latest := pkgVersion(ctx, name)
		if stable == "" && latest == "" {
			return nil
		}
		return []string{name}
	}

	searchURL := "https://packages.gentoo.org/packages/search?q=" + url.QueryEscape(name)
	page, err := httpGetBody(ctx, searchURL, 2<<20)
	if err != nil {
		log.Printf("main tree search: %v", err)
		return nil
	}
	return rankSearchHits(page, name)
}
// rankSearchHits pulls package atoms out of the search-results HTML, drops duplicates
// and non-package paths, orders them by relevance to the query and returns at most
// maxHitsPerSource of them. It is separate from the fetch so that a fixture of the page
// can guard the href regex against a silent "0 hits" if packages.gentoo.org's markup
// drifts.
//
// Why re-rank: a package literally named the query, or whose CATEGORY contains it
// (sys-kernel/* for "kernel"), is more relevant than an incidental substring match
// (dev-ml/core_kernel). Non-matches are NOT dropped — Gentoo strips version suffixes
// (fcitx5 → app-i18n/fcitx) — so the server's fuzzy hits stay (score 0) in page order.
func rankSearchHits(body []byte, name string) []string {
	type candidate struct {
		atom  string
		score int
	}

	query := strings.ToLower(name)
	matches := pkgHrefRe.FindAllStringSubmatch(string(body), -1)
	dedup := make(map[string]bool, len(matches))
	ranked := make([]candidate, 0, len(matches))
	for _, m := range matches {
		atom := m[1]
		if dedup[atom] || !isPkgPath(atom) {
			continue
		}
		dedup[atom] = true
		ranked = append(ranked, candidate{atom: atom, score: pkgRelevance(atom, query)})
	}

	// Stable sort: equally scored atoms keep the order they had on the page.
	sort.SliceStable(ranked, func(i, j int) bool { return ranked[j].score < ranked[i].score })

	limit := len(ranked)
	if limit > maxHitsPerSource {
		limit = maxHitsPerSource
	}
	hits := make([]string, limit)
	for i := range hits {
		hits[i] = ranked[i].atom
	}
	return hits
}
// pkgRelevance scores how well an atom matches a bare, already lower-cased query q, for
// ranking search results: 100 when the package name equals q, 50 when the category
// contains q, 30 when the package name starts with q, 10 when it merely contains q,
// and 0 otherwise.
func pkgRelevance(atom, q string) int {
	var category string
	if slash := strings.IndexByte(atom, '/'); slash > 0 {
		category = strings.ToLower(atom[:slash])
	}
	pkg := strings.ToLower(pn(atom))

	if pkg == q {
		return 100
	}
	if strings.Contains(category, q) {
		return 50
	}
	if strings.HasPrefix(pkg, q) {
		return 30
	}
	if strings.Contains(pkg, q) {
		return 10
	}
	return 0
}
// commandArg returns everything after the first whitespace-separated word of text (the
// command itself), with the remaining words joined by single spaces. It returns "" when
// there is no argument. Any whitespace separates words, so a tab- or newline-separated
// argument (e.g. a pasted "/pkg\nvim") is handled, not just a single space.
func commandArg(text string) string {
	words := strings.Fields(text)
	if len(words) <= 1 {
		return ""
	}
	// Fields never yields blank words, so the joined result has no stray padding.
	return strings.Join(words[1:], " ")
}
// onPkg handles /pkg <name>: it searches the official tree and the configured overlays,
// looks up versions for the official-tree hits, and replies with the rendered result.
// Messages that are missing or not allowed to query are ignored; an empty argument gets
// a usage hint. It always returns nil.
func (v *Verifier) onPkg(ctx *th.Context, update telego.Update) error {
	msg := update.Message
	if msg == nil || !v.queryAllowed(ctx, msg) {
		return nil
	}
	bot, base := ctx.Bot(), ctx.Context()

	query := commandArg(msg.Text)
	if query == "" {
		v.replyLookupPlain(base, bot, msg.Chat.ID, msg.MessageID, "用法:/pkg <包名>,例如 /pkg vim,或粘贴链接 /pkg https://packages.gentoo.org/packages/app-editors/vim")
		return nil
	}
	query = normalizeQuery(query)

	// One deadline covers the cache refresh, the search and the version lookups.
	lookupCtx, cancel := context.WithTimeout(base, 25*time.Second)
	defer cancel()

	pkgC.refresh(lookupCtx)
	ovRes := pkgC.search(query)
	mainRes := searchMainTree(lookupCtx, query)

	// Fetch the official-tree versions concurrently, one goroutine per hit.
	versions := make(map[string][2]string, len(mainRes))
	var (
		wg sync.WaitGroup
		mu sync.Mutex // guards versions
	)
	wg.Add(len(mainRes))
	for _, atom := range mainRes {
		go func(atom string) {
			defer wg.Done()
			stable, latest := pkgVersion(lookupCtx, atom)
			mu.Lock()
			defer mu.Unlock()
			versions[atom] = [2]string{stable, latest}
		}(atom)
	}
	wg.Wait()

	plain := renderPkg(query, mainRes, versions, ovRes)
	var rich string
	if v.isRichEnabled() {
		rich = renderPkgRich(query, mainRes, versions, ovRes)
	}
	v.sendRichOrHTML(base, bot, msg.Chat.ID, msg.MessageID, rich, plain)
	return nil
}
// renderPkg builds the plain-HTML /pkg result (regular sendMessage; \n line breaks work).
// The official-tree hits come first, then one section per configured overlay that has
// hits, in configuration order. vm maps an official-tree atom to its {stable, latest}
// versions. All dynamic text is HTML-escaped.
func renderPkg(q string, mainRes []string, vm map[string][2]string, ovRes map[string][]string) string {
	var out strings.Builder
	matched := false

	out.WriteString("🔎 <b>" + html.EscapeString(q) + "</b> 的搜索结果")

	if len(mainRes) != 0 {
		matched = true
		out.WriteString("\n\n📦 <b>官方树 gentoo</b>")
		for _, atom := range mainRes {
			var label string
			switch sl := vm[atom]; {
			case sl[0] != "":
				label = " — " + html.EscapeString(sl[0]) // amd64-stable: no symbol
			case sl[1] != "":
				label = " — ~" + html.EscapeString(sl[1]) // testing only: ~arch
			}
			link := html.EscapeString("https://packages.gentoo.org/packages/" + atom)
			fmt.Fprintf(&out, "\n • <a href=\"%s\">%s</a>%s", link, html.EscapeString(atom), label)
		}
	}

	for _, ov := range overlays {
		hits := ovRes[ov.name]
		if len(hits) == 0 {
			continue
		}
		matched = true
		fmt.Fprintf(&out, "\n\n🧩 <b>%s</b>", html.EscapeString(ov.name))
		for _, atom := range hits {
			var label string
			if ver := pkgC.overlayVer(ov.name, atom); ver != "" {
				label = " — ~" + html.EscapeString(ver) // overlay packages are testing (~arch)
			}
			fmt.Fprintf(&out, "\n • <a href=\"%s\">%s</a>%s",
				html.EscapeString(ov.treeURL(atom)), html.EscapeString(atom), label)
		}
	}

	if matched {
		out.WriteString("\n\n<i>~ 为测试版(~arch);无符号为 amd64 稳定版</i>")
	} else {
		out.WriteString("\n\n没找到匹配的包,换个更短的关键词试试?")
	}
	return out.String()
}
// renderPkgRich builds the Bot API 10.1 rich /pkg result: a heading, the official-tree
// hits as a <ul>, and each overlay with hits as a collapsed <details><ul>, in
// configuration order. Only block tags are used (rich ignores newlines). vm maps an
// official-tree atom to its {stable, latest} versions. All dynamic text is HTML-escaped.
func renderPkgRich(q string, mainRes []string, vm map[string][2]string, ovRes map[string][]string) string {
	var out strings.Builder
	matched := false

	out.WriteString("<h3>🔎 " + html.EscapeString(q) + " 的搜索结果</h3>")

	if len(mainRes) != 0 {
		matched = true
		out.WriteString("<h4>📦 官方树 gentoo</h4><ul>")
		for _, atom := range mainRes {
			var label string
			switch sl := vm[atom]; {
			case sl[0] != "":
				label = " — " + html.EscapeString(sl[0]) // amd64-stable: no symbol
			case sl[1] != "":
				label = " — ~" + html.EscapeString(sl[1]) // testing only: ~arch
			}
			link := html.EscapeString("https://packages.gentoo.org/packages/" + atom)
			fmt.Fprintf(&out, "<li><a href=\"%s\">%s</a>%s</li>", link, html.EscapeString(atom), label)
		}
		out.WriteString("</ul>")
	}

	for _, ov := range overlays {
		hits := ovRes[ov.name]
		if len(hits) == 0 {
			continue
		}
		matched = true
		fmt.Fprintf(&out, "<details><summary>🧩 <b>%s</b>(%d)</summary><ul>", html.EscapeString(ov.name), len(hits))
		for _, atom := range hits {
			var label string
			if ver := pkgC.overlayVer(ov.name, atom); ver != "" {
				label = " — ~" + html.EscapeString(ver) // overlay packages are testing (~arch)
			}
			fmt.Fprintf(&out, "<li><a href=\"%s\">%s</a>%s</li>",
				html.EscapeString(ov.treeURL(atom)), html.EscapeString(atom), label)
		}
		out.WriteString("</ul></details>")
	}

	if matched {
		out.WriteString("<footer><i>~ 为测试版(~arch);无符号为 amd64 稳定版</i></footer>")
	} else {
		out.WriteString("<p>没找到匹配的包,换个更短的关键词试试?</p>")
	}
	return out.String()
}