Skip to content

Commit 92915ec

Browse files
committed
Add trust signals to enrichment output
1 parent 5648d78 commit 92915ec

2 files changed

Lines changed: 49 additions & 8 deletions

File tree

skill/scripts/enrich_lead.py

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -445,10 +445,37 @@ def summarize(text, snippets, source_url=None):
445445
return None
446446

447447

448-
def compute_confidence(domain, emails, phones, summary, warnings, best_contact_meta=None):
448+
def build_trust_signals(domain, emails, phones, summary, warnings, best_contact_meta=None, site_verification=None):
    """Collect the individual evidence flags that describe an enrichment result.

    Args:
        domain: Primary domain of the lead; only its truthiness is recorded.
        emails: Collection of emails found, or None.
        phones: Collection of phone numbers found, or None.
        summary: Summary text; only its truthiness is recorded.
        warnings: Collection of warnings raised during enrichment, or None.
        best_contact_meta: Optional mapping read for the "official", "strong",
            "weak" and "tier" keys.
        site_verification: Optional mapping read for the "verified" and
            "score" keys.

    Returns:
        A dict with the keys has_domain, has_summary, email_count,
        phone_count, warning_count, warning_penalty, site_verified,
        site_verification_score and a nested best_contact dict.
    """
    # Normalise optional inputs once so every signal treats None the same way;
    # previously warning_penalty called len(warnings) directly and raised on
    # None while warning_count tolerated it.
    warning_count = len(warnings or [])
    verification = site_verification or {}
    contact = best_contact_meta or {}

    # Each warning costs 0.05, capped at 0.3 in total.
    warning_penalty = round(min(0.3, 0.05 * warning_count), 2)

    # A "score" key that is present but None must not reach round().
    verification_score = verification.get("score") or 0.0

    return {
        "has_domain": bool(domain),
        "has_summary": bool(summary),
        "email_count": len(emails or []),
        "phone_count": len(phones or []),
        "warning_count": warning_count,
        "warning_penalty": warning_penalty,
        "site_verified": bool(verification.get("verified")),
        "site_verification_score": round(verification_score, 2),
        "best_contact": {
            "present": bool(best_contact_meta),
            "official": bool(contact.get("official")),
            "strong": bool(contact.get("strong")),
            "weak": bool(contact.get("weak")),
            "tier": contact.get("tier"),
        },
    }
468+
469+
470+
def compute_confidence(domain, emails, phones, summary, warnings, best_contact_meta=None, site_verification=None):
471+
signals = build_trust_signals(domain, emails, phones, summary, warnings, best_contact_meta, site_verification)
449472
score = 0.05
450-
if domain:
451-
score += 0.3
473+
if signals["has_domain"]:
474+
score += 0.25
475+
if signals["site_verified"]:
476+
score += 0.1
477+
elif signals["site_verification_score"] >= 1.0:
478+
score += 0.05
452479
if emails:
453480
score += 0.15
454481
if phones:
@@ -466,8 +493,8 @@ def compute_confidence(domain, emails, phones, summary, warnings, best_contact_m
466493
score -= 0.1
467494
elif emails:
468495
score -= 0.1
469-
score -= min(0.3, 0.05 * len(warnings))
470-
return round(max(0.0, min(1.0, score)), 2)
496+
score -= signals["warning_penalty"]
497+
return round(max(0.0, min(1.0, score)), 2), signals
471498

472499

473500
def enrich(company, region=None, domain=None, query_mode="smart"):
@@ -553,9 +580,18 @@ def enrich(company, region=None, domain=None, query_mode="smart"):
553580
"social_links": social_values,
554581
"snippets": snippets[:5],
555582
"confidence": 0.0,
583+
"trust_signals": {},
556584
"warnings": dedupe(warnings),
557585
}
558-
result["confidence"] = compute_confidence(result["primary_domain"], result["emails"], result["phones"], result["summary"], result["warnings"], best_contact_meta)
586+
result["confidence"], result["trust_signals"] = compute_confidence(
587+
result["primary_domain"],
588+
result["emails"],
589+
result["phones"],
590+
result["summary"],
591+
result["warnings"],
592+
best_contact_meta,
593+
site_verification,
594+
)
559595
return result
560596

561597

tests/test_enrich_lead.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,11 @@ def test_summarize_removes_obvious_junk(self):
5555
def test_compute_confidence_penalizes_weak_external_contact(self):
5656
weak_meta = mod.classify_contact_email("press@gmail.com", "acme.com")
5757
strong_meta = mod.classify_contact_email("hello@acme.com", "acme.com")
58-
weak = mod.compute_confidence("acme.com", ["press@gmail.com"], [], "summary", ["Only weak outreach contacts found"], weak_meta)
59-
strong = mod.compute_confidence("acme.com", ["hello@acme.com"], [], "summary", [], strong_meta)
58+
weak, weak_signals = mod.compute_confidence("acme.com", ["press@gmail.com"], [], "summary", ["Only weak outreach contacts found"], weak_meta)
59+
strong, strong_signals = mod.compute_confidence("acme.com", ["hello@acme.com"], [], "summary", [], strong_meta)
6060
self.assertLess(weak, strong)
61+
self.assertTrue(weak_signals["best_contact"]["weak"])
62+
self.assertTrue(strong_signals["best_contact"]["official"])
6163

6264
def test_enrich_warns_when_only_weak_contacts_exist(self):
6365
original_build_queries = mod.build_queries
@@ -96,6 +98,7 @@ def fake_parse_page(url, base_domain):
9698
self.assertEqual(result["best_contact_source"]["source_url"], "https://acme.com")
9799
self.assertIn("Only weak outreach contacts found", result["warnings"])
98100
self.assertLess(result["confidence"], 0.6)
101+
self.assertTrue(result["trust_signals"]["best_contact"]["weak"])
99102

100103
def test_summarize_uses_longest_snippet_fallback(self):
101104
out = mod.summarize(None, ["short", "this is a much longer snippet about a company and what it does"])
@@ -194,6 +197,8 @@ def fake_parse_page(url, base_domain):
194197
self.assertEqual(result["phone_sources"]["+49 30 123456"]["source_url"], "https://acme.com")
195198
self.assertEqual(result["summary_source"]["source_url"], "https://acme.com")
196199
self.assertTrue(result["site_verification"]["verified"])
200+
self.assertTrue(result["trust_signals"]["site_verified"])
201+
self.assertEqual(result["trust_signals"]["email_count"], 2)
197202

198203

199204
if __name__ == "__main__":

0 commit comments

Comments
 (0)