Skip to content

Commit beb488d

Browse files
viki shi
authored and committed
update violations
1 parent 8409ff3 commit beb488d

4 files changed

Lines changed: 1695 additions & 825 deletions

File tree

pipeline/export/to_frontend.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,11 @@ def build_case(row: Dict[str, Any]) -> Dict[str, Any]:
191191
outcome = data.get("outcome") or row.get("outcome") or ""
192192

193193
sector = norm_sector(data.get("sector") or row.get("sector"))
194-
violations = map_violation_type(violation_type, data_types)
194+
violations = map_violation_type(
195+
violation_type, data_types,
196+
legal_bases=data.get("legal_bases_violated") or [],
197+
summary=data.get("summary") or data.get("what_they_did") or "",
198+
)
195199
impacted_individuals = format_impacted(individuals_int) if (individuals_int and individuals_int > 0) else "Unknown"
196200
severity = severity_from_impact_and_data(individuals_int, data_types)
197201

pipeline/validation/normalizer.py

Lines changed: 119 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -36,12 +36,15 @@
3636
]
3737

3838
VIOLATION_TYPES = [
39-
"Misrepresentation of practices",
40-
"Failure to disclose practices",
41-
"Health breach notification failure",
42-
"Excessive retention of childrens data",
43-
"Failure of parent control over childrens data",
44-
"Failure to obtain parental consent",
39+
"Unauthorized Data Collection",
40+
"Data Breach & Negligence",
41+
"Unauthorized Disclosure/Selling",
42+
"Failure to Honor Consumer Rights",
43+
"Misleading Privacy Policies",
44+
"Invasion of Seclusion",
45+
"False Light/Misappropriation",
46+
"Improper Data Disposal",
47+
"Illegal Monitoring/Surveillance",
4548
]
4649

4750

@@ -106,31 +109,122 @@ def _involves_health_data(data_types: str) -> bool:
106109
return any(k in d for k in _HEALTH_KEYWORDS)
107110

108111

109-
def map_violation_type(solove_type: str | None, data_types: str = "") -> List[str]:
110-
"""Map Solove taxonomy string to frontend ViolationType[] values."""
112+
def map_violation_type(solove_type: str | None, data_types: str = "",
113+
legal_bases: list | None = None,
114+
summary: str = "") -> List[str]:
115+
"""Map Solove taxonomy + case context to frontend ViolationType[] values.
116+
117+
9 violation types:
118+
- Unauthorized Data Collection: collecting without consent, pre-ticked boxes, ignoring opt-out
119+
- Data Breach & Negligence: poor security leading to unauthorized access
120+
- Unauthorized Disclosure/Selling: sharing/selling data to third parties without authorization
121+
- Failure to Honor Consumer Rights: not fulfilling access/delete/correct requests (CCPA, GDPR rights)
122+
- Misleading Privacy Policies: deceptive/inaccurate privacy representations
123+
- Invasion of Seclusion: intruding into private space/affairs
124+
- False Light/Misappropriation: using name/likeness without consent, publishing private facts
125+
- Improper Data Disposal: failing to securely destroy records
126+
- Illegal Monitoring/Surveillance: tracking, cameras, web session recording without consent
127+
"""
111128
if not solove_type:
112-
return []
129+
return ["Unauthorized Data Collection"]
113130
t = solove_type.lower()
131+
dt = (data_types or "").lower()
132+
bases_str = " ".join(legal_bases or []).lower()
133+
s = (summary or "").lower()
114134

115135
result: List[str] = []
116136

117-
if any(k in t for k in [
118-
"disclosure", "exposure", "accessibility",
119-
"breach of confidentiality", "secondary use",
120-
]):
121-
result.append("Failure to disclose practices")
122-
if any(k in t for k in ["insecurity", "security"]) and _involves_health_data(data_types):
123-
result.append("Health breach notification failure")
124-
if any(k in t for k in ["children", "child", "minor", "coppa"]):
125-
result.append("Failure to obtain parental consent")
126-
if any(k in t for k in [
127-
"surveillance", "interrogation", "intrusion",
128-
"decisional interference", "aggregation", "identification",
129-
]):
130-
result.append("Misrepresentation of practices")
131-
137+
# ── Data Breach & Negligence ──────────────────────────────────────────
138+
if (any(k in t for k in ["insecurity"]) or
139+
any(k in s for k in [
140+
"breach", "hack", "unauthorized access", "data breach", "leaked",
141+
"exposed", "security", "inadequate safeguard", "failed to protect",
142+
"compromised", "vulnerability",
143+
])):
144+
result.append("Data Breach & Negligence")
145+
146+
# ── Unauthorized Disclosure/Selling ───────────────────────────────────
147+
if (any(k in t for k in [
148+
"breach of confidentiality", "disclosure", "increased accessibility",
149+
]) or
150+
any(k in s for k in [
151+
"sold", "selling", "shared", "disclosed", "transferred",
152+
"third part", "without authorization", "without consent",
153+
"data broker", "sold data",
154+
])):
155+
result.append("Unauthorized Disclosure/Selling")
156+
157+
# ── Misleading Privacy Policies ───────────────────────────────────────
158+
if (any(k in t for k in ["distortion"]) or
159+
any(k in s for k in [
160+
"misrepresent", "deceptive", "false", "misleading", "claimed",
161+
"privacy policy", "represented that", "unfair", "fraudulent",
162+
"inaccurate", "failed to disclose",
163+
])):
164+
result.append("Misleading Privacy Policies")
165+
166+
# ── Unauthorized Data Collection ──────────────────────────────────────
167+
if (any(k in t for k in [
168+
"surveillance", "interrogation", "aggregation",
169+
"secondary use", "exclusion",
170+
]) or
171+
any(k in s for k in [
172+
"without consent", "without notice", "collected", "pre-ticked",
173+
"opt-out", "opted out", "tracking", "gathered", "harvested",
174+
"scraped", "profiling",
175+
])):
176+
result.append("Unauthorized Data Collection")
177+
178+
# ── Illegal Monitoring/Surveillance ───────────────────────────────────
179+
if (any(k in t for k in ["surveillance", "intrusion"]) or
180+
any(k in s for k in [
181+
"monitor", "surveillance", "tracking", "recorded", "session recording",
182+
"chat box", "wiretap", "camera", "gps", "geolocation", "spyware",
183+
"keystroke", "screen capture",
184+
])):
185+
result.append("Illegal Monitoring/Surveillance")
186+
187+
# ── Failure to Honor Consumer Rights ──────────────────────────────────
188+
if (any(k in t for k in ["exclusion"]) or
189+
any(k in s for k in [
190+
"access request", "deletion request", "right to delete",
191+
"right to access", "right to correct", "opt-out request",
192+
"failed to respond", "consumer rights", "data subject request",
193+
"dsar", "right to erasure", "right to rectification",
194+
]) or
195+
any(k in bases_str for k in [
196+
"ccpa", "cpra", "art. 15", "art. 17", "art. 16",
197+
"right of access", "right to erasure",
198+
])):
199+
result.append("Failure to Honor Consumer Rights")
200+
201+
# ── Invasion of Seclusion ─────────────────────────────────────────────
202+
if (any(k in t for k in ["intrusion", "decisional interference"]) or
203+
any(k in s for k in [
204+
"intrusion", "seclusion", "private space", "private affairs",
205+
"unwanted contact", "stalking", "harassment",
206+
])):
207+
result.append("Invasion of Seclusion")
208+
209+
# ── False Light/Misappropriation ──────────────────────────────────────
210+
if (any(k in t for k in ["appropriation", "exposure"]) or
211+
any(k in s for k in [
212+
"likeness", "false light", "misappropriation", "name or image",
213+
"identity theft", "impersonat", "deepfake", "private facts",
214+
])):
215+
result.append("False Light/Misappropriation")
216+
217+
# ── Improper Data Disposal ────────────────────────────────────────────
218+
if any(k in s for k in [
219+
"disposal", "dispose", "destroy", "shredding", "retention",
220+
"retained", "kept longer", "failed to delete", "improper disposal",
221+
"data retention",
222+
]):
223+
result.append("Improper Data Disposal")
224+
225+
# ── Default fallback ──────────────────────────────────────────────────
132226
if not result:
133-
result.append("Misrepresentation of practices")
227+
result.append("Unauthorized Data Collection")
134228

135229
return [v for v in dict.fromkeys(result) if v in VIOLATION_TYPES]
136230

src/data/cases.ts

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,15 @@ export function parseCompanyWorth(worth: string): number {
1515
}
1616

1717
export type ViolationType =
18-
| "Misrepresentation of practices"
19-
| "Failure to disclose practices"
20-
| "Health breach notification failure"
21-
| "Excessive retention of childrens data"
22-
| "Failure of parent control over childrens data"
23-
| "Failure to obtain parental consent";
18+
| "Unauthorized Data Collection"
19+
| "Data Breach & Negligence"
20+
| "Unauthorized Disclosure/Selling"
21+
| "Failure to Honor Consumer Rights"
22+
| "Misleading Privacy Policies"
23+
| "Invasion of Seclusion"
24+
| "False Light/Misappropriation"
25+
| "Improper Data Disposal"
26+
| "Illegal Monitoring/Surveillance";
2427

2528
export type Jurisdiction = "US FTC" | "California DOJ" | "UK ICO" | "Singapore PDPC" | "EU GDPR" | "EU EDPB" | "Australia OAIC";
2629

@@ -88,12 +91,15 @@ export const JURISDICTIONS: Jurisdiction[] = [
8891
];
8992

9093
export const VIOLATION_TYPES: ViolationType[] = [
91-
"Misrepresentation of practices",
92-
"Failure to disclose practices",
93-
"Health breach notification failure",
94-
"Excessive retention of childrens data",
95-
"Failure of parent control over childrens data",
96-
"Failure to obtain parental consent",
94+
"Unauthorized Data Collection",
95+
"Data Breach & Negligence",
96+
"Unauthorized Disclosure/Selling",
97+
"Failure to Honor Consumer Rights",
98+
"Misleading Privacy Policies",
99+
"Invasion of Seclusion",
100+
"False Light/Misappropriation",
101+
"Improper Data Disposal",
102+
"Illegal Monitoring/Surveillance",
97103
];
98104

99105
export const SECTORS: Sector[] = [

0 commit comments

Comments (0)