-
Notifications
You must be signed in to change notification settings - Fork 391
Expand file tree
/
Copy pathimpl.py
More file actions
88 lines (73 loc) · 3.18 KB
/
Copy pathimpl.py
File metadata and controls
88 lines (73 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# COPYRIGHT NOTICE
# This file is part of the "Universal Biomedical Skills" project.
# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>
# All Rights Reserved.
#
# This code is proprietary and confidential.
# Unauthorized copying of this file, via any medium is strictly prohibited.
#
# Provenance: Authenticated by MD BABU MIA
import argparse
import json
import sys
from typing import List, Dict, Any
class CRISPRPredictor:
"""
Simulates off-target prediction for CRISPR/Cas9.
"""
def __init__(self):
# Mock Genome Database: A small set of sequences that look somewhat like targets
# In a real app, this would query a Bowtie/BWA index or an API like Cas-OFFinder.
self.reference_genome = [
{"chrom": "chr1", "loc": 10500, "seq": "GAGTCCGAGCAGAAGAAGAA", "gene": "TARGET_GENE"}, # Perfect match
{"chrom": "chr2", "loc": 23040, "seq": "GAGTCCGAGCAGAAGAAGAT", "gene": "OFF_TARGET_1"}, # 1 mismatch
{"chrom": "chr5", "loc": 50012, "seq": "GAGTCCGAGCAGAAGTTGAA", "gene": "OFF_TARGET_2"}, # 2 mismatches
{"chrom": "chrX", "loc": 99320, "seq": "TTGTCCGAGCAGAAGAAGAA", "gene": "OFF_TARGET_3"}, # 2 mismatches (5' end)
{"chrom": "chr8", "loc": 11000, "seq": "AAAAAAAAAAAAAAAAAAAA", "gene": "CONTROL"}, # No match
]
def predict(self, sgRNA: str, pam: str = "NGG") -> Dict[str, Any]:
sgRNA = sgRNA.upper()
results = []
for site in self.reference_genome:
ref_seq = site["seq"]
mismatches = self._count_mismatches(sgRNA, ref_seq)
# Simple risk logic
risk = "NONE"
if mismatches == 0:
risk = "ON_TARGET"
elif mismatches <= 2:
risk = "HIGH"
elif mismatches <= 4:
risk = "MEDIUM"
else:
risk = "LOW"
if risk != "LOW":
results.append({
"locus": f"{site['chrom']}:{site['loc']}",
"sequence": ref_seq,
"gene": site["gene"],
"mismatches": mismatches,
"risk_level": risk
})
return {
"query_sequence": sgRNA,
"pam_constraint": pam,
"off_targets_found": len(results),
"predictions": results
}
def _count_mismatches(self, seq1: str, seq2: str) -> int:
if len(seq1) != len(seq2):
return 99 # Length mismatch
return sum(1 for a, b in zip(seq1, seq2) if a != b)
def main():
parser = argparse.ArgumentParser(description="CRISPR Off-Target Predictor")
parser.add_argument("--sequence", type=str, required=True, help="20nt sgRNA sequence")
parser.add_argument("--pam", type=str, default="NGG", help="PAM sequence (default: NGG)")
args = parser.parse_args()
predictor = CRISPRPredictor()
result = predictor.predict(args.sequence, args.pam)
print(json.dumps(result, indent=2))
if __name__ == "__main__":
from typing import Any # lazy import for typing in main
main()
__AUTHOR_SIGNATURE__ = "9a7f3c2e-MD-BABU-MIA-2026-MSSM-SECURE"