Skip to content

Commit cba8e33

Browse files
committed
Add multi-sample VCF support with simple build autodetect, manual select
1 parent 8669436 commit cba8e33

7 files changed

Lines changed: 175 additions & 37 deletions

File tree

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ CrossMap (`pip3 install CrossMap`), conversion chain file (`crossmap/GRCh38_to_N
3434
the ISOGG spreadsheet in csv format (`'SNP Index - Human.csv'`). Outputs a `haploy_map.txt`
3535
file which is used by haploy_find.py. See haploy.py for details.
3636

37+
## haploy_anno_import.py
38+
39+
This example script is used to import your own annotations that can be attached to the reported tree nodes. As an example the script will
40+
import YFull person IDs, and also some selected FTDNA project files. Open project chart tables in a browser, select page
41+
size so that everything fits in one page, and edit the corresponding lines at the end of the script. Snipsa will load any
42+
files starting with haploy_annodb. An example file is included.
43+
3744
## haplomt_find.py
3845

3946
This small tool reads a raw SNP data file and lists MT chromosome haplogroup information. You must first initialize the mutation

haplomt.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ def print_data(do_print=True):
6464
return rep
6565

6666

67-
def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force=''):
67+
def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', vcf_sample='', force_build=0):
6868
rep=''
69-
snpset, meta = snpload.load(fname, ['MT'])
69+
snpset, meta = snpload.load(fname, ['MT'], vcf_sample=vcf_sample, force_build=force_build)
7070
if 'MT' not in snpset:
7171
return "No MT data found\n"
7272

haplomt_find.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@
1111
force=''
1212
filt=''
1313
all=False
14+
force_build=0
15+
vcf_sample=''
1416

1517
parser = argparse.ArgumentParser()
1618
parser.add_argument('-s', '--single', help='Analyse a path for single group')
1719
parser.add_argument('-a', '--all', action='store_true', help='Show listing of all found mutations')
1820
parser.add_argument('-n', '--num', help='Show num best matches')
21+
parser.add_argument('-v', '--vcf-sample', help='VCF sample select (regexp)')
22+
parser.add_argument('-b', '--build', help='Force build36/37&38 input')
1923
parser.add_argument('file', nargs='+')
2024

2125
args = parser.parse_args()
@@ -27,6 +31,10 @@
2731
n_multi = int(args.num)
2832
if args.all:
2933
all=True
34+
if args.vcf_sample:
35+
vcf_sample = args.vcf_sample
36+
if args.build:
37+
force_build = int(args.build)
3038

3139
if len(args.file) < 1:
3240
print(sys.argv[0]+" <filename>")
@@ -37,7 +45,7 @@
3745
print("DB loaded!")
3846

3947
print("Loading chr data...")
40-
rep = haplomt.report(args.file[0], n_single, do_all=all, filt=filt, force=force)
48+
rep = haplomt.report(args.file[0], n_single, do_all=all, filt=filt, force=force, vcf_sample=vcf_sample, force_build=force_build)
4149
print(rep)
4250

4351
else:
@@ -48,7 +56,7 @@
4856
lookfor = args.file[0].split(',')
4957
for fname in args.file[1:]:
5058
#try:
51-
snpset, meta = snpload.load(fname, ['MT'])
59+
snpset, meta = snpload.load(fname, ['MT'], vcf_sample=vcf_sample, force_build=force_build)
5260

5361
if 'MT' not in snpset:
5462
print('%s: no MT data'%fname)

haploy.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,9 @@ def path_str(ut, n):
133133

134134
return rep
135135

136-
def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', min_match_level=0):
136+
def report(fname, n, do_uptree=True, do_extra=True, do_all=False, filt='', force='', min_match_level=0, vcf_sample='', force_build=0):
137137
rep=''
138-
snpset, meta = snpload.load(fname, ['Y'])
138+
snpset, meta = snpload.load(fname, ['Y'], vcf_sample=vcf_sample, force_build=force_build)
139139
if 'Y' not in snpset:
140140
return "No Y data found\n"
141141

@@ -714,7 +714,7 @@ def show_db2():
714714
print(m)
715715

716716
blacklist_etc='M8990' #str etc
717-
blacklist_yb='Z1908 Y13952 Z6171 PF1401 M11813 YSC0001289 BY2821 M11836 M3745 M11838 M11843'
717+
blacklist_yb='M3745'
718718
blacklist_yf='Z8834 Z7451 YP1757 YP2129 YP1822 YP1795 YP2228 YP1809 YP2229 YP1948 YP2226 YP1827 L508'
719719
blacklist_yf+=' Y125394 Y125393 Y125392 Y125391 Y125390 Y125389 Y125396 Y125397 Y125408 [report-spacer] V1896 PAGE65.1 Y2363 PF3515 PF3512 PF3507 PF3596 Z6023 M547 A3073 Z1716 PF5827 PF1534 PF6011 PF2634 PF2635'
720720
blacklist_rootambi='BY229589 Z2533 DFZ77 M11801 FT227770 Y3946 Y125419 FT227767 Y1578 CTS12490 FT227774 YP1740 Y125394 Y125393 Y125392 Y125391 Y125390' #TODO

haploy_find.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,16 @@
1414
min_match_level=0
1515
min_tree_load_level=0
1616
new_yfind=1
17+
force_build=0
18+
vcf_sample=''
1719

1820
parser = argparse.ArgumentParser()
1921
parser.add_argument('-s', '--single', help='Analyse a path for single group')
2022
parser.add_argument('-a', '--all', action='store_true', help='Show listing of all found mutations')
2123
parser.add_argument('-n', '--num', help='Show num best matches')
2224
parser.add_argument('-q', '--quick', help='Quick mode')
25+
parser.add_argument('-v', '--vcf-sample', help='VCF sample select (regexp)')
26+
parser.add_argument('-b', '--build', help='Force build36/37&38 input')
2327
parser.add_argument('file', nargs='+')
2428

2529
args = parser.parse_args()
@@ -34,6 +38,10 @@
3438
if args.quick:
3539
min_match_level=int(args.quick)
3640
min_tree_load_level=int(args.quick)
41+
if args.vcf_sample:
42+
vcf_sample = args.vcf_sample
43+
if args.build:
44+
force_build = int(args.build)
3745

3846
if len(args.file) < 2:
3947

@@ -42,7 +50,7 @@
4250
haploy.load_db2j(min_tree_load_level=min_tree_load_level)
4351
print("DB loaded!")
4452
haploy.load_annotations('haploy_annodb_*.txt')
45-
rep = haploy.report(args.file[0], n_single, do_all=all, filt=filt, force=force, min_match_level=min_match_level)
53+
rep = haploy.report(args.file[0], n_single, do_all=all, filt=filt, force=force, min_match_level=min_match_level, vcf_sample=vcf_sample, force_build=force_build)
4654
print(rep)
4755
else:
4856
# keep old one available
@@ -69,7 +77,8 @@
6977
haploy.load_db2j(min_tree_load_level=min_tree_load_level)
7078
print("DB loaded!")
7179
for fname in args.file[1:]:
72-
snpset, meta = snpload.load(fname, ['Y'])
80+
#TODO: loop over vcf samples
81+
snpset, meta = snpload.load(fname, ['Y'], vcf_sample=vcf_sample, force_build=force_build)
7382

7483
if 'Y' not in snpset:
7584
print('%s: no Y data'%fname)

snipsa-gui.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,16 @@ def handle_findmt():
4444
do_uptree = report_snps.get()
4545
do_all = report_all.get()
4646
force = pathvar.get()
47+
vcf_sample = vcfvar.get()
48+
buildstr = buildvar.get()
49+
force_build=0
50+
if buildstr == 'Build36': force_build = 36
51+
if buildstr == 'Build37': force_build = 37
52+
if buildstr == 'Build38': force_build = 38
4753
fname = fnamevar.get()
4854
print("Reporting file: "+fname)
4955
num = int(numbox.get())
50-
rep = haplomt.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force)
56+
rep = haplomt.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force, vcf_sample=vcf_sample, force_build=force_build)
5157
print("Done")
5258
scr.config(state=tk.NORMAL)
5359
scr.delete('1.0', tk.END)
@@ -64,10 +70,16 @@ def handle_findy():
6470
do_uptree = report_snps.get()
6571
do_all = report_all.get()
6672
force = pathvar.get()
73+
vcf_sample = vcfvar.get()
74+
buildstr = buildvar.get()
75+
force_build=0
76+
if buildstr == 'Build36': force_build = 36
77+
if buildstr == 'Build37': force_build = 37
78+
if buildstr == 'Build38': force_build = 38
6779
fname = fnamevar.get()
6880
print("Reporting file: "+fname)
6981
num = int(numbox.get())
70-
rep = haploy.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force)
82+
rep = haploy.report(fname, num, do_uptree=do_uptree, do_extra=do_uptree, do_all=do_all, filt=force, force=force, vcf_sample=vcf_sample, force_build=force_build)
7183
print("Done")
7284
scr.config(state=tk.NORMAL)
7385
scr.delete('1.0', tk.END)
@@ -86,12 +98,25 @@ def handle_findy():
8698
# File
8799
fframe=tk.Frame(hdrframe)
88100
fframe.pack(fill='both')
101+
if bam_support:
102+
cbutton3 = tk.Button(fframe, text="Import BAM", command=handle_bam_select)
103+
cbutton3.pack(side=tk.LEFT)
89104
button = tk.Button(fframe, text="Choose file", command=handle_file_select)
90105
button.pack(side=tk.LEFT)
91106
fnamevar = tk.StringVar()
92107
fnamevar.set("No file selected")
93108
fnamelabel=tk.Label(fframe, textvariable=fnamevar, anchor='w')
94-
fnamelabel.pack(side=tk.RIGHT, fill='both', expand=True)
109+
fnamelabel.pack(side=tk.LEFT, fill='both', expand=True)
110+
vcflabel=tk.Label(fframe, text='VCF sample:', anchor='w')
111+
vcflabel.pack(side=tk.LEFT, fill='both')
112+
vcfvar = tk.StringVar()
113+
vcfbox = tk.Entry(fframe, textvariable=vcfvar,width=10)
114+
vcfbox.pack(side=tk.LEFT)
115+
buildvar = tk.StringVar()
116+
buildchoices = {'Auto', 'Build36', 'Build37', 'Build38'}
117+
buildvar.set('Auto')
118+
builddropdown = tk.OptionMenu(fframe, buildvar, *buildchoices)
119+
builddropdown.pack(side=tk.LEFT)
95120

96121
# Settings
97122
sframe=tk.Frame(hdrframe)
@@ -123,9 +148,6 @@ def handle_findy():
123148
cbutton1.pack(side=tk.LEFT, fill='x', expand=True)
124149
cbutton2 = tk.Button(cframe, text="Find Y", command=handle_findy)
125150
cbutton2.pack(side=tk.LEFT, fill='x', expand=True)
126-
if bam_support:
127-
cbutton3 = tk.Button(cframe, text="Import BAM", command=handle_bam_select)
128-
cbutton3.pack(side=tk.LEFT, fill='x', expand=True)
129151

130152
# Result area
131153
scr=scrolledtext.ScrolledText(window, wrap=tk.WORD)

0 commit comments

Comments
 (0)