#!/usr/bin/env bash
# NAM_plot_Supp.R_linux.sh
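### Prepare annotation files for Supplementary Figure 8
## Plotting was done in R on a local machine, and the annotation files in
## their original state are too big for that, so they are reduced here first.
## For this script, all gene annotation files (names ending in 1.gff),
## all TE annotation files (names ending in .gff3) and NAM_array_coords.tsv
## must be in the same directory, and the script must be run from it.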
# Load bedtools 2.26.0 through the environment-modules system.
module load BEDTools/2.26.0-GCCcore-8.3.0

# TE annotations
#######################################
# For every TE annotation (*.gff3) in the current directory, keep only the
# TEs that bedtools reports as intersecting the rows of
# NAM_array_coords.tsv that belong to the same line.
#
# Inputs (current directory):
#   *.gff3                - TE annotations. Column 1 is read as
#                           <line>_<chromosome>; column 9 is read as the
#                           attribute list holding "Classification=...".
#                           Files named mod_* are skipped (see below).
#   NAM_array_coords.tsv  - read as $1 line name, $2 chromosome,
#                           $4 start, $5 end.
# Outputs (current directory):
#   mod_<input name>      - tab-separated: chromosome, start, end,
#                           <attributes before ";Class">_<Classification>.
#   check_chr.bed, sub.bed, reduced_check_chr.bed
#                         - scratch files, overwritten for every input.
# Returns: 0 on success; 1 if bedtools or NAM_array_coords.tsv is missing,
#          or if any input could not be processed cleanly.
#######################################
prep_te_annotations() {
  local te_gff3 line_name
  local status=0

  if ! command -v bedtools >/dev/null 2>&1; then
    printf 'error: bedtools not found; TE annotations were not processed\n' >&2
    return 1
  fi
  if [[ ! -r NAM_array_coords.tsv ]]; then
    printf 'error: NAM_array_coords.tsv is not readable in %s\n' "$PWD" >&2
    return 1
  fi

  # The glob is expanded once, before this run writes any mod_* file.
  for te_gff3 in *.gff3; do
    # An unmatched glob stays literal; there is nothing to process then.
    [[ -f "$te_gff3" ]] || continue
    # Outputs of an earlier run also end in .gff3; re-processing them would
    # create mod_mod_* files.
    if [[ "$te_gff3" == mod_* ]]; then
      continue
    fi

    # Get the line name used to subset the relevant arrays: line 7 of the
    # file, cut at its first underscore.
    # NOTE(review): assumes line 7 is a record whose first column starts
    # with the line name - confirm for every input.
    line_name=$(awk 'NR == 7 { sub(/_.*/, ""); print; exit }' "$te_gff3")
    # Two line names are spelled in upper case in NAM_array_coords.tsv.
    case "$line_name" in
      Il14H) line_name="IL14H" ;;
      Ms71) line_name="MS71" ;;
    esac
    if [[ -z "$line_name" ]]; then
      # An empty name used to select every row of the coordinate table.
      printf 'warning: %s: no line name on line 7; skipped\n' "$te_gff3" >&2
      status=1
      continue
    fi

    # One pass builds the TE BED: chromosome (column 1 with everything up to
    # the last underscore removed), start, end, the attributes before
    # ";Class", and the Classification value. Only rows whose chromosome
    # starts with "chr" are kept.
    # NOTE(review): the original pipeline had a stray `q` stage in front of
    # the ";Class" trim. No such command is defined in this script, so that
    # stage failed and left this column empty, shifting the fields used for
    # the final label. The stage is dropped here; if it stood for a further
    # trim of the attributes (for example removing an ID=/Name= prefix),
    # restore that trim - TODO confirm against the figure input.
    awk '
      BEGIN { OFS = "\t" }
      {
        chrom = $1
        sub(/.*_/, "", chrom)
        if (chrom !~ /^chr/) {
          next
        }
        label = $9
        sub(/;Class.*/, "", label)
        te_class = $9
        sub(/.*Classification=/, "", te_class)
        sub(/;Seq.*/, "", te_class)
        print chrom, $4, $5, label, te_class
      }
    ' "$te_gff3" > check_chr.bed

    # Array coordinates of this line only. The name is compared as a literal
    # prefix of the row instead of being pasted into a regular expression.
    awk -v name="$line_name" '
      BEGIN { OFS = "\t" }
      index($0, name) == 1 && $2 ~ /^chr/ { print $2, $4, $5 }
    ' NAM_array_coords.tsv > sub.bed

    if ! bedtools intersect -a sub.bed -b check_chr.bed -wb \
      > reduced_check_chr.bed; then
      printf 'warning: bedtools intersect failed for %s\n' "$te_gff3" >&2
      status=1
    fi

    # Final file for each line. With -wb the three array columns come first,
    # so columns 7 and 8 are the two label columns of check_chr.bed.
    awk 'BEGIN { FS = OFS = "\t" } { print $1, $2, $3, $7 "_" $8 }' \
      reduced_check_chr.bed > "mod_${te_gff3}"
  done

  return "$status"
}

prep_te_annotations
# gene annotations
#######################################
# For every gene annotation (zea*1.gff) in the current directory, keep the
# "gene" features that bedtools reports as intersecting the rows of
# NAM_array_coords.tsv that belong to the same line, then merge them with
# the reduced TE annotation(s) of that line.
#
# The line name is the part of the file name between the last "mays" and
# the first "_core", upper-cased. It is compared case-insensitively with
# column 1 of NAM_array_coords.tsv (prefix match) and with the names of
# the mod*.gff3 files (substring match).
#
# Inputs (current directory):
#   zea*1.gff             - gene annotations, tab-separated GFF.
#   NAM_array_coords.tsv  - read as $1 line name, $2 chromosome,
#                           $4 start, $5 end.
#   mod*.gff3             - reduced TE annotations, one or more per line.
# Outputs (current directory):
#   mod_<gene gff name>   - tab-separated: chromosome, start, end, "gene".
#   all_<LINE>.bed        - genes plus matching TE rows, passed through
#                           `bedtools sort` and `uniq`.
#   gene.bed, sub.bed, reduced_gene.bed
#                         - scratch files, overwritten for every input.
# Returns: 0 on success; 1 if bedtools or NAM_array_coords.tsv is missing,
#          or if any input could not be processed cleanly.
#######################################
prep_gene_annotations() {
  local gene_gff line_name line_upper te_file
  local -a te_files=() matched_te=() pipe_status=()
  local status=0

  if ! command -v bedtools >/dev/null 2>&1; then
    printf 'error: bedtools not found; gene annotations were not processed\n' >&2
    return 1
  fi
  if [[ ! -r NAM_array_coords.tsv ]]; then
    printf 'error: NAM_array_coords.tsv is not readable in %s\n' "$PWD" >&2
    return 1
  fi

  # TE files for matching, collected once before the loop.
  for te_file in mod*.gff3; do
    if [[ -f "$te_file" ]]; then
      te_files+=("$te_file")
    fi
  done

  for gene_gff in zea*1.gff; do
    # An unmatched glob stays literal; there is nothing to process then.
    [[ -f "$gene_gff" ]] || continue

    # Cut the name at the first "_core", then drop everything up to the
    # last "mays"; what is left is the line name.
    line_name=${gene_gff%%_core*}
    line_name=${line_name##*mays}
    line_upper=${line_name^^}
    if [[ -z "$line_upper" ]]; then
      # An empty name used to match every coordinate row and every TE file.
      printf 'warning: %s: no line name in file name; skipped\n' \
        "$gene_gff" >&2
      status=1
      continue
    fi

    # Gene features on chr* sequences. Splitting on tabs throughout keeps
    # the columns aligned even if the source column contains a space.
    awk '
      BEGIN { FS = OFS = "\t" }
      $3 == "gene" && $1 ~ /^chr/ { print $1, $4, $5, $3 }
    ' "$gene_gff" > gene.bed

    # Array coordinates of this line only. Column 1 is upper-cased for the
    # comparison, and the name is used as a literal prefix instead of being
    # pasted into a regular expression.
    awk -v name="$line_upper" '
      BEGIN { OFS = "\t" }
      index(toupper($1), name) == 1 && $2 ~ /^chr/ { print $2, $4, $5 }
    ' NAM_array_coords.tsv > sub.bed

    if ! bedtools intersect -a sub.bed -b gene.bed -wb > reduced_gene.bed; then
      printf 'warning: bedtools intersect failed for %s\n' "$gene_gff" >&2
      status=1
    fi

    # With -wb the three array columns come first, so column 7 is the
    # feature type column of gene.bed.
    awk 'BEGIN { FS = OFS = "\t" } { print $1, $2, $3, $7 }' \
      reduced_gene.bed > "mod_${gene_gff}"

    # Now match to the TE file(s): every reduced TE annotation whose
    # upper-cased name contains the line name.
    matched_te=()
    for te_file in "${te_files[@]}"; do
      if [[ "${te_file^^}" == *"$line_upper"* ]]; then
        matched_te+=("$te_file")
      fi
    done

    # "stdin" names the input explicitly; the bare "-i" used before relied
    # on bedtools falling back to standard input.
    cat -- "mod_${gene_gff}" "${matched_te[@]}" \
      | bedtools sort -i stdin \
      | uniq > "all_${line_upper}.bed"
    pipe_status=("${PIPESTATUS[@]}")
    if (( pipe_status[0] != 0 || pipe_status[1] != 0 )); then
      printf 'warning: merging annotations failed for %s\n' "$gene_gff" >&2
      status=1
    fi
  done

  return "$status"
}

prep_gene_annotations
########