Skip to content

Commit c5370bb

Browse files
author
your.name
committed
Add scripts.
1 parent 2318a96 commit c5370bb

32 files changed

Lines changed: 19488 additions & 1 deletion

.DS_Store

6 KB
Binary file not shown.

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
.Ruserdata
Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
cat("===========================================================================================
2+
META-ANALYSIS ATHERO-EXPRESS METHYLATION STUDIES 450K 1 & 2
3+
--- correction of eCigarettes variable ---
4+
5+
Version: v1.0
6+
7+
Last update: 2018-06-27
8+
Written by: Sander W. van der Laan (s.w.vanderlaan-2@umcutrecht.nl);
9+
Marten A. Siemelink
10+
11+
Description: Script to handling eCigarettes and number of pack-years.
12+
13+
Requirements: R version 3.4.1 (2017-06-30) -- 'Single Candle', Linux CentOS7, Mac OS X El Capitan+
14+
15+
===========================================================================================")
16+
cat("\n===========================================================================================")
17+
cat("CLEAR THE BOARD")
18+
rm(list = ls())
19+
20+
cat("\n===========================================================================================")
21+
cat("GENERAL R SETUP")
22+
### FUNCTION TO INSTALL PACKAGES
23+
### This function will automatically check in both CRAN and Bioconductor. This is
24+
### a function found by Sander W. van der Laan online from @Samir:
25+
### http://stackoverflow.com/questions/4090169/elegant-way-to-check-for-missing-packages-and-install-them
26+
###
27+
cat("\n* Creating funxtion to install and load packages...")
28+
install.packages.auto <- function(x) {
29+
x <- as.character(substitute(x))
30+
if (isTRUE(x %in% .packages(all.available = TRUE))) {
31+
eval(parse(text = sprintf("require(\"%s\")", x)))
32+
} else {
33+
# Update installed packages - this may mean a full upgrade of R, which in turn
34+
# may not be warrented.
35+
#update.packages(ask = FALSE)
36+
eval(parse(text = sprintf("install.packages(\"%s\", dependencies = TRUE, repos = \"https://cloud.r-project.org/\")", x)))
37+
}
38+
if (isTRUE(x %in% .packages(all.available = TRUE))) {
39+
eval(parse(text = sprintf("require(\"%s\")", x)))
40+
} else {
41+
source("http://bioconductor.org/biocLite.R")
42+
# Update installed packages - this may mean a full upgrade of R, which in turn
43+
# may not be warrented.
44+
#biocLite(character(), ask = FALSE)
45+
eval(parse(text = sprintf("biocLite(\"%s\")", x)))
46+
eval(parse(text = sprintf("require(\"%s\")", x)))
47+
}
48+
}
49+
# In this case I'm keeping track of the various packages, as versions and
50+
# actual loading of the libraries gave issues before.
51+
cat("\n* General packages...\n")
52+
# for survival analyses
53+
install.packages.auto("survival")
54+
install.packages.auto("survminer")
55+
# for general statistics
56+
install.packages.auto("Hmisc")
57+
install.packages.auto("openxlsx")
58+
install.packages.auto("devtools")
59+
install.packages.auto("dplyr")
60+
install.packages.auto("data.table")
61+
install.packages.auto("tableone")
62+
install.packages.auto("haven")
63+
# for methylation/rna data
64+
install.packages.auto("RMySQL") # "RMySQL" will be phased out: https://github.com/r-dbi/RMySQL,
65+
# will be replaced by RMariaDB (https://github.com/r-dbi/RMariaDB)
66+
install.packages.auto("GenomicFeatures")
67+
install.packages.auto("bumphunter")
68+
install.packages.auto("minfi")
69+
install.packages.auto("SummarizedExperiment")
70+
install.packages.auto("IlluminaHumanMethylation450kmanifest")
71+
install.packages.auto("IlluminaHumanMethylation450kanno.ilmn12.hg19")
72+
install.packages.auto("FDb.InfiniumMethylation.hg19")
73+
install.packages.auto("TxDb.Hsapiens.UCSC.hg19.knownGene")
74+
install.packages.auto("org.Hs.eg.db")
75+
install.packages.auto("AnnotationDbi")
76+
# for plotting
77+
install.packages.auto("pheatmap")
78+
install.packages.auto("qqman")
79+
install.packages.auto("forestplot")
80+
# for meta-analysis
81+
install.packages.auto("meta")
82+
install.packages.auto("bacon")
83+
84+
# The actual DNAmArray package
85+
cat("\n* DNAmArray package...\n")
86+
# Also refer to:
87+
# - https://molepi.github.io/DNAmArray_workflow/index.html
88+
# - https://github.com/molepi/DNAmArray
89+
# - https://github.com/bbmri-nl/BBMRIomics
90+
library(devtools)
91+
install_github("molepi/DNAmArray", force = FALSE)
92+
library(DNAmArray)
93+
install_github("molepi/omicsPrint", ref = "R3.4", force = FALSE)
94+
library(omicsPrint)
95+
install_github("bbmri-nl/BBMRIomics", subdir = "BBMRIomics", force = FALSE)
96+
library(BBMRIomics)
97+
98+
### Create datestamp
99+
Today = format(as.Date(as.POSIXlt(Sys.time())), "%Y%m%d")
100+
Today.Report = format(as.Date(as.POSIXlt(Sys.time())), "%A, %B %d, %Y")
101+
102+
### UtrechtSciencePark Colours Scheme
103+
###
104+
### Website to convert HEX to RGB: http://hex.colorrrs.com.
105+
### For some functions you should divide these numbers by 255.
106+
###
107+
### No. Color HEX RGB CMYK CHR MAF/INFO
108+
### --------------------------------------------------------------------------------------------------------------------
109+
### 1 yellow #FBB820 (251,184,32) (0,26.69,87.25,1.57) => 1 or 1.0 > INFO
110+
### 2 gold #F59D10 (245,157,16) (0,35.92,93.47,3.92) => 2
111+
### 3 salmon #E55738 (229,87,56) (0,62.01,75.55,10.2) => 3 or 0.05 < MAF < 0.2 or 0.4 < INFO < 0.6
112+
### 4 darkpink #DB003F ((219,0,63) (0,100,71.23,14.12) => 4
113+
### 5 lightpink #E35493 (227,84,147) (0,63,35.24,10.98) => 5 or 0.8 < INFO < 1.0
114+
### 6 pink #D5267B (213,38,123) (0,82.16,42.25,16.47) => 6
115+
### 7 hardpink #CC0071 (204,0,113) (0,0,0,0) => 7
116+
### 8 lightpurple #A8448A (168,68,138) (0,0,0,0) => 8
117+
### 9 purple #9A3480 (154,52,128) (0,0,0,0) => 9
118+
### 10 lavendel #8D5B9A (141,91,154) (0,0,0,0) => 10
119+
### 11 bluepurple #705296 (112,82,150) (0,0,0,0) => 11
120+
### 12 purpleblue #686AA9 (104,106,169) (0,0,0,0) => 12
121+
### 13 lightpurpleblue #6173AD (97,115,173/101,120,180) (0,0,0,0) => 13
122+
### 14 seablue #4C81BF (76,129,191) (0,0,0,0) => 14
123+
### 15 skyblue #2F8BC9 (47,139,201) (0,0,0,0) => 15
124+
### 16 azurblue #1290D9 (18,144,217) (0,0,0,0) => 16 or 0.01 < MAF < 0.05 or 0.2 < INFO < 0.4
125+
### 17 lightazurblue #1396D8 (19,150,216) (0,0,0,0) => 17
126+
### 18 greenblue #15A6C1 (21,166,193) (0,0,0,0) => 18
127+
### 19 seaweedgreen #5EB17F (94,177,127) (0,0,0,0) => 19
128+
### 20 yellowgreen #86B833 (134,184,51) (0,0,0,0) => 20
129+
### 21 lightmossgreen #C5D220 (197,210,32) (0,0,0,0) => 21
130+
### 22 mossgreen #9FC228 (159,194,40) (0,0,0,0) => 22 or MAF > 0.20 or 0.6 < INFO < 0.8
131+
### 23 lightgreen #78B113 (120,177,19) (0,0,0,0) => 23/X
132+
### 24 green #49A01D (73,160,29) (0,0,0,0) => 24/Y
133+
### 25 grey #595A5C (89,90,92) (0,0,0,0) => 25/XY or MAF < 0.01 or 0.0 < INFO < 0.2
134+
### 26 lightgrey #A2A3A4 (162,163,164) (0,0,0,0) => 26/MT
135+
###
136+
### ADDITIONAL COLORS
137+
### 27 midgrey #D7D8D7
138+
### 28 very lightgrey #ECECEC
139+
### 29 white #FFFFFF
140+
### 30 black #000000
141+
### --------------------------------------------------------------------------------------------------------------------
142+
143+
uithof_color = c("#FBB820","#F59D10","#E55738","#DB003F","#E35493","#D5267B",
144+
"#CC0071","#A8448A","#9A3480","#8D5B9A","#705296","#686AA9",
145+
"#6173AD","#4C81BF","#2F8BC9","#1290D9","#1396D8","#15A6C1",
146+
"#5EB17F","#86B833","#C5D220","#9FC228","#78B113","#49A01D",
147+
"#595A5C","#A2A3A4", "#D7D8D7", "#ECECEC", "#FFFFFF", "#000000")
148+
149+
uithof_color_legend = c("#FBB820", "#F59D10", "#E55738", "#DB003F", "#E35493",
150+
"#D5267B", "#CC0071", "#A8448A", "#9A3480", "#8D5B9A",
151+
"#705296", "#686AA9", "#6173AD", "#4C81BF", "#2F8BC9",
152+
"#1290D9", "#1396D8", "#15A6C1", "#5EB17F", "#86B833",
153+
"#C5D220", "#9FC228", "#78B113", "#49A01D", "#595A5C",
154+
"#A2A3A4", "#D7D8D7", "#ECECEC", "#FFFFFF", "#000000")
155+
156+
### ----------------------------------------------------------------------------
157+
158+
cat("===========================================================================================")
159+
cat("\nSETUP ANALYSIS")
160+
# Assess where we are
161+
getwd()
162+
# Set locations
163+
### Operating System Version
164+
165+
### Mac Pro
166+
# ROOT_loc = "/Volumes/EliteProQx2Media"
167+
168+
### MacBook
169+
ROOT_loc = "/Users/swvanderlaan"
170+
171+
### SOME VARIABLES WE NEED DOWN THE LINE
172+
PROJECTDATASET = "AEMS450KMETA"
173+
PROJECTNAME = "metasmoke"
174+
SUBPROJECTNAME1 = "AEMS450K1"
175+
SUBPROJECTNAME2 = "AEMS450K2"
176+
EWAS_trait = "SmokerCurrent" # Phenotype
177+
178+
INP_AE_loc = paste0(ROOT_loc, "/PLINK/_AE_Originals")
179+
INP_AEMS450K1_loc = paste0(INP_AE_loc, "/AEMS450K1")
180+
INP_AEMS450K2_loc = paste0(INP_AE_loc, "/AEMS450K2")
181+
182+
EPIGENETICS_loc = paste0(ROOT_loc, "/PLINK/analyses/epigenetics")
183+
RES_AEMS450K1_loc = paste0(EPIGENETICS_loc, "/AEMS450K1")
184+
RES_AEMS450K2_loc = paste0(EPIGENETICS_loc, "/AEMS450K2")
185+
186+
ifelse(!dir.exists(file.path(EPIGENETICS_loc, "/",PROJECTDATASET)),
187+
dir.create(file.path(EPIGENETICS_loc, "/",PROJECTDATASET)),
188+
FALSE)
189+
INP_loc = paste0(EPIGENETICS_loc, "/",PROJECTDATASET)
190+
191+
cat("\nCreate a new analysis directory...")
192+
ifelse(!dir.exists(file.path(INP_loc, "/",PROJECTNAME)),
193+
dir.create(file.path(INP_loc, "/",PROJECTNAME)),
194+
FALSE)
195+
ANALYSIS_loc = paste0(INP_loc,"/",PROJECTNAME)
196+
ifelse(!dir.exists(file.path(ANALYSIS_loc, "/PLOTS")),
197+
dir.create(file.path(ANALYSIS_loc, "/PLOTS")),
198+
FALSE)
199+
PLOT_loc = paste0(ANALYSIS_loc,"/PLOTS")
200+
ifelse(!dir.exists(file.path(PLOT_loc, "/COX")),
201+
dir.create(file.path(PLOT_loc, "/COX")),
202+
FALSE)
203+
COX_loc = paste0(PLOT_loc,"/COX")
204+
ifelse(!dir.exists(file.path(PLOT_loc, "/QC")),
205+
dir.create(file.path(PLOT_loc, "/QC")),
206+
FALSE)
207+
QC_loc = paste0(PLOT_loc,"/QC")
208+
ifelse(!dir.exists(file.path(ANALYSIS_loc, "/OUTPUT")),
209+
dir.create(file.path(ANALYSIS_loc, "/OUTPUT")),
210+
FALSE)
211+
OUT_loc = paste0(ANALYSIS_loc, "/OUTPUT")
212+
213+
cat("===========================================================================================")
214+
cat("\nLOAD ATHERO-EXPRESS METHYLATION STUDY DATASETS")
215+
setwd(INP_loc)
216+
list.files()
217+
218+
cat(paste0("\n\n* Load ",PROJECTDATASET," data..."))
219+
cat("\n - loading B/Mvalues of blood samples...") # not available for AEMS450K2
220+
load(paste0(INP_AEMS450K1_loc,"/20170721.aems450k1.MvaluesQCIMP.blood.RData"))
221+
load(paste0(INP_AEMS450K1_loc,"/20170721.aems450k1.BvaluesQCIMP.blood.RData"))
222+
aems450k1.MvaluesQCblood = MvaluesQCblood
223+
aems450k1.BvaluesQCblood = BvaluesQCblood
224+
225+
cat("\n - loading B/Mvalues of plaque samples...")
226+
load(paste0(INP_AEMS450K1_loc,"/20170721.aems450k1.MvaluesQCIMP.plaque.RData"))
227+
load(paste0(INP_AEMS450K1_loc,"/20170721.aems450k1.BvaluesQCIMP.plaque.RData"))
228+
cat("\n - renaming these...")
229+
aems450k1.MvaluesQCplaque = MvaluesQCplaque
230+
aems450k1.BvaluesQCplaque = BvaluesQCplaque
231+
232+
load(paste0(INP_AEMS450K2_loc,"/20170822.aems450k2.MvaluesQCIMP.plaque.RData"))
233+
load(paste0(INP_AEMS450K2_loc,"/20170822.aems450k2.BvaluesQCIMP.plaque.RData"))
234+
cat("\n - renaming these...")
235+
aems450k2.MvaluesQCplaque = MvaluesQCplaque
236+
aems450k2.BvaluesQCplaque = BvaluesQCplaque
237+
rm(MvaluesQCplaque, BvaluesQCplaque, MvaluesQCblood, BvaluesQCblood)
238+
239+
240+
cat("===========================================================================================")
241+
cat("\n[ CORRECTING ESTIMATED NUMBER OF PACK YEARS -- as a response of reviewer question ]")
242+
243+
GENOMIC_loc = "location to SPSS data on our UMC server"
244+
AEDATA_loc = paste0(GENOMIC_loc,"/AE-AAA_GS_DBs")
245+
AEdata = as.data.table(read_spss(paste0(AEDATA_loc,"/2017-4_AtheroExpressDatabase_ScientificAE_20171207.sav")))
246+
247+
AEdataForUpdate <- subset(AEdata, select = c(STUDY_NUMBER, ePackYearsSmoking))
248+
setDF(AEdataForUpdate)
249+
str(AEdataForUpdate)
250+
AEdataForUpdate$STUDY_NUMBER <- as.numeric(AEdataForUpdate$STUDY_NUMBER)
251+
AEdataForUpdate$ePackYearsSmoking <- as.numeric(AEdataForUpdate$ePackYearsSmoking)
252+
str(AEdataForUpdate)
253+
254+
keep.aems450k1.MvaluesQCblood <- aems450k1.MvaluesQCblood$STUDY_NUMBER
255+
keep.aems450k1.BvaluesQCblood <- aems450k1.BvaluesQCblood$STUDY_NUMBER
256+
keep.aems450k1.MvaluesQCplaque <- aems450k1.MvaluesQCplaque$STUDY_NUMBER
257+
keep.aems450k1.BvaluesQCplaque <- aems450k1.BvaluesQCplaque$STUDY_NUMBER
258+
keep.aems450k2.MvaluesQCplaque <- aems450k2.MvaluesQCplaque$STUDY_NUMBER
259+
keep.aems450k2.BvaluesQCplaque <- aems450k2.BvaluesQCplaque$STUDY_NUMBER
260+
261+
AEdataForUpdate.aems450k1.MvaluesQCblood <- AEdataForUpdate[keep.aems450k1.MvaluesQCblood,]
262+
AEdataForUpdate.aems450k1.BvaluesQCblood <- AEdataForUpdate[keep.aems450k1.BvaluesQCblood,]
263+
AEdataForUpdate.aems450k1.MvaluesQCplaque <- AEdataForUpdate[keep.aems450k1.MvaluesQCplaque,]
264+
AEdataForUpdate.aems450k1.BvaluesQCplaque <- AEdataForUpdate[keep.aems450k1.BvaluesQCplaque,]
265+
AEdataForUpdate.aems450k2.MvaluesQCplaque <- AEdataForUpdate[keep.aems450k2.MvaluesQCplaque,]
266+
AEdataForUpdate.aems450k2.BvaluesQCplaque <- AEdataForUpdate[keep.aems450k2.BvaluesQCplaque,]
267+
268+
aems450k1.MvaluesQCblood$ePackYearsSmoking <- NULL
269+
aems450k1.BvaluesQCblood$ePackYearsSmoking <- NULL
270+
aems450k1.MvaluesQCplaque$ePackYearsSmoking <- NULL
271+
aems450k1.BvaluesQCplaque$ePackYearsSmoking <- NULL
272+
aems450k2.MvaluesQCplaque$ePackYearsSmoking <- NULL
273+
aems450k2.BvaluesQCplaque$ePackYearsSmoking <- NULL
274+
275+
colData(aems450k1.MvaluesQCblood) <- cbind(colData(aems450k1.MvaluesQCblood), AEdataForUpdate.aems450k1.MvaluesQCblood)
276+
colData(aems450k1.BvaluesQCblood) <- cbind(colData(aems450k1.BvaluesQCblood), AEdataForUpdate.aems450k1.BvaluesQCblood)
277+
colData(aems450k1.MvaluesQCplaque) <- cbind(colData(aems450k1.MvaluesQCplaque), AEdataForUpdate.aems450k1.MvaluesQCplaque)
278+
colData(aems450k1.BvaluesQCplaque) <- cbind(colData(aems450k1.BvaluesQCplaque), AEdataForUpdate.aems450k1.BvaluesQCplaque)
279+
colData(aems450k2.MvaluesQCplaque) <- cbind(colData(aems450k2.MvaluesQCplaque), AEdataForUpdate.aems450k2.MvaluesQCplaque)
280+
colData(aems450k2.BvaluesQCplaque) <- cbind(colData(aems450k2.BvaluesQCplaque), AEdataForUpdate.aems450k2.BvaluesQCplaque)
281+
282+
### SANITY CHECK
283+
# summary(aems450k1.MvaluesQCblood$ePackYearsSmoking)
284+
# summary(aems450k1.BvaluesQCblood$ePackYearsSmoking)
285+
# summary(aems450k1.MvaluesQCplaque$ePackYearsSmoking)
286+
# summary(aems450k1.BvaluesQCplaque$ePackYearsSmoking)
287+
# summary(aems450k2.MvaluesQCplaque$ePackYearsSmoking)
288+
# summary(aems450k2.BvaluesQCplaque$ePackYearsSmoking)
289+
290+
cat("\n*** saving UPDATE data in between steps ***")
291+
save(aems450k1.MvaluesQCblood, file = paste0(INP_AEMS450K1_loc,"/",Today,".aems450k1.MvaluesQCIMP.blood.RData"))
292+
save(aems450k1.BvaluesQCblood, file = paste0(INP_AEMS450K1_loc,"/",Today,".aems450k1.BvaluesQCIMP.blood.RData"))
293+
save(aems450k1.MvaluesQCplaque, file = paste0(INP_AEMS450K1_loc,"/",Today,".aems450k1.MvaluesQCIMP.plaque.RData"))
294+
save(aems450k1.BvaluesQCplaque, file = paste0(INP_AEMS450K1_loc,"/",Today,".aems450k1.BvaluesQCIMP.plaque.RData"))
295+
save(aems450k2.MvaluesQCplaque, file = paste0(INP_AEMS450K2_loc,"/",Today,".aems450k2.MvaluesQCIMP.plaque.RData"))
296+
save(aems450k2.BvaluesQCplaque, file = paste0(INP_AEMS450K2_loc,"/",Today,".aems450k2.BvaluesQCIMP.plaque.RData"))
297+
298+
cat("\n*** removing the B-value data - as we don't need that ***")
299+
300+
rm(aems450k1.BvaluesQCblood, aems450k1.BvaluesQCplaque, aems450k2.BvaluesQCplaque)
301+

0 commit comments

Comments
 (0)