|
| 1 | +cat("=========================================================================================== |
| 2 | + META-ANALYSIS ATHERO-EXPRESS METHYLATION STUDIES 450K 1 & 2 |
| 3 | + --- correction of eCigarettes variable --- |
| 4 | + |
| 5 | + Version: v1.0 |
| 6 | + |
| 7 | + Last update: 2018-06-27 |
| 8 | + Written by: Sander W. van der Laan (s.w.vanderlaan-2@umcutrecht.nl); |
| 9 | + Marten A. Siemelink |
| 10 | + |
| 11 | + Description: Script to handling eCigarettes and number of pack-years. |
| 12 | + |
| 13 | + Requirements: R version 3.4.1 (2017-06-30) -- 'Single Candle', Linux CentOS7, Mac OS X El Capitan+ |
| 14 | + |
| 15 | + ===========================================================================================") |
| 16 | +cat("\n===========================================================================================") |
| 17 | +cat("CLEAR THE BOARD") |
| 18 | +rm(list = ls()) |
| 19 | + |
| 20 | +cat("\n===========================================================================================") |
| 21 | +cat("GENERAL R SETUP") |
| 22 | +### FUNCTION TO INSTALL PACKAGES |
| 23 | +### This function will automatically check in both CRAN and Bioconductor. This is |
| 24 | +### a function found by Sander W. van der Laan online from @Samir: |
| 25 | +### http://stackoverflow.com/questions/4090169/elegant-way-to-check-for-missing-packages-and-install-them |
| 26 | +### |
| 27 | +cat("\n* Creating funxtion to install and load packages...") |
| 28 | +install.packages.auto <- function(x) { |
| 29 | + x <- as.character(substitute(x)) |
| 30 | + if (isTRUE(x %in% .packages(all.available = TRUE))) { |
| 31 | + eval(parse(text = sprintf("require(\"%s\")", x))) |
| 32 | + } else { |
| 33 | + # Update installed packages - this may mean a full upgrade of R, which in turn |
| 34 | + # may not be warrented. |
| 35 | + #update.packages(ask = FALSE) |
| 36 | + eval(parse(text = sprintf("install.packages(\"%s\", dependencies = TRUE, repos = \"https://cloud.r-project.org/\")", x))) |
| 37 | + } |
| 38 | + if (isTRUE(x %in% .packages(all.available = TRUE))) { |
| 39 | + eval(parse(text = sprintf("require(\"%s\")", x))) |
| 40 | + } else { |
| 41 | + source("http://bioconductor.org/biocLite.R") |
| 42 | + # Update installed packages - this may mean a full upgrade of R, which in turn |
| 43 | + # may not be warrented. |
| 44 | + #biocLite(character(), ask = FALSE) |
| 45 | + eval(parse(text = sprintf("biocLite(\"%s\")", x))) |
| 46 | + eval(parse(text = sprintf("require(\"%s\")", x))) |
| 47 | + } |
| 48 | +} |
| 49 | +# In this case I'm keeping track of the various packages, as versions and |
| 50 | +# actual loading of the libraries gave issues before. |
| 51 | +cat("\n* General packages...\n") |
| 52 | +# for survival analyses |
| 53 | +install.packages.auto("survival") |
| 54 | +install.packages.auto("survminer") |
| 55 | +# for general statistics |
| 56 | +install.packages.auto("Hmisc") |
| 57 | +install.packages.auto("openxlsx") |
| 58 | +install.packages.auto("devtools") |
| 59 | +install.packages.auto("dplyr") |
| 60 | +install.packages.auto("data.table") |
| 61 | +install.packages.auto("tableone") |
| 62 | +install.packages.auto("haven") |
| 63 | +# for methylation/rna data |
| 64 | +install.packages.auto("RMySQL") # "RMySQL" will be phased out: https://github.com/r-dbi/RMySQL, |
| 65 | +# will be replaced by RMariaDB (https://github.com/r-dbi/RMariaDB) |
| 66 | +install.packages.auto("GenomicFeatures") |
| 67 | +install.packages.auto("bumphunter") |
| 68 | +install.packages.auto("minfi") |
| 69 | +install.packages.auto("SummarizedExperiment") |
| 70 | +install.packages.auto("IlluminaHumanMethylation450kmanifest") |
| 71 | +install.packages.auto("IlluminaHumanMethylation450kanno.ilmn12.hg19") |
| 72 | +install.packages.auto("FDb.InfiniumMethylation.hg19") |
| 73 | +install.packages.auto("TxDb.Hsapiens.UCSC.hg19.knownGene") |
| 74 | +install.packages.auto("org.Hs.eg.db") |
| 75 | +install.packages.auto("AnnotationDbi") |
| 76 | +# for plotting |
| 77 | +install.packages.auto("pheatmap") |
| 78 | +install.packages.auto("qqman") |
| 79 | +install.packages.auto("forestplot") |
| 80 | +# for meta-analysis |
| 81 | +install.packages.auto("meta") |
| 82 | +install.packages.auto("bacon") |
| 83 | + |
| 84 | +# The actual DNAmArray package |
| 85 | +cat("\n* DNAmArray package...\n") |
| 86 | +# Also refer to: |
| 87 | +# - https://molepi.github.io/DNAmArray_workflow/index.html |
| 88 | +# - https://github.com/molepi/DNAmArray |
| 89 | +# - https://github.com/bbmri-nl/BBMRIomics |
| 90 | +library(devtools) |
| 91 | +install_github("molepi/DNAmArray", force = FALSE) |
| 92 | +library(DNAmArray) |
| 93 | +install_github("molepi/omicsPrint", ref = "R3.4", force = FALSE) |
| 94 | +library(omicsPrint) |
| 95 | +install_github("bbmri-nl/BBMRIomics", subdir = "BBMRIomics", force = FALSE) |
| 96 | +library(BBMRIomics) |
| 97 | + |
| 98 | +### Create datestamp |
| 99 | +Today = format(as.Date(as.POSIXlt(Sys.time())), "%Y%m%d") |
| 100 | +Today.Report = format(as.Date(as.POSIXlt(Sys.time())), "%A, %B %d, %Y") |
| 101 | + |
| 102 | +### UtrechtSciencePark Colours Scheme |
| 103 | +### |
| 104 | +### Website to convert HEX to RGB: http://hex.colorrrs.com. |
| 105 | +### For some functions you should divide these numbers by 255. |
| 106 | +### |
| 107 | +### No. Color HEX RGB CMYK CHR MAF/INFO |
| 108 | +### -------------------------------------------------------------------------------------------------------------------- |
| 109 | +### 1 yellow #FBB820 (251,184,32) (0,26.69,87.25,1.57) => 1 or 1.0 > INFO |
| 110 | +### 2 gold #F59D10 (245,157,16) (0,35.92,93.47,3.92) => 2 |
| 111 | +### 3 salmon #E55738 (229,87,56) (0,62.01,75.55,10.2) => 3 or 0.05 < MAF < 0.2 or 0.4 < INFO < 0.6 |
| 112 | +### 4 darkpink #DB003F ((219,0,63) (0,100,71.23,14.12) => 4 |
| 113 | +### 5 lightpink #E35493 (227,84,147) (0,63,35.24,10.98) => 5 or 0.8 < INFO < 1.0 |
| 114 | +### 6 pink #D5267B (213,38,123) (0,82.16,42.25,16.47) => 6 |
| 115 | +### 7 hardpink #CC0071 (204,0,113) (0,0,0,0) => 7 |
| 116 | +### 8 lightpurple #A8448A (168,68,138) (0,0,0,0) => 8 |
| 117 | +### 9 purple #9A3480 (154,52,128) (0,0,0,0) => 9 |
| 118 | +### 10 lavendel #8D5B9A (141,91,154) (0,0,0,0) => 10 |
| 119 | +### 11 bluepurple #705296 (112,82,150) (0,0,0,0) => 11 |
| 120 | +### 12 purpleblue #686AA9 (104,106,169) (0,0,0,0) => 12 |
| 121 | +### 13 lightpurpleblue #6173AD (97,115,173/101,120,180) (0,0,0,0) => 13 |
| 122 | +### 14 seablue #4C81BF (76,129,191) (0,0,0,0) => 14 |
| 123 | +### 15 skyblue #2F8BC9 (47,139,201) (0,0,0,0) => 15 |
| 124 | +### 16 azurblue #1290D9 (18,144,217) (0,0,0,0) => 16 or 0.01 < MAF < 0.05 or 0.2 < INFO < 0.4 |
| 125 | +### 17 lightazurblue #1396D8 (19,150,216) (0,0,0,0) => 17 |
| 126 | +### 18 greenblue #15A6C1 (21,166,193) (0,0,0,0) => 18 |
| 127 | +### 19 seaweedgreen #5EB17F (94,177,127) (0,0,0,0) => 19 |
| 128 | +### 20 yellowgreen #86B833 (134,184,51) (0,0,0,0) => 20 |
| 129 | +### 21 lightmossgreen #C5D220 (197,210,32) (0,0,0,0) => 21 |
| 130 | +### 22 mossgreen #9FC228 (159,194,40) (0,0,0,0) => 22 or MAF > 0.20 or 0.6 < INFO < 0.8 |
| 131 | +### 23 lightgreen #78B113 (120,177,19) (0,0,0,0) => 23/X |
| 132 | +### 24 green #49A01D (73,160,29) (0,0,0,0) => 24/Y |
| 133 | +### 25 grey #595A5C (89,90,92) (0,0,0,0) => 25/XY or MAF < 0.01 or 0.0 < INFO < 0.2 |
| 134 | +### 26 lightgrey #A2A3A4 (162,163,164) (0,0,0,0) => 26/MT |
| 135 | +### |
| 136 | +### ADDITIONAL COLORS |
| 137 | +### 27 midgrey #D7D8D7 |
| 138 | +### 28 very lightgrey #ECECEC |
| 139 | +### 29 white #FFFFFF |
| 140 | +### 30 black #000000 |
| 141 | +### -------------------------------------------------------------------------------------------------------------------- |
| 142 | + |
| 143 | +uithof_color = c("#FBB820","#F59D10","#E55738","#DB003F","#E35493","#D5267B", |
| 144 | + "#CC0071","#A8448A","#9A3480","#8D5B9A","#705296","#686AA9", |
| 145 | + "#6173AD","#4C81BF","#2F8BC9","#1290D9","#1396D8","#15A6C1", |
| 146 | + "#5EB17F","#86B833","#C5D220","#9FC228","#78B113","#49A01D", |
| 147 | + "#595A5C","#A2A3A4", "#D7D8D7", "#ECECEC", "#FFFFFF", "#000000") |
| 148 | + |
| 149 | +uithof_color_legend = c("#FBB820", "#F59D10", "#E55738", "#DB003F", "#E35493", |
| 150 | + "#D5267B", "#CC0071", "#A8448A", "#9A3480", "#8D5B9A", |
| 151 | + "#705296", "#686AA9", "#6173AD", "#4C81BF", "#2F8BC9", |
| 152 | + "#1290D9", "#1396D8", "#15A6C1", "#5EB17F", "#86B833", |
| 153 | + "#C5D220", "#9FC228", "#78B113", "#49A01D", "#595A5C", |
| 154 | + "#A2A3A4", "#D7D8D7", "#ECECEC", "#FFFFFF", "#000000") |
| 155 | + |
| 156 | +### ---------------------------------------------------------------------------- |
| 157 | + |
| 158 | +cat("===========================================================================================") |
| 159 | +cat("\nSETUP ANALYSIS") |
| 160 | +# Assess where we are |
| 161 | +getwd() |
| 162 | +# Set locations |
| 163 | +### Operating System Version |
| 164 | + |
| 165 | +### Mac Pro |
| 166 | +# ROOT_loc = "/Volumes/EliteProQx2Media" |
| 167 | + |
| 168 | +### MacBook |
| 169 | +ROOT_loc = "/Users/swvanderlaan" |
| 170 | + |
| 171 | +### SOME VARIABLES WE NEED DOWN THE LINE |
| 172 | +PROJECTDATASET = "AEMS450KMETA" |
| 173 | +PROJECTNAME = "metasmoke" |
| 174 | +SUBPROJECTNAME1 = "AEMS450K1" |
| 175 | +SUBPROJECTNAME2 = "AEMS450K2" |
| 176 | +EWAS_trait = "SmokerCurrent" # Phenotype |
| 177 | + |
| 178 | +INP_AE_loc = paste0(ROOT_loc, "/PLINK/_AE_Originals") |
| 179 | +INP_AEMS450K1_loc = paste0(INP_AE_loc, "/AEMS450K1") |
| 180 | +INP_AEMS450K2_loc = paste0(INP_AE_loc, "/AEMS450K2") |
| 181 | + |
| 182 | +EPIGENETICS_loc = paste0(ROOT_loc, "/PLINK/analyses/epigenetics") |
| 183 | +RES_AEMS450K1_loc = paste0(EPIGENETICS_loc, "/AEMS450K1") |
| 184 | +RES_AEMS450K2_loc = paste0(EPIGENETICS_loc, "/AEMS450K2") |
| 185 | + |
| 186 | +ifelse(!dir.exists(file.path(EPIGENETICS_loc, "/",PROJECTDATASET)), |
| 187 | + dir.create(file.path(EPIGENETICS_loc, "/",PROJECTDATASET)), |
| 188 | + FALSE) |
| 189 | +INP_loc = paste0(EPIGENETICS_loc, "/",PROJECTDATASET) |
| 190 | + |
| 191 | +cat("\nCreate a new analysis directory...") |
| 192 | +ifelse(!dir.exists(file.path(INP_loc, "/",PROJECTNAME)), |
| 193 | + dir.create(file.path(INP_loc, "/",PROJECTNAME)), |
| 194 | + FALSE) |
| 195 | +ANALYSIS_loc = paste0(INP_loc,"/",PROJECTNAME) |
| 196 | +ifelse(!dir.exists(file.path(ANALYSIS_loc, "/PLOTS")), |
| 197 | + dir.create(file.path(ANALYSIS_loc, "/PLOTS")), |
| 198 | + FALSE) |
| 199 | +PLOT_loc = paste0(ANALYSIS_loc,"/PLOTS") |
| 200 | +ifelse(!dir.exists(file.path(PLOT_loc, "/COX")), |
| 201 | + dir.create(file.path(PLOT_loc, "/COX")), |
| 202 | + FALSE) |
| 203 | +COX_loc = paste0(PLOT_loc,"/COX") |
| 204 | +ifelse(!dir.exists(file.path(PLOT_loc, "/QC")), |
| 205 | + dir.create(file.path(PLOT_loc, "/QC")), |
| 206 | + FALSE) |
| 207 | +QC_loc = paste0(PLOT_loc,"/QC") |
| 208 | +ifelse(!dir.exists(file.path(ANALYSIS_loc, "/OUTPUT")), |
| 209 | + dir.create(file.path(ANALYSIS_loc, "/OUTPUT")), |
| 210 | + FALSE) |
| 211 | +OUT_loc = paste0(ANALYSIS_loc, "/OUTPUT") |
| 212 | + |
| 213 | +cat("===========================================================================================") |
| 214 | +cat("\nLOAD ATHERO-EXPRESS METHYLATION STUDY DATASETS") |
| 215 | +setwd(INP_loc) |
| 216 | +list.files() |
| 217 | + |
| 218 | +cat(paste0("\n\n* Load ",PROJECTDATASET," data...")) |
| 219 | +cat("\n - loading B/Mvalues of blood samples...") # not available for AEMS450K2 |
| 220 | +load(paste0(INP_AEMS450K1_loc,"/20170721.aems450k1.MvaluesQCIMP.blood.RData")) |
| 221 | +load(paste0(INP_AEMS450K1_loc,"/20170721.aems450k1.BvaluesQCIMP.blood.RData")) |
| 222 | +aems450k1.MvaluesQCblood = MvaluesQCblood |
| 223 | +aems450k1.BvaluesQCblood = BvaluesQCblood |
| 224 | + |
| 225 | +cat("\n - loading B/Mvalues of plaque samples...") |
| 226 | +load(paste0(INP_AEMS450K1_loc,"/20170721.aems450k1.MvaluesQCIMP.plaque.RData")) |
| 227 | +load(paste0(INP_AEMS450K1_loc,"/20170721.aems450k1.BvaluesQCIMP.plaque.RData")) |
| 228 | +cat("\n - renaming these...") |
| 229 | +aems450k1.MvaluesQCplaque = MvaluesQCplaque |
| 230 | +aems450k1.BvaluesQCplaque = BvaluesQCplaque |
| 231 | + |
| 232 | +load(paste0(INP_AEMS450K2_loc,"/20170822.aems450k2.MvaluesQCIMP.plaque.RData")) |
| 233 | +load(paste0(INP_AEMS450K2_loc,"/20170822.aems450k2.BvaluesQCIMP.plaque.RData")) |
| 234 | +cat("\n - renaming these...") |
| 235 | +aems450k2.MvaluesQCplaque = MvaluesQCplaque |
| 236 | +aems450k2.BvaluesQCplaque = BvaluesQCplaque |
| 237 | +rm(MvaluesQCplaque, BvaluesQCplaque, MvaluesQCblood, BvaluesQCblood) |
| 238 | + |
| 239 | + |
| 240 | +cat("===========================================================================================") |
| 241 | +cat("\n[ CORRECTING ESTIMATED NUMBER OF PACK YEARS -- as a response of reviewer question ]") |
| 242 | + |
| 243 | +GENOMIC_loc = "location to SPSS data on our UMC server" |
| 244 | +AEDATA_loc = paste0(GENOMIC_loc,"/AE-AAA_GS_DBs") |
| 245 | +AEdata = as.data.table(read_spss(paste0(AEDATA_loc,"/2017-4_AtheroExpressDatabase_ScientificAE_20171207.sav"))) |
| 246 | + |
| 247 | +AEdataForUpdate <- subset(AEdata, select = c(STUDY_NUMBER, ePackYearsSmoking)) |
| 248 | +setDF(AEdataForUpdate) |
| 249 | +str(AEdataForUpdate) |
| 250 | +AEdataForUpdate$STUDY_NUMBER <- as.numeric(AEdataForUpdate$STUDY_NUMBER) |
| 251 | +AEdataForUpdate$ePackYearsSmoking <- as.numeric(AEdataForUpdate$ePackYearsSmoking) |
| 252 | +str(AEdataForUpdate) |
| 253 | + |
| 254 | +keep.aems450k1.MvaluesQCblood <- aems450k1.MvaluesQCblood$STUDY_NUMBER |
| 255 | +keep.aems450k1.BvaluesQCblood <- aems450k1.BvaluesQCblood$STUDY_NUMBER |
| 256 | +keep.aems450k1.MvaluesQCplaque <- aems450k1.MvaluesQCplaque$STUDY_NUMBER |
| 257 | +keep.aems450k1.BvaluesQCplaque <- aems450k1.BvaluesQCplaque$STUDY_NUMBER |
| 258 | +keep.aems450k2.MvaluesQCplaque <- aems450k2.MvaluesQCplaque$STUDY_NUMBER |
| 259 | +keep.aems450k2.BvaluesQCplaque <- aems450k2.BvaluesQCplaque$STUDY_NUMBER |
| 260 | + |
| 261 | +AEdataForUpdate.aems450k1.MvaluesQCblood <- AEdataForUpdate[keep.aems450k1.MvaluesQCblood,] |
| 262 | +AEdataForUpdate.aems450k1.BvaluesQCblood <- AEdataForUpdate[keep.aems450k1.BvaluesQCblood,] |
| 263 | +AEdataForUpdate.aems450k1.MvaluesQCplaque <- AEdataForUpdate[keep.aems450k1.MvaluesQCplaque,] |
| 264 | +AEdataForUpdate.aems450k1.BvaluesQCplaque <- AEdataForUpdate[keep.aems450k1.BvaluesQCplaque,] |
| 265 | +AEdataForUpdate.aems450k2.MvaluesQCplaque <- AEdataForUpdate[keep.aems450k2.MvaluesQCplaque,] |
| 266 | +AEdataForUpdate.aems450k2.BvaluesQCplaque <- AEdataForUpdate[keep.aems450k2.BvaluesQCplaque,] |
| 267 | + |
| 268 | +aems450k1.MvaluesQCblood$ePackYearsSmoking <- NULL |
| 269 | +aems450k1.BvaluesQCblood$ePackYearsSmoking <- NULL |
| 270 | +aems450k1.MvaluesQCplaque$ePackYearsSmoking <- NULL |
| 271 | +aems450k1.BvaluesQCplaque$ePackYearsSmoking <- NULL |
| 272 | +aems450k2.MvaluesQCplaque$ePackYearsSmoking <- NULL |
| 273 | +aems450k2.BvaluesQCplaque$ePackYearsSmoking <- NULL |
| 274 | + |
| 275 | +colData(aems450k1.MvaluesQCblood) <- cbind(colData(aems450k1.MvaluesQCblood), AEdataForUpdate.aems450k1.MvaluesQCblood) |
| 276 | +colData(aems450k1.BvaluesQCblood) <- cbind(colData(aems450k1.BvaluesQCblood), AEdataForUpdate.aems450k1.BvaluesQCblood) |
| 277 | +colData(aems450k1.MvaluesQCplaque) <- cbind(colData(aems450k1.MvaluesQCplaque), AEdataForUpdate.aems450k1.MvaluesQCplaque) |
| 278 | +colData(aems450k1.BvaluesQCplaque) <- cbind(colData(aems450k1.BvaluesQCplaque), AEdataForUpdate.aems450k1.BvaluesQCplaque) |
| 279 | +colData(aems450k2.MvaluesQCplaque) <- cbind(colData(aems450k2.MvaluesQCplaque), AEdataForUpdate.aems450k2.MvaluesQCplaque) |
| 280 | +colData(aems450k2.BvaluesQCplaque) <- cbind(colData(aems450k2.BvaluesQCplaque), AEdataForUpdate.aems450k2.BvaluesQCplaque) |
| 281 | + |
| 282 | +### SANITY CHECK |
| 283 | +# summary(aems450k1.MvaluesQCblood$ePackYearsSmoking) |
| 284 | +# summary(aems450k1.BvaluesQCblood$ePackYearsSmoking) |
| 285 | +# summary(aems450k1.MvaluesQCplaque$ePackYearsSmoking) |
| 286 | +# summary(aems450k1.BvaluesQCplaque$ePackYearsSmoking) |
| 287 | +# summary(aems450k2.MvaluesQCplaque$ePackYearsSmoking) |
| 288 | +# summary(aems450k2.BvaluesQCplaque$ePackYearsSmoking) |
| 289 | + |
| 290 | +cat("\n*** saving UPDATE data in between steps ***") |
| 291 | +save(aems450k1.MvaluesQCblood, file = paste0(INP_AEMS450K1_loc,"/",Today,".aems450k1.MvaluesQCIMP.blood.RData")) |
| 292 | +save(aems450k1.BvaluesQCblood, file = paste0(INP_AEMS450K1_loc,"/",Today,".aems450k1.BvaluesQCIMP.blood.RData")) |
| 293 | +save(aems450k1.MvaluesQCplaque, file = paste0(INP_AEMS450K1_loc,"/",Today,".aems450k1.MvaluesQCIMP.plaque.RData")) |
| 294 | +save(aems450k1.BvaluesQCplaque, file = paste0(INP_AEMS450K1_loc,"/",Today,".aems450k1.BvaluesQCIMP.plaque.RData")) |
| 295 | +save(aems450k2.MvaluesQCplaque, file = paste0(INP_AEMS450K2_loc,"/",Today,".aems450k2.MvaluesQCIMP.plaque.RData")) |
| 296 | +save(aems450k2.BvaluesQCplaque, file = paste0(INP_AEMS450K2_loc,"/",Today,".aems450k2.BvaluesQCIMP.plaque.RData")) |
| 297 | + |
| 298 | +cat("\n*** removing the B-value data - as we don't need that ***") |
| 299 | + |
| 300 | +rm(aems450k1.BvaluesQCblood, aems450k1.BvaluesQCplaque, aems450k2.BvaluesQCplaque) |
| 301 | + |
0 commit comments