This R Markdown (RMD) file generates figures for the 2025 Lin
manuscript.
https://github.com/benlin-UAB/2025_Lin_ComboSyntheticLethality_Manuscript
This code section starts script timing, loads R libraries, sets file directory path and seed for analysis, and imports bm_hEGFR. bm_hEGFR is a dataframe that contains matched ensembl gene_id and mouse and human gene names. This object translates between mouse and human genes.
Script Timing Start
ptm <- proc.time()
Docker mount directory
Data <- paste0("/Data/")
Load Libraries
library(tibble)
library(rtracklayer)
library(biomaRt)
library(dplyr)
library(Biobase)
library(tximport)
library(DESeq2)
library(ggplot2)
library(scales)
library(grid)
library(gridExtra)
library(cowplot)
library(colorRamps)
library(RColorBrewer)
library(pheatmap)
library(tidyverse)
library(ReportingTools)
library(BiocParallel)
library(factoextra)
library(EnhancedVolcano)
library(gplots)
library(grDevices)
library(rmarkdown)
library(RSQLite)
library(GSVA)
library(msigdbr)
library(fgsea)
library(UpSetR)
library(ComplexHeatmap)
library(circlize)
library(vsn)
library(gprofiler2)
library(ggpubr)
library(PCAtools)
library(readxl)
Set.Seed
# This will set the random number generator
set.seed(888)
bm_hEGFR: Human to mouse genes translation matrix
bm_hEGFR <- readRDS(paste0(Data,"Input/Figures/bm_hEGFR.rds"))
Docker mount directory
Data <- paste0("/Data/")
dir.create(paste0(Data,"Figures/")) # Only need to make this directory once
Save Environment: Setup environment that is imported for each figure
system.time(save.image(file=paste0("/Data/Input/Figures/setup_workspace.RData")))
## user system elapsed
## 0.027 0.000 0.034
Setup
# Clean environment
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 16972190 906.5 29379045 1569.1 29379045 1569.1
## Vcells 30108694 229.8 80630088 615.2 63941846 487.9
# Import setup workspace
load(paste0("/Data/Input/Figures/setup_workspace.RData"))
Directories
fig.label <- paste0("Receptor_Kinases_Heatmap")
input_dir <- paste0(Data,"Input/Figures/",fig.label,"/")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
Import data
# Import batch corrected values
DEP_Bl_Par_fs <- readRDS(tail(grep("DEP_Bl_Par_fs.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Par_fs"), recursive = F, full.names = T), value = T),1))
# copy over transformed values
vsd <- DEP_Bl_Par_fs
Import genesets
# Import GOI for heatmap
Heatmap_GOI_df <- read.csv(paste0(input_dir,"Heatmap_GOI.csv"))
# Convert heatmap GOI to mouse
convert_human2mouse <-merge(bm_hEGFR,Heatmap_GOI_df, by.x="hsapiens_homolog_associated_gene_name", by.y="HGNC_Receptor_Kinases")
# Extract mouse genes
Heatmap_GOI_df <- data.frame(convert_human2mouse[,"external_gene_name"])
# Rename columns
colnames(Heatmap_GOI_df) <- c("gene_symbol")
# Subset vsd to genes in GOI
vsd_Heatmap_GOI <- vsd[row.names(vsd) %in% Heatmap_GOI_df$gene_symbol,]
# Extract matrix values
mat <- as.matrix(assay(vsd_Heatmap_GOI))
# Heatmap annotation labels
annotation_col<-
HeatmapAnnotation(df = data.frame(Model = vsd_Heatmap_GOI@colData$cell_line_abbr,
Serum = vsd_Heatmap_GOI@colData$serum),
col = list(Model =
c(
'C' = 'black',
'CEv3' = 'red'
),
Serum =
c("full"='firebrick',
"starve"='royalblue'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = paste0("Protein Kinases Receptors"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat)*unit(5, "mm"),
height = nrow(mat)*unit(4, "mm")
)
draw(Heatmap)
# Save Heatmap
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, "FS Protein Kinases Receptors", ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
Import data
# Import batch corrected values
DEP_Bl_Par_ss <- readRDS(tail(grep("DEP_Bl_Par_ss.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Par_ss"), recursive = F, full.names = T), value = T),1))
# copy over transformed values
vsd <- DEP_Bl_Par_ss
Import genesets
# Import GOI for heatmap
Heatmap_GOI_df <- read.csv(paste0(input_dir,"Heatmap_GOI.csv"))
# Convert heatmap GOI to mouse
convert_human2mouse <-merge(bm_hEGFR,Heatmap_GOI_df, by.x="hsapiens_homolog_associated_gene_name", by.y="HGNC_Receptor_Kinases")
# Extract mouse genes
Heatmap_GOI_df <- data.frame(convert_human2mouse[,"external_gene_name"])
# Rename columns
colnames(Heatmap_GOI_df) <- c("gene_symbol")
# Subset vsd to genes in GOI
vsd_Heatmap_GOI <- vsd[row.names(vsd) %in% Heatmap_GOI_df$gene_symbol,]
# Extract matrix values
mat <- as.matrix(assay(vsd_Heatmap_GOI))
# Heatmap annotation labels
annotation_col<-
HeatmapAnnotation(df = data.frame(Model = vsd_Heatmap_GOI@colData$cell_line_abbr,
Serum = vsd_Heatmap_GOI@colData$serum),
col = list(Model =
c(
'C' = 'black',
'CEv3' = 'red'
),
Serum =
c("full"='firebrick',
"starve"='royalblue'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = paste0("Protein Kinases Receptors"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat)*unit(5, "mm"),
height = nrow(mat)*unit(4, "mm")
)
draw(Heatmap)
# Save Heatmap
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, "SS Protein Kinases Receptors", ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
Import data
# Import batch corrected values
dds_bl_Par_fs <- readRDS(tail(grep("bl_Par_fs.rds",list.files(paste0(Data,"Output/RNA/dds_EDA_subset/bl_Par_fs"), recursive = F, full.names = T), value = T),1))
# vst transformation
vsd<-vst(dds_bl_Par_fs, blind=T, nsub=1000, fitType = "parametric")
Import genesets
# Import GOI for heatmap
Heatmap_GOI_df <- read.csv(paste0(input_dir,"Heatmap_GOI.csv"))
# Convert heatmap GOI to mouse
convert_human2mouse <-merge(bm_hEGFR,Heatmap_GOI_df, by.x="hsapiens_homolog_associated_gene_name", by.y="HGNC_Receptor_Kinases")
# Extract mouse genes
Heatmap_GOI_df <- data.frame(convert_human2mouse[,"external_gene_name"])
# Rename columns
colnames(Heatmap_GOI_df) <- c("gene_symbol")
# Subset vsd to genes in GOI
vsd_Heatmap_GOI <- vsd[row.names(vsd) %in% Heatmap_GOI_df$gene_symbol,]
# Extract matrix values
mat <- as.matrix(assay(vsd_Heatmap_GOI))
# Heatmap annotation labels
annotation_col<-
HeatmapAnnotation(df = data.frame(Model = vsd_Heatmap_GOI@colData$cell_line_abbr,
Serum = vsd_Heatmap_GOI@colData$serum),
col = list(Model =
c(
'C' = 'black',
'CEv3' = 'red'
),
Serum =
c("full"='firebrick',
"starve"='royalblue'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = paste0("mRNA Kinases Receptors"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat)*unit(1.5, "mm"),
height = nrow(mat)*unit(2.5, "mm")
)
draw(Heatmap)
# Save Heatmap
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
# width = ncol(mat_list) * 5 + (0.1 * ncol(mat_list) * 5)
# height = nrow(mat_list) * 10 + (0.1 * nrow(mat_list) * 5)
pdf(file=paste0(figures_dir, "FS mRNA Kinases Receptors", ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
# Graphics termination
if(!is.null(dev.list())) dev.off()
## null device
## 1
if(!is.null(dev.list())) graphics.off()
Import data
# Import batch corrected values
dds_bl_Par_ss <- readRDS(tail(grep("bl_Par_ss.rds",list.files(paste0(Data,"Output/RNA/dds_EDA_subset/bl_Par_ss"), recursive = F, full.names = T), value = T),1))
# vst transformation
vsd<-vst(dds_bl_Par_ss, blind=T, nsub=1000, fitType = "parametric")
Import genesets
# Import GOI for heatmap
Heatmap_GOI_df <- read.csv(paste0(input_dir,"Heatmap_GOI.csv"))
# Convert heatmap GOI to mouse
convert_human2mouse <-merge(bm_hEGFR,Heatmap_GOI_df, by.x="hsapiens_homolog_associated_gene_name", by.y="HGNC_Receptor_Kinases")
# Extract mouse genes
Heatmap_GOI_df <- data.frame(convert_human2mouse[,"external_gene_name"])
# Rename columns
colnames(Heatmap_GOI_df) <- c("gene_symbol")
# Subset vsd to genes in GOI
vsd_Heatmap_GOI <- vsd[row.names(vsd) %in% Heatmap_GOI_df$gene_symbol,]
# Extract matrix values
mat <- as.matrix(assay(vsd_Heatmap_GOI))
# Heatmap annotation labels
annotation_col<-
HeatmapAnnotation(df = data.frame(Model = vsd_Heatmap_GOI@colData$cell_line_abbr,
Serum = vsd_Heatmap_GOI@colData$serum),
col = list(Model =
c(
'C' = 'black',
'CEv3' = 'red'
),
Serum =
c("full"='firebrick',
"starve"='royalblue'
)
))
# Define color scale from blue to white to red (for -4 to 4)
color_scale <- colorRamp2(c(-4, 0, 4), c("blue", "white", "red"))
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = paste0("mRNA Kinases Receptors"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat)*unit(5, "mm"),
height = nrow(mat)*unit(2.5, "mm"),
col = color_scale,
heatmap_legend_param = list(
title = "Value", # Title of the scale bar
legend_height = unit(4, "cm"), # Height of the legend
legend_width = unit(1, "cm"), # Width of the legend
labels_gp = gpar(fontsize = 10) # Font size for the legend labels
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = paste0("mRNA Kinases Receptors"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat)*unit(5, "mm"),
height = nrow(mat)*unit(2.5, "mm"))
draw(Heatmap)
# Save Heatmap
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
# width = ncol(mat_list) * 5 + (0.1 * ncol(mat_list) * 5)
# height = nrow(mat_list) * 10 + (0.1 * nrow(mat_list) * 5)
pdf(file=paste0(figures_dir, "SS mRNA Kinases Receptors", ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
# Graphics termination
if(!is.null(dev.list())) dev.off()
## null device
## 1
if(!is.null(dev.list())) graphics.off()
Setup
# Clean environment
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 17002379 908.1 29379045 1569.1 29379045 1569.1
## Vcells 30170808 230.2 80630088 615.2 63941846 487.9
# Import setup workspace
load(paste0("/Data/Input/Figures/setup_workspace.RData"))
Directories
fig.label <- paste0("parental_volcano_plots")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
# limma results direct
output_dir_res <- paste0(Data,"Output/MIB-BL/DEP_Bl_Par/","Limma_res/")
# Import comps of interest
coi <- read.csv(paste0(output_dir_res, "comp_9_CEv3.full-CEv3.starve.csv") , row.names = 1)
# Title
VolTitle <- "DE protein in CEv3 cells: FS vs SS"
# Plot Volcano
volcano_plot <-
EnhancedVolcano(
coi,
lab=row.names(coi),
x="logFC",
y="adj.P.Val",
title= paste0(VolTitle),
pCutoff =0.05,
#FCcutoff =lfc,
pointSize = 2.0,
labSize = 5.0
)
volcano_plot
# Save Volcano
ggsave(filename = paste0(figures_dir,VolTitle,".pdf"), plot=volcano_plot)
## Saving 7 x 5 in image
# limma results direct
output_dir_res <- paste0(Data,"Output/MIB-BL/DEP_Bl_Par/","Limma_res/")
# Import comps of interest
coi <- read.csv(paste0(output_dir_res, "comp_1_C.full-C.starve.csv") , row.names = 1)
# Title
VolTitle <- "DE protein in C cells: FS vs SS"
# Plot Volcano
volcano_plot <-
EnhancedVolcano(
coi,
lab=row.names(coi),
x="logFC",
y="adj.P.Val",
title= paste0(VolTitle),
pCutoff =0.05,
#FCcutoff =lfc,
pointSize = 2.0,
labSize = 5.0
)
volcano_plot
# Save Volcano
ggsave(filename = paste0(figures_dir,VolTitle,".pdf"), plot=volcano_plot)
## Saving 7 x 5 in image
# limma results direct
output_dir_res <- paste0(Data,"Output/RNA/dds_EDA_subset/bl_Par/","DE2_res/")
# Import comps of interest
coi <- read.csv(paste0(output_dir_res, "comp_6_full_CEv3_NA_0-vs-starve_CEv3_NA_0.csv") , row.names = 1)
# Title
VolTitle <- "DE mRNA in CEv3 cells: FS vs SS"
# Plot Volcano
volcano_plot <-
EnhancedVolcano(
coi,
lab=row.names(coi),
x="log2FoldChange",
y="padj",
title= paste0(VolTitle),
pCutoff =0.05,
FCcutoff =1,
pointSize = 2.0,
labSize = 5.0
)
volcano_plot
# Save Volcano
ggsave(filename = paste0(figures_dir,VolTitle,".pdf"), plot=volcano_plot)
## Saving 7 x 5 in image
# limma results direct
output_dir_res <- paste0(Data,"Output/RNA/dds_EDA_subset/bl_Par/","DE2_res/")
# Import comps of interest
coi <- read.csv(paste0(output_dir_res, "comp_2_full_C_NA_0-vs-starve_C_NA_0.csv") , row.names = 1)
# Title
VolTitle <- "DE mRNA in C cells: FS vs SS"
# Plot Volcano
volcano_plot <-
EnhancedVolcano(
coi,
lab=row.names(coi),
x="log2FoldChange",
y="padj",
title= paste0(VolTitle),
pCutoff =0.05,
FCcutoff =1,
pointSize = 2.0,
labSize = 5.0
)
volcano_plot
# Save Volcano
ggsave(filename = paste0(figures_dir,VolTitle,".pdf"), plot=volcano_plot)
## Saving 7 x 5 in image
EGFR TKI resistance signature (in vitro models)
Isogeneic in vitro models of resistance display heterogeneous activation of alternative signaling pathways. Raw counts were imported as an SE object and transformed using the vst() function from DESeq2. The WikiPathway geneset containing genes associated with EGFR TKI resistance was obtained from MSIGDB. The top 25 most variably expressed genes in this set are displayed as a scaled heatmap.
Setup
# Clean environment
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 17016475 908.8 29379045 1569.1 29379045 1569.1
## Vcells 30288941 231.1 80630088 615.2 63941846 487.9
# Import setup workspace
load(paste0("/Data/Input/Figures/setup_workspace.RData"))
Directories
fig.label <- paste0("EGFR_TKI_Resistance_sig")
input_dir <- paste0(Data,"Input/Figures/",fig.label,"/")
output_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(output_dir)
Import SE and vst transformation
# Import SE object of samples
dds_Bl_Res_fs <- readRDS(tail(grep("bl_Res_fs.rds",list.files(paste0(Data,"Output/RNA/dds_EDA_subset/bl_Res_fs"), recursive = T, full.names = T), value = T),1))
# vst transformation
vsd<-vst(dds_Bl_Res_fs, blind=T, nsub=1000, fitType = "parametric")
Import genesets
# Import GOI for heatmap
Heatmap_GOI_df <- read.csv(paste0(input_dir,"Heatmap_GOI.csv"))
# Convert heatmap GOI to mouse
convert_human2mouse <-merge(bm_hEGFR,Heatmap_GOI_df, by.x="hsapiens_homolog_associated_gene_name", by.y="WP_EGFR_TYROSINE_KINASE_INHIBITOR_RESISTANCE")
# Extract mouse genes
Heatmap_GOI_df <- data.frame(convert_human2mouse[,"external_gene_name"])
# Rename columns
colnames(Heatmap_GOI_df) <- c("gene_symbol")
Make Heatmap ### Fig.2E
# Subset vsd to genes in GOI
vsd_Heatmap_GOI <- vsd[row.names(vsd) %in% Heatmap_GOI_df$gene_symbol,]
# Filter for top 25 variable genes
#vsd_Heatmap_GOI <- vsd_Heatmap_GOI[head(order(rowVars(as.matrix(assay(vsd_Heatmap_GOI))),decreasing=T), 25), ]
# Extract matrix values from vsd_Heatmap_GOI
mat <- as.matrix(assay(vsd_Heatmap_GOI))
# Heatmap annotations
annotation_col <- HeatmapAnnotation(df = data.frame("Model" = factor(vsd_Heatmap_GOI@colData$cell_line_abbr,
# Set legend order
levels= c("CEv3","E4","E5","G1","G5","G8","G12")),
"Sen" = vsd_Heatmap_GOI@colData$EGFR_TKI_sen),
which="column",
col = list("Model" =
c(
'CEv3' = 'red',
'E4' = 'deepskyblue4',
'E5' = 'turquoise3',
'G1' = 'purple4',
'G5' = 'mediumorchid',
'G8' = 'deeppink',
'G12' = 'pink2'
),
Sen =
c('Resistant'='brown',
'Sensitive'='blue')
))
# Generate Heatmap
heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = "FS_WP_EGFR_TYROSINE_KINASE_INHIBITOR_RESISTANCE",
show_column_names = F,
show_row_names = F
)
# Save Heatmap
pdf(file=paste0(output_dir,"FS_WP_EGFR_TKI_Res.pdf"))
draw(heatmap)
dev.off()
## png
## 2
# Draw Heatmap
draw(heatmap)
# Graphics termination
if(!is.null(dev.list())) dev.off()
## null device
## 1
if(!is.null(dev.list())) graphics.off()
Import SE and vst transformation
# Import SE object of samples
dds_Bl_Res_ss <- readRDS(tail(grep("bl_Res_ss.rds",list.files(paste0(Data,"Output/RNA/dds_EDA_subset/bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# vst transformation
vsd<-vst(dds_Bl_Res_ss, blind=T, nsub=1000, fitType = "parametric")
Import genesets
# Import GOI for heatmap
Heatmap_GOI_df <- read.csv(paste0(input_dir,"Heatmap_GOI.csv"))
# Convert heatmap GOI to mouse
convert_human2mouse <-merge(bm_hEGFR,Heatmap_GOI_df, by.x="hsapiens_homolog_associated_gene_name", by.y="WP_EGFR_TYROSINE_KINASE_INHIBITOR_RESISTANCE")
# Extract mouse genes
Heatmap_GOI_df <- data.frame(convert_human2mouse[,"external_gene_name"])
# Rename columns
colnames(Heatmap_GOI_df) <- c("gene_symbol")
Make Heatmap ### Fig.2F
# Subset vsd to genes in GOI
vsd_Heatmap_GOI <- vsd[row.names(vsd) %in% Heatmap_GOI_df$gene_symbol,]
# Filter for top 25 variable genes
# vsd_Heatmap_GOI <- vsd_Heatmap_GOI[head(order(rowVars(as.matrix(assay(vsd_Heatmap_GOI))),decreasing=T), 25), ]
# Extract matrix values from vsd_Heatmap_GOI
mat <- as.matrix(assay(vsd_Heatmap_GOI))
# Heatmap annotations
annotation_col <- HeatmapAnnotation(df = data.frame("Model" = factor(vsd_Heatmap_GOI@colData$cell_line_abbr,
# Set legend order
levels= c("CEv3","E4","E5","G1","G5","G8","G12")),
"Sen" = vsd_Heatmap_GOI@colData$EGFR_TKI_sen),
which="column",
col = list("Model" =
c(
'CEv3' = 'red',
'E4' = 'deepskyblue4',
'E5' = 'turquoise3',
'G1' = 'purple4',
'G5' = 'mediumorchid',
'G8' = 'deeppink',
'G12' = 'pink2'
),
Sen =
c('Resistant'='brown',
'Sensitive'='blue')
))
# Generate Heatmap
heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = "SS_WP_EGFR_TYROSINE_KINASE_INHIBITOR_RESISTANCE",
show_column_names = F,
show_row_names = F
)
# Save Heatmap
pdf(file=paste0(output_dir,"SS_WP_EGFR_TKI_Res.pdf"))
draw(heatmap)
dev.off()
## png
## 2
# Draw Heatmap
draw(heatmap)
# Graphics termination
if(!is.null(dev.list())) dev.off()
## null device
## 1
if(!is.null(dev.list())) graphics.off()
Import SE and vst transformation
# Import Protein vsd
DEP_Bl_Res_fs <- readRDS(tail(grep("DEP_Bl_Res_fs.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs"), recursive = T, full.names = T), value = T),1))
DEP_Bl_Res_ss <- readRDS(tail(grep("DEP_Bl_Res_ss.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# vst transformation
vsd<- DEP_Bl_Res_fs
Import genesets
# Import GOI for heatmap
Heatmap_GOI_df <- read.csv(paste0(input_dir,"Heatmap_GOI.csv"))
# Convert heatmap GOI to mouse
convert_human2mouse <-merge(bm_hEGFR,Heatmap_GOI_df, by.x="hsapiens_homolog_associated_gene_name", by.y="WP_EGFR_TYROSINE_KINASE_INHIBITOR_RESISTANCE")
# Extract mouse genes
Heatmap_GOI_df <- data.frame(convert_human2mouse[,"external_gene_name"])
# Rename columns
colnames(Heatmap_GOI_df) <- c("gene_symbol")
Make Heatmap ### Fig.2G
# Subset vsd to genes in GOI
vsd_Heatmap_GOI <- vsd[row.names(vsd) %in% Heatmap_GOI_df$gene_symbol,]
# Filter for top 25 variable genes
#vsd_Heatmap_GOI <- vsd_Heatmap_GOI[head(order(rowVars(as.matrix(assay(vsd_Heatmap_GOI))),decreasing=T), 25), ]
# Extract matrix values from vsd_Heatmap_GOI
mat <- as.matrix(assay(vsd_Heatmap_GOI))
# Heatmap annotations
annotation_col <- HeatmapAnnotation(df = data.frame("Model" = factor(vsd_Heatmap_GOI@colData$cell_line_abbr,
# Set legend order
levels= c("CEv3","E4","E5","G1","G5","G8","G12")),
"Sen" = vsd_Heatmap_GOI@colData$EGFR_TKI_sen),
which="column",
col = list("Model" =
c(
'CEv3' = 'red',
'E4' = 'deepskyblue4',
'E5' = 'turquoise3',
'G1' = 'purple4',
'G5' = 'mediumorchid',
'G8' = 'deeppink',
'G12' = 'pink2'
),
Sen =
c('Resistant'='brown',
'Sensitive'='blue')
))
# Generate Heatmap
heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = "Protein_FS_WP_EGFR_TYROSINE_KINASE_INHIBITOR_RESISTANCE",
show_column_names = F,
show_row_names = T
)
# Save Heatmap
pdf(file=paste0(output_dir,"Protein_FS_WP_EGFR_TKI_Res.pdf"))
draw(heatmap)
dev.off()
## png
## 2
# Draw Heatmap
draw(heatmap)
# Graphics termination
if(!is.null(dev.list())) dev.off()
## null device
## 1
if(!is.null(dev.list())) graphics.off()
Import SE and vst transformation
# Import Protein vsd
DEP_Bl_Res_fs <- readRDS(tail(grep("DEP_Bl_Res_fs.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs"), recursive = T, full.names = T), value = T),1))
DEP_Bl_Res_ss <- readRDS(tail(grep("DEP_Bl_Res_ss.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# vst transformation
vsd<- DEP_Bl_Res_ss
Import genesets
# Import GOI for heatmap
Heatmap_GOI_df <- read.csv(paste0(input_dir,"Heatmap_GOI.csv"))
# Convert heatmap GOI to mouse
convert_human2mouse <-merge(bm_hEGFR,Heatmap_GOI_df, by.x="hsapiens_homolog_associated_gene_name", by.y="WP_EGFR_TYROSINE_KINASE_INHIBITOR_RESISTANCE")
# Extract mouse genes
Heatmap_GOI_df <- data.frame(convert_human2mouse[,"external_gene_name"])
# Rename columns
colnames(Heatmap_GOI_df) <- c("gene_symbol")
Make Heatmap ### Fig.2H
# Subset vsd to genes in GOI
vsd_Heatmap_GOI <- vsd[row.names(vsd) %in% Heatmap_GOI_df$gene_symbol,]
# Filter for top 25 variable genes
#vsd_Heatmap_GOI <- vsd_Heatmap_GOI[head(order(rowVars(as.matrix(assay(vsd_Heatmap_GOI))),decreasing=T), 25), ]
# Extract matrix values from vsd_Heatmap_GOI
mat <- as.matrix(assay(vsd_Heatmap_GOI))
# Heatmap annotations
annotation_col <- HeatmapAnnotation(df = data.frame("Model" = factor(vsd_Heatmap_GOI@colData$cell_line_abbr,
# Set legend order
levels= c("CEv3","E4","E5","G1","G5","G8","G12")),
"Sen" = vsd_Heatmap_GOI@colData$EGFR_TKI_sen),
which="column",
col = list("Model" =
c(
'CEv3' = 'red',
'E4' = 'deepskyblue4',
'E5' = 'turquoise3',
'G1' = 'purple4',
'G5' = 'mediumorchid',
'G8' = 'deeppink',
'G12' = 'pink2'
),
Sen =
c('Resistant'='brown',
'Sensitive'='blue')
))
# Generate Heatmap
heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat))),
top_annotation = annotation_col,
name = " ",
column_title = "Protein_SS_WP_EGFR_TYROSINE_KINASE_INHIBITOR_RESISTANCE",
show_column_names = F,
show_row_names = T
)
# Save Heatmap
pdf(file=paste0(output_dir,"Protein_SS_WP_EGFR_TKI_Res.pdf"))
draw(heatmap)
dev.off()
## png
## 2
# Draw Heatmap
draw(heatmap)
# Graphics termination
if(!is.null(dev.list())) dev.off()
## null device
## 1
if(!is.null(dev.list())) graphics.off()
Directories
fig.label <- paste0("Res_signature_volcanos")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
fig.label <- paste0("Res_signature_volcanos")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
## Warning in dir.create(figures_dir): '/Data/Figures/Res_signature_volcanos'
## already exists
# limma results direct
output_dir_res <- paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs/","Limma_res/")
# Summarize of comparisions
comps <- read.csv(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs/","comps_all.csv"), row.names = 1)
# import limma res
subset_limmaRes <- list()
for (i in seq_along(row.names(comps))){
subset_limmaRes[[comps$comp[i]]] <-
read.csv(paste0(output_dir_res,"comp_",i,"_",comps[i,5],".csv"), row.names = 1)
}
# Extract comps of interest
coi <- comps[comps$comp_denom %in% c("CEv3.full"),]$comp
comp_list_oi <- subset_limmaRes[names(subset_limmaRes) %in% coi]
# Plot Volcano
volcano_plots_ls <- list()
for (i in seq_along(comp_list_oi)){
plot(volcano_plots_ls[[names(comp_list_oi)[i]]] <-
EnhancedVolcano(
comp_list_oi[[i]],
lab=row.names(comp_list_oi[[i]]),
x="logFC",
y="adj.P.Val",
title= paste0("Protein ",names(comp_list_oi)[[i]]),
pCutoff =0.05,
#FCcutoff =lfc,
pointSize = 2.0,
labSize = 5.0
))}
# Save Volcano
for (i in seq_along(volcano_plots_ls)){
ggsave(filename = paste0(figures_dir,"comp_", i, "_", names(volcano_plots_ls)[[i]],".pdf"), plot=volcano_plots_ls[[i]])
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
# limma results direct
output_dir_res <- paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss/","Limma_res/")
# Summarize of comparisions
comps <- read.csv(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss/","comps_all.csv"), row.names = 1)
# import limma res
subset_limmaRes <- list()
for (i in seq_along(row.names(comps))){
subset_limmaRes[[comps$comp[i]]] <-
read.csv(paste0(output_dir_res,"comp_",i,"_",comps[i,5],".csv"), row.names = 1)
}
# Extract comps of interest
coi <- comps[comps$comp_denom %in% c("CEv3.starve"),]$comp
comp_list_oi <- subset_limmaRes[names(subset_limmaRes) %in% coi]
# Plot Volcano
volcano_plots_ls <- list()
for (i in seq_along(comp_list_oi)){
plot(volcano_plots_ls[[names(comp_list_oi)[i]]] <-
EnhancedVolcano(
comp_list_oi[[i]],
lab=row.names(comp_list_oi[[i]]),
x="logFC",
y="adj.P.Val",
title= paste0("Protein ",names(comp_list_oi)[[i]]),
pCutoff =0.05,
#FCcutoff =lfc,
pointSize = 2.0,
labSize = 5.0
))}
# Save Volcano
for (i in seq_along(volcano_plots_ls)){
ggsave(filename = paste0(figures_dir,"comp_", i, "_", names(volcano_plots_ls)[[i]],".pdf"), plot=volcano_plots_ls[[i]])
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
Setup
# Clean environment
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 17003642 908.1 29379045 1569.1 29379045 1569.1
## Vcells 30172334 230.2 80630088 615.2 63941846 487.9
# Import setup workspace
load(paste0("/Data/Input/Figures/setup_workspace.RData"))
Directories
fig.label <- paste0("Baseline_kinase_upset")
input_dir <- paste0(Data,"Input/Figures/",fig.label,"/")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
Import Limma Res fs
DEP_Bl_Res_fs_limma_res_padj <- read.csv(paste0(Data,"/Output/MIB-BL/DEP_Bl_Res_fs/Limma_res/","All_coi_padj.csv"), row.names = 1)
# Extract significant genes
E4_sig_genes <- row.names(subset(DEP_Bl_Res_fs_limma_res_padj, DEP_Bl_Res_fs_limma_res_padj$adj.P.Val_E4.full.CEv3.full<0.05))
E5_sig_genes <- row.names(subset(DEP_Bl_Res_fs_limma_res_padj, DEP_Bl_Res_fs_limma_res_padj$adj.P.Val_E5.full.CEv3.full<0.05))
G1_sig_genes <- row.names(subset(DEP_Bl_Res_fs_limma_res_padj, DEP_Bl_Res_fs_limma_res_padj$adj.P.Val_G1.full.CEv3.full<0.05))
G5_sig_genes <- row.names(subset(DEP_Bl_Res_fs_limma_res_padj, DEP_Bl_Res_fs_limma_res_padj$adj.P.Val_G5.full.CEv3.full<0.05))
G8_sig_genes <- row.names(subset(DEP_Bl_Res_fs_limma_res_padj, DEP_Bl_Res_fs_limma_res_padj$adj.P.Val_G8.full.CEv3.full<0.05))
G12_sig_genes <- row.names(subset(DEP_Bl_Res_fs_limma_res_padj, DEP_Bl_Res_fs_limma_res_padj$adj.P.Val_G12.full.CEv3.full<0.05))
# Convert to list
DEP_Bl_Res_fs_sigGenes_ls <- list(
E4_fs=E4_sig_genes,
E5_fs=E5_sig_genes,
G1_fs=G1_sig_genes,
G5_fs=G5_sig_genes,
G8_fs=G8_sig_genes,
G12_fs=G12_sig_genes
)
# Remove hEGFR
DEP_Bl_Res_fs_sigGenes_ls <- lapply(DEP_Bl_Res_fs_sigGenes_ls, function(x) subset(x,!x=="hEGFR"))
# Directories
output_dir_upset <- paste0(figures_dir)
graph_dir_upset <- paste0(figures_dir)
# Convert list to binary matrix
Upset_matrix <- ComplexHeatmap::list_to_matrix(DEP_Bl_Res_fs_sigGenes_ls)
# Save Upset_matrix
write.csv(Upset_matrix, file = paste0(output_dir_upset, "baseline_FS_upset_matrix",".csv"))
# convert list to combination matrix
combo_matrix <- ComplexHeatmap::make_comb_mat(Upset_matrix)
# Color by intersection
intersection_degree <- comb_degree(combo_matrix)
comb_col <- ifelse(intersection_degree > 1, "blue", "red")
# Generate upset plot
upset_plot <- ComplexHeatmap::UpSet(combo_matrix, comb_col = comb_col,
row_names_gp =gpar(fontsize = 12) ,
top_annotation = upset_top_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
annotation_name_rot = 90,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
right_annotation = upset_right_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
pt_size = unit(8, "pt"))
upset_plot
# Save upset_plot
pdf(file=paste0(graph_dir_upset, "baseline_FS_upset" , ".pdf"))
draw(upset_plot)
dev.off()
## png
## 2
# Combination matrix
combo_matrix
## A combination matrix with 6 sets and 29 combinations.
## ranges of combination set size: c(1, 35).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## E4_fs E5_fs G1_fs G5_fs G8_fs G12_fs code size
## x 000100 35
## x 000010 11
## x x 010100 10
## x 010000 6
## x x 000110 5
## x x x x 011110 3
## x x x 010110 3
## x x x 010101 3
##
## Sets are:
## set size
## E4_fs 10
## E5_fs 39
## G1_fs 21
## G5_fs 76
## G8_fs 33
## G12_fs 21
comb_name(combo_matrix)
## [1] "111111" "111110" "110111" "101111" "011111" "110101" "101101" "100111"
## [9] "011110" "011101" "001111" "111000" "011100" "011001" "010110" "010101"
## [17] "011000" "010100" "010001" "001100" "001010" "001001" "000110" "000101"
## [25] "000011" "010000" "000100" "000010" "000001"
# Extract all from upset plot
upset_genes_ls <- list()
for(i in seq_along(comb_name(combo_matrix))){
upset_genes_ls[[paste0((set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[1],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[2],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[3],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[4],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[5],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[6])]] <-
extract_comb(combo_matrix,comb_name(combo_matrix)[i])
}
# Convert to DF
upset_genes_df <- do.call(cbind, lapply(upset_genes_ls, function(x) c(x, rep(NA, max(sapply(upset_genes_ls, length)) - length(x)))))
# Save genes
write.csv(upset_genes_df, file = paste0(output_dir_upset,"baseline_FS_upset_genes_df",".csv"))
# Extract unique genes
grep("NA_NA_NA_NA_NA",colnames(upset_genes_df))
## [1] 26 27 28 29
DEP_Bl_Res_fs_unique_genes <- c()
for(i in (grep("NA_NA_NA_NA_NA",colnames(upset_genes_df)))){
DEP_Bl_Res_fs_unique_genes<- c(DEP_Bl_Res_fs_unique_genes,upset_genes_df[,i][!is.na( upset_genes_df[,i])])
}
# Subset unique genes DF
DEP_Bl_Res_fs_unique_genes_df <- upset_genes_df[,colnames(upset_genes_df)[grep("NA_NA_NA_NA_NA",colnames(upset_genes_df))]]
colnames(DEP_Bl_Res_fs_unique_genes_df) <- gsub("_NA_NA_NA_NA_NA","",colnames(DEP_Bl_Res_fs_unique_genes_df))
# Convert to list
DEP_Bl_Res_fs_unique_genes_ls <- list()
for (i in seq_along(colnames(DEP_Bl_Res_fs_unique_genes_df))){
DEP_Bl_Res_fs_unique_genes_ls[[colnames(DEP_Bl_Res_fs_unique_genes_df)[i]]] <-
c(DEP_Bl_Res_fs_unique_genes_df[,i][!is.na(DEP_Bl_Res_fs_unique_genes_df[,i])])
}
# Extract overlapping genes column names
colnames(upset_genes_df)[(grep("NA_NA_NA_NA_NA",colnames(upset_genes_df), invert = T))]
## [1] "E4_fs_E5_fs_G1_fs_G5_fs_G8_fs_G12_fs"
## [2] "E4_fs_E5_fs_G1_fs_G5_fs_G8_fs_NA"
## [3] "E4_fs_E5_fs_G5_fs_G8_fs_G12_fs_NA"
## [4] "E4_fs_G1_fs_G5_fs_G8_fs_G12_fs_NA"
## [5] "E5_fs_G1_fs_G5_fs_G8_fs_G12_fs_NA"
## [6] "E4_fs_E5_fs_G5_fs_G12_fs_NA_NA"
## [7] "E4_fs_G1_fs_G5_fs_G12_fs_NA_NA"
## [8] "E4_fs_G5_fs_G8_fs_G12_fs_NA_NA"
## [9] "E5_fs_G1_fs_G5_fs_G8_fs_NA_NA"
## [10] "E5_fs_G1_fs_G5_fs_G12_fs_NA_NA"
## [11] "G1_fs_G5_fs_G8_fs_G12_fs_NA_NA"
## [12] "E4_fs_E5_fs_G1_fs_NA_NA_NA"
## [13] "E5_fs_G1_fs_G5_fs_NA_NA_NA"
## [14] "E5_fs_G1_fs_G12_fs_NA_NA_NA"
## [15] "E5_fs_G5_fs_G8_fs_NA_NA_NA"
## [16] "E5_fs_G5_fs_G12_fs_NA_NA_NA"
## [17] "E5_fs_G1_fs_NA_NA_NA_NA"
## [18] "E5_fs_G5_fs_NA_NA_NA_NA"
## [19] "E5_fs_G12_fs_NA_NA_NA_NA"
## [20] "G1_fs_G5_fs_NA_NA_NA_NA"
## [21] "G1_fs_G8_fs_NA_NA_NA_NA"
## [22] "G1_fs_G12_fs_NA_NA_NA_NA"
## [23] "G5_fs_G8_fs_NA_NA_NA_NA"
## [24] "G5_fs_G12_fs_NA_NA_NA_NA"
## [25] "G8_fs_G12_fs_NA_NA_NA_NA"
# Extract overlapping genes
DEP_Bl_Res_fs_ol_genes <- c()
for (i in (colnames(upset_genes_df)[(grep("NA_NA_NA_NA_NA",colnames(upset_genes_df), invert = T))])){
DEP_Bl_Res_fs_ol_genes <-
c(DEP_Bl_Res_fs_ol_genes,upset_genes_df[,i][!is.na( upset_genes_df[,i] )])
}
Import Limma Res ss
DEP_Bl_Res_ss_limma_res_padj <- read.csv(paste0(Data,"/Output/MIB-BL/DEP_Bl_Res_ss/Limma_res/","All_coi_padj.csv"), row.names = 1)
DEP_Bl_Res_ss_limma_res_lfc <- read.csv(paste0(Data,"/Output/MIB-BL/DEP_Bl_Res_ss/Limma_res/","All_coi_l2fc.csv"), row.names = 1)
# Extract significant genes
E4_sig_genes <- row.names(subset(DEP_Bl_Res_ss_limma_res_padj, DEP_Bl_Res_ss_limma_res_padj$adj.P.Val_E4.starve.CEv3.starve<0.05))
E5_sig_genes <- row.names(subset(DEP_Bl_Res_ss_limma_res_padj, DEP_Bl_Res_ss_limma_res_padj$adj.P.Val_E5.starve.CEv3.starve<0.05))
G1_sig_genes <- row.names(subset(DEP_Bl_Res_ss_limma_res_padj, DEP_Bl_Res_ss_limma_res_padj$adj.P.Val_G1.starve.CEv3.starve<0.05))
G5_sig_genes <- row.names(subset(DEP_Bl_Res_ss_limma_res_padj, DEP_Bl_Res_ss_limma_res_padj$adj.P.Val_G5.starve.CEv3.starve<0.05))
G8_sig_genes <- row.names(subset(DEP_Bl_Res_ss_limma_res_padj, DEP_Bl_Res_ss_limma_res_padj$adj.P.Val_G8.starve.CEv3.starve<0.05))
G12_sig_genes <- row.names(subset(DEP_Bl_Res_ss_limma_res_padj, DEP_Bl_Res_ss_limma_res_padj$adj.P.Val_G12.starve.CEv3.starve<0.05))
# Convert to list
DEP_Bl_Res_ss_sigGenes_ls <- list(
E4_ss=E4_sig_genes,
E5_ss=E5_sig_genes,
G1_ss=G1_sig_genes,
G5_ss=G5_sig_genes,
G8_ss=G8_sig_genes,
G12_ss=G12_sig_genes
)
# Remove hEGFR
DEP_Bl_Res_ss_sigGenes_ls <- lapply(DEP_Bl_Res_ss_sigGenes_ls, function(x) subset(x,!x=="hEGFR"))
# Directories
output_dir_upset <- paste0(figures_dir)
graph_dir_upset <- paste0(figures_dir)
# Convert list to binary matrix
Upset_matrix <- ComplexHeatmap::list_to_matrix(DEP_Bl_Res_ss_sigGenes_ls)
# Save Upset_matrix
write.csv(Upset_matrix, file = paste0(output_dir_upset, "baseline_ss_upset_matrix",".csv"))
# convert list to combination matrix
combo_matrix <- ComplexHeatmap::make_comb_mat(Upset_matrix)
# Color by intersection
intersection_degree <- comb_degree(combo_matrix)
comb_col <- ifelse(intersection_degree > 1, "blue", "red")
# Generate upset plot
upset_plot <- ComplexHeatmap::UpSet(combo_matrix, comb_col = comb_col,
row_names_gp =gpar(fontsize = 12) ,
top_annotation = upset_top_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
annotation_name_rot = 90,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
right_annotation = upset_right_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
pt_size = unit(8, "pt"))
upset_plot
# Save upset_plot
pdf(file=paste0(graph_dir_upset, "baseline_ss_upset" , ".pdf"))
draw(upset_plot)
dev.off()
## png
## 2
# Combination matrix
combo_matrix
## A combination matrix with 6 sets and 26 combinations.
## ranges of combination set size: c(1, 18).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## E4_ss E5_ss G1_ss G5_ss G8_ss G12_ss code size
## x x x x x x 111111 18
## x 000001 14
## x x x x 001111 7
## x x 001100 4
## x x x x x 011111 3
## x x 001001 3
## x 000100 3
## x x x x x 101111 2
##
## Sets are:
## set size
## E4_ss 28
## E5_ss 35
## G1_ss 46
## G5_ss 47
## G8_ss 40
## G12_ss 58
comb_name(combo_matrix)
## [1] "111111" "111110" "111101" "111011" "110111" "101111" "011111" "100111"
## [9] "011110" "010111" "001111" "110001" "011001" "010110" "001101" "110000"
## [17] "010100" "001100" "001010" "001001" "000011" "010000" "001000" "000100"
## [25] "000010" "000001"
# Extract all from upset plot
upset_genes_ls <- list()
for(i in seq_along(comb_name(combo_matrix))){
upset_genes_ls[[paste0((set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[1],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[2],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[3],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[4],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[5],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[6])]] <-
extract_comb(combo_matrix,comb_name(combo_matrix)[i])
}
# Convert to DF
upset_genes_df <- do.call(cbind, lapply(upset_genes_ls, function(x) c(x, rep(NA, max(sapply(upset_genes_ls, length)) - length(x)))))
# Save genes
write.csv(upset_genes_df, file = paste0(output_dir_upset,"baseline_ss_upset_genes_df",".csv"))
# Extract unique genes
grep("NA_NA_NA_NA_NA",colnames(upset_genes_df))
## [1] 22 23 24 25 26
DEP_Bl_Res_ss_unique_genes <- c()
for(i in (grep("NA_NA_NA_NA_NA",colnames(upset_genes_df)))){
DEP_Bl_Res_ss_unique_genes<- c(DEP_Bl_Res_ss_unique_genes,upset_genes_df[,i][!is.na( upset_genes_df[,i])])
}
# Subset unique genes DF
DEP_Bl_Res_ss_unique_genes_df <- upset_genes_df[,colnames(upset_genes_df)[grep("NA_NA_NA_NA_NA",colnames(upset_genes_df))]]
colnames(DEP_Bl_Res_ss_unique_genes_df) <- gsub("_NA_NA_NA_NA_NA","",colnames(DEP_Bl_Res_ss_unique_genes_df))
# Convert to list
DEP_Bl_Res_ss_unique_genes_ls <- list()
for (i in seq_along(colnames(DEP_Bl_Res_ss_unique_genes_df))){
DEP_Bl_Res_ss_unique_genes_ls[[colnames(DEP_Bl_Res_ss_unique_genes_df)[i]]] <-
c(DEP_Bl_Res_ss_unique_genes_df[,i][!is.na(DEP_Bl_Res_ss_unique_genes_df[,i])])
}
# Extract overlapping genes column names
colnames(upset_genes_df)[(grep("NA_NA_NA_NA_NA",colnames(upset_genes_df), invert = T))]
## [1] "E4_ss_E5_ss_G1_ss_G5_ss_G8_ss_G12_ss"
## [2] "E4_ss_E5_ss_G1_ss_G5_ss_G8_ss_NA"
## [3] "E4_ss_E5_ss_G1_ss_G5_ss_G12_ss_NA"
## [4] "E4_ss_E5_ss_G1_ss_G8_ss_G12_ss_NA"
## [5] "E4_ss_E5_ss_G5_ss_G8_ss_G12_ss_NA"
## [6] "E4_ss_G1_ss_G5_ss_G8_ss_G12_ss_NA"
## [7] "E5_ss_G1_ss_G5_ss_G8_ss_G12_ss_NA"
## [8] "E4_ss_G5_ss_G8_ss_G12_ss_NA_NA"
## [9] "E5_ss_G1_ss_G5_ss_G8_ss_NA_NA"
## [10] "E5_ss_G5_ss_G8_ss_G12_ss_NA_NA"
## [11] "G1_ss_G5_ss_G8_ss_G12_ss_NA_NA"
## [12] "E4_ss_E5_ss_G12_ss_NA_NA_NA"
## [13] "E5_ss_G1_ss_G12_ss_NA_NA_NA"
## [14] "E5_ss_G5_ss_G8_ss_NA_NA_NA"
## [15] "G1_ss_G5_ss_G12_ss_NA_NA_NA"
## [16] "E4_ss_E5_ss_NA_NA_NA_NA"
## [17] "E5_ss_G5_ss_NA_NA_NA_NA"
## [18] "G1_ss_G5_ss_NA_NA_NA_NA"
## [19] "G1_ss_G8_ss_NA_NA_NA_NA"
## [20] "G1_ss_G12_ss_NA_NA_NA_NA"
## [21] "G8_ss_G12_ss_NA_NA_NA_NA"
# Extract overlapping genes
DEP_Bl_Res_ss_ol_genes <- c()
for (i in (colnames(upset_genes_df)[(grep("NA_NA_NA_NA_NA",colnames(upset_genes_df), invert = T))])){
DEP_Bl_Res_ss_ol_genes <-
c(DEP_Bl_Res_ss_ol_genes,upset_genes_df[,i][!is.na( upset_genes_df[,i] )])
}
# fs and ss kinases that are not shared among any clone will be combined to generate the clone specific signature. Unique kinases identified in fs serum condition but are not unique in ss serum condition will be removed and vice versa. We have identified these genes as "intersect_genes"
# Lise to compare
# DEP_Bl_Res_fs_unique_genes_ls
# DEP_Bl_Res_ss_unique_genes_ls
# DEP_Bl_Res_fs_ol_genes
# DEP_Bl_Res_ss_ol_genes
fs_unique <- c()
for (i in seq_along(DEP_Bl_Res_fs_unique_genes_ls)){
genes <- c(DEP_Bl_Res_fs_unique_genes_ls[[i]])
fs_unique <- c(fs_unique,genes)
}
fs_ol <- c()
for (i in seq_along(DEP_Bl_Res_fs_ol_genes)){
genes <- c(DEP_Bl_Res_fs_ol_genes[[i]])
fs_ol <- c(fs_ol,genes)
}
ss_unique <- c()
for (i in seq_along(DEP_Bl_Res_ss_unique_genes_ls)){
genes <- c(DEP_Bl_Res_ss_unique_genes_ls[[i]])
ss_unique <- c(ss_unique,genes)
}
ss_ol <- c()
for (i in seq_along(DEP_Bl_Res_ss_ol_genes)){
genes <- c(DEP_Bl_Res_ss_ol_genes[[i]])
ss_ol <- c(ss_ol,genes)
}
# Genes that intersect
intersect(fs_unique,ss_ol)
## [1] "Fgfr1" "Tnik" "Aurka" "Cdk5" "Egfr" "Map2k4" "Mapk15"
## [8] "Melk" "Pikfyve" "Plk4" "Ttk" "Clk4" "Csk" "Map4k5"
## [15] "Peak1"
intersect(fs_ol,ss_unique)
## [1] "Stk10" "Prkaa2" "Camk1d" "Acvr1b" "Cdk14" "Map4k4" "Camk2a" "Camk2d"
intersect_genes <- c(intersect(fs_unique,ss_ol),intersect(fs_ol,ss_unique) )
intersect_genes
## [1] "Fgfr1" "Tnik" "Aurka" "Cdk5" "Egfr" "Map2k4" "Mapk15"
## [8] "Melk" "Pikfyve" "Plk4" "Ttk" "Clk4" "Csk" "Map4k5"
## [15] "Peak1" "Stk10" "Prkaa2" "Camk1d" "Acvr1b" "Cdk14" "Map4k4"
## [22] "Camk2a" "Camk2d"
# These are genes that change in only 1 resistant line
# DEP_Bl_Res_fs_unique_genes_ls
# DEP_Bl_Res_ss_unique_genes_ls
# Filter out intersecting genes
filt_DEP_Bl_Res_fs_unique_genes_ls <- lapply(DEP_Bl_Res_fs_unique_genes_ls, function(x) x[!(x %in% intersect_genes)])
filt_DEP_Bl_Res_ss_unique_genes_ls <- lapply(DEP_Bl_Res_ss_unique_genes_ls, function(x) x[!(x %in% intersect_genes)])
# Combine fs and ss genes
DEP_Bl_Res_unique_genes_ls <- c(filt_DEP_Bl_Res_fs_unique_genes_ls,filt_DEP_Bl_Res_ss_unique_genes_ls)
# Directories
output_dir_upset <- paste0(figures_dir)
graph_dir_upset <- paste0(figures_dir)
# Convert list to binary matrix
Upset_matrix <- ComplexHeatmap::list_to_matrix(DEP_Bl_Res_unique_genes_ls)
# Save Upset_matrix
write.csv(Upset_matrix, file = paste0(output_dir_upset, "Baseline_Clone_Kinase_Sig_Upset_matrix",".csv"))
# convert list to combination matrix
combo_matrix <- ComplexHeatmap::make_comb_mat(Upset_matrix)
# Generate upset plot
upset_plot <- ComplexHeatmap::UpSet(combo_matrix, comb_col = "red" ,
row_names_gp =gpar(fontsize = 12) ,
top_annotation = upset_top_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
annotation_name_rot = 90,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
right_annotation = upset_right_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
pt_size = unit(8, "pt"))
upset_plot
# Save upset_plot
pdf(file=paste0(graph_dir_upset, "Baseline_Clone_Kinase_Sig_Upset" , ".pdf"))
draw(upset_plot)
dev.off()
## png
## 2
# Combination matrix
combo_matrix
## A combination matrix with 9 sets and 11 combinations.
## ranges of combination set size: c(1, 23).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## E5_fs G5_fs G8_fs G12_fs E5_ss G1_ss G5_ss G8_ss G12_ss code size
## x 010000000 23
## x 001000000 6
## x 100000000 4
## x 000000001 4
## x x 010000100 2
## x x 000100001 2
## x x 010000001 1
## x x 001010000 1
##
## Sets are:
## set size
## E5_fs 4
## G5_fs 26
## G8_fs 7
## G12_fs 2
## E5_ss 1
## G1_ss 1
## G5_ss 3
## G8_ss 1
## G12_ss 7
comb_name(combo_matrix)
## [1] "010000100" "010000001" "001010000" "000100001" "100000000" "010000000"
## [7] "001000000" "000001000" "000000100" "000000010" "000000001"
# Extract all from upset plot
upset_genes_ls <- list()
for(i in seq_along(comb_name(combo_matrix))){
upset_genes_ls[[paste0((set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[1],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[2],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[3],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[4],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[5],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[6],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[7],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[8],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[9],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[10],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[11])]] <-
extract_comb(combo_matrix,comb_name(combo_matrix)[i])
}
# Convert to DF
upset_genes_df <- do.call(cbind, lapply(upset_genes_ls, function(x) c(x, rep(NA, max(sapply(upset_genes_ls, length)) - length(x)))))
# Save genes
write.csv(upset_genes_df, file = paste0(output_dir_upset,"Baseline_Clone_Kinase_Sig_genes_df",".csv"))
# Extract unique genes
grep("_NA_NA_NA_NA_NA_NA_NA_NA_NA_NA",colnames(upset_genes_df))
## [1] 5 6 7 8 9 10 11
DEP_Bl_Res_unique_genes_ol <- c()
for(i in (grep("_NA_NA_NA_NA_NA_NA_NA_NA_NA_NA",colnames(upset_genes_df)))){
DEP_Bl_Res_unique_genes_ol<- c(DEP_Bl_Res_unique_genes_ol,upset_genes_df[,i][!is.na( upset_genes_df[,i])])
}
# Subset unique genes DF
DEP_Bl_Res_unique_genes_ol_df <- upset_genes_df[,colnames(upset_genes_df)[grep("_NA_NA_NA_NA_NA_NA_NA_NA_NA_NA",colnames(upset_genes_df))]]
colnames(DEP_Bl_Res_unique_genes_ol_df) <- gsub("_NA_NA_NA_NA_NA_NA_NA_NA_NA_NA","",colnames(DEP_Bl_Res_unique_genes_ol_df))
# Convert to list
DEP_Bl_Res_unique_genes_ol_ls <- list()
for (i in seq_along(colnames(DEP_Bl_Res_unique_genes_ol_df))){
DEP_Bl_Res_unique_genes_ol_ls[[colnames(DEP_Bl_Res_unique_genes_ol_df)[i]]] <-
c(DEP_Bl_Res_unique_genes_ol_df[,i][!is.na(DEP_Bl_Res_unique_genes_ol_df[,i])])
}
# Extract overlapping genes column names
colnames(upset_genes_df)[(grep("_NA_NA_NA_NA_NA_NA_NA_NA_NA_NA",colnames(upset_genes_df), invert = T))]
## [1] "G5_fs_G5_ss_NA_NA_NA_NA_NA_NA_NA_NA_NA"
## [2] "G5_fs_G12_ss_NA_NA_NA_NA_NA_NA_NA_NA_NA"
## [3] "G8_fs_E5_ss_NA_NA_NA_NA_NA_NA_NA_NA_NA"
## [4] "G12_fs_G12_ss_NA_NA_NA_NA_NA_NA_NA_NA_NA"
# Extract overlapping genes
DEP_Bl_Res_unique_genes_ol_ol_genes <- c()
for (i in (colnames(upset_genes_df)[(grep("_NA_NA_NA_NA_NA_NA_NA_NA_NA_NA",colnames(upset_genes_df), invert = T))])){
DEP_Bl_Res_unique_genes_ol_ol_genes <-
c(DEP_Bl_Res_unique_genes_ol_ol_genes,upset_genes_df[,i][!is.na( upset_genes_df[,i] )])
}
DEP_Bl_Res_unique_genes_ol_ls
## $E5_fs
## [1] "Gsk3b" "Mapk10" "Phkg2" "Pik3r4"
##
## $G5_fs
## [1] "Axl" "Bub1b" "Cdk13" "Chek2" "Chka" "Clk3" "Dck"
## [8] "Fn3krp" "Ikbke" "Jak1" "Lrrk2" "Lyn" "Mark3" "Nek3"
## [15] "Nek9" "Prkaa1" "Prkacb" "Prkce" "Rps6kc1" "Stk38" "Stk38l"
## [22] "Tesk1" "Tnk2"
##
## $G8_fs
## [1] "Bmpr1a" "Csnk2a1" "Mapk8" "Mark2" "Prkd3" "Ulk1"
##
## $G1_ss
## [1] "Chkb"
##
## $G5_ss
## [1] "Brsk2"
##
## $G8_ss
## [1] "Nrbp1"
##
## $G12_ss
## [1] "Braf" "Cdk1" "Cdk4" "Jak3"
names(DEP_Bl_Res_unique_genes_ol_ls)
## [1] "E5_fs" "G5_fs" "G8_fs" "G1_ss" "G5_ss" "G8_ss" "G12_ss"
E5_BL_sig <- DEP_Bl_Res_unique_genes_ol_ls[["E5_fs"]]
G1_BL_sig <- DEP_Bl_Res_unique_genes_ol_ls[["G1_fs"]]
G5_BL_sig <- DEP_Bl_Res_unique_genes_ol_ls[["G5_fs"]]
G8_BL_sig <- DEP_Bl_Res_unique_genes_ol_ls[["G8_fs"]]
G12_BL_sig <- c(DEP_Bl_Res_unique_genes_ol_ls[["G12_fs"]],DEP_Bl_Res_unique_genes_ol_ls[["G12_ss"]])
BL_sig_ls <- list(E5_BL_sig=E5_BL_sig,
G1_BL_sig=G1_BL_sig,
G5_BL_sig=G5_BL_sig,
G8_BL_sig=G8_BL_sig,
G12_BL_sig=G12_BL_sig)
BL_sig_df <- do.call(cbind, lapply(BL_sig_ls, function(x) c(x, rep(NA, max(sapply(BL_sig_ls, length)) - length(x)))))
write.csv(BL_sig_df, file = paste0(output_dir_upset,"Clone_baseline_sig",".csv"), na="")
DEP_Bl_Res_unique_genes_ol_ls
## $E5_fs
## [1] "Gsk3b" "Mapk10" "Phkg2" "Pik3r4"
##
## $G5_fs
## [1] "Axl" "Bub1b" "Cdk13" "Chek2" "Chka" "Clk3" "Dck"
## [8] "Fn3krp" "Ikbke" "Jak1" "Lrrk2" "Lyn" "Mark3" "Nek3"
## [15] "Nek9" "Prkaa1" "Prkacb" "Prkce" "Rps6kc1" "Stk38" "Stk38l"
## [22] "Tesk1" "Tnk2"
##
## $G8_fs
## [1] "Bmpr1a" "Csnk2a1" "Mapk8" "Mark2" "Prkd3" "Ulk1"
##
## $G1_ss
## [1] "Chkb"
##
## $G5_ss
## [1] "Brsk2"
##
## $G8_ss
## [1] "Nrbp1"
##
## $G12_ss
## [1] "Braf" "Cdk1" "Cdk4" "Jak3"
names(DEP_Bl_Res_unique_genes_ol_ls)
## [1] "E5_fs" "G5_fs" "G8_fs" "G1_ss" "G5_ss" "G8_ss" "G12_ss"
E5_BL_sig <- c(DEP_Bl_Res_unique_genes_ol_ls[["E5_fs"]],DEP_Bl_Res_unique_genes_ol_ls[["E5_ss"]])
G1_BL_sig <- c(DEP_Bl_Res_unique_genes_ol_ls[["G1_fs"]],DEP_Bl_Res_unique_genes_ol_ls[["G1_ss"]])
G5_BL_sig <- c(DEP_Bl_Res_unique_genes_ol_ls[["G5_fs"]],DEP_Bl_Res_unique_genes_ol_ls[["G5_ss"]])
G8_BL_sig <- c(DEP_Bl_Res_unique_genes_ol_ls[["G8_fs"]],DEP_Bl_Res_unique_genes_ol_ls[["G8_ss"]])
G12_BL_sig <- c(DEP_Bl_Res_unique_genes_ol_ls[["G12_fs"]],DEP_Bl_Res_unique_genes_ol_ls[["G12_ss"]])
# Genes that overlap and are not influenced by serum
OL_BL_sig <- intersect(serum_unique_genes_ls[["fs"]],serum_unique_genes_ls[["ss"]])
BL_sig_ls <- list(E5_BL_sig=E5_BL_sig,
G1_BL_sig=G1_BL_sig,
G5_BL_sig=G5_BL_sig,
G8_BL_sig=G8_BL_sig,
G12_BL_sig=G12_BL_sig,
OL_BL_sig=OL_BL_sig)
BL_sig_df <- do.call(cbind, lapply(BL_sig_ls, function(x) c(x, rep(NA, max(sapply(BL_sig_ls, length)) - length(x)))))
# Save CSV
write.csv(BL_sig_df, file = paste0(output_dir_upset,"All_baseline_kinase_sig",".csv"), na="")
# Sanity check. There should be no differences between the baseline signatures generated in this Figures script compared to the EDA script.
# Import figures sigs
BL_sig_df_figures <- read.csv(paste0(output_dir_upset,"All_baseline_kinase_sig.csv"), header =TRUE, fileEncoding = "UTF-8-BOM", row.names = 1, na.strings = NA)
# Import EDA sigs
BL_sig_df_EDA <- read.csv("/Data/Output/MIB_BL_sig/ss_fs_ol/BL_sig.csv", header =TRUE, fileEncoding = "UTF-8-BOM", row.names = 1, na.strings = NA)
# Compare results
waldo::compare(BL_sig_df_figures,BL_sig_df_EDA)
## ✔ No differences
Setup
# Clean environment
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 17011034 908.5 29379045 1569.1 29379045 1569.1
## Vcells 30189946 230.4 80630088 615.2 63941846 487.9
# Import setup workspace
load(paste0("/Data/Input/Figures/setup_workspace.RData"))
Directories
fig.label <- paste0("Baseline_signatures")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
# BL_sig
BL_sigs_df <- read.csv(paste0(Data,"Figures/","Baseline_kinase_upset/","All_baseline_kinase_sig.csv"), row.names = 1, na.strings = c("","NA"))
# Pivot
df_long <- BL_sigs_df %>%
pivot_longer(cols = everything(), values_to = "gene")
# Remove NA
BL_sig <- subset(df_long,!df_long$gene=="")
# # Remove OL sig
BL_sig <- subset(BL_sig,BL_sig$name!="OL_BL_sig")
# Remove hEGFR
BL_sig <- subset(BL_sig,!BL_sig$gene=="hEGFR")
# Remove text
BL_sig$name <- gsub("_BL_sig","",BL_sig$name)
# Import Protein vsd
DEP_Bl_Res_fs <- readRDS(tail(grep("DEP_Bl_Res_fs.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs"), recursive = T, full.names = T), value = T),1))
DEP_Bl_Res_ss <- readRDS(tail(grep("DEP_Bl_Res_ss.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# # Import mRNA vsd
# DEP_Bl_Res_fs <- readRDS(tail(grep("bl_Res_fs_mv_.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_EDA_subset/bl_Res_fs"), recursive = T, full.names = T), value = T),1))
# DEP_Bl_Res_ss <- readRDS(tail(grep("bl_Res_ss_mv_.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_EDA_subset/bl_Res_ss"), recursive = T, full.names = T), value = T),1))
#
# Combined full and starved Protein
DEP_Bl_Res_all <- cbind(DEP_Bl_Res_fs,DEP_Bl_Res_ss)
# Already log2 transformed
vsd <- DEP_Bl_Res_fs
# Subset dds to contain genes in sig
vsd_sig <-vsd[row.names(vsd) %in% BL_sig$gene,]
# Divide by mean of CEv3
CEv3_samples <- row.names(subset(colData(vsd_sig),colData(vsd_sig)$cell_line_abbr=="CEv3"))
# Get mean of CEv3
vsd_CEv3 <- vsd_sig[,colnames(vsd_sig) %in% CEv3_samples]
CEv3_RowMean <- data.frame(rowMeans(assay(vsd_CEv3)[,colnames(vsd_CEv3) %in% CEv3_samples]))
# Check
row.names(assay(vsd_sig))==row.names(CEv3_RowMean)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Calculate Normalized values
vsd_sig_norm <- vsd_sig
assay(vsd_sig_norm) <- sweep(assay(vsd_sig),1,CEv3_RowMean[,1])
# Remove CEv3 samples
vsd_sig_norm <- vsd_sig_norm[,!(colnames(vsd_sig_norm) %in% CEv3_samples)]
# Heatmap_GOI_df_ls
heatmap_goi <- data.frame(BL_sig)
heatmap_goi$name <- as.factor(heatmap_goi$name)
row.names(heatmap_goi) <- heatmap_goi$gene
# Extract matrix values
mat_list <- as.matrix(assay(vsd_sig_norm))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Model = factor(vsd_sig_norm@colData$cell_line_abbr,
# Set legend order
levels= c("E4","E5","G1","G5","G8","G12")),
Serum = vsd_sig_norm@colData$serum),
col = list(Model =
c(
'E4' = 'deepskyblue4',
'E5' = 'turquoise3',
'G1' = 'purple4',
'G5' = 'mediumorchid',
'G8' = 'deeppink',
'G12' = 'pink2'
),
Serum =
c("full"='firebrick',
"starve"='royalblue'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = vsd_sig_norm@colData$cell_line_abbr,
column_title = "Protein FS",
row_split = heatmap_goi[row.names(mat_list),]["name"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
row_title_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, "Protein_fs_sig", ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
# Import Protein vsd
DEP_Bl_Res_fs <- readRDS(tail(grep("DEP_Bl_Res_fs.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs"), recursive = T, full.names = T), value = T),1))
DEP_Bl_Res_ss <- readRDS(tail(grep("DEP_Bl_Res_ss.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# DEP_Bl_Res_fs <- readRDS(tail(grep("bl_Res_fs_mv_.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_EDA_subset/bl_Res_fs"), recursive = T, full.names = T), value = T),1))
# DEP_Bl_Res_ss <- readRDS(tail(grep("bl_Res_ss_mv_.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_EDA_subset/bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# Combined full and starved Protein
DEP_Bl_Res_all <- cbind(DEP_Bl_Res_fs,DEP_Bl_Res_ss)
# Already log2 transformed
vsd <- DEP_Bl_Res_ss
# Subset dds to contain genes in sig
vsd_sig <-vsd[row.names(vsd) %in% BL_sig$gene,]
# Divide by mean of CEv3
CEv3_samples <- row.names(subset(colData(vsd_sig),colData(vsd_sig)$cell_line_abbr=="CEv3"))
# Get mean of CEv3
vsd_CEv3 <- vsd_sig[,colnames(vsd_sig) %in% CEv3_samples]
CEv3_RowMean <- data.frame(rowMeans(assay(vsd_CEv3)[,colnames(vsd_CEv3) %in% CEv3_samples]))
# Check
row.names(assay(vsd_sig))==row.names(CEv3_RowMean)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Calculate Normalized values
vsd_sig_norm <- vsd_sig
assay(vsd_sig_norm) <- sweep(assay(vsd_sig),1,CEv3_RowMean[,1])
# Remove CEv3 samples
vsd_sig_norm <- vsd_sig_norm[,!(colnames(vsd_sig_norm) %in% CEv3_samples)]
# Heatmap_GOI_df_ls
heatmap_goi <- data.frame(BL_sig)
heatmap_goi$name <- as.factor(heatmap_goi$name)
row.names(heatmap_goi) <- heatmap_goi$gene
# Extract matrix values
mat_list <- as.matrix(assay(vsd_sig_norm))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Model = factor(vsd_sig_norm@colData$cell_line_abbr,
# Set legend order
levels= c("E4","E5","G1","G5","G8","G12")),
Serum = vsd_sig_norm@colData$serum),
col = list(Model =
c(
'E4' = 'deepskyblue4',
'E5' = 'turquoise3',
'G1' = 'purple4',
'G5' = 'mediumorchid',
'G8' = 'deeppink',
'G12' = 'pink2'
),
Serum =
c("full"='firebrick',
"starve"='royalblue'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = vsd_sig_norm@colData$cell_line_abbr,
column_title = "Protein SS",
row_split = heatmap_goi[row.names(mat_list),]["name"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
row_title_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, "Protein_ss_sig", ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
# BL_sig
BL_sigs_df <- read.csv(paste0(Data,"Figures/","Baseline_kinase_upset/","All_baseline_kinase_sig.csv"), row.names = 1, na.strings = c("","NA"))
# Pivot
df_long <- BL_sigs_df %>%
pivot_longer(cols = everything(), values_to = "gene")
# Remove NA
BL_sig <- subset(df_long,!df_long$gene=="")
# Remove unique sig
BL_sig <- subset(BL_sig,BL_sig$name=="OL_BL_sig")
# Remove hEGFR
BL_sig <- subset(BL_sig,!BL_sig$gene=="hEGFR")
# Remove text
BL_sig$name <- gsub("_BL_sig","",BL_sig$name)
# Import Protein vsd
DEP_Bl_Res_fs <- readRDS(tail(grep("DEP_Bl_Res_fs.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs"), recursive = T, full.names = T), value = T),1))
DEP_Bl_Res_ss <- readRDS(tail(grep("DEP_Bl_Res_ss.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# # # Import mRNA vsd
# # DEP_Bl_Res_fs <- readRDS(tail(grep("bl_Res_fs_mv_.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_EDA_subset/bl_Res_fs"), recursive = T, full.names = T), value = T),1))
# # DEP_Bl_Res_ss <- readRDS(tail(grep("bl_Res_ss_mv_.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_EDA_subset/bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# #
#
# # Combined full and starved Protein
# DEP_Bl_Res_all <- cbind(DEP_Bl_Res_fs,DEP_Bl_Res_ss)
# Already log2 transformed
vsd <- DEP_Bl_Res_fs
# Subset dds to contain genes in sig
vsd_sig <-vsd[row.names(vsd) %in% BL_sig$gene,]
# Divide by mean of CEv3
CEv3_samples <- row.names(subset(colData(vsd_sig),colData(vsd_sig)$cell_line_abbr=="CEv3"))
# Get mean of CEv3
vsd_CEv3 <- vsd_sig[,colnames(vsd_sig) %in% CEv3_samples]
CEv3_RowMean <- data.frame(rowMeans(assay(vsd_CEv3)[,colnames(vsd_CEv3) %in% CEv3_samples]))
# Check
row.names(assay(vsd_sig))==row.names(CEv3_RowMean)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Calculate Normalized values
vsd_sig_norm <- vsd_sig
assay(vsd_sig_norm) <- sweep(assay(vsd_sig),1,CEv3_RowMean[,1])
# Clip values
assay(vsd_sig_norm) <- pmax(pmin(assay(vsd_sig_norm),4),-4)
# Remove CEv3 samples
vsd_sig_norm <- vsd_sig_norm[,!(colnames(vsd_sig_norm) %in% CEv3_samples)]
# Heatmap_GOI_df_ls
heatmap_goi <- data.frame(BL_sig)
heatmap_goi$name <- as.factor(heatmap_goi$name)
row.names(heatmap_goi) <- heatmap_goi$gene
# Extract matrix values
mat_list <- as.matrix(assay(vsd_sig_norm))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Model = factor(vsd_sig_norm@colData$cell_line_abbr,
# Set legend order
levels= c("E4","E5","G1","G5","G8","G12")),
Serum = vsd_sig_norm@colData$serum),
col = list(Model =
c(
'E4' = 'deepskyblue4',
'E5' = 'turquoise3',
'G1' = 'purple4',
'G5' = 'mediumorchid',
'G8' = 'deeppink',
'G12' = 'pink2'
),
Serum =
c("full"='firebrick',
"starve"='royalblue'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
mat_list,
top_annotation = annotation_col,
name = " ",
column_split = vsd_sig_norm@colData$cell_line_abbr,
column_title = "Protein FS",
row_split = heatmap_goi[row.names(mat_list),]["name"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
row_title_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, "OL_Protein_fs_sig", ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
# Import Protein vsd
DEP_Bl_Res_fs <- readRDS(tail(grep("DEP_Bl_Res_fs.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs"), recursive = T, full.names = T), value = T),1))
DEP_Bl_Res_ss <- readRDS(tail(grep("DEP_Bl_Res_ss.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# DEP_Bl_Res_fs <- readRDS(tail(grep("bl_Res_fs_mv_.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_EDA_subset/bl_Res_fs"), recursive = T, full.names = T), value = T),1))
# DEP_Bl_Res_ss <- readRDS(tail(grep("bl_Res_ss_mv_.rds",list.files(paste0(Data,"Output/MIB-BL/DEP_EDA_subset/bl_Res_ss"), recursive = T, full.names = T), value = T),1))
# Combined full and starved Protein
DEP_Bl_Res_all <- cbind(DEP_Bl_Res_fs,DEP_Bl_Res_ss)
# Already log2 transformed
vsd <- DEP_Bl_Res_ss
# Subset dds to contain genes in sig
vsd_sig <-vsd[row.names(vsd) %in% BL_sig$gene,]
# Divide by mean of CEv3
CEv3_samples <- row.names(subset(colData(vsd_sig),colData(vsd_sig)$cell_line_abbr=="CEv3"))
# Get mean of CEv3
vsd_CEv3 <- vsd_sig[,colnames(vsd_sig) %in% CEv3_samples]
CEv3_RowMean <- data.frame(rowMeans(assay(vsd_CEv3)[,colnames(vsd_CEv3) %in% CEv3_samples]))
# Check
row.names(assay(vsd_sig))==row.names(CEv3_RowMean)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Calculate Normalized values
vsd_sig_norm <- vsd_sig
assay(vsd_sig_norm) <- sweep(assay(vsd_sig),1,CEv3_RowMean[,1])
# Clip values
assay(vsd_sig_norm) <- pmax(pmin(assay(vsd_sig_norm),4),-4)
# Remove CEv3 samples
vsd_sig_norm <- vsd_sig_norm[,!(colnames(vsd_sig_norm) %in% CEv3_samples)]
# Heatmap_GOI_df_ls
heatmap_goi <- data.frame(BL_sig)
heatmap_goi$name <- as.factor(heatmap_goi$name)
row.names(heatmap_goi) <- heatmap_goi$gene
# Extract matrix values
mat_list <- as.matrix(assay(vsd_sig_norm))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Model = factor(vsd_sig_norm@colData$cell_line_abbr,
# Set legend order
levels= c("E4","E5","G1","G5","G8","G12")),
Serum = vsd_sig_norm@colData$serum),
col = list(Model =
c(
'E4' = 'deepskyblue4',
'E5' = 'turquoise3',
'G1' = 'purple4',
'G5' = 'mediumorchid',
'G8' = 'deeppink',
'G12' = 'pink2'
),
Serum =
c("full"='firebrick',
"starve"='royalblue'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
mat_list,
top_annotation = annotation_col,
name = " ",
column_split = vsd_sig_norm@colData$cell_line_abbr,
column_title = "Protein SS",
row_split = heatmap_goi[row.names(mat_list),]["name"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
row_title_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, "OL_Protein_ss_sig", ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
EGFR TKI resistance signature (in vitro models)
# BL_sig
BL_sigs_df <- read.csv(paste0(Data,"Figures/","Baseline_kinase_upset/","All_baseline_kinase_sig.csv"), row.names = 1, na.strings = c("","NA"))
# Pivot
df_long <- BL_sigs_df %>%
pivot_longer(cols = everything(), values_to = "gene")
# Remove NA
BL_sig <- subset(df_long,!df_long$gene=="")
# Remove hEGFR
BL_sig <- subset(BL_sig,!BL_sig$gene=="hEGFR")
# Remove text
BL_sig$name <- gsub("_BL_sig","",BL_sig$name)
fig.label <- paste0("Baseline_sig_corr")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
# limma results direct
output_dir_res <- paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs/","Limma_res/")
# Summarize of limma comparisions
comps_limma <- read.csv(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_fs/","comps_all.csv"), row.names = 1)
# import limma res
subset_limmaRes <- list()
for (i in seq_along(row.names(comps_limma))){
subset_limmaRes[[comps_limma$comp[i]]] <-
read.csv(paste0(output_dir_res,"comp_",i,"_",comps_limma[i,5],".csv"), row.names = 1)
}
# Extract comps_limma of interest
coi_limma <- comps_limma[comps_limma$comp_denom %in% c("CEv3.full"),]$comp
comp_list_oi_limma <- subset_limmaRes[names(subset_limmaRes) %in% coi_limma]
# DESEQ2 results direct
output_dir_res <- paste0(Data,"Output/RNA/dds_EDA_subset/bl_Res_fs/","DE2_res/")
# Summarize of DESEQ2 comparisions
comps_DE2 <- read.csv(paste0(Data,"Output/RNA/dds_EDA_subset/bl_Res_fs/","comps_all.csv"), row.names = 1)
# import DE2 res
subset_DE2Res <- list()
for (i in seq_along(comps_DE2$comp_name)){
subset_DE2Res[[comps_DE2$comp_name[i]]] <-
read.csv(paste0(output_dir_res,"comp_",i,"_",comps_DE2$comp_name[i],".csv"), row.names = 1)
}
# Extract comps_DE2 of interest
coi_DE2 <- comps_DE2[comps_DE2$comp_denom %in% c("full_CEv3_NA_0"),]$comp_name
comp_list_oi_DE2 <- subset_DE2Res[names(subset_DE2Res) %in% coi_DE2]
# Comps combined
comps_df_limma <- comps_limma[comps_limma$comp_denom %in% c("CEv3.full"),]
colnames(comps_df_limma) <- paste(colnames(comps_df_limma),"protein",sep = "_")
comps_df_DE2 <- comps_DE2[comps_DE2$comp_denom %in% c("full_CEv3_NA_0"),]
colnames(comps_df_DE2) <- paste(colnames(comps_df_DE2),"mRNA",sep = "_")
# Cbind
comps_combined <- cbind(comps_df_limma,comps_df_DE2)
comps_combined
## comp_type_protein comp_num_protein comp_denom_protein comp_name_protein
## 7 condition E4.full CEv3.full E4.full-vs-CEv3.full
## 13 condition E5.full CEv3.full E5.full-vs-CEv3.full
## 19 condition G1.full CEv3.full G1.full-vs-CEv3.full
## 25 condition G12.full CEv3.full G12.full-vs-CEv3.full
## 31 condition G5.full CEv3.full G5.full-vs-CEv3.full
## 37 condition G8.full CEv3.full G8.full-vs-CEv3.full
## comp_protein comp_type_mRNA comp_num_mRNA comp_denom_mRNA
## 7 E4.full-CEv3.full serum_group full_E4_NA_0 full_CEv3_NA_0
## 13 E5.full-CEv3.full serum_group full_E5_NA_0 full_CEv3_NA_0
## 19 G1.full-CEv3.full serum_group full_G1_NA_0 full_CEv3_NA_0
## 25 G12.full-CEv3.full serum_group full_G12_NA_0 full_CEv3_NA_0
## 31 G5.full-CEv3.full serum_group full_G5_NA_0 full_CEv3_NA_0
## 37 G8.full-CEv3.full serum_group full_G8_NA_0 full_CEv3_NA_0
## comp_name_mRNA
## 7 full_E4_NA_0-vs-full_CEv3_NA_0
## 13 full_E5_NA_0-vs-full_CEv3_NA_0
## 19 full_G1_NA_0-vs-full_CEv3_NA_0
## 25 full_G12_NA_0-vs-full_CEv3_NA_0
## 31 full_G5_NA_0-vs-full_CEv3_NA_0
## 37 full_G8_NA_0-vs-full_CEv3_NA_0
# Remove genes not in signature
# Remove protein genes not in signature
comp_list_oi_limma_filt <- lapply(comp_list_oi_limma, function(x) subset(x,row.names(x) %in% c(BL_sig$gene)))
# Remove RNA genes not in signature
comp_list_oi_DE2_filt <- lapply(comp_list_oi_DE2, function(x) subset(x,row.names(x) %in% c(BL_sig$gene)))
# Change for each
corr_comp <- comps_combined[1,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[2,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[3,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[4,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[5,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[6,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
fig.label <- paste0("Baseline_sig_corr")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
## Warning in dir.create(figures_dir): '/Data/Figures/Baseline_sig_corr' already
## exists
# limma results direct
output_dir_res <- paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss/","Limma_res/")
# Summarize of limma comparisions
comps_limma <- read.csv(paste0(Data,"Output/MIB-BL/DEP_Bl_Res_ss/","comps_all.csv"), row.names = 1)
# import limma res
subset_limmaRes <- list()
for (i in seq_along(row.names(comps_limma))){
subset_limmaRes[[comps_limma$comp[i]]] <-
read.csv(paste0(output_dir_res,"comp_",i,"_",comps_limma[i,5],".csv"), row.names = 1)
}
# Extract comps_limma of interest
coi_limma <- comps_limma[comps_limma$comp_denom %in% c("CEv3.starve"),]$comp
comp_list_oi_limma <- subset_limmaRes[names(subset_limmaRes) %in% coi_limma]
# DESEQ2 results direct
output_dir_res <- paste0(Data,"Output/RNA/dds_EDA_subset/bl_Res_ss/","DE2_res/")
# Summarize of DESEQ2 comparisions
comps_DE2 <- read.csv(paste0(Data,"Output/RNA/dds_EDA_subset/bl_Res_ss/","comps_all.csv"), row.names = 1)
# import DE2 res
subset_DE2Res <- list()
for (i in seq_along(comps_DE2$comp_name)){
subset_DE2Res[[comps_DE2$comp_name[i]]] <-
read.csv(paste0(output_dir_res,"comp_",i,"_",comps_DE2$comp_name[i],".csv"), row.names = 1)
}
# Extract comps_DE2 of interest
coi_DE2 <- comps_DE2[comps_DE2$comp_denom %in% c("starve_CEv3_NA_0"),]$comp_name
comp_list_oi_DE2 <- subset_DE2Res[names(subset_DE2Res) %in% coi_DE2]
# Comps combined
comps_df_limma <- comps_limma[comps_limma$comp_denom %in% c("CEv3.starve"),]
colnames(comps_df_limma) <- paste(colnames(comps_df_limma),"protein",sep = "_")
comps_df_DE2 <- comps_DE2[comps_DE2$comp_denom %in% c("starve_CEv3_NA_0"),]
colnames(comps_df_DE2) <- paste(colnames(comps_df_DE2),"mRNA",sep = "_")
# Cbind
comps_combined <- cbind(comps_df_limma,comps_df_DE2)
comps_combined
## comp_type_protein comp_num_protein comp_denom_protein
## 7 condition E4.starve CEv3.starve
## 13 condition E5.starve CEv3.starve
## 19 condition G1.starve CEv3.starve
## 25 condition G12.starve CEv3.starve
## 31 condition G5.starve CEv3.starve
## 37 condition G8.starve CEv3.starve
## comp_name_protein comp_protein comp_type_mRNA
## 7 E4.starve-vs-CEv3.starve E4.starve-CEv3.starve serum_group
## 13 E5.starve-vs-CEv3.starve E5.starve-CEv3.starve serum_group
## 19 G1.starve-vs-CEv3.starve G1.starve-CEv3.starve serum_group
## 25 G12.starve-vs-CEv3.starve G12.starve-CEv3.starve serum_group
## 31 G5.starve-vs-CEv3.starve G5.starve-CEv3.starve serum_group
## 37 G8.starve-vs-CEv3.starve G8.starve-CEv3.starve serum_group
## comp_num_mRNA comp_denom_mRNA comp_name_mRNA
## 7 starve_E4_NA_0 starve_CEv3_NA_0 starve_E4_NA_0-vs-starve_CEv3_NA_0
## 13 starve_E5_NA_0 starve_CEv3_NA_0 starve_E5_NA_0-vs-starve_CEv3_NA_0
## 19 starve_G1_NA_0 starve_CEv3_NA_0 starve_G1_NA_0-vs-starve_CEv3_NA_0
## 25 starve_G12_NA_0 starve_CEv3_NA_0 starve_G12_NA_0-vs-starve_CEv3_NA_0
## 31 starve_G5_NA_0 starve_CEv3_NA_0 starve_G5_NA_0-vs-starve_CEv3_NA_0
## 37 starve_G8_NA_0 starve_CEv3_NA_0 starve_G8_NA_0-vs-starve_CEv3_NA_0
# Remove genes not in signature
# Remove protein genes not in signature
comp_list_oi_limma_filt <- lapply(comp_list_oi_limma, function(x) subset(x,row.names(x) %in% c(BL_sig$gene)))
# Remove RNA genes not in signature
comp_list_oi_DE2_filt <- lapply(comp_list_oi_DE2, function(x) subset(x,row.names(x) %in% c(BL_sig$gene)))
# Change for each
corr_comp <- comps_combined[1,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[2,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[3,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[4,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[5,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
# Change for each
corr_comp <- comps_combined[6,]
# Import L2FC for mRNA and MIBS
RNA_LFC <- data.frame(comp_list_oi_DE2_filt[corr_comp[,"comp_name_mRNA"]])
MIBs_LFC <- data.frame(comp_list_oi_limma_filt[corr_comp[,"comp_protein"]])
# row.names to column
RNA_LFC$genes <- row.names(RNA_LFC)
MIBs_LFC$genes <- row.names(MIBs_LFC)
# Join RNA and MIBs
proteo.transcriptomic_LFC <- left_join(RNA_LFC,MIBs_LFC , by= c("genes"="genes"))
# Generate labels
proteo.transcriptomic_LFC$mRNA_sig <- ifelse(proteo.transcriptomic_LFC[[grep("padj",colnames(proteo.transcriptomic_LFC),value = T)]] < 0.05, "Yes", "No")
proteo.transcriptomic_LFC$MIB_sig <- ifelse(proteo.transcriptomic_LFC[[grep("adj.P.Val",colnames(proteo.transcriptomic_LFC),value = T)]]< 0.05, "Yes", "No")
proteo.transcriptomic_LFC$mRNA_dir <- ifelse(proteo.transcriptomic_LFC[[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
proteo.transcriptomic_LFC$MIB_dir <- ifelse(proteo.transcriptomic_LFC[[grep("logFC",colnames(proteo.transcriptomic_LFC),value = T)]] < 0, "Down", "Up")
# Generate labels
proteo.transcriptomic_LFC$corr <-
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"Both",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "Yes" &
proteo.transcriptomic_LFC$mRNA_sig == "No"),
"MIBs",
ifelse((proteo.transcriptomic_LFC$MIB_sig == "No" &
proteo.transcriptomic_LFC$mRNA_sig == "Yes"),
"mRNA",
"Neither"
)
)
)
# Label Neither
proteo.transcriptomic_LFC$corr[is.na(proteo.transcriptomic_LFC$corr)] <- "Neither"
# # Label kinase receptors
# proteo.transcriptomic_LFC$Receptor <-ifelse(proteo.transcriptomic_LFC$gene %in% Heatmap_GOI_df$gene_symbol, "Yes","No")
# Convert to factors
proteo.transcriptomic_LFC$mRNA_sig <- as.factor(proteo.transcriptomic_LFC$mRNA_sig)
proteo.transcriptomic_LFC$MIB_sig <- as.factor(proteo.transcriptomic_LFC$MIB_sig)
proteo.transcriptomic_LFC$mRNA_dir <- as.factor(proteo.transcriptomic_LFC$mRNA_dir)
proteo.transcriptomic_LFC$MIB_dir <- as.factor(proteo.transcriptomic_LFC$MIB_dir)
proteo.transcriptomic_LFC$corr <- as.factor(proteo.transcriptomic_LFC$corr)
# Rename Columns
names(proteo.transcriptomic_LFC)[grep("logFC",colnames(proteo.transcriptomic_LFC))] <- "MIBs"
names(proteo.transcriptomic_LFC)[grep("log2FoldChange",colnames(proteo.transcriptomic_LFC))] <- "RNA"
# Calculate correlation
# Perform correlation test
corr_test <- cor.test(proteo.transcriptomic_LFC$MIBs, proteo.transcriptomic_LFC$RNA,
method = "pearson", use = "complete.obs")
# Extract values
# Correlation coefficient
corr_val <- corr_test$estimate
# P-value
p_val <- corr_test$p.value
# Calculate annotation positions
x_pos <- max(proteo.transcriptomic_LFC$MIBs, na.rm = TRUE)*(.60)
y_pos <- min(proteo.transcriptomic_LFC$RNA, na.rm = TRUE)*(.95)
# Generate Graph
corr_plot <- ggplot(proteo.transcriptomic_LFC, aes(x = MIBs , y = RNA)) +
geom_point(aes(color = corr), size = 3) +
geom_label_repel(
aes(
label = ifelse((MIB_sig == "Yes" | mRNA_sig == "Yes"),
as.character(genes),
''
)
),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
max.overlaps = 30
) +
labs(x = 'Protein (Log2 FC)', y ='mRNA (Log2 FC)' ) +
ggtitle(paste0(corr_comp["comp_num_protein"])) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_color_manual(
"Significant",
values = c(
"Both" = "#984EA3",
"MIBs" = "#E41A1C",
"mRNA" = "#377EB8",
"Neither" = "#4DAF4A"
)
) +
labs(caption = paste0("Pearson r = ", round(corr_val, 2), ", p = ", signif(p_val, 3))) +
theme_bw() +
theme(legend.position = 'right',
plot.caption = element_text(size = 10)) +
theme(panel.grid.minor = element_blank())
corr_plot
# Save graph
ggsave(paste0(figures_dir,corr_comp["comp_num_protein"],"_correlation_scatter_plot.pdf"),
plot = corr_plot,
dpi = 300)
## Saving 7 x 5 in image
Setup
# Clean environment
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 17036384 909.9 29379045 1569.1 29379045 1569.1
## Vcells 30772727 234.8 80630088 615.2 68676585 524.0
# Import setup workspace
load(paste0("/Data/Input/Figures/setup_workspace.RData"))
Directories
fig.label <- paste0("K_means_Protein")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
DEP_TMT_EDA_bc_ls <- readRDS(tail(grep("DEP_TMT_EDA_bc_ls.rds",list.files(paste0(Data,"Output/MIB-DYN/DEP_TMT_EDA"), recursive = F, full.names = T), value = T),1))
cell_lines <- c("CEv3", "G12", "G8" , "G5" , "G1" , "E5" , "E4")
DEP_TMT_EDA_bc_ls <- subset(DEP_TMT_EDA_bc_ls, names(DEP_TMT_EDA_bc_ls) %in% cell_lines)
z_mat_clut_clid_ls <- readRDS(tail(grep("z_mat_clut_clid_ls.rds",list.files(paste0(Data,"Output/MIBs_TMT_Clustering_sig"), recursive = F, full.names = T), value = T),1))
cell_line <- "CEv3"
# Values are already transformed. Extract to vsd
vsd <- DEP_TMT_EDA_bc_ls[[cell_line]]
head(assay(vsd),3)
## CEv3_0_3 CEv3_4_3 CEv3_24_3 CEv3_48_3 CEv3_0_2 CEv3_4_2 CEv3_24_2
## Ephb6 10.68249 10.82549 10.30425 10.44725 10.26549 10.40266 11.31381
## Clk2 11.17959 11.47481 11.29211 11.58733 11.40183 11.55016 11.41988
## Cdk14 15.63858 15.60457 15.81604 15.78204 15.46983 15.47296 15.82875
## CEv3_48_2 CEv3_0_1 CEv3_4_1 CEv3_24_1 CEv3_48_1
## Ephb6 10.27752 9.109216 11.38174 11.07577 10.69275
## Clk2 11.16197 11.226276 11.16175 11.55051 11.59531
## Cdk14 16.06969 15.406320 15.41139 15.99484 16.02868
kmeans_res_clusters <- z_mat_clut_clid_ls[[cell_line]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$Timepoint),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$Timepoint,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$Timepoint)],
column_title = paste0(cell_line," 3 µM Afatinib"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, cell_line, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
cell_line <- "E4"
# Values are already transformed. Extract to vsd
vsd <- DEP_TMT_EDA_bc_ls[[cell_line]]
head(assay(vsd),3)
## E4_0_3 E4_4_3 E4_24_3 E4_48_3 E4_0_2 E4_4_2 E4_24_2 E4_48_2
## Ephb6 10.98430 9.579127 11.89995 9.796101 10.08914 10.75481 10.62178 10.79375
## Clk2 11.47462 11.002741 11.38384 11.672643 12.03896 11.71303 11.21791 10.56394
## Cdk14 15.59657 15.719474 15.58775 15.937437 15.80059 15.71282 15.58410 15.74373
## E4_0_1 E4_4_1 E4_24_1 E4_48_1
## Ephb6 10.49936 10.27654 10.17994 11.30363
## Clk2 11.83164 11.26819 10.74766 11.68636
## Cdk14 15.74055 15.75927 15.76916 15.57226
kmeans_res_clusters <- z_mat_clut_clid_ls[[cell_line]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$Timepoint),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$Timepoint,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$Timepoint)],
column_title = paste0(cell_line," 3 µM Afatinib"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, cell_line, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
cell_line <- "E5"
# Values are already transformed. Extract to vsd
vsd <- DEP_TMT_EDA_bc_ls[[cell_line]]
head(assay(vsd),3)
## E5_0_3 E5_4_3 E5_24_3 E5_48_3 E5_0_2 E5_4_2 E5_24_2 E5_48_2
## Ephb6 10.59338 9.832043 11.58410 10.24996 9.855916 10.72428 10.32860 11.35069
## Clk2 11.49583 11.364881 11.26482 11.40831 12.150021 10.78238 11.55655 11.04489
## Cdk14 15.51152 15.656648 15.74422 15.92885 15.870361 15.65576 15.71617 15.59894
## E5_0_1 E5_4_1 E5_24_1 E5_48_1
## Ephb6 10.89725 10.31484 10.58140 10.46599
## Clk2 10.42662 12.21909 12.15539 10.73274
## Cdk14 15.70311 15.69519 15.76096 15.68197
kmeans_res_clusters <- z_mat_clut_clid_ls[[cell_line]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$Timepoint),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$Timepoint,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$Timepoint)],
column_title = paste0(cell_line," 3 µM Afatinib"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, cell_line, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
cell_line <- "G1"
# Values are already transformed. Extract to vsd
vsd <- DEP_TMT_EDA_bc_ls[[cell_line]]
head(assay(vsd),3)
## G1_0_3 G1_4_3 G1_24_3 G1_48_3 G1_0_2 G1_4_2 G1_24_2
## Ephb6 12.35218 9.923954 10.59147 9.391875 11.02092 9.627123 10.93850
## Clk2 11.41938 10.437778 12.03918 11.637502 11.73389 11.663805 11.45614
## Cdk14 15.70389 15.693532 15.75560 15.688213 15.85381 15.810032 15.72459
## G1_48_2 G1_0_1 G1_4_1 G1_24_1 G1_48_1
## Ephb6 10.67294 10.66850 10.07908 10.16059 11.35131
## Clk2 10.68001 11.93990 10.70227 11.35739 11.53428
## Cdk14 15.45281 15.80339 15.73347 15.77710 15.52727
kmeans_res_clusters <- z_mat_clut_clid_ls[[cell_line]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$Timepoint),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$Timepoint,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$Timepoint)],
column_title = paste0(cell_line," 3 µM Afatinib"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, cell_line, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
cell_line <- "G5"
# Values are already transformed. Extract to vsd
vsd <- DEP_TMT_EDA_bc_ls[[cell_line]]
head(assay(vsd),3)
## G5_0_3 G5_4_3 G5_24_3 G5_48_3 G5_0_2 G5_4_2 G5_24_2 G5_48_2
## Ephb6 10.86333 10.47290 10.97387 9.949383 10.51614 10.39393 10.35411 10.9953
## Clk2 11.64048 10.41854 12.23136 11.243459 11.86143 10.32421 11.89099 11.4572
## Cdk14 15.63683 15.61355 15.78884 15.802018 15.82302 15.61422 15.88150 15.5225
## G5_0_1 G5_4_1 G5_24_1 G5_48_1
## Ephb6 10.28904 11.58499 9.47473 10.91071
## Clk2 11.76923 11.20426 11.59601 10.96434
## Cdk14 15.65833 15.87292 15.56035 15.74963
kmeans_res_clusters <- z_mat_clut_clid_ls[[cell_line]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$Timepoint),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$Timepoint,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$Timepoint)],
column_title = paste0(cell_line," 3 µM Afatinib"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, cell_line, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
cell_line <- "G8"
# Values are already transformed. Extract to vsd
vsd <- DEP_TMT_EDA_bc_ls[[cell_line]]
head(assay(vsd),3)
## G8_0_3 G8_4_3 G8_24_3 G8_48_3 G8_0_2 G8_4_2 G8_24_2 G8_48_2
## Ephb6 10.78237 9.866003 11.38664 10.22447 10.36894 9.288625 11.32991 11.27201
## Clk2 10.68290 11.974350 11.35180 11.52479 11.98440 11.570600 11.35020 10.62864
## Cdk14 15.74492 15.529006 15.76179 15.80552 15.75625 15.670907 15.77320 15.64088
## G8_0_1 G8_4_1 G8_24_1 G8_48_1
## Ephb6 10.68510 10.32048 10.54113 10.71277
## Clk2 11.11465 10.19307 11.84348 12.38263
## Cdk14 15.77279 15.66797 15.67658 15.72389
kmeans_res_clusters <- z_mat_clut_clid_ls[[cell_line]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$Timepoint),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$Timepoint,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$Timepoint)],
column_title = paste0(cell_line," 3 µM Afatinib"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, cell_line, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
cell_line <- "G12"
# Values are already transformed. Extract to vsd
vsd <- DEP_TMT_EDA_bc_ls[[cell_line]]
head(assay(vsd),3)
## G12_0_3 G12_4_3 G12_24_3 G12_48_3 G12_0_2 G12_4_2 G12_24_2 G12_48_2
## Ephb6 11.20689 10.33340 10.79278 9.926418 10.44957 10.74142 11.34920 9.719294
## Clk2 11.86405 11.85775 10.88359 10.928449 12.14746 11.72663 11.12153 10.538218
## Cdk14 15.70419 15.62284 15.76715 15.747049 15.75912 15.62125 15.81001 15.650847
## G12_0_1 G12_4_1 G12_24_1 G12_48_1
## Ephb6 9.854582 11.01427 11.03232 10.35831
## Clk2 9.146014 11.91558 12.04035 12.43190
## Cdk14 15.712025 15.53654 15.79311 15.79955
kmeans_res_clusters <- z_mat_clut_clid_ls[[cell_line]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$Timepoint),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$Timepoint,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$Timepoint)],
column_title = paste0(cell_line," 3 µM Afatinib"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, cell_line, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
Directories
fig.label <- paste0("K_means_Upset")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
cell_lines <- c("CEv3", "G12", "G8" , "G5" , "G1" , "E5" , "E4")
z_mat_clut_clid_ls <- readRDS(tail(grep("z_mat_clut_clid_ls.rds",list.files(paste0(Data,"Output/MIBs_TMT_Clustering_sig"), recursive = F, full.names = T), value = T),1))
z_mat_clut_clid_ls_res <- z_mat_clut_clid_ls
# Extract Res group
z_mat_clut_clid_ls_res <- subset(z_mat_clut_clid_ls, names(z_mat_clut_clid_ls) %in% c("CEv3", "G12", "G8" , "G5" , "G1" , "E5" , "E4"))
# Extract genes in each cluster for all cell lines
df_cluster_summary <- data.frame()
for(i in seq_along(z_mat_clut_clid_ls_res)){
summary <- z_mat_clut_clid_ls_res[[i]] %>% summarise(label, gene=row.names(z_mat_clut_clid_ls_res[[i]]), Cell.Line = paste0(names(z_mat_clut_clid_ls_res)[i]))
df_cluster_summary <- rbind(df_cluster_summary,summary)
}
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
df_cluster_summary$Cell.cluster <- paste0(df_cluster_summary$Cell.Line,".",df_cluster_summary$label)
# Find intersecting genes
intersecting_vectors_ls <- list(down=subset(df_cluster_summary,df_cluster_summary$label=="down")$gene,
up=subset(df_cluster_summary,df_cluster_summary$label=="up")$gene)
intersect_genes <- intersect(intersecting_vectors_ls$down,intersecting_vectors_ls$up)
intersect_genes
## [1] "Chka" "Prkca" "Lyn" "Dck" "Wee1" "Aurka" "Axl" "Aak1" "Cdk17"
## [10] "Ddr1" "Mertk" "Tnk2" "Nuak1" "Egfr" "Lats1"
# List of vectors
cluster_vectors_ls <- list()
for(i in seq_along(unique(df_cluster_summary$Cell.cluster))){
cluster_vectors_ls[[unique(df_cluster_summary$Cell.cluster)[i]]] <-
subset(df_cluster_summary, df_cluster_summary$Cell.cluster==unique(df_cluster_summary$Cell.cluster)[i])$gene
}
# Filter out intersecting genes
cluster_vectors_filt_ls <- lapply(cluster_vectors_ls, function(x) x[!(x %in% intersect_genes)])
# Separate into up and down
cluster_vectors_down_ls <- subset(cluster_vectors_filt_ls,names(cluster_vectors_filt_ls) %in% c(grep("down",names(cluster_vectors_filt_ls),value=T)))
cluster_vectors_up_ls <- subset(cluster_vectors_filt_ls,names(cluster_vectors_filt_ls) %in% c(grep("up",names(cluster_vectors_filt_ls),value=T)))
# Directories
output_dir_upset <- paste0(figures_dir)
graph_dir_upset <- paste0(figures_dir)
# Convert list to binary matrix
Upset_matrix <- ComplexHeatmap::list_to_matrix(cluster_vectors_up_ls)
# Save Upset_matrix
write.csv(Upset_matrix, file = paste0(output_dir_upset, "FullUpset_matrix",".csv"))
# convert list to combination matrix
combo_matrix <- ComplexHeatmap::make_comb_mat(Upset_matrix)
# Color by intersection
intersection_degree <- comb_degree(combo_matrix)
comb_col <- ifelse(intersection_degree > 1, "blue", "red")
# Generate upset plot
upset_plot <- ComplexHeatmap::UpSet(combo_matrix , comb_col = comb_col,
row_names_gp =gpar(fontsize = 12) ,
top_annotation = upset_top_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
annotation_name_rot = 90,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
right_annotation = upset_right_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
pt_size = unit(8, "pt"))
upset_plot
# Save upset_plot
pdf(file=paste0(graph_dir_upset, "CEv3_up_upset" , ".pdf"))
draw(upset_plot)
dev.off()
## png
## 2
# Combination matrix
combo_matrix
## A combination matrix with 7 sets and 24 combinations.
## ranges of combination set size: c(1, 26).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## CEv3.up G12.up G8.up G5.up G1.up E5.up E4.up code size
## x 1000000 26
## x 0100000 6
## x 0000001 5
## x x 1000001 4
## x x x x x x 1110111 3
## x x 1000100 3
## x 0000010 3
## x 0001000 2
##
## Sets are:
## set size
## CEv3.up 49
## G12.up 12
## G8.up 9
## G5.up 7
## G1.up 16
## E5.up 14
## E4.up 21
comb_name(combo_matrix)
## [1] "1111111" "1110111" "1011111" "1010111" "1010101" "1001011" "1000111"
## [8] "0010111" "1100010" "1001100" "1000101" "1100000" "1010000" "1001000"
## [15] "1000100" "1000010" "1000001" "0000101" "1000000" "0100000" "0001000"
## [22] "0000100" "0000010" "0000001"
# Extract all from upset plot
upset_genes_ls <- list()
for(i in seq_along(comb_name(combo_matrix))){
upset_genes_ls[[paste0((set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[1],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[2],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[3],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[4],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[5],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[6],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[7])]] <-
extract_comb(combo_matrix,comb_name(combo_matrix)[i])
}
# Convert to DF
upset_genes_df <- do.call(cbind, lapply(upset_genes_ls, function(x) c(x, rep(NA, max(sapply(upset_genes_ls, length)) - length(x)))))
# Save genes
write.csv(upset_genes_df, file = paste0(output_dir_upset,"CEv3_up_upset",".csv"), na="")
# Combination matrix
combo_matrix
## A combination matrix with 7 sets and 24 combinations.
## ranges of combination set size: c(1, 26).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## CEv3.up G12.up G8.up G5.up G1.up E5.up E4.up code size
## x 1000000 26
## x 0100000 6
## x 0000001 5
## x x 1000001 4
## x x x x x x 1110111 3
## x x 1000100 3
## x 0000010 3
## x 0001000 2
##
## Sets are:
## set size
## CEv3.up 49
## G12.up 12
## G8.up 9
## G5.up 7
## G1.up 16
## E5.up 14
## E4.up 21
# Find all positions in combo_matrix where there is greater than 1 overlap
positions <- which(sapply(comb_name(combo_matrix), function(x) sum(strsplit(x, "")[[1]] == "1") == 1))
positions[[1]]-1
## [1] 18
# Vector for at least one overlaping set.
extraction_vector <- comb_name(combo_matrix)[1:positions[[1]]-1]
# ol_up_genes overlap genes
ol_up_genes <- c()
for (i in seq_along(extraction_vector)){
x <- extract_comb(combo_matrix,extraction_vector[[i]])
ol_up_genes <- c(ol_up_genes,x)
}
# Unique CEv3.up genes
cluster_vectors_up_ls[["CEv3.up"]][!(cluster_vectors_up_ls[["CEv3.up"]] %in% ol_up_genes)]
## [1] "Cdk14" "Bmpr2" "Csnk2a2" "Mapk14" "Mapk1" "Rock1" "Epha4"
## [8] "Taok1" "Trim28" "Ipmk" "Pak4" "Prkd2" "Rps6ka5" "Prkd3"
## [15] "Stk38" "Frk" "Brd4" "Camk2a" "Map2k4" "Acvr1b" "Cdk6"
## [22] "Fn3k" "Agk" "Prkce" "Syk" "Map3k12"
CEv3_up_unique <- subset(z_mat_clut_clid_ls_res[["CEv3"]],(row.names(z_mat_clut_clid_ls_res[["CEv3"]]) %in% cluster_vectors_up_ls[["CEv3.up"]][!(cluster_vectors_up_ls[["CEv3.up"]] %in% ol_up_genes)]))
CEv3_up_unique
## X0.CEv3 X4.CEv3 X24.CEv3 X48.CEv3 cluster gene
## Cdk14 -0.85026747 -0.83784858 0.57599805 1.53342738 2 Cdk14
## Bmpr2 -1.43076332 -0.32817862 0.18356374 1.90237954 2 Bmpr2
## Csnk2a2 -0.94614422 -0.61836321 0.93500958 0.59231922 2 Csnk2a2
## Mapk14 1.00177587 0.82719332 1.18149808 1.25134178 2 Mapk14
## Mapk1 -0.09698722 0.82167549 -0.65482540 1.47458195 2 Mapk1
## Rock1 0.21196482 0.06939306 0.98557974 1.54295346 2 Rock1
## Epha4 0.59793740 0.77414277 1.03206078 0.90822558 2 Epha4
## Taok1 -2.27508553 -1.36668761 -0.55783774 0.01407270 2 Taok1
## Trim28 -1.38132679 -1.25165159 0.12945218 0.21424044 2 Trim28
## Ipmk -0.43050556 -0.22351241 0.50997861 1.59000796 2 Ipmk
## Pak4 -1.54692918 -0.54408595 0.82339273 2.68762239 2 Pak4
## Prkd2 0.52156910 0.78302418 1.04030735 1.17043218 2 Prkd2
## Rps6ka5 0.97466743 0.75874691 1.02223000 0.89797324 2 Rps6ka5
## Prkd3 1.07824802 0.98082268 1.15333721 1.21771049 2 Prkd3
## Stk38 -0.73660707 -1.28238244 -0.03353734 3.11042041 2 Stk38
## Frk -1.81687895 1.55884511 -0.95654918 0.50323786 2 Frk
## Brd4 -1.15521923 0.33142408 1.26731848 -0.09450051 2 Brd4
## Camk2a -0.61020576 -0.70672091 -0.39595885 0.04647524 2 Camk2a
## Map2k4 0.96319877 0.99073101 0.74055177 0.78983888 2 Map2k4
## Acvr1b -1.68025586 0.94844701 0.29875717 -0.87039922 2 Acvr1b
## Cdk6 -2.06582457 -0.41245116 0.24709236 -0.37324882 2 Cdk6
## Fn3k -2.77402260 -0.94275962 1.02298461 0.71154949 2 Fn3k
## Agk 0.67317137 0.75295568 1.20017400 1.24934117 2 Agk
## Prkce -0.81162939 1.86571917 -0.95129469 0.25585400 2 Prkce
## Syk -0.50040938 -1.38853361 0.99295946 0.23267378 2 Syk
## Map3k12 -2.74809649 1.80636749 0.73057402 0.68284867 2 Map3k12
## mean_0 mean_48 difference label
## Cdk14 -0.7438161 0.8648825 1.608699 up
## Bmpr2 -0.7438161 0.8648825 1.608699 up
## Csnk2a2 -0.7438161 0.8648825 1.608699 up
## Mapk14 -0.7438161 0.8648825 1.608699 up
## Mapk1 -0.7438161 0.8648825 1.608699 up
## Rock1 -0.7438161 0.8648825 1.608699 up
## Epha4 -0.7438161 0.8648825 1.608699 up
## Taok1 -0.7438161 0.8648825 1.608699 up
## Trim28 -0.7438161 0.8648825 1.608699 up
## Ipmk -0.7438161 0.8648825 1.608699 up
## Pak4 -0.7438161 0.8648825 1.608699 up
## Prkd2 -0.7438161 0.8648825 1.608699 up
## Rps6ka5 -0.7438161 0.8648825 1.608699 up
## Prkd3 -0.7438161 0.8648825 1.608699 up
## Stk38 -0.7438161 0.8648825 1.608699 up
## Frk -0.7438161 0.8648825 1.608699 up
## Brd4 -0.7438161 0.8648825 1.608699 up
## Camk2a -0.7438161 0.8648825 1.608699 up
## Map2k4 -0.7438161 0.8648825 1.608699 up
## Acvr1b -0.7438161 0.8648825 1.608699 up
## Cdk6 -0.7438161 0.8648825 1.608699 up
## Fn3k -0.7438161 0.8648825 1.608699 up
## Agk -0.7438161 0.8648825 1.608699 up
## Prkce -0.7438161 0.8648825 1.608699 up
## Syk -0.7438161 0.8648825 1.608699 up
## Map3k12 -0.7438161 0.8648825 1.608699 up
# Save genes
write.csv(CEv3_up_unique, file = paste0(output_dir_upset,"EGFRi_up_Unique_Sig",".csv"))
# write.csv(ol_up_genes, file = paste0(output_dir_upset,"OL_CEv3_genes",".csv"))
# Directories
output_dir_upset <- paste0(figures_dir)
graph_dir_upset <- paste0(figures_dir)
# Convert list to binary matrix
Upset_matrix <- ComplexHeatmap::list_to_matrix(cluster_vectors_down_ls)
# Save Upset_matrix
write.csv(Upset_matrix, file = paste0(output_dir_upset, "FullUpset_matrix",".csv"))
# convert list to combination matrix
combo_matrix <- ComplexHeatmap::make_comb_mat(Upset_matrix)
# Color by intersection
intersection_degree <- comb_degree(combo_matrix)
comb_col <- ifelse(intersection_degree > 1, "blue", "red")
# Generate upset plot
upset_plot <- ComplexHeatmap::UpSet(combo_matrix , comb_col=comb_col,
row_names_gp =gpar(fontsize = 12) ,
top_annotation = upset_top_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
annotation_name_rot = 90,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
right_annotation = upset_right_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
pt_size = unit(8, "pt"))
upset_plot
# Save upset_plot
pdf(file=paste0(graph_dir_upset, "CEv3_down_upset" , ".pdf"))
draw(upset_plot)
dev.off()
## png
## 2
# Combination matrix
combo_matrix
## A combination matrix with 7 sets and 21 combinations.
## ranges of combination set size: c(1, 17).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## CEv3.down G12.down G8.down G5.down G1.down E5.down E4.down code size
## x 1000000 17
## x x x x x x x 1111111 6
## x x x 1000101 3
## x x 1000001 3
## x 0000100 3
## x 0010000 2
## x 0000010 2
## x 0000001 2
##
## Sets are:
## set size
## CEv3.down 36
## G12.down 10
## G8.down 14
## G5.down 8
## G1.down 16
## E5.down 16
## E4.down 19
comb_name(combo_matrix)
## [1] "1111111" "0011111" "1110010" "1000111" "1100010" "1010010" "1000101"
## [8] "1000011" "0010101" "1010000" "1000010" "1000001" "0110000" "0100001"
## [15] "0000110" "1000000" "0010000" "0001000" "0000100" "0000010" "0000001"
# Extract all from upset plot
upset_genes_ls <- list()
for(i in seq_along(comb_name(combo_matrix))){
upset_genes_ls[[paste0((set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[1],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[2],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[3],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[4],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[5],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[6],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[7])]] <-
extract_comb(combo_matrix,comb_name(combo_matrix)[i])
}
# Convert to DF
upset_genes_df <- do.call(cbind, lapply(upset_genes_ls, function(x) c(x, rep(NA, max(sapply(upset_genes_ls, length)) - length(x)))))
# Save genes
write.csv(upset_genes_df, file = paste0(output_dir_upset,"CEv3_down_upset",".csv"), na="")
# Combination matrix
combo_matrix
## A combination matrix with 7 sets and 21 combinations.
## ranges of combination set size: c(1, 17).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## CEv3.down G12.down G8.down G5.down G1.down E5.down E4.down code size
## x 1000000 17
## x x x x x x x 1111111 6
## x x x 1000101 3
## x x 1000001 3
## x 0000100 3
## x 0010000 2
## x 0000010 2
## x 0000001 2
##
## Sets are:
## set size
## CEv3.down 36
## G12.down 10
## G8.down 14
## G5.down 8
## G1.down 16
## E5.down 16
## E4.down 19
# Find all positions in combo_matrix where there is greater than 1 overlap
positions <- which(sapply(comb_name(combo_matrix), function(x) sum(strsplit(x, "")[[1]] == "1") == 1))
positions[[1]]-1
## [1] 15
# Vector for at least one overlaping set.
extraction_vector <- comb_name(combo_matrix)[1:positions[[1]]-1]
# Extract overlap genes
ol_down_genes <- c()
for (i in seq_along(extraction_vector)){
x <- extract_comb(combo_matrix,extraction_vector[[i]])
ol_down_genes <- c(ol_down_genes,x)
}
# Unique CEv3.down genes
cluster_vectors_down_ls[["CEv3.down"]][!(cluster_vectors_down_ls[["CEv3.down"]] %in% ol_down_genes)]
## [1] "Src" "Map2k1" "Ttk" "Acvr1" "Jak1" "Fer" "Yes1"
## [8] "Cdk18" "Abl2" "Map2k2" "Mylk" "Map3k11" "Cdk9" "Stk4"
## [15] "Rps6ka6" "Taok3" "Pdk3"
CEv3_down_unique <- subset(z_mat_clut_clid_ls_res[["CEv3"]],(row.names(z_mat_clut_clid_ls_res[["CEv3"]]) %in% cluster_vectors_down_ls[["CEv3.down"]][!(cluster_vectors_down_ls[["CEv3.down"]] %in% ol_down_genes)]))
CEv3_down_unique
## X0.CEv3 X4.CEv3 X24.CEv3 X48.CEv3 cluster gene mean_0
## Src 0.18375404 -1.14474734 -0.7774591 -1.40808565 1 Src 1.089074
## Map2k1 1.53082987 1.41902874 0.3435011 -0.02545134 1 Map2k1 1.089074
## Ttk 1.22124388 0.68736180 -1.0576630 -0.76025328 1 Ttk 1.089074
## Acvr1 1.44332372 0.90333422 -0.6501203 -1.12530439 1 Acvr1 1.089074
## Jak1 0.11618765 1.23105994 -0.2980831 -0.98399349 1 Jak1 1.089074
## Fer 0.60390038 -0.33614869 -0.2277677 -0.78456403 1 Fer 1.089074
## Yes1 -0.40030276 -0.96191941 -1.1457190 -1.95497804 1 Yes1 1.089074
## Cdk18 0.64218826 0.59064911 -0.2333814 0.27146179 1 Cdk18 1.089074
## Abl2 0.06510473 -0.37612557 -1.4342216 -0.33807370 1 Abl2 1.089074
## Map2k2 -0.22491831 0.01461992 -0.8822386 -0.77063516 1 Map2k2 1.089074
## Mylk 1.52454943 0.82140003 0.5523557 0.11947589 1 Mylk 1.089074
## Map3k11 1.07903423 1.87349504 -1.9474021 -0.10581666 1 Map3k11 1.089074
## Cdk9 0.91204126 0.97103945 0.6747749 0.04709299 1 Cdk9 1.089074
## Stk4 1.07401382 0.57192253 -0.9236907 -1.53543971 1 Stk4 1.089074
## Rps6ka6 2.56859198 -0.41568159 -1.4922327 -1.41605916 1 Rps6ka6 1.089074
## Taok3 0.36248143 -0.94117975 -1.0554392 -0.12576311 1 Taok3 1.089074
## Pdk3 0.92948579 -0.25123452 -1.0196371 1.12220350 1 Pdk3 1.089074
## mean_48 difference label
## Src -0.9586105 -2.047685 down
## Map2k1 -0.9586105 -2.047685 down
## Ttk -0.9586105 -2.047685 down
## Acvr1 -0.9586105 -2.047685 down
## Jak1 -0.9586105 -2.047685 down
## Fer -0.9586105 -2.047685 down
## Yes1 -0.9586105 -2.047685 down
## Cdk18 -0.9586105 -2.047685 down
## Abl2 -0.9586105 -2.047685 down
## Map2k2 -0.9586105 -2.047685 down
## Mylk -0.9586105 -2.047685 down
## Map3k11 -0.9586105 -2.047685 down
## Cdk9 -0.9586105 -2.047685 down
## Stk4 -0.9586105 -2.047685 down
## Rps6ka6 -0.9586105 -2.047685 down
## Taok3 -0.9586105 -2.047685 down
## Pdk3 -0.9586105 -2.047685 down
# Save genes
write.csv(CEv3_down_unique, file = paste0(output_dir_upset,"EGFRi_down_Unique_Sig",".csv"))
# write.csv(ol_down_genes, file = paste0(output_dir_upset,"CEv3_down_OL_CEv3_genes",".csv"))
Comparisons with other signatures
# Get EGFRi Signature
CEv3_unique <- rbind(CEv3_down_unique,CEv3_up_unique)
# Save EGFRi signature
write.csv(CEv3_unique, file = paste0(output_dir_upset,"EGFRi_sig",".csv"))
write.csv(row.names(CEv3_unique), file = paste0(output_dir_upset,"EGFRi_sig_genes",".csv"))
# Import signatures
BL_sigs_df <- read.csv(paste0(Data,"Figures/","Baseline_kinase_upset/","All_baseline_kinase_sig.csv"), row.names = 1, na.strings = c("","NA"))
# Extract list of OL
BL_sig_list <- list()
for (i in seq_along(colnames(BL_sigs_df))){
BL_sig_list[[paste0(colnames(BL_sigs_df)[i])]] <-
BL_sigs_df[[paste0(colnames(BL_sigs_df)[i])]][!is.na(BL_sigs_df[[paste0(colnames(BL_sigs_df)[i])]])]
}
CEv3_sig_comp_list <- c(BL_sig_list,
list(EGFRi_sig=row.names(CEv3_unique)))
# Directories
output_dir_upset <- paste0(figures_dir)
graph_dir_upset <- paste0(figures_dir)
# Convert list to binary matrix
Upset_matrix <- ComplexHeatmap::list_to_matrix(CEv3_sig_comp_list)
# Save Upset_matrix
write.csv(Upset_matrix, file = paste0(output_dir_upset, "CEv3_sig_upset_matrix",".csv"))
# convert list to combination matrix
combo_matrix <- ComplexHeatmap::make_comb_mat(Upset_matrix)
# Color by intersection
intersection_degree <- comb_degree(combo_matrix)
comb_col <- ifelse(intersection_degree > 1, "blue", "red")
# Generate upset plot
upset_plot <- ComplexHeatmap::UpSet(combo_matrix, comb_col = comb_col,
row_names_gp =gpar(fontsize = 12) ,
top_annotation = upset_top_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
annotation_name_rot = 90,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
right_annotation = upset_right_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
pt_size = unit(8, "pt"))
upset_plot
# Save upset_plot
pdf(file=paste0(graph_dir_upset, "EGFRi_sig_upset" , ".pdf"))
draw(upset_plot)
dev.off()
## png
## 2
# Combination matrix
combo_matrix
## A combination matrix with 7 sets and 10 combinations.
## ranges of combination set size: c(1, 35).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## E5_BL_sig G1_BL_sig G5_BL_sig G8_BL_sig G12_BL_sig OL_BL_sig EGFRi_sig code size
## x 0000001 35
## x 0010000 21
## x 0000010 19
## x 0001000 6
## x x 0000011 4
## x 1000000 4
## x 0000100 4
## x x 0010001 3
##
## Sets are:
## set size
## E5_BL_sig 4
## G1_BL_sig 1
## G5_BL_sig 24
## G8_BL_sig 7
## G12_BL_sig 4
## OL_BL_sig 23
## EGFRi_sig 43
comb_name(combo_matrix)
## [1] "0010001" "0001001" "0000011" "1000000" "0100000" "0010000" "0001000"
## [8] "0000100" "0000010" "0000001"
# Extract all from upset plot
upset_genes_ls <- list()
for(i in seq_along(comb_name(combo_matrix))){
upset_genes_ls[[paste0((set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[1],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[2],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[3],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[4],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[5],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[6],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[7])]] <-
extract_comb(combo_matrix,comb_name(combo_matrix)[i])
}
# Convert to DF
upset_genes_df <- do.call(cbind, lapply(upset_genes_ls, function(x) c(x, rep(NA, max(sapply(upset_genes_ls, length)) - length(x)))))
# Save genes
write.csv(upset_genes_df, file = paste0(output_dir_upset,"EGFRi_sig_upset_genes_df",".csv"), na = "")
Setup
# Clean environment
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 17046840 910.4 29379045 1569.1 29379045 1569.1
## Vcells 30796531 235.0 80630088 615.2 68676585 524.0
# Import setup workspace
load(paste0("/Data/Input/Figures/setup_workspace.RData"))
Directories
fig.label <- paste0("EGFRi_CEv3_heatmap")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
# Transformed counts
DEP_TMT_EDA_bc_ls <- readRDS(tail(grep("DEP_TMT_EDA_bc_ls.rds",list.files(paste0(Data,"Output/MIB-DYN/DEP_TMT_EDA"), recursive = F, full.names = T), value = T),1))
cell_lines <- c("CEv3")
DEP_TMT_EDA_bc_ls <- subset(DEP_TMT_EDA_bc_ls, names(DEP_TMT_EDA_bc_ls) %in% cell_lines)
# K means clustering results
z_mat_clut_clid_ls <- readRDS(tail(grep("z_mat_clut_clid_ls.rds",list.files(paste0(Data,"Output/MIBs_TMT_Clustering_sig"), recursive = F, full.names = T), value = T),1))
# EGFRi sig
# BL_sig
EGFRi_sig <- read.csv(paste0(Data,"Figures/K_means_Upset/","EGFRi_sig_genes.csv"), row.names = 1, na.strings = c("","NA"))
cell_line <- "CEv3"
# Values are already transformed. Extract to vsd
vsd <- DEP_TMT_EDA_bc_ls[[cell_line]]
head(assay(vsd),3)
## CEv3_0_3 CEv3_4_3 CEv3_24_3 CEv3_48_3 CEv3_0_2 CEv3_4_2 CEv3_24_2
## Ephb6 10.68249 10.82549 10.30425 10.44725 10.26549 10.40266 11.31381
## Clk2 11.17959 11.47481 11.29211 11.58733 11.40183 11.55016 11.41988
## Cdk14 15.63858 15.60457 15.81604 15.78204 15.46983 15.47296 15.82875
## CEv3_48_2 CEv3_0_1 CEv3_4_1 CEv3_24_1 CEv3_48_1
## Ephb6 10.27752 9.109216 11.38174 11.07577 10.69275
## Clk2 11.16197 11.226276 11.16175 11.55051 11.59531
## Cdk14 16.06969 15.406320 15.41139 15.99484 16.02868
kmeans_res_clusters <- z_mat_clut_clid_ls[[cell_line]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Further subset to contain genes in EGFRi_sig
cluster_vsd <- cluster_vsd[row.names(cluster_vsd) %in% EGFRi_sig[,1],]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$Timepoint),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$Timepoint,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$Timepoint)],
column_title = paste0(cell_line," EGFRi signature"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 8),
width = ncol(mat_list)*unit(5, "mm"),
height = nrow(mat_list)*unit(2.5, "mm")
)
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, cell_line, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
Setup
# Clean environment
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 17047223 910.5 29379045 1569.1 29379045 1569.1
## Vcells 30797211 235.0 80630088 615.2 68676585 524.0
# Import setup workspace
load(paste0("/Data/Input/Figures/setup_workspace.RData"))
Directories
fig.label <- paste0("mRNA_dynTKI_heatmap")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
Afat_mRNA_dynTKI <- readRDS(tail(grep("Afat_CEv3_starve.rds",list.files(paste0(Data,"Output/RNA/dds_EDA_subset/Afat_CEv3_starve"), recursive = F, full.names = T), value = T),1))
Ner_mRNA_dynTKI <- readRDS(tail(grep("Ner_CEv3_1_starve.rds",list.files(paste0(Data,"Output/RNA/dds_EDA_subset/Ner_CEv3_1_starve"), recursive = F, full.names = T), value = T),1))
z_mat_clut_clid_ls <- readRDS(tail(grep("z_mat_clut_clid_ls.rds",list.files(paste0(Data,"Output/mRNA_dynTKI/Sig_CLustering"), recursive = F, full.names = T), value = T),1))
EGFR_TKI <- "Afatinib"
# vsd transformation
vsd <- vst(Afat_mRNA_dynTKI, blind=T, nsub=1000, fitType = "parametric")
head(assay(vsd),3)
## SL0287 SL0288 SL0289 SL0291 SL0292 SL0293 SL0295
## A3galt2 5.495460 5.636452 5.329207 5.564018 5.561867 5.419903 5.469103
## A4galt 5.713336 5.264711 5.328221 5.826911 6.137919 5.473699 5.710202
## AA467197 7.269067 7.366156 7.299568 7.412651 7.476263 7.548706 7.353965
## SL0296 SL0297 SL0142 SL0509 SL0508
## A3galt2 5.560635 5.986753 5.740035 5.166584 5.578503
## A4galt 5.177933 5.379598 5.175840 4.943919 5.169280
## AA467197 7.162764 6.984482 6.251441 6.266900 6.465730
kmeans_res_clusters <- z_mat_clut_clid_ls[[EGFR_TKI]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$time_h),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$time_h,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$time_h)],
column_title = paste0(EGFR_TKI," 3 µM "),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = F,
width = ncol(mat_list)*unit(5, "mm")
)
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, EGFR_TKI, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
EGFR_TKI <- "Neratinib"
# vsd transformation
vsd <- vst(Ner_mRNA_dynTKI, blind=T, nsub=1000, fitType = "parametric")
head(assay(vsd),3)
## SL0516 SL0520 SL0522 SL0523 SL0528 SL0530 SL0531
## A3galt2 5.085811 4.604922 5.127056 4.604922 5.192937 5.078871 4.940448
## A4galt 4.604922 5.732461 4.604922 4.604922 4.604922 4.943306 5.123150
## AA467197 6.740424 6.692577 6.583756 6.797205 7.155811 6.757587 7.085793
## SL0536 SL0538 SL0142 SL0509 SL0508
## A3galt2 5.050433 4.958770 5.586894 4.880472 5.387296
## A4galt 4.604922 5.151261 4.871761 4.604922 4.863482
## AA467197 6.589938 7.160624 6.154378 6.168303 6.396225
kmeans_res_clusters <- z_mat_clut_clid_ls[[EGFR_TKI]]
# Subset dds to contain genes in the clustering analysis
cluster_vsd <-vsd[row.names(vsd) %in% row.names(kmeans_res_clusters),]
# Heatmap_GOI_df_ls
heatmap_goi <- kmeans_res_clusters
# Extract matrix values
mat_list <- as.matrix(assay(cluster_vsd))
# Heatmap annotation labels
annotation_col <- HeatmapAnnotation(df = data.frame(Time = cluster_vsd@colData$time_h),
col = list(Time=
c(
'0'='black',
'4'='blue',
'24'='purple',
'48'='orange'
)
))
# Generate Heatmap
Heatmap <- ComplexHeatmap::Heatmap(
t(scale(t(mat_list))),
top_annotation = annotation_col,
name = " ",
column_split = cluster_vsd@colData$time_h,
column_order = colnames(cluster_vsd)[order(cluster_vsd@colData$time_h)],
column_title = paste0(EGFR_TKI," 3 µM"),
row_split = heatmap_goi[row.names(mat_list),]["label"] ,
row_gap = unit(1, "mm"),
show_column_names = F,
show_row_names = F,
width = ncol(mat_list)*unit(5, "mm")
)
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
draw(Heatmap)
# Get height and width of heatmap for pdf
width = convertX(ComplexHeatmap:::width(draw(Heatmap)), "inch", valueOnly = TRUE)
height = convertY(ComplexHeatmap:::height(draw(Heatmap)), "inch", valueOnly = TRUE)
pdf(file=paste0(figures_dir, EGFR_TKI, ".pdf"), width = width+(0.01*width), height = height+(0.01*height))
draw(Heatmap)
dev.off()
## png
## 2
Directories
fig.label <- paste0("mRNA_dynTKI_upset")
figures_dir <- paste0(Data,"Figures/",fig.label,"/")
dir.create(figures_dir)
z_mat_clut_clid_ls <- readRDS(tail(grep("z_mat_clut_clid_ls.rds",list.files(paste0(Data,"Output/mRNA_dynTKI/Sig_CLustering"), recursive = F, full.names = T), value = T),1))
# Extract Res group
z_mat_clut_clid_ls_res <- subset(z_mat_clut_clid_ls, names(z_mat_clut_clid_ls) %in% c("Afatinib", "Neratinib"))
# Extract genes in each cluster for all cell lines
df_cluster_summary <- data.frame()
for(i in seq_along(z_mat_clut_clid_ls_res)){
summary <- z_mat_clut_clid_ls_res[[i]] %>% summarise(label, gene=row.names(z_mat_clut_clid_ls_res[[i]]), Drug = paste0(names(z_mat_clut_clid_ls_res)[i]))
df_cluster_summary <- rbind(df_cluster_summary,summary)
}
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
df_cluster_summary$Drug.cluster <- paste0(df_cluster_summary$Drug,".",df_cluster_summary$label)
# List of vectors
cluster_vectors_ls <- list()
for(i in seq_along(unique(df_cluster_summary$Drug.cluster))){
cluster_vectors_ls[[unique(df_cluster_summary$Drug.cluster)[i]]] <-
subset(df_cluster_summary, df_cluster_summary$Drug.cluster==unique(df_cluster_summary$Drug.cluster)[i])$gene
}
# Find intersecting genes
intersect_genes <- c(intersect(cluster_vectors_ls$Afatinib.down,cluster_vectors_ls$Neratinib.up),intersect(cluster_vectors_ls$Afatinib.up,cluster_vectors_ls$Neratinib.down))
# Filter out intersecting genes
cluster_vectors_filt_ls <- lapply(cluster_vectors_ls, function(x) x[!(x %in% intersect_genes)])
# Directories
output_dir_upset <- paste0(figures_dir)
graph_dir_upset <- paste0(figures_dir)
# Convert list to binary matrix
Upset_matrix <- ComplexHeatmap::list_to_matrix(cluster_vectors_filt_ls)
# Save Upset_matrix
write.csv(Upset_matrix, file = paste0(output_dir_upset, "FullUpset_matrix",".csv"))
# convert list to combination matrix
combo_matrix <- ComplexHeatmap::make_comb_mat(Upset_matrix)
# Color by intersection
intersection_degree <- comb_degree(combo_matrix)
comb_col <- ifelse(intersection_degree > 1, "blue", "red")
# Generate upset plot
upset_plot <- ComplexHeatmap::UpSet(combo_matrix , comb_col=comb_col,
row_names_gp =gpar(fontsize = 12) ,
top_annotation = upset_top_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
annotation_name_rot = 90,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
right_annotation = upset_right_annotation(
combo_matrix,
add_numbers = TRUE,
numbers_gp = gpar(fontsize = 11),
numbers_rot=0,
axis_param=list (gp = gpar(fontsize = 11)),
annotation_name_gp= gpar(fontsize = 14)),
pt_size = unit(8, "pt"))
upset_plot
# Save upset_plot
pdf(file=paste0(graph_dir_upset, "mRNA_dynTKI_upset" , ".pdf"))
draw(upset_plot)
dev.off()
## png
## 2
# Combination matrix
combo_matrix
## A combination matrix with 4 sets and 6 combinations.
## ranges of combination set size: c(92, 980).
## mode for the combination size: distinct.
## sets are on rows.
##
## Combination sets are:
## Afatinib.down Afatinib.up Neratinib.down Neratinib.up code size
## x x 1010 844
## x x 0101 980
## x 1000 92
## x 0100 246
## x 0010 702
## x 0001 789
##
## Sets are:
## set size
## Afatinib.down 936
## Afatinib.up 1226
## Neratinib.down 1546
## Neratinib.up 1769
comb_name(combo_matrix)
## [1] "1010" "0101" "1000" "0100" "0010" "0001"
# Extract all from upset plot
upset_genes_ls <- list()
for(i in seq_along(comb_name(combo_matrix))){
upset_genes_ls[[paste0((set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[1],"_",(set_name(combo_matrix)["1"==strsplit(comb_name(combo_matrix)[i],"")[[1]]])[2])]] <-
extract_comb(combo_matrix,comb_name(combo_matrix)[i])
}
# Convert to DF
upset_genes_df <- do.call(cbind, lapply(upset_genes_ls, function(x) c(x, rep(NA, max(sapply(upset_genes_ls, length)) - length(x)))))
# Save genes
write.csv(upset_genes_df, file = paste0(output_dir_upset,"upset_genes_df",".csv"))
# Extract genes
mRNA_OL_down <- upset_genes_ls[["Afatinib.down_Neratinib.down"]]
mRNA_OL_up <- upset_genes_ls[["Afatinib.up_Neratinib.up"]]
mRNA_Afat_down <- upset_genes_ls[["Afatinib.down_NA"]]
mRNA_Afat_up <- upset_genes_ls[["Afatinib.up_NA"]]
mRNA_Ner_down <- upset_genes_ls[["Neratinib.down_NA"]]
mRNA_Ner_up <- upset_genes_ls[["Neratinib.up_NA"]]
# Save genes
write.csv(mRNA_OL_down, file = paste0(output_dir_upset,"mRNA_OL_down",".csv"))
write.csv(mRNA_OL_up, file = paste0(output_dir_upset,"mRNA_OL_up",".csv"))
write.csv(mRNA_Afat_down, file = paste0(output_dir_upset,"mRNA_Afat_down",".csv"))
write.csv(mRNA_Afat_up, file = paste0(output_dir_upset,"mRNA_Afat_up",".csv"))
write.csv(mRNA_Ner_down, file = paste0(output_dir_upset,"mRNA_Ner_down",".csv"))
write.csv(mRNA_Ner_up, file = paste0(output_dir_upset,"mRNA_Ner_up",".csv"))
proc.time() - ptm
## user system elapsed
## 57.502 0.849 65.044
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel grid stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] readxl_1.4.3 circlize_0.4.16
## [3] fgsea_1.28.0 ReportingTools_2.42.3
## [5] knitr_1.49 factoextra_1.0.7
## [7] cluster_2.1.8 gtools_3.9.5
## [9] limma_3.58.1 DEP_1.24.0
## [11] PCAtools_2.14.0 ggpubr_0.6.0
## [13] gprofiler2_0.2.3 vsn_3.70.0
## [15] ComplexHeatmap_2.18.0 UpSetR_1.4.0
## [17] msigdbr_7.5.1 GSVA_1.50.5
## [19] RSQLite_2.3.9 rmarkdown_2.29
## [21] gplots_3.2.0 EnhancedVolcano_1.20.0
## [23] ggrepel_0.9.6 org.Hs.eg.db_3.18.0
## [25] AnnotationDbi_1.64.1 BiocParallel_1.36.0
## [27] lubridate_1.9.4 forcats_1.0.0
## [29] stringr_1.5.1 purrr_1.0.4
## [31] readr_2.1.5 tidyr_1.3.1
## [33] tidyverse_2.0.0 pheatmap_1.0.12
## [35] RColorBrewer_1.1-3 colorRamps_2.3.4
## [37] cowplot_1.1.3 gridExtra_2.3
## [39] scales_1.3.0 ggplot2_3.5.1
## [41] DESeq2_1.42.1 SummarizedExperiment_1.32.0
## [43] MatrixGenerics_1.14.0 matrixStats_1.5.0
## [45] tximport_1.30.0 Biobase_2.62.0
## [47] dplyr_1.1.4 biomaRt_2.58.2
## [49] rtracklayer_1.62.0 GenomicRanges_1.54.1
## [51] GenomeInfoDb_1.38.8 IRanges_2.36.0
## [53] S4Vectors_0.40.2 BiocGenerics_0.48.1
## [55] tibble_3.2.1
##
## loaded via a namespace (and not attached):
## [1] R.methodsS3_1.8.2 dichromat_2.0-0.1
## [3] GSEABase_1.64.0 progress_1.2.3
## [5] nnet_7.3-20 DT_0.33
## [7] Biostrings_2.70.3 HDF5Array_1.30.1
## [9] vctrs_0.6.5 digest_0.6.37
## [11] png_0.1-8 shape_1.4.6.1
## [13] MSnbase_2.28.1 MASS_7.3-60
## [15] reshape2_1.4.4 foreach_1.5.2
## [17] httpuv_1.6.15 withr_3.0.2
## [19] xfun_0.51 survival_3.8-3
## [21] memoise_2.0.1 gmm_1.8
## [23] systemfonts_1.2.1 ragg_1.3.3
## [25] zoo_1.8-13 GlobalOptions_0.1.2
## [27] R.oo_1.27.0 Formula_1.2-5
## [29] prettyunits_1.2.0 GGally_2.2.1
## [31] KEGGREST_1.42.0 promises_1.3.2
## [33] httr_1.4.7 rstatix_0.7.2
## [35] restfulr_0.0.15 rhdf5filters_1.14.1
## [37] rhdf5_2.46.1 rstudioapi_0.17.1
## [39] PFAM.db_3.18.0 generics_0.1.3
## [41] base64enc_0.1-3 babelgene_22.9
## [43] curl_6.2.0 ncdf4_1.23
## [45] zlibbioc_1.48.2 ScaledMatrix_1.10.0
## [47] GenomeInfoDbData_1.2.11 SparseArray_1.2.4
## [49] RBGL_1.78.0 xtable_1.8-4
## [51] doParallel_1.0.17 evaluate_1.0.3
## [53] S4Arrays_1.2.1 BiocFileCache_2.10.2
## [55] preprocessCore_1.64.0 hms_1.1.3
## [57] irlba_2.3.5.1 colorspace_2.1-1
## [59] filelock_1.0.3 magrittr_2.0.3
## [61] Rgraphviz_2.46.0 later_1.4.1
## [63] lattice_0.22-6 MsCoreUtils_1.14.1
## [65] genefilter_1.84.0 XML_3.99-0.18
## [67] Hmisc_5.2-2 pillar_1.10.1
## [69] iterators_1.0.14 caTools_1.18.3
## [71] compiler_4.3.2 beachmat_2.18.1
## [73] stringi_1.8.4 Category_2.68.0
## [75] GenomicAlignments_1.38.2 tmvtnorm_1.6
## [77] plyr_1.8.9 crayon_1.5.3
## [79] abind_1.4-8 BiocIO_1.12.0
## [81] locfit_1.5-9.11 waldo_0.6.1
## [83] bit_4.5.0.1 sandwich_3.1-1
## [85] pcaMethods_1.94.0 fastmatch_1.1-6
## [87] codetools_0.2-20 textshaping_1.0.0
## [89] BiocSingular_1.18.0 bslib_0.9.0
## [91] biovizBase_1.50.0 GetoptLong_1.0.5
## [93] plotly_4.10.4 mime_0.12
## [95] splines_4.3.2 Rcpp_1.0.14
## [97] dbplyr_2.5.0 sparseMatrixStats_1.14.0
## [99] cellranger_1.1.0 blob_1.2.4
## [101] clue_0.3-66 mzR_2.36.0
## [103] AnnotationFilter_1.26.0 mzID_1.40.0
## [105] checkmate_2.3.2 DelayedMatrixStats_1.24.0
## [107] ggsignif_0.6.4 Matrix_1.6-5
## [109] statmod_1.5.0 tzdb_0.4.0
## [111] pkgconfig_2.0.3 tools_4.3.2
## [113] cachem_1.1.0 viridisLite_0.4.2
## [115] DBI_1.2.3 impute_1.76.0
## [117] fastmap_1.2.0 shinydashboard_0.7.2
## [119] Rsamtools_2.18.0 broom_1.0.7
## [121] sass_0.4.9 BiocManager_1.30.25
## [123] ggstats_0.8.0 VariantAnnotation_1.48.1
## [125] graph_1.80.0 carData_3.0-5
## [127] rpart_4.1.24 farver_2.1.2
## [129] yaml_2.3.10 AnnotationForge_1.44.0
## [131] foreign_0.8-88 cli_3.6.4
## [133] lifecycle_1.0.4 mvtnorm_1.3-3
## [135] backports_1.5.0 annotate_1.80.0
## [137] timechange_0.3.0 gtable_0.3.6
## [139] rjson_0.2.23 jsonlite_1.9.0
## [141] edgeR_4.0.16 bitops_1.0-9
## [143] bit64_4.6.0-1 assertthat_0.2.1
## [145] jquerylib_0.1.4 dqrng_0.4.1
## [147] imputeLCMD_2.1 R.utils_2.12.3
## [149] lazyeval_0.2.2 shiny_1.10.0
## [151] htmltools_0.5.8.1 affy_1.80.0
## [153] GO.db_3.18.0 rappdirs_0.3.3
## [155] ensembldb_2.26.0 glue_1.8.0
## [157] XVector_0.42.0 RCurl_1.98-1.16
## [159] MALDIquant_1.22.3 BSgenome_1.70.2
## [161] R6_2.6.1 SingleCellExperiment_1.24.0
## [163] labeling_0.4.3 GenomicFeatures_1.54.4
## [165] ggbio_1.50.0 Rhdf5lib_1.24.2
## [167] DelayedArray_0.28.0 tidyselect_1.2.1
## [169] ProtGenerics_1.34.0 htmlTable_2.4.3
## [171] GOstats_2.68.0 xml2_1.3.6
## [173] car_3.1-3 rsvd_1.0.5
## [175] munsell_0.5.1 KernSmooth_2.23-26
## [177] affyio_1.72.0 data.table_1.16.4
## [179] htmlwidgets_1.6.4 hwriter_1.3.2.1
## [181] rlang_1.1.5 norm_1.0-11.1
## [183] OrganismDbi_1.44.0