Analysis of the Heart Disease Dataset

Load the data from here, and the description is here. The original dataset comes from here and corresponds to the processed Cleveland data.

Perform an EDA on the dataset

# Load the dataset ----
# The file is read with a single space as field separator, then the 15 columns
# are given descriptive names.
heart_df <- read.csv("data/heart_disease_dataset.csv", sep = " ")
names(heart_df) <- c(
  "Age", "Sex", "ChestPainType", "RestingBloodPressure", "Cholestherol",
  "FastingBloodSugar", "Resting_EC", "MaxHeartRate", "ExerciseInducedAngina",
  "Peak", "Slope", "MajorVessels", "Thalassemia", "Diagnosis", "ID"
)

# Clean the data ----
# Replace the coded values with readable labels. "?" entries in Thalassemia and
# MajorVessels become NA, and every non-zero Diagnosis code (1-4) is collapsed
# into the single label "Disease".
heart_df <- heart_df %>%
  mutate(
    Sex = ifelse(Sex == 1, "Male", "Female"),
    ChestPainType = recode(
      ChestPainType,
      '1' = "TypicalAngina", '2' = "AtypicalAngina",
      '3' = "NonAnginal", '4' = "Asymptomatic"
    ),
    FastingBloodSugar = recode(FastingBloodSugar, '1' = '>120', '0' = '<120'),
    Resting_EC = recode(
      Resting_EC,
      '0' = 'Normal', '1' = 'ST-T', '2' = 'P.V.Hyperthrophy'
    ),
    ExerciseInducedAngina = recode(ExerciseInducedAngina, '0' = 'No', '1' = 'Yes'),
    Slope = recode(Slope, '1' = 'UpSloping', '2' = 'Flat', '3' = 'DownSlopping'),
    Thalassemia = recode(
      Thalassemia,
      '7' = 'ReversableDefect', '6' = 'FixedDefect', '3' = 'Normal',
      '?' = NA_character_
    ),
    Diagnosis = recode(
      Diagnosis,
      '0' = 'NoDisease', '1' = 'Disease', '2' = 'Disease',
      '3' = 'Disease', '4' = 'Disease'
    ),
    MajorVessels = if_else(MajorVessels == '?', NA_character_, MajorVessels)
  )

# Create boxplots using ggplot2 (with outliers).
# One boxplot per continuous variable of the raw data. Every plot expression
# ends at labs(): a trailing `+` would chain the plot into the following
# assignment and make the whole section fail.
plot1 <- ggplot(heart_df, aes(x = "", y = Age)) +
  geom_boxplot() +
  labs(title = "Boxplot for Age", y = "Age Values")
plot2 <- ggplot(heart_df, aes(x = "", y = Cholestherol)) +
  geom_boxplot() +
  labs(title = "Boxplot for Cholestherol", y = "Cholestherol Values")
plot3 <- ggplot(heart_df, aes(x = "", y = MaxHeartRate)) +
  geom_boxplot() +
  labs(title = "Boxplot for MaxHeartRate", y = "MaxHeartRate Values")
plot4 <- ggplot(heart_df, aes(x = "", y = RestingBloodPressure)) +
  geom_boxplot() +
  labs(title = "Boxplot for RestingBloodPressure", y = "RestingBloodPressure Values")
plot5 <- ggplot(heart_df, aes(x = "", y = Peak)) +
  geom_boxplot() +
  labs(title = "Boxplot for Peak", y = "Peak Values")

# Arrange the five boxplots in a three-column grid.
plot_grid(plot1, plot2, plot3, plot4, plot5, ncol = 3)

# Handle outliers.
#' Cap outliers of a numeric vector at the 1.5 * IQR fences
#'
#' Despite its name, this function does not drop observations: values below
#' Q1 - 1.5 * IQR are replaced by that lower fence and values above
#' Q3 + 1.5 * IQR by the upper fence, so the length of the input is preserved.
#'
#' @param x A numeric vector. `NA`s are ignored when computing the fences and
#'   are returned unchanged.
#' @return A numeric vector of the same length as `x` with extreme values
#'   capped at the fences.
remove_outliers_column <- function(x) {
  iqr_val <- IQR(x, na.rm = TRUE)
  # names = FALSE keeps the "25%"/"75%" labels out of the fence values.
  quartiles <- quantile(x, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  lower_bound <- quartiles[1] - 1.5 * iqr_val
  upper_bound <- quartiles[2] + 1.5 * iqr_val
  pmin(pmax(x, lower_bound), upper_bound)
}
# Work on a copy so the raw data stays available for comparison.
heart_df_filtered <- heart_df
columns_to_filter <- c("Age", "Cholestherol", "MaxHeartRate", "RestingBloodPressure", "Peak")

# Apply the outlier treatment to every continuous column in one step.
heart_df_filtered[columns_to_filter] <- lapply(
  heart_df[columns_to_filter],
  remove_outliers_column
)

# Create boxplots using ggplot2 (without outliers).
# Same five variables as before, drawn from the outlier-treated data. The
# constant `fill` string inside aes() gives each box a colour and a legend
# entry carrying the variable name. Every plot expression ends at labs(); a
# trailing `+` would chain it into the following assignment.
plot1_filtered <- ggplot(heart_df_filtered, aes(x = "", y = Age, fill = "Age")) +
  geom_boxplot() +
  labs(title = "Boxplot for Age (Filtered)", y = "Age Values")
plot2_filtered <- ggplot(heart_df_filtered, aes(x = "", y = Cholestherol, fill = "Cholestherol")) +
  geom_boxplot() +
  labs(title = "Boxplot for Cholestherol (Filtered)", y = "Cholestherol Values")
plot3_filtered <- ggplot(heart_df_filtered, aes(x = "", y = MaxHeartRate, fill = "MaxHeartRate")) +
  geom_boxplot() +
  labs(title = "Boxplot for MaxHeartRate (Filtered)", y = "MaxHeartRate Values")
plot4_filtered <- ggplot(heart_df_filtered, aes(x = "", y = RestingBloodPressure, fill = "RestingBloodPressure")) +
  geom_boxplot() +
  labs(title = "Boxplot for RestingBloodPressure (Filtered)", y = "RestingBloodPressure Values")
plot5_filtered <- ggplot(heart_df_filtered, aes(x = "", y = Peak, fill = "Peak")) +
  geom_boxplot() +
  labs(title = "Boxplot for Peak (Filtered)", y = "Peak Values")

# Arrange the five filtered boxplots in a three-column grid.
plot_grid(plot1_filtered, plot2_filtered, plot3_filtered, plot4_filtered, plot5_filtered, ncol = 3)

# Create density plots.
# Each panel overlays a density-scaled histogram with a kernel density curve.
# after_stat(density) replaces the deprecated `..density..` notation.
plot_Age <- ggplot(data = heart_df_filtered, aes(x = Age)) +
  geom_histogram(aes(y = after_stat(density)), fill = 'deepskyblue1', color = 'black', binwidth = 5) +
  geom_density(fill = '#F0FFFF', alpha = 0.5)
plot_Cholestherol <- ggplot(data = heart_df_filtered, aes(x = Cholestherol)) +
  geom_histogram(aes(y = after_stat(density)), fill = 'deepskyblue1', color = 'black', binwidth = 5) +
  geom_density(fill = '#F0FFFF', alpha = 0.5)
plot_MaxHeartRate <- ggplot(data = heart_df_filtered, aes(x = MaxHeartRate)) +
  geom_histogram(aes(y = after_stat(density)), fill = 'deepskyblue1', color = 'black', binwidth = 5) +
  geom_density(fill = '#F0FFFF', alpha = 0.5)
plot_RestingBloodPressure <- ggplot(data = heart_df_filtered, aes(x = RestingBloodPressure)) +
  geom_histogram(aes(y = after_stat(density)), fill = 'deepskyblue1', color = 'black', binwidth = 5) +
  geom_density(fill = '#F0FFFF', alpha = 0.5)
# Peak uses a much narrower bin width (0.1) than the other variables (5).
plot_Peak <- ggplot(data = heart_df_filtered, aes(x = Peak)) +
  geom_histogram(aes(y = after_stat(density)), fill = 'deepskyblue1', color = 'black', binwidth = 0.1) +
  geom_density(fill = '#F0FFFF', alpha = 0.5)

plot_grid(plot_Age, plot_Cholestherol, plot_MaxHeartRate, plot_RestingBloodPressure, plot_Peak, ncol = 3)
# Create Q-Q plots.
# Sample quantiles of each continuous variable are compared against the normal
# distribution; the red reference line is added by stat_qq_line(). Both layers
# are identical for every variable, so they are defined once and reused.
qq_normal_layers <- list(
  stat_qq(distribution = qnorm, color = 'deepskyblue1'),
  stat_qq_line(distribution = qnorm, color = 'red')
)

qq_plot_Age <- ggplot(heart_df_filtered, aes(sample = Age)) +
  qq_normal_layers +
  ggtitle("Q-Q Plot for Age")

qq_plot_Cholestherol <- ggplot(heart_df_filtered, aes(sample = Cholestherol)) +
  qq_normal_layers +
  ggtitle("Q-Q Plot for Cholestherol")

qq_plot_MaxHeartRate <- ggplot(heart_df_filtered, aes(sample = MaxHeartRate)) +
  qq_normal_layers +
  ggtitle("Q-Q Plot for MaxHeartRate")

qq_plot_RestingBloodPressure <- ggplot(heart_df_filtered, aes(sample = RestingBloodPressure)) +
  qq_normal_layers +
  ggtitle("Q-Q Plot for RestingBloodPressure")

qq_plot_Peak <- ggplot(heart_df_filtered, aes(sample = Peak)) +
  qq_normal_layers +
  ggtitle("Q-Q Plot for Peak")

# Lay the five Q-Q plots out on two rows.
grid.arrange(
  qq_plot_Age, qq_plot_Cholestherol, qq_plot_MaxHeartRate,
  qq_plot_RestingBloodPressure, qq_plot_Peak,
  nrow = 2
)

# Significance level: alpha = 0.005.

# H0 - Data follows a normal distribution.
# Ha - Data does not follow a normal distribution.

# Print the W statistic and p-value of a Shapiro-Wilk result under a heading
# that names the tested variable.
report_shapiro <- function(label, result) {
  cat(paste0("Shapiro-Wilk test for ", label, ":\n"),
      "W =", result$statistic, ", p-value =", result$p.value, "\n\n")
}

# Normality test for Age: p-value above alpha, H0 cannot be rejected.
shapiro_test_Age <- shapiro.test(heart_df_filtered$Age)
report_shapiro("Age", shapiro_test_Age)
## Shapiro-Wilk test for Age:
##  W = 0.9864633 , p-value = 0.006068642
# Normality test for Cholestherol: p-value above alpha, H0 cannot be rejected.
shapiro_test_Cholestherol <- shapiro.test(heart_df_filtered$Cholestherol)
report_shapiro("Cholestherol", shapiro_test_Cholestherol)
## Shapiro-Wilk test for Cholestherol:
##  W = 0.9888511 , p-value = 0.02006615
# Normality test for MaxHeartRate: p-value below alpha, H0 is rejected.
shapiro_test_MaxHeartRate <- shapiro.test(heart_df_filtered$MaxHeartRate)
report_shapiro("MaxHeartRate", shapiro_test_MaxHeartRate)
## Shapiro-Wilk test for MaxHeartRate:
##  W = 0.9765386 , p-value = 7.244153e-05
# Normality test for RestingBloodPressure: p-value below alpha, H0 is rejected.
shapiro_test_RestingBloodPressure <- shapiro.test(heart_df_filtered$RestingBloodPressure)
report_shapiro("RestingBloodPressure", shapiro_test_RestingBloodPressure)
## Shapiro-Wilk test for RestingBloodPressure:
##  W = 0.9745627 , p-value = 3.309576e-05
# Normality test for Peak: p-value below alpha, H0 is rejected.
shapiro_test_Peak <- shapiro.test(heart_df_filtered$Peak)
report_shapiro("Peak", shapiro_test_Peak)
## Shapiro-Wilk test for Peak:
##  W = 0.8517292 , p-value = 2.155173e-16
# Barplots for categorical variables.
# One count barplot per categorical column. Every plot expression is
# self-terminated (no trailing `+`), and the axis/title theme shared by most
# panels is defined once so the `theme(` call cannot get out of sync.
categorical_bar_theme <- list(
  theme_minimal(),
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
)

plot_sex <- ggplot(heart_df_filtered, aes(x = Sex, fill = as.factor(Sex))) +
  geom_bar() +
  scale_fill_manual(values = c("#FFB6C1", "#87CEFA"))

plot_chest_pain <- ggplot(heart_df_filtered, aes(x = ChestPainType, fill = ChestPainType)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set2") +
  categorical_bar_theme

plot_fasting_blood_sugar <- ggplot(heart_df_filtered, aes(x = FastingBloodSugar, fill = FastingBloodSugar)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set2") +
  categorical_bar_theme

# Both names are kept: the original assigned the same plot to the two of them.
plot_exercise_Resting_EC <- plot_resting_ec <- ggplot(heart_df_filtered, aes(x = Resting_EC, fill = Resting_EC)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set2") +
  categorical_bar_theme

plot_exercise_induced_angina <- ggplot(heart_df_filtered, aes(x = ExerciseInducedAngina, fill = ExerciseInducedAngina)) +
  geom_bar() +
  scale_fill_manual(values = c("#CD0000", "#00CD00")) +
  categorical_bar_theme

plot_slope <- ggplot(heart_df_filtered, aes(x = Slope, fill = Slope)) +
  geom_bar() +
  scale_fill_manual(values = c("#EE5C42", "#00E5EE", "#00EE76")) +
  categorical_bar_theme

# "Set7" is not an RColorBrewer palette (only Set1, Set2 and Set3 exist), so a
# valid qualitative palette is used instead.
plot_major_vessels <- ggplot(heart_df_filtered, aes(x = MajorVessels, fill = MajorVessels)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1") +
  categorical_bar_theme

plot_Thalassemia <- ggplot(heart_df_filtered, aes(x = Thalassemia, fill = as.factor(Thalassemia))) +
  geom_bar() +
  scale_fill_brewer(palette = "Set3") +
  theme(legend.position = "none")

plot_diagnosis <- ggplot(heart_df_filtered, aes(x = Diagnosis, fill = as.factor(Diagnosis))) +
  geom_bar() +
  scale_fill_manual(values = c("#CD0000", "#00CD00"))

# Arrange the nine barplots in a three-column grid.
plot_grid(
  plot_sex, plot_chest_pain, plot_fasting_blood_sugar,
  plot_exercise_Resting_EC, plot_exercise_induced_angina, plot_slope,
  plot_major_vessels, plot_Thalassemia, plot_diagnosis,
  ncol = 3
)

Create visualizations in order to show which variables seem to be more associated with heart disease

# List of variables to plot
variables_to_plot <- c("Sex", "ChestPainType", "FastingBloodSugar", "Resting_EC", "ExerciseInducedAngina", "Slope", "MajorVessels", "Thalassemia")

#' Dodged bar plot of one categorical variable split by Diagnosis
#'
#' @param variable Name (string) of a column of `heart_df_filtered`.
#' @return A ggplot object counting the observations per level of `variable`,
#'   with one bar per Diagnosis value.
create_bar_plot <- function(variable) {
  # .data[[variable]] looks the column up by its string name; it replaces the
  # deprecated aes_string().
  ggplot(heart_df_filtered, aes(x = .data[[variable]], fill = Diagnosis)) +
    geom_bar(position = "dodge") +
    labs(title = paste(variable, "vs Diagnosis"),
         x = variable,
         y = "Count")
}

# Build one plot per variable
barplots_2vars <- lapply(variables_to_plot, create_bar_plot)
# Displaying all plots in a grid
plot_grid(plotlist = barplots_2vars, ncol = 2)

# List of continuous variables
continuous_vars <- c("Age", "RestingBloodPressure", "Cholestherol", "MaxHeartRate", "Peak")

#' Overlaid density curves of one continuous variable, one per Diagnosis
#'
#' @param variable Name (string) of a column of `heart_df_filtered`.
#' @return A ggplot object with semi-transparent density curves filled by
#'   Diagnosis.
create_density_plot <- function(variable) {
  # .data[[variable]] looks the column up by its string name; it replaces the
  # deprecated aes_string().
  ggplot(heart_df_filtered, aes(x = .data[[variable]], fill = Diagnosis)) +
    geom_density(alpha = 0.5) +
    labs(title = paste("Density of", variable, "vs Diagnosis"),
         x = variable,
         y = "Density")
}

# Build one density plot per continuous variable
density_plots <- lapply(continuous_vars, create_density_plot)

# Displaying all density plots in a grid
plot_grid(plotlist = density_plots, ncol = 2)

#' Violin plot (with an inner boxplot) of one continuous variable by Diagnosis
#'
#' @param variable Name (string) of a column of `heart_df_filtered`.
#' @return A ggplot object with one violin per Diagnosis value and a narrow
#'   boxplot drawn on top of each violin.
create_violin_plot <- function(variable) {
  # .data[[variable]] looks the column up by its string name; it replaces the
  # deprecated aes_string(). The y label is set explicitly so the axis shows
  # the column name rather than the lookup expression.
  ggplot(heart_df_filtered, aes(x = Diagnosis, y = .data[[variable]], fill = Diagnosis)) +
    geom_violin(width = 1.4) +
    geom_boxplot(width = 0.1, color = "grey", alpha = 0.2) +
    scale_fill_viridis(discrete = TRUE) +
    theme_ipsum() +
    theme(legend.position = "none", plot.title = element_text(size = 11)) +
    labs(y = variable) +
    ggtitle(paste(variable, "vs Diagnosis"))
}

# Build one violin plot per continuous variable
violin_plots <- lapply(continuous_vars, create_violin_plot)

# Displaying all violin plots in a grid
plot_grid(plotlist = violin_plots, ncol = 2)
#Since the number of males is higher than the number of females, this data can lead to biased assumptions, even though we have found that heart disease is more prevalent in men than in women in this dataset. Most of the disease group reported asymptomatic chest pain. Patients with thalassemia defects are also more prone to have heart disease. Non-anginal chest pain has a lower proportion of disease patients than typical angina. An increased number of major vessels led to a higher proportion of disease, since it means that the major vessels' diameter is narrower. The group of patients that experienced exercise-induced angina has a larger proportion of disease than those who didn't.

We found that Age and Maximum Heart Rate are the variables that could be more related to a higher risk of heart disease. Cholesterol is also distributed differently between the Disease and No Disease groups. Furthermore, the Peak variable, which is the magnitude of the depression of the ST segment, is also found to be higher in the Disease group.

2 Difference in mortality rates in hospitalized COVID-19 patients

Using the supplementary material from the Difference in mortality rates in hospitalized COVID-19 patients identified by cytokine profile clustering using a machine learning approach: An outcome prediction alternative, perform the following tasks

Reproduce Figure 1 from the publication

# Open supplementary tables. Reading starts at the second sheet row, so that
# row provides the column names.
table1 <- read.xlsx("data/Table_1.xlsx", startRow = 2)
table2 <- read.xlsx("data/Table_2.xlsx", startRow = 2)

# Clean Table 1:
# Remove rows with NA in the ID column, keep the first record of every ID and
# only the columns needed for the clinical classification.
unique_table1 <- table1 %>%
  filter(!is.na(ID)) %>%
  distinct(ID, .keep_all = TRUE) %>%
  select(ID, Age, Use.of.NIV, Use.of.AMV, ARDS.Diagnosis, Death)

# Delete rows whose Yes/No columns contain only digits, and rows whose Age
# contains only letters.
unique_table1.2 <- unique_table1 %>%
  filter(
    !grepl("^[0-9]+$", Use.of.NIV),
    !grepl("^[0-9]+$", Use.of.AMV),
    !grepl("^[0-9]+$", ARDS.Diagnosis),
    !grepl("^[0-9]+$", Death),
    !grepl("^[A-Za-z]+$", Age)
  )

# Cleaning Table 2:
# Change the first column name of "Table 2" to "ID"
colnames(table2)[1] <- "ID"
# Remove rows with NA in the ID column and keep the first record of every ID.
table2 <- table2 %>%
  filter(!is.na(ID)) %>%
  distinct(ID, .keep_all = TRUE)

# Define the columns to check as strings
columns_to_check <- c("IL-6", "CXCL10", "IL-38", "IL-8", "IFN-ɑ", "IL-10", "TNF-ɑ", "CCL2", "CCL3", "IFN-γ", "IL-1β", "G-CSF")

# Delete rows where any of those columns holds a letters-only value.
# all_of() is required to select columns from an external character vector.
unique_table2 <- table2 %>%
  filter(!if_any(all_of(columns_to_check), ~ grepl("^[A-Za-z]+$", .x)))

# Delete the rows with "NI" anywhere in the specified columns
unique_table2.2 <- unique_table2 %>%
  filter(!if_any(all_of(columns_to_check), ~ grepl("NI", .x, fixed = TRUE)))
# Identify common IDs in both tables
common_ids <- intersect(unique_table1.2$ID, unique_table2.2$ID)

# Filter tables to keep only rows with common IDs
filtered_table1 <- unique_table1.2 %>% filter(ID %in% common_ids)
filtered_table2 <- unique_table2.2 %>% filter(ID %in% common_ids)

# Make the classification column with groups "G1, G2, G3, G4" based on
# expert-based criteria. Conditions are evaluated in order and rows matching
# none of them get NA.
table1.classification <- filtered_table1 %>%
  mutate(Classification = case_when(
    Use.of.NIV == "No" & Use.of.AMV == "No" & ARDS.Diagnosis == "No" ~ "G1",
    ARDS.Diagnosis == "No" & (Use.of.NIV == "Yes" | Use.of.AMV == "Yes") ~ "G2",
    Use.of.NIV == "Yes" & Use.of.AMV == "No" & ARDS.Diagnosis == "Yes" ~ "G3",
    ARDS.Diagnosis == "Yes" & Use.of.AMV == "Yes" & (Use.of.NIV == "Yes" | Use.of.NIV == "No") ~ "G4",
    TRUE ~ NA_character_
  )) %>%
  # Remove rows with NA in the Classification column
  filter(!is.na(Classification))

# Figure 1 A.
# Age histogram in 10-year bins; both axes are limited to fixed ranges
# (x: 20-100, y: 0-50).
Fig1_a <- ggplot(table1.classification, aes(x = Age)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black") +
  scale_x_continuous(
    breaks = seq(20, 100, by = 10),
    limits = c(20, 100)
  ) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    x = "Age (years)",
    y = "Frequency (n)",
    title = ""
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Warning: Removed 2 rows containing missing values (`geom_bar()`).

# Figure 1 B.
# Table describing which interventions/diagnosis (NIV, AMV, ARDS) define each
# clinical group.
first_column <- c("G1", "G2", "G3", "G4")
second_column <- c("-", "-/+", "+", "-/+")
third_column <- c("-", "+/-", "-", "+")
forth_column <- c("-", "-", "+", "+")
table1_classification <- data.frame(first_column, second_column, third_column, forth_column)
# Assign the headers directly instead of going through a variable called
# `names`, which would shadow the base function of the same name.
colnames(table1_classification) <- c("Clinical\n Classification", "NIV", "AMV", "ARDS")
# Blank row names of different lengths: unique, but invisible in the table.
rownames(table1_classification) <- c(" ", "  ", "   ", "    ")

Fig1_b <- grid.arrange(
  tableGrob(table1_classification),
  top = "Definition of the Clinical Classification"
)

# Convert to a grob so the table can be placed in the final panel layout.
Fig1_b_grob <- as_grob(plot = Fig1_b)

# Figure 1 C.
# Count the patients per clinical group and turn the counts into a two-column
# data frame for plotting.
classification_freq <- table(table1.classification$Classification)
classification_df <- as.data.frame(classification_freq)
names(classification_df) <- c("Classification", "Frequency")

# geom_col() draws bars whose height is the precomputed Frequency.
Fig1_c <- ggplot(classification_df, aes(x = Classification, y = Frequency, fill = Classification)) +
  geom_col(width = 0.7, color = "black") +
  geom_text(aes(label = Frequency), vjust = -0.3, color = "black") +
  scale_fill_manual(values = c("aquamarine3", "khaki1", "mediumpurple1", "salmon")) +
  labs(title = "Clinical Classification", x = "Clinical Classification", y = "Frequency (n)") +
  ylim(0, 90) +
  theme_minimal() +
  theme(legend.position = "none")

# Figure 1 D.
# Count the patients per vital status and turn the counts into a two-column
# data frame for plotting.
Death_freq <- table(table1.classification$Death)
Death_df <- as.data.frame(Death_freq)
names(Death_df) <- c("VitalStatus", "Frequency")

# geom_col() draws bars whose height is the precomputed Frequency.
Fig1_d <- ggplot(Death_df, aes(x = VitalStatus, y = Frequency, fill = VitalStatus)) +
  geom_col(width = 0.7, color = "black") +
  geom_text(aes(label = Frequency), vjust = -0.3, color = "black") +
  scale_fill_manual(values = c("turquoise", "gold")) +
  labs(title = "Vital status", x = "Death", y = "Frequency (n)") +
  ylim(0, 150) +
  theme_minimal() +
  theme(legend.position = "none")

# Assemble the four panels of Figure 1 in a 2 x 2 layout.
grid.arrange(Fig1_a, Fig1_b_grob, Fig1_c, Fig1_d, ncol = 2)
## Warning: Removed 2 rows containing missing values (`geom_bar()`).

Reproduce Figure 2 from the publication

but instead of representing the clusters in the annotation, represent the groups (G1 to G4)

# Select the needed columns and rename the labels to make them easier to handle.
# NOTE(review): the heatmap below uses columns 2:13, so ID plus the twelve
# cytokines are kept; the order of the first nine columns is an assumption -
# confirm against the intended figure.
unique_table2.2 <- unique_table2.2 %>%
  select(
    ID, `IL-6`, CXCL10, `IL-38`, `IFN-ɑ`, `IL-10`, CCL3, `IFN-γ`, `IL-1β`, `G-CSF`,
    `TNF-ɑ`, `IL-8`, CCL2
  ) %>%
  # Drop the hyphens so the columns can be referenced without backticks.
  rename_with(~ gsub("-", "", .x, fixed = TRUE)) %>%
  # Turn all non numeric values to NA. The coercion warning is expected here
  # and is therefore silenced for this conversion only.
  mutate(across(-ID, ~ suppressWarnings(as.numeric(.x))))

# Merge table 2 with classification from table 1 with ID.
table2_1_merged <- unique_table2.2 %>%
  inner_join(table1.classification %>% select(ID, Classification), by = "ID")

# Define the color palette and assign a color to each group
color <- brewer.pal(4, "Set1")
classification_colors <- setNames(color, c("G1", "G2", "G3", "G4"))
classifications <- table2_1_merged$Classification
# Look the colour of every patient up by group name; the names are dropped so
# that a plain character vector is passed to heatmap().
classification_colside <- unname(classification_colors[as.character(classifications)])

# Plot using heatmap(). The data is transposed so that patients are columns
# and cytokines are rows; scale = "column" therefore standardises per patient.
heatmap_plot <- heatmap(
  as.matrix(t(table2_1_merged[, 2:13])),
  cexCol = 0.1,
  xlab = "Patients",
  labCol = FALSE,
  col = brewer.pal(9, "Oranges"),
  scale = "column",
  ColSideColors = classification_colside
)

# Legends: clinical groups (top right) and three reference points of the
# colour ramp.
legend("topright", legend = c("G1", "G2", "G3", "G4"), fill = classification_colors)
legend(legend = c("0.00", "50.0%", "100.0%"),
       fill = (brewer.pal(9, "Oranges")[c(1, 5, 9)]), title = "Relative\nExpression",
       x = 0, y = 0.3)

#When plotting without clusters, groups are not clustered since there is nothing in the data that relates the groups, for that reason we see the group label unsorted.

Improve figure 2 of the publication

Add a second annotation with information on death, and a third one with information on gender

# Clean table: remove rows with NA in the ID column and keep the first record
# of every ID.
unique_table1.3 <- table1 %>%
  filter(!is.na(ID)) %>%
  distinct(ID, .keep_all = TRUE)

# Merge by ID only columns of gender and death from Table 1.
# NOTE(review): the left-hand table is assumed to be table2_1_merged (ID,
# twelve cytokines, Classification), which matches the 16-column layout the
# original index-based selection relied on - confirm.
table12_group_gender <- inner_join(
  table2_1_merged,
  unique_table1.3 %>% select(ID, Gender, Death),
  by = "ID"
)

# Remove rows whose Gender contains a digit.
table12_group_gender <- table12_group_gender %>%
  filter(!grepl("\\d", Gender))

# Get numeric data for Heatmap: everything except the identifier and the three
# annotation columns (selected by name rather than by position).
numeric_data <- table12_group_gender %>%
  select(-ID, -Classification, -Gender, -Death)

# Annotation vectors. Trailing whitespace is stripped from Gender so that
# values such as "F " match the annotation colours.
gender_info <- trimws(table12_group_gender$Gender, which = "right")
death_info <- table12_group_gender$Death
classification_info <- table12_group_gender$Classification

# Column annotations: vital status, gender and clinical group of each patient.
ha <- HeatmapAnnotation(
  Deaths = death_info,
  Gender = gender_info,
  Groups = classification_info,
  col = list(
    Deaths = c("Yes" = "black", "No" = "gray"),
    Groups = c("G1" = "red", "G2" = "green", "G3" = "blue", "G4" = "purple"),
    Gender = c("M" = "royalblue", "F" = "pink")
  )
)

# NOTE(review): the matrix argument is assumed to mirror the base heatmap used
# earlier (patients as columns, values standardised per patient) - confirm.
heatmap_matrix <- scale(t(as.matrix(numeric_data)))

Heatmap(
  heatmap_matrix,
  name = "Relative\nexpression",
  column_title = "Patients",
  column_title_side = "bottom",
  top_annotation = ha,
  col = brewer.pal(9, "Oranges"),
  heatmap_legend_param = list(
    title = "Relative\nexpression",
    at = c(-2, 1, 4),
    labels = c("0", "50%", "100%")
  )
)
