GitHub Repository for this Script
OrgMassSpecR Homepage

Heat maps and hierarchical clustering concisely display and compare sample contaminant profiles by similarity. They can also be used as a quality control step to check for missing or otherwise unexpected data. The following examples rely on the lattice and latticeExtra packages.

library(lattice)
library(latticeExtra)
## Loading required package: RColorBrewer

1 Basic Linear Scale

Here, non-detects were set to a value of zero. Missing values (“No IS”) were set to NA and appear as white (empty) cells. Compounds with very low values are binned as having apparent values of zero (blue).

# Read data
x <- read.csv("Example 1 Data.csv", stringsAsFactors = FALSE)

# Set up matrix by moving sample codes from first column to row names
sampleCodes <- x[, 1] # Grab sample codes
heatmapMatrix <- data.matrix(x[, -1]) # Remove first column
rownames(heatmapMatrix) <- sampleCodes # Assign as row names

# Clustering
# heatmapMatrix <- t(heatmapMatrix) # To flip the figure, if needed
rowDendrogram <- as.dendrogram(hclust(dist(heatmapMatrix)))
rowOrder <- order.dendrogram(rowDendrogram)
columnDendrogram <- as.dendrogram(hclust(dist(t(heatmapMatrix))))
columnOrder <- order.dendrogram(columnDendrogram)

# Plot
lattice.options(axis.padding=list(factor = 0.5)) # Sets padding between heatmap and box

p1 <- levelplot(heatmapMatrix[rowOrder, columnOrder],
                aspect = "fill", 
                scales = list(x = list(rot = 90), tck = c(1,0)),
                # Specify key color ramp
                colorkey = list(space = "left", 
                                col = colorRampPalette(c("blue", "red", "yellow"), 
                                                       space = "Lab")(100),
                                height = 0.75),
                # Specify plot color ramp (same as key)
                col.regions = colorRampPalette(c("blue", "red", "yellow"), 
                                               space = "Lab")(100),
                xlab = NULL, ylab = NULL, pretty = TRUE, 
                main = "Byproduct ratios in treated water samples",
                # Set range and number of breakpoints in intensity data,
                # length.out should match the number of number of colors
                # specified for the ke and plot.
                at = seq(0, 4500, length.out = 100),
                # Set dendogram position
                par.settings = list(layout.widths = list(axis.key.padding = -1.5),
                                    layout.heights = list(key.axis.padding = -1.5)),
                # Draw dendograms
                legend =
                  list(right = list(fun = dendrogramGrob,
                                    args = list(x = columnDendrogram, 
                                                side = "right", 
                                                size = 5)),                      
                       top = list(fun = dendrogramGrob,
                                  args = list(x = rowDendrogram, 
                                              side = "top", 
                                              size = 5)))
)

print(p1)

2 Linear Scale Emphasizing Non-Detects

In this example the color ramp is modified to highlight the difference between detects and non-detects.

p2 <- levelplot(heatmapMatrix[rowOrder, columnOrder],
          aspect = "fill", scales = list(x = list(rot = 90), tck = c(1,0)),
          colorkey = list(space = "left", 
                          col = colorRampPalette(c("blue", "red", "yellow"), 
                                                 space = "Lab")(10),
                          height = 0.75),
          col.regions = colorRampPalette(c("blue", "red", "yellow"), 
                                         space = "Lab")(10),
          xlab = NULL, ylab = NULL, cuts = 10, pretty = TRUE,
          main = "Byproduct ratios in treated water samples",
          par.settings = list(layout.widths = list(axis.key.padding = -1.5),
                              layout.heights = list(key.axis.padding = -1.5)),
          legend =
            list(right = list(fun = dendrogramGrob,
                              args = list(x = columnDendrogram, 
                                          side = "right", 
                                          size = 5)),                      
                 top = list(fun = dendrogramGrob,
                            args = list(x = rowDendrogram, 
                                        side = "top", 
                                        size = 5)))
)

print(p2)

3 Log Scale

This example illustrates generation of a log-scale heat map. Since the data contains non-detects that are set to zero, and taking log10(0) is not possible, the data is scaled by log10(x + 0.0001). The plot scale is then adjusted to show the original values. The example also illustrates:

# Read data
x <- read.csv("Example 2 Data.csv", stringsAsFactors = FALSE, na.strings = c("", "NA"))

# Subset data and prepate sample names
x <- x[x$Category1 == "natural", ]
x$SampleCode <- paste(x$Ecotype, " (", substr(x$Sample, (nchar(x$Sample) - 2) + 1, nchar(x$Sample)), ")", sep = "")
heatmapData <- x[, c("SampleCode", "Compound", "RelativeArea")]

# Convert to log scale
heatmapData$RelativeArea <- log10(heatmapData$RelativeArea + 0.0001) 

# Convert to wide format
heatmapData <- reshape(heatmapData,
                       v.names = "RelativeArea",
                       idvar = "SampleCode",
                       timevar = "Compound",
                       direction = "wide")

heatmapMatrix <- data.matrix(heatmapData[, -1]) # Remove first column

# Convert non-detects
heatmapMatrix[is.na(heatmapMatrix)] <- -4

# Prepare row and column names
rownames(heatmapMatrix) <- heatmapData$SampleCode
colnames(heatmapMatrix) <- sapply(strsplit(colnames(heatmapMatrix), split = ".", fixed = TRUE), "[[", j = 2)

# Clustering
heatmapMatrix <- t(heatmapMatrix)
rowDendrogram <- as.dendrogram(hclust(dist(heatmapMatrix)))
rowOrder <- order.dendrogram(rowDendrogram)
columnDendrogram <- as.dendrogram(hclust(dist(t(heatmapMatrix))))
columnOrder <- order.dendrogram(columnDendrogram)

lattice.options(axis.padding=list(factor = 0.5))

p3 <- levelplot(heatmapMatrix[rowOrder, columnOrder],
          aspect = "fill", 
          scales = list(x = list(rot = 90), tck = c(1,0)),
          # scales = list(x = list(draw = FALSE)), # To remove axis if needed
          colorkey = list(space = "left",
                          col = colorRampPalette(c("blue", "red", "yellow"), 
                                                 space = "Lab")(100),
                          height = 0.75,
                          labels = list(at = c(log10(0+0.0001),
                                               log10(0.001+0.0001),
                                               log10(0.01+0.0001),
                                               log10(0.1+0.0001),
                                               log10(1+0.0001),
                                               log10(10+0.0001)),
                                        labels = c(0, 0.001, 0.01, 0.1, 1, 10)
                          )
          ),
          col.regions = colorRampPalette(c("blue", "red", "yellow"), space = "Lab")(100),
          xlab = NULL, ylab = NULL, 
          main = "Relative abundance of natural products in dolphin ecotypes",
          at = seq(log10(0.0001), log10(10 + 0.0001), length.out = 100),
          par.settings = list(layout.widths = list(axis.key.padding = -1.5),
                              layout.heights = list(key.axis.padding = -1.5)),
          legend =
            list(right = list(fun = dendrogramGrob,
                              args = list(x = columnDendrogram, side = "right", size = 5)),
                 top = list(fun = dendrogramGrob,
                            args = list(x = rowDendrogram, side = "top", size = 5)))
)

print(p3)

# Extract code
# library(knitr)
# purl("HeatMaps.Rmd")