BACK TO EXERCISE 1

We will now have a look at a dataset created during a basic ecology course for bachelor students. The data describe feeding and survival rates of Daphnia from two different lakes, under two different temperature conditions. The full code of the students' data analysis is pasted below.

Download the dataset here

# Load the Daphnia measurements from the RDS file and preview the first rows.
dafnia <- readRDS(file = "dafnia.rds")
head(x = dafnia)
##   Temp Lake   Feeding  Survival
## 1   23   LM  23920000 0.9841270
## 2   23   LM  60720000 0.1428571
## 3   23   LM  44160000 0.7619048
## 4   23   LM  23000000 0.9166667
## 5   23   LM 110400000 0.9545455
## 6   30   LM -13340000 0.6000000

Feeding Rate

  1. Create data summary
# Mean, standard deviation and number of feeding observations for every
# Lake x Temp combination. aggregate() stores the three statistics together
# in a single matrix column named `x`.
SumTable <- with(
  dafnia,
  aggregate(
    Feeding,
    by = list(Lake = Lake, Temp = Temp),
    FUN = function(obs) {
      c(mean = mean(obs), sd = sd(obs), n = length(obs))
    }
  )
)
head(SumTable)
##   Lake Temp   x.mean     x.sd      x.n
## 1   LM   23 52440000 35974235        5
## 2   LS   23 93380000 35603723        4
## 3   LM   30 26404000 30647187        5
## 4   LS   30 36892000 36445240        5
  1. Format the table
# Spread the matrix column into the ordinary columns x.mean, x.sd and x.n.
SumTable <- do.call(what = data.frame, args = SumTable)
# Standard error of the mean: sd / sqrt(n).
SumTable[["se"]] <- SumTable[["x.sd"]] / sqrt(SumTable[["x.n"]])
names(SumTable) <- c("Lake", "Temp", "mean", "sd", "n", "se")
SumTable
##   Lake Temp     mean       sd n       se
## 1   LM   23 52440000 35974235 5 16088167
## 2   LS   23 93380000 35603723 4 17801861
## 3   LM   30 26404000 30647187 5 13705839
## 4   LS   30 36892000 36445240 5 16298807
  1. Plot data
library("ggplot2")

# Dodged bars of the group means with +/- one standard error on top.
# The error-bar aesthetics refer to the columns of `SumTable` by bare name:
# aes() is evaluated inside the plot data, and writing `SumTable$mean` there
# bypasses that lookup (recent ggplot2 versions warn that it is discouraged).
ggplot(
  data = SumTable,
  aes(x = factor(Temp), y = mean, fill = factor(Lake))
) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.9)
  ) +
  geom_errorbar(
    aes(ymax = mean + se, ymin = mean - se),
    position = position_dodge(width = 0.9),
    width = 0.25
  ) +
  labs(x = "Temperatur (grC)", y = "Ätna Celler") +
  scale_fill_manual(values = c("royalblue1", "peachpuff1"), name = "Sjö")

Survival Rate

  1. Create data summary
# Mean, standard deviation and number of survival observations for every
# Lake x Temp combination. aggregate() stores the three statistics together
# in a single matrix column named `x`.
SumTable <- with(
  dafnia,
  aggregate(
    Survival,
    by = list(Lake = Lake, Temp = Temp),
    FUN = function(obs) {
      c(mean = mean(obs), sd = sd(obs), n = length(obs))
    }
  )
)
head(SumTable)
##   Lake Temp    x.mean      x.sd       x.n
## 1   LM   23 0.7520202 0.3511324 5.0000000
## 2   LS   23 0.4500000 0.3915780 4.0000000
## 3   LM   30 0.4838574 0.4021670 5.0000000
## 4   LS   30 0.1100000 0.1673320 5.0000000
  1. Format the table
# Spread the matrix column into the ordinary columns x.mean, x.sd and x.n.
SumTable <- do.call(what = data.frame, args = SumTable)
# Standard error of the mean: sd / sqrt(n).
SumTable[["se"]] <- SumTable[["x.sd"]] / sqrt(SumTable[["x.n"]])
names(SumTable) <- c("Lake", "Temp", "mean", "sd", "n", "se")
SumTable
##   Lake Temp      mean        sd n         se
## 1   LM   23 0.7520202 0.3511324 5 0.15703118
## 2   LS   23 0.4500000 0.3915780 4 0.19578900
## 3   LM   30 0.4838574 0.4021670 5 0.17985453
## 4   LS   30 0.1100000 0.1673320 5 0.07483315
  1. Plot data
library("ggplot2")

# Dodged bars of the group means with +/- one standard error on top.
# The error-bar aesthetics refer to the columns of `SumTable` by bare name:
# aes() is evaluated inside the plot data, and writing `SumTable$mean` there
# bypasses that lookup (recent ggplot2 versions warn that it is discouraged).
ggplot(
  data = SumTable,
  aes(x = factor(Temp), y = mean, fill = factor(Lake))
) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.9)
  ) +
  geom_errorbar(
    aes(ymax = mean + se, ymin = mean - se),
    position = position_dodge(width = 0.9),
    width = 0.25
  ) +
  labs(x = "Temperatur (grC)", y = "Överlevande") +
  scale_fill_manual(values = c("royalblue1", "peachpuff1"), name = "Sjö")

1. Make code nicer by structuring the code in functions.

As you can see, this fairly long piece of code is repeated twice. Is it possible to restructure the code into functions, to avoid the repetition and make the code shorter?

2. Customize the function

As a last step, try to generalize this function as much as you can. Can you make a function, or a set of functions, that creates any kind of bar plot in ggplot? Let's see how far we can go!

BACK TO EXERCISE 1

3. Possible solution:

#' Summarise a response column by two grouping columns
#'
#' Computes the mean, standard deviation, group size and standard error
#' (sd / sqrt(n)) of column `y` for every observed combination of the
#' columns `x1` and `x2`.
#'
#' @param data Data set holding the columns; defaults to the global `dafnia`.
#' @param y Name of the column to summarise.
#' @param x1,x2 Names of the two grouping columns.
#' @param na.rm If `TRUE`, missing values in `y` are dropped before the
#'   statistics (including `n`) are computed. The default `FALSE` keeps them,
#'   so the statistics are computed on the values exactly as supplied.
#' @return A data frame with one row per group and the columns `x1`, `x2`
#'   (carrying the names passed in), `mean`, `sd`, `n` and `se`.
sumtable <- function(data = dafnia, y = "Feeding", x1 = "Lake", x2 = "Temp",
                     na.rm = FALSE) {
  # data[[name]] silently yields NULL for an unknown column, which makes
  # aggregate() fail with an obscure message; report the bad name instead.
  requested <- unlist(Filter(is.character, list(y, x1, x2)))
  unknown <- setdiff(requested, names(data))
  if (length(unknown) > 0) {
    stop("Column(s) not found in `data`: ",
         paste(unknown, collapse = ", "),
         call. = FALSE)
  }

  describe <- function(values) {
    if (na.rm) {
      values <- values[!is.na(values)]
    }
    c(mean = mean(values), sd = sd(values), n = length(values))
  }

  grouped <- aggregate(data[[y]],
                       by = list(x1 = data[[x1]], x2 = data[[x2]]),
                       FUN = describe)

  # aggregate() returns the three statistics as one matrix column `x`;
  # do.call(data.frame, ...) spreads it into x.mean, x.sd and x.n.
  flat <- do.call(data.frame, grouped)
  flat$se <- flat$x.sd / sqrt(flat$x.n)
  colnames(flat) <- c(x1, x2, "mean", "sd", "n", "se")
  flat
}
#' Dodged bar plot of group means with standard-error bars
#'
#' Summarises `y` by the columns `x` and `fill` with `sumtable()` and draws
#' one bar per group (mean) with an error bar of +/- one standard error.
#'
#' @param data Data set holding the columns; defaults to the global `dafnia`.
#' @param y Name of the response column; also used as the y-axis label.
#' @param x Name of the grouping column shown on the x axis (and its label).
#' @param fill Name of the grouping column mapped to the bar fill colour;
#'   also used as the legend title.
#' @param ... Further arguments forwarded to `ggplot2::geom_bar()`.
#' @param fill_colors Fill colours, one per level of `fill`. When there are
#'   more levels than colours (and at least two colours), the colours are
#'   interpolated to the required number.
#' @return A ggplot object.
plotbar <- function(data = dafnia, y = "Feeding", x = "Lake", fill = "Temp",
                    ..., fill_colors = c("royalblue1", "peachpuff1")) {
  # require() only returns FALSE when the package is missing, which would
  # surface later as an unrelated "could not find function" error.
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("plotbar() needs the 'ggplot2' package; please install it.",
         call. = FALSE)
  }

  summary_table <- sumtable(data = data, y = y, x1 = x, x2 = fill)

  # Map aesthetics to fixed, precomputed columns rather than to
  # `data$...` / `data[[x]]` inside aes(): that form bypasses ggplot2's data
  # lookup and misbehaves when a grouping column is itself named "x",
  # "fill" or "data".
  plot_data <- data.frame(
    x_group = factor(summary_table[[x]]),
    fill_group = factor(summary_table[[fill]]),
    mean = summary_table[["mean"]],
    se = summary_table[["se"]],
    lower = summary_table[["mean"]] - summary_table[["se"]],
    upper = summary_table[["mean"]] + summary_table[["se"]]
  )

  # A manual scale needs one colour per fill level; stretch the palette
  # instead of failing when the data have more levels than colours.
  n_fill <- nlevels(plot_data$fill_group)
  if (n_fill > length(fill_colors) && length(fill_colors) >= 2) {
    fill_colors <- grDevices::colorRampPalette(fill_colors)(n_fill)
  }

  dodge <- ggplot2::position_dodge(width = 0.9)

  ggplot2::ggplot(
    plot_data,
    ggplot2::aes(x = x_group, y = mean, fill = fill_group)
  ) +
    ggplot2::geom_bar(stat = "identity", position = dodge, ...) +
    ggplot2::geom_errorbar(
      ggplot2::aes(ymax = upper, ymin = lower),
      position = dodge,
      width = 0.25
    ) +
    ggplot2::labs(x = x, y = y) +
    ggplot2::scale_fill_manual(values = fill_colors, name = fill)
}
# Feeding-rate plot using plotbar()'s default grouping
# (x = "Lake", fill = "Temp"); the returned plot is shown by auto-printing.
plotbar(data = dafnia, y = "Feeding")

# Same plot for the survival column.
plotbar(data = dafnia, y = "Survival")