We will now have a look at a dataset created during a basic ecology course for bachelor students. The data describe feeding and survival rates of Daphnia from two different lakes, under two different temperature conditions. The full code of the students' data analysis is pasted below.
# Load the Daphnia dataset from the RDS file "dafnia.rds" (relative path, so
# it is resolved against the current working directory) and preview its first
# rows. Lines starting with `##` are the printed output, not code.
dafnia <- readRDS("dafnia.rds")
head(dafnia)
## Temp Lake Feeding Survival
## 1 23 LM 23920000 0.9841270
## 2 23 LM 60720000 0.1428571
## 3 23 LM 44160000 0.7619048
## 4 23 LM 23000000 0.9166667
## 5 23 LM 110400000 0.9545455
## 6 30 LM -13340000 0.6000000
# --- Feeding rate: summary table and bar plot ---
# Summarise Feeding for every Lake x Temp combination. FUN returns a named
# vector (mean, sd, n), so aggregate() keeps the three statistics together in
# one matrix column called `x` (printed below as x.mean, x.sd, x.n).
SumTable <- aggregate(dafnia$Feeding,
by = list(Lake = dafnia$Lake,
Temp = dafnia$Temp),
FUN = function(x) c(mean = mean(x),
sd = sd(x),
n = length(x)))
head(SumTable)
## Lake Temp x.mean x.sd x.n
## 1 LM 23 52440000 35974235 5
## 2 LS 23 93380000 35603723 4
## 3 LM 30 26404000 30647187 5
## 4 LS 30 36892000 36445240 5
# Flatten the matrix column into ordinary columns x.mean, x.sd and x.n.
SumTable <- do.call(data.frame, SumTable)
# Standard error of the mean: sd / sqrt(n).
SumTable$se <- SumTable$x.sd / sqrt(SumTable$x.n)
# Replace the generated names with short, plot-friendly ones.
colnames(SumTable) <- c("Lake", "Temp", "mean", "sd", "n", "se")
SumTable
## Lake Temp mean sd n se
## 1 LM 23 52440000 35974235 5 16088167
## 2 LS 23 93380000 35603723 4 17801861
## 3 LM 30 26404000 30647187 5 13705839
## 4 LS 30 36892000 36445240 5 16298807
library("ggplot2")
# Dodged bar chart of the group means with mean +/- se error bars: Temp on the
# x axis, one bar colour per Lake. Axis and legend labels are in Swedish.
# NOTE(review): the error-bar aes() refers to SumTable$mean / SumTable$se
# instead of the bare column names; ggplot2 discourages `$` inside aes() --
# consider `ymax = mean + se, ymin = mean - se`.
ggplot(data = SumTable, aes(x = factor(Temp),
y = mean, fill = factor(Lake)))+
geom_bar(stat = "identity",
position = position_dodge(width =0.9)) +
geom_errorbar(aes(ymax = SumTable$mean+SumTable$se,
ymin = SumTable$mean-SumTable$se),
position = position_dodge(width = 0.9),
width = 0.25) +
labs(x = "Temperatur (grC)", y = "Ätna Celler") +
scale_fill_manual(values=c("royalblue1", "peachpuff1"), name="Sjö")
# --- Survival rate: summary table and bar plot ---
# Same steps as for Feeding, now applied to the Survival column. Note that
# SumTable is overwritten, so the Feeding summary is no longer available.
# FUN returns a named vector (mean, sd, n) that aggregate() stores in one
# matrix column `x` (printed below as x.mean, x.sd, x.n).
SumTable <- aggregate(dafnia$Survival,
by = list(Lake = dafnia$Lake,
Temp = dafnia$Temp),
FUN = function(x) c(mean = mean(x),
sd = sd(x),
n = length(x)))
head(SumTable)
## Lake Temp x.mean x.sd x.n
## 1 LM 23 0.7520202 0.3511324 5.0000000
## 2 LS 23 0.4500000 0.3915780 4.0000000
## 3 LM 30 0.4838574 0.4021670 5.0000000
## 4 LS 30 0.1100000 0.1673320 5.0000000
# Flatten the matrix column into ordinary columns x.mean, x.sd and x.n.
SumTable <- do.call(data.frame, SumTable)
# Standard error of the mean: sd / sqrt(n).
SumTable$se <- SumTable$x.sd / sqrt(SumTable$x.n)
# Replace the generated names with short, plot-friendly ones.
colnames(SumTable) <- c("Lake", "Temp", "mean", "sd", "n", "se")
SumTable
## Lake Temp mean sd n se
## 1 LM 23 0.7520202 0.3511324 5 0.15703118
## 2 LS 23 0.4500000 0.3915780 4 0.19578900
## 3 LM 30 0.4838574 0.4021670 5 0.17985453
## 4 LS 30 0.1100000 0.1673320 5 0.07483315
# ggplot2 was already attached by an earlier library() call in this script;
# repeating the call is harmless.
library("ggplot2")
# Dodged bar chart of mean survival with mean +/- se error bars: Temp on the
# x axis, one bar colour per Lake. Axis and legend labels are in Swedish.
# NOTE(review): the error-bar aes() refers to SumTable$mean / SumTable$se
# instead of the bare column names; ggplot2 discourages `$` inside aes() --
# consider `ymax = mean + se, ymin = mean - se`.
ggplot(data = SumTable, aes(x = factor(Temp),
y = mean, fill = factor(Lake)))+
geom_bar(stat = "identity",
position = position_dodge(width =0.9)) +
geom_errorbar(aes(ymax = SumTable$mean+SumTable$se,
ymin = SumTable$mean-SumTable$se),
position = position_dodge(width = 0.9),
width = 0.25) +
labs(x = "Temperatur (grC)", y = "Överlevande") +
scale_fill_manual(values=c("royalblue1", "peachpuff1"), name="Sjö")
As you can see, a fairly long piece of code is repeated twice. Is it possible to restructure the code into functions, to avoid the repetition and make the code shorter?
As a last step, try to generalize this function as much as you can. Can you make a function, or a set of functions, that creates any kind of bar plot in ggplot? Let's see how far we can go!
#' Summarise a numeric column by two grouping columns
#'
#' Computes the mean, standard deviation, group size and standard error of
#' `y` for every combination of `x1` and `x2` found in `data`.
#'
#' @param data Data frame holding the observations.
#' @param y Name (string) of the numeric column to summarise.
#' @param x1 Name (string) of the first grouping column.
#' @param x2 Name (string) of the second grouping column.
#' @param na.rm If `TRUE`, missing values in `y` are dropped before the
#'   statistics are computed, and `n` counts only the remaining values.
#'   Defaults to `FALSE`, which propagates `NA` into `mean`, `sd` and `se`.
#' @return A data frame with one row per group and the columns `x1`, `x2`
#'   (named after the grouping columns), `mean`, `sd`, `n` and `se`.
sumtable <- function(data = dafnia, y = "Feeding", x1 = "Lake", x2 = "Temp",
                     na.rm = FALSE) {
  # Fail early with a readable message: data[["typo"]] is NULL, which would
  # otherwise surface as an obscure length error inside aggregate().
  missing_cols <- setdiff(c(y, x1, x2), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  summarise_group <- function(values) {
    if (na.rm) {
      values <- values[!is.na(values)]
    }
    c(mean = mean(values), sd = sd(values), n = length(values))
  }

  # FUN returns a vector, so aggregate() stores the statistics in a single
  # matrix column `x`; do.call(data.frame, ...) flattens it to x.mean, x.sd
  # and x.n.
  summary_tbl <- aggregate(
    data[[y]],
    by = list(x1 = data[[x1]], x2 = data[[x2]]),
    FUN = summarise_group
  )
  summary_tbl <- do.call(data.frame, summary_tbl)

  # Standard error of the mean.
  summary_tbl$se <- summary_tbl$x.sd / sqrt(summary_tbl$x.n)
  colnames(summary_tbl) <- c(x1, x2, "mean", "sd", "n", "se")
  summary_tbl
}
#' Dodged bar plot of group means with standard-error bars
#'
#' Summarises `y` for every combination of `x` and `fill` with `sumtable()`
#' and draws the group means as dodged bars with mean +/- se error bars.
#'
#' @param data Data frame holding the raw observations.
#' @param y Name (string) of the numeric response column; also used as the
#'   y-axis label.
#' @param x Name (string) of the grouping column shown on the x axis; also
#'   used as the x-axis label.
#' @param fill Name (string) of the grouping column mapped to the bar fill;
#'   also used as the legend title.
#' @param ... Further arguments forwarded to `ggplot2::geom_bar()`.
#' @param fill_colors Fill colours for the levels of `fill`. When `fill` has
#'   more levels than colours are supplied, the default discrete fill scale
#'   is used instead so the plot still renders.
#' @return A ggplot object.
plotbar <- function(data = dafnia, y = "Feeding", x = "Lake", fill = "Temp",
                    ..., fill_colors = c("royalblue1", "peachpuff1")) {
  # require() only returns FALSE when the package is missing; fail loudly.
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package 'ggplot2' is required by plotbar().", call. = FALSE)
  }

  summary_tbl <- sumtable(data = data, y = y, x1 = x, x2 = fill)
  dodge <- ggplot2::position_dodge(width = 0.9)

  # A manual scale errors when it has fewer colours than fill levels.
  n_fill <- length(unique(summary_tbl[[fill]]))
  fill_scale <- if (n_fill <= length(fill_colors)) {
    ggplot2::scale_fill_manual(values = fill_colors, name = fill)
  } else {
    ggplot2::scale_fill_discrete(name = fill)
  }

  # The .data pronoun looks columns up by name inside the plotted data,
  # instead of capturing external vectors with `$` / `[[` inside aes().
  ggplot2::ggplot(
    summary_tbl,
    ggplot2::aes(
      x = factor(.data[[x]]),
      y = .data[["mean"]],
      fill = factor(.data[[fill]])
    )
  ) +
    ggplot2::geom_bar(stat = "identity", position = dodge, ...) +
    ggplot2::geom_errorbar(
      ggplot2::aes(
        ymax = .data[["mean"]] + .data[["se"]],
        ymin = .data[["mean"]] - .data[["se"]]
      ),
      position = dodge,
      width = 0.25
    ) +
    ggplot2::labs(x = x, y = y) +
    fill_scale
}
# Draw the same grouped bar plot for each response variable in turn.
# print() is needed because values are not auto-printed inside a loop.
for (response in c("Feeding", "Survival")) {
  print(plotbar(data = dafnia, y = response))
}