These slides are viewed best by Chrome or Firefox and occasionally need to be refreshed if elements did not load properly. See here for the PDF .
Press the right arrow to progress to the next slide!
Lecturer: Emi Tanaka
ETC5521.Clayton-x@monash.edu
Week 6 - Session 2
data(gathmann.bt, package = "agridat")df1 <- gathmann.bt %>% pivot_longer(-gen, values_to = "abundance", names_to = "species") %>% mutate(species = case_when(species=="thysan" ~ "Thrip", TRUE ~ "Spider"))skimr::skim(df1)
## ── Data Summary ────────────────────────## Values## Name df1 ## Number of rows 32 ## Number of columns 3 ## _______________________ ## Column type frequency: ## character 1 ## factor 1 ## numeric 1 ## ________________________ ## Group variables None ## ## ── Variable type: character ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate min max empty n_unique whitespace## 1 species 0 1 5 6 0 2 0## ## ── Variable type: factor ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate ordered n_unique top_counts ## 1 gen 0 1 FALSE 2 Bt: 16, ISO: 16## ## ── Variable type: numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist ## 1 abundance 0 1 6.01 6.36 0 0.6 2.5 10.7 18.4 ▇▂▃▁▂
ggplot(df1, aes(gen, abundance, color = gen)) + ggbeeswarm::geom_quasirandom(size = 3) + facet_wrap(~species, scales = "free") + scale_color_discrete_qualitative() + guides(color = FALSE) + labs(x = "", y = "Abundance") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
data(gathmann.bt, package = "agridat")df1 <- gathmann.bt %>% pivot_longer(-gen, values_to = "abundance", names_to = "species") %>% mutate(species = case_when(species=="thysan" ~ "Thrip", TRUE ~ "Spider"))skimr::skim(df1)
## ── Data Summary ────────────────────────## Values## Name df1 ## Number of rows 32 ## Number of columns 3 ## _______________________ ## Column type frequency: ## character 1 ## factor 1 ## numeric 1 ## ________________________ ## Group variables None ## ## ── Variable type: character ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate min max empty n_unique whitespace## 1 species 0 1 5 6 0 2 0## ## ── Variable type: factor ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate ordered n_unique top_counts ## 1 gen 0 1 FALSE 2 Bt: 16, ISO: 16## ## ── Variable type: numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist ## 1 abundance 0 1 6.01 6.36 0 0.6 2.5 10.7 18.4 ▇▂▃▁▂
ggplot(df1, aes(gen, abundance, color = gen)) + ggbeeswarm::geom_quasirandom(size = 3) + facet_wrap(~species, scales = "free") + scale_color_discrete_qualitative() + guides(color = FALSE) + labs(x = "", y = "Abundance") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
H0:μx−μy=0 vs. H1:μx−μy≠0
T∗=ˉX−ˉYSE(ˉX−ˉY).
with(gathmann.bt, t.test(thysan[gen=="ISO"], thysan[gen=="Bt"], alternative = "two.sided", var.equal = TRUE, conf.level = 0.95))
## ## Two Sample t-test## ## data: thysan[gen == "ISO"] and thysan[gen == "Bt"]## t = -3.2182, df = 14, p-value = 0.006192## alternative hypothesis: true difference in means is not equal to 0## 95 percent confidence interval:## -9.248813 -1.851187## sample estimates:## mean of x mean of y ## 8.725 14.275
]
Note significance test suggested is different in Achim Gathmann et al. (2006) “Impact of Bt Maize Pollen (MON810) on Lepidopteran Larvae Living on Accompanying Weeds.” Molecular Ecology 15: 2677–85.
H0:P(X>Y)=P(X<Y)
vs.
with(gathmann.bt, wilcox.test(thysan[gen=="ISO"], thysan[gen=="Bt"], alternative = "two.sided", conf.int = TRUE, conf.level = 0.95))
## ## Wilcoxon rank sum exact test## ## data: thysan[gen == "ISO"] and thysan[gen == "Bt"]## W = 7, p-value = 0.006993## alternative hypothesis: true location shift is not equal to 0## 95 percent confidence interval:## -9.4 -2.4## sample estimates:## difference in location ## -6.3
## gen thysan aranei## 1 Bt 16.6 0.80## 2 Bt 16.4 0.80## 3 Bt 11.0 0.60## 4 Bt 16.8 0.40## 5 Bt 10.6 0.60## 6 Bt 18.4 0.80## 7 Bt 14.2 0.00## 8 Bt 10.2 0.60## 9 ISO 6.2 0.75## 10 ISO 10.0 0.20## 11 ISO 11.8 1.00## 12 ISO 15.6 0.80## 13 ISO 7.6 0.00## 14 ISO 7.4 0.00## 15 ISO 7.2 0.60## 16 ISO 4.0 0.40
thysani=β0+β1I(geni=ISO)+ei where ei∼NID(0,σ2).
lm(thysan ~ gen, data = gathmann.bt) %>% confint("genISO", level = 0.95)
## 2.5 % 97.5 %## genISO -9.248813 -1.851187
## gen thysan aranei## 1 Bt 16.6 0.80## 2 Bt 16.4 0.80## 3 Bt 11.0 0.60## 4 Bt 16.8 0.40## 5 Bt 10.6 0.60## 6 Bt 18.4 0.80## 7 Bt 14.2 0.00## 8 Bt 10.2 0.60## 9 ISO 6.2 0.75## 10 ISO 10.0 0.20## 11 ISO 11.8 1.00## 12 ISO 15.6 0.80## 13 ISO 7.6 0.00## 14 ISO 7.4 0.00## 15 ISO 7.2 0.60## 16 ISO 4.0 0.40
thysani=β0+β1I(geni=ISO)+ei where ei∼NID(0,σ2).
lm(thysan ~ gen, data = gathmann.bt) %>% confint("genISO", level = 0.95)
## 2.5 % 97.5 %## genISO -9.248813 -1.851187
data(urquhart.feedlot, package = "agridat")df4 <- urquhart.feedlot %>% pivot_longer(c(weight1, weight2), names_to = "when", values_to = "weight") %>% mutate(when = factor(as.character(when), labels = c("initial", "final"), levels = c("weight1", "weight2")), diet = factor(diet, levels = c("High", "Medium", "Low")))skimr::skim(df4)
## ── Data Summary ────────────────────────## Values## Name df4 ## Number of rows 134 ## Number of columns 5 ## _______________________ ## Column type frequency: ## factor 2 ## numeric 3 ## ________________________ ## Group variables None ## ## ── Variable type: factor ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate ordered n_unique top_counts ## 1 diet 0 1 FALSE 3 Low: 64, Med: 46, Hig: 24## 2 when 0 1 FALSE 2 ini: 67, fin: 67 ## ## ── Variable type: numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist ## 1 animal 0 1 34 19.4 1 17.2 34 50.8 67 ▇▇▇▇▇## 2 herd 0 1 25.6 10.7 3 19 31 34 36 ▂▁▂▂▇## 3 weight 0 1 863. 182. 530 705 842. 1027 1248 ▃▇▃▆▃
ggplot(df4, aes(when, weight, color = diet, group = animal)) + geom_point(size = 3) + facet_wrap(~herd, nrow = 2) + geom_line() + labs(x = "", y = "Weight", color = "Diet")
coef(lm((weight2 - weight1) ~ diet, data = urquhart.feedlot))
## (Intercept) dietLow dietMedium ## 332.666667 -4.666667 -33.971014
# herd needs to be factor not integerdat4 <- mutate(urquhart.feedlot, herdf = factor(herd))coef(lm((weight2 - weight1) ~ herdf + diet, data = dat4))
## (Intercept) herdf9 herdf16 herdf19 herdf24 herdf31 herdf32 herdf33 herdf34 herdf35 herdf36 dietLow dietMedium ## 354.257353 -91.148529 -51.312039 7.410059 -63.221311 -4.666667 -51.189338 -36.083555 -15.179622 -3.423367 -34.974083 2.872233 -23.580408
Last model is the same as modelling the final weight with the initial weight as a covariate with slope fixed to 1:
coef(lm(weight2 ~ offset(weight1) + herdf + diet, data = dat4))
## (Intercept) herdf9 herdf16 herdf19 herdf24 herdf31 herdf32 herdf33 herdf34 herdf35 herdf36 dietLow dietMedium ## 354.257353 -91.148529 -51.312039 7.410059 -63.221311 -4.666667 -51.189338 -36.083555 -15.179622 -3.423367 -34.974083 2.872233 -23.580408
Estimating slope for initial weight from the data:
coef(lm(weight2 ~ weight1 + herdf + diet, data = dat4))
## (Intercept) weight1 herdf9 herdf16 herdf19 herdf24 herdf31 herdf32 herdf33 herdf34 herdf35 herdf36 dietLow dietMedium ## 200.440174 1.243238 -79.102111 -51.238137 -6.864643 -75.406093 -33.044411 -56.517848 -62.152563 -23.610465 -29.219641 -75.555713 1.212633 -30.912720
dat4 <- lm(weight2 ~ weight1 + herdf + diet, data = dat4) %>% broom::augment() ggplot(dat4, aes(.fitted, .resid)) + geom_point(data = select(dat4, -herdf), size = 2, color = "gray") + geom_point(size = 2, aes(color = herdf)) + geom_hline(yintercept = 0) + labs(x = "Fitted values", y = "Residual") + scale_color_discrete_qualitative() + facet_wrap(~herdf, nrow = 2) + guides(color = FALSE)
data(gomez.nitrogen, package = "agridat")skimr::skim(gomez.nitrogen)
## ── Data Summary ────────────────────────## Values ## Name gomez.nitrogen## Number of rows 96 ## Number of columns 4 ## _______________________ ## Column type frequency: ## factor 3 ## numeric 1 ## ________________________ ## Group variables None ## ## ── Variable type: factor ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate ordered n_unique top_counts ## 1 trt 0 1 FALSE 8 T1: 12, T2: 12, T3: 12, T4: 12## 2 rep 0 1 FALSE 4 R1: 24, R2: 24, R3: 24, R4: 24## 3 stage 0 1 FALSE 3 P1: 32, P2: 32, P3: 32 ## ## ── Variable type: numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist ## 1 nitro 0 1 2.24 0.796 1.11 1.46 2.21 2.93 3.84 ▇▅▅▅▃
ggplot(gomez.nitrogen, aes(trt, nitro, color = stage)) + geom_point(size = 3) + scale_color_discrete_qualitative() + labs(x = "Fertilizer treatment", y = "Soil nitrogen content (%)", color = "Growth stage")
data(gomez.nitrogen, package = "agridat")skimr::skim(gomez.nitrogen)
## ── Data Summary ────────────────────────## Values ## Name gomez.nitrogen## Number of rows 96 ## Number of columns 4 ## _______________________ ## Column type frequency: ## factor 3 ## numeric 1 ## ________________________ ## Group variables None ## ## ── Variable type: factor ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate ordered n_unique top_counts ## 1 trt 0 1 FALSE 8 T1: 12, T2: 12, T3: 12, T4: 12## 2 rep 0 1 FALSE 4 R1: 24, R2: 24, R3: 24, R4: 24## 3 stage 0 1 FALSE 3 P1: 32, P2: 32, P3: 32 ## ## ── Variable type: numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist ## 1 nitro 0 1 2.24 0.796 1.11 1.46 2.21 2.93 3.84 ▇▅▅▅▃
ggplot(gomez.nitrogen, aes(trt, nitro, color = stage)) + geom_point(size = 3) + scale_color_discrete_qualitative() + labs(x = "Fertilizer treatment", y = "Soil nitrogen content (%)", color = "Growth stage")
data(gomez.nitrogen, package = "agridat")skimr::skim(gomez.nitrogen)
## ── Data Summary ────────────────────────## Values ## Name gomez.nitrogen## Number of rows 96 ## Number of columns 4 ## _______________________ ## Column type frequency: ## factor 3 ## numeric 1 ## ________________________ ## Group variables None ## ## ── Variable type: factor ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate ordered n_unique top_counts ## 1 trt 0 1 FALSE 8 T1: 12, T2: 12, T3: 12, T4: 12## 2 rep 0 1 FALSE 4 R1: 24, R2: 24, R3: 24, R4: 24## 3 stage 0 1 FALSE 3 P1: 32, P2: 32, P3: 32 ## ## ── Variable type: numeric ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist ## 1 nitro 0 1 2.24 0.796 1.11 1.46 2.21 2.93 3.84 ▇▅▅▅▃
ggplot(gomez.nitrogen, aes(trt, nitro, color = stage)) + geom_point(size = 3) + scale_color_discrete_qualitative() + labs(x = "Fertilizer treatment", y = "Soil nitrogen content (%)", color = "Growth stage")
lm(nitro ~ stage + trt, data = gomez.nitrogen)
fit1 <- lm(nitro ~ stage, data = gomez.nitrogen)fit1data <- broom::augment(fit1) %>% left_join(gomez.nitrogen, by=c("nitro", "stage")) %>% mutate(trt = fct_reorder(trt, .resid))ggplot(fit1data, aes(trt, .resid)) +geom_boxplot() + labs(x = "Fertilizer treatment", y = "Residual of fit1")
fit1 <- lm(nitro ~ stage, data = gomez.nitrogen)fit1data <- broom::augment(fit1) %>% left_join(gomez.nitrogen, by=c("nitro", "stage")) %>% mutate(trt = fct_reorder(trt, .resid))ggplot(fit1data, aes(trt, .resid)) +geom_boxplot() + labs(x = "Fertilizer treatment", y = "Residual of fit1")
fit2 <- lm(nitro ~ stage + trt, data = gomez.nitrogen)fit2data <- broom::augment(fit2) %>% mutate(trt = fct_reorder(trt, .resid))ggplot(fit2data, aes(trt, .resid)) + geom_boxplot() + labs(x = "Fertilizer treatment", y = "Residual of fit2")
library(emmeans)confint(pairs(emmeans(fit2, "trt"), adjust="none"))
## contrast estimate SE df lower.CL upper.CL## T1 - T2 -0.2117 0.116 86 -0.4420 0.018654## T1 - T3 -0.3375 0.116 86 -0.5678 -0.107180## T1 - T4 -0.2308 0.116 86 -0.4612 -0.000513## T1 - T5 -0.0717 0.116 86 -0.3020 0.158654## T1 - T6 -0.1492 0.116 86 -0.3795 0.081154## T1 - T7 -0.3592 0.116 86 -0.5895 -0.128846## T1 - T8 -0.2333 0.116 86 -0.4637 -0.003013## T2 - T3 -0.1258 0.116 86 -0.3562 0.104487## T2 - T4 -0.0192 0.116 86 -0.2495 0.211154## T2 - T5 0.1400 0.116 86 -0.0903 0.370320## T2 - T6 0.0625 0.116 86 -0.1678 0.292820## T2 - T7 -0.1475 0.116 86 -0.3778 0.082820## T2 - T8 -0.0217 0.116 86 -0.2520 0.208654## T3 - T4 0.1067 0.116 86 -0.1237 0.336987## T3 - T5 0.2658 0.116 86 0.0355 0.496154## T3 - T6 0.1883 0.116 86 -0.0420 0.418654## T3 - T7 -0.0217 0.116 86 -0.2520 0.208654## T3 - T8 0.1042 0.116 86 -0.1262 0.334487## T4 - T5 0.1592 0.116 86 -0.0712 0.389487## T4 - T6 0.0817 0.116 86 -0.1487 0.311987## T4 - T7 -0.1283 0.116 86 -0.3587 0.101987## T4 - T8 -0.0025 0.116 86 -0.2328 0.227820## T5 - T6 -0.0775 0.116 86 -0.3078 0.152820## T5 - T7 -0.2875 0.116 86 -0.5178 -0.057180## T5 - T8 -0.1617 0.116 86 -0.3920 0.068654## T6 - T7 -0.2100 0.116 86 -0.4403 0.020320## T6 - T8 -0.0842 0.116 86 -0.3145 0.146154## T7 - T8 0.1258 0.116 86 -0.1045 0.356154## ## Results are averaged over the levels of: stage ## Confidence level used: 0.95
Unadjusted
ˉX−ˉY±tn−t,1−α/2×SE(ˉX−ˉY) where α=0.05 and t is the number of treatments.
Bonferonni adjustment
ˉX−ˉY±tn−t,1−α/(2m)×SE(ˉX−ˉY) where m is the number of pairwise comparisons.
choose(8, 2)
## [1] 28
confint(pairs(emmeans(fit2, "trt"), adjust="bonferroni"))
## contrast estimate SE df lower.CL upper.CL## T1 - T2 -0.2117 0.116 86 -0.585 0.1619## T1 - T3 -0.3375 0.116 86 -0.711 0.0361## T1 - T4 -0.2308 0.116 86 -0.604 0.1427## T1 - T5 -0.0717 0.116 86 -0.445 0.3019## T1 - T6 -0.1492 0.116 86 -0.523 0.2244## T1 - T7 -0.3592 0.116 86 -0.733 0.0144## T1 - T8 -0.2333 0.116 86 -0.607 0.1402## T2 - T3 -0.1258 0.116 86 -0.499 0.2477## T2 - T4 -0.0192 0.116 86 -0.393 0.3544## T2 - T5 0.1400 0.116 86 -0.234 0.5136## T2 - T6 0.0625 0.116 86 -0.311 0.4361## T2 - T7 -0.1475 0.116 86 -0.521 0.2261## T2 - T8 -0.0217 0.116 86 -0.395 0.3519## T3 - T4 0.1067 0.116 86 -0.267 0.4802## T3 - T5 0.2658 0.116 86 -0.108 0.6394## T3 - T6 0.1883 0.116 86 -0.185 0.5619## T3 - T7 -0.0217 0.116 86 -0.395 0.3519## T3 - T8 0.1042 0.116 86 -0.269 0.4777## T4 - T5 0.1592 0.116 86 -0.214 0.5327## T4 - T6 0.0817 0.116 86 -0.292 0.4552## T4 - T7 -0.1283 0.116 86 -0.502 0.2452## T4 - T8 -0.0025 0.116 86 -0.376 0.3711## T5 - T6 -0.0775 0.116 86 -0.451 0.2961## T5 - T7 -0.2875 0.116 86 -0.661 0.0861## T5 - T8 -0.1617 0.116 86 -0.535 0.2119## T6 - T7 -0.2100 0.116 86 -0.584 0.1636## T6 - T8 -0.0842 0.116 86 -0.458 0.2894## T7 - T8 0.1258 0.116 86 -0.248 0.4994## ## Results are averaged over the levels of: stage ## Confidence level used: 0.95 ## Conf-level adjustment: bonferroni method for 28 estimates
ˆY=−0.002+0.979x1+0.998x2+0.973x3+0.995x4
lm(y ~ x1 + x2 + x3 + x4, data=mystery_data) %>% broom::tidy()
## # A tibble: 5 × 5## term estimate std.error statistic p.value## <chr> <dbl> <dbl> <dbl> <dbl>## 1 (Intercept) -0.00204 0.0109 -0.187 0.852## 2 x1 0.979 0.0151 64.8 0 ## 3 x2 0.998 0.0155 64.4 0 ## 4 x3 0.973 0.0154 63.1 0 ## 5 x4 0.995 0.0109 91.6 0
lm(y ~ x1 + x2 + x3 + x4, data=mystery_data) %>% broom::augment() %>% ggplot(aes(.fitted, .resid)) + geom_point() + labs(x="Fitted Values", y="Residual")
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
Lecturer: Emi Tanaka
ETC5521.Clayton-x@monash.edu
Week 6 - Session 2
Lecturer: Emi Tanaka
ETC5521.Clayton-x@monash.edu
Week 6 - Session 2
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |