Push the knit button!

library(tidyverse) # contains ggplot2, dplyr, tidyr, etc
library(scales)
library(colorspace)
library(agridat) # for datasets

nass.cotton dataset

glimpse(nass.cotton)
## Rows: 2,338
## Columns: 4
## $ year  <int> 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866…
## $ state <fct> Alabama, Arkansas, Florida, Georgia, Louisiana, Mississippi, Nor…
## $ acres <dbl> 977000, 489000, 155000, 895000, 1020000, 1668000, 390000, 402000…
## $ yield <int> 120, 198, 123, 122, 57, 86, 124, 116, 102, 121, 325, 152, 189, 1…

Exercise 3.1

  • Note: the sequential palette is ag_GrnYl from colorspace.
  • Note: look carefully at the format of the legend.
# Fill in every ... below, then change eval = FALSE to eval = TRUE when done.
# Cotton yield against year, using the nass.cotton data as-is.
# NOTE(review): glimpse(nass.cotton) shows years starting at 1866, while the
# subtitle below says "1900 to 2011" - confirm against the target plot.
ggplot(nass.cotton, aes(year, yield)) + 
  geom_...(aes(color = ...)) + 
  facet_wrap(~...) + 
  scale_y_...(name = "Yield") + 
  # Sequential "ag_GrnYl" palette on a log10-transformed colour scale.
  # trans_breaks() picks the breaks using log10 and its inverse 10^x;
  # trans_format() + math_format() write each label as a power of ten.
  scale_color_continuous_sequential(palette = "ag_GrnYl",
                                    trans = "log10",
                                    breaks = trans_breaks("log10", function(x) 10^x),
                                    labels = trans_format("log10", math_format(10.^.x))) +
  labs(x = "Year", title = "US Cotton Yields", subtitle = "1900 to 2011") 

Exercise 3.2

# Fill in every ... below, then change eval = FALSE to eval = TRUE when done.

# Copy of the data without the state column. A layer whose data lacks the
# faceting variable is not split by facet_wrap(), so it shows in every panel
# (presumably the facets are by state - confirm when filling in the blanks).
shadow_dat <- nass.cotton %>% mutate(state = NULL) 
ggplot(nass.cotton, aes(..., ...)) +
  # Background layer: points from shadow_dat, all drawn in gray.
  geom_point(data = shadow_dat, color = "gray") +
  # Foreground layer: points from nass.cotton, added after the gray layer.
  geom_point() +
  facet_wrap(~...) + 
  # y-axis on a log10 scale.
  scale_y_log10() + 
  labs(...)

hazell.vegetables dataset

glimpse(hazell.vegetables)
## Rows: 6
## Columns: 5
## $ year     <fct> y1, y2, y3, y4, y5, y6
## $ carrot   <int> 292, 179, 114, 247, 426, 259
## $ celery   <int> -128, 560, 648, 544, 182, 850
## $ cucumber <int> 420, 187, 366, 249, 322, 159
## $ pepper   <int> 579, 639, 379, 924, 5, 569

Exercise 3.3

# Fill in every ... below, then change eval = FALSE to eval = TRUE when done.
# Celery values by year; the fill aesthetic is the logical test celery > 0.
ggplot(hazell.vegetables, aes(year, celery, fill = celery > 0)) + 
  geom_...() + 
  # Reference layer placed at y = 0.
  geom_...(yintercept = 0) + 
  # Scale whose labels are formatted with dollar (presumably scales::dollar,
  # since scales is attached at the top of the file).
  scale_...(labels = dollar) + 
  labs(y = "Gross profit", x = "Year",
       title = "Celery", fill = "Profit") +
  # Legend for celery > 0: FALSE -> "No" (#ff1a1a), TRUE -> "Yes" (#008000).
  scale_fill_manual(labels = c("No", "Yes"),
                    values = c("#ff1a1a", "#008000")) +
  # x scale relabelled with the integers 1 to 6 (year has six levels, y1..y6).
  scale_x_...(labels = 1:6)

hanks.sprinkler dataset

glimpse(hanks.sprinkler)
## Rows: 108
## Columns: 7
## $ block   <fct> B1, B1, B1, B1, B1, B1, B1, B1, B1, B1, B1, B1, B1, B1, B1, B1…
## $ row     <int> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8,…
## $ subplot <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8,…
## $ gen     <fct> Luke, Luke, Luke, Luke, Luke, Luke, Luke, Luke, Luke, Luke, Lu…
## $ yield   <dbl> 2.4, 2.7, 5.6, 7.5, 7.9, 7.1, 6.1, 7.3, 7.4, 6.7, 3.8, 1.8, 2.…
## $ irr     <int> 1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 6, 5, 4,…
## $ dir     <fct> N, N, N, N, N, N, S, S, S, S, S, S, N, N, N, N, N, N, S, S, S,…

Exercise 3.4

  • You can change the order in which legends appear with guides(). For example, guides(color = guide_legend(order = 1)) makes the color legend appear first.
# Fill in every ... below, then change eval = FALSE to eval = TRUE when done.
# Layout grid: row on the x-axis and subplot on the y-axis, both as factors.
ggplot(hanks.sprinkler, aes(factor(row), factor(subplot))) +
  # Layer whose fill is mapped to dir.
  ...(aes(fill = dir)) +
  # Layer whose colour is mapped to irr (as a factor), drawn at size 9.
  ...(aes(color = factor(irr)), size = 9) +
  # Layer whose label is mapped to block.
  ...(aes(label = block)) +
  # Manual scale using black and gray, titled "Direction of sprinkler"
  # (presumably the fill scale for dir - confirm when filling in the blank).
  ...(values = c("black", "gray"), 
                    name = "Direction of\n sprinkler") + 
  # Qualitative colorspace palette "Set 3" for the discrete colour scale.
  scale_color_discrete_qualitative(palette = "Set 3") +
  ...(x = "Row", y = "Subplot", color = "Irrigation") + 
  # so that color legend appears before the fill legend
  guides(fill = guide_legend(order = 2),
         color = guide_legend(order = 1))

minnesota.barley.weather dataset

glimpse(minnesota.barley.weather)
## Rows: 719
## Columns: 8
## $ site   <fct> Morris, Morris, Morris, Morris, Morris, Morris, Morris, Morris,…
## $ year   <int> 1927, 1927, 1927, 1927, 1927, 1927, 1927, 1927, 1927, 1927, 192…
## $ mo     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, …
## $ cdd    <int> 0, 0, 0, 0, 3, 126, 101, 94, 142, 0, 0, 0, 0, 0, 0, 0, 30, 12, …
## $ hdd    <int> 1546, 1259, 993, 586, 368, 124, 37, 66, 219, 443, 1117, 1948, 1…
## $ precip <dbl> 0.78, 0.50, 2.62, 3.26, 2.66, 1.70, 2.73, 1.30, 2.42, 0.81, 1.2…
## $ min    <dbl> 3.9, 11.3, 23.7, 34.9, 41.4, 52.9, 54.5, 51.3, 50.2, 37.2, 17.8…
## $ max    <dbl> 23.0, 28.6, 42.1, 55.9, 64.9, 77.2, 79.5, 80.4, 74.5, 64.0, 34.…

Exercise 3.5

  • The plot shows the maximum and minimum temperatures in Celsius over time by site.
  • The line colors are #166ee0 and #e0161d.
# Fill in every ... below, then change eval = FALSE to eval = TRUE when done.
# Add a Date column built from year and mo, pinned to the first day of the
# month ("01"), so the observations can be plotted on a date axis.
df <- minnesota.barley.weather %>% 
  mutate(date = as.Date(paste(year, mo, "01", sep = "-")))
  
# Convert temperatures from degrees Fahrenheit to degrees Celsius.
# `x` is a numeric vector; the result has the same length as `x`.
farenheight_to_celsius <- function(x) {
  offset_from_freezing <- x - 32
  offset_from_freezing * 5 / 9
}

# Minimum and maximum temperature per site, converted with
# farenheight_to_celsius() before plotting.
ggplot(df, aes(x = ...)) + 
  # Minimum temperature, drawn in #166ee0.
  ...(aes(y = farenheight_to_celsius(min)), color = "#166ee0") + 
  # Maximum temperature, drawn in #e0161d.
  ...(aes(y = farenheight_to_celsius(max)), color = "#e0161d")  + 
  facet_wrap(~site) + 
  # `labels` is spelled out in full instead of relying on partial argument
  # matching of `label`; the function appends the unit to each axis value.
  ...(name = "Temperature",
                     labels = function(x) paste0(x, "°C")) +
  # One x-axis break every three years, printed as a four-digit year.
  scale_x_date(breaks = scales::date_breaks(width = "3 year"),
               date_labels = "%Y",
               name = "Time")  +
  ggtitle("Minnesota Monthly Temperature 1927-1936")