Exercise 2.1

df <- bind_rows(mutate(nass.wheat, crop = "Wheat"),
                mutate(nass.corn, crop = "Corn"),
                mutate(nass.soybean, crop = "Soy")) %>% 
  filter(state %in% c("New York", "Michigan", "Indiana", "Montana", "Washington"))

ggplot(df, aes(year, yield)) + 
  geom_point() + 
  geom_smooth() + 
  facet_grid(state ~ crop)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Exercise 2.2

ggplot(nass.corn, aes(yield, fill = cut(year, 4, dig.lab = 4))) + 
  geom_density(alpha = 0.6) +
  labs(fill = "Period", x = "Average Yield", y = "Density")

Exercise 2.3

df2 <- left_join(rename(nass.wheat, 
                        yield_wheat = yield, acres_wheat = acres), 
                 rename(nass.corn, 
                        yield_corn = yield, acres_corn = acres),
                 by = c("year", "state"))

ggplot(df2, aes(yield_corn, yield_wheat)) + 
  geom_point(aes(color = year)) +
  geom_density_2d(color = "black") + 
  labs(x = "Yield of corn", y = "Yield of wheat", color = "Year")
## Warning: Removed 153 rows containing non-finite values (stat_density2d).
## Warning: Removed 153 rows containing missing values (geom_point).

Exercise 2.4

df3 <- bind_rows(mutate(nass.wheat, crop = "Wheat"),
                mutate(nass.corn, crop = "Corn"),
                mutate(nass.soybean, crop = "Soy")) 

ggplot(df3, aes(crop, yield)) + 
  geom_violin() + 
  geom_boxplot(width = 0.1)

Exercise 2.5

df4 <- nass.wheat %>% 
  filter(year == 2011) 
df4highlight <- filter(df4, acres > 4000000 | yield > 80)

ggplot(df4, aes(acres, yield)) + 
  geom_point() + 
  geom_point(data = df4highlight, color = "red") + 
  geom_text(data = df4highlight, aes(label = state), nudge_y = 5) +
  ggtitle("Year 2011")

Exercise 2.6

df5 <- glass %>% 
  mutate(id = paste0("glass", 1:n())) %>% 
  pivot_longer(-c(RI, type, id), names_to = "element", values_to = "oxide")

ggplot(df5, aes(RI, oxide)) + 
  geom_point(aes(color = type)) + 
  facet_wrap(~element, scale = "free_y") + 
  labs(x = "Refractive index", y = "Oxide content", color = "Glass")

Exercise 2.7

ggplot(df5, aes(type, oxide)) + 
  geom_violin() + 
  facet_wrap(~element, scale = "free_y")

Exercise 2.8

elements <- sort(c("Na", "Mg", "Al", "Si", "K", "Ca", "Ba", "Fe"))
nele <- length(elements)
# this uses `purrr` but many other ways to do this
# below is like using is two for loops, 
# but `purrrr::map_dfr` ensures the return type is data frame appended by row
df6 <- map_dfr(1:(nele - 1),  
               function(i) {
                 map_dfr((i + 1):nele, function(j) {
                   ele1 <- elements[i]
                   ele2 <- elements[j]
                   mutate(glass, 
                          element1 = ele1, element2 = ele2, 
                          oxide1 = glass[[ele1]], oxide2 = glass[[ele2]]) %>% 
                   select(RI, type, oxide1, oxide2, element1, element2)
                })}) 

cordat <- cor(select(glass, all_of(elements))) %>% 
  as_tibble() %>% 
  mutate(element1 = colnames(.)) %>% 
  pivot_longer(-element1, 
               names_to = "element2",
               values_to = "cor") %>% 
  mutate(i1 = as.integer(factor(element1)),
         i2 = as.integer(factor(element2))) %>% 
  filter(i1 > i2) %>% 
  # get the center points
  rowwise() %>% 
  mutate(oxide1 = mean(range(glass[[element1]])),
         oxide2 = mean(range(glass[[element2]])))

# also check out `GGally::ggpairs(select(glass, all_of(elements)))`
  
ggplot(df6, aes(oxide1, oxide2)) + 
  geom_point() + 
  # remember formula is y ~ x
  facet_grid(element2 ~ element1, scales = "free") +
  geom_text(data = cordat, aes(label = round(cor, 2)), color = "red", size = 3)

Exercise 2.9

fit <- lm(Al ~ Ba, data = glass) %>% 
  broom::augment()
ggplot(fit, aes(sample = .resid)) + 
  geom_qq() + 
  geom_qq_line(color = "red")

Exercise 2.10

ggplot(fit, aes(Ba, .resid)) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_point() +
  labs(y = "Residual")