Push the knit
button!
library(tidyverse) # contains ggplot2, dplyr, tidyr, etc
library(agridat) # for `nass.wheat`, `nass.corn`, `nass.soybean` datasets
library(catdata) # for `heart` data
nass.wheat
, nass.corn
, nass.soybean
datasetglimpse(nass.wheat)
## Rows: 5,963
## Columns: 4
## $ year <int> 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866…
## $ state <fct> Alabama, Arkansas, California, Connecticut, Delaware, Georgia, I…
## $ acres <dbl> 125000, 50000, 650000, 2000, 59000, 245000, 2300000, 1550000, 11…
## $ yield <dbl> 5.0, 6.5, 18.0, 17.5, 11.0, 4.0, 10.5, 10.0, 13.0, 19.0, 6.5, 13…
glimpse(nass.corn)
## Rows: 6,381
## Columns: 4
## $ year <int> 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866, 1866…
## $ state <fct> Alabama, Arkansas, California, Connecticut, Delaware, Florida, G…
## $ acres <dbl> 1050000, 280000, 42000, 57000, 200000, 125000, 1770000, 4925000,…
## $ yield <dbl> 9.0, 18.0, 28.0, 34.0, 23.0, 9.0, 6.0, 29.0, 36.5, 32.0, 28.0, 2…
glimpse(nass.soybean)
## Rows: 2,528
## Columns: 4
## $ year <int> 1924, 1924, 1924, 1924, 1924, 1924, 1924, 1924, 1924, 1924, 1924…
## $ state <fct> Alabama, Arkansas, Delaware, Georgia, Illinois, Indiana, Iowa, K…
## $ acres <dbl> 3000, 3000, 12000, 10000, 115000, 66000, 10000, 2000, 9000, 8000…
## $ yield <dbl> 6.5, 6.5, 11.0, 5.5, 12.0, 9.9, 12.0, 11.0, 9.5, 8.0, 11.8, 13.0…
geom_smooth
# fill all ... and change eval = FALSE to eval = TRUE when done
df <- bind_rows(mutate(nass.wheat, crop = "Wheat"),
mutate(nass.corn, crop = "Corn"),
mutate(nass.soybean, crop = "Soy")) %>%
filter(state %in% c("New York", "Michigan", "Indiana", "Montana", "Washington"))
ggplot(df, aes(..., ...)) +
geom_...() +
...() +
facet_grid(... ~ ...)
nass.corn
data.cut
function to divide a numerical variable into intervals.# fill all ... and change eval = FALSE to eval = TRUE when done
ggplot(nass.corn, aes(..., fill = cut(..., 4, dig.lab = 4))) +
geom_...(alpha = 0.6) +
labs(fill = "Period", x = "Average Yield", y = "Density")
geom_density_2d
# fill all ... and change eval = FALSE to eval = TRUE when done
df2 <- left_join(rename(nass.wheat,
yield_wheat = yield, acres_wheat = acres),
rename(nass.corn,
yield_corn = yield, acres_corn = acres),
by = c("year", "state"))
ggplot(df2, aes(..., ...)) +
geom_...(aes(color = ...)) +
geom_density_2d(color = "black") +
labs(x = "Yield of corn", y = "Yield of wheat", color = "Year")
# fill all ... and change eval = FALSE to eval = TRUE when done
df3 <- bind_rows(mutate(nass.wheat, crop = "Wheat"),
mutate(nass.corn, crop = "Corn"),
mutate(nass.soybean, crop = "Soy"))
ggplot(df3, aes(..., ...)) +
geom_...() +
geom_...(width = 0.1)
# fill all ... and change eval = FALSE to eval = TRUE when done
df4 <- nass.wheat %>%
filter(year == 2011)
df4highlight <- filter(df4, acres > 4000000 | yield > 80)
ggplot(df4, aes(..., yield)) +
...() +
...(data = df4highlight, color = "red") +
geom_text(data = df4highlight, aes(label = ...), nudge_y = 5) +
ggtitle("Year 2011")
glass
datasetdata("glass")
glimpse(glass)
## Rows: 214
## Columns: 10
## $ RI <dbl> 1.52101, 1.51761, 1.51618, 1.51766, 1.51742, 1.51596, 1.51743, 1.…
## $ Na <dbl> 13.64, 13.89, 13.53, 13.21, 13.27, 12.79, 13.30, 13.15, 14.04, 13…
## $ Mg <dbl> 4.49, 3.60, 3.55, 3.69, 3.62, 3.61, 3.60, 3.61, 3.58, 3.60, 3.46,…
## $ Al <dbl> 1.10, 1.36, 1.54, 1.29, 1.24, 1.62, 1.14, 1.05, 1.37, 1.36, 1.56,…
## $ Si <dbl> 71.78, 72.73, 72.99, 72.61, 73.08, 72.97, 73.09, 73.24, 72.08, 72…
## $ K <dbl> 0.06, 0.48, 0.39, 0.57, 0.55, 0.64, 0.58, 0.57, 0.56, 0.57, 0.67,…
## $ Ca <dbl> 8.75, 7.83, 7.78, 8.22, 8.07, 8.07, 8.17, 8.24, 8.30, 8.40, 8.09,…
## $ Ba <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Fe <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.26, 0.00, 0.00, 0.00, 0.11, 0.24,…
## $ type <fct> type1, type1, type1, type1, type1, type1, type1, type1, type1, ty…
glass
dataset after loaded by executing ?glass
in R.# fill all ... and change eval = FALSE to eval = TRUE when done
df5 <- glass %>%
mutate(id = paste0("glass", 1:n())) %>%
pivot_longer(-c(RI, type, id), names_to = "element", values_to = "oxide")
ggplot(df5, aes(..., ...)) +
geom_point(aes(color = ...)) +
facet_wrap(~..., scale = "free_y") +
labs(x = "Refractive index", y = "Oxide content", color = "Glass")
# fill all ... and change eval = FALSE to eval = TRUE when done
ggplot(df5, aes(..., ...)) +
geom_...() +
facet_wrap(~..., scale = "free_y")
GGally
) that does easily but try to challenge yourself without using it.# fill all ... and change eval = FALSE to eval = TRUE when done
elements <- sort(c("Na", "Mg", "Al", "Si", "K", "Ca", "Ba", "Fe"))
nele <- length(elements)
# this uses `purrr` but many other ways to do this
# below is like using is two for loops,
# but `purrrr::map_dfr` ensures the return type is data frame appended by row
df6 <- map_dfr(1:(nele - 1),
function(i) {
map_dfr((i + 1):nele, function(j) {
ele1 <- elements[i]
ele2 <- elements[j]
mutate(glass,
element1 = ele1, element2 = ele2,
oxide1 = glass[[ele1]], oxide2 = glass[[ele2]]) %>%
select(RI, type, oxide1, oxide2, element1, element2)
})})
cordat <- cor(select(glass, all_of(elements))) %>%
as_tibble() %>%
mutate(element1 = colnames(.)) %>%
pivot_longer(-element1,
names_to = "element2",
values_to = "cor") %>%
mutate(i1 = as.integer(factor(element1)),
i2 = as.integer(factor(element2))) %>%
filter(i1 > i2) %>%
# get the center points
rowwise() %>%
mutate(oxide1 = mean(range(glass[[element1]])),
oxide2 = mean(range(glass[[element2]])))
# also check out `GGally::ggpairs(select(glass, all_of(elements)))`
ggplot(df6, aes(..., ...)) +
geom_point() +
# remember formula is y ~ x
facet_grid(... ~ ..., scales = "free") +
geom_text(data = cordat, aes(label = round(cor, 2)), color = "red", size = 3)
Al ~ Ba
.broom::augment(lm(Al ~ Ba, data = glass))
to get a data frame with residuals easily.# fill all ... and change eval = FALSE to eval = TRUE when done
fit <- lm(Al ~ Ba, data = glass) %>%
broom::augment()
ggplot(fit, aes(sample = ...)) +
geom_qq() +
geom_qq_line(color = "red")
Al ~ Ba
.# fill all ... and change eval = FALSE to eval = TRUE when done
ggplot(fit, aes(Ba, .resid)) +
geom_...(yintercept = 0, linetype = "dashed") +
...() +
...(y = "Residual")