This is the supplementary material to an invited commentary for Basole et al. (2021). We provide all code that are used to generate the figures in the commentary in addition to other supplementary figures (and its code).
List of figures
ggplot2
.library(tidyverse)
library(ggtext)
library(patchwork)
library(readxl)
library(nullabor)
library(here)
library(janitor)
library(scales)
df_full <- read_xlsx(here("data/MaskedCoverage-Fig3.xlsx")) %>%
clean_names() %>%
add_row(state = c("OR", "WY", "SD", "WV", "DC", "AL")) %>%
mutate(row = case_when(
state %in% c("ME") ~ 1L,
state %in% c("VT", "NH") ~ 2L,
state %in% c("WA", "ID", "MT", "ND", "MN", "IL", "WI", "MI", "NY", "RI", "MA") ~ 3L,
state %in% c("OR", "NV", "WY", "SD", "IA", "IN", "OH", "PA", "NJ", "CT") ~ 4L,
state %in% c("CA", "UT", "CO", "NE", "MO", "KY", "WV", "VA", "MD", "DE") ~ 5L,
state %in% c("AZ", "NM", "KS", "AR", "TN", "NC", "SC", "DC") ~ 6L,
state %in% c("OK", "LA", "MS", "AL", "GA") ~ 7L,
state %in% c("TX", "FL") ~ 8L,
TRUE ~ 0L),
col = case_when(
state %in% c("WA", "OR", "CA") ~ 1L,
state %in% c("ID", "NV", "UT", "AZ") ~ 2L,
state %in% c("MT", "WY", "CO", "NM") ~ 3L,
state %in% c("ND", "SD", "NE", "KS", "OK", "TX") ~ 4L,
state %in% c("MN", "IA", "MO", "AR", "LA") ~ 5L,
state %in% c("IL", "IN", "KY", "TN", "MS") ~ 6L,
state %in% c("WI", "OH", "WV", "NC", "AL") ~ 7L,
state %in% c("MI", "PA", "VA", "SC", "GA") ~ 8L,
state %in% c("NY", "NJ", "MD", "DC", "FL") ~ 9L,
state %in% c("VT", "RI", "CT", "DE") ~ 10L,
state %in% c("ME", "NH", "MA") ~ 11L,
TRUE ~ 0L
))
df_miss <- df_full %>%
filter(!is.na(readmission_rate))
g1 <- df_miss %>%
mutate(y = readmission_rate * 100) %>%
ggplot(aes(col, row)) +
geom_point(aes(size = coverage_obscured, color = y), alpha = 0.8) +
geom_text(aes(label = percent(y/100, 0.01)), nudge_y = -0.1, size = 2.5) +
labs(color = "Readmission Rate", size = "Coverage") +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$readmission_rate * 100), mid = "#ffffe0") +
theme_void() +
geom_text(data = df_full, aes(label = state), color = "black", nudge_y = 0.05) +
scale_size(range = c(3, 30)) +
scale_y_reverse() +
theme(plot.margin = margin(r = 30))
g2 <- g1 %+% mutate(df_miss, y = colorectal_cancer_screenings) +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$colorectal_cancer_screenings), mid = "#ffffe0") +
labs(color = "Cancer Screening Rate")
g1 + g2 + plot_layout(guides = "collect")
Figure S1: This figure recreates Figure 3 in Basole et al. (2021) using the ggplot2
R-package (Wickham 2016). The code is displayed above by clicking on the CODE button just above the right corner of this plot.
theme_set(theme_classic())
g1 <- ggplot(df_miss, aes(coverage_obscured * 100, readmission_rate * 100)) +
geom_point() +
labs(x = "Coverage (%)", y = "Readmission (%)") +
geom_smooth(method = loess, formula = y ~ x) +
annotate("richtext", x = 80, y = 15.3, label.color = NA, fill = "transparent", label = glue::glue("R<sup>2</sup> = {scales::comma(cor(df_miss$coverage_obscured, df_miss$readmission_rate)^2, 0.001)}"))
g2 <- ggplot(df_miss, aes(coverage_obscured * 100, colorectal_cancer_screenings)) +
geom_point() +
labs(x = "Coverage (%)", y = "Cancer Screening (%)") +
geom_smooth(method = loess, formula = y ~ x) +
annotate("richtext", x = 80, y = 73, label.color = NA, fill = "transparent", label = glue::glue("R<sup>2</sup> = {scales::comma(cor(df_miss$coverage_obscured, df_miss$colorectal_cancer_screenings)^2, 0.001)}"))
g1 + g2
Figure S2: The above figure show an alternative plot design to display the information in Figure S1 and is the same figure as Figure 1 in the main paper. The plot shows a scatter plot of percentage of readmission and coverage on the left and a scatter plot of percentage of cancer screening and coverage on the right. Both plots are superimposed by a local polynomial regression (displayed as a blue line) with confidence interval for the line (displayed in gray). The code is displayed above by clicking on the CODE button just above the right corner of this plot.
set.seed(2021)
lineup_data <- null_permute("colorectal_cancer_screenings") %>%
lineup(true = df_miss, n = 20, pos = 3)
plot_lineup_theirs <- ggplot(lineup_data, aes(col, row)) +
geom_point(aes(size = coverage_obscured, color = colorectal_cancer_screenings), alpha = 0.8) +
theme_void() +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$colorectal_cancer_screenings), mid = "#ffffe0") +
scale_size(range = c(1, 5)) +
scale_y_reverse(expand = c(0.1, 0.2)) +
guides(color = "none", size = "none") +
facet_wrap(~.sample, ncol = 5) +
scale_x_continuous(expand = c(0.1, 0.1)) +
theme(legend.position = "bottom",
strip.text = element_text(size = 18, margin = margin(t = 3, b = 3)),
strip.background = element_rect(color = "black", size = 1.5))
plot_lineup_theirs
Figure S3: The above figure shows a lineup for this tile grid plot where one of the plots is made using the data, and the other nineteen plots are constructed after first permuting the percentage of cancer screening across different states with the missing value structure is preserved. Text and legends have been removed to minimise the bias in reading plots due to the reader being aware of the context. Which plot strikes the most different to you? The code is displayed above by clicking on the CODE button just above the right corner of this plot. Try the other lineups before finding the data plot position at the bottom of this document.
plot_lineup_ours <- ggplot(lineup_data, aes(coverage_obscured * 100, colorectal_cancer_screenings)) +
geom_point() +
geom_smooth(method = loess, formula = y ~ x) +
facet_wrap(~.sample, ncol = 5) +
scale_x_continuous(expand = c(0.1, 0.1)) +
theme(legend.position = "bottom",
strip.text = element_text(size = 18, margin = margin(t = 3, b = 3)),
strip.background = element_rect(color = "black", size = 1.5),
axis.text = element_blank(),
axis.title = element_blank(),
axis.line = element_blank(),
axis.ticks.length = unit(0, "pt"))
plot_lineup_ours
Figure S4: The above figure shows a lineup for the scatter plot design used in Figure S2. The same data used to create Figure S3 (including the null data) is used to create this lineup. The code is displayed above by clicking on the CODE button just above the right corner of this plot. When you are ready, find the position of the data plot is revealed at the bottom of this document.
The following are plots based on data that purposely modifies cancer screening to induce a higher association with the coverage. This higher association is induced (as shown in the code below) by rearranging data by the coverage and modifying the cancer screening percentage so that it is ordered from low to high.
df_false <- df_miss %>%
arrange(coverage_obscured) %>%
mutate(colorectal_cancer_screenings = sort(colorectal_cancer_screenings))
lineup_false_data <- null_permute("colorectal_cancer_screenings") %>%
lineup(true = df_false, n = 20, pos = 5)
plot_lineup_theirs %+% lineup_false_data
Figure S5: The above shows a lineup for the tile grid plot design where the data was purposely manipulated to induce a higher association between the variables of interest. Which plot looks the most strikingly different to you? Try the next lineup to see if you can find the data plot before finding the position of the data plot here.
plot_lineup_ours %+% lineup_false_data
Figure S6: The above shows a lineup for the scatter plot design with the data that was purposely manipulated so that two variables mapped to the x-axis and y-axis have a higher association. How easy was it to spot the data plot compared to Figure S5? You can find the code to generate the above lineup plot by collapsing all codes.
The positions of the data plot for the lineup are as follows:
We expect that you would have struggled to find the data plto in Figure S3 and Figure S4 as we do not observe strong association between the cancer screening rate and coverage. Additionally, we expect that most would spot the data plot in Figure S5 and all would spot the data plot in Figure S6. For those that spot the data plot in Figure S5, we suspect it took longer than spotting the data plot in Figure S6.
We thank Basole et al. (2021) for supplying us the synthetic data to draw the above plots.
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.1 (2020-06-06)
## os macOS 10.16
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_AU.UTF-8
## ctype en_AU.UTF-8
## tz Australia/Melbourne
## date 2021-10-19
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.0.0)
## backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.2)
## bookdown 0.22.17 2021-08-07 [1] Github (rstudio/bookdown@9615b14)
## broom 0.7.9 2021-07-27 [1] CRAN (R 4.0.2)
## bslib 0.3.0 2021-09-02 [1] CRAN (R 4.0.2)
## cellranger 1.1.0 2016-07-27 [2] CRAN (R 4.0.0)
## class 7.3-19 2021-05-03 [2] CRAN (R 4.0.2)
## cli 3.0.1 2021-07-17 [1] CRAN (R 4.0.2)
## cluster 2.1.2 2021-04-17 [2] CRAN (R 4.0.2)
## colorspace 2.0-2 2021-06-24 [1] CRAN (R 4.0.2)
## crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.2)
## DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.2)
## dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.0.2)
## DEoptimR 1.0-8 2016-11-19 [2] CRAN (R 4.0.0)
## digest 0.6.28 2021-09-23 [1] CRAN (R 4.0.2)
## diptest 0.76-0 2021-05-04 [2] CRAN (R 4.0.2)
## dplyr * 1.0.7 2021-06-18 [1] CRAN (R 4.0.2)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.0.2)
## evaluate 0.14 2019-05-28 [2] CRAN (R 4.0.0)
## fansi 0.5.0 2021-05-25 [1] CRAN (R 4.0.2)
## farver 2.1.0 2021-02-28 [1] CRAN (R 4.0.2)
## fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.0.2)
## flexmix 2.3-17 2020-10-12 [1] CRAN (R 4.0.2)
## forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.0.2)
## fpc 2.2-9 2020-12-06 [2] CRAN (R 4.0.2)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
## generics 0.1.0 2020-10-31 [2] CRAN (R 4.0.2)
## ggplot2 * 3.3.5 2021-06-25 [1] CRAN (R 4.0.2)
## ggtext * 0.1.1 2020-12-17 [1] CRAN (R 4.0.2)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
## gridtext 0.1.4 2020-12-10 [1] CRAN (R 4.0.2)
## gtable 0.3.0 2019-03-25 [2] CRAN (R 4.0.0)
## haven 2.4.1 2021-04-23 [2] CRAN (R 4.0.2)
## here * 1.0.1 2020-12-13 [2] CRAN (R 4.0.2)
## highr 0.9 2021-04-16 [2] CRAN (R 4.0.2)
## hms 1.1.1 2021-09-26 [1] CRAN (R 4.0.2)
## htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.0.2)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
## janitor * 2.1.0 2021-01-05 [2] CRAN (R 4.0.2)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.0.2)
## jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.2)
## kernlab 0.9-29 2019-11-12 [2] CRAN (R 4.0.0)
## knitr 1.34 2021-09-09 [1] CRAN (R 4.0.2)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.0.2)
## lattice 0.20-44 2021-05-02 [2] CRAN (R 4.0.2)
## lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.0.2)
## lubridate 1.7.10 2021-02-26 [1] CRAN (R 4.0.2)
## magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.2)
## markdown 1.1 2019-08-07 [2] CRAN (R 4.0.0)
## MASS 7.3-54 2021-05-03 [1] CRAN (R 4.0.2)
## Matrix 1.3-3 2021-05-04 [2] CRAN (R 4.0.2)
## mclust 5.4.7 2020-11-20 [2] CRAN (R 4.0.2)
## mgcv 1.8-35 2021-04-18 [2] CRAN (R 4.0.2)
## modelr 0.1.8 2020-05-19 [2] CRAN (R 4.0.0)
## modeltools 0.2-23 2020-03-05 [2] CRAN (R 4.0.0)
## moments 0.14 2015-01-05 [2] CRAN (R 4.0.0)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.0.0)
## nlme 3.1-152 2021-02-04 [2] CRAN (R 4.0.2)
## nnet 7.3-16 2021-05-03 [2] CRAN (R 4.0.2)
## nullabor * 0.3.9 2020-02-25 [1] CRAN (R 4.0.2)
## patchwork * 1.1.1 2020-12-17 [1] CRAN (R 4.0.2)
## pillar 1.6.3 2021-09-26 [1] CRAN (R 4.0.1)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.0.0)
## prabclus 2.3-2 2020-01-08 [2] CRAN (R 4.0.0)
## purrr * 0.3.4 2020-04-17 [2] CRAN (R 4.0.0)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.0.1)
## Rcpp 1.0.7 2021-07-07 [1] CRAN (R 4.0.2)
## readr * 2.0.1 2021-08-10 [1] CRAN (R 4.0.2)
## readxl * 1.3.1 2019-03-13 [2] CRAN (R 4.0.0)
## reprex 2.0.0 2021-04-02 [1] CRAN (R 4.0.2)
## rlang 0.4.11 2021-04-30 [1] CRAN (R 4.0.2)
## rmarkdown 2.11 2021-09-14 [1] CRAN (R 4.0.2)
## robustbase 0.93-7 2021-01-04 [2] CRAN (R 4.0.2)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.2)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.1)
## rvest 1.0.1 2021-07-26 [1] CRAN (R 4.0.2)
## sass 0.4.0 2021-05-12 [1] CRAN (R 4.0.2)
## scales * 1.1.1 2020-05-11 [2] CRAN (R 4.0.0)
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 4.0.0)
## snakecase 0.11.0 2019-05-25 [2] CRAN (R 4.0.0)
## stringi 1.7.4 2021-08-25 [1] CRAN (R 4.0.2)
## stringr * 1.4.0 2019-02-10 [2] CRAN (R 4.0.0)
## tibble * 3.1.5 2021-09-30 [1] CRAN (R 4.0.2)
## tidyr * 1.1.3 2021-03-03 [1] CRAN (R 4.0.2)
## tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.0.2)
## tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.0.2)
## tzdb 0.1.2 2021-07-20 [1] CRAN (R 4.0.2)
## utf8 1.2.2 2021-07-24 [1] CRAN (R 4.0.2)
## vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.0.2)
## withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.2)
## xfun 0.26 2021-09-14 [1] CRAN (R 4.0.2)
## xml2 1.3.2 2020-04-23 [2] CRAN (R 4.0.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
##
## [1] /Users/etan0038/Library/R/4.0/library
## [2] /Library/Frameworks/R.framework/Versions/4.0/Resources/library