Push the knit button!
library(tidyverse) # contains ggplot2, dplyr, tidyr, etc
# Read the tuberculosis dataset and reshape it to one row per
# country / year / sex / age group.
tb <- read_csv(here::here("data/TB_notifications_2020-07-01.csv")) %>%
  dplyr::select(country, iso3, year, new_sp_m04:new_sp_fu) %>%
  # One row per original new_sp_* column, with the column name kept in `sexage`
  pivot_longer(
    cols = new_sp_m04:new_sp_fu,
    names_to = "sexage",
    values_to = "count"
  ) %>%
  # After the "new_sp_" prefix is removed, the first character is taken as
  # the sex code and everything after it as the age code.
  mutate(sexage = str_replace(sexage, "new_sp_", "")) %>%
  mutate(
    sex = str_sub(sexage, 1, 1),
    # str_sub(x, 2) runs to the end of each string; the previous
    # substr(..., length(sexage)) used the number of rows as the end position.
    age = str_sub(sexage, 2)
  ) %>%
  dplyr::select(-sexage) %>%
  filter(!(age %in% c("04", "014", "514", "u"))) %>%
  filter(year > 1996, year < 2013) %>%
  # factor() orders its levels by sorting the age codes, and the labels are
  # applied in that order; this presumes exactly six codes remain -- TODO confirm.
  mutate(age_group = factor(age,
    labels = c("15-24", "25-34", "35-44", "45-54", "55-64", "65-")
  )) %>%
  dplyr::select(country, year, age_group, sex, count)
# Keep only the rows whose country is Australia
tb_oz <- filter(tb, country == "Australia")
# add your answer here!
“Is the proportion of TB incidence in males relative to females increasing with age?”
# Pie charts of the 2012 counts: one panel per sex, slices coloured by
# age group and scaled to proportions within each panel.
tb_oz %>%
  filter(year == 2012) %>%
  ggplot(aes(x = 1, y = count, fill = age_group)) +
  geom_col(position = "fill") +
  # Polar coordinates on y turn the single stacked bar into a pie
  coord_polar(theta = "y") +
  facet_wrap(~sex, ncol = 6) +
  scale_fill_brewer(name = "", palette = "Dark2") +
  labs(x = "", y = "")
# add your code here!
By using proportions by sex, this plot loses some of the differences in trend between the sexes over the years. Fix it.
# Proportion of the count attributed to males, plotted against year with
# one panel per age group.
tb_oz %>%
  group_by(year, age_group) %>%
  # Male count divided by the total count within each year / age group
  summarise(p = count[sex == "m"] / sum(count)) %>%
  # Drop the grouping that summarise() leaves behind before plotting
  ungroup() %>%
  ggplot(aes(x = year, y = p)) +
  # White reference line where the male count is half of the total
  geom_hline(yintercept = 0.50, colour = "white", size = 2) +
  geom_point() +
  # Spell out FALSE: the shorthand F is an ordinary variable and can be reassigned
  geom_smooth(se = FALSE) +
  facet_wrap(~age_group, ncol = 6) +
  ylab("proportion of males")
# add your code here!