Day 04
Carleton College
Stat 220 - Spring 2025
ggplot2
review ggplot2
geom_point()
geom_histogram()
geom_boxplot()
geom_violin()
geom_bar()
x
and y
axis
color
shape
alpha
size
labs()
.rmd
template for today at the course website
08:00
We’re not quite satisfied….
Setting = choosing a certain value for an aesthetic
Examples:
scale_fill_manual()
scale_fill_brewer()
scale_color_viridis()
scale_shape_manual()
Recommended reading:
Let’s make Wednesdays navyblue
and Thursdays gold2
Theme: The non-data ink on your plots
ggplot2
themes
theme_grey()
theme_bw()
theme_linedraw()
theme_light()
theme_dark()
theme_minimal()
theme_classic()
theme_void()
theme_test()
ggthemes
themes
theme_clean()
theme_economist()
theme_excel()
theme_fivethirtyeight()
theme_gdocs()
theme_solarized()
theme_stata()
theme_tufte()
theme_wsj()
Apply theme_light()
to the histogram
00:30
?theme
theme(line, rect, text, title, aspect.ratio, axis.title, axis.title.x,
axis.title.x.top, axis.title.x.bottom, axis.title.y, axis.title.y.left,
axis.title.y.right, axis.text, axis.text.x, axis.text.x.top,
axis.text.x.bottom, axis.text.y, axis.text.y.left, axis.text.y.right,
axis.ticks, axis.ticks.x, axis.ticks.x.top, axis.ticks.x.bottom,
axis.ticks.y, axis.ticks.y.left, axis.ticks.y.right, axis.ticks.length,
axis.line, axis.line.x, axis.line.x.top, axis.line.x.bottom, axis.line.y,
axis.line.y.left, axis.line.y.right, legend.background, legend.margin,
legend.spacing, legend.spacing.x, legend.spacing.y, legend.key,
legend.key.size, legend.key.height, legend.key.width, legend.text,
legend.text.align, legend.title, legend.title.align, legend.position,
legend.direction, legend.justification, legend.box, legend.box.just,
legend.box.margin, legend.box.background, legend.box.spacing,
panel.background, panel.border, panel.spacing, panel.spacing.x,
panel.spacing.y, panel.grid, panel.grid.major, panel.grid.minor,
panel.grid.major.x, panel.grid.major.y, panel.grid.minor.x,
panel.grid.minor.y, panel.ontop, plot.background, plot.title,
plot.subtitle, plot.caption, plot.tag, plot.tag.position, plot.margin,
strip.background, strip.background.x, strip.background.y,
strip.placement, strip.text, strip.text.x, strip.text.y,
strip.switch.pad.grid, strip.switch.pad.wrap, ..., complete = FALSE,
validate = TRUE)
# Histogram of season-average IMDB rating, with bars filled by airing day.
ggplot(season_summary) +
  geom_histogram(
    aes(x = imdb_mean, fill = day_of_week),
    bins = 15,
    color = "white" # set (not mapped): white outline on every bar
  ) +
  # Unnamed values are assigned to the levels of day_of_week in order;
  # intended result is Thursdays gold2, Wednesdays navyblue (verify levels).
  scale_fill_manual(values = c("gold2", "navyblue")) +
  theme_minimal() +
  theme(
    # ggplot2 >= 3.5.0: a numeric legend.position is deprecated. Request an
    # inside legend and give its relative (0-1) coordinates separately.
    legend.position = "inside",
    legend.position.inside = c(.15, .85),
    legend.background = element_blank()
  ) +
  labs(
    x = "Season Average IMDB Rating",
    y = "",
    fill = "",
    title = "Survivor is better on Thursdays"
  )
# Histogram of season-average IMDB rating by airing day, with the minor
# gridlines removed.
ggplot(season_summary) +
  geom_histogram(
    aes(x = imdb_mean, fill = day_of_week),
    bins = 15,
    color = "white" # set (not mapped): white outline on every bar
  ) +
  # Unnamed values are assigned to the levels of day_of_week in order;
  # intended result is Thursdays gold2, Wednesdays navyblue (verify levels).
  scale_fill_manual(values = c("gold2", "navyblue")) +
  theme_minimal() +
  theme(
    # ggplot2 >= 3.5.0: a numeric legend.position is deprecated. Request an
    # inside legend and give its relative (0-1) coordinates separately.
    legend.position = "inside",
    legend.position.inside = c(.15, .85),
    legend.background = element_blank(),
    panel.grid.minor = element_blank() # drop minor gridlines
  ) +
  labs(
    x = "Season Average IMDB Rating",
    y = "",
    fill = "",
    title = "Survivor is better on Thursdays"
  )
# Histogram of season-average IMDB rating by airing day, with explicit
# x-axis breaks and no minor gridlines.
ggplot(season_summary) +
  geom_histogram(
    aes(x = imdb_mean, fill = day_of_week),
    bins = 15,
    color = "white" # set (not mapped): white outline on every bar
  ) +
  # Unnamed values are assigned to the levels of day_of_week in order;
  # intended result is Thursdays gold2, Wednesdays navyblue (verify levels).
  scale_fill_manual(values = c("gold2", "navyblue")) +
  # Place x-axis breaks (ticks and major gridlines) at whole ratings only.
  scale_x_continuous(breaks = c(6, 7, 8, 9)) +
  theme_minimal() +
  theme(
    # ggplot2 >= 3.5.0: a numeric legend.position is deprecated. Request an
    # inside legend and give its relative (0-1) coordinates separately.
    legend.position = "inside",
    legend.position.inside = c(.15, .85),
    legend.background = element_blank(),
    panel.grid.minor = element_blank() # drop minor gridlines
  ) +
  labs(
    x = "Season Average IMDB Rating",
    y = "",
    fill = "",
    title = "Survivor is better on Thursdays"
  )
# Histogram of season-average IMDB rating by airing day, with explicit
# x-axis breaks, no minor gridlines, and a text annotation.
ggplot(season_summary) +
  geom_histogram(
    aes(x = imdb_mean, fill = day_of_week),
    bins = 15,
    color = "white" # set (not mapped): white outline on every bar
  ) +
  # Unnamed values are assigned to the levels of day_of_week in order;
  # intended result is Thursdays gold2, Wednesdays navyblue (verify levels).
  scale_fill_manual(values = c("gold2", "navyblue")) +
  # Place x-axis breaks (ticks and major gridlines) at whole ratings only.
  scale_x_continuous(breaks = c(6, 7, 8, 9)) +
  theme_minimal() +
  theme(
    # ggplot2 >= 3.5.0: a numeric legend.position is deprecated. Request an
    # inside legend and give its relative (0-1) coordinates separately.
    legend.position = "inside",
    legend.position.inside = c(.15, .85),
    legend.background = element_blank(),
    panel.grid.minor = element_blank() # drop minor gridlines
  ) +
  labs(
    x = "Season Average IMDB Rating",
    y = "",
    fill = "",
    title = "Survivor is better on Thursdays"
  ) +
  # A single text label positioned in data coordinates (x = rating,
  # y = count); its color matches one of the two fill colors above.
  annotate(
    "text",
    x = 6,
    y = 3,
    label = "Lowest ratings \n occur on \n Wednesdays",
    color = "navyblue"
  )
What a huge effect!
But it isn’t the whole story
Wilke has good suggestions in chapters 5-16
Always stop and think about how easy it is to see the story
Try a few different options
00:30
One way to do this is by highlighting the important parts
Is this train schedule easy to read?
Does removing gridlines make it somewhat easier?
“We focus on four conventions which imbue visualisations with a sense of objectivity, transparency and facticity. These include: a) two-dimensional viewpoints; b) clean layouts; c) geometric shapes and lines; d) the inclusion of data sources.”
Now that we have the toolkit to make customizations to our plots, and some “rules” for good graphs, let’s break them!
Choose a graph (the one from class today, one from last class, one from homework, etc.)
Make it ugly
color
scale
theme options
Explain why it’s ugly (what “rules” are you breaking? what makes it an ineffective graph?)
Post to our Slack #social channel when you’re done (you don’t have to post your explanation)