# Default chunk options used for every code chunk in this practical
knitr::opts_chunk$set(
  comment = NA,
  fig.width = 6,
  fig.height = 6,
  echo = TRUE,
  eval = TRUE,
  message = FALSE,
  warning = FALSE,
  dpi = 200,
  fig.align = "center"
)

# Header image, loaded from its remote URL
knitr::include_graphics(
  "https://d21ii91i3y6o6h.cloudfront.net/gallery_images/from_proof/9296/small/1447173871/rstudio-hex-ggplot2-dot-psd.png"
)
In this practical you’ll practice plotting data with the ggplot2
package.
# Embed the cheatsheet screenshot stored under ../_image
knitr::include_graphics("../_image/ggplot_cheatsheet_ss.png")
If you don’t have it already, you can access the ggplot2
cheatsheet here https://www.rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf. This has a nice overview of all the major functions in ggplot2.
ggplot2
. Try to go through each line of code and see how it works!# -----------------------------------------------
# Examples of using ggplot2 on the mpg data
# ------------------------------------------------
library(tidyverse) # Attach the tidyverse, which brings ggplot2 with it
mpg # Print the mpg data to inspect it

# Data only: with no aesthetic mappings the result is a blank space
ggplot(data = mpg)

# Map engine displacement (displ) to the x-axis and highway miles per
# gallon (hwy) to the y-axis
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
)

# Draw the observations as points with geom_point()
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point()

# Draw the observations with geom_count() instead
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_count()

# Points again, this time with extra arguments and a different theme
# applied to this plot only
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point(
    color = "red",      # Red points
    size = 3,           # Larger size
    alpha = .5,         # Transparent points
    position = "jitter" # Jitter the points
  ) +
  scale_x_continuous(limits = c(1, 15)) + # x-axis limits
  scale_y_continuous(limits = c(0, 50)) + # y-axis limits
  theme_minimal()
# Color the points by the class column and label the plot with labs()
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, color = class) # One color per class
) +
  geom_point(size = 3, position = "jitter") +
  labs(
    x = "Engine Displacement in Liters",
    y = "Highway miles per gallon",
    title = "MPG data",
    subtitle = "Cars with higher engine displacement tend to have lower highway mpg",
    caption = "Source: mpg data in ggplot2"
  )

# One regression line per class: the smooth layer inherits the color mapping
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, color = class)
) +
  geom_point(size = 3, alpha = .9) +
  geom_smooth(method = "lm")

# One regression line for all classes: a fixed color on the smooth layer
# replaces the per-class color mapping for that layer
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, color = class)
) +
  geom_point(size = 3, alpha = .9) +
  geom_smooth(color = "blue", method = "lm")

# One panel per class, with points colored by number of cylinders
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, color = factor(cyl))
) +
  geom_point() +
  facet_wrap(~ class)
# Another fancier example: counts, a smoothed line, and numbered labels
ggplot(
  data = mpg,
  mapping = aes(x = cty, y = hwy)
) +
  geom_count(aes(color = manufacturer)) + # Add count geom (see ?geom_count)
  geom_smooth() + # Smoothed line; the confidence band is drawn by default (se = TRUE)
  # Label only the cars with cty > 25. The x and y positions are inherited
  # from ggplot(); the labels simply number the rows of the filtered subset,
  # so the threshold only has to be written once.
  geom_text(
    data = filter(mpg, cty > 25),
    mapping = aes(label = seq_along(cty)),
    position = position_nudge(y = -1), # Place labels just below the points
    check_overlap = TRUE,              # Skip labels that would overlap
    size = 5
  ) +
  labs(
    x = "City miles per gallon",
    y = "Highway miles per gallon",
    title = "City and Highway miles per gallon",
    # The labelled subset is selected on city mpg (cty), not highway mpg
    subtitle = "Numbers indicate cars with city mpg > 25",
    caption = "Source: mpg data in ggplot2",
    color = "Manufacturer",
    size = "Counts"
  )
Open your bootcamp R project. Open a new R script and save it under the name plotting_practical.R
in your 2_Code
folder.
At the top of your script load the tidyverse
package (which includes ggplot2).
library(tidyverse)
library(speff2trial)
library(car)
library(FFTrees)
diamonds
dataset in the ggplot2
package shows information about almost 54,000 round cut diamonds. Print the diamonds
dataset, it should look like this:diamonds
# A tibble: 53,940 x 10
carat cut color clarity depth table price x y z
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39
# ... with 53,930 more rows
ggplot()
, create the following blank plot using the data
and mapping
arguments (but no geom).
# Data and aesthetic mapping only; no geom is added
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price)
)
geom_point()
, add points showing the relationship between the number of carats in the diamonds (carat
) and its price (price
)
# Price against carat, one point per diamond
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price)
) +
  geom_point()
alpha
argument to geom_point()
# Same scatterplot with mostly transparent points
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price)
) +
  geom_point(alpha = .05)
color
aesthetic mapping, color the points by their cut.
# Map cut to the color aesthetic
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = cut)
) +
  geom_point(alpha = .05)
facet_wrap()
function, create different plots for each value of cut
.
# One panel per cut, all laid out in a single row
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = cut)
) +
  geom_point(alpha = .05) +
  facet_wrap(~ cut, nrow = 1)
geom_smooth()
function, add a black, smoothed mean line to each plot (You can also try turning the line into a regression line using the method
argument)
# Faceted scatterplot with a black smoothed line added to each panel
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = cut)
) +
  geom_point(alpha = .05) +
  facet_wrap(~ cut, nrow = 1) +
  geom_smooth(color = "black")
Look at the theme help menu with ?theme_bw()
to see a list of all of the standard ggplot themes. Then, try adding one of these themes to your previous plots to see how they change.
The ggthemes
package contains many additional themes. If you don’t have the package already, install it by running install.packages("ggthemes")
. Then, look at the ggthemes()
vignette by running the following code:
# Show the "ggthemes" vignette that ships with the ggthemes package
vignette(topic = "ggthemes", package = "ggthemes")
mpg
data using the Five Thirty Eight theme. Note that cty
is on the x axis, and hwy
is on the y axis.
library(ggthemes)

# City vs highway mpg, one panel per manufacturer, Five Thirty Eight theme
ggplot(
  data = mpg,
  mapping = aes(x = cty, y = hwy)
) +
  geom_point() +
  labs(title = "City vs highway miles per gallon") +
  facet_wrap(~manufacturer) +
  theme_fivethirtyeight()
geom_density()
diamonds
data using the following template:
- Set the data argument to diamonds
- Map carat to the x aesthetic
- Add geom_density() and set the fill color to "tomato1"
- Use theme_minimal()
# Template: replace every XX with your own values
ggplot(
  data = XX,
  mapping = aes(x = XX)
) +
  geom_density(fill = "XX") +
  labs(
    x = "XX",
    y = "XX",
    title = "XX",
    subtitle = "XX",
    caption = "XX"
  ) +
  theme_XX()
# Density of diamond weights (carat)
ggplot(
  data = diamonds,
  mapping = aes(x = carat)
) +
  geom_density(fill = "tomato1") +
  labs(
    x = "Carats",
    y = "Density", # geom_density() plots a density estimate, not counts
    title = "Distribution of diamond carats", # The plotted variable is carat, not price
    subtitle = "Created with ggplot!",
    caption = "Source: diamonds dataset"
  ) +
  theme_minimal()
geom_boxplot()
geom_boxplot()
. Then, create the following boxplot using the following template
# Template: replace every XX with your own values
ggplot(
  data = XX,
  mapping = aes(x = XX, y = log(XX), fill = XX)
) +
  geom_boxplot() +
  labs(
    y = "XX",
    x = "XX",
    fill = "XX",
    title = "XX",
    subtitle = "XX"
  ) +
  scale_fill_brewer(palette = "XX")
# Boxplots of log price for each cut, filled by cut
ggplot(
  data = diamonds,
  mapping = aes(x = cut, y = log(price), fill = cut)
) +
  geom_boxplot() +
  labs(
    y = "Price (log scale)",
    x = "Cut",
    fill = "Cut", # The legend belongs to the fill aesthetic, so label fill (not color)
    title = "Distribution of diamond prices by cut",
    subtitle = "Data come from the full diamonds dataset", # No sampling is done here
    caption = "Source: diamonds dataset from ggplot2"
  ) +
  scale_fill_brewer(palette = "YlGnBu")
geom_violin()
geom_violin()
. You can also change the color palette in the palette
argument to the scale_fill_brewer()
function. Look at the help menu with ?scale_fill_brewer()
to see all the possibilities. In the plot below, I’m using "Set1"
# Violin plots of log price for each cut, filled by cut
ggplot(
  data = diamonds,
  mapping = aes(x = cut, y = log(price), fill = cut)
) +
  geom_violin() +
  labs(
    y = "Price (log scale)",
    x = "Cut",
    fill = "Cut", # The legend belongs to the fill aesthetic, so label fill (not color)
    title = "Distribution of diamond prices by cut",
    subtitle = "Data come from the full diamonds dataset", # No sampling is done here
    caption = "Source: diamonds dataset from ggplot2"
  ) +
  scale_fill_brewer(palette = "Set1")
stat_summary()
function to add summary statistics as geoms to plots. Using the following template, create the following plot showing the mean prices of diamonds for each level of clarity.
# Template: replace every XX with your own values.
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
ggplot(
  data = XX,
  mapping = aes(x = XX, y = XX)
) +
  stat_summary(
    fun = "mean",
    geom = "bar",
    fill = "white",
    col = "black"
  ) +
  labs(
    y = "XX",
    x = "XX",
    title = "XX",
    caption = "XX"
  )
# Bar chart of the mean diamond price at each level of clarity
ggplot(
  data = diamonds,
  mapping = aes(x = clarity, y = price)
) +
  stat_summary(
    fun = "mean", # `fun` replaces the deprecated `fun.y` (ggplot2 >= 3.3.0)
    geom = "bar",
    fill = "white",
    col = "black"
  ) +
  # No color aesthetic is mapped, so no color legend label is set here
  labs(
    y = "Mean Price",
    x = "Clarity",
    title = "Mean diamond prices by Clarity",
    caption = "Source: diamonds dataset from ggplot2"
  )
mpg
dataframe
# Bar chart of the mean highway mpg for each manufacturer
ggplot(
  data = mpg,
  mapping = aes(x = manufacturer, y = hwy)
) +
  stat_summary(
    fun = "mean", # `fun` replaces the deprecated `fun.y` (ggplot2 >= 3.3.0)
    geom = "bar",
    fill = "white",
    col = "black"
  ) +
  labs(
    y = "Highway Miles per gallon",
    x = "Manufacturer",
    title = "Average fuel efficiency for different car manufacturers",
    caption = "Source: mpg dataset from ggplot2"
  )
coord_flip()
. Using coord_flip()
, flip the x and y coordinates of your previous plot so it looks like this:
# Same bar chart of mean highway mpg, drawn with the axes swapped
ggplot(
  data = mpg,
  mapping = aes(x = manufacturer, y = hwy)
) +
  stat_summary(
    fun = "mean", # `fun` replaces the deprecated `fun.y` (ggplot2 >= 3.3.0)
    geom = "bar",
    fill = "white",
    col = "black"
  ) +
  labs(
    y = "Highway Miles per gallon",
    x = "Manufacturer",
    title = "Average fuel efficiency for different car manufacturers",
    caption = "Source: mpg dataset from ggplot2"
  ) +
  coord_flip()
mpg
dataset, and save it as an object called myplot
# Store the plot in an object instead of printing it straight away
myplot <- ggplot(
  data = mpg,
  mapping = aes(x = cty, y = hwy)
) +
  geom_point() +
  labs(
    x = "City Miles per Gallon",
    y = "Highway Miles per Gallon",
    title = "mpg data"
  )

# Evaluating the object prints the plot
myplot
<-
add a regression line to the myplot
object with geom_smooth()
. Then evaluate the object to see the updated version. It should now look like this:
# Assign the result back so the smooth layer is stored in myplot,
# then evaluate the object to print the updated plot
myplot <- myplot + geom_smooth()
myplot
ggsave()
, save the object as a pdf file called myplot.pdf
in your 3_Figures
folder. Set the width to 6 inches, and the height to 4 inches. Open the pdf outside of RStudio to make sure it worked!
# Write myplot to a pdf file; width and height are passed as 6 and 4
ggsave(
  filename = "3_Figures/myplot.pdf",
  plot = myplot,
  device = "pdf",
  width = 6,
  height = 4
)
midwest
dataset (it’s contained in ggplot2) and look at the help menu to see what values it contains. It should look like this:midwest
# A tibble: 437 x 28
PID county state area poptotal popdensity popwhite popblack
<int> <chr> <chr> <dbl> <int> <dbl> <int> <int>
1 561 ADAMS IL 0.052 66090 1271. 63917 1702
2 562 ALEXANDER IL 0.014 10626 759 7054 3496
3 563 BOND IL 0.022 14991 681. 14477 429
4 564 BOONE IL 0.017 30806 1812. 29344 127
5 565 BROWN IL 0.018 5836 324. 5264 547
6 566 BUREAU IL 0.05 35688 714. 35157 50
7 567 CALHOUN IL 0.017 5322 313. 5298 1
8 568 CARROLL IL 0.027 16805 622. 16519 111
9 569 CASS IL 0.024 13437 560. 13384 16
10 570 CHAMPAIGN IL 0.058 173025 2983. 146506 16559
# ... with 427 more rows, and 20 more variables: popamerindian <int>,
# popasian <int>, popother <int>, percwhite <dbl>, percblack <dbl>,
# percamerindan <dbl>, percasian <dbl>, percother <dbl>,
# popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
# poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
# percchildbelowpovert <dbl>, percadultpoverty <dbl>,
# percelderlypoverty <dbl>, inmetro <int>, category <chr>
# Template: replace every XX with your own values
ggplot(
  data = XX,
  mapping = aes(x = XX, y = XX)
) +
  # shape = 21 is a filled circle: `fill` colors the inside, `color` the border
  geom_point(aes(fill = XX, size = XX), shape = 21, color = "white") +
  geom_smooth(aes(x = XX, y = XX)) +
  labs(
    x = "XX",
    y = "XX",
    title = "XX",
    subtitle = "XX",
    caption = "XX"
  ) +
  # The points map `fill`, so the brewer palette must go on the fill scale;
  # a color scale would have nothing to apply to
  scale_fill_brewer(palette = "XX") +
  scale_size(range = c(XX, XX)) +
  guides(
    size = guide_legend(override.aes = list(col = "black")),
    fill = guide_legend(override.aes = list(size = 5))
  )
# College education against poverty in the midwest data.
# percbelowpoverty (percent below the poverty line) is used for the poverty
# rate; percpovertyknown is only the share of people whose poverty status is
# known, which does not match the axis label or the subtitle.
ggplot(
  data = midwest,
  mapping = aes(x = percollege, y = percbelowpoverty)
) +
  # shape = 21 is a filled circle: `fill` colors the inside, `color` the border
  geom_point(aes(fill = state, size = popdensity), shape = 21, color = "white") +
  geom_smooth(aes(x = percollege, y = percbelowpoverty)) +
  labs(
    x = "Percent with college education",
    y = "Poverty rate",
    title = "Midwest Data",
    subtitle = "States with higher college education rates tend to have lower poverty rates",
    caption = "Source: ggplot2 package"
  ) +
  # The points map `fill`, so the brewer palette must go on the fill scale;
  # a color scale would have nothing to apply to
  scale_fill_brewer(palette = "Set1") +
  scale_size(range = c(0, 12)) +
  guides(
    size = guide_legend(override.aes = list(col = "black")),
    fill = guide_legend(override.aes = list(size = 5))
  )
# Template: replace every XX with your own values
ggplot(
  data = XX,
  mapping = aes(XX, fill = XX)
) +
  geom_density(alpha = XX) +
  labs(
    title = "XX",
    subtitle = "XX",
    caption = "XX",
    x = "XX",
    y = "XX",
    fill = "XX"
  )
# Overlapping densities of percollege, one filled curve per state
ggplot(
  data = midwest,
  mapping = aes(percollege, fill = state)
) +
  geom_density(alpha = 0.8) +
  labs(
    title = "College education rates",
    subtitle = "For 5 Midwest states",
    caption = "Source: midwest dataset in ggplot2",
    x = "Percent of inhabitants with a college education",
    y = "Density",
    fill = "State"
  ) +
  theme_bw()
geom_tile()
geom_tile()
function. Try creating the following heatplot of statistics of NBA players using the following template:# Read in nba data
nba_long <- read_csv(
  file = "https://raw.githubusercontent.com/therbootcamp/therbootcamp.github.io/master/_sessions/_data/nba_long.csv"
)

# Print the data to inspect it
nba_long
# Template: replace every XX with your own values
ggplot(
  XX,
  mapping = aes(x = XX, y = XX, fill = XX)
) +
  geom_tile(colour = "XX") +
  scale_fill_gradientn(colors = c("XX", "XX", "XX")) +
  labs(
    x = "XX",
    y = "XX",
    fill = "XX",
    title = "NBA XX performance",
    subtitle = "XX",
    caption = "XX"
  ) +
  coord_flip()
# Load the NBA data from its remote CSV file
nba_long <- read_csv(
  file = "https://raw.githubusercontent.com/therbootcamp/therbootcamp.github.io/master/_sessions/_data/nba_long.csv"
)

# Heatmap: one tile per Name/measure pair, filled by value
ggplot(
  nba_long,
  mapping = aes(x = Name, y = measure, fill = value)
) +
  geom_tile(colour = "white") + # White borders between the tiles
  scale_fill_gradientn(colors = c("red", "white", "blue")) +
  labs(
    x = "Player",
    y = "Statistic",
    fill = "Performance",
    title = "NBA player performance",
    subtitle = "Each tile represents how well the player performed on that statistic relative to other players.",
    caption = "Source: https://learnr.wordpress.com/2010/01/26/ggplot2-quick-heatmap-plotting/"
  ) +
  coord_flip() +
  theme_minimal()
psavert
) from the economics
dataset.
# Template: replace every XX with your own values
ggplot(
  data = XX,
  aes(x = XX, y = XX)
) +
  geom_line() +
  labs(
    title = "XX",
    subtitle = "XX",
    caption = "XX",
    y = "XX"
  ) +
  geom_smooth()
# Line plot of psavert over date, with a smoothed line on top
ggplot(
  data = economics,
  mapping = aes(x = date, y = psavert)
) +
  geom_line() +
  geom_smooth() +
  labs(
    title = "Personal Savings Rates Changes over Time",
    subtitle = "Ratio of personal saving to disposable income",
    caption = "Source: http://research.stlouisfed.org/fred2",
    y = "Savings Rate %"
  ) +
  theme_bw()
trial_act.csv
dataset. To do this, you’ll need to use both geom_boxplot()
and geom_point()
. To jitter the points, use the position
argument to geom_point()
, as well as the position_jitter()
function to control how much to jitter the points.
# Boxplots of days for each treatment arm, one panel per value of drugs,
# with the individual observations jittered on top
ggplot(
  data = ACTG175,
  mapping = aes(x = factor(arms), y = days, fill = factor(arms))
) +
  facet_wrap(~ drugs, nrow = 1, labeller = label_both) +
  # Hide the boxplot's own outlier markers: geom_point() below already
  # draws every observation
  geom_boxplot(outlier.shape = NA) +
  labs(
    title = "Number of days until a major negative event",
    subtitle = "For different treatment arms and separated by drug users and non drug users",
    caption = "Source: ACTG175 dataset from the speff2trial package",
    x = "Treatment Arm",
    y = "Number of days until a major negative event",
    fill = "Arm"
  ) +
  theme_bw() +
  scale_fill_brewer(palette = "Dark2") +
  geom_point(
    alpha = .2,
    # Argument names are spelled out in full rather than relying on partial
    # matching; height = 0 keeps the jitter horizontal so days is unchanged
    position = position_jitter(width = 0.1, height = 0)
  )
Many of the plots in this practical were taken from Selva Prabhakaran’s website http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html
The main ggplot2
webpage at http://ggplot2.tidyverse.org/ has great tutorials and examples.
ggplot2
is also great for making maps. For examples, check out Eric Anderson’s page at http://eriqande.github.io/rep-res-web/lectures/making-maps-with-R.html