R Ggplot Reference
Are you tired of searching all over the internet for the ggplot functions you need? I am.
The purpose of this ggplot reference guide is to have the most commonly used ggplot functions documented in one place, so they can be searched easily.
Some useful references:
The mpg dataset that ships with ggplot2 is used to demonstrate the functions in this guide. Its first rows are shown below.
## manufacturer model displ year cyl trans drv cty hwy fl class
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
The basic structure of a ggplot call is shown below. The data, the geom function and the aes() mapping are required; the rest is optional.
ggplot(data=<data>) + <Geom_func>(aes()
,stat=<stat>, position=<position>) +
<Coordinate_func> + <Facet_func> + <Scale_func> + <Theme_func>
Use theme_set() to set the theme globally.
# Make theme_light() the active default theme for plots drawn after this call.
theme_set(theme_light())
Labels
# Template: set axis labels, plot titles and legend titles in a single call.
# Replace <aes> with the aesthetic whose legend title should be changed.
labs(
  x = "New x axis label",
  y = "New y axis label",
  title = "Add a title above the plot",
  subtitle = "Add a subtitle below title",
  caption = "Add a caption below plot",
  <aes> = "New <aes> legend title"
)
Axis labels
# Template: theme elements that control the appearance of the axes.
# Every element_*() call accepts further styling arguments; the empty calls
# below are placeholders.
theme(
  # Axis lines
  axis.line = element_line(),
  # Tick labels: font color, size and face
  axis.text = element_text(),              # tick labels on all axes
  # element_text() takes the styling arguments directly; it has no
  # axis.text.x argument, so the rotation is passed as angle = 90.
  axis.text.x = element_text(angle = 90),  # x axis tick labels only, rotated 90 degrees
  axis.text.x.top = element_text(),        # x axis tick labels on the top axis
  axis.text.y = element_text(),            # y axis tick labels only
  axis.text.y.right = element_text(),      # y axis tick labels on the right axis
  # Tick marks: color, size, linetype and length
  axis.ticks = element_line(),             # tick marks on all axes
  axis.ticks.x = element_line(),           # x axis tick marks only
  axis.ticks.y = element_line(),           # y axis tick marks only
  axis.ticks.length = unit(3, "pt")        # length of the tick marks
)
Legend
# Template: theme elements that control the appearance of the legend.
theme(
  legend.position = "bottom",             # place the legend below the plot
  legend.title = element_blank(),         # drop the legend title
  legend.text = element_text(size = 14),  # font size of the legend labels
  legend.box = "horizontal"               # arrange multiple legends side by side
)
Scale
Use scales to control how data values are mapped to the visual values of an aesthetic.
# * stands for one of the aesthetics: alpha, color, fill, linetype, shape, size
scale_*_continuous() # map continuous values
scale_*_discrete() # map discrete values
scale_*_manual(values = c()) # map discrete values to manually chosen visual values
Colors
# Use scale_color_brewer() (or scale_fill_brewer() for the fill aesthetic)
# to replace the default palette with a ColorBrewer palette.
#
# Diverging:   BrBG, PiYG, PRGn, PuOr, RdBu, RdGy, RdYlBu, RdYlGn, Spectral
# Qualitative: Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3
# Sequential:  Blues, BuGn, BuPu, GnBu, Greens, Greys, Oranges, OrRd, PuBu,
#              PuBuGn, PuRd, Purples, RdPu, Reds, YlGn, YlGnBu, YlOrBr, YlOrRd
color_plot <- ggplot(mpg, aes(cty, hwy)) +
  geom_point(aes(color = factor(cyl))) +
  theme(legend.position = "none")

cp1 <- color_plot + scale_color_brewer(palette = "Set1")

# theme_dark() is a complete theme: adding it replaces every earlier theme()
# setting, including legend.position = "none" from color_plot. The legend is
# therefore hidden again after the complete theme so all four panels match.
cp2 <- color_plot +
  scale_color_brewer() +
  theme_dark() +
  theme(legend.position = "none")
cp3 <- color_plot +
  scale_color_brewer(palette = "Oranges") +
  theme_dark() +
  theme(legend.position = "none")

# Compare the default palette with the three variants in a 2 x 2 grid.
grid.arrange(color_plot, cp1, cp2, cp3, ncol = 2, nrow = 2)
Geoms
One Variable
# Distribution of a single continuous variable (highway mileage).
a <- ggplot(mpg, aes(hwy))

a.freq <- a +
  geom_freqpoly() +
  labs(title = "Frequency Plot")
a.den <- a +
  geom_density(
    color = "lightblue", fill = "lightblue", alpha = 0.5, kernel = "gaussian"
  ) +
  labs(title = "Density Plot")
a.hist <- a +
  geom_histogram(binwidth = 5) +
  labs(title = "Histogram")

# Show the three plots side by side.
grid.arrange(a.freq, a.den, a.hist, ncol = 3)

# Several geoms can be layered on the same base plot.
a +
  geom_freqpoly(color = "red") +
  geom_histogram(binwidth = 3, fill = "#f59582", alpha = 0.3) +
  labs(title = "Overlap")
# Normal Q-Q plot of highway mileage; the qq geoms read the `sample` aesthetic.
b <- ggplot(mpg, aes(sample = hwy))
b +
  geom_qq() +
  # `weight` is not a property of the reference line and was ignored with a
  # warning. Line thickness is set with `linewidth` (named `size` in ggplot2
  # versions before 3.4.0).
  geom_qq_line(
    fullrange = TRUE, distribution = stats::qnorm,
    color = "red", linewidth = 2, alpha = 0.8
  ) +
  labs(title = "QQ Plot")
Multiple Variables
Continuous x, continuous y
# Base plot for two continuous variables: highway vs. city mileage.
# NOTE: the variable `c` shares its name with base::c(). Calls such as c(1, 2)
# still work because R skips non-function objects when it looks up a function.
c <- ggplot(mpg, aes(x = hwy, y = cty))

c.point <- c +
  geom_point(aes(color = as.factor(cyl))) +
  labs(title = "Scatter Plot", color = "Cyl") +
  # The points are mapped to `color`, so the legend labels must be set on the
  # color scale; a fill scale has no effect on this plot.
  scale_color_discrete(labels = c("4", "5", "6", "8")) +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    legend.spacing = unit(0.8, "cm"),
    legend.title = element_text(size = 8)
  )
c.smooth <- c + geom_smooth(method = "auto") + labs(title = "Smoothed Curve")
c.line <- c + geom_line() + labs(title = "Line Plot")

# layout_matrix defines the grid: plots 1 and 2 share the top row and
# plot 3 spans the whole bottom row.
grid.arrange(
  c.smooth, c.line, c.point,
  top = "Highway vs. City Mileage",
  layout_matrix = rbind(c(1, 2), c(3, 3))
)
# Annotate each Audi observation with its model year.
# geom_text() draws plain text, nudged slightly away from the point.
c.text1 <- ggplot(
  filter(mpg, manufacturer == "audi"),
  aes(x = hwy, y = cty)
) +
  geom_point() +
  geom_text(aes(label = as.factor(year)), nudge_x = 0.2, nudge_y = 0.2) +
  xlim(22, 30)

# geom_label() draws the same text inside a box.
c.text2 <- ggplot(
  filter(mpg, manufacturer == "audi"),
  aes(x = hwy, y = cty)
) +
  geom_point() +
  geom_label(aes(label = as.factor(year))) +
  xlim(22, 32)

grid.arrange(c.text1, c.text2, ncol = 2, top = "Scatter Plot with Text")
Discrete x, continuous y
# Distribution of highway mileage within each vehicle class.
d <- ggplot(mpg, aes(class, hwy))

# Reusable theme tweak: rotate the x axis tick labels by 90 degrees.
xlab_rot <- theme(axis.text.x = element_text(angle = 90))

d.box <- d +
  geom_boxplot() +
  xlab_rot +
  labs(title = "boxplot")
d.violin <- d +
  geom_violin() +
  xlab_rot +
  labs(title = "violin plot")

grid.arrange(d.box, d.violin, ncol = 2)
#geom_col shows plot with sum of hwy in each class
# d+geom_col(aes(fill=class)) +
# theme(legend.position = "none") +
# coord_flip()
# Mean highway mileage per vehicle class.
mean_hwy <- mpg %>%
  select(class, hwy) %>%
  group_by(class) %>%
  summarise(mean = mean(hwy))

# Horizontal bar chart of the class means:
#   - reorder() sorts the classes by their mean
#   - coord_flip() turns the columns into horizontal bars
#   - the legend is removed through theme()
#   - each bar is annotated with its mean, rounded to two decimals
ggplot(mean_hwy, aes(x = reorder(class, mean), y = mean, fill = mean)) +
  geom_col() +
  geom_text(aes(label = round(mean, 2)), color = "white", hjust = 1.5) +
  coord_flip() +
  labs(
    title = "Average Highway Mileage by Class",
    x = "Class",
    y = "Average HWY"
  ) +
  theme(legend.position = "none")
# Bar chart position adjustments: counts per cylinder class, split by drive
# train and drawn with the "dodge", "fill" and "stack" positions.
e <- ggplot(mpg, aes(as.factor(cyl), fill = drv)) +
  labs(x = "Cylinder") +
  # The argument is spelled out as `labels`; the abbreviated `label` relied on
  # partial argument matching. Labels are assigned by position -- presumably
  # the sorted drv values "4", "f", "r"; verify against the data.
  scale_fill_discrete(labels = c("4WD", "FWD", "RWD"))

e.dodge <- e + geom_bar(position = "dodge") + labs(title = "Dodge")
e.fill <- e + geom_bar(position = "fill") + labs(title = "Fill")
e.stack <- e + geom_bar(position = "stack") + labs(title = "Stack")

grid.arrange(e.dodge, e.fill, e.stack, ncol = 3)
Use Facet to make subplots based on one or more discrete variables.
# Base scatter plot for the facet examples. Age marks cars with a model year
# before 2004 as "Old" and all others as "New".
f <- mpg %>%
  mutate(Age = ifelse(year < 2004, "Old", "New")) %>%
  ggplot(aes(hwy, cty, color = Age)) +
  geom_point()

# One panel per cylinder count, laid out as columns or as rows.
f.col <- f + facet_grid(. ~ factor(cyl)) + labs(title = "Column")
f.row <- f + facet_grid(factor(cyl) ~ .) + labs(title = "Row")
grid.arrange(f.col, f.row, ncol = 2)

# Facet on rows (Age) and columns (cylinder count) at the same time.
f + facet_grid(Age ~ factor(cyl))