R Ggplot Reference

4 minute read

Are you tired of searching all over the internet for the ggplot functions you need? I am.

The purpose of this ggplot reference guide is to have the most commonly used ggplot functions documented in one place, so they can be searched easily.

Some useful references:

The mpg dataset is used to demo the functions in the library.

##   manufacturer model displ year cyl      trans drv cty hwy fl   class
## 1         audi    a4   1.8 1999   4   auto(l5)   f  18  29  p compact
## 2         audi    a4   1.8 1999   4 manual(m5)   f  21  29  p compact
## 3         audi    a4   2.0 2008   4 manual(m6)   f  20  31  p compact
## 4         audi    a4   2.0 2008   4   auto(av)   f  21  30  p compact
## 5         audi    a4   2.8 1999   6   auto(l5)   f  16  26  p compact
## 6         audi    a4   2.8 1999   6 manual(m5)   f  18  26  p compact

The basic structure of a ggplot is below. The `ggplot(data=<data>)` call and a `<Geom_func>(aes())` layer are required; everything else is optional.

ggplot(data=<data>) + <Geom_func>(aes(),stat=<stat>, position=<position>) + <Coordinate_func> + <Facet_func> + <Scale_func> + <Theme_func>

Use theme_set() to set the theme globally.

# Make theme_light() the default theme for plots created after this call
theme_set(theme_light())

Labels

# Template, not runnable as-is: replace <aes> with the name of the aesthetic
# whose legend title should change (for example color or fill).
labs(
  x = "New x axis label", y = "New y axis label",
  title ="Add a title above the plot",
  subtitle = "Add a subtitle below title",
  caption = "Add a caption below plot",
  <aes> = "New <aes> legend title")

Axis labels

# Axis appearance. The empty element_*() calls are placeholders: pass
# arguments such as colour, size, face or angle to change the element.
theme(
  # Axis lines
  axis.line = element_line(),

  # Tick text labels: font colour, size, face and angle
  axis.text = element_text(),             # tick labels on all axes
  axis.text.x = element_text(angle = 90), # x axis tick labels only, rotated 90 degrees
  axis.text.x.top = element_text(),       # x axis tick labels on the top axis
  axis.text.y = element_text(),           # y axis tick labels only
  axis.text.y.right = element_text(),     # y axis tick labels on the right axis

  # Tick marks: colour, size, linetype and length
  axis.ticks = element_line(),            # tick marks on all axes
  axis.ticks.x = element_line(),          # x axis tick marks only
  axis.ticks.y = element_line(),          # y axis tick marks only
  axis.ticks.length = unit(3, "pt")       # length of the tick marks
)

Legend

# Legend appearance
theme(
  legend.position = "bottom",             # put the legend below the plot
  legend.title    = element_blank(),      # drop the legend title
  legend.text     = element_text(size = 14), # size of the legend text
  legend.box      = "horizontal"          # arrange multiple legends side by side
)

Scale

Use Scale to control mapping of data values to aesthetic visual values.

# Templates: replace * with one of alpha, color, fill, linetype, shape, size

scale_*_continuous() # map continuous values
scale_*_discrete()   # map discrete values
scale_*_manual(values = c()) # map discrete values to manually chosen visual values

Colors

# ColorBrewer palettes replace the default palette:
# scale_color_brewer() for the color aesthetic (used here),
# scale_fill_brewer() for the fill aesthetic.
#
# Diverging:BrBG, PiYG, PRGn, PuOr, RdBu, RdGy, RdYlBu, RdYlGn, Spectral
# Qualitative:Accent, Dark2, Paired, Pastel1, Pastel2, Set1, Set2, Set3
# Sequential:Blues, BuGn, BuPu, GnBu, Greens, Greys, Oranges, OrRd, PuBu, PuBuGn, PuRd, Purples, RdPu, Reds, YlGn, YlGnBu, YlOrBr, YlOrRd

# Base scatter plot with the default palette; the legend is hidden
color_plot <- ggplot(mpg, aes(cty, hwy)) +
  geom_point(aes(color = factor(cyl))) +
  theme(legend.position = "none")

# Same plot with three different Brewer palettes
cp1 <- color_plot +
  scale_color_brewer(palette = "Set1")
cp2 <- color_plot +
  scale_color_brewer() +
  theme_dark()
cp3 <- color_plot +
  scale_color_brewer(palette = "Oranges") +
  theme_dark()

# Show all four variants in a 2 x 2 grid
grid.arrange(color_plot, cp1, cp2, cp3, nrow = 2, ncol = 2)

Test image

Geoms

One Variable

# Shared base: a single continuous variable (hwy)
a <- ggplot(mpg, aes(hwy))

# Three ways to show its distribution
a.freq <- a +
  geom_freqpoly() +
  labs(title = "Frequency Plot")
a.den <- a +
  geom_density(
    kernel = "gaussian",
    color = "lightblue",
    fill = "lightblue",
    alpha = 0.5
  ) +
  labs(title = "Density Plot")
a.hist <- a +
  geom_histogram(binwidth = 5) +
  labs(title = "Histogram")

# Side by side in one row
grid.arrange(a.freq, a.den, a.hist, ncol = 3)

Test image

# Layer a red frequency polygon and a translucent histogram on the same base
a +
  geom_freqpoly(color = "red") +
  geom_histogram(binwidth = 3, fill = "#f59582", alpha = 0.3) +
  labs(title = "Overlap")

Test image

# Q-Q plot of highway mileage against the normal distribution
b <- ggplot(mpg, aes(sample = hwy))

b +
  geom_qq() +
  geom_qq_line(
    distribution = stats::qnorm,
    fullrange = TRUE,  # let the reference line span the full plot range
    color = "red",
    linewidth = 2,     # line thickness (spelled `size` before ggplot2 3.4.0);
                       # `weight` is not a parameter of this geom and is ignored
    alpha = 0.8
  ) +
  labs(title = "QQ Plot")

Test image

Multiple Variables

continuous x , continuous y

# Shared base: continuous x (hwy) against continuous y (cty)
c <- ggplot(mpg, aes(x = hwy, y = cty))

# Scatter plot colored by cylinder count, with a compact legend at the bottom
c.point <- c +
  geom_point(aes(color = as.factor(cyl))) +
  labs(title = "Scatter Plot", color = "Cyl") +
  # The points map `color`, so legend labels must be set on the color scale;
  # a fill scale would have no effect on this plot
  scale_color_discrete(labels = c("4", "5", "6", "8")) +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 8),
    legend.spacing = unit(0.8, "cm"),
    legend.title = element_text(size = 8)
  )

c.smooth <- c +
  geom_smooth(method = "auto") +
  labs(title = "Smoothed Curve")

c.line <- c +
  geom_line() +
  labs(title = "Line Plot")

# layout_matrix defines the grid: plots 1 and 2 share the first row,
# plot 3 spans the whole second row
grid.arrange(
  c.smooth, c.line, c.point,
  top = "Highway vs. City Mileage",
  layout_matrix = rbind(
    c(1, 2),
    c(3, 3)
  )
)

Test image

# Scatter plots of the audi rows, annotated with the model year.
# Both panels use the same x limits. The upper limit must be above 31,
# because an audi row has hwy = 31 and points outside the limits are dropped.

# Plain text labels, nudged slightly away from the points
c.text1 <- mpg %>%
  filter(manufacturer == "audi") %>%
  ggplot(aes(x = hwy, y = cty)) +
  geom_point() +
  geom_text(aes(label = as.factor(year)), nudge_x = 0.2, nudge_y = 0.2) +
  xlim(22, 32)

# Boxed labels drawn on top of the points
c.text2 <- mpg %>%
  filter(manufacturer == "audi") %>%
  ggplot(aes(x = hwy, y = cty)) +
  geom_point() +
  geom_label(aes(label = as.factor(year))) +
  xlim(22, 32)

grid.arrange(c.text1, c.text2, ncol = 2, top = "Scatter Plot with Text")

Test image

discrete x , continuous y

# Shared base (discrete x, continuous y) and a reusable theme tweak that
# rotates the x axis tick labels by 90 degrees
d <- ggplot(mpg, aes(class, hwy))
xlab_rot <- theme(axis.text.x = element_text(angle = 90))

d.box <- d +
  geom_boxplot() +
  labs(title = "boxplot") +
  xlab_rot
d.violin <- d +
  geom_violin() +
  labs(title = "violin plot") +
  xlab_rot

# Box plot and violin plot side by side
grid.arrange(d.box, d.violin, ncol = 2)

Test image

#geom_col shows plot with sum of hwy in each class
# d+geom_col(aes(fill=class)) +
#   theme(legend.position = "none") +
#   coord_flip()

# Mean highway mileage per vehicle class.
# The result has one row per class, with columns `class` and `mean`.
mean_hwy <- mpg %>%
  group_by(class) %>%
  summarise(mean = mean(hwy))

# Horizontal bar chart of the per-class means:
#   - reorder() sorts the classes by their mean hwy
#   - coord_flip() turns the columns into horizontal bars
#   - the fill legend is removed through theme()
mean_hwy %>%
  ggplot(aes(x = reorder(class, mean), y = mean, fill = mean)) +
  geom_col() +
  # rounded mean in white, shifted along the bar with hjust
  geom_text(aes(label = round(mean, 2)), hjust = 1.5, color = "white") +
  coord_flip() +
  labs(
    title = "Average Highway Mileage by Class",
    x = "Class",
    y = "Average HWY"
  ) +
  theme(legend.position = "none")

Test image

#bar chart position

# Shared base: count of cars per cylinder count, filled by drive train
e <- ggplot(mpg, aes(as.factor(cyl), fill = drv)) +
  labs(x = "Cylinder") +
  # Labels are positional: they follow the sorted drv values ("4", "f", "r").
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_fill_discrete(labels = c("4WD", "FWD", "RWD"))

# The same bars under the three position adjustments
e.dodge <- e +
  geom_bar(position = "dodge") +
  labs(title = "Dodge")
e.fill <- e +
  geom_bar(position = "fill") +
  labs(title = "Fill")
e.stack <- e +
  geom_bar(position = "stack") +
  labs(title = "Stack")

grid.arrange(e.dodge, e.fill, e.stack, ncol = 3)

Test image

Use Facet to make subplots based on one or more discrete variables.

# Label each car "Old" (year before 2004) or "New", then plot hwy against cty
# colored by that label
f <- mpg %>%
  mutate(Age = ifelse(year < 2004, "Old", "New")) %>%
  ggplot(aes(hwy, cty, color = Age)) +
  geom_point()

# One panel per cylinder count: laid out as columns, then as rows
f.col <- f +
  facet_grid(. ~ factor(cyl)) +
  labs(title = "Column")
f.row <- f +
  facet_grid(factor(cyl) ~ .) +
  labs(title = "Row")

grid.arrange(f.col, f.row, ncol = 2)

Test image

# Facet on two variables at once: Age across the rows, cylinder count across
# the columns
f +
  facet_grid(Age ~ factor(cyl))

Test image

Updated: