Introduction to ggplot

Components of a ggplot layer

  1. data (in a data frame)
  2. aesthetic mappings (variables are mapped to aesthetics)
  3. geom (the geometric object used to draw the layer; has specific aesthetics)
  4. stat (takes raw data and transforms it for useful plotting)
  5. position (adjustment to prevent overplotting)

Template for ggplot components

p1 <- ggplot(data= <DATA>) +
      aes(<MAPPINGS>) + 
      <GEOM_FUNCTION>(aes(<MAPPINGS>),
                      stat=<STAT>,
                      position=<POSITION>) +
                      <COORDINATE_FUNCTION> +
                      <FACET_FUNCTION>

# draw the plot on the active graphics device
print(p1)

# write the plot to disk as a 5 x 3 inch PDF; the file name carries an
# explicit ".pdf" extension because ggsave() uses `filename` as given and
# does not append one when `device` is supplied
ggsave(plot = p1,
       filename = "MyPlot.pdf",
       width = 5,
       height = 3,
       units = "in",
       device = "pdf")

Preliminaries

Use Inkscape (a standalone vector graphics editor, not an R package) for editing saved figures

library(ggplot2)
library(ggthemes)
library(patchwork)

# use the mpg data frame that is built into ggplot2
d <- ggplot2::mpg
# compactly display the structure of the data frame
str(d)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
# count how many rows fall into each category of fl
table(d[["fl"]])
## 
##   c   d   e   p   r 
##   1   5   8  52 168

Basic plotting

# basic histogram of hwy, using the default binning
ggplot(data = d) +
  aes(x = hwy) +
  geom_histogram()

# same histogram with a custom bar fill and outline color
ggplot(data = d) +
  aes(x = hwy) +
  geom_histogram(
    fill = "khaki",
    color = "black"
  )

# basic density plot of hwy
ggplot(data = d) +
  aes(x = hwy) +
  geom_density(
    fill = "mintcream",
    color = "blue"
  )

# scatter plot of hwy against displ with two fits overlaid:
# the default smoother and a red linear regression line
ggplot(data = d) +
  aes(x = displ, y = hwy) +
  geom_point() +
  geom_smooth() +
  geom_smooth(method = "lm", col = "red")

# scatter plot with only the default smoother added
ggplot(data = d) +
  aes(x = displ, y = hwy) +
  geom_point() +
  geom_smooth()

# scatter plot with only a linear regression line (drawn in red)
ggplot(data = d) +
  aes(x = displ, y = hwy) +
  geom_point() +
  geom_smooth(
    method = "lm",
    col = "red"
  )

# basic boxplot: one box of cty per category of fl
ggplot(data = d) +
  aes(x = fl, y = cty) +
  geom_boxplot()

# same boxplot with a custom fill color
ggplot(data = d) +
  aes(x = fl, y = cty) +
  geom_boxplot(fill = "thistle")

# basic barplot (long format): only x is mapped, one bar per category of fl
ggplot(data = d) +
  aes(x = fl) +
  geom_bar(
    fill = "thistle",
    color = "black"
  )

# bar plot with specified counts or means:
# the bar heights are supplied directly in y_response
x_treatment <- c("Control", "Low", "High")
y_response <- c(12, 2.5, 22.9)
summary_data <- data.frame(
  x_treatment = x_treatment,
  y_response = y_response
)

# NOTE(review): x_treatment is a character vector, so the bars are presumably
# drawn in alphabetical order (Control, High, Low) -- confirm this is intended
ggplot(data = summary_data) +
  aes(x = x_treatment, y = y_response) +
  geom_col(
    fill = c("grey50", "goldenrod", "goldenrod"),
    col = "black"
  )

 # basic curves and functions
 # x values shared by the curve plots: 1 to 100 in steps of 0.1
 my_vec <- seq(from = 1, to = 100, by = 0.1)

 # plot simple mathematical functions (here: sine)
 d_frame <- data.frame(
   x = my_vec,
   y = sin(my_vec)
 )
 ggplot(data = d_frame) +
   aes(x = x, y = y) +
   geom_line()

 # plot probability functions (here: gamma density, shape 5 and scale 3)
 d_frame <- data.frame(
   x = my_vec,
   y = dgamma(my_vec, shape = 5, scale = 3)
 )
 ggplot(data = d_frame) +
   aes(x = x, y = y) +
   geom_line()

 # plot user-defined functions
 # my_fun: a sine wave added to a linear trend with slope 0.1
 my_fun <- function(x) {
   sin(x) + 0.1 * x
 }
 d_frame <- data.frame(
   x = my_vec,
   y = my_fun(my_vec)
 )
 ggplot(data = d_frame) +
   aes(x = x, y = y) +
   geom_line()

Themes and fonts

 # base scatter plot (cty against displ) reused for every theme shown here
 p1 <- ggplot(
   data = d,
   mapping = aes(x = displ, y = cty)
 ) +
   geom_point()
 print(p1)

 # each line redraws the same plot with a different complete theme

 p1 + theme_classic() # no grid lines

 p1 + theme_linedraw() # black frame

 p1 + theme_dark() # good for brightly colored points

 p1 + theme_base() # mimics base R

 p1 + theme_par() # matches current par settings in base

 p1 + theme_void() # shows data only

 p1 + theme_solarized() # good for web pages

 p1 + theme_economist() # many specialized themes

 p1 + theme_grey() # ggplot2's default theme

Major theme modifications

 # use theme parameters to modify font and font size
 p1 + theme_classic(base_size=40,base_family="serif")

 # defaults: theme_grey(base_size=11, base_family="")
 # (an empty base_family leaves the font choice to the graphics device)
 # font families (Mac): Times, Arial, Monaco, Courier, Helvetica, serif, sans
 
# use coord_flip() to swap the x and y axes of the entire plot
p2 <- ggplot(
  data = d,
  mapping = aes(x = fl, fill = fl)
) +
  geom_bar()
print(p2)

p2 +
  coord_flip() +
  theme_grey(base_size = 20, base_family = "sans")

Minor theme modifications

 # scatter plot with custom point styling, title/axis labels, and axis limits
 p1 <- ggplot(
   data = d,
   mapping = aes(x = displ, y = cty)
 ) +
   # shape 21 takes both an outline color and a fill
   geom_point(
     size = 7,
     shape = 21,
     color = "black",
     fill = "steelblue"
   ) +
   labs(
     title = "My graph title here",
     subtitle = "An extended subtitle that will print below the main title",
     x = "My x axis label",
     y = "My y axis label"
   ) +
   # restrict the x axis to 0-4 and the y axis to 0-20
   xlim(0, 4) +
   ylim(0, 20)
 print(p1)