# Bar plots
# A bar chart of counts: geom_bar() tallies the rows that fall in each
# category of drv, the same numbers table() reports.
table(d$drv)
##
##   4   f   r
## 103 106  25
p1 <- ggplot(data = d, mapping = aes(x = drv)) +
  geom_bar(color = "black", fill = "cornsilk")
print(p1)

# Mapping fill to a second variable (fl) splits every drv bar into one
# coloured segment per fuel type.
p1 <- ggplot(data = d, mapping = aes(x = drv, fill = fl)) +
  geom_bar()
print(p1)

# position = "identity" draws every fuel group's bar from zero in the same
# x slot, so the bars overlap instead of stacking; alpha (transparency) is
# lowered so the bars behind remain visible.
p1 <- ggplot(data = d, mapping = aes(x = drv, fill = fl)) +
  geom_bar(alpha = 0.3, position = "identity")
print(p1)

# position = "fill" stacks the segments and rescales every bar to the same
# height, so the segments show proportions rather than counts.
p1 <- ggplot(data = d, mapping = aes(x = drv, fill = fl)) +
  geom_bar(position = "fill")
print(p1)

# position = "dodge" places the fuel-type bars side by side within each drv
# category, which is the easiest layout to compare across groups.
# The outline width is set with linewidth: using size for line widths is
# deprecated since ggplot2 3.4.0 and emits a warning.
p1 <- ggplot(d) +
  aes(x = drv, fill = fl) +
  geom_bar(position = "dodge", color = "black", linewidth = 1)
print(p1)

# A more typical "bar plot" uses bar height for a summary value instead of a
# row count: here, the mean highway mileage (hwy) of each fuel type (fl).
d_tiny <- tapply(d$hwy, as.factor(d$fl), mean) # one mean per fuel type
print(d_tiny)
##        c        d        e        p        r
## 36.00000 33.60000 13.25000 25.23077 22.99405
d_tiny <- data.frame(hwy = d_tiny) # single column; fuel codes are row names
print(d_tiny)
##        hwy
## c 36.00000
## d 33.60000
## e 13.25000
## p 25.23077
## r 22.99405
# Copy the row names into a real column so ggplot can map them to an axis.
d_tiny <- cbind(fl = row.names(d_tiny), d_tiny)
print(d_tiny)
##   fl      hwy
## c  c 36.00000
## d  d 33.60000
## e  e 13.25000
## p  p 25.23077
## r  r 22.99405
# geom_col() draws bars whose heights are the y values themselves.
p2 <- ggplot(data = d_tiny, mapping = aes(x = fl, y = hwy, fill = fl)) +
  geom_col()
print(p2)

# Use a box plot instead of standard "means" bars!
# A basic box plot is simple to make and shows the spread of hwy within each
# fuel type, not just a single summary value.
p1 <- ggplot(data = d, mapping = aes(x = fl, y = hwy, fill = fl)) +
  geom_boxplot()
print(p1)

# Overlay the raw observations on the boxes. The boxplot's own outlier
# markers are switched off (outlier.shape = NA) because every observation is
# drawn by the point layer anyway.
p1 <- ggplot(data = d, mapping = aes(x = fl, y = hwy)) +
  geom_boxplot(fill = "thistle", outlier.shape = NA) +
  # A plain geom_point() also works; jittering spreads coincident points.
  geom_point(
    position = position_jitter(width = 0.1, height = 0.7),
    color = "grey60",
    size = 2
  )
print(p1)

d <- mpg
# violin plot: a symmetric (mirrored) density of cty for each drive type,
# with the jittered raw observations drawn on top as small black points
p1 <- ggplot(data = d, mapping = aes(x = drv, y = cty, fill = drv)) +
  geom_violin() +
  geom_point(
    position = position_jitter(width = 0.2, height = 0.3),
    color = "black",
    size = 0.4
  )
p1

# ridgeline plot: one density curve of cty per drive type, laid out along
# the y axis
p2 <- ggplot(data = d, mapping = aes(x = cty, y = drv, fill = drv)) +
  ggridges::geom_density_ridges() +
  ggridges::theme_ridges()
p2

# beeswarm plot: every observation of cty is drawn as its own point within
# its drive-type column, coloured by drive type
p3 <- ggplot(data = d, mapping = aes(x = drv, y = cty, color = drv)) +
  ggbeeswarm::geom_beeswarm(method = "center", size = 2)
p3

# bubble plot: a scatter plot of hwy against displ in which point size
# encodes a third variable (cty) and fill colour a fourth (drv).
# Shape 21 is a circle with a separate outline (color) and interior (fill).
p4 <- ggplot(
  data = d,
  mapping = aes(x = displ, y = hwy, size = cty, fill = drv)
) +
  geom_point(shape = 21, color = "black", stroke = 0.5)
p4

# parallel coordinates plot
# Columns are selected by position in d; lines are grouped by column 7.
# NOTE(review): in ggplot2::mpg columns 3 and 9 are displ and hwy, and
# column 7 is drv -- confirm if d is ever a different data frame.
p5 <- GGally::ggparcoord(
  data = d,
  columns = c(3, 9), # alternative to try: c(3, 5, 8, 9)
  groupColumn = 7
)
p5

# lollipop plot: a thin segment from zero to the value, capped with a point
table(d$fl)
##
##   c   d   e   p   r
##   1   5   8  52 168
# The readable fuel names are listed in the same order as the tabled codes
# shown above (c, d, e, p, r).
fuel_data <- data.frame(
  table(d$fl),
  fuel = c("Natural Gas", "Diesel", "Ethanol", "Premium", "Regular")
)
fuel_data <- fuel_data[, -1] # drop the first column (the tabled fuel codes)
# NOTE(review): every count is shifted up by 100 here although the axis is
# labelled "Count" -- confirm this offset is intended.
fuel_data$Freq <- fuel_data$Freq + 100
p6 <- ggplot(data = fuel_data, mapping = aes(x = fuel, y = Freq)) +
  geom_segment(
    mapping = aes(x = fuel, xend = fuel, y = 0, yend = Freq),
    color = "grey",
    linewidth = 2
  ) +
  geom_point(color = "orange", size = 7) +
  labs(title = "Fuel Type", x = "", y = "Count") +
  coord_flip() + # horizontal lollipops: fuel names read along the side
  theme_light(base_size = 20, base_family = "Monaco") +
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0)
  )
p6

# a typical (and unsatisfactory) pie chart of the vehicle classes in mpg
p7 <- ggpie::ggpie(
  data = mpg,
  group_key = "class",
  count_type = "full",
  label_info = "ratio",
  label_type = "none" # try circle
)
p7

# a much better alternative: a waffle plot built from the tabled counts of
# each vehicle class
tabled_data <- as.data.frame(table(class = mpg$class))
p8 <- ggplot(
  data = tabled_data,
  mapping = aes(fill = class, values = Freq)
) +
  waffle::geom_waffle(n_rows = 8, size = 0.33, colour = "white") +
  coord_equal() +
  theme_void()
p8

# highly effective scatterpie diagrams
# Simulated data: five random (x, y) locations, each with three non-negative
# amounts A, B and C drawn with increasing spread (sd = 1, 2, 3).
d <- data.frame(
  x = rnorm(5),
  y = rnorm(5),
  A = abs(rnorm(5, sd = 1)),
  B = abs(rnorm(5, sd = 2)),
  C = abs(rnorm(5, sd = 3))
)
# One pie per row of d, positioned at (x, y) and divided among the columns
# named in cols.
p9 <- ggplot(data = d) +
  scatterpie::geom_scatterpie(
    aes(x = x, y = y),
    pie_scale = 4,
    cols = c("A", "B", "C")
  ) +
  coord_fixed()
# optional custom palette:
# scale_fill_manual(values=c("coral","grey95","grey90"))
p9

# mosaic plots for proportional data (1 or 2 factors). All combinations of factors must be present!
# no zero values
# Simple 1-factor partition
# options hspine (default),vspine,vbar,hbar
# Build every Tree x City combination, then attach a count to each one.
city_tree <- expand.grid(
  Tree = c("Oak", "Pine", "Maple", "Spruce", "Beech"),
  City = c("Bur", "Col", "Win")
)
# expand.grid() varies its first factor (Tree) fastest, so each line below
# holds the five tree counts (Oak, Pine, Maple, Spruce, Beech) for one city.
city_tree$Freq <- c(
  100, 2, 25, 9, 4, # Bur
  7, 3, 30, 30, 2, # Col
  2, 5, 6, 6, 6 # Win
)
# Convert the frequency table to long form for geom_mosaic
# (presumably one row per counted observation -- see DescTools::Untable).
city_tree_long <- DescTools::Untable(city_tree)
d <- city_tree_long
# 1-factor mosaic: the plot area is partitioned by Tree alone.
p10 <- ggplot(data = d) +
  geom_mosaic(
    aes(x = product(Tree), fill = Tree),
    divider = "hspine"
  ) +
  labs(title = "Tree Type")
p10

# 2-factor partition: the area is split by Tree and City together
# divider options: mosaic("h"), mosaic("v"), ddecker()
d <- city_tree_long
p11 <- ggplot(data = d) +
  geom_mosaic(
    aes(x = product(Tree, City), fill = Tree),
    divider = mosaic("v")
  ) +
  labs(title = "mosaic v")
p11

# Same 2-factor mosaic, using the mosaic("h") divider instead of mosaic("v").
p12 <- ggplot(data = d) +
  geom_mosaic(
    aes(x = product(Tree, City), fill = Tree),
    divider = mosaic("h")
  ) +
  labs(title = "mosaic h")
p12

# 2 factors with conditioning: Tree is partitioned within each City
p13 <- ggplot(data = d) +
  geom_mosaic(
    aes(
      x = product(Tree),
      fill = Tree,
      conds = product(City)
    )
  ) +
  labs(title = "f(Tree | City)")
p13

# An alternative to conditioning: draw a 1-factor mosaic of Tree in a
# separate facet for each City. The x-axis text and ticks are removed.
# Note that this reuses (overwrites) the name p13.
p13 <- ggplot(data = d) +
  geom_mosaic(aes(x = product(Tree), fill = Tree), divider = "vspine") +
  labs(title = "f(Tree | City)") +
  facet_grid(~City) +
  theme(
    aspect.ratio = 3,
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
p13

# tree map for hierarchical frequency data
# in contrast to mosaic, this can accommodate missing data
# simple one dimensional example (from mpg)
d <- as.data.frame(table(Fuel = mpg$fl, Drive = mpg$drv))
# Replace the single-character codes with readable labels. Recoding by code
# (rather than by row position with rep()) keeps every label attached to the
# right row whatever order the table comes out in; a code that is not listed
# becomes NA instead of being silently mislabelled.
d$Fuel <- as.character(factor(
  d$Fuel,
  levels = c("c", "d", "e", "p", "r"),
  labels = c("Compressed Gas", "Diesel", "Ethanol", "Premium", "Regular")
))
d$Drive <- as.character(factor(
  d$Drive,
  levels = c("4", "f", "r"),
  labels = c("Four Wheel", "Front Wheel", "Rear Wheel")
))
# One level of hierarchy: rectangles are sized by Freq and grouped by Fuel.
treemap(
  dtf = d,
  index = "Fuel",
  vSize = "Freq",
  type = "index"
)

# now 2d with subgroups: the first index is the outer grouping and the
# second is nested inside it
treemap(
  dtf = d,
  index = c("Fuel", "Drive"),
  vSize = "Freq",
  type = "index"
)

# the same data with the nesting order reversed (Fuel within Drive)
treemap(
  dtf = d,
  index = c("Drive", "Fuel"),
  vSize = "Freq",
  type = "index"
)
