+ - 0:00:00
Notes for current slide
Notes for next slide

Data Visualization

Chapter 3. Data Visualization in R

Iñaki Úcar

Department of Statistics | uc3m-Santander Big Data Institute

Master in Computational Social Science

Licensed under Creative Commons Attribution CC BY 4.0 | Last generated: 2023-01-25

1 / 24

Directory of Visualizations

Based on The R Graph Gallery

2 / 24

Flow

< Contents

Chord diagram Network Sankey Arc diagram Edge bundling


  • Visualization of interconnection between entities
  • Generally, two datasets are required: nodes and edges
  • Data wrangling is a bit different
3 / 24

Flow Network

< Contents

library(ggraph)
library(tidygraph)
# Node-only view of the `highschool` graph: every node is sized by its
# in-degree, stored in the `Popularity` column.
as_tbl_graph(highschool) |>
  mutate(Popularity = centrality_degree(mode = "in")) |>
  ggraph(layout = "kk") +
  geom_node_point(mapping = aes(size = Popularity)) +
  theme(legend.position = "top")

4 / 24

Flow Network

< Contents

library(ggraph)
library(tidygraph)
# `highschool` graph with nodes sized by in-degree (`Popularity`) and
# straight edges ending in closed arrow heads.
as_tbl_graph(highschool) |>
  mutate(Popularity = centrality_degree(mode = "in")) |>
  ggraph(layout = "kk") +
  geom_node_point(mapping = aes(size = Popularity)) +
  geom_edge_link(
    arrow = arrow(length = unit(0.1, "inches"), type = "closed")
  ) +
  theme(legend.position = "top")

5 / 24

Flow Network

< Contents

library(ggraph)
library(tidygraph)
# `highschool` graph with nodes sized by in-degree (`Popularity`) and fan
# edges whose transparency follows the computed `index` along each edge.
as_tbl_graph(highschool) |>
  mutate(Popularity = centrality_degree(mode = "in")) |>
  ggraph(layout = "kk") +
  geom_node_point(mapping = aes(size = Popularity)) +
  # The edge legend is suppressed; only the node-size legend is shown.
  geom_edge_fan(
    mapping = aes(alpha = after_stat(index)),
    show.legend = FALSE
  ) +
  theme(legend.position = "top")

6 / 24

Flow Network

< Contents

library(ggraph)
library(tidygraph)
# `highschool` graph with nodes sized by in-degree (`Popularity`); fan edges
# fade along the computed `index` and are coloured by `year` (as a factor).
as_tbl_graph(highschool) |>
  mutate(Popularity = centrality_degree(mode = "in")) |>
  ggraph(layout = "kk") +
  geom_node_point(mapping = aes(size = Popularity)) +
  # The edge legend (alpha and colour) is suppressed.
  geom_edge_fan(
    mapping = aes(
      alpha = after_stat(index),
      color = factor(year)
    ),
    show.legend = FALSE
  ) +
  theme(legend.position = "top")

7 / 24

Flow Network

< Contents

library(ggraph)
library(tidygraph)
# `highschool` graph with nodes sized by in-degree (`Popularity`) and fading
# fan edges, split into one panel per `year` stacked in a single column.
as_tbl_graph(highschool) |>
  mutate(Popularity = centrality_degree(mode = "in")) |>
  ggraph(layout = "kk") +
  geom_node_point(mapping = aes(size = Popularity)) +
  geom_edge_fan(
    mapping = aes(alpha = after_stat(index)),
    show.legend = FALSE
  ) +
  facet_edges(~year, ncol = 1) +
  theme(legend.position = "top")

8 / 24

Flow Network

< Contents

library(ggraph)
library(tidygraph)
# `highschool` graph with nodes sized by in-degree (`Popularity`) and fading
# fan edges, faceted by `year` in one column and styled with theme_graph().
as_tbl_graph(highschool) |>
  mutate(Popularity = centrality_degree(mode = "in")) |>
  ggraph(layout = "kk") +
  geom_node_point(mapping = aes(size = Popularity)) +
  geom_edge_fan(
    mapping = aes(alpha = after_stat(index)),
    show.legend = FALSE
  ) +
  facet_edges(~year, ncol = 1) +
  theme_graph(
    base_size = 16,
    foreground = "#a53253",
    fg_text_colour = "white"
  ) +
  # Applied after theme_graph() so the legend position is not overridden.
  theme(legend.position = "top")

9 / 24

Flow Arc diagram

< Contents

library(ggraph)
library(tidygraph)
# Arc diagram of the `highschool` graph: nodes are placed by the "linear"
# layout, sized by in-degree (`Popularity`) and joined by fading arcs, with
# one panel per `year`.
as_tbl_graph(highschool) |>
  mutate(Popularity = centrality_degree(mode = "in")) |>
  ggraph(layout = "linear") +
  geom_node_point(mapping = aes(size = Popularity)) +
  geom_edge_arc(
    mapping = aes(alpha = after_stat(index)),
    show.legend = FALSE
  ) +
  facet_edges(~year, ncol = 1) +
  theme_graph(
    base_size = 16,
    foreground = "#a53253",
    fg_text_colour = "white"
  ) +
  # Applied after theme_graph() so the legend position is not overridden.
  theme(legend.position = "top")

10 / 24

Flow Arc diagram

< Contents

library(ggraph)
library(tidygraph)
# Circular arc diagram of the `highschool` graph: the "linear" layout with
# `circular = TRUE`, nodes sized by in-degree (`Popularity`), fading arcs and
# one panel per `year`.
as_tbl_graph(highschool) |>
  mutate(Popularity = centrality_degree(mode = "in")) |>
  ggraph(layout = "linear", circular = TRUE) +
  # Fixed aspect ratio so both axes use the same scale.
  coord_fixed() +
  geom_node_point(mapping = aes(size = Popularity)) +
  geom_edge_arc(
    mapping = aes(alpha = after_stat(index)),
    show.legend = FALSE
  ) +
  facet_edges(~year, ncol = 1) +
  theme_graph(
    base_size = 16,
    foreground = "#a53253",
    fg_text_colour = "white"
  ) +
  # Applied after theme_graph() so the legend position is not overridden.
  theme(legend.position = "top")

11 / 24

Flow Edge bundling

< Contents | Data from ggraph manual

library(ggraph)
library(tidygraph)
# flareGraph <- ...
# importFrom <- ...
# importTo <- ...
# Circular dendrogram layout of `flareGraph`, showing every node as a point.
# `flareGraph` must be built beforehand (see the placeholder comments).
ggraph(flareGraph, layout = "dendrogram", circular = TRUE) +
  coord_fixed() +
  geom_node_point() +
  theme_graph(base_size = 16)

12 / 24

Flow Edge bundling

< Contents | Data from ggraph manual

library(ggraph)
library(tidygraph)
# flareGraph <- ...
# importFrom <- ...
# importTo <- ...
# Circular dendrogram layout of `flareGraph`, drawing only the nodes flagged
# by `leaf` and colouring them by `class`.
# `flareGraph` must be built beforehand (see the placeholder comments).
ggraph(flareGraph, layout = "dendrogram", circular = TRUE) +
  coord_fixed() +
  geom_node_point(mapping = aes(filter = leaf, color = class)) +
  theme_graph(base_size = 16) +
  # Applied after theme_graph() so the legend position is not overridden.
  theme(legend.position = "top")

13 / 24

Flow Edge bundling

< Contents | Data from ggraph manual

library(ggraph)
library(tidygraph)
# flareGraph <- ...
# importFrom <- ...
# importTo <- ...
# Edge bundling on a circular dendrogram of `flareGraph`: leaf nodes coloured
# by `class`, plus bundled connections between the node pairs given by
# `importFrom` / `importTo`.
# All three objects must be built beforehand (see the placeholder comments).
ggraph(flareGraph, layout = "dendrogram", circular = TRUE) +
  coord_fixed() +
  geom_node_point(mapping = aes(filter = leaf, color = class)) +
  # Connection colour follows the computed `index` along each bundle.
  geom_conn_bundle(
    mapping = aes(color = after_stat(index)),
    data = get_con(importFrom, importTo),
    edge_alpha = 0.25
  ) +
  theme_graph(base_size = 16) +
  # Applied after theme_graph() so the legend position is not overridden.
  theme(legend.position = "top")

14 / 24

Flow Edge bundling

< Contents | Data from ggraph manual

library(ggraph)
library(tidygraph)
# flareGraph <- ...
# importFrom <- ...
# importTo <- ...
# Edge bundling on a circular dendrogram of `flareGraph`: leaf nodes coloured
# by `class`, bundled connections between `importFrom` / `importTo`, and an
# untitled "edge_direction" guide for the connection colour scale.
# All three objects must be built beforehand (see the placeholder comments).
ggraph(flareGraph, layout = "dendrogram", circular = TRUE) +
  coord_fixed() +
  geom_node_point(mapping = aes(filter = leaf, color = class)) +
  # Connection colour follows the computed `index` along each bundle.
  geom_conn_bundle(
    mapping = aes(color = after_stat(index)),
    data = get_con(importFrom, importTo),
    edge_alpha = 0.25
  ) +
  scale_edge_colour_distiller(NULL, guide = "edge_direction") +
  theme_graph(base_size = 16) +
  # Applied after theme_graph() so the legend position is not overridden.
  theme(legend.position = "top")

15 / 24

Flow Chord diagram

< Contents

  • Out of scope
  • Requires circlize, chorddiag
  • See the source

16 / 24

Flow Sankey / Alluvial

< Contents

# remotes::install_github(
# "davidsjoberg/ggsankey")
# Reshape five `mtcars` columns into the long format consumed by the Sankey
# geoms (columns x, next_x, node, next_node are used below).
df <- ggsankey::make_long(mtcars, cyl, vs, am, gear, carb)

# Basic Sankey diagram, filled by node value.
ggplot(df) +
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = factor(node)
  ) +
  ggsankey::geom_sankey() +
  scale_fill_viridis_d() +
  labs(x = NULL) +
  theme(legend.position = "top")

17 / 24

Flow Sankey / Alluvial

< Contents

# remotes::install_github(
# "davidsjoberg/ggsankey")
# Reshape five `mtcars` columns into the long format consumed by the Sankey
# geoms (columns x, next_x, node, next_node are used below).
df <- ggsankey::make_long(mtcars, cyl, vs, am, gear, carb)

# Sankey diagram filled by node value, with semi-transparent flows and a
# dark grey node outline.
ggplot(df) +
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = factor(node)
  ) +
  ggsankey::geom_sankey(flow.alpha = 0.6, node.color = "gray30") +
  scale_fill_viridis_d() +
  labs(x = NULL) +
  theme(legend.position = "top")

18 / 24

Flow Sankey / Alluvial

< Contents

# remotes::install_github(
# "davidsjoberg/ggsankey")
# Reshape five `mtcars` columns into the long format consumed by the Sankey
# geoms (columns x, next_x, node, next_node are used below).
df <- ggsankey::make_long(mtcars, cyl, vs, am, gear, carb)

# Sankey diagram with each node labelled by its value; the labels replace
# the legend, which is therefore hidden.
ggplot(df) +
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = factor(node),
    label = node
  ) +
  ggsankey::geom_sankey(flow.alpha = 0.6, node.color = "gray30") +
  ggsankey::geom_sankey_label(size = 3, color = "white", fill = "gray40") +
  scale_fill_viridis_d() +
  labs(x = NULL) +
  theme(legend.position = "none")

19 / 24

Flow Sankey / Alluvial

< Contents

# remotes::install_github(
# "davidsjoberg/ggsankey")
# Reshape five `mtcars` columns into the long format consumed by the Sankey
# geoms (columns x, next_x, node, next_node are used below).
df <- ggsankey::make_long(mtcars, cyl, vs, am, gear, carb)

# Labelled Sankey diagram using ggsankey's own theme; the labels replace the
# legend, which is therefore hidden.
ggplot(df) +
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = factor(node),
    label = node
  ) +
  ggsankey::geom_sankey(flow.alpha = 0.6, node.color = "gray30") +
  ggsankey::geom_sankey_label(size = 3, color = "white", fill = "gray40") +
  scale_fill_viridis_d() +
  ggsankey::theme_sankey(base_size = 16) +
  labs(x = NULL) +
  # Applied after theme_sankey() so the legend setting is not overridden.
  theme(legend.position = "none")

20 / 24

Flow Geospatial network

< Contents

library(tidygraph)
library(sfnetworks)
# Undirected spatial network built from `roxel`, with betweenness centrality
# stored on the nodes as `bc`.
# `FALSE` rather than `F`: `F` is a plain variable and can be reassigned.
net <- as_sfnetwork(roxel, directed = FALSE) |>
  activate("nodes") |>
  mutate(bc = centrality_betweenness())

# Plot with plain ggplot2 + sf: edges first (grey), nodes on top coloured by
# betweenness.
# Assumes ggplot2 is already attached; this snippet does not load it itself.
ggplot() +
  geom_sf(data = sf::st_as_sf(net, "edges"), color = "grey50") +
  geom_sf(data = sf::st_as_sf(net, "nodes"), aes(color = bc)) +
  scale_color_viridis_c() +
  # Legend anchored inside the panel, bottom-right corner.
  # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
  # (legend.position.inside replaces it) — confirm the target version.
  theme(
    legend.position = c(1, 0),
    legend.justification = c(1, 0)
  )

21 / 24

Flow Geospatial network

< Contents

library(ggraph)
library(tidygraph)
library(sfnetworks)
# Undirected spatial network built from `roxel`, with betweenness centrality
# stored on the nodes as `bc`.
# `FALSE` rather than `F`: `F` is a plain variable and can be reassigned.
net <- as_sfnetwork(roxel, directed = FALSE) |>
  activate("nodes") |>
  mutate(bc = centrality_betweenness())

# Plot with ggraph using its default layout (no `layout` argument is given),
# so node positions are not taken from the spatial coordinates.
ggraph(net) +
  geom_edge_link(color = "grey50") +
  geom_node_point(aes(color = bc)) +
  scale_color_viridis_c() +
  # Legend anchored inside the panel, bottom-right corner.
  # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
  # (legend.position.inside replaces it) — confirm the target version.
  theme(
    legend.position = c(1, 0),
    legend.justification = c(1, 0)
  )

22 / 24

Flow Geospatial network

< Contents

library(ggraph)
library(tidygraph)
library(sfnetworks)
# Undirected spatial network built from `roxel`, with betweenness centrality
# stored on the nodes as `bc`.
# `FALSE` rather than `F`: `F` is a plain variable and can be reassigned.
net <- as_sfnetwork(roxel, directed = FALSE) |>
  activate("nodes") |>
  mutate(bc = centrality_betweenness())
# Layout function for ggraph(): positions each node at its own spatial
# coordinates.
#
# graph: a network that supports activate() and sf::st_coordinates()
#        (presumably an sfnetwork — confirm against the caller).
# Returns a data.frame with columns `x` and `y`, taken from the "X" and "Y"
# columns of the node coordinate matrix.
layout_sf <- function(graph) {
  # Make the nodes the active element so the coordinates read are theirs.
  nodes <- activate(graph, "nodes")
  # Extract the coordinate matrix once instead of once per column.
  coords <- sf::st_coordinates(nodes)
  data.frame(x = coords[, "X"], y = coords[, "Y"])
}
# Draw `net` with ggraph, placing nodes via the custom `layout_sf` function;
# edges are grey and nodes are coloured by betweenness (`bc`).
ggraph(net, layout = layout_sf) +
  geom_edge_link(color = "grey50") +
  geom_node_point(mapping = aes(color = bc)) +
  scale_color_viridis_c() +
  # Legend anchored inside the panel, bottom-right corner.
  theme(legend.position = c(1, 0), legend.justification = c(1, 0))

23 / 24

Flow Geospatial network

< Contents

library(ggraph)
library(tidygraph)
library(sfnetworks)
# Undirected spatial network built from `roxel`, with betweenness centrality
# stored on the nodes as `bc`.
# `FALSE` rather than `F`: `F` is a plain variable and can be reassigned.
net <- as_sfnetwork(roxel, directed = FALSE) |>
  activate("nodes") |>
  mutate(bc = centrality_betweenness())
# Layout function for ggraph(): positions each node at its own spatial
# coordinates.
#
# graph: a network that supports activate() and sf::st_coordinates()
#        (presumably an sfnetwork — confirm against the caller).
# Returns a data.frame with columns `x` and `y`, taken from the "X" and "Y"
# columns of the node coordinate matrix.
layout_sf <- function(graph) {
  # Make the nodes the active element so the coordinates read are theirs.
  nodes <- activate(graph, "nodes")
  # Extract the coordinate matrix once instead of once per column.
  coords <- sf::st_coordinates(nodes)
  data.frame(x = coords[, "X"], y = coords[, "Y"])
}
# Draw `net` with ggraph, placing nodes via the custom `layout_sf` function
# and using a coord_sf() coordinate system set to the network's own CRS.
# Edges are grey and nodes are coloured by betweenness (`bc`).
ggraph(net, layout = layout_sf) +
  geom_edge_link(color = "grey50") +
  geom_node_point(mapping = aes(color = bc)) +
  coord_sf(crs = sf::st_crs(net)) +
  scale_color_viridis_c() +
  # Legend anchored inside the panel, bottom-right corner.
  theme(legend.position = c(1, 0), legend.justification = c(1, 0))

24 / 24

Directory of Visualizations

Based on The R Graph Gallery

2 / 24
Paused

Help

Keyboard shortcuts

↑, ←, Pg Up, k Go to previous slide
↓, →, Pg Dn, Space, j Go to next slide
Home Go to first slide
End Go to last slide
Number + Return Go to specific slide
b / m / f Toggle blackout / mirrored / fullscreen mode
c Clone slideshow
p Toggle presenter mode
t Restart the presentation timer
?, h Toggle this help
s Toggle scribble toolbox
Esc Back to slideshow