class: center, middle, inverse, title-slide .title[ # Data Analysis and Visualization ] .subtitle[ ## Chapter 3. Exploratory Data Analysis in R ] .author[ ### Iñaki Úcar ] .institute[ ### Department of Statistics | uc3m-Santander Big Data Institute ] .institute[ ### Bachelor in Data and Business Analytics ] .date[ ###
Licensed under Creative Commons Attribution
CC BY 4.0
Last generated: 2025-09-06
] --- class: base24 # Catalog of Graphs and Applications .footnote[Based on [The R Graph Gallery](https://r-graph-gallery.com/)] .pull-left[ - .distribution[[Distribution](ch3_1.html#3)].icons[      ] - .correlation[[Correlation](ch3_2.html#3)].icons[       ] - .ranking[[Ranking](ch3_3.html#3)].icons[       ] - .part[[Part of a Whole](ch3_4.html#3)].icons[       ] ] .pull-right[ - .evolution[[Evolution](ch3_5.html#3)].icons[      ] - .map[[Map](ch3_6.html#3)].icons[       ] - .flow[[Flow](ch3_7.html#3)].icons[      ] - .other[[Other resources](ch3_8.html#3)].icons[     ] ] --- class: correlation # Correlation .footnote[[< Contents](#2)] [](https://r-graph-gallery.com/scatterplot.html) [](https://r-graph-gallery.com/heatmap.html) [](https://r-graph-gallery.com/correlogram.html) [](https://r-graph-gallery.com/bubble-chart.html) [](https://r-graph-gallery.com/connected-scatterplot.html) [](https://r-graph-gallery.com/2d-density-chart.html) .bottom[Scatter] .bottom[Heatmap] .bottom[Correlogram] .bottom[Bubble] .bottom[Connected scatter] .bottom[Density 2D] <br> .black.font120[ - Visualization of the **relationship** between two variables - Two continuous, or two discrete, or mixed - Options to include a third one ] --- class: correlation # Correlation  Scatter .footnote[[< Contents](#2)] .pull-left-mod[ ``` r gapminder::gapminder |> filter(year == 1997) |> ggplot() + aes(gdpPercap, lifeExp) + scale_x_log10() + * geom_point() ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Scatter .footnote[[< Contents](#2)] .pull-left-mod[ ``` r gapminder::gapminder |> filter(year == 1997) |> ggplot() + aes(gdpPercap, lifeExp) + scale_x_log10() + * geom_point(aes(color=continent)) + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-color-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # 
Correlation  Bubble .footnote[[< Contents](#2)] .pull-left-mod[ ``` r gapminder::gapminder |> filter(year == 1997) |> ggplot() + aes(gdpPercap, lifeExp) + scale_x_log10() + geom_point(aes(color=continent, * size=pop)) + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-bubble-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Bubble .footnote[[< Contents](#2)] .pull-left-mod[ ``` r gapminder::gapminder |> filter(year == 1997) |> ggplot() + aes(gdpPercap, lifeExp) + scale_x_log10() + geom_point(aes(color=continent, size=pop), * alpha=0.7) + * scale_size_area(max_size=20) + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-bubble-scale-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Bubble .footnote[[< Contents](#2)] .pull-left-mod[ ``` r gapminder::gapminder |> filter(year == 1997) |> ggplot() + aes(gdpPercap, lifeExp) + scale_x_log10() + geom_point(aes(color=continent, size=pop), alpha=0.7) + scale_size_area(max_size=20) + * geom_smooth() + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-smooth-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Bubble .footnote[[< Contents](#2)] .pull-left-mod[ ``` r gapminder::gapminder |> filter(year == 1997) |> ggplot() + aes(gdpPercap, lifeExp) + scale_x_log10() + geom_point(aes(color=continent, size=pop), alpha=0.7) + scale_size_area(max_size=20) + * geom_smooth(method="lm") + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-smooth-lm-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Connected Scatter .footnote[[< Contents](#2)] .pull-left-mod[ ``` r gapminder::gapminder |> 
filter(year == 1997) |> ggplot() + aes(gdpPercap, lifeExp) + scale_x_log10() + geom_point(aes(color=continent)) + * geom_line(aes(color=continent)) + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-connected-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Connected Scatter .footnote[[< Contents](#2)] .pull-left-mod[ ``` r babynames::babynames |> filter(name %in% c( "Ashley", "Amanda")) |> filter(sex == "F") |> filter(year > 1970) |> select(year, name, n) |> spread(key = name, value=n) |> ggplot() + * aes(Amanda, Ashley, color=year) + geom_point() + * geom_path() + scale_color_viridis_c() + theme(legend.position=c(0, 1), legend.justification=c(0, 1)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-connected-path-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Connected Scatter .footnote[[< Contents](#2)] .pull-left-mod[ ``` r df <- babynames::babynames |> filter(name %in% c( "Ashley", "Amanda")) |> filter(sex == "F") |> filter(year > 1970) |> select(year, name, n) |> spread(key = name, value=n) *text <- df |> filter(year %in% c( 1972, 1980, 1984, 1987, 2012)) ggplot(df) + aes(Amanda, Ashley, color=year) + * geom_path(arrow=arrow( * angle=15, type="closed", * length=unit(0.1, "inches"))) + scale_color_viridis_c() + * geom_label(aes(label=year), text) + theme(legend.position=c(0, 1), legend.justification=c(0, 1)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-connected-path-label-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Scatter .footnote[[< Contents](#2)] .pull-left-mod[ ``` r ggplot(faithful) + aes(eruptions, waiting) + * geom_point() ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-distribution-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Scatter .footnote[[< 
Contents](#2)] .pull-left-mod[ ``` r ggplot(faithful) + aes(eruptions, waiting) + geom_point() + * ggside::geom_xsidehistogram() ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/scatter-distribution-margin-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Density 2D .footnote[[< Contents](#2)] .pull-left-mod[ ``` r ggplot(faithful) + aes(eruptions, waiting) + * geom_bin2d() + scale_fill_viridis_c() + ggside::geom_xsidehistogram() + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/density2d-histogram-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Density 2D .footnote[[< Contents](#2)] .pull-left-mod[ ``` r ggplot(faithful) + aes(eruptions, waiting) + * geom_hex() + scale_fill_viridis_c() + ggside::geom_xsidehistogram() + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/density2d-hex-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Density 2D .footnote[[< Contents](#2)] .pull-left-mod[ ``` r ggplot(faithful) + aes(eruptions, waiting) + * geom_density2d_filled() + ggside::geom_xsidehistogram() + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/density2d-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Density 2D .footnote[[< Contents](#2)] .pull-left-mod[ ``` r ggplot(faithfuld) + * aes(eruptions, waiting, fill=density) + * geom_raster() + scale_fill_viridis_c() + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/density2d-raster-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Density 2D .footnote[[< Contents](#2)] .pull-left-mod[ ``` r ggplot(faithfuld) + aes(eruptions, waiting, fill=density) + * 
geom_raster(interpolate=TRUE) + scale_fill_viridis_c() + theme(legend.position=c(1, 0), legend.justification=c(1, 0)) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/density2d-raster-interpolate-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Heatmap .footnote[[< Contents](#2)] .pull-left-mod[ ``` r mtcars |> tibble::rownames_to_column("model") |> gather("key", "value", -model) |> ggplot() + aes(key, model, fill=value) + * geom_tile() + scale_fill_viridis_c( * trans="pseudo_log") + labs(x=NULL, y=NULL) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/heatmap-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Correlogram .footnote[[< Contents](#2)] .pull-left-mod[ ``` r mtcars |> cor() |> * ggcorrplot::ggcorrplot() + theme(legend.position="top") ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/correlation-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Correlogram .footnote[[< Contents](#2)] .pull-left-mod[ ``` r mtcars |> cor() |> ggcorrplot::ggcorrplot( * hc.order=TRUE, * outline.color="white") + theme(legend.position="top") ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/correlation-order-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Correlogram .footnote[[< Contents](#2)] .pull-left-mod[ ``` r mtcars |> cor() |> ggcorrplot::ggcorrplot( hc.order=TRUE, * method="circle") + theme(legend.position="top") ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/correlation-circle-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Correlogram .footnote[[< Contents](#2)] .pull-left-mod[ ``` r GGally::ggpairs(iris) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/correlogram-1.png" style="display: block; margin: auto;" /> ] --- class: correlation # Correlation  Correlogram .footnote[[< Contents](#2)] .pull-left-mod[ ``` r 
GGally::ggpairs( iris, * aes(color=Species) ) ``` ] .pull-right-mod[ <img src="ch3_files/figure-html/correlogram-color-1.png" style="display: block; margin: auto;" /> ]