Data Visualization

University of St.Gallen

Gilbert Fontana

October 27, 2022

Outline

  • About me 👋
  • How I work with ggplot2 💻
    • Walkthrough of a plot 📊
  • Questions and discussions 💭

About me

Education 🎓

  • BSc in Political Science
  • BSc in Economics
  • MSc in Economics & Econometrics

Experience 🛠️

  • Previously
    • Analyst at Statistics Sweden
    • Analyst at Kommuninvest
  • Currently
    • Statistical Programmer at Saco
    • Creator at Visual Capitalist

#TidyTuesday

  • A weekly data project by the
    R4DS Online Learning Community
    and Thomas Mock
  • A great way to practice data
    visualization and learn from
    others
    !

Basic theory of ggplot

ggplot2 is based on the foundations laid out in Leland Wilkinson’s book, The Grammar of Graphics.

It is built around the following principles:

  • Data
  • Aesthetics aes()
  • Statistics stat_
  • Scales scale_
  • Geometries geom_
  • Facets facet_
  • Coordinates coord_
  • Themes theme_

Visualization walkthrough

Visualization walkthrough

Packages 📦

#### LIBS ####
library(tidyverse)
library(showtext)
library(MetBrewer)
  • tidyverse
    • ggplot2
    • dplyr
    • tidyr
    • readr
    • purrr
    • tibble
    • stringr
    • forcats
  • showtext
    • A package for handling fonts
  • MetBrewer
    • Palettes inspired by works at the Metropolitan Museum of Art in New York

Visualization walkthrough

Data 🗃️

#### DATA ####
# Read the tax data set straight from GitHub into `df`.
# Columns shown on the slide: country, year, centaxgdp.
df <- read_csv(
  file = "https://raw.githubusercontent.com/gilbertfontana/External/main/University%20of%20St.%20Gallen/tax_data.csv"
)


country year centaxgdp
Denmark 1820 4
Denmark 1821 5
Denmark 1822 5
Denmark 1823 5
Denmark 1824 5

Visualization walkthrough

Style 🎨

#### STYLE ####
# Colours reused by the later theme() calls.
bg <- "#334756"       # plot background
txt_col <- "#DEA2A2"  # axis text, titles and caption

# Register the Google font under its own name and let showtext render text.
font <- "Oswald"
font_add_google(name = font, family = font, db_cache = FALSE)
showtext_auto(enable = TRUE)

# Make theme_minimal() with that font the default theme for every plot.
theme_set(theme_minimal(base_family = font))

Visualization walkthrough

Step 1

# Step 1: one line per country, no colour yet.
ggplot(
  data = df,
  mapping = aes(x = year, y = centaxgdp, group = country)
) +
  geom_line()

Visualization walkthrough

Step 2

# Step 2: colour each line by country.
ggplot(
  data = df,
  mapping = aes(x = year, y = centaxgdp, group = country)
) +
  geom_line(mapping = aes(color = country))

Visualization walkthrough

Step 3

# Step 3: add a point for every observation, coloured like the lines.
ggplot(
  data = df,
  mapping = aes(x = year, y = centaxgdp, group = country)
) +
  geom_line(mapping = aes(color = country)) +
  geom_point(mapping = aes(color = country))

Visualization walkthrough

Step 4

# Step 4: keep the lines, but draw points only for the year 2020.
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country))

Visualization walkthrough

Step 5

# Step 5: hide the legend.
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  theme(legend.position = "none")

Visualization walkthrough

Step 6

# Step 6: add country labels. This layer inherits the full `df`, so a label
# is drawn for every observation.
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  geom_text(aes(label = country)) +
  theme(legend.position = "none")

Visualization walkthrough

Step 7

# Step 7: restrict the labels to the 2020 observations, like the points.
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  geom_text(data = df %>% filter(year == 2020),
            aes(label = country)) +
  theme(legend.position = "none")

Visualization walkthrough

Step 8

# Step 8: style the labels (colour by country, font, size, horizontal offset).
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  geom_text(data = df %>% filter(year == 2020),
            aes(label = country,
                color = country),
            hjust = -0.2,  # negative hjust moves the label right of the point
            family = font,
            size = 4) +
  theme(legend.position = "none")

Visualization walkthrough

Step 9

# Step 9: turn off panel clipping and add plot margins so the labels can
# extend beyond the plotting area.
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  geom_text(data = df %>% filter(year == 2020),
            aes(label = country,
                color = country),
            hjust = -0.2,  # negative hjust moves the label right of the point
            family = font,
            size = 4) +
  coord_cartesian(clip = "off") +
  theme(legend.position = "none",
        plot.margin = margin(30, 30, 30, 30))

Visualization walkthrough

Step 10

# Step 10: replace the default colours with a MetBrewer palette.
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  geom_text(data = df %>% filter(year == 2020),
            aes(label = country,
                color = country),
            hjust = -0.2,  # negative hjust moves the label right of the point
            family = font,
            size = 4) +
  coord_cartesian(clip = "off") +
  # Four discrete colours from the "Paquin" palette.
  scale_color_manual(
    values = met.brewer(name = "Paquin",
                        type = "discrete",
                        n = 4)) +
  theme(legend.position = "none",
        plot.margin = margin(30, 30, 30, 30))

Visualization walkthrough

Step 11

# Step 11: fix the y axis to 0-40 and remove the padding around it.
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  geom_text(data = df %>% filter(year == 2020),
            aes(label = country,
                color = country),
            hjust = -0.2,  # negative hjust moves the label right of the point
            family = font,
            size = 4) +
  coord_cartesian(clip = "off") +
  # Four discrete colours from the "Paquin" palette.
  scale_color_manual(
    values = met.brewer(name = "Paquin",
                        type = "discrete",
                        n = 4)) +
  scale_y_continuous(limits = c(0, 40),
                     expand = c(0, 0)) +
  theme(legend.position = "none",
        plot.margin = margin(30, 30, 30, 30))

Visualization walkthrough

Step 12

# Step 12: apply the custom theme (no grid, coloured axis text, dark
# background taken from `bg` and `txt_col`).
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  geom_text(data = df %>% filter(year == 2020),
            aes(label = country,
                color = country),
            hjust = -0.2,  # negative hjust moves the label right of the point
            family = font,
            size = 4) +
  coord_cartesian(clip = "off") +
  # Four discrete colours from the "Paquin" palette.
  scale_color_manual(
    values = met.brewer(name = "Paquin",
                        type = "discrete",
                        n = 4)) +
  scale_y_continuous(limits = c(0, 40),
                     expand = c(0, 0)) +
  theme(
    panel.grid = element_blank(),
    axis.text = element_text(color = txt_col,
                             size = 10),
    axis.title = element_text(color = txt_col,
                              size = 12),
    legend.position = "none",
    plot.margin = margin(30, 30, 30, 30),
    # Same colour for fill and border so no outline is visible.
    plot.background = element_rect(fill = bg,
                                   color = bg)
  )

Visualization walkthrough

Step 13

# Step 13: final plot, adding the title, caption and axis labels and
# styling them in the theme.
df %>%
  ggplot(aes(x = year,
             y = centaxgdp,
             group = country)) +
  geom_line(aes(color = country)) +
  # `year` is numeric, so compare against a number rather than the string
  # "2020" (which only worked through implicit type coercion).
  geom_point(data = df %>% filter(year == 2020),
             aes(color = country)) +
  geom_text(data = df %>% filter(year == 2020),
            aes(label = country,
                color = country),
            hjust = -0.2,  # negative hjust moves the label right of the point
            family = font,
            size = 4) +
  coord_cartesian(clip = "off") +
  # Four discrete colours from the "Paquin" palette.
  scale_color_manual(
    values = met.brewer(name = "Paquin",
                        type = "discrete",
                        n = 4)) +
  scale_y_continuous(limits = c(0, 40),
                     expand = c(0, 0)) +
  labs(title = "The Rise of the\nNordic Welfare States",
       caption = "Gilbert Fontana | Data: Andersson & Brambor (2019) and Eurostat",
       x = "",
       y = "Central government tax revenue as a share of GDP (%)") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_text(color = txt_col,
                             size = 10),
    axis.title = element_text(color = txt_col,
                              size = 12),
    plot.title = element_text(size = 36,
                              hjust = 0.5,
                              color = txt_col,
                              face = "bold",
                              margin = margin(0, 0, 30, 0)),
    plot.caption = element_text(hjust = 0.5,
                                color = txt_col),
    legend.position = "none",
    plot.margin = margin(30, 30, 30, 30),
    # Same colour for fill and border so no outline is visible.
    plot.background = element_rect(fill = bg,
                                   color = bg)
  )

Visualization walkthrough

Thank you!