library(readxl)
library(dplyr)
library(stringr)
library(tidyr)
library(countrycode)
library(wbstats)
library(here)
library(openxlsx)
library(ggplot2)
library(patchwork)
source(here("utils/theme_and_colors_IMF.R"))
Average Annual Contributions to GDP Growth in CAPDR Countries
This document explains the steps to analyze GDP contributions using R. The analysis involves reading GDP and population data, cleaning and processing the data, calculating growth rates, and visualizing the contributions of different sectors to GDP growth.
Load Necessary Libraries
First, we load the necessary libraries for data manipulation, visualization, and reading Excel files.
Define the File Path
We define the file path to the Excel file containing GDP data.
# Location of the Excel workbook with constant-USD GDP data, resolved
# relative to the project root by here().
file_path <- here("databases/Download-GDPconstant-USD-countries.xlsx")
Read GDP Data
We read the specific sheet from the Excel file into a tibble, skipping the first two lines.
# Read the GDP sheet, skipping the two rows above the column headers.
gdp_data <- read_excel(
  file_path,
  sheet = "Download-GDPconstant-USD-countr",
  skip = 2
) %>%
  as_tibble()
Clean and Process GDP Data
We select the relevant columns, convert CountryID to ISO3C codes, and rename columns for clarity. We also filter out rows that contain irrelevant indicators (mainly GDP from the supply side) and keep only the CAPDR countries and the USA.
# Keep the 1997 and 2022 values per country/indicator, keyed by ISO3C code.
gdp_cleaned <- gdp_data %>%
  select(CountryID, Country, IndicatorName, `1997`, `2022`) %>%
  # CountryID is converted from the "un" coding scheme to ISO3C.
  mutate(iso3c = countrycode(CountryID, origin = "un", destination = "iso3c")) %>%
  select(iso3c, IndicatorName, `1997`, `2022`) %>%
  rename(y1997 = `1997`, y2022 = `2022`) %>%
  # Drop indicators whose name mentions ISIC or "Total Value Added".
  filter(!str_detect(IndicatorName, "ISIC")) %>%
  filter(!str_detect(IndicatorName, "Total Value Added")) %>%
  filter(iso3c %in% c("GTM", "SLV", "NIC", "HND", "CRI", "PAN", "DOM", "USA"))
Load and Process Population Data
We load population data from the World Bank API, filter it for the years 1997 and 2022, and pivot it to a wide format.
# Total population (SP.POP.TOTL) from the World Bank API, restricted to the
# two endpoint years and to observations above one million people.
population_data <- wb_data(
  indicator = "SP.POP.TOTL",
  start_date = 1997,
  end_date = 2022
) %>%
  mutate(year = date) %>%
  select(iso3c, year, SP.POP.TOTL) %>%
  rename(population = SP.POP.TOTL) %>%
  filter(year %in% c(1997, 2022)) %>%
  filter(population > 1e6)
# One row per country with the 1997 and 2022 populations side by side.
population_wide <- population_data %>%
  pivot_wider(names_from = year, values_from = population) %>%
  rename(pop1997 = `1997`, pop2022 = `2022`)
Merge GDP and Population Data
We merge the GDP and population data, and calculate per capita GDP for the years 1997 and 2022.
# Attach population to every GDP indicator row and express each indicator
# in per capita terms for both endpoint years.
merged_data <- merge(gdp_cleaned, population_wide, by = "iso3c") %>%
  mutate(
    cap1997 = y1997 / pop1997,
    cap2022 = y2022 / pop2022
  )
Filter and Select Relevant Rows and Columns
We filter the merged data to keep only relevant indicators and select necessary columns.
# Keep the four expenditure components plus total GDP, in per capita terms.
indicator_data <- merged_data %>%
  filter(IndicatorName %in% c(
    "Gross capital formation",
    "Exports of goods and services",
    "Final consumption expenditure",
    "Imports of goods and services",
    "Gross Domestic Product (GDP)"
  )) %>%
  select(iso3c, IndicatorName, cap1997, cap2022)

head(indicator_data)
iso3c IndicatorName cap1997 cap2022
1 CRI Final consumption expenditure 6240.266 10608.361
2 CRI Gross capital formation 1261.141 2308.282
3 CRI Exports of goods and services 1922.507 4953.209
4 CRI Imports of goods and services 2139.096 4136.014
5 CRI Gross Domestic Product (GDP) 7085.988 13634.294
6 DOM Exports of goods and services 1073.437 1971.169
Calculate Growth Rates
We calculate the annual growth rates for each indicator.
# Number of years between the two endpoints.
years <- 2022 - 1997

# Compound average annual growth rate of each per capita indicator.
growth_data <- indicator_data %>%
  mutate(
    annual_growth_rate = (cap2022 / cap1997)^(1 / years) - 1
  )

head(growth_data)
iso3c IndicatorName cap1997 cap2022 annual_growth_rate
1 CRI Final consumption expenditure 6240.266 10608.361 0.02145163
2 CRI Gross capital formation 1261.141 2308.282 0.02447415
3 CRI Exports of goods and services 1922.507 4953.209 0.03858189
4 CRI Imports of goods and services 2139.096 4136.014 0.02672484
5 CRI Gross Domestic Product (GDP) 7085.988 13634.294 0.02652443
6 DOM Exports of goods and services 1073.437 1971.169 0.02460834
Separate GDP and Sector Data
We separate the total GDP data from the sector data.
# Total-GDP rows only. Note that this reuses the name gdp_data, so from here
# on it refers to the per capita GDP growth table, not the raw Excel import.
gdp_data <- growth_data %>%
  filter(IndicatorName == "Gross Domestic Product (GDP)")

head(gdp_data)
iso3c IndicatorName cap1997 cap2022 annual_growth_rate
1 CRI Gross Domestic Product (GDP) 7085.988 13634.294 0.02652443
2 DOM Gross Domestic Product (GDP) 3520.539 8729.392 0.03699101
3 GTM Gross Domestic Product (GDP) 3016.865 4380.474 0.01502937
4 HND Gross Domestic Product (GDP) 1815.946 2480.337 0.01254960
5 NIC Gross Domestic Product (GDP) 1317.902 2188.313 0.02049071
6 PAN Gross Domestic Product (GDP) 6484.947 15976.815 0.03672447
# Everything except total GDP: the four expenditure components.
sector_data <- growth_data %>%
  filter(IndicatorName != "Gross Domestic Product (GDP)")

head(sector_data)
iso3c IndicatorName cap1997 cap2022 annual_growth_rate
1 CRI Final consumption expenditure 6240.2663 10608.361 0.02145163
2 CRI Gross capital formation 1261.1413 2308.282 0.02447415
3 CRI Exports of goods and services 1922.5075 4953.209 0.03858189
4 CRI Imports of goods and services 2139.0961 4136.014 0.02672484
5 DOM Exports of goods and services 1073.4375 1971.169 0.02460834
6 DOM Gross capital formation 974.7255 2617.848 0.04030930
Calculate Contributions to GDP per Capita Growth
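We calculate the contributions of each sector to GDP per capita growth. Each component's contribution is its annual growth rate multiplied by its share in 1997 GDP per capita; imports enter with a negative sign because they are subtracted from GDP.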
# Contribution of each expenditure component to per capita GDP growth:
# growth rate of the component weighted by its 1997 share of per capita GDP,
# with imports entering negatively. Returns one row per country with the
# total and the four component contributions.
contributions <- sector_data %>%
  # share1997 is not a column of sector_data, so it is derived here as the
  # component's 1997 per capita value over 1997 per capita GDP.
  # gdp_data is the GDP-only subset of growth_data at this point.
  # NOTE(review): this definition reproduces the figures printed in this
  # document; confirm it matches the originally intended weights.
  left_join(
    gdp_data %>% select(iso3c, gdp_cap1997 = cap1997),
    by = "iso3c"
  ) %>%
  mutate(
    share1997 = cap1997 / gdp_cap1997,
    annual_contribution = ifelse(
      IndicatorName == "Imports of goods and services",
      -annual_growth_rate * share1997,
      annual_growth_rate * share1997
    )
  ) %>%
  group_by(iso3c) %>%
  summarize(
    annual_contribution_total = sum(annual_contribution),
    annual_contribution_capital_formation =
      sum(ifelse(IndicatorName == "Gross capital formation",
                 annual_contribution, 0)),
    annual_contribution_exports =
      sum(ifelse(IndicatorName == "Exports of goods and services",
                 annual_contribution, 0)),
    annual_contribution_consumption =
      sum(ifelse(IndicatorName == "Final consumption expenditure",
                 annual_contribution, 0)),
    annual_contribution_imports =
      sum(ifelse(IndicatorName == "Imports of goods and services",
                 annual_contribution, 0))
  )

head(contributions)
# A tibble: 6 × 6
iso3c annual_contribution_total annual_contribution_c…¹ annual_contribution_…²
<chr> <dbl> <dbl> <dbl>
1 CRI 0.0256 0.00436 0.0105
2 DOM 0.0366 0.0112 0.00750
3 GTM 0.0139 0.00202 0.00115
4 HND 0.0110 0.000169 0.00325
5 NIC 0.0162 0.00150 0.0116
6 PAN 0.0370 0.0122 0.0164
# ℹ abbreviated names: ¹annual_contribution_capital_formation,
# ²annual_contribution_exports
# ℹ 2 more variables: annual_contribution_consumption <dbl>,
# annual_contribution_imports <dbl>
Renaming columns
We rename columns for clarity.
# Short, presentation-friendly column names, sorted from the largest to the
# smallest total contribution.
contribution_summary <- contributions %>%
  select(
    iso3c,
    annual_contribution_total,
    annual_contribution_capital_formation,
    annual_contribution_exports,
    annual_contribution_consumption,
    annual_contribution_imports
  ) %>%
  rename(
    GDP = annual_contribution_total,
    Investment = annual_contribution_capital_formation,
    Exports = annual_contribution_exports,
    Consumption = annual_contribution_consumption,
    Imports = annual_contribution_imports
  ) %>%
  arrange(desc(GDP))

# View the contribution summary
print(contribution_summary)
# A tibble: 8 × 6
iso3c GDP Investment Exports Consumption Imports
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 PAN 0.0370 0.0122 0.0164 0.0235 -0.0151
2 DOM 0.0366 0.0112 0.00750 0.0287 -0.0108
3 CRI 0.0256 0.00436 0.0105 0.0189 -0.00807
4 NIC 0.0162 0.00150 0.0116 0.0168 -0.0136
5 SLV 0.0160 0.00180 0.00692 0.0168 -0.00958
6 USA 0.0150 0.00388 0.00214 0.0126 -0.00355
7 GTM 0.0139 0.00202 0.00115 0.0164 -0.00569
8 HND 0.0110 0.000169 0.00325 0.0169 -0.00931
Transform Data for Visualization
We transform the data to a long format for visualization with ggplot2.
# Long format: one row per country and sector, with values in percentage
# points.
data_long <- contribution_summary %>%
  pivot_longer(
    cols = c(GDP, Investment, Exports, Consumption, Imports),
    names_to = "Sector",
    values_to = "Value"
  ) %>%
  mutate(
    # Keep countries in the order of contribution_summary (sorted by GDP).
    iso3c = factor(iso3c, levels = unique(contribution_summary$iso3c)),
    group = ifelse(
      Sector %in% c("Consumption", "Investment", "Exports"),
      "left", "right"
    ),
    # Ensure the order of the factors in the desired stack order
    Sector = factor(
      Sector,
      levels = c("Exports", "Investment", "Consumption", "Imports", "GDP")
    ),
    # Convert from fractions to percentage points.
    Value = 100 * Value
  )
Define Custom Color Palette
We define a custom color palette for the sectors.
# Fill colors for the four stacked expenditure components.
custom_colors <- c(
  "Exports" = "#e41a1c",
  "Investment" = "#FF7F0E",
  "Consumption" = "#377eb8",
  "Imports" = "#4daf4a"
)
Plot the Data
We plot the data using ggplot2.
# Stacked bars for the four components, with total GDP overlaid as a point.
fig <- ggplot(
  data_long %>% filter(Sector != "GDP"),
  aes(x = iso3c, y = Value, fill = Sector)
) +
  geom_col() +
  geom_point(
    data = data_long %>% filter(Sector == "GDP"),
    aes(x = iso3c, y = Value),
    size = 3.4,
    position = position_nudge(y = 0)
  ) +
  xlab("") +
  ylab("") +
  theme_imf_panel() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  ) +
  scale_fill_manual(values = custom_colors) +
  labs(
    title = "Contribution to Average Annual Per Capita GDP Growth, 1997-2022",
    subtitle = "(Percentage points)"
  ) +
  # Value labels centered inside each stacked segment.
  geom_text(
    aes(label = sprintf("%.1f", Value)),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white"
  )

# Print the plot
plot(fig)
We notice that GDP does not appear in the legend. The legend is built from the fill aesthetic, but "GDP" is drawn with geom_point rather than as a filled bar, so it does not automatically show up there. To fix this, we map "GDP" to a distinct shape aesthetic, which gets its own legend entry separate from the fill aesthetic.
# Same chart as before, but GDP is mapped to the shape aesthetic so that it
# receives its own legend entry alongside the fill legend.
fig <- ggplot(data_long, aes(x = iso3c, y = Value, fill = Sector)) +
  geom_col(data = data_long %>% filter(Sector != "GDP")) +
  geom_point(
    data = data_long %>% filter(Sector == "GDP"),
    aes(shape = Sector),
    size = 3.4,
    position = position_nudge(y = 0)
  ) +
  xlab("") +
  ylab("") +
  theme_imf_panel() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  ) +
  scale_fill_manual(values = custom_colors) +
  scale_shape_manual(values = c(GDP = 16)) + # Customize the point shape for "GDP"
  labs(
    title = "Contribution to Average Annual Per Capita GDP Growth, 1997-2022",
    subtitle = "(Percentage points)"
  ) +
  # Value labels centered inside each stacked segment (GDP excluded).
  geom_text(
    data = data_long %>% filter(Sector != "GDP"),
    aes(label = sprintf("%.1f", Value)),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white"
  )

# Print the plot
plot(fig)