library(readxl)
library(dplyr)
library(stringr)
library(tidyr)
library(countrycode)
library(wbstats)
library(here)
library(openxlsx)
library(ggplot2)
library(patchwork)
source(here("utils/theme_and_colors_IMF.R"))
Average Annual Contributions to GDP Growth in CAPDR Countries
This document explains the steps to analyze GDP contributions using R. The analysis involves reading GDP and population data, cleaning and processing the data, calculating growth rates, and visualizing the contributions of different sectors to GDP growth.
Load Necessary Libraries
First, we load the necessary libraries for data manipulation, visualization, and reading Excel files.
Define the File Path
We define the file path to the Excel file containing GDP data.
# Path to the Excel workbook holding the GDP series, built with here().
file_path <- here(
  "databases/Download-GDPconstant-USD-countries.xlsx"
)
Read GDP Data
We read the specific sheet from the Excel file into a tibble, skipping the first two lines.
# Read the GDP sheet (skipping its first two rows) and store it as a tibble.
gdp_data <- as_tibble(
  read_excel(
    path = file_path,
    sheet = "Download-GDPconstant-USD-countr",
    skip = 2
  )
)
Clean and Process GDP Data
We select the relevant columns, convert CountryID to ISO3C codes, and rename columns for clarity. We also filter out rows that contain irrelevant indicators (mainly GDP from the supply side) and keep only the CAPDR countries and the USA.
# Keep the 1997 and 2022 values per country/indicator, keyed by ISO3C code,
# dropping ISIC and "Total Value Added" indicators and restricting the
# sample to the listed countries.
gdp_cleaned <- gdp_data %>%
  select(CountryID, Country, IndicatorName, y1997 = `1997`, y2022 = `2022`) %>%
  mutate(
    iso3c = countrycode(CountryID, origin = "un", destination = "iso3c")
  ) %>%
  select(iso3c, IndicatorName, y1997, y2022) %>%
  filter(
    !str_detect(IndicatorName, "ISIC"),
    !str_detect(IndicatorName, "Total Value Added"),
    iso3c %in% c("GTM", "SLV", "NIC", "HND", "CRI", "PAN", "DOM", "USA")
  )
Load and Process Population Data
We load population data from the World Bank API, keep only the observations for 1997 and 2022 with a population above one million, and pivot the result to a wide format.
# Download SP.POP.TOTL for 1997-2022, then keep only the 1997 and 2022
# observations whose population exceeds one million.
population_data <- wb_data(
  indicator = "SP.POP.TOTL",
  start_date = 1997,
  end_date = 2022
) %>%
  select(iso3c, year = date, population = SP.POP.TOTL) %>%
  filter(year %in% c(1997, 2022), population > 1e6)
# One row per country, with the two years spread into pop1997 / pop2022.
population_wide <- rename(
  pivot_wider(population_data, names_from = year, values_from = population),
  pop1997 = `1997`,
  pop2022 = `2022`
)
Merge GDP and Population Data
We merge the GDP and population data, and calculate per capita GDP for the years 1997 and 2022.
# Attach population to every GDP row by country code and derive the
# per capita values for both years.
merged_data <- gdp_cleaned %>%
  merge(population_wide, by = "iso3c") %>%
  mutate(
    cap1997 = y1997 / pop1997,
    cap2022 = y2022 / pop2022
  )
Filter and Select Relevant Rows and Columns
We filter the merged data to keep only relevant indicators and select necessary columns.
# Retain only the per capita columns for total GDP and its four
# expenditure-side components.
indicator_data <- merged_data %>%
  select(iso3c, IndicatorName, cap1997, cap2022) %>%
  filter(
    IndicatorName %in% c(
      "Gross capital formation",
      "Exports of goods and services",
      "Final consumption expenditure",
      "Imports of goods and services",
      "Gross Domestic Product (GDP)"
    )
  )
head(indicator_data)
  iso3c IndicatorName cap1997 cap2022
1 CRI Final consumption expenditure 6240.266 10608.361
2 CRI Gross capital formation 1261.141 2308.282
3 CRI Exports of goods and services 1922.507 4953.209
4 CRI Imports of goods and services 2139.096 4136.014
5 CRI Gross Domestic Product (GDP) 7085.988 13634.294
6 DOM Exports of goods and services 1073.437 1971.169
Calculate Growth Rates
We calculate the annual growth rates for each indicator.
# Length of the period in years, used to annualize the cumulative change.
years <- 2022 - 1997
# Compound average annual growth rate of each per capita series.
growth_data <- mutate(
  indicator_data,
  annual_growth_rate = (cap2022 / cap1997)^(1 / years) - 1
)
head(growth_data)
  iso3c IndicatorName cap1997 cap2022 annual_growth_rate
1 CRI Final consumption expenditure 6240.266 10608.361 0.02145163
2 CRI Gross capital formation 1261.141 2308.282 0.02447415
3 CRI Exports of goods and services 1922.507 4953.209 0.03858189
4 CRI Imports of goods and services 2139.096 4136.014 0.02672484
5 CRI Gross Domestic Product (GDP) 7085.988 13634.294 0.02652443
6 DOM Exports of goods and services 1073.437 1971.169 0.02460834
Separate GDP and Sector Data
We separate the total GDP data from the sector data.
# Rows for total GDP only.
# NOTE: this rebinds `gdp_data`, which earlier held the raw workbook import.
gdp_data <- filter(
  growth_data,
  IndicatorName == "Gross Domestic Product (GDP)"
)
head(gdp_data)
  iso3c IndicatorName cap1997 cap2022 annual_growth_rate
1 CRI Gross Domestic Product (GDP) 7085.988 13634.294 0.02652443
2 DOM Gross Domestic Product (GDP) 3520.539 8729.392 0.03699101
3 GTM Gross Domestic Product (GDP) 3016.865 4380.474 0.01502937
4 HND Gross Domestic Product (GDP) 1815.946 2480.337 0.01254960
5 NIC Gross Domestic Product (GDP) 1317.902 2188.313 0.02049071
6 PAN Gross Domestic Product (GDP) 6484.947 15976.815 0.03672447
# Every remaining row, i.e. the expenditure components without total GDP.
sector_data <- filter(
  growth_data,
  IndicatorName != "Gross Domestic Product (GDP)"
)
head(sector_data)
  iso3c IndicatorName cap1997 cap2022 annual_growth_rate
1 CRI Final consumption expenditure 6240.2663 10608.361 0.02145163
2 CRI Gross capital formation 1261.1413 2308.282 0.02447415
3 CRI Exports of goods and services 1922.5075 4953.209 0.03858189
4 CRI Imports of goods and services 2139.0961 4136.014 0.02672484
5 DOM Exports of goods and services 1073.4375 1971.169 0.02460834
6 DOM Gross capital formation 974.7255 2617.848 0.04030930
Calculate Contributions to GDP per Capita Growth
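We calculate the contributions of each sector to GDP per capita growth. Each component's contribution is its average annual growth rate weighted by its 1997 share of GDP per capita (share1997); imports enter with a negative sign because they are subtracted from GDP: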
# Contribution of each demand component to average annual growth in GDP per
# capita: the component's own annual growth rate weighted by its 1997 share
# of GDP per capita, with imports entering negatively.
contributions <- sector_data %>%
  # share1997 is not a column of sector_data, so derive it here from the
  # 1997 GDP per capita held in gdp_data (assumes one GDP row per country).
  left_join(
    gdp_data %>% select(iso3c, gdp_cap1997 = cap1997),
    by = "iso3c"
  ) %>%
  mutate(
    share1997 = cap1997 / gdp_cap1997,
    annual_contribution = ifelse(
      IndicatorName == "Imports of goods and services",
      -annual_growth_rate * share1997,
      annual_growth_rate * share1997
    )
  ) %>%
  group_by(iso3c) %>%
  # One row per country: the total plus one column per component.
  summarize(
    annual_contribution_total = sum(annual_contribution),
    annual_contribution_capital_formation =
      sum(ifelse(IndicatorName == "Gross capital formation",
                 annual_contribution, 0)),
    annual_contribution_exports =
      sum(ifelse(IndicatorName == "Exports of goods and services",
                 annual_contribution, 0)),
    annual_contribution_consumption =
      sum(ifelse(IndicatorName == "Final consumption expenditure",
                 annual_contribution, 0)),
    annual_contribution_imports =
      sum(ifelse(IndicatorName == "Imports of goods and services",
                 annual_contribution, 0))
  )
head(contributions)
# A tibble: 6 × 6
iso3c annual_contribution_total annual_contribution_c…¹ annual_contribution_…²
<chr> <dbl> <dbl> <dbl>
1 CRI 0.0256 0.00436 0.0105
2 DOM 0.0366 0.0112 0.00750
3 GTM 0.0139 0.00202 0.00115
4 HND 0.0110 0.000169 0.00325
5 NIC 0.0162 0.00150 0.0116
6 PAN 0.0370 0.0122 0.0164
# ℹ abbreviated names: ¹annual_contribution_capital_formation,
# ²annual_contribution_exports
# ℹ 2 more variables: annual_contribution_consumption <dbl>,
# annual_contribution_imports <dbl>
Renaming columns
We rename columns for clarity.
# Short display names for the contribution columns, with countries sorted
# from the largest to the smallest total.
contribution_summary <- contributions %>%
  select(
    iso3c,
    GDP = annual_contribution_total,
    Investment = annual_contribution_capital_formation,
    Exports = annual_contribution_exports,
    Consumption = annual_contribution_consumption,
    Imports = annual_contribution_imports
  ) %>%
  arrange(desc(GDP))
# View the contribution summary
print(contribution_summary)
# A tibble: 8 × 6
iso3c GDP Investment Exports Consumption Imports
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 PAN 0.0370 0.0122 0.0164 0.0235 -0.0151
2 DOM 0.0366 0.0112 0.00750 0.0287 -0.0108
3 CRI 0.0256 0.00436 0.0105 0.0189 -0.00807
4 NIC 0.0162 0.00150 0.0116 0.0168 -0.0136
5 SLV 0.0160 0.00180 0.00692 0.0168 -0.00958
6 USA 0.0150 0.00386 0.00213 0.0125 -0.00354
7 GTM 0.0139 0.00202 0.00115 0.0164 -0.00569
8 HND 0.0110 0.000169 0.00325 0.0169 -0.00931
Transform Data for Visualization
We transform the data to a long format for visualization with ggplot2.
# Long format for plotting: one row per country and sector.
data_long <- contribution_summary %>%
  pivot_longer(
    cols = c(GDP, Investment, Exports, Consumption, Imports),
    names_to = "Sector",
    values_to = "Value"
  ) %>%
  mutate(
    # Countries keep the order of contribution_summary on the x axis.
    iso3c = factor(iso3c, levels = unique(contribution_summary$iso3c)),
    group = ifelse(
      Sector %in% c("Consumption", "Investment", "Exports"),
      "left",
      "right"
    ),
    # Ensure the order of the factors in the desired stack order
    Sector = factor(
      Sector,
      levels = c("Exports", "Investment", "Consumption", "Imports", "GDP")
    ),
    # Rescale by 100 so the plotted values are in percentage points.
    Value = 100 * Value
  )
Define Custom Color Palette
We define a custom color palette for the sectors.
# Fill colour for each stacked component, keyed by sector name.
custom_colors <- c(
  Exports = "#e41a1c",
  Investment = "#FF7F0E",
  Consumption = "#377eb8",
  Imports = "#4daf4a"
)
Plot the Data
We plot the data using ggplot2.
# Stacked bars for the components, a point for the GDP value of each
# country, and a white label centred in every bar segment.
fig <- ggplot(
  filter(data_long, Sector != "GDP"),
  aes(x = iso3c, y = Value, fill = Sector)
) +
  geom_col() +
  geom_point(
    data = filter(data_long, Sector == "GDP"),
    mapping = aes(x = iso3c, y = Value),
    size = 3.4,
    position = position_nudge(y = 0)
  ) +
  labs(x = "", y = "") +
  theme_imf_panel() +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_fill_manual(values = custom_colors) +
  ggtitle(
    "Contribution to Average Annual Per Capita GDP Growth, 1997-2022",
    subtitle = "(Percentage points)"
  ) +
  geom_text(
    mapping = aes(label = sprintf("%.1f", Value)),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white"
  )
# Print the plot
plot(fig)
We notice that GDP does not appear in the legend. Because the GDP values are drawn with geom_point rather than as part of the stacked bars, they are not covered by the fill legend. To add a legend entry for GDP, we map Sector to the shape aesthetic in the geom_point layer and set the point shape with scale_shape_manual.
# Same chart as before, but the GDP points are mapped to a shape so that
# they receive their own legend entry.
fig <- ggplot(
  data_long,
  aes(x = iso3c, y = Value, fill = Sector)
) +
  geom_col(data = filter(data_long, Sector != "GDP")) +
  geom_point(
    data = filter(data_long, Sector == "GDP"),
    mapping = aes(shape = Sector),
    size = 3.4,
    position = position_nudge(y = 0)
  ) +
  labs(x = "", y = "") +
  theme_imf_panel() +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_fill_manual(values = custom_colors) +
  # Customize the point shape for "GDP"
  scale_shape_manual(values = c(GDP = 16)) +
  ggtitle(
    "Contribution to Average Annual Per Capita GDP Growth, 1997-2022",
    subtitle = "(Percentage points)"
  ) +
  geom_text(
    data = filter(data_long, Sector != "GDP"),
    mapping = aes(label = sprintf("%.1f", Value)),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white"
  )
# Print the plot
plot(fig)