# Load necessary libraries
library(readxl)
library(dplyr)
library(stringr)
library(tidyr)
library(countrycode)
library(ggplot2)
library(openxlsx)
library(here)
source(here("utils/theme_and_colors_IMF.R"))
Sectoral shares in GDP
Introduction
Analyze GDP shares across different sectors for selected countries using R. We’ll load the necessary libraries, import data from Excel, clean and prepare it, then create a stacked bar chart showing sectoral shares for each country.
Loading Data
First, let’s define the file path and read the specific sheet from the Excel file into a tibble, starting from the third row.
# Workbook location, given relative to the project root resolved by here()
file_path <- "databases/Download-Shares-countries.xlsx"
# Load the "Download-Shares-countries" sheet, reading from row 3 onward,
# and convert the result to a tibble
gdp_data <- as_tibble(
  read.xlsx(
    xlsxFile = here(file_path),
    sheet = "Download-Shares-countries",
    startRow = 3
  )
)
Data Cleaning and Transformation
Next, we will clean and transform the data by selecting the relevant columns, converting the CountryID to ISO3 country codes, and filtering out unnecessary rows.
# Keep the 2022 values, map the UN numeric CountryID to an ISO3 code, and
# retain only the countries of interest.
gdp_cleaned <- gdp_data %>%
  select(CountryID, Country, IndicatorName, y2022 = `2022`) %>%
  # countrycode() warns about CountryID values it cannot match (see the
  # warning printed below); none of those survive the country filter
  mutate(
    iso3c = countrycode::countrycode(
      CountryID,
      origin = "un",
      destination = "iso3c"
    )
  ) %>%
  select(iso3c, IndicatorName, y2022) %>%
  filter(
    # Drop indicators whose name mentions ISIC or "Total Value Added"
    !str_detect(IndicatorName, "ISIC"),
    !str_detect(IndicatorName, "Total Value Added"),
    # Restrict to the selected countries
    iso3c %in% c("GTM", "SLV", "NIC", "HND", "CRI", "PAN", "DOM", "USA")
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `iso3c = countrycode::countrycode(CountryID, origin = "un",
destination = "iso3c")`.
Caused by warning:
! Some values were not matched unambiguously: 200, 230, 412, 530, 720, 736, 810, 835, 836, 886, 890
Now, we will filter and select the relevant rows and columns for our analysis.
# Build one row per country with one column per expenditure indicator
sector_data <- gdp_cleaned %>%
  filter(
    !is.na(iso3c),
    IndicatorName %in% c(
      "Gross capital formation",
      "General government final consumption expenditure",
      "Household consumption expenditure (including Non-profit institutions serving households)",
      "Exports of goods and services",
      "Imports of goods and services"
    )
  ) %>%
  select(iso3c, IndicatorName, y2022) %>%
  pivot_wider(names_from = IndicatorName, values_from = y2022) %>%
  # Replace the long indicator labels with short, syntactic column names
  rename(
    Investment = `Gross capital formation`,
    Government_consumption = `General government final consumption expenditure`,
    Household_consumption = `Household consumption expenditure (including Non-profit institutions serving households)`,
    Exports = `Exports of goods and services`,
    Imports = `Imports of goods and services`
  ) %>%
  # Flip the sign of imports
  mutate(Imports = -Imports)
Next, we will pivot the data to long format for easier visualization.
# Reshape to long format: one row per country/indicator pair, with the
# indicator name in `Indicator` and its number in the default `value` column
sector_long <- pivot_longer(
  sector_data,
  cols = c(
    Household_consumption,
    Government_consumption,
    Investment,
    Exports,
    Imports
  ),
  names_to = "Indicator"
)
We will order the bars by the share of household consumption and ensure the order of the factors in the desired stack order.
# Country order for the x axis: ascending household consumption
household_consumption <- filter(sector_long, Indicator == "Household_consumption")
ordering <- pull(arrange(household_consumption, value), iso3c)
# Encode both orderings as factor levels: countries follow `ordering`,
# indicators follow the explicit level sequence below
sector_long <- sector_long %>%
  mutate(
    iso3c = factor(iso3c, levels = ordering),
    Indicator = factor(
      Indicator,
      levels = c(
        "Imports",
        "Exports",
        "Investment",
        "Government_consumption",
        "Household_consumption"
      )
    )
  )
Visualization
Now, we will create a custom color palette and plot the data with the specified order using ggplot2.
# Fill colour for each indicator, keyed by the indicator's column name
custom_colors <- c(
  Exports = "#e41a1c",                # red
  Investment = "#FF7F0E",             # orange
  Government_consumption = "#56B4E9", # light blue
  Household_consumption = "#377eb8",  # blue
  Imports = "#4daf4a"                 # green
)
# Stacked column chart: one bar per country, one coloured segment per indicator
figr <- ggplot(
  data = sector_long,
  mapping = aes(x = iso3c, y = value, fill = Indicator)
) +
  geom_col() +
  # Blank axis titles
  labs(x = "", y = "") +
  theme_imf_panel() + # Adjust this to your preferred theme
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_fill_manual(values = custom_colors) +
  ggtitle(
    label = "Sectoral Shares in GDP, 2022",
    subtitle = "(Percentage points)"
  ) +
  # Print each segment's value (one decimal) centred inside the segment
  geom_text(
    mapping = aes(label = sprintf("%.1f", value)),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white"
  )
# Render the chart on the active graphics device
print(figr)
Saving the Plot
Finally, we will save the plot as a PNG file with a high resolution.
# Save the plot as a 600 dpi PNG under figures/ in the project root.
# ggsave() fails when the target directory is missing, so create it first;
# dir.create() leaves an existing directory untouched and, with
# showWarnings = FALSE, stays silent about it.
dir.create(here("figures"), recursive = TRUE, showWarnings = FALSE)
ggsave(
  here("figures/share-expenditures.png"),
  plot = figr,
  dpi = 600,
  # 8.5 x 5.5 in base size, scaled up by 20%
  width = 8.5 * 1.2,
  height = 5.5 * 1.2,
  units = "in"
)
Conclusion
You’ve now seen how to analyze and visualize GDP shares across sectors. We loaded the data, cleaned it, and created a stacked bar chart. You can adapt this approach for other economic indicators and datasets.
You’ve now covered the complete workflow from data loading to saving the plot. Adjust the theme and plot details to suit your preferences.