South Korea’s Demographic Crisis: The Possible Cause(s) in Declining Birthrate

Author

D’Angelo Francis and Su Yeon Seo, Georgetown University McCourt School of Public Policy

Code
# make sure to load all packages here up top. If you need to install a package, include the commented code here as well :)

library(fontawesome) # install.packages("fontawesome") - for waffle graphs
library(styler) # install.packages("styler") - check for Tidy style guide adherence
library(lintr) # install.packages("lintr") - check for Tidy style guide adherence
library(tidyverse)
library(readxl)
library(haven)
library(tidyverse)
library(patchwork) # easy combine plots
library(tigris) # direct download of shape files from US Cenus 
library(sf) # manipulate shapefiles 
library(tidycensus) # shapefiles from Census 
library(rgeoboundaries) # international boundaries 
library(ggrepel)  # install.packages("ggrepel")
library(janitor)  # install.packages("janitor")
library(ggthemes) # install.packages("ggthemes")
library(crsuggest) # install.packages("crsuggest")


options(scipen=999) # no scientific notation for plots

Our Research Question

How have Gross Regional Domestic Product (GRDP), female labor force participation, and women’s rights affected fertility rates among South Koreans?

Graph 1A and 1B - South Korean Fertility Rates and GDP by Province

Code
# Geospatial inputs: province boundaries plus city point locations

# adm1-level boundaries for the Republic of Korea
rok <- geoboundaries(country = "Republic of Korea", adm_lvl = "adm1")

# city points, keeping only rows whose `ctry` is "South Korea"
rok_cities <- filter(
  clean_names(read_sf("data/cities.geojson")),
  ctry == "South Korea"
)

# subset of seven named cities
rok_major_cities <- filter(
  rok_cities,
  name %in% c(
    "Seoul", "Busan", "Incheon", "Ulsan", "Daegu", "Daejeon", "Gwangju"
  )
)

# Fertility by province (2022): reshape the year columns to long form, keep
# the total fertility rate (TFR) rows, and rename the provinces so they can be
# joined to `rok` on `shapeName`

rok_fertility_province_2022 <-
  read_csv("data/Agespecific_Fertility_Rate_Province__20241008012056.csv") |>
  clean_names() |>
  # clean_names() prefixes the year columns with "x" (e.g. "x2022")
  pivot_longer(
    cols = starts_with("x", ignore.case = TRUE),
    names_to = "year",
    values_to = "fertility_rate"
  ) |>
  rename(shapeName = by_province_1, age = by_age_1) |>
  select(-by_variant_1) |>
  # drop the national aggregate; keep only the 2022 TFR rows
  filter(shapeName != "Whole country", age == "TFR", year == "x2022") |>
  mutate(
    shapeName = case_when(
      shapeName == "Chungcheongbuk-do" ~ "North Chungcheong",
      shapeName == "Chungcheongnam-do" ~ "South Chungcheong",
      shapeName == "Jeollabuk-do" ~ "North Jeolla",
      shapeName == "Jeollanam-do" ~ "South Jeolla",
      shapeName == "Gyeongsangbuk-do" ~ "North Gyeongsang",
      shapeName == "Gyeongsangnam-do" ~ "South Gyeongsang",
      shapeName == "Jeju-do" ~ "Jeju",
      shapeName == "Gangwon-do" ~ "Gangwon",
      shapeName == "Gyeonggi-do" ~ "Gyeonggi",
      .default = shapeName
    )
  )

# attach the fertility values to the province boundaries
# NOTE(review): `rok` is presumably already an sf object, in which case the
# `crs` argument here may have no effect -- confirm before relying on EPSG:5178
rok_fertility_shapefile_2022 <-
  full_join(rok, rok_fertility_province_2022, by = join_by(shapeName)) |>
  st_as_sf(crs = 5178)

# metropolitan-city subset; "Gwangju" was previously misspelled "Gwuangju",
# which silently excluded that city from the subset
rok_fertility_shapefile_metropolitan_2022 <-
  rok_fertility_shapefile_2022 |>
  filter(
    shapeName %in% c(
      "Seoul", "Busan", "Daegu", "Incheon", "Gwangju", "Daejeon", "Ulsan"
    )
  )

# Graph 1A: provinces filled by fertility rate, with major cities labelled

korea_fertility_plot_2022 <-
  ggplot() +
  # province polygons shaded by fertility rate
  geom_sf(
    data = rok_fertility_shapefile_2022,
    mapping = aes(fill = fertility_rate)
  ) +
  # city points and their repelled labels
  geom_sf(data = rok_major_cities) +
  geom_label_repel(
    data = rok_major_cities,
    mapping = aes(x = lon, y = lat, label = name),
    label.size = 0.01,
    size = 2.5,
    color = "black",
    arrow = arrow()
  ) +
  labs(
    title = "South Korea's Fertility Rate",
    subtitle = "South Korea is below replacement rate(2.1 children) nationwide, \n especially in metropolitan areas",
    caption = "Source: Statistics Korea"
  ) +
  scale_fill_continuous(
    name = "Total Fertility Rate \n Number of Children",
    low = "#fff",
    high = "pink"
  ) +
  theme_void() +
  theme(text = element_text(face = "bold"))

korea_fertility_plot_2022

South Korea’s Fertility Rate by Province

To find the variables potentially related to South Korea’s fertility rate, we first analyzed the data from the Korean Statistical Information Service (KOSIS) to see the relationship between “province” (the area of residence) and “fertility rate”. Graph 1A shows that the fertility rate in South Korea is below the replacement rate (2.1 children) nationwide and is particularly low in metropolitan areas. The fertility rates are lower in urban areas compared to rural areas, with Seoul having the lowest fertility rate out of all regions.

Code
# 2022 GRDP by region: shorten the province names, strip the currency text so
# both GDP columns become numeric, and rename the key column for the join

rok_grdp_2022 <-
  read_excel("data/Korean GRDP Wikipedia Excel import.xlsx") |>
  clean_names() |>
  mutate(
    region = case_when(
      region == "Gangwon Province, South Korea" ~ "Gangwon",
      region == "Gyeonggi Province" ~ "Gyeonggi",
      region == "Jeju Province" ~ "Jeju",
      region == "North Chungcheong Province" ~ "North Chungcheong",
      region == "North Gyeongsang Province" ~ "North Gyeongsang",
      region == "North Jeolla Province" ~ "North Jeolla",
      region == "South Chungcheong Province" ~ "South Chungcheong",
      region == "South Gyeongsang Province" ~ "South Gyeongsang",
      region == "South Jeolla Province" ~ "South Jeolla",
      .default = region
    ),
    # remove the "KR₩" / "trillion" text before converting to numeric
    gdp_won = as.numeric(gsub("KR₩|trillion", "", gdp_won)),
    # remove the "US$" / "billion" text before converting to numeric
    gdp_us = as.numeric(gsub("US\\$|billion", "", gdp_us))
  ) |>
  rename(shapeName = region)

# attach the GRDP values to the province boundaries
rok_gdp_shapefile_2022 <-
  rok |>
  full_join(rok_grdp_2022, by = join_by(shapeName)) |>
  st_as_sf(crs = 5178)

# Graph 1B: provinces filled by 2022 GRDP (US$), with major cities labelled

korea_grdp_plot_2022 <-
  ggplot() +
  # province polygons shaded by GRDP in US$
  geom_sf(data = rok_gdp_shapefile_2022, aes(fill = gdp_us)) +
  # city points and their repelled labels
  geom_sf(data = rok_major_cities) +
  geom_label_repel(
    data = rok_major_cities,
    mapping = aes(x = lon, y = lat, label = name),
    label.size = 0.01,
    size = 2.5,
    color = "black",
    arrow = arrow()
  ) +
  scale_fill_gradient(
    name = "Gross Domestic Regional Product (2022 $US)",
    low = "#fff",
    high = "#0F64CD",
    # The formatter must be named `labels`: passed unnamed it is matched
    # positionally to some other scale parameter and never formats the legend.
    # NOTE(review): `gdp_us` has its "billion" text stripped upstream, so the
    # values look like billions of US$ -- consider a unit suffix; confirm.
    labels = scales::dollar_format()
  ) +
  theme_void() +
  theme(text = element_text(face = "bold")) +
  labs(
    title = "South Korea's Gross Domestic Product (GDP) by Region",
    subtitle = "South Korea's Gross Domestic Product (GDP) gets higher as it gets closer to the capital \n(Seoul and Gyeonggi) region",
    caption = "Source: The World Bank"
  )

korea_grdp_plot_2022

South Korea’s 2022 Nominal Gross Regional Domestic Product (GRDP)

We also analyzed the data from The World Bank to see the relationship between “economic productivity (GDP)” and “fertility rate” by region. Graph 1B shows that the urban areas with relatively low fertility rates tend to have a higher gross regional domestic product (GRDP). This is most noticeable around the capital area of Seoul and Gyeonggi province, where businesses and infrastructure are concentrated but where competition over jobs and regional resources is also more intense.

Graph 2 - Observing changes in South Korea’s GDP and Fertility Rates over time: 1970 - 2023

Code
# Fertility over time: read the wide table, reshape so each item becomes a
# column, and keep the yearly total fertility rate

rok_fertility_1970_2023 <-
  read_csv(
    "data/Vital_Statistics_of_Korea_20241.csv",
    # one col_double() entry per year column, "1970" through "2023"
    col_types = do.call(
      cols,
      setNames(rep(list(col_double()), 54), as.character(1970:2023))
    )
  ) |>
  # columns 2 to 55 are stacked into year/value pairs
  pivot_longer(cols = 2:55, names_to = "year") |>
  # each entry of "By items" becomes its own column
  pivot_wider(names_from = "By items", values_from = value) |>
  clean_names() |>
  select(year, total_fertility_rate_persons) |>
  # year arrives as text from the column names
  mutate(year = as.numeric(year))


# Global GDP table: reshape the year columns to long form, then keep the
# South Korea rows and join them to the fertility series

world_gdp_2023 <-
  read_excel("data/gdp_2023.xls") |>
  clean_names() |> # from the janitor package
  pivot_longer(
    cols = starts_with("x", ignore.case = TRUE),
    names_to = "year",
    values_to = "gdp",
    values_drop_na = TRUE
  ) |>
  select(-indicator_code) |> # not needed
  # strip the "x" prefix so year is numeric for the time-series plot
  mutate(year = as.numeric(gsub("x", "", year)))

rok_gdp_1970_2023 <-
  world_gdp_2023 |>
  filter(country_code == "KOR") |>
  # NOTE(review): 10e6 equals 1e7 (ten million) -- confirm this is the
  # intended scale and that the axis label states the unit
  mutate(gdp = gdp / 10e6)

# one row per year, carrying both the fertility rate and GDP
rok_gdp_fertility <-
  rok_fertility_1970_2023 |>
  full_join(rok_gdp_1970_2023, by = join_by(year))


# Graph 2: fertility rate against GDP, one labelled point per year

rok_gdp_fertility |>
  ggplot(
    mapping = aes(
      x = gdp,
      y = total_fertility_rate_persons,
      label = year
    )
  ) +
  geom_point() +
  geom_text_repel(size = 3) +
  # smoothed trend line without the confidence band
  geom_smooth(se = FALSE) +
  labs(
    x = "GDP (US$ - 2023)",
    y = "Total Fertility Rate (Persons)",
    title = "The Relationship between \nGross Domestic Product (GDP) & Fertility Rate in South Korea: 1970 - 2023",
    subtitle = str_wrap("Over the past 50 years, South Korea's GDP has increased while the Fertility Rate has decreased"),
    caption = "Source: The World Bank, Korean Statistical Information Service (KOSIS)"
  ) +
  theme_minimal() +
  # no grid lines, visible axis lines, bold text throughout
  theme(
    panel.grid = element_blank(),
    axis.line = element_line(),
    text = element_text(face = "bold")
  )

For an intuitive understanding of the relationship between Gross Domestic Product (GDP) and the Fertility Rate in South Korea, we analyzed the data from The World Bank and KOSIS. Graph 2 shows that as the country’s GDP grew over time, its fertility rate decreased, indicating an inverse relationship.

Graph 3 - South Korea’s Fertility Rate and Female Employment Rate

Code
# Female employment series: keep the year columns, take the second row,
# reshape to long form, and join to the fertility series from 2000 onward

rok_female_employment_rate <-
  read_csv("data/Summary_of_economically_active_pop._by_gender_20241011083457.csv") |>
  # only the columns whose names start with "2"
  select(starts_with("2"))

rok_female_employment_rate <-
  rok_female_employment_rate[2, ] |>
  pivot_longer(
    cols = starts_with("2"),
    names_to = "year",
    values_to = "female employement rate (thousands)"
  ) |>
  # both columns are converted to numeric
  mutate(
    year = as.numeric(year),
    `female employement rate (thousands)` =
      as.numeric(`female employement rate (thousands)`)
  )

# fertility rows for years after 1999
rok_fertility_2000_2023 <- filter(rok_fertility_1970_2023, year > 1999)

rok_fertility_female_employment_rate <-
  rok_fertility_2000_2023 |>
  full_join(rok_female_employment_rate, by = join_by(year))


# Graph 3: fertility rate against the female employment column, one labelled
# point per year

rok_fertility_female_employment_rate |>
  ggplot(
    mapping = aes(
      x = `female employement rate (thousands)`,
      y = total_fertility_rate_persons,
      label = year
    )
  ) +
  geom_point() +
  geom_text_repel(size = 3) +
  # smoothed trend line without the confidence band
  geom_smooth(se = FALSE) +
  labs(
    x = "Female Employment Rate (Thousands)",
    y = "Total Fertility Rate (Persons)",
    title = "The Relationship between \nFertility Rate & Female Employment Rate in South Korea: 2000 - 2023",
    subtitle = str_wrap("Overtime, South Korea's Fertility Rate has decreased while the Female Employment Rate has increased"),
    caption = "Source: Korean Statistical Information Service (KOSIS)"
  ) +
  theme_minimal() +
  # no grid lines, visible axis lines, bold text throughout
  theme(
    panel.grid = element_blank(),
    axis.line = element_line(),
    text = element_text(face = "bold")
  )

To discover another variable potentially related to the decreasing fertility rate of South Korea, we also analyzed the relationship between “Female Employment Rate” and “Fertility Rate”. As displayed in Graph 3, the country’s fertility rate has decreased as the female employment rate has increased over time, suggesting a negative relationship between the two.

Graph 4A & 4B - Gender Equality Matters

Code
# Graph 4A: survey responses reshaped to one row per question, drawn as bars

rok_2024_obligation_sentiment <-
  read_excel("data/rok_2024_obligation_sentiment.xlsx") |>
  # the first five columns become question / percent_agree pairs
  pivot_longer(
    cols = 1:5,
    names_to = "question",
    values_to = "percent_agree"
  ) |>
  # drop the "total" entry
  filter(question != "total")

ggplot(
  data = rok_2024_obligation_sentiment,
  mapping = aes(
    # fix the bar order and swap the column names for readable labels
    x = factor(
      question,
      levels = c(
        "decide_for_themselves",
        "women_have_obligation",
        "it_depends",
        "refused"
      ),
      labels = str_wrap(c(
        "Should Women decide for themselves?",
        "Do Women have an obligation?",
        "Does it depends on the situation?",
        "Don't Know or Refused to Answer"
      ))
    ),
    y = percent_agree
  )
) +
  geom_col(fill = "#0F64CD") +
  labs(
    x = "Question",
    y = "Percent (%) who agree (N = 2,104)",
    title = "South Koreans are alright with declining birthrates...?",
    subtitle = "Despite declining birthrates, South Koreans are not \npressuring women into having more children",
    caption = "Source: Pew Research Center Survey 2023: Religion in East Asia Survey Topline Questionanaire"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 6.5),
    text = element_text(face = "bold")
  )

Despite the alarming decrease in birthrates, visualization 4A seems to suggest that South Koreans are resigned to lower fertility. Graph 4B suggests that gender equality is an aggravating factor in declining birth rates. This makes sense, as the United Nations methodology for measuring gender inequality uses reproductive health, gender empowerment, and labor force participation to create a composite measure.1 From a political lens, this relationship that we observe between the GII and fertility rates serves as ‘unsavory’ ammunition for conservative, far-right South Korean politicians to curb women’s empowerment and gender equality.2

Code
# Gender Inequality Index (GII) data: spread each indicator into its own
# column, drop empty rows/columns, and join to the fertility series by year

rok_gii <-
  read_excel("data/rok_hdr_data.xlsx") |>
  pivot_wider(
    names_from = indicator,
    values_from = value
  ) |>
  # state `which` explicitly rather than relying on the implicit default
  remove_empty(which = c("rows", "cols")) |>
  clean_names()

rok_gii$year <- as.numeric(rok_gii$year)

# Fertility rows for 1990-2022 joined to the GII rows on year. The join takes
# only the two tables: the unfiltered fertility table used to be passed as a
# stray third positional argument, which is not a join input.
rok_gii_fertility <-
  rok_fertility_1970_2023 |>
  filter(year %in% 1990:2022) |>
  full_join(rok_gii, by = join_by(year))

# Graph 4B: fertility rate against the Gender Inequality Index, one labelled
# point per year, with a vertical reference line annotated "World (2022)"

rok_gii_fertility |>
  ggplot(
    mapping = aes(
      x = gender_inequality_index_value,
      y = total_fertility_rate_persons,
      label = year
    )
  ) +
  geom_text_repel(max.overlaps = 20) +
  geom_point() +
  # smoothed trend line without the confidence band
  geom_smooth(se = FALSE, color = "#0F64CD") +
  # reference line at 0.462 and its rotated label just to the left
  geom_vline(xintercept = .462) +
  annotate("text", x = .455, y = 1.25, label = "World (2022)", angle = 90) +
  scale_x_continuous(n.breaks = 15) +
  labs(
    title = "The Relationship between Gender Equality and Fertility Rates \nin South Korea: 1990 - 2022",
    subtitle = "South Korea's rapidly declining birthrates could be the effect of\n widening gender inequality",
    caption = "Source: KOSIS, United Nations Human Development Report - Gender Inequality Index",
    x = "Gender Inequality Index \n(higher is worse)",
    y = "Total Fertility Rate \n(persons)"
  ) +
  theme_minimal() +
  theme(text = element_text(face = "bold"))