library(tidyverse)
library(ggalluvial)
library(janitor)
library(scales)
# -------------------------------
# 1. Load data
# -------------------------------
# Absolute path of the programs CSV on the local machine.
csv_path <- "C:/Users/ijkha/Desktop/work charts/data/newschool_city_programs + centers, labs & institutes.xlsx - programs.csv"

# Read the CSV without printing the column specification, then let janitor
# normalise the column names so later steps can use them as plain identifiers.
df <- janitor::clean_names(
  readr::read_csv(csv_path, show_col_types = FALSE)
)
# -------------------------------
# 2. Create degree level
# -------------------------------
# Helper: TRUE where `x` (already lower-cased) contains any of `tokens` as a
# stand-alone token, i.e. not embedded in a longer run of letters.
# Periods are dropped first so "m.a." / "ph.d." behave like "ma" / "phd".
# NA input yields NA, which case_when() treats as "no match".
#
# Why: plain substring matching misclassifies values -- "ma" matches
# "diploma", "ms" matches "programs", and "mba" only matched the
# undergraduate "ba" pattern.
has_degree_token <- function(x, tokens) {
  pattern <- paste0(
    "(?<![a-z])(?:", paste(tokens, collapse = "|"), ")(?![a-z])"
  )
  str_detect(str_remove_all(x, fixed(".")), pattern)
}

# Derive a coarse degree level from the free-text `degree_type` column.
# Rules are checked in order (PhD, then Graduate, then Undergraduate), so a
# combined value such as "BA/MA" is assigned to the higher level. Anything
# unmatched, including a missing degree type, becomes "Other/Unknown".
# Rows without a program or college are dropped because they cannot be
# placed on the alluvial axes.
df2 <- df %>%
  mutate(
    degree_type_low = str_to_lower(str_squish(degree_type)),
    level = case_when(
      has_degree_token(
        degree_type_low,
        c("phd", "doctor[a-z]*")
      ) ~ "PhD",
      has_degree_token(
        degree_type_low,
        c("ma", "ms", "mfa", "mph", "mpp", "march", "mba", "master[a-z]*")
      ) ~ "Graduate",
      has_degree_token(
        degree_type_low,
        c("ba", "bs", "bfa", "bba", "bafa", "bachelor[a-z]*", "undergrad[a-z]*")
      ) ~ "Undergraduate",
      TRUE ~ "Other/Unknown"
    ),
    # Fixed level order controls stacking/legend order in the plot.
    level = factor(
      level,
      levels = c("Undergraduate", "Graduate", "PhD", "Other/Unknown")
    ),
    program = str_squish(program),
    college = str_squish(college)
  ) %>%
  filter(!is.na(program), !is.na(college))
# -------------------------------
# 3. Flow weights (enrollment or 1)
# -------------------------------
# Helper: convert an enrollment column to numeric.
# Character input may carry thousands separators ("1,234"); a bare
# as.numeric() turns those into NA, which would silently shrink a large
# program to the fallback weight of 1. Only commas followed by three digits
# are removed, so other text still becomes NA as before. Non-character input
# is converted exactly as as.numeric() would.
parse_enrollment <- function(x) {
  if (!is.character(x)) {
    return(suppressWarnings(as.numeric(x)))
  }
  without_grouping <- str_remove_all(x, ",(?=\\d{3})")
  # Coercion warnings for non-numeric text are expected; such values map to NA.
  suppressWarnings(as.numeric(without_grouping))
}

# Flow weight per row: the enrollment figure when it is a positive number,
# otherwise 1 so that every program still appears in the diagram.
df2 <- df2 %>%
  mutate(
    enrollment_num = parse_enrollment(enrollment_in_2024_or_last_enrolled),
    weight = if_else(
      !is.na(enrollment_num) & enrollment_num > 0,
      enrollment_num,
      1
    )
  )
# -------------------------------
# 4. Build alluvial table
# -------------------------------
# One row per college / level / program combination; `value` is the summed
# flow weight of the rows in that combination. Non-positive totals are
# removed before plotting.
alluvial_df <- df2 %>%
  group_by(college, level, program) %>%
  summarise(value = sum(weight, na.rm = TRUE), .groups = "drop") %>%
  filter(value > 0)
# -------------------------------
# 5. Simplify: Top N programs
# -------------------------------
# Number of individual programs to keep; the rest are pooled together.
# NOTE: this variable shares its name with dplyr::top_n(); it is kept because
# the plot subtitle refers to it.
top_n <- 15

# Programs with the largest total flow weight across all colleges/levels.
keep_programs <- alluvial_df %>%
  group_by(program) %>%
  summarise(total = sum(value), .groups = "drop") %>%
  # with_ties = FALSE caps the result at exactly `top_n` programs. The default
  # (TRUE) keeps every program tied with the last place, which is common here
  # because programs without enrollment data all share the weight 1.
  slice_max(total, n = top_n, with_ties = FALSE) %>%
  pull(program)

# Collapse every other program into a single "Other programs" bucket,
# re-aggregate, and move that bucket to the end of the factor levels so it is
# ordered last on the program axis.
alluvial_df_big <- alluvial_df %>%
  mutate(
    program = if_else(program %in% keep_programs, program, "Other programs")
  ) %>%
  group_by(college, level, program) %>%
  summarise(value = sum(value), .groups = "drop") %>%
  mutate(
    program = forcats::fct_relevel(program, "Other programs", after = Inf)
  )
# -------------------------------
# 6. Plot (with black flow borders)
# -------------------------------
# Maximum characters per line for the labels drawn inside the strata.
wrap_width <- 20

# Three-axis alluvial diagram: college -> level -> program, with flow height
# given by `value` and flows coloured by degree level.
p <- ggplot(
  data = alluvial_df_big,
  mapping = aes(axis1 = college, axis2 = level, axis3 = program, y = value)
) +
  # Flows, drawn semi-transparent with a thin black outline.
  geom_alluvium(
    mapping = aes(fill = level),
    width = 1 / 18,
    alpha = 0.55,
    colour = "black",
    size = 0.15
  ) +
  # Strata boxes on each axis, in neutral greys.
  geom_stratum(
    width = 1 / 11,
    fill = "grey95",
    colour = "grey25"
  ) +
  # Stratum names, wrapped to `wrap_width` characters.
  geom_text(
    stat = "stratum",
    mapping = aes(
      label = stringr::str_wrap(after_stat(stratum), width = wrap_width)
    ),
    size = 3,
    lineheight = 0.9
  ) +
  scale_y_continuous(labels = scales::comma) +
  # clip = "off" lets labels extend beyond the panel area.
  coord_cartesian(clip = "off") +
  labs(
    title = "New School Programs by Degree Level",
    subtitle = paste0("College → Level → Program (Top ", top_n, " programs; others grouped)"),
    x = NULL,
    y = "Flow weight",
    fill = "Level"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.x = element_blank(),
    # Extra right-hand margin for content drawn outside the panel.
    plot.margin = margin(t = 10, r = 80, b = 10, l = 20)
  )

# Display the plot.
p