# Unsupervised Machine Learning for Clustering in Political and Social Research
# Philip D. Waggoner, pdwaggoner@uchicago.edu

# Section 2

# Note: load packages from Section 1 first

# Keep only the 2009/10 legislative session, drop the listed columns, and
# remove any rows that contain missing values.
# `x` is not created in this section; it must already be in the workspace.
st <- x %>%
  filter(sessid == "2009/10") %>%
  select(-c(fips, stateabv, sessid, mds1, mds2, year)) %>%
  na.omit() # no argument needed: the pipe supplies the data

# Inspect the cleaned data
skim(st)

# Keep the state labels for later use (e.g., the row names set below)
states <- st$state

# Apply scale() to columns 2 through 5, convert the result back to a
# data frame, and label each row with its state
st_scale <- st[, 2:5] %>%
  scale() %>%
  data.frame()
rownames(st_scale) <- states


## Check for clusterability via VAT/ODI plot + Hopkins
# Hopkins statistic first, with n set to one less than the number of rows.
# Only the statistic is read from this result, so graph = FALSE skips
# building a plot that would go unused.
h <- get_clust_tendency(st_scale, n = nrow(st_scale) - 1, graph = FALSE)
h$hopkins_stat

# ODI plot next: run the tendency check again, this time supplying a custom
# low/mid/high colour gradient for the plot
h <- get_clust_tendency(
  st_scale,
  n = nrow(st_scale) - 1,
  gradient = list(low = "black", mid = "gray", high = "red")
)

# Relabel the fill legend, keep the plot object, then display it
h_plot <- h$plot + labs(fill = "Range of\nDissimilarity")
h_plot