library(readtext)
library(quanteda)
library(tidyverse)
library(stm)
library(tidytext)
library(haven)
library(data.table)
library(ggridges)
library(viridis)

# UNGD data are available on the Harvard Dataverse at https://doi.org/10.7910/DVN/0TJX8Y


# Root folder of the UNGD data. Defaults to the original hard-coded Dropbox
# location; set the UNGD_DATA_DIR environment variable to point somewhere else
# without editing the script. The value must end with a path separator because
# it is concatenated directly with "TXT/*" below.
DATA_DIR <- Sys.getenv(
  "UNGD_DATA_DIR",
  unset = "~/Dropbox/Research/UNGDC projects/UN Data/"
)

# Read everything matched by TXT/* and split each file name on "_" into three
# document variables: Country, Session and Year.
ungd_files <- readtext(
  paste0(DATA_DIR, "TXT/*"),
  docvarsfrom = "filenames",
  dvsep = "_",
  docvarnames = c("Country", "Session", "Year")
)

# Build the quanteda corpus from the "text" column of the readtext result.
ungd_corpus <- corpus(ungd_files, text_field = "text")

#Analysis

# Derive a Country column from the row names of pres_similarity by stripping
# the ".txt" extension, then the first "_" + four-digit group and the first
# "_" + two-digit group from each name.
# NOTE(review): pres_similarity is not created in the visible part of this
# script; it must already exist in the session when this runs. The row names
# presumably look like "<Country>_<Session>_<Year>.txt" -- confirm.
#
# The dot is escaped and anchored so that only a literal trailing ".txt" is
# removed; an unescaped "." would match any character followed by "txt".
pres_similarity$Country <- rownames(pres_similarity) %>%
  str_replace("\\.txt$", "") %>%
  str_replace("_\\d{4}", "") %>%
  str_replace("_\\d{2}", "")
# Country codes used to build the EU membership flags.
# EU holds the six countries flagged as members in every year; wave1..wave7
# hold the countries added by each later accession wave, in order.
EU <- c(
  "BEL", "FRA", "DEU",
  "ITA", "LUX", "NLD"
)
wave1 <- c("DNK", "IRL", "GBR")
wave2 <- c("GRC")
wave3 <- c("ESP", "PRT")
wave4 <- c("AUT", "FIN", "SWE")
wave5 <- c(
  "CZE", "HUN", "POL", "EST", "LVA",
  "LTU", "CYP", "MLT", "SVK", "SVN"
)
wave6 <- c("BGR", "ROU")
wave7 <- c("HRV")
# Build the working table and flag, for every country-year row, whether the
# country counts as an EU member in that year. The six countries in EU are
# flagged in all years.
simil <- pres_similarity %>%
  mutate(is.eu = Country %in% EU)

# Copy of the founding-six flag taken before any accession wave is applied.
# NOTE(review): the name suggests this was meant to record the accession wave,
# but nothing in this block updates it -- confirm intent.
simil$waves <- simil$is.eu

# Each accession wave paired with its cut-off: rows whose Year is strictly
# greater than `after` are flagged as EU members. Reusing the wave1..wave7
# vectors keeps the country lists in a single place instead of repeating every
# country code here.
accession_waves <- list(
  list(members = wave1, after = 1972), # first wave
  list(members = wave2, after = 1980), # second wave
  list(members = wave3, after = 1985), # third wave
  list(members = wave4, after = 1994), # fourth wave
  list(members = wave5, after = 2003), # fifth wave
  list(members = wave6, after = 2006), # sixth wave
  list(members = wave7, after = 2012)  # seventh wave
)

for (wave in accession_waves) {
  # which() drops NA comparisons, so rows with a missing Year are left as-is.
  joined <- which(simil$Country %in% wave$members & simil$Year > wave$after)
  simil$is.eu[joined] <- TRUE
}
# Cohort indicator columns, added in this order: eu6, wave5, eu9, eu12, eu15.
#
# Plain column assignment is used instead of mutate() on purpose: inside
# mutate(), `Country %in% wave5` would resolve `wave5` to the data-frame column
# of the same name once that column exists (e.g. when this chunk is re-run),
# silently turning every value into FALSE.

# Founding six, flagged in every year.
simil$eu6 <- simil$Country %in% EU

# Membership of the fifth accession wave, regardless of year. Indicators for
# the other waves are not generated.
simil$wave5 <- simil$Country %in% wave5

# EU-9: founding six plus first-wave countries in years after 1972.
simil$eu9 <- simil$eu6
simil$eu9[which(simil$Country %in% wave1 & simil$Year > 1972)] <- TRUE

# EU-12: EU-9 plus the second wave (after 1980) and third wave (after 1985).
simil$eu12 <- simil$eu9
simil$eu12[which(simil$Country %in% wave2 & simil$Year > 1980)] <- TRUE
simil$eu12[which(simil$Country %in% wave3 & simil$Year > 1985)] <- TRUE

# EU-15: EU-12 plus fourth-wave countries in years after 1994.
simil$eu15 <- simil$eu12
simil$eu15[which(simil$Country %in% wave4 & simil$Year > 1994)] <- TRUE
# Write the flagged similarity table to cosine_similarity.csv (a relative
# path, so it is resolved against the current working directory).
readr::write_csv(simil, "cosine_similarity.csv")
LS0tCnRpdGxlOiAiY29zaW5lIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgoKCgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQpyZXF1aXJlKGRldnRvb2xzKQppbnN0YWxsX3ZlcnNpb24oInF1YW50ZWRhIiwgdmVyc2lvbiA9ICIxLjIuMCIsIHJlcG9zID0gImh0dHA6Ly9jcmFuLnVzLnItcHJvamVjdC5vcmciKQpgYGAKCgpgYGB7ciwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeShyZWFkdGV4dCkKbGlicmFyeShxdWFudGVkYSkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoc3RtKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KGhhdmVuKQpsaWJyYXJ5KGRhdGEudGFibGUpCmxpYnJhcnkoZ2dyaWRnZXMpCmxpYnJhcnkodmlyaWRpcykKYGBgCgpVTkdEIGRhdGEgYXJlIGF2YWlsYWJsZSBvbiB0aGUgSGFydmFyZCBEYXRhdmVyc2UgYXQgaHR0cHM6Ly9kb2kub3JnLzEwLjc5MTAvRFZOLzBUSlg4WQoKCmBgYHtyfQoKREFUQV9ESVIgPC0gIn4vRHJvcGJveC9SZXNlYXJjaC9VTkdEQyBwcm9qZWN0cy9VTiBEYXRhLyIgCgp1bmdkX2ZpbGVzIDwtIHJlYWR0ZXh0KHBhc3RlMChEQVRBX0RJUiwgIlRYVC8qIiksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkb2N2YXJzZnJvbSA9ICJmaWxlbmFtZXMiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZHZzZXA9Il8iLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZG9jdmFybmFtZXMgPSBjKCJDb3VudHJ5IiwgIlNlc3Npb24iLCAiWWVhciIpKQoKCnVuZ2RfY29ycHVzIDwtIGNvcnB1cyh1bmdkX2ZpbGVzLCB0ZXh0X2ZpZWxkID0gInRleHQiKSAKCmBgYAoKCgojQW5hbHlzaXMKCgpgYGB7ciBFVSBwcmVzaWRlbnQsIGluY2x1ZGU9RkFMU0V9CnByZXNpZGVuY3kgPC0gcmVhZHhsOjpyZWFkX2V4Y2VsKCIuLi9wcmVzaWRlbmN5Lnhsc3giKQoKYGBgCgoKYGBge3Igc2ltaWxhcml0eSwgaW5jbHVkZT1GQUxTRX0KI0VVIHByZXNpZGVudApwcmVzX3NpbWlsYXJpdHkgPC0gZGF0YS5mcmFtZSgpCgpmb3IgKGkgaW4gYygxOTcwOjIwMTcpKSB7CiAgCiNDcmVhdGluZyBjb3JwdXMgZm9yIGVhY2ggeWVhcgp1bmdkYy5pIDwtIGNvcnB1c19zdWJzZXQodW5nZF9jb3JwdXMsIFllYXI9PWkpCgp0b2sgPC0gdG9rZW5zKHVuZ2RjLmksIHdoYXQgPSAid29yZCIsCiAgICAgICAgICAgICAgcmVtb3ZlX3B1bmN0ID0gVFJVRSwKICAgICAgICAgICAgICByZW1vdmVfc3ltYm9scyA9IFRSVUUsCiAgICAgICAgICAgICAgcmVtb3ZlX251bWJlcnMgPSBUUlVFLAogICAgICAgICAgICAgIHJlbW92ZV90d2l0dGVyID0gVFJVRSwKICAgICAgICAgICAgICByZW1vdmVfdXJsID0gVFJVRSwKICAgICAgICAgICAgICByZW1vdmVfaHlwaGVucyA9IFRSVUUsCiAgICAgICAgICAgICAgdmVyYm9zZSA9IFRSVUUpCgojIHN0ZW1taW5nIHJlLWludHJvZHVjZWQgCmRmbSA8LSBkZm0odG9rLCAKICAgICAgICAgICB0b2xvd2VyID0gVFJVRSwKICAgICAgICAgICBy
ZW1vdmU9c3RvcHdvcmRzKCJlbmdsaXNoIiksCiAgICAgICAgICAgc3RlbT1UUlVFLCAKICAgICAgICAgICB2ZXJib3NlID0gVFJVRSkKCiNSZW1vdmluZyBhbnkgZGlnaXRzLiBgZGZtYCBwaWNrcyB1cCBhbnkgc2VwYXJhdGVkIGRpZ2l0cywgbm90IGRpZ2l0cyB0aGF0IGFyZSBwYXJ0IG9mIHRva2Vucy4KI1JlbW92aW5nIGFueSBwdW5jdHVhdGlvbi4gYGRmbWAgcGlja3MgdXAgYW55IHB1bmN0dWF0aW9uIHVubGVzcyBpdCdzIHBhcnQgb2YgYSB0b2tlbi4KI1JlbW92aW5nIGFueSB0b2tlbnMgbGVzcyB0aGFuIGZvdXIgY2hhcmFjdGVycy4KZGZtLm0gPC0gZGZtX3NlbGVjdChkZm0sIGMoIltcXGQtXSIsICJbWzpwdW5jdDpdXSIsICJeLnsxLDN9JCIpLCBzZWxlY3Rpb24gPSAicmVtb3ZlIiwgCiAgICAgICAgICAgICAgICAgICAgdmFsdWV0eXBlPSJyZWdleCIsIHZlcmJvc2UgPSBUUlVFKQoKCiNEcm9wcGluZyB3b3JkcyB0aGF0IGFwcGVhciBsZXNzIHRoYW4gNSB0aW1lcyBhbmQgaW4gbGVzcyB0aGFuIDMgZG9jdW1lbnRzLgpkZm0udHJpbSA8LSBkZm1fdHJpbShkZm0ubSwgbWluX3Rlcm1mcmVxID0gNSwgbWluX2RvY2ZyZXEgPSAzKQoKI3RmaWRmIHdlaWdodGluZwpkZm0udyA8LSBkZm1fdGZpZGYoZGZtKQoKI2hvbGRlcnMgZm9yIGNvdW50cnkgbmFtZXMgaW4gZGlzdGFuY2UgbWVhc3VyZXMgYmVsb3cKcHJlcyA8LSBwYXN0ZTAocHJlc2lkZW5jeSRDb3VudHJ5X2FsdFtwcmVzaWRlbmN5JFllYXI9PWldKQoKI0Nvc2luZSBzaW1pbGFyaXR5IGNhbGN1bGF0aW9ucwoKZG9jIDwtIHBhc3RlMChkZm0ud0BEaW1uYW1lcyRkb2NzW2RmbS53QGRvY3ZhcnMkQ291bnRyeT09cHJlc10pCgpzaW1pbGFyaXRpZXMuaSA8LSBhcy5kYXRhLmZyYW1lKGFzLmxpc3QodGV4dHN0YXRfc2ltaWwoZGZtLncsIGRmbS53QERpbW5hbWVzJGRvY3NbZGZtLndAZG9jdmFycyRDb3VudHJ5PT1wcmVzXSxtYXJnaW4gPSAiZG9jdW1lbnRzIiwgbWV0aG9kID0gImNvc2luZSIpLCBzb3J0ZWQgPSBGQUxTRSlbMV0pCgpuYW1lcyhzaW1pbGFyaXRpZXMuaSlbMV0gPC0gIlBSRVMiCgpzaW1pbGFyaXRpZXMuaSRZZWFyIDwtIGkKCnByZXNfc2ltaWxhcml0eSA8LSByYmluZChwcmVzX3NpbWlsYXJpdHksc2ltaWxhcml0aWVzLmkpCgp9CmBgYAoKYGBge3Igc2ltaWxhcml0eSBhZGRpbmcgY291bnRyeSBuYW1lcyB9CnByZXNfc2ltaWxhcml0eSRDb3VudHJ5IDwtIHJvd25hbWVzKHByZXNfc2ltaWxhcml0eSkKCnByZXNfc2ltaWxhcml0eSRDb3VudHJ5IDwtIHN0cl9yZXBsYWNlKHByZXNfc2ltaWxhcml0eSRDb3VudHJ5ICwgIi50eHQiLCAiIikgJT4lCiAgc3RyX3JlcGxhY2UoLiAsICJfXFxkezR9IiwgIiIpICU+JQogIHN0cl9yZXBsYWNlKC4gLCAiX1xcZHsyfSIsICIiKQpgYGAKCgoKYGBge3J9CiNMb2dpY2FscyBmb3IgRVUgbWVtYmVyIHN0YXRlcwpFVSA8LSBjKCJCRUwiLCAiRlJBIiwgIkRFVSIsICJJVEEiLCAiTFVYIiwgIk5MRCIpCndhdmUxIDwtIGMo
IkROSyIsICJJUkwiLCAiR0JSIikKd2F2ZTIgPC0gIkdSQyIgCndhdmUzIDwtIGMoIkVTUCIsICJQUlQiKSAKd2F2ZTQgPC0gYygiQVVUIiwgIkZJTiIsICJTV0UiKSAKd2F2ZTUgPC0gYygiQ1pFIiwgIkhVTiIsICJQT0wiLCAiRVNUIiwgIkxWQSIsICJMVFUiLCAiQ1lQIiwgIk1MVCIsICJTVksiLCAiU1ZOIikKd2F2ZTYgPC0gYygiQkdSIiwgIlJPVSIpIAp3YXZlNyA8LSAiSFJWIiAKYGBgCgoKCgoKYGBge3J9CnNpbWlsIDwtIHByZXNfc2ltaWxhcml0eQoKc2ltaWwgPC0gc2ltaWwgJT4lIG11dGF0ZShpcy5ldSA9IENvdW50cnkgJWluJSBFVSkKCnNpbWlsJHdhdmVzIDwtIHNpbWlsJGlzLmV1CgojZmlyc3Qgd2F2ZQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkROSyIgJiBzaW1pbCRZZWFyID4xOTcyXSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiSVJMIiAmIHNpbWlsJFllYXIgPjE5NzJdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJHQlIiICYgc2ltaWwkWWVhciA+MTk3Ml0gPC0gVFJVRQoKI3NlY29uZCB3YXZlCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiR1JDIiAmIHNpbWlsJFllYXIgPjE5ODBdIDwtIFRSVUUKCiN0aGlyZCB3YXZlCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiRVNQIiAmIHNpbWlsJFllYXIgPjE5ODVdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJQUlQiICYgc2ltaWwkWWVhciA+MTk4NV0gPC0gVFJVRQoKI2ZvdXJ0aCB3YXZlCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiQVVUIiAmIHNpbWlsJFllYXIgPjE5OTRdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJGSU4iICYgc2ltaWwkWWVhciA+MTk5NF0gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIlNXRSIgJiBzaW1pbCRZZWFyID4xOTk0XSA8LSBUUlVFCgojZmlmdGggd2F2ZQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkNaRSIgJiBzaW1pbCRZZWFyID4yMDAzXSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiSFVOIiAmIHNpbWlsJFllYXIgPjIwMDNdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJQT0wiICYgc2ltaWwkWWVhciA+MjAwM10gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkVTVCIgJiBzaW1pbCRZZWFyID4yMDAzXSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiTFZBIiAmIHNpbWlsJFllYXIgPjIwMDNdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJMVFUiICYgc2ltaWwkWWVhciA+MjAwM10gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkNZUCIgJiBzaW1pbCRZZWFyID4yMDAzXSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiTUxUIiAmIHNpbWlsJFllYXIgPjIwMDNdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJTVksiICYgc2lt
aWwkWWVhciA+MjAwM10gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIlNWTiIgJiBzaW1pbCRZZWFyID4yMDAzXSA8LSBUUlVFCgojc2l4dGggd2F2ZQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkJHUiIgJiBzaW1pbCRZZWFyID4yMDA2XSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiUk9VIiAmIHNpbWlsJFllYXIgPjIwMDZdIDwtIFRSVUUKCiNzZXZlbnRoIHdhdmUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJIUlYiICYgc2ltaWwkWWVhciA+MjAxMl0gPC0gVFJVRQpgYGAKCgpgYGB7cn0Kc2ltaWwgPC0gIG11dGF0ZShzaW1pbCwgZXU2ID0gQ291bnRyeSAlaW4lIEVVKQojc2ltaWwgPC0gIG11dGF0ZShzaW1pbCwgd2F2ZTEgPSBDb3VudHJ5ICVpbiUgd2F2ZTEpCiNzaW1pbCA8LSAgbXV0YXRlKHNpbWlsLCB3YXZlMiA9IENvdW50cnkgJWluJSB3YXZlMikKI3NpbWlsIDwtICBtdXRhdGUoc2ltaWwsIHdhdmUzID0gQ291bnRyeSAlaW4lIHdhdmUzKQojc2ltaWwgPC0gIG11dGF0ZShzaW1pbCwgd2F2ZTQgPSBDb3VudHJ5ICVpbiUgd2F2ZTQpCnNpbWlsIDwtICBtdXRhdGUoc2ltaWwsIHdhdmU1ID0gQ291bnRyeSAlaW4lIHdhdmU1KQojc2ltaWwgPC0gIG11dGF0ZShzaW1pbCwgd2F2ZTYgPSBDb3VudHJ5ICVpbiUgd2F2ZTYpCiNzaW1pbCA8LSAgbXV0YXRlKHNpbWlsLCB3YXZlNyA9IENvdW50cnkgJWluJSB3YXZlNykKYGBgCgpgYGB7cn0Kc2ltaWwgPC0gIG11dGF0ZShzaW1pbCwgZXU5ID0gZXU2KQpzaW1pbCRldTlbc2ltaWwkQ291bnRyeT09ICJETksiICYgc2ltaWwkWWVhciA+MTk3Ml0gPC0gVFJVRQpzaW1pbCRldTlbc2ltaWwkQ291bnRyeT09ICJJUkwiICYgc2ltaWwkWWVhciA+MTk3Ml0gPC0gVFJVRQpzaW1pbCRldTlbc2ltaWwkQ291bnRyeT09ICJHQlIiICYgc2ltaWwkWWVhciA+MTk3Ml0gPC0gVFJVRQoKc2ltaWwgPC0gIG11dGF0ZShzaW1pbCwgZXUxMiA9IGV1OSkKc2ltaWwkZXUxMltzaW1pbCRDb3VudHJ5PT0gIkdSQyIgJiBzaW1pbCRZZWFyID4xOTgwXSA8LSBUUlVFCnNpbWlsJGV1MTJbc2ltaWwkQ291bnRyeT09ICJFU1AiICYgc2ltaWwkWWVhciA+MTk4NV0gPC0gVFJVRQpzaW1pbCRldTEyW3NpbWlsJENvdW50cnk9PSAiUFJUIiAmIHNpbWlsJFllYXIgPjE5ODVdIDwtIFRSVUUKCnNpbWlsIDwtICBtdXRhdGUoc2ltaWwsIGV1MTUgPSBldTEyKQpzaW1pbCRldTE1W3NpbWlsJENvdW50cnk9PSAiQVVUIiAmIHNpbWlsJFllYXIgPjE5OTRdIDwtIFRSVUUKc2ltaWwkZXUxNVtzaW1pbCRDb3VudHJ5PT0gIkZJTiIgJiBzaW1pbCRZZWFyID4xOTk0XSA8LSBUUlVFCnNpbWlsJGV1MTVbc2ltaWwkQ291bnRyeT09ICJTV0UiICYgc2ltaWwkWWVhciA+MTk5NF0gPC0gVFJVRQoKYGBgCgoKYGBge3J9CnJlYWRyOjp3cml0ZV9jc3Yoc2ltaWwsICJjb3NpbmVfc2ltaWxhcml0eS5jc3YiKQpgYGAKCgoKCg==