library(readtext)
library(quanteda)
library(tidyverse)
library(stm)
library(tidytext)
library(haven)
library(data.table)
library(ggridges)
library(viridis)

# UNGD data are available on the Harvard Dataverse at https://doi.org/10.7910/DVN/0TJX8Y


# Root folder of the local data copy; the speech files are read from its
# "TXT/" sub-folder. The trailing slash matters because the glob below is
# built with plain string concatenation.
DATA_DIR <- "~/Dropbox/Research/UNGDC projects/UN Data/"

# Read every file matched by "<DATA_DIR>TXT/*". Each file name is split on
# "_" and the pieces are stored as the document variables Country, Session
# and Year.
ungd_files <- DATA_DIR %>%
  paste0("TXT/*") %>%
  readtext(
    docvarsfrom = "filenames",
    dvsep = "_",
    docvarnames = c("Country", "Session", "Year")
  )

# Turn the readtext result into a quanteda corpus, taking the document text
# from its "text" column.
ungd_corpus <- corpus(ungd_files, text_field = "text")

#Analysis

# Derive a Country column from the row names of `pres_similarity`.
# NOTE(review): the row names are presumed to be document names of the form
# "<Country>_<Session>_<Year>.txt" (the file-name layout declared when the
# files are read) -- confirm against the code that builds `pres_similarity`.
#
# Three pieces are stripped in turn, first match only:
#   1. the literal ".txt" extension -- matched with fixed() so that "." is not
#      interpreted as the regex wildcard "any character";
#   2. the first "_" followed by four digits (the year);
#   3. the first "_" followed by two digits (the session number).
pres_similarity$Country <- rownames(pres_similarity) %>%
  str_replace(fixed(".txt"), "") %>%
  str_replace("_\\d{4}", "") %>%
  str_replace("_\\d{2}", "")
# Three-letter country codes grouped by EU accession wave. These are character
# vectors; the logical membership columns are derived from them (and from
# year cut-offs) further down the script.

# Founding six
EU <- c(
  "BEL", "FRA", "DEU",
  "ITA", "LUX", "NLD"
)

# Enlargement waves, in chronological order
wave1 <- c("DNK", "IRL", "GBR")
wave2 <- c("GRC")
wave3 <- c("ESP", "PRT")
wave4 <- c("AUT", "FIN", "SWE")
wave5 <- c(
  "CZE", "HUN", "POL", "EST", "LVA",
  "LTU", "CYP", "MLT", "SVK", "SVN"
)
wave6 <- c("BGR", "ROU")
wave7 <- c("HRV")
# Working copy of the similarity table with a per-row EU-membership flag.
# `is.eu` starts out TRUE for the six codes in `EU` (in every year) and FALSE
# for everything else.
simil <- pres_similarity %>%
  mutate(is.eu = Country %in% EU)

# `waves` is a copy of the flag as it stands here, i.e. before the enlargement
# updates below; nothing in this section changes it afterwards.
# NOTE(review): looks like an unfinished placeholder -- confirm it is needed.
simil$waves <- simil$is.eu

# Switch `is.eu` to TRUE for later joiners, but only in rows whose Year is
# strictly greater than the cut-off listed for that country. Wrapped in
# local() so the helper objects do not end up in the workspace.
simil$is.eu <- local({
  last_year_outside <- c(
    # first wave
    DNK = 1972, IRL = 1972, GBR = 1972,
    # second wave
    GRC = 1980,
    # third wave
    ESP = 1985, PRT = 1985,
    # fourth wave
    AUT = 1994, FIN = 1994, SWE = 1994,
    # fifth wave
    CZE = 2003, HUN = 2003, POL = 2003, EST = 2003, LVA = 2003,
    LTU = 2003, CYP = 2003, MLT = 2003, SVK = 2003, SVN = 2003,
    # sixth wave
    BGR = 2006, ROU = 2006,
    # seventh wave
    HRV = 2012
  )

  member <- simil$is.eu
  for (code in names(last_year_outside)) {
    joined <- simil$Country == code & simil$Year > last_year_outside[[code]]
    member[joined] <- TRUE
  }
  member
})
# Year-independent cohort indicators: TRUE when the row's country belongs to
# the founding six (`eu6`) or to the fifth enlargement wave (`wave5`).
#
# Plain `$` assignment is used rather than mutate(): inside mutate() the
# right-hand `wave5` is data-masked, so once a `wave5` column exists (e.g. when
# this section is re-run on the same `simil`) it would resolve to that logical
# column instead of the vector of country codes and silently yield all FALSE.
# With `$` assignment the name always refers to the vector in the workspace.
simil$eu6 <- simil$Country %in% EU
simil$wave5 <- simil$Country %in% wave5
# Indicators for wave1-wave4, wave6 and wave7 were commented out in the
# original script and are still not created.

# Cumulative membership flags. Each one starts from the previous flag and
# additionally turns TRUE for the newly joined countries in rows whose Year is
# strictly greater than the cut-off.

# eu9: founding six + DNK / IRL / GBR after 1972
simil$eu9 <- simil$eu6
simil$eu9[simil$Country %in% c("DNK", "IRL", "GBR") & simil$Year > 1972] <- TRUE

# eu12: eu9 + GRC after 1980, ESP / PRT after 1985
simil$eu12 <- simil$eu9
simil$eu12[simil$Country == "GRC" & simil$Year > 1980] <- TRUE
simil$eu12[simil$Country %in% c("ESP", "PRT") & simil$Year > 1985] <- TRUE

# eu15: eu12 + AUT / FIN / SWE after 1994
simil$eu15 <- simil$eu12
simil$eu15[simil$Country %in% c("AUT", "FIN", "SWE") & simil$Year > 1994] <- TRUE

# Write the finished table to a CSV file in the current working directory.
readr::write_csv(simil, "jaccard_similarity.csv")
LS0tCnRpdGxlOiAiamFjY2FyZCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCgoKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KcmVxdWlyZShkZXZ0b29scykKaW5zdGFsbF92ZXJzaW9uKCJxdWFudGVkYSIsIHZlcnNpb24gPSAiMS4yLjAiLCByZXBvcyA9ICJodHRwOi8vY3Jhbi51cy5yLXByb2plY3Qub3JnIikKYGBgCgoKYGBge3IsIG1lc3NhZ2U9RkFMU0V9CmxpYnJhcnkocmVhZHRleHQpCmxpYnJhcnkocXVhbnRlZGEpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHN0bSkKbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeShoYXZlbikKbGlicmFyeShkYXRhLnRhYmxlKQpsaWJyYXJ5KGdncmlkZ2VzKQpsaWJyYXJ5KHZpcmlkaXMpCmBgYAoKClVOR0QgZGF0YSBhcmUgYXZhaWxhYmxlIG9uIHRoZSBIYXJ2YXJkIERhdGF2ZXJzZSBhdCBodHRwczovL2RvaS5vcmcvMTAuNzkxMC9EVk4vMFRKWDhZCgoKYGBge3J9CgpEQVRBX0RJUiA8LSAifi9Ecm9wYm94L1Jlc2VhcmNoL1VOR0RDIHByb2plY3RzL1VOIERhdGEvIiAKCnVuZ2RfZmlsZXMgPC0gcmVhZHRleHQocGFzdGUwKERBVEFfRElSLCAiVFhULyoiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRvY3ZhcnNmcm9tID0gImZpbGVuYW1lcyIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkdnNlcD0iXyIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkb2N2YXJuYW1lcyA9IGMoIkNvdW50cnkiLCAiU2Vzc2lvbiIsICJZZWFyIikpCgoKdW5nZF9jb3JwdXMgPC0gY29ycHVzKHVuZ2RfZmlsZXMsIHRleHRfZmllbGQgPSAidGV4dCIpIAoKYGBgCgoKCiNBbmFseXNpcwoKCmBgYHtyIEVVIHByZXNpZGVudCwgaW5jbHVkZT1GQUxTRX0KcHJlc2lkZW5jeSA8LSByZWFkeGw6OnJlYWRfZXhjZWwoIi4uL3ByZXNpZGVuY3kueGxzeCIpCgpgYGAKCgpgYGB7ciBzaW1pbGFyaXR5LCBpbmNsdWRlPUZBTFNFfQojRVUgcHJlc2lkZW50CnByZXNfc2ltaWxhcml0eSA8LSBkYXRhLmZyYW1lKCkKCmZvciAoaSBpbiBjKDE5NzA6MjAxNykpIHsKICAKI0NyZWF0aW5nIGNvcnB1cyBmb3IgZWFjaCB5ZWFyCnVuZ2RjLmkgPC0gY29ycHVzX3N1YnNldCh1bmdkX2NvcnB1cywgWWVhcj09aSkKCnRvayA8LSB0b2tlbnModW5nZGMuaSwgd2hhdCA9ICJ3b3JkIiwKICAgICAgICAgICAgICByZW1vdmVfcHVuY3QgPSBUUlVFLAogICAgICAgICAgICAgIHJlbW92ZV9zeW1ib2xzID0gVFJVRSwKICAgICAgICAgICAgICByZW1vdmVfbnVtYmVycyA9IFRSVUUsCiAgICAgICAgICAgICAgcmVtb3ZlX3R3aXR0ZXIgPSBUUlVFLAogICAgICAgICAgICAgIHJlbW92ZV91cmwgPSBUUlVFLAogICAgICAgICAgICAgIHJlbW92ZV9oeXBoZW5zID0gVFJVRSwKICAgICAgICAgICAgICB2ZXJib3NlID0gVFJVRSkKCiMgc3RlbW1pbmcgcmUtaW50cm9kdWNlZCAKZGZtIDwtIGRmbSh0b2ssIAogICAgICAgICAgIHRvbG93ZXIgPSBUUlVFLAogICAgICAgICAg
IHJlbW92ZT1zdG9wd29yZHMoImVuZ2xpc2giKSwKICAgICAgICAgICBzdGVtPVRSVUUsIAogICAgICAgICAgIHZlcmJvc2UgPSBUUlVFKQoKI1JlbW92aW5nIGFueSBkaWdpdHMuIGBkZm1gIHBpY2tzIHVwIGFueSBzZXBhcmF0ZWQgZGlnaXRzLCBub3QgZGlnaXRzIHRoYXQgYXJlIHBhcnQgb2YgdG9rZW5zLgojUmVtb3ZpbmcgYW55IHB1bmN0dWF0aW9uLiBgZGZtYCBwaWNrcyB1cCBhbnkgcHVuY3R1YXRpb24gdW5sZXNzIGl0J3MgcGFydCBvZiBhIHRva2VuLgojUmVtb3ZpbmcgYW55IHRva2VucyBsZXNzIHRoYW4gZm91ciBjaGFyYWN0ZXJzLgpkZm0ubSA8LSBkZm1fc2VsZWN0KGRmbSwgYygiW1xcZC1dIiwgIltbOnB1bmN0Ol1dIiwgIl4uezEsM30kIiksIHNlbGVjdGlvbiA9ICJyZW1vdmUiLCAKICAgICAgICAgICAgICAgICAgICB2YWx1ZXR5cGU9InJlZ2V4IiwgdmVyYm9zZSA9IFRSVUUpCgoKI0Ryb3BwaW5nIHdvcmRzIHRoYXQgYXBwZWFyIGxlc3MgdGhhbiA1IHRpbWVzIGFuZCBpbiBsZXNzIHRoYW4gMyBkb2N1bWVudHMuCmRmbS50cmltIDwtIGRmbV90cmltKGRmbS5tLCBtaW5fdGVybWZyZXEgPSA1LCBtaW5fZG9jZnJlcSA9IDMpCgojdGZpZGYgd2VpZ2h0aW5nCmRmbS53IDwtIGRmbV90ZmlkZihkZm0pCgojaG9sZGVycyBmb3IgY291bnRyeSBuYW1lcyBpbiBkaXN0YW5jZSBtZWFzdXJlcyBiZWxvdwpwcmVzIDwtIHBhc3RlMChwcmVzaWRlbmN5JENvdW50cnlfYWx0W3ByZXNpZGVuY3kkWWVhcj09aV0pCgojQ29zaW5lIHNpbWlsYXJpdHkgY2FsY3VsYXRpb25zCgpkb2MgPC0gcGFzdGUwKGRmbS53QERpbW5hbWVzJGRvY3NbZGZtLndAZG9jdmFycyRDb3VudHJ5PT1wcmVzXSkKCnNpbWlsYXJpdGllcy5pIDwtIGFzLmRhdGEuZnJhbWUoYXMubGlzdCh0ZXh0c3RhdF9zaW1pbChkZm0udywgZGZtLndARGltbmFtZXMkZG9jc1tkZm0ud0Bkb2N2YXJzJENvdW50cnk9PXByZXNdLG1hcmdpbiA9ICJkb2N1bWVudHMiLCBtZXRob2QgPSAiZWphY2NhcmQiKSwgc29ydGVkID0gRkFMU0UpWzFdKQoKbmFtZXMoc2ltaWxhcml0aWVzLmkpWzFdIDwtICJQUkVTIgoKc2ltaWxhcml0aWVzLmkkWWVhciA8LSBpCgpwcmVzX3NpbWlsYXJpdHkgPC0gcmJpbmQocHJlc19zaW1pbGFyaXR5LHNpbWlsYXJpdGllcy5pKQoKfQpgYGAKCmBgYHtyIHNpbWlsYXJpdHkgYWRkaW5nIGNvdW50cnkgbmFtZXMgfQpwcmVzX3NpbWlsYXJpdHkkQ291bnRyeSA8LSByb3duYW1lcyhwcmVzX3NpbWlsYXJpdHkpCgpwcmVzX3NpbWlsYXJpdHkkQ291bnRyeSA8LSBzdHJfcmVwbGFjZShwcmVzX3NpbWlsYXJpdHkkQ291bnRyeSAsICIudHh0IiwgIiIpICU+JQogIHN0cl9yZXBsYWNlKC4gLCAiX1xcZHs0fSIsICIiKSAlPiUKICBzdHJfcmVwbGFjZSguICwgIl9cXGR7Mn0iLCAiIikKYGBgCgoKCmBgYHtyfQojTG9naWNhbHMgZm9yIEVVIG1lbWJlciBzdGF0ZXMKRVUgPC0gYygiQkVMIiwgIkZSQSIsICJERVUiLCAiSVRBIiwgIkxVWCIsICJOTEQiKQp3YXZlMSA8
LSBjKCJETksiLCAiSVJMIiwgIkdCUiIpCndhdmUyIDwtICJHUkMiIAp3YXZlMyA8LSBjKCJFU1AiLCAiUFJUIikgCndhdmU0IDwtIGMoIkFVVCIsICJGSU4iLCAiU1dFIikgCndhdmU1IDwtIGMoIkNaRSIsICJIVU4iLCAiUE9MIiwgIkVTVCIsICJMVkEiLCAiTFRVIiwgIkNZUCIsICJNTFQiLCAiU1ZLIiwgIlNWTiIpCndhdmU2IDwtIGMoIkJHUiIsICJST1UiKSAKd2F2ZTcgPC0gIkhSViIgCmBgYAoKCgoKCmBgYHtyfQpzaW1pbCA8LSBwcmVzX3NpbWlsYXJpdHkKCnNpbWlsIDwtIHNpbWlsICU+JSBtdXRhdGUoaXMuZXUgPSBDb3VudHJ5ICVpbiUgRVUpCgpzaW1pbCR3YXZlcyA8LSBzaW1pbCRpcy5ldQoKI2ZpcnN0IHdhdmUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJETksiICYgc2ltaWwkWWVhciA+MTk3Ml0gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIklSTCIgJiBzaW1pbCRZZWFyID4xOTcyXSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiR0JSIiAmIHNpbWlsJFllYXIgPjE5NzJdIDwtIFRSVUUKCiNzZWNvbmQgd2F2ZQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkdSQyIgJiBzaW1pbCRZZWFyID4xOTgwXSA8LSBUUlVFCgojdGhpcmQgd2F2ZQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkVTUCIgJiBzaW1pbCRZZWFyID4xOTg1XSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiUFJUIiAmIHNpbWlsJFllYXIgPjE5ODVdIDwtIFRSVUUKCiNmb3VydGggd2F2ZQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkFVVCIgJiBzaW1pbCRZZWFyID4xOTk0XSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiRklOIiAmIHNpbWlsJFllYXIgPjE5OTRdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJTV0UiICYgc2ltaWwkWWVhciA+MTk5NF0gPC0gVFJVRQoKI2ZpZnRoIHdhdmUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJDWkUiICYgc2ltaWwkWWVhciA+MjAwM10gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkhVTiIgJiBzaW1pbCRZZWFyID4yMDAzXSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiUE9MIiAmIHNpbWlsJFllYXIgPjIwMDNdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJFU1QiICYgc2ltaWwkWWVhciA+MjAwM10gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIkxWQSIgJiBzaW1pbCRZZWFyID4yMDAzXSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiTFRVIiAmIHNpbWlsJFllYXIgPjIwMDNdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJDWVAiICYgc2ltaWwkWWVhciA+MjAwM10gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIk1MVCIgJiBzaW1pbCRZZWFyID4yMDAzXSA8LSBUUlVFCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiU1ZLIiAm
IHNpbWlsJFllYXIgPjIwMDNdIDwtIFRSVUUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJTVk4iICYgc2ltaWwkWWVhciA+MjAwM10gPC0gVFJVRQoKI3NpeHRoIHdhdmUKc2ltaWwkaXMuZXVbc2ltaWwkQ291bnRyeT09ICJCR1IiICYgc2ltaWwkWWVhciA+MjAwNl0gPC0gVFJVRQpzaW1pbCRpcy5ldVtzaW1pbCRDb3VudHJ5PT0gIlJPVSIgJiBzaW1pbCRZZWFyID4yMDA2XSA8LSBUUlVFCgojc2V2ZW50aCB3YXZlCnNpbWlsJGlzLmV1W3NpbWlsJENvdW50cnk9PSAiSFJWIiAmIHNpbWlsJFllYXIgPjIwMTJdIDwtIFRSVUUKYGBgCgoKYGBge3J9CnNpbWlsIDwtICBtdXRhdGUoc2ltaWwsIGV1NiA9IENvdW50cnkgJWluJSBFVSkKI3NpbWlsIDwtICBtdXRhdGUoc2ltaWwsIHdhdmUxID0gQ291bnRyeSAlaW4lIHdhdmUxKQojc2ltaWwgPC0gIG11dGF0ZShzaW1pbCwgd2F2ZTIgPSBDb3VudHJ5ICVpbiUgd2F2ZTIpCiNzaW1pbCA8LSAgbXV0YXRlKHNpbWlsLCB3YXZlMyA9IENvdW50cnkgJWluJSB3YXZlMykKI3NpbWlsIDwtICBtdXRhdGUoc2ltaWwsIHdhdmU0ID0gQ291bnRyeSAlaW4lIHdhdmU0KQpzaW1pbCA8LSAgbXV0YXRlKHNpbWlsLCB3YXZlNSA9IENvdW50cnkgJWluJSB3YXZlNSkKI3NpbWlsIDwtICBtdXRhdGUoc2ltaWwsIHdhdmU2ID0gQ291bnRyeSAlaW4lIHdhdmU2KQojc2ltaWwgPC0gIG11dGF0ZShzaW1pbCwgd2F2ZTcgPSBDb3VudHJ5ICVpbiUgd2F2ZTcpCmBgYAoKYGBge3J9CnNpbWlsIDwtICBtdXRhdGUoc2ltaWwsIGV1OSA9IGV1NikKc2ltaWwkZXU5W3NpbWlsJENvdW50cnk9PSAiRE5LIiAmIHNpbWlsJFllYXIgPjE5NzJdIDwtIFRSVUUKc2ltaWwkZXU5W3NpbWlsJENvdW50cnk9PSAiSVJMIiAmIHNpbWlsJFllYXIgPjE5NzJdIDwtIFRSVUUKc2ltaWwkZXU5W3NpbWlsJENvdW50cnk9PSAiR0JSIiAmIHNpbWlsJFllYXIgPjE5NzJdIDwtIFRSVUUKCnNpbWlsIDwtICBtdXRhdGUoc2ltaWwsIGV1MTIgPSBldTkpCnNpbWlsJGV1MTJbc2ltaWwkQ291bnRyeT09ICJHUkMiICYgc2ltaWwkWWVhciA+MTk4MF0gPC0gVFJVRQpzaW1pbCRldTEyW3NpbWlsJENvdW50cnk9PSAiRVNQIiAmIHNpbWlsJFllYXIgPjE5ODVdIDwtIFRSVUUKc2ltaWwkZXUxMltzaW1pbCRDb3VudHJ5PT0gIlBSVCIgJiBzaW1pbCRZZWFyID4xOTg1XSA8LSBUUlVFCgpzaW1pbCA8LSAgbXV0YXRlKHNpbWlsLCBldTE1ID0gZXUxMikKc2ltaWwkZXUxNVtzaW1pbCRDb3VudHJ5PT0gIkFVVCIgJiBzaW1pbCRZZWFyID4xOTk0XSA8LSBUUlVFCnNpbWlsJGV1MTVbc2ltaWwkQ291bnRyeT09ICJGSU4iICYgc2ltaWwkWWVhciA+MTk5NF0gPC0gVFJVRQpzaW1pbCRldTE1W3NpbWlsJENvdW50cnk9PSAiU1dFIiAmIHNpbWlsJFllYXIgPjE5OTRdIDwtIFRSVUUKCmBgYAoKCmBgYHtyfQpyZWFkcjo6d3JpdGVfY3N2KHNpbWlsLCAiamFjY2FyZF9zaW1pbGFyaXR5LmNzdiIpCmBgYAoKCg==