I was initially going to make this a single post, but I realized that there is actually a lot of material to analyze and visualize, so I decided to divide it into two parts. Just like any marriage, part 1 of this series will focus on marriage and part 2 on divorce. I will not write much, as the plots speak for themselves, but I have tried to include some more statistical analysis. Drop me a line if you have any questions or suggestions!

library(tidyverse)
library(reshape2)
library(plyr)
library(stringr)
library(scales)
library(knitr)
library(data.table)
library(sp)
library(rgeos)
library(mgsub)
library(gridExtra)
library(ggrepel)
library(lmtest)
library(pander)
library(MASS)
library(lindia)

Regional Differences

# Replace Turkish-specific letters with their closest ASCII equivalents.
#
# datafile: a character vector (or something coercible to one).
# Returns a character vector of the same length; NA elements stay NA.
#
# chartr() maps the i-th character of the first string to the i-th character
# of the second in a single vectorised pass, so the two strings below must
# stay the same length and in the same order.
tr_to_en <- function(datafile) {
  turkish_letters <- "ÇŞĞİÜÖçşğıüö"
  english_letters <- "CSGIUOcsgiuo"
  chartr(turkish_letters, english_letters, datafile)
}

# Set up the map ----

# Boundary polygons read from a local RDS file (presumably GADM level-1
# boundaries for Turkey, judging by the file name -- TODO confirm).
tur <- readRDS("~/Documents/Personal/Projects/Blog/TUR_adm1.rds")

# Name/id table taken from the attribute data; ids are stored as character
# so they can be joined against the character `id` column of the fortified
# polygons.
cities <- tur@data[, c("NAME_1", "ID_1")]
colnames(cities)[2] <- "id"
cities[, 2] <- as.character(cities[, 2])

# Clean the names on this small table rather than on the fortified polygons,
# which repeat each name once per vertex.
cityNames <- tr_to_en(as.character(cities$NAME_1))

# Spelling corrections, applied to exact matches only: an unanchored
# substitution of "Afyon" would also rewrite a name that is already
# "Afyonkarahisar", and the "." in "K. Maras" is a regex wildcard.
nameFixes <- c(
  "K. Maras" = "Kahramanmaras",
  "Kinkkale" = "Kirikkale",
  "Zinguldak" = "Zonguldak",
  "Afyon" = "Afyonkarahisar"
)
needsFix <- cityNames %in% names(nameFixes)
cityNames[needsFix] <- unname(nameFixes[cityNames[needsFix]])

cityLookup <- data.frame(
  id = cities$id,
  City = cityNames,
  stringsAsFactors = FALSE
)

# Simplify the geometry, then flatten it to a data frame of vertices.
tur <- gSimplify(tur, tol = 0.01, topologyPreserve = TRUE)
tur <- fortify(tur)

# The name column arrives already called "City", so nothing depends on the
# column position in the fortified data frame.
map <- left_join(tur, cityLookup, by = "id")

It seems like residents of Tunceli (both men and women) get married the oldest amongst all Turks. On the other hand, the women of Agri get married earlier than the rest of the female population. Interestingly, that is not the case for the men of Agri, which says a lot about the marriage dynamics (namely the age difference between bride and groom; this will become more apparent in the second part of this series, when I study age differences at the time of divorce).

# Mean age at first marriage: one row per gender/year, one column per city.
mariageAge <- fread("~/Documents/Personal/Projects/Blog/data/Ortalama ilk evlenme Yasi.csv")

# Column names are assigned by position, so the order below has to match the
# column order of the CSV file.
marriageColumns <- c(
  "Gender", "Year",
  "Adana", "Adiyaman", "Afyonkarahisar", "Aksaray", "Amasya", "Ankara",
  "Antalya", "Ardahan", "Artvin", "Aydin", "Agri", "Balikesir", "Bartin",
  "Batman", "Bayburt", "Bilecik", "Bingol", "Bitlis", "Bolu", "Burdur",
  "Bursa", "Denizli", "Diyarbakir", "Duzce", "Edirne", "Elazig", "Erzincan",
  "Erzurum", "Eskisehir", "Gaziantep", "Giresun", "Gumushane", "Hakkari",
  "Hatay", "Isparta", "Igdir", "Kahramanmaras", "Karabuk", "Karaman", "Kars",
  "Kastamonu", "Kayseri", "Kilis", "Kocaeli", "Konya", "Kutahya",
  "Kirklareli", "Kirikkale", "Kirsehir", "Malatya", "Manisa", "Mardin",
  "Mersin", "Mugla", "Mus", "Nevsehir", "Nigde", "Ordu", "Osmaniye", "Rize",
  "Sakarya", "Samsun", "Siirt", "Sinop", "Sivas", "Tekirdag", "Tokat",
  "Trabzon", "Tunceli", "Usak", "Van", "Yalova", "Yozgat", "Zonguldak",
  "Canakkale", "Cankiri", "Corum", "Istanbul", "Izmir", "Sanliurfa", "Sirnak"
)

# Fail with a clear message if the file does not have the expected number of
# columns, instead of mislabelling them.
if (ncol(mariageAge) != length(marriageColumns)) {
  stop(
    "Expected ", length(marriageColumns), " columns in the marriage-age file, ",
    "found ", ncol(mariageAge), ".",
    call. = FALSE
  )
}
setnames(mariageAge, marriageColumns)

# Long format: one row per gender/year/city, with the age in `value`.
mariageAge <- melt(mariageAge, id.vars = c("Gender", "Year"), variable.name = "City")

# melt() returns City as a factor while map$City is character; joining the two
# directly makes left_join() warn and coerce. Join on an explicit character key
# instead, leaving mariageAge itself untouched.
marriageForJoin <- as.data.frame(mariageAge)
marriageForJoin$City <- as.character(marriageForJoin$City)

# Every map vertex is repeated once per gender/year combination of its city.
data <- left_join(map, marriageForJoin, by = "City")
# Draw a map of the age at first marriage for a single year, with one panel
# per gender. In each panel the city with the lowest and the city with the
# highest value are labelled.
#
# year:    year to plot; compared against the `Year` column.
# ageData: data frame of map vertices joined with the marriage figures. It
#          must contain the columns long, lat, group, City, Gender, Year and
#          value. Defaults to the script-level `data` object, so existing
#          calls of the form ageYear(2017) keep working.
#
# Returns a ggplot object. Stops with an error if there are no rows for `year`.
ageYear <- function(year, ageData = data) {
  # which() discards rows whose Year is NA; plain logical indexing would turn
  # each of them into an all-NA row, which ggplot then shows as an "NA" panel.
  yearData <- ageData[which(ageData$Year == year), ]
  if (nrow(yearData) == 0) {
    stop("No marriage-age data for year ", year, call. = FALSE)
  }

  women <- yearData[yearData$Gender %in% "Women", ]
  men <- yearData[yearData$Gender %in% "Men", ]

  # One row per extreme. Each row is a single map vertex of the city in
  # question, and its long/lat is where the label gets anchored.
  extremes <- rbind(
    men[which.min(men$value), ],
    women[which.min(women$value), ],
    men[which.max(men$value), ],
    women[which.max(women$value), ]
  )

  ggplot(data = yearData) +
    geom_polygon(aes(x = long, y = lat, group = group, fill = value)) +
    guides(fill = guide_legend(title = "Age")) +
    scale_fill_distiller(palette = 11, direction = -1) +
    facet_grid(Gender ~ .) +
    coord_fixed(1.3) +
    # theme_bw() is a complete theme and replaces every setting made before
    # it, so it has to come ahead of the theme() tweaks.
    theme_bw() +
    theme(
      # margin() takes the four sides as separate arguments: top, right,
      # bottom, left.
      legend.box.margin = margin(1, 10, 1, 1),
      axis.line = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      panel.grid.minor = element_blank(),
      plot.title = element_text(family = "Arial", face = "bold", size = 10)
    ) +
    ggtitle(
      paste("Age at First Marriage in Turkey in ", year),
      subtitle = "Labels correspond to minimum and maximum ages"
    ) +
    geom_label_repel(
      data = extremes,
      aes(x = long, y = lat, label = paste(City, " :", value)),
      size = 2
    )
}

# Show the yearly maps side by side, two consecutive years per figure:
# (2001, 2002), (2003, 2004), ..., (2015, 2016).
firstYears <- seq(from = 2001, to = 2016, by = 2)
for (startYear in firstYears) {
  leftMap <- ageYear(startYear)
  rightMap <- ageYear(startYear + 1)
  grid.arrange(leftMap, rightMap, ncol = 2)
}

# 2017 is left without a partner year, so it is drawn on its own.
ageYear(2017)