############################################################
### Cleaning the data and preparing all vars needed      ###
###                                                      ###
### by Lisa Scholten, l.scholten@tudelft.nl              ### 
############################################################

# ---- PREPARE ENVIRONMENT      ----
# ----------------------------------
# cleanup
rm(list = ls()); gc()

library(plyr)
library(dplyr)
library(tidyr)
library(reshape2)
library(lubridate)
library(purrr)
library(Hmisc)
library(ggplot2)
library(stringr)
library(psych)
library(moments) # for skewness and kurtosis

# Set working directory
setwd("WRITE PATH")

# Load user-defined functions
source("Functions_20191121.R")

# Tell where stimuli are saved
stimuli.path <- "WRITE PATH HERE/"

# ---- Import files ---- 
d.q <- read.csv2(file = "questionnaires-2019-11-28.csv", sep = ",", stringsAsFactors = FALSE)
d.t <- read.csv2(file = "tasks-2019-11-28.csv", sep = ",", stringsAsFactors = FALSE)
d.tt <- read.csv2(file = "tot-2019-11-28.csv", sep = ",", stringsAsFactors = FALSE)


# ---- DATA CLEANSING to get files ready for analysis ---

# d.q
# rank information
d.q[d.q == "rank-1"] <- 1
d.q[d.q == "rank-2"] <- 2
d.q[d.q == "rank-3"] <- 3
d.q[d.q == "rank-4"] <- 4
d.q[d.q == "rank-5"] <- 5

# arousal and valence
d.q[d.q == "++++"] <- 4
d.q[d.q == "+++"] <- 3
d.q[d.q == "++"] <- 2
d.q[d.q == "+"] <- 1
d.q[d.q == "0"] <- 0
d.q[d.q == "-"] <- -1
d.q[d.q == "--"] <- -2
d.q[d.q == "---"] <- -3
d.q[d.q == "----"] <- -4


# convert columns to numeric
d.q[,c("bath","cost","ekf","floodh", "floodr")] <- apply(d.q[,c("bath","cost","ekf","floodh", "floodr")], 2, 
                                                         function(x) as.numeric(as.character(x)))

d.q[,c("expbathing", "expcost", "expekf", "expfloodh", "expfloodr")] <- apply(d.q[,c("expbathing",
                                                                                     "expcost",
                                                                                     "expekf",
                                                                                     "expfloodh", 
                                                                                     "expfloodr")], 2,
                                                                              function(x) ifelse(x == "Yes", 1, 0))
d.q$birthyear <- as.numeric(revalue(as.character(d.q$birthyear), 
                                    c("37" = 2019-37, 
                                    "28" = 2019-28,
                                    "23" = 2019-23,
                                    "50" = 2019-50)))


# d.t
#*********
# sapply(d.t, class) # what class is column?
# apply(d.t, 2, unique) # see which values to change

# Don't touch:
# - ID columns "participant.id", "participant.status", "e.cond"

# Across columns
# - rpe, rpv
d.t[d.t == "Choose A"] <- 1
d.t[d.t == "Choose B"] <- 0

# - rpe.why
d.t[d.t == "Increase expected gain"]             <- "e.gain"
d.t[d.t == "Reduce expected loss"]               <- "e.loss"
d.t[d.t == "Increase possibility of gain"]       <- "p.gain"
d.t[d.t == "Reduce possibility of loss"]         <- "p.loss"
d.t[d.t == "A and B are the same to me"]         <- "indiff"
d.t[d.t == "I actually prefer the other option"] <- "detected"

# check for more
# tody <- apply(d.t[,97:136], 2, ifelse(function(i) d.t[,i] %in% reasons, d.t[,i], "etc"))
reasons <- c("e.gain", "e.loss", "p.gain", "p.loss", "indiff", "detected")

etc <- apply(d.t[,c(97:length(d.t))], 2, table)

for(i in 97:length(d.t)){
 d.t[,i] <- ifelse(d.t[,i] %in% reasons, d.t[,i], "etc")
}

# # WRITE COMMENTS
# comments <- d.q[,c("participant.id", "comment")]
# write.csv(comments, file = paste0(getwd(), "/comments-", Sys.Date(), ".csv"), row.names = FALSE)

# apply(d.t, 2, unique) # see which values to change
d.t$e.cond     <- revalue(d.t$e.cond, c("Treatment (icon arrays)" = "icon", "Control (tree)" = "tree"))
d.t$practice.1 <- revalue(d.t$practice.1, c("£ 20" = 20, "£  5" = 5, "£ 50" = 50, "£ -1 (I would lose £1)" = -1))
d.t$practice.2 <- revalue(d.t$practice.2, c("90 / 100" = 90/100, "10 / 100" = 10/100, "30 / 100" = 30/100, "70 / 100" = 70/100))
d.t$practice.3 <- revalue(d.t$practice.3, c("Losing £ 1" = -1, "Winning £ 50" = 50, "Winning £ 1" = 1))
d.t$ct_1_crt   <- revalue(d.t$ct_1_crt, c("0.05" = 5, "0.10" = 10, "0.1" = 10, ".10" = 10, "1.05" = 105,
                                          "£0.10" = 10, "1.00" = 100, "0.50" = 50, "0.5" = 50,
                                          ".1.00" = 100, "2.10" = 210, ".90" = 90, "10p" = 10))
d.t$ct_2_crt   <- revalue(d.t$ct_2_crt, c("5 mins" = 5, "5 minutes" = 5, "100 minutes" = 100))
d.t$ct_3_crt   <- revalue(d.t$ct_3_crt, c("28\\" = 28, "28\"" = 28, "47 days" = 47, "24 days" = 24))
d.t$ct_1_bnt   <- revalue(as.character(d.t$ct_1_bnt), c("1 in 4" = 25, "20%" = 20, "1/4" = 25, "4 in 10" = 40,
                                                        "0.25" = 25, "40#" = 40, "2/10" = 20, "1/3" = 100/3, "10#" = 10,
                                                        "40%" = 40, ".25" = 25, "1/10" = 10))

d.t$ct_2a_bnt  <- revalue(as.character(d.t$ct_2a_bnt), c("not possible to say" = 0, "30%" = 30, "3 in 5" = 60)) # how about NA entry?
# d.t[which(d.t$ct_2a_bnt == ""), ]$ct_2a_bnt <- 0
d.t$ct_2b_bnt  <- revalue(as.character(d.t$ct_2b_bnt), c("20" = 20, "23.3'" = 23.3))
d.t$ct_3_bnt   <- revalue(as.character(d.t$ct_3_bnt), c("0.5" = 50))

# d.t[which(d.t$ct_3_bnt == ""), ]$ct_2a_bnt <- 0
d.t$ct_1_glt   <- revalue(as.character(d.t$ct_1_glt), c("40%" = 40, "04" = 40, "40#" = 40))
d.t$ct_2_glt   <- revalue(as.character(d.t$ct_2_glt), c("40v" = 40, "1.5" = 60, "2.5 times more" = 100,
                                                        "60 more" = 60, "02" = 20, "20 more adults" = 20, "2." = 20))

# Remove NA's from cognitive tasks
cognitives <- c("ct_1_crt", "ct_2_crt", "ct_3_crt",
                "ct_1_bnt", "ct_2a_bnt", "ct_2b_bnt", "ct_3_bnt",
                "ct_1_glt", "ct_2_glt")

# Check for and handle no response (=NA) cases
table(is.na(d.t[,cognitives]))
d.t[cognitives][is.na(d.t[cognitives])] <- 8888


# d.tt
#*********
# Remove NA's introduced to time stamp information by loading delay
# Recalculate from others
# summary(as.numeric(as.character(d.tt$rpe.tot)))
nrow(d.tt[d.tt$rpe.tot == "LOADING DELAY",])
nrow(d.tt[d.tt$rpv.tot == "LOADING DELAY",])
which(d.tt == "LOADING DELAY", arr.ind = T) # find where the "culprits" are

rpe.tot <- subset(d.tt, select = c(rpe_1_1, rpe_2_1, rpe_5_1, rpe_6_1, rpe_9_1,
                       rpe_10_1, rpe_11_1, rpe_12_1, rpe_13_1, rpe_14_1,
                       rpe_15_1, rpe_17_1, rpe_20_1, rpe_21_1,rpe_22_1,
                       rpe_23_1,rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1,
                       rpe_1_2, rpe_2_2, rpe_5_2, rpe_6_2, rpe_9_2,
                       rpe_10_2, rpe_11_2, rpe_12_2, rpe_13_2, rpe_14_2,
                       rpe_15_2, rpe_17_2, rpe_20_2, rpe_21_2,rpe_22_2,
                       rpe_23_2,rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2))

rpv.tot <- subset(d.tt, select = c(rpv_1_1, rpv_2_1, rpv_5_1, rpv_6_1, rpv_9_1,
                                   rpv_10_1, rpv_11_1, rpv_12_1, rpv_13_1, rpv_14_1,
                                   rpv_15_1, rpv_17_1, rpv_20_1, rpv_21_1,rpv_22_1,
                                   rpv_23_1,rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1,
                                   rpv_1_2, rpv_2_2, rpv_5_2, rpv_6_2, rpv_9_2,
                                   rpv_10_2, rpv_11_2, rpv_12_2, rpv_13_2, rpv_14_2,
                                   rpv_15_2, rpv_17_2, rpv_20_2, rpv_21_2,rpv_22_2,
                                   rpv_23_2,rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))

# sapply(rpe.tot, function(x) table(x))

for(i in 1:length(rpe.tot)){
  rpe.tot[,i] <- as.numeric(as.character(rpe.tot[,i]))
  rpv.tot[,i] <- as.numeric(as.character(rpv.tot[,i]))
}
rpe.tot$sum <- apply(rpe.tot, 1, sum)
rpv.tot$sum <- apply(rpv.tot, 1, sum)

d.tt[d.tt$rpe.tot == "LOADING DELAY",]$rpe.tot <- rpe.tot[d.tt$rpe.tot == "LOADING DELAY",]$sum
d.tt[d.tt$rpv.tot == "LOADING DELAY",]$rpv.tot <- rpv.tot[d.tt$rpv.tot == "LOADING DELAY",]$sum

# Recalculate for gambles practice
d.tt[c(14,144,275),8] <- apply(d.tt[c(14,144,275),5:7], 1, function(x) sum(as.numeric(as.character(x))))

which(d.tt == "LOADING DELAY", arr.ind = T) # find where the "culprits" are

rm(rpe.tot, rpv.tot,i); gc()


# Any further NA triggers?
# sapply(d.tt, class) # what class is column?
# apply(d.tt, 2, function(x) summary(as.numeric(x))) # see which values to change
# quiet_list <- map(d.tt[,c(5:length(d.tt))], quietly(as.numeric))
# quiet_list %>% map("warnings") %>% keep(~ !is.null(.))
# quiet_list %>% keep(~ length(.$warnings) != 0)

apply(is.na(d.tt), 2, which)
d.tt[is.na(d.tt)] <- 0 # assign time = 0 to NA values


# Convert remaining to numeric -- CHECK FOR MORE "LOADING DELAYS" or OTHER NA-TRIGGERS
d.tt[,c(5:length(d.tt))] <- apply(d.tt[,c(5:length(d.tt))], 2, function(x) as.numeric(as.character(x))) # convert columns to numeric


# ---- CHECK SUBMISSIONS FOR APPROVAL ----

# Create data.frame with approval information
approve <- data.frame(
  participant.id = d.q$participant.id,
  complete = d.q$participant.status,
  attend.yes = NA,
  attend.outcome = NA,
  training = NA,
  tot.overall = NA,
  tot.outcomes = NA,
  tot.trials1 = NA,
  tot.trials2 = NA,
  s.per.choice = NA,
  s.per.t1 = NA,
  s.per.t2 = NA,
  glt = NA,
  crt = NA,
  bnt = NA
  )

approve$attend.yes <- ifelse(d.q$attentionchckyes == "Yes", 1, 0)
approve$attend.outcome <- ifelse(d.q$attentionoutcomes == "Greenhouse gas emissions", 1, 0)

# Training
train <- d.t[,c("practice.1", "practice.2", "practice.3")]
train$practice1 <- ifelse(train$practice.1 == "20", 1, 0)
train$practice2 <- ifelse(train$practice.2 == "0.9", 1, 0)
train$practice3 <- ifelse(train$practice.3 == "-1", 1, 0)

train <-  as.data.frame(sapply(train, as.numeric))
approve$training <- train$practice1 + train$practice2 + train$practice3

# Cognitive tests
# Note that there is an issue if participants do not enter numbers in format as stated
# quiet_list <- map(ct, quietly(as.numeric))
# quiet_list %>% map("warnings") %>% keep(~ !is.null(.))
# quiet_list %>% keep(~ length(.$warnings) != 0)

ct <- d.t[,c("ct_1_glt", "ct_2_glt", "ct_1_crt", "ct_2_crt", "ct_3_crt", "ct_1_bnt", "ct_2a_bnt", "ct_2b_bnt", "ct_3_bnt")]
ct <- as.data.frame(sapply(ct, function(x) as.numeric(as.character(x))))

ct$ct_1_glt  <- ifelse(ct$ct_1_glt  == 40, 1, 0)
ct$ct_2_glt  <- ifelse(ct$ct_2_glt  == 20, 1, 0) 
ct$ct_1_crt  <- ifelse(ct$ct_1_crt  == 5, 1, 0)
ct$ct_2_crt  <- ifelse(ct$ct_2_crt  == 5, 1, 0)
ct$ct_3_crt  <- ifelse(ct$ct_3_crt  == 47, 1, 0)
ct$ct_1_bnt  <- ifelse(ct$ct_1_bnt  == 25, 1, 0) # 25
ct$ct_2a_bnt <- ifelse(ct$ct_2a_bnt == 30, 1, 0) # 30
ct$ct_2b_bnt <- ifelse(ct$ct_2b_bnt == 20, 1, 0) # 20
ct$ct_3_bnt  <- ifelse(ct$ct_3_bnt  == 50, 1, 0) # 50
approve$glt   <- ct$ct_1_glt + ct$ct_2_glt
approve$crt   <- ct$ct_1_crt + ct$ct_2_crt + ct$ct_3_crt
approve$bnt   <- ifelse(ct$ct_1_bnt == 0 & ct$ct_2a_bnt == 0, 1, # otherwise 2
                       ifelse(ct$ct_1_bnt == 0 & ct$ct_2a_bnt == 1, 2, # otherwise 3
                              ifelse(ct$ct_1_bnt == 1 & ct$ct_2b_bnt == 1, 4, 
                                     ifelse(ct$ct_1_bnt == 1 & ct$ct_2b_bnt == 0 & ct$ct_3_bnt == 1, 4, 
                                            ifelse(ct$ct_1_bnt == 1 & ct$ct_2b_bnt == 0 & ct$ct_3_bnt == 0, 3, 2)
                                     )
                              )
                       )
)

d.q$glt <- approve$glt
d.q$crt <- approve$crt
d.q$bnt <- approve$bnt - 1 # with rescaling to 0-3 

# Time on tasks
# d.tt[is.na(d.tt)] <- 0
tot <- data.frame(participant.id = d.q$participant.id,
                 intro = d.q$tot.intro,
                 affect = d.q$tot.affect1 +  d.q$tot.affect2 + d.q$tot.affect3,
                 glt = as.numeric(d.tt$ct_1_glt) + as.numeric(d.tt$ct_2_glt),
                 outcomes = d.q$tot.outcomes,
                 practice = d.tt$gp.tot,
                 rpe = d.tt$rpe.tot,
                 rpv = d.tt$rpv.tot,
                 debrief = d.q$tot.debriefing,
                 crt = d.tt$ct_1_crt + d.tt$ct_2_crt + d.tt$ct_3_crt,
                 bnt = d.tt$ct_1_bnt + ifelse(is.null(d.tt$ct_2a_bnt),0,d.tt$ct_2a_bnt) + ifelse(is.null(d.tt$ct_2b_bnt),0,d.tt$ct_2b_bnt) + 
                   ifelse(is.na(d.tt$ct_3_bnt), 0, d.tt$ct_3_bnt),
                 experience = d.q$tot.experience,
                 demo = d.q$tot.demographics,
                 end = d.q$tot.end)
tot$overall <- apply(tot[,-1], 1, function(x) sum(x, na.rm=TRUE))
tot[,-1] <- tot[,-1]/(60*1000)


approve$tot.overall  <- tot$overall
approve$tot.outcomes <- tot$outcomes
approve$tot.trials1  <- tot$rpe # check
approve$tot.trials2  <- tot$rpv
approve$s.per.choice <- (tot$rpe + tot$rpv)/(20*2*2+20*2)*60
approve$s.per.t1 <- tot$rpe/(20*2*2)*60  #check
approve$s.per.t2 <- tot$rpv/(20*2)*60
approve <- bind_cols(approve, tot)


# Approve or reject
approve$approve <- ifelse(approve$attend.yes == 0 | approve$attend.outcome == 0 | approve$training == 0|
                          approve$tot.overall < 18| approve$s.per.choice < 3,
                          0, 1)
# sapply(approve[,c(2:5, 13:length(approve))], table)
# sapply(approve[,c(6:12)], summary)


# Order by participant.id to facilitate subsequent approval and write file
approve <- approve[order(approve$participant.id),]
# write.csv(approve, file = paste0(getwd(), "/approve-", Sys.Date(), ".csv"), na="", row.names = FALSE)

# Check specific participants
# approve[approve$participant.id %in% c("5d204b04b1e50a001a19e385"),]

# ---- REMOVE DISQUALIFIED PARTICIPANTS ----

# (cond1) Number not passing either or both attention tests: 26
# (cond2) Number not passing attention test and answering no training question correctly: 32 
# (cond3) Number not passing attention test and not answering >=2 training question correctly: 51
# (cond4) Number not passing attention test and not answering 3 training question correctly: 108
# (cond5) Number not passing attention test and answering no training question correctly and spending less than 3s per choice: 38

# Using cond2
keep2 <- as.character(approve[-which(approve$attend.yes == 0 | approve$attend.outcome == 0 | approve$training == 0),]$participant.id)

# Using cond3
keep3 <- as.character(approve[-which(approve$attend.yes == 0 | approve$attend.outcome == 0 | approve$training < 2),]$participant.id)

# Using cond4
keep4 <- as.character(approve[-which(approve$attend.yes == 0 | approve$attend.outcome == 0 | approve$training != 3),]$participant.id)

keep <- keep2

d.q <- d.q[d.q$participant.id %in% keep, ]
d.t <- d.t[d.t$participant.id %in% keep, ]
d.tt <- d.tt[d.tt$participant.id %in% keep, ]

# merge questionnaire, task, and time data
table(d.q$participant.id == d.t$participant.id) # make sure order of entries' id's is the same
d <- bind_cols(d.q, d.t[,4:length(d.t)])

rm(tot, train, etc, d.q, d.t)

# ---- Write domain of series 2 to individual results ----
d$dom.2 <- NA
for(i in 1:nrow(d)){
  d[i,]$dom.2 <- ifelse(d[i,]$cost == 1, 
                        c("floodh", "floodr", "cost", "ekf", "bath")[which(d[i,c("floodh", "floodr", "cost", "ekf", "bath")] == 2)],
                        c("floodh", "floodr", "cost", "ekf", "bath")[which(d[i,c("floodh", "floodr", "cost", "ekf", "bath")] == 1)])
}

# Overwrite d$dom.2 for those 19 participants who were mistakenly forced to do bath in version 4
forced <- c("5dc6cb647081384c9ba35dd1", "5dc6cc96f7bfb14d6aa59092",
            "5dc6cc126a24b54c400e918b", "5dc6cbfcd872a44e30c69aed",
            "5dc6cc2cd999de4dcacd9369", "5dc6ce2a301fd14e9318c3cb",
            "5dc6cbd2e4f0ff4d650bc5b4", "5dc6cc04a8901b4df9172de1",
            "5dc6cb918853314dddd41d20", "5dc6caf484b21f4dfd1f876e",
            "5dc6cc1ffbf4744d14d3a622", "5dc6cc650feb414dcf55ceca",
            "5dc6d0510016bb4d2539ec05", "5dc6cbee0550ef4d4604be63",
            "5dc6d0e35f75344cf9abd49a", "5dc6cbff6bba9b4ce25f9603",
            "5dc6cf1bf9d4a04da37b3f59", "5dc6cbdff375a94e273e967c",
            "5dc6cbaf902a984df41aeb00"
            )
d[which(d$participant.id %in% forced),]$dom.2 <- "bath"

doms <- c("bath", "floodr", "floodh", "ekf")
d$dom2.rank <- NA

for(i in 1:length(doms)){
  d[d$dom.2 == doms[i],]$dom2.rank <- d[d$dom.2 == doms[i],doms[i]]
}


# ==== 1 Prepare choice and risk profile data ====
# Load choice risky spreadsheets as used in Gorilla
risky <- vector("list", length = 5)
names(risky) <- c("cost", "bath", "ekf", "floodh", "floodr")

risky$cost <- read.csv2(file = paste0(stimuli.path, "AL-cost-bath-icons_20.csv"), sep = ";", stringsAsFactors = FALSE)
risky$bath <- read.csv2(file = paste0(stimuli.path, "AL-cost-bath-icons_20.csv"), sep = ";", stringsAsFactors = FALSE)
risky$ekf <- read.csv2(file = paste0(stimuli.path, "AL-cost-ekf-icons_20.csv"), sep = ";", stringsAsFactors = FALSE)
risky$floodh <- read.csv2(file = paste0(stimuli.path, "AL-cost-floodh-icons_20.csv"), sep = ";", stringsAsFactors = FALSE)
risky$floodr <- read.csv2(file = paste0(stimuli.path, "AL-cost-floodr-icons_20.csv"), sep = ";", stringsAsFactors = FALSE)


# Keep only lotteries information
## For cost
risky$cost <- risky$cost[which(risky$cost$randomise_blocks == 1 & risky$cost$display == "trials"),
                         c("gamble", "manipulated", "x.A", "px.A", "y.A", "x.B", "px.B", "y.B", "type")]
## For other
risky[2:5] <- llply(risky[2:5], 
                    function(x) x[which(x$randomise_blocks == 2 & x$display == "trials"),
                                  c("gamble", "manipulated", "x.A", "px.A", "y.A", "x.B", "px.B", "y.B", "type")])

## Convert units to numeric 
risky$cost <- as.data.frame(sapply(risky$cost, function(x) gsub(x, pattern = "? ", replacement = "")), stringsAsFactors = F)
risky$bath <- as.data.frame(sapply(risky$bath, function(x) gsub(x, pattern = " days/season", replacement = "")), stringsAsFactors = F)
risky$ekf <- as.data.frame(sapply(risky$ekf, function(x) gsub(x, pattern = " units", replacement = "")), stringsAsFactors = F)
risky$floodh <- as.data.frame(sapply(risky$floodh, function(x) gsub(x, pattern = " % of properties", replacement = "")), stringsAsFactors = F)
risky$floodr <- as.data.frame(sapply(risky$floodr, function(x) gsub(x, pattern = " % of roads", replacement = "")), stringsAsFactors = F)

for (i in 1:length(risky)) risky[[i]][,1:8] <- llply(risky[[i]][,1:8], as.numeric)

## Convert probabilities from percentage to ratio
for (i in 1:length(risky)) risky[[i]][,c("px.A","px.B")] <- llply(risky[[i]][,c("px.A","px.B")], function(x) x/100)


## Order gambles
risky <- llply(risky, function(x) x[order(x$gamble),])
# - check: risky$floodh$gamble

## Compute what rational choice would be if considering EV only
# for (i in 1:length(risky)){
# risky[[i]]$rat <- ifelse(risky[[i]]$x.A*risky[[i]]$px.A + risky[[i]]$y.A*(1-risky[[i]]$px.A) > 
#                            risky[[i]]$x.B*risky[[i]]$px.B + risky[[i]]$y.B*(1-risky[[i]]$px.B), 1, 
#                                ifelse(risky[[i]]$x.A*risky[[i]]$px.A + risky[[i]]$y.A*(1-risky[[i]]$px.A) < 
#                                         risky[[i]]$x.B*risky[[i]]$px.B + risky[[i]]$y.B*(1-risky[[i]]$px.B), 0, 
#                                       2))
# }; rm(i)
for (i in 1:length(risky)){
  risky[[i]]$rat <- ifelse(do.call(ev.gamble, args = list(x = risky[[i]]$x.A,
                                                          p = risky[[i]]$px.A,
                                                          y = risky[[i]]$y.A)) > 
                             do.call(ev.gamble, args = list(x = risky[[i]]$x.B,
                                                            p = risky[[i]]$px.B,
                                                            y = risky[[i]]$y.B)), 1, 
                           ifelse(do.call(ev.gamble, args = list(x = risky[[i]]$x.A,
                                                                 p = risky[[i]]$px.A,
                                                                 y = risky[[i]]$y.A)) < 
                                    do.call(ev.gamble, args = list(x = risky[[i]]$x.B,
                                                                   p = risky[[i]]$px.B,
                                                                   y = risky[[i]]$y.B)), 0, 
                                  2))
}; rm(i)

save(risky, file = "risky_lotteries-2019-12-28.Rdata")

# ==== 2 Create risky choice trial data files ====

risky.icon <- risky.tree <- risky

## Cost
# - make columns of choices and write to cost data, name by participant id

### Across experimental conditions
risky$cost <- bind_cols(risky$cost, as.data.frame(t(subset(d, select = c(rpe_1_1, rpe_2_1, rpe_5_1, rpe_6_1, rpe_9_1, # Elicitation
                                                                         rpe_10_1, rpe_11_1, rpe_12_1, rpe_13_1, rpe_14_1,
                                                                         rpe_15_1, rpe_17_1, rpe_20_1, rpe_21_1,rpe_22_1,
                                                                         rpe_23_1,rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1))),
                                                  stringsAsFactors = F))
risky$cost <- bind_cols(risky$cost, as.data.frame(t(subset(d, select = c(rpv_1_1, rpv_2_1, rpv_5_1, rpv_6_1, rpv_9_1, # Validation
                                                                         rpv_10_1, rpv_11_1, rpv_12_1, rpv_13_1, rpv_14_1,
                                                                         rpv_15_1, rpv_17_1, rpv_20_1, rpv_21_1,rpv_22_1,
                                                                         rpv_23_1,rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1))),
                                                  stringsAsFactors = F))
colnames(risky$cost)[11:(10+nrow(d))] <- paste0(d$participant.id, "_e")
colnames(risky$cost)[(11+nrow(d)):(10+nrow(d)*2)] <- paste0(d$participant.id, "_v")

### Icon format
risky.icon$cost <- bind_cols(risky.icon$cost, 
                             as.data.frame(t(subset(d[d$e.cond == "icon",], 
                                                    select = c(rpe_1_1, rpe_2_1, rpe_5_1, rpe_6_1, rpe_9_1, # Elicitation
                                                               rpe_10_1, rpe_11_1, rpe_12_1, rpe_13_1, rpe_14_1,
                                                               rpe_15_1, rpe_17_1, rpe_20_1, rpe_21_1,rpe_22_1,
                                                               rpe_23_1,rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1))),
                                           stringsAsFactors = F))
risky.icon$cost <- bind_cols(risky.icon$cost, 
                             as.data.frame(t(subset(d[d$e.cond == "icon",], 
                                                    select = c(rpv_1_1, rpv_2_1, rpv_5_1, rpv_6_1, rpv_9_1, # Validation
                                                               rpv_10_1, rpv_11_1, rpv_12_1, rpv_13_1, rpv_14_1,
                                                               rpv_15_1, rpv_17_1, rpv_20_1, rpv_21_1,rpv_22_1,
                                                               rpv_23_1,rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1))),
                                           stringsAsFactors = F))
colnames(risky.icon$cost)[11:(10+nrow(d[d$e.cond == "icon",]))] <- paste0(d[d$e.cond == "icon",]$participant.id, "_e")
colnames(risky.icon$cost)[(11+nrow(d[d$e.cond == "icon",])):(10+nrow(d[d$e.cond == "icon",])*2)] <- paste0(d[d$e.cond == "icon",]$participant.id, "_v")

### tree format
risky.tree$cost <- bind_cols(risky.tree$cost, 
                             as.data.frame(t(subset(d[d$e.cond == "tree",], 
                                                    select = c(rpe_1_1, rpe_2_1, rpe_5_1, rpe_6_1, rpe_9_1, # Elicitation
                                                               rpe_10_1, rpe_11_1, rpe_12_1, rpe_13_1, rpe_14_1,
                                                               rpe_15_1, rpe_17_1, rpe_20_1, rpe_21_1,rpe_22_1,
                                                               rpe_23_1,rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1))),
                                           stringsAsFactors = F))
risky.tree$cost <- bind_cols(risky.tree$cost, 
                             as.data.frame(t(subset(d[d$e.cond == "tree",], 
                                                    select = c(rpv_1_1, rpv_2_1, rpv_5_1, rpv_6_1, rpv_9_1, # Validation
                                                               rpv_10_1, rpv_11_1, rpv_12_1, rpv_13_1, rpv_14_1,
                                                               rpv_15_1, rpv_17_1, rpv_20_1, rpv_21_1,rpv_22_1,
                                                               rpv_23_1,rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1))),
                                           stringsAsFactors = F))
colnames(risky.tree$cost)[11:(10+nrow(d[d$e.cond == "tree",]))] <- paste0(d[d$e.cond == "tree",]$participant.id, "_e")
colnames(risky.tree$cost)[(11+nrow(d[d$e.cond == "tree",])):(10+nrow(d[d$e.cond == "tree",])*2)] <- paste0(d[d$e.cond == "tree",]$participant.id, "_v")

## Other domains

# Separately
# - same as for cost but conditional on dom.2
indy <- c("bath", "ekf",  "floodh", "floodr")

### Across experimental conditions
for(i in 1:length(indy)){
  risky[[indy[i]]] <- bind_cols(risky[[indy[i]]], 
                                as.data.frame(t(subset(d[d$dom.2 == indy[i],], 
                                                       select = c(rpe_1_2, rpe_2_2, rpe_5_2, rpe_6_2, rpe_9_2,
                                                                  rpe_10_2, rpe_11_2, rpe_12_2, rpe_13_2, rpe_14_2,
                                                                  rpe_15_2, rpe_17_2, rpe_20_2, rpe_21_2,rpe_22_2,
                                                                  rpe_23_2,rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2))),
                                              stringsAsFactors = F))
  risky[[indy[i]]] <- bind_cols(risky[[indy[i]]], 
                                as.data.frame(t(subset(d[d$dom.2 == indy[i],], 
                                                       select = c(rpv_1_2, rpv_2_2, rpv_5_2, rpv_6_2, rpv_9_2,
                                                                  rpv_10_2, rpv_11_2, rpv_12_2, rpv_13_2, rpv_14_2,
                                                                  rpv_15_2, rpv_17_2, rpv_20_2, rpv_21_2,rpv_22_2,
                                                                  rpv_23_2,rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))),
                                              stringsAsFactors = F))
  colnames(risky[[indy[i]]])[11:(10+table(d$dom.2)[[indy[i]]])] <- as.character(paste0(d[d$dom.2 == indy[i],]$participant.id, "_e"))
  colnames(risky[[indy[i]]])[(11+table(d$dom.2)[[indy[i]]]):(10+table(d$dom.2)[[indy[i]]]*2)] <- as.character(paste0(d[d$dom.2 == indy[i],]$participant.id, "_v"))
}; rm(i)


### Icon condition
for(i in 1:length(indy)){
  risky.icon[[indy[i]]] <- bind_cols(risky.icon[[indy[i]]], 
                                     as.data.frame(t(subset(d[d$dom.2 == indy[i] & d$e.cond == "icon",], 
                                                            select = c(rpe_1_2, rpe_2_2, rpe_5_2, rpe_6_2, rpe_9_2,
                                                                       rpe_10_2, rpe_11_2, rpe_12_2, rpe_13_2, rpe_14_2,
                                                                       rpe_15_2, rpe_17_2, rpe_20_2, rpe_21_2,rpe_22_2,
                                                                       rpe_23_2,rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2))),
                                                   stringsAsFactors = F))
  risky.icon[[indy[i]]] <- bind_cols(risky.icon[[indy[i]]], 
                                     as.data.frame(t(subset(d[d$dom.2 == indy[i] & d$e.cond == "icon",], 
                                                            select = c(rpv_1_2, rpv_2_2, rpv_5_2, rpv_6_2, rpv_9_2,
                                                                       rpv_10_2, rpv_11_2, rpv_12_2, rpv_13_2, rpv_14_2,
                                                                       rpv_15_2, rpv_17_2, rpv_20_2, rpv_21_2,rpv_22_2,
                                                                       rpv_23_2,rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))),
                                                   stringsAsFactors = F))
  colnames(risky.icon[[indy[i]]])[11:(10+table(d[d$e.cond == "icon",]$dom.2)[[indy[i]]])] <- as.character(
    paste0(d[d$dom.2 == indy[i] & d$e.cond == "icon",]$participant.id, "_e"))
  colnames(risky.icon[[indy[i]]])[(11+table(d[d$e.cond == "icon",]$dom.2)[[indy[i]]]):(10+table(d[d$e.cond == "icon",]$dom.2)[[indy[i]]]*2)] <- as.character(
    paste0(d[d$dom.2 == indy[i] & d$e.cond == "icon",]$participant.id, "_v"))
  
  
  ### Tree condition
  risky.tree[[indy[i]]] <- bind_cols(risky.tree[[indy[i]]], 
                                     as.data.frame(t(subset(d[d$dom.2 == indy[i] & d$e.cond == "tree",], 
                                                            select = c(rpe_1_2, rpe_2_2, rpe_5_2, rpe_6_2, rpe_9_2,
                                                                       rpe_10_2, rpe_11_2, rpe_12_2, rpe_13_2, rpe_14_2,
                                                                       rpe_15_2, rpe_17_2, rpe_20_2, rpe_21_2,rpe_22_2,
                                                                       rpe_23_2,rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2))),
                                                   stringsAsFactors = F))
  risky.tree[[indy[i]]] <- bind_cols(risky.tree[[indy[i]]], 
                                     as.data.frame(t(subset(d[d$dom.2 == indy[i] & d$e.cond == "tree",], 
                                                            select = c(rpv_1_2, rpv_2_2, rpv_5_2, rpv_6_2, rpv_9_2,
                                                                       rpv_10_2, rpv_11_2, rpv_12_2, rpv_13_2, rpv_14_2,
                                                                       rpv_15_2, rpv_17_2, rpv_20_2, rpv_21_2,rpv_22_2,
                                                                       rpv_23_2,rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))),
                                                   stringsAsFactors = F))
  colnames(risky.tree[[indy[i]]])[11:(10+table(d[d$e.cond == "tree",]$dom.2)[[indy[i]]])] <- as.character(paste0(d[d$e.cond == "tree" & d$dom.2 == indy[i],]$participant.id, "_e"))
  colnames(risky.tree[[indy[i]]])[(11+table(d[d$e.cond == "tree",]$dom.2)[[indy[i]]]):(10+table(d[d$e.cond == "tree",]$dom.2)[[indy[i]]]*2)] <- as.character(
    paste0(d[d$dom.2 == indy[i] & d$e.cond == "tree",]$participant.id, "_v"))
  
}; rm(indy, i)

## Convert choices to numeric
for (i in 1:length(risky)) risky[[i]][,c(11:length(risky[[i]]))] <- llply(risky[[i]][,c(11:length(risky[[i]]))], as.numeric)
for (i in 1:length(risky.icon)) risky.icon[[i]][,c(11:length(risky.icon[[i]]))] <- llply(risky.icon[[i]][,c(11:length(risky.icon[[i]]))], as.numeric)
for (i in 1:length(risky.tree)) risky.tree[[i]][,c(11:length(risky.tree[[i]]))] <- llply(risky.tree[[i]][,c(11:length(risky.tree[[i]]))], as.numeric)

# Check results
# lapply(risky, dim)
# table(d$dom.2)*2+10
# bath    ekf floodh floodr 
# 55    199    313     65
# Preference computations to do for 4*298 = 1192 choice sets


# ==== 3 Compute risk preferences ====

# ---- 3.1 Participants 'risk profile to determining participant rank and rank stability ----
# If preference suggests risk seeking assign -1, if risk avoiding assign 1 (as in Frey et al. and Pedroni et al. (2017)). Neutral = 0.
# Following Frey et al. (2017), a risk-averse individual chooses the gamble with smaller probability variance
# - if probability variance is the same, then the one with lower EV
# - $rat = 1 indicates EVA > EVB, $rat = 0 EVA < EVB

### Create support object from which to take results
riski <- risky

for (i in 1:length(riski)){
  for(j in 11:length(riski[[i]])){
    riski[[i]][,j] <- ifelse(riski[[i]][,j] == 1, 
                             # chosen is A
                             # and if probability difference is smaller for chosen, then risk averse
                             ifelse(abs(0.5 - riski[[i]]$px.A) < abs(0.5 - riski[[i]]$px.B), 1,   
                                    # if probability difference is same and EV of chosen alternative is smaller, then risk averse
                                    ifelse(abs(0.5 - riski[[i]]$px.A) == abs(0.5 - riski[[i]]$px.B) & riski[[i]]$rat == 0, 1, 
                                           -1)), # otherwise risk seeking
                             # chosen is B
                             # and if probability difference is smaller for chosen, then risk averse
                             ifelse(abs(0.5 - riski[[i]]$px.A) > abs(0.5 - riski[[i]]$px.B), 1, 
                                    # if probability difference is same and EV of chosen alternative is smaller, then risk averse
                                    ifelse(abs(0.5 - riski[[i]]$px.A) == abs(0.5 - riski[[i]]$px.B) & riski[[i]]$rat == 1, 1, 
                                           -1)) # otherwise risk seeking
    )
    risky[[i]][21,j] <- sum(riski[[i]][,j]) # Overall
    risky[[i]][22,j] <- sum(riski[[i]][,j][3])          # G5
    risky[[i]][23,j] <- sum(riski[[i]][,j][5])          # G9
    risky[[i]][24,j] <- sum(riski[[i]][,j][6])          # G10
    risky[[i]][25,j] <- sum(riski[[i]][,j][14])         # G21
  }
}; rm(riski, i,j)

for (i in 1:length(risky)) risky[[i]][21,]$type <- "risky.sum"
for (i in 1:length(risky)) risky[[i]][22,]$type <- "risky.G5"
for (i in 1:length(risky)) risky[[i]][23,]$type <- "risky.G9"
for (i in 1:length(risky)) risky[[i]][24,]$type <- "risky.G10"
for (i in 1:length(risky)) risky[[i]][25,]$type <- "risky.G21"

# Results: llply(risky, function(x) table(as.vector(unlist(x[21,]))))

### Check if it does as intended
# tody <- risky$bath
# sapply(tody[1:20,11:length(tody)], sum)
# 
# tody.11 <- tody[,11]
# tody.11
# [1] 0       1     0     1     1     1     0     1     1     1     1     1     0     1     0     0     0     1     1    1 

# abs(0.5 - riski$bath$px.A) == abs(0.5 - riski$bath$px.B)
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE

# risky$bath$rat
# [1]  0      0      0    0     0     0     0     0     0     1      1    1     1     1     0     1     0     1     0     1

# abs(0.5 - riski$bath$px.A) < abs(0.5 - riski$bath$px.B)
#[1] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE

# abs(0.5 - riski$bath$px.A) > abs(0.5 - riski$bath$px.B)
# [1] TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE   SUM

# WOULD ASSIGN
# [1] +1     +1     +1     -1     -1    +1   +1    +1    +1    +1   -1   -1     +1     -1   -1     -1     -1   -1    +1    -1    0
# Hurray! tody[21,11] == 0


## Wrangle data into suitable format for writing results back to d
tody <- llply(risky, function(x) melt(x[21, 11:length(x)], variable.name = "participant.id"))
tody.g5 <- llply(risky, function(x) melt(x[22, 11:length(x)], variable.name = "participant.id"))
tody.g9 <- llply(risky, function(x) melt(x[23, 11:length(x)], variable.name = "participant.id"))
tody.g10 <- llply(risky, function(x) melt(x[24, 11:length(x)], variable.name = "participant.id"))
tody.g21 <- llply(risky, function(x) melt(x[25, 11:length(x)], variable.name = "participant.id"))

for(i in 1:length(tody)) {
  # Overall
  tody[[i]]$series <- as.character(sapply(tody[[i]]$participant.id, 
                                          function(x) unlist(strsplit(as.character(x), "_"))[2]))
  tody[[i]]$participant.id <- as.character(sapply(tody[[i]]$participant.id, 
                                                  function(x) unlist(strsplit(as.character(x), "_"))[1]))
  # G5
  tody.g5[[i]]$series <- as.character(sapply(tody.g5[[i]]$participant.id, 
                                             function(x) unlist(strsplit(as.character(x), "_"))[2]))
  tody.g5[[i]]$participant.id <- as.character(sapply(tody.g5[[i]]$participant.id, 
                                                     function(x) unlist(strsplit(as.character(x), "_"))[1]))
  # G9
  tody.g9[[i]]$series <- as.character(sapply(tody.g9[[i]]$participant.id, 
                                             function(x) unlist(strsplit(as.character(x), "_"))[2]))
  tody.g9[[i]]$participant.id <- as.character(sapply(tody.g9[[i]]$participant.id, 
                                                     function(x) unlist(strsplit(as.character(x), "_"))[1]))
  # G10
  tody.g10[[i]]$series <- as.character(sapply(tody.g10[[i]]$participant.id, 
                                              function(x) unlist(strsplit(as.character(x), "_"))[2]))
  tody.g10[[i]]$participant.id <- as.character(sapply(tody.g10[[i]]$participant.id, 
                                                      function(x) unlist(strsplit(as.character(x), "_"))[1]))
  # G21
  tody.g21[[i]]$series <- as.character(sapply(tody.g21[[i]]$participant.id, 
                                              function(x) unlist(strsplit(as.character(x), "_"))[2]))
  tody.g21[[i]]$participant.id <- as.character(sapply(tody.g21[[i]]$participant.id, 
                                                      function(x) unlist(strsplit(as.character(x), "_"))[1]))
}

tody     <- llply(tody, function(x) dcast(x, participant.id ~ series))
tody.g5  <- llply(tody.g5, function(x) dcast(x, participant.id ~ series))
tody.g9  <- llply(tody.g9, function(x) dcast(x, participant.id ~ series))
tody.g10 <- llply(tody.g10, function(x) dcast(x, participant.id ~ series))
tody.g21 <- llply(tody.g21, function(x) dcast(x, participant.id ~ series))


## Write results back to d
d$risk.v_2 <- d$risk.v_1 <- d$risk.e_2 <- d$risk.e_1 <- NA
d$risk.g5.v_2 <- d$risk.g5.v_1 <- d$risk.g5.e_2 <- d$risk.g5.e_1 <- NA
d$risk.g9.v_2 <- d$risk.g9.v_1 <- d$risk.g9.e_2 <- d$risk.g9.e_1 <- NA
d$risk.g10.v_2 <- d$risk.g10.v_1 <- d$risk.g10.e_2 <- d$risk.g10.e_1 <- NA
d$risk.g21.v_2 <- d$risk.g21.v_1 <- d$risk.g21.e_2 <- d$risk.g21.e_1 <- NA

for(i in 1:nrow(tody[[1]])){
  d[d$participant.id == tody$cost$participant.id[i],]$risk.e_1 <- tody$cost[i,]$e
  d[d$participant.id == tody$cost$participant.id[i],]$risk.v_1 <- tody$cost[i,]$v
  
  d[d$participant.id == tody.g5$cost$participant.id[i],]$risk.g5.e_1 <- tody.g5$cost[i,]$e
  d[d$participant.id == tody.g5$cost$participant.id[i],]$risk.g5.v_1 <- tody.g5$cost[i,]$v
  
  d[d$participant.id == tody.g9$cost$participant.id[i],]$risk.g9.e_1 <- tody.g9$cost[i,]$e
  d[d$participant.id == tody.g9$cost$participant.id[i],]$risk.g9.v_1 <- tody.g9$cost[i,]$v
  
  d[d$participant.id == tody.g10$cost$participant.id[i],]$risk.g10.e_1 <- tody.g10$cost[i,]$e
  d[d$participant.id == tody.g10$cost$participant.id[i],]$risk.g10.v_1 <- tody.g10$cost[i,]$v
  
  d[d$participant.id == tody.g21$cost$participant.id[i],]$risk.g21.e_1 <- tody.g21$cost[i,]$e
  d[d$participant.id == tody.g21$cost$participant.id[i],]$risk.g21.v_1 <- tody.g21$cost[i,]$v
}

for(j in 2:length(tody)) {
  for(i in 1:nrow(tody[[j]])){
    d[d$participant.id == tody[[j]]$participant.id[i],]$risk.e_2 <- tody[[j]][i,]$e
    d[d$participant.id == tody[[j]]$participant.id[i],]$risk.v_2 <- tody[[j]][i,]$v
    
    d[d$participant.id == tody.g5[[j]]$participant.id[i],]$risk.g5.e_2 <- tody.g5[[j]][i,]$e
    d[d$participant.id == tody.g5[[j]]$participant.id[i],]$risk.g5.v_2 <- tody.g5[[j]][i,]$v
    
    d[d$participant.id == tody.g9[[j]]$participant.id[i],]$risk.g9.e_2 <- tody.g9[[j]][i,]$e
    d[d$participant.id == tody.g9[[j]]$participant.id[i],]$risk.g9.v_2 <- tody.g9[[j]][i,]$v
    
    d[d$participant.id == tody.g10[[j]]$participant.id[i],]$risk.g10.e_2 <- tody.g10[[j]][i,]$e
    d[d$participant.id == tody.g10[[j]]$participant.id[i],]$risk.g10.v_2 <- tody.g10[[j]][i,]$v
    
    d[d$participant.id == tody.g21[[j]]$participant.id[i],]$risk.g21.e_2 <- tody.g21[[j]][i,]$e
    d[d$participant.id == tody.g21[[j]]$participant.id[i],]$risk.g21.v_2 <- tody.g21[[j]][i,]$v
  }
}

d$risk.e   <- d$risk.e_1 + d$risk.e_2
d$risk.v   <- d$risk.v_1 + d$risk.v_2 

# Change in risk attitude score
d$risk.diff <- -1*(d$risk.e - d$risk.v)




# ---- Export data for parameter estimation ---- 
save(risky, risky.icon, risky.tree , file = "risky-choice-data-2019-12-01.Rdata")

rm(tody, tody.g5, tody.g9, tody.g10, tody.g21, i,j, risky, risky.icon, risky.tree, 
   risk.m.v, risk.m.e, risk.m.easy, risk.m.difficult)

# ==== 4 Generate and wrangle variables into suitable format ====
# Useful functions
# gender <- with(d, table(e.cond, gender))
# aggregate(gender~e.cond, d, count)
# 
# group_by(d, e.cond, gender) %>%
#   summarise(count=n())

## Explanatory variables: overall and per condition
# ==================================================
# Age
age <- addmargins(with(d, table(e.cond, cut(2019-birthyear, breaks = seq(18,68, by = 10)), dnn =c("Condition","Age"))))
dimnames(age)$Condition <- c("Tree", "Icon", "Both")

### Overwrite NA's by median age
d[is.na(d$birthyear),]$birthyear <- median(d$birthyear, na.rm = TRUE)

# Experience
d$exp <- apply(d[,c("expfloodh", "expfloodr", "expcost", "expekf", "expbathing")], 1, sum)

# Gambles training
d$training <- NA
for(i in 1:nrow(d)) d[i,]$training <- approve[approve$participant.id == d[i,]$participant.id, ]$training

# Response time
rpe.tot <- subset(d.tt, select = c(rpe_1_1, rpe_2_1, rpe_5_1, rpe_6_1, rpe_9_1,
                                   rpe_10_1, rpe_11_1, rpe_12_1, rpe_13_1, rpe_14_1,
                                   rpe_15_1, rpe_17_1, rpe_20_1, rpe_21_1,rpe_22_1,
                                   rpe_23_1,rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1,
                                   rpe_1_2, rpe_2_2, rpe_5_2, rpe_6_2, rpe_9_2,
                                   rpe_10_2, rpe_11_2, rpe_12_2, rpe_13_2, rpe_14_2,
                                   rpe_15_2, rpe_17_2, rpe_20_2, rpe_21_2,rpe_22_2,
                                   rpe_23_2,rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2))

rpv.tot <- subset(d.tt, select = c(rpv_1_1, rpv_2_1, rpv_5_1, rpv_6_1, rpv_9_1,
                                   rpv_10_1, rpv_11_1, rpv_12_1, rpv_13_1, rpv_14_1,
                                   rpv_15_1, rpv_17_1, rpv_20_1, rpv_21_1,rpv_22_1,
                                   rpv_23_1,rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1,
                                   rpv_1_2, rpv_2_2, rpv_5_2, rpv_6_2, rpv_9_2,
                                   rpv_10_2, rpv_11_2, rpv_12_2, rpv_13_2, rpv_14_2,
                                   rpv_15_2, rpv_17_2, rpv_20_2, rpv_21_2,rpv_22_2,
                                   rpv_23_2,rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))


### Overall time taken to complete survey and by part?
d$tot.overall <- NA
for(i in 1:nrow(d)) d[i,]$tot.overall <- approve[approve$participant.id == d[i,]$participant.id, ]$tot.overall

### Response time per choice overall and by 1st series and 2nd series, by treatment condition and outcome domain
rpe.tot$t1 <- rowSums(rpe.tot[,1:40])
rpv.tot$t2 <- rowSums(rpv.tot[,1:40])

rpe.tot$t1_cost  <- rowSums(rpe.tot[,1:20])
rpe.tot$t1_dom.2 <- rowSums(rpe.tot[,21:40])

rpv.tot$t2_cost  <- rowSums(rpv.tot[,1:20])
rpv.tot$t2_dom.2 <- rowSums(rpv.tot[,21:40])

d$s.per.choice.t2 <- d$s.per.choice.t1 <- d$s.per.choice <- NA

d$s.per.choice       <- (rpe.tot$t1 + rpv.tot$t2)/80/1000  # overall
d$s.per.choice.t1    <- rpe.tot$t1/40/1000                 # 1st trial series
d$s.per.choice.t2    <- rpv.tot$t2/40/1000                 # 2nd trial series
d$s.per.choice.cost  <- (rpe.tot$t1_cost + rpv.tot$t2_cost)/40/1000 # all cost choices
d$s.per.choice.dom.2 <- (rpe.tot$t1_dom.2 + rpv.tot$t2_dom.2)/40/1000 # all dom.2 choices


## Dependent variables: overall and per condition
# ==================================================
# CBD concurrent on NM and M trials, overall and by question
trials <- subset(d, select = c(rpe_1_1, rpe_2_1, rpe_5_1, rpe_6_1, rpe_9_1, 
                                 rpe_10_1, rpe_11_1, rpe_12_1, rpe_13_1, rpe_14_1,
                                 rpe_15_1, rpe_17_1, rpe_20_1, rpe_21_1,rpe_22_1,
                                 rpe_23_1,rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1,
                                 rpe_1_2, rpe_2_2, rpe_5_2, rpe_6_2, rpe_9_2,
                                 rpe_10_2, rpe_11_2, rpe_12_2, rpe_13_2, rpe_14_2,
                                 rpe_15_2, rpe_17_2, rpe_20_2, rpe_21_2,rpe_22_2,
                                 rpe_23_2,rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2))

why <- subset(d, select = c(rpe.why_1_1, rpe.why_2_1, rpe.why_5_1, rpe.why_6_1, rpe.why_9_1, 
                              rpe.why_10_1, rpe.why_11_1, rpe.why_12_1, rpe.why_13_1, rpe.why_14_1,
                              rpe.why_15_1, rpe.why_17_1, rpe.why_20_1, rpe.why_21_1,rpe.why_22_1,
                              rpe.why_23_1,rpe.why_24_1, rpe.why_25_1, rpe.why_26_1, rpe.why_27_1,
                              rpe.why_1_2, rpe.why_2_2, rpe.why_5_2, rpe.why_6_2, rpe.why_9_2,
                              rpe.why_10_2, rpe.why_11_2, rpe.why_12_2, rpe.why_13_2, rpe.why_14_2,
                              rpe.why_15_2, rpe.why_17_2, rpe.why_20_2, rpe.why_21_2,rpe.why_22_2,
                              rpe.why_23_2,rpe.why_24_2, rpe.why_25_2, rpe.why_26_2, rpe.why_27_2))

trials <- as.data.frame(sapply(trials, as.numeric))

cbd <- data.frame(
  manipulated = rep(0,40), 
  p.A         = NA,
  detect      = NA,
  indiff      = NA,
  e.gain      = NA,
  e.loss      = NA,
  p.gain      = NA,
  p.loss      = NA,
  other       = NA)

rownames(cbd) <- colnames(trials)
cbd[c("rpe_5_1", "rpe_5_2", "rpe_9_1", "rpe_9_2",     # 5, 9, 10, 21, first 2 easy, 3 difficult, 4th intermediate
      "rpe_10_1", "rpe_10_2","rpe_21_1", "rpe_21_2"),]$manipulated <- 1

cbd$p.A    <- sapply(trials, sum)/nrow(trials)
cbd$detect <- sapply(why, function(x) sum(str_count(x, "detected"))) #/nrow(cbd)
cbd$indiff <- sapply(why, function(x) sum(str_count(x, "indiff")))   #/nrow(cbd)
cbd$e.gain <- sapply(why, function(x) sum(str_count(x, "e.gain")))   #/nrow(cbd)
cbd$e.loss <- sapply(why, function(x) sum(str_count(x, "e.loss")))   #/nrow(cbd)
cbd$p.gain <- sapply(why, function(x) sum(str_count(x, "p.gain")))   #/nrow(cbd)
cbd$p.loss <- sapply(why, function(x) sum(str_count(x, "p.loss")))   #/nrow(cbd)
cbd$other  <- sapply(why, function(x) sum(str_count(x, "etc")))      #/nrow(cbd)

# Reasons per participant
d$indiff <- apply(why, 1, function(x) sum(str_count(x, "indiff")))
d$e.gain <- apply(why, 1, function(x) sum(str_count(x, "e.gain")))
d$e.loss <- apply(why, 1, function(x) sum(str_count(x, "e.loss")))
d$p.gain <- apply(why, 1, function(x) sum(str_count(x, "p.gain")))
d$p.loss <- apply(why, 1, function(x) sum(str_count(x, "p.loss")))
d$etc    <- apply(why, 1, function(x) sum(str_count(x, "etc")))


# Detections per participant
d$detected <- apply(why, 1, function(x) sum(str_count(x, "detected")))
d$detected.m <- apply(why[,c("rpe.why_5_1", "rpe.why_5_2", 
                             "rpe.why_9_1", "rpe.why_9_2",
                             "rpe.why_10_1", "rpe.why_10_2",
                             "rpe.why_21_1", "rpe.why_21_2")], 
                      1, function(x) sum(str_count(x, "detected")))
d$detected.5_1  <- ifelse(why$rpe.why_5_1 == "detected", 1, 0)
d$detected.9_1  <- ifelse(why$rpe.why_9_1 == "detected", 1, 0)
d$detected.10_1 <- ifelse(why$rpe.why_10_1 == "detected", 1, 0)
d$detected.21_1 <- ifelse(why$rpe.why_21_1 == "detected", 1, 0)

d$detected.5_2  <- ifelse(why$rpe.why_5_2 == "detected", 1, 0)
d$detected.9_2  <- ifelse(why$rpe.why_9_2 == "detected", 1, 0)
d$detected.10_2 <- ifelse(why$rpe.why_10_2 == "detected", 1, 0)
d$detected.21_2 <- ifelse(why$rpe.why_21_2 == "detected", 1, 0)

d$detected.difficult  <- d$detected.5_1 + d$detected.5_2 + d$detected.10_1 + d$detected.10_2
d$detected.easy       <- d$detected.9_1 + d$detected.9_2 + d$detected.21_1 + d$detected.21_2

# Ex-Post detection
d$detect1 <- ifelse(d$detect1 == "Yes", 1, 0)
d$detect2 <- ifelse(d$detect2 == "Yes", 1, 0)
d$detect3 <- ifelse(d$detect3 == "Yes", 1, 0)
d$detect4 <- ifelse(d$detect4 == "Yes", 1, 0)
d$detect5 <- ifelse(d$detect5 == "Yes", 1, 0)

d$detect.post <-   apply(d[,c("detect1","detect2", "detect3")], 1, sum)

# Identify who among the participants detected expost
d$detected.post <- apply(d[,c("detect1","detect2", "detect3", "detect4", "detect5")], 1, 
        function(x) sum(str_count(x, "Yes")))


# ---- Choice consistency ---- 

# Consistency using simple matching coefficient and Pearson's Phi
# # Pearson's Phi (which is about same as Pearson for n > 40) https://en.m.wikipedia.org/wiki/Phi_coefficient
# # Using psych package
# # Interpretation https://www.statisticshowto.datasciencecentral.com/phi-coefficient-mean-square-contingency-coefficient/

## Create contingency tables, then write consistency measures
# - _1: first domain, _2: second domain
d$phi <- d$phi_2 <- d$phi_1 <- d$smc <- d$smc_2 <- d$smc_1 <- NA
d$phi_m <- d$phi_m_2 <- d$phi_m_1 <- d$smc_m <- d$smc_m_2 <- d$smc_m_1 <- NA
d$phi_nm <- d$phi_nm_2 <- d$phi_nm_1 <- d$smc_nm <- d$smc_nm_2 <- d$smc_nm_1 <- NA
d$smc_m_easy <- d$smc_m_difficult <- NA

for(i in 1:nrow(d)){
  # all trials for domain 1
  tody_1 <- as.matrix(table(subset(d[i,], select = c(rpe_1_1, rpe_2_1, rpe_5_1, rpe_6_1, rpe_9_1, # Elicitation
                                                     rpe_10_1, rpe_11_1, rpe_12_1, rpe_13_1, rpe_14_1,
                                                     rpe_15_1, rpe_17_1, rpe_20_1, rpe_21_1,rpe_22_1,
                                                     rpe_23_1,rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1)),
                            subset(d[i,], select = c(rpv_1_1, rpv_2_1, rpv_5_1, rpv_6_1, rpv_9_1, # Validation
                                                     rpv_10_1, rpv_11_1, rpv_12_1, rpv_13_1, rpv_14_1,
                                                     rpv_15_1, rpv_17_1, rpv_20_1, rpv_21_1,rpv_22_1,
                                                     rpv_23_1,rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1))))
  # all trials for domain 2
  tody_2 <- as.matrix(table(subset(d[i,], select = c(rpe_1_2, rpe_2_2, rpe_5_2, rpe_6_2, rpe_9_2, # Elicitation
                                                     rpe_10_2, rpe_11_2, rpe_12_2, rpe_13_2, rpe_14_2,
                                                     rpe_15_2, rpe_17_2, rpe_20_2, rpe_21_2,rpe_22_2,
                                                     rpe_23_2,rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2)),
                            subset(d[i,], select = c(rpv_1_2, rpv_2_2, rpv_5_2, rpv_6_2, rpv_9_2, # Validation
                                                     rpv_10_2, rpv_11_2, rpv_12_2, rpv_13_2, rpv_14_2,
                                                     rpv_15_2, rpv_17_2, rpv_20_2, rpv_21_2,rpv_22_2,
                                                     rpv_23_2,rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))))
  # all trials
  tody_both <- as.matrix(table(subset(d[i,], select = c(rpe_1_1, rpe_2_1, rpe_5_1, rpe_6_1, rpe_9_1, # Elicitation
                                                      rpe_10_1, rpe_11_1, rpe_12_1, rpe_13_1, rpe_14_1,
                                                      rpe_15_1, rpe_17_1, rpe_20_1, rpe_21_1,rpe_22_1,
                                                      rpe_23_1,rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1,
                                                      rpe_1_2, rpe_2_2, rpe_5_2, rpe_6_2, rpe_9_2, # Elicitation
                                                      rpe_10_2, rpe_11_2, rpe_12_2, rpe_13_2, rpe_14_2,
                                                      rpe_15_2, rpe_17_2, rpe_20_2, rpe_21_2,rpe_22_2,
                                                      rpe_23_2,rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2)),
                             subset(d[i,], select = c(rpv_1_1, rpv_2_1, rpv_5_1, rpv_6_1, rpv_9_1, # Validation
                                                      rpv_10_1, rpv_11_1, rpv_12_1, rpv_13_1, rpv_14_1,
                                                      rpv_15_1, rpv_17_1, rpv_20_1, rpv_21_1,rpv_22_1,
                                                      rpv_23_1,rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1,
                                                      rpv_1_2, rpv_2_2, rpv_5_2, rpv_6_2, rpv_9_2, # Validation
                                                      rpv_10_2, rpv_11_2, rpv_12_2, rpv_13_2, rpv_14_2,
                                                      rpv_15_2, rpv_17_2, rpv_20_2, rpv_21_2,rpv_22_2,
                                                      rpv_23_2,rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))))
  
  # all manipulated trials, domain 1
  tody_m_1 <- as.matrix(table(factor(subset(d[i,], select = c(rpe_5_1, rpe_9_1, rpe_10_1, rpe_21_1)), lev = 0:1),
                              factor(subset(d[i,], select = c(rpv_5_1, rpv_9_1, rpv_10_1, rpv_21_1)), lev = 0:1)))
  
  # all manipulated trials, domain 2
  tody_m_2 <- as.matrix(table(factor(subset(d[i,], select = c(rpe_5_2, rpe_9_2, rpe_10_2, rpe_21_2)), lev = 0:1),
                              factor(subset(d[i,], select = c(rpv_5_2, rpv_9_2, rpv_10_2, rpv_21_2)), lev = 0:1)))
  
  # all manipulated trials
  tody_m <- as.matrix(table(factor(subset(d[i,], select = c(rpe_5_1, rpe_9_1, rpe_10_1, rpe_21_1,
                                                     rpe_5_2, rpe_9_2, rpe_10_2, rpe_21_2)), lev = 0:1),
                            factor(subset(d[i,], select = c(rpv_5_1, rpv_9_1, rpv_10_1, rpv_21_1,
                                                     rpv_5_2, rpv_9_2, rpv_10_2, rpv_21_2)), lev = 0:1)))
  
  # all non-manipulated trials, domain 1
  tody_nm_1 <- as.matrix(table(subset(d[i,], select = c(rpe_1_1, rpe_2_1, rpe_6_1, rpe_11_1, 
                                                      rpe_12_1, rpe_13_1, rpe_14_1, rpe_15_1, 
                                                      rpe_17_1, rpe_20_1,rpe_22_1, rpe_23_1,
                                                      rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1)),
                             subset(d[i,], select = c(rpv_1_1, rpv_2_1, rpv_6_1, rpv_11_1, 
                                                      rpv_12_1, rpv_13_1, rpv_14_1, rpv_15_1, 
                                                      rpv_17_1, rpv_20_1,rpv_22_1, rpv_23_1,
                                                      rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1))))
  
  # all non-manipulated trials, domain 2
  tody_nm_2 <- as.matrix(table(subset(d[i,], select = c(rpe_1_2, rpe_2_2, rpe_6_2, rpe_11_2, 
                                                        rpe_12_2, rpe_13_2, rpe_14_2, rpe_15_2, 
                                                        rpe_17_2, rpe_20_2,rpe_22_2, rpe_23_2,
                                                        rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2)),
                               subset(d[i,], select = c(rpv_1_2, rpv_2_2, rpv_6_2, rpv_11_2, 
                                                        rpv_12_2, rpv_13_2, rpv_14_2, rpv_15_2, 
                                                        rpv_17_2, rpv_20_2,rpv_22_2, rpv_23_2,
                                                        rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))))
  
  # all non-manipulated trials
  tody_nm <- as.matrix(table(subset(d[i,], select = c(rpe_1_1, rpe_2_1, rpe_6_1, rpe_11_1, 
                                                      rpe_12_1, rpe_13_1, rpe_14_1, rpe_15_1, 
                                                      rpe_17_1, rpe_20_1,rpe_22_1, rpe_23_1,
                                                      rpe_24_1, rpe_25_1, rpe_26_1, rpe_27_1,
                                                      rpe_1_2, rpe_2_2, rpe_6_2, rpe_11_2, 
                                                      rpe_12_2, rpe_13_2, rpe_14_2, rpe_15_2, 
                                                      rpe_17_2, rpe_20_2, rpe_22_2, rpe_23_2,
                                                      rpe_24_2, rpe_25_2, rpe_26_2, rpe_27_2)),
                             subset(d[i,], select = c(rpv_1_1, rpv_2_1, rpv_6_1, rpv_11_1, 
                                                      rpv_12_1, rpv_13_1, rpv_14_1, rpv_15_1, 
                                                      rpv_17_1, rpv_20_1,rpv_22_1, rpv_23_1,
                                                      rpv_24_1, rpv_25_1, rpv_26_1, rpv_27_1,
                                                      rpv_1_2, rpv_2_2, rpv_6_2, rpv_11_2, 
                                                      rpv_12_2, rpv_13_2, rpv_14_2, rpv_15_2, 
                                                      rpv_17_2, rpv_20_2, rpv_22_2, rpv_23_2,
                                                      rpv_24_2, rpv_25_2, rpv_26_2, rpv_27_2))))
  
  # Easy gambles
  tody_easy <- as.matrix(table(factor(subset(d[i,], select = c(rpe_9_1, rpe_9_2, rpe_21_1, rpe_21_2)), lev = 0:1),
                               factor(subset(d[i,], select = c(rpv_9_1, rpv_9_2, rpv_21_1, rpv_21_2)), lev = 0:1)))
  
  # Difficult gambles
  tody_difficult <- as.matrix(table(factor(subset(d[i,], select = c(rpe_5_1, rpe_5_2, rpe_10_1, rpe_10_2)), lev = 0:1),
                               factor(subset(d[i,], select = c(rpv_5_1, rpv_5_2, rpv_10_1, rpv_10_2)), lev = 0:1)))
  
  
  # Simple matching score (a+d)/(a+b+c+d)
  d[i,]$smc_1 <- sum(diag(tody_1)) / sum(tody_1)
  d[i,]$smc_2 <- sum(diag(tody_2)) / sum(tody_2)
  d[i,]$smc   <- sum(diag(tody_both)) / sum(tody_both)
  
  d[i,]$smc_m_1 <- sum(diag(tody_m_1)) / sum(tody_m_1)
  d[i,]$smc_m_2 <- sum(diag(tody_m_2)) / sum(tody_m_2)
  d[i,]$smc_m   <- sum(diag(tody_m)) / sum(tody_m)
  
  d[i,]$smc_nm_1 <- sum(diag(tody_nm_1)) / sum(tody_nm_1)
  d[i,]$smc_nm_2 <- sum(diag(tody_nm_2)) / sum(tody_nm_2)
  d[i,]$smc_nm   <- sum(diag(tody_nm)) / sum(tody_nm)
  
  d[i,]$smc_m_easy      <- sum(diag(tody_easy)) / sum(tody_easy)
  d[i,]$smc_m_difficult <- sum(diag(tody_difficult)) / sum(tody_difficult)
  
  # Phi
  d[i,]$phi_1 <- phi(tody_1)
  d[i,]$phi_2 <- phi(tody_2)
  d[i,]$phi   <- phi(tody_both)
  
  d[i,]$phi_m_1 <- phi(tody_m_1) # does not make much sense as sample size is 4 per dimension
  d[i,]$phi_m_2 <- phi(tody_m_2) # same
  d[i,]$phi_m   <- phi(tody_m)   # boarderline informative, as sample size is 4 per dimension
  
  d[i,]$phi_nm_1 <- phi(tody_nm_1)
  d[i,]$phi_nm_2 <- phi(tody_nm_2)
  d[i,]$phi_nm   <- phi(tody_nm)
  
  rm(tody_1, tody_2, tody_both, tody_m_1, tody_m_2, 
     tody_m, tody_nm_1, tody_nm_2, tody_nm, 
     tody_m_easy, tody_m_difficult)
}

## Phi coefficient 
# remove NaN phis returned if either column or row is all zero's
# Many elements of phi_m_1 (45 entries) and phi_m_2 are NaN, does not make much sense to use Phi here
d[is.na(d$phi_m),]$phi_m <- mean(d$phi_m, na.rm = TRUE) # element 186

# ---- Rank stability of individual ---- 
# No. risk seeking / neutral / risk averse
d$risk.e <- d$risk.e_1 + d$risk.e_2
d$risk.v <- d$risk.v_1 + d$risk.v_2 
d$risk   <- d$risk.e + d$risk.v

### Rank stability of individual from most risk seeking (1) to least
# All trials
# Use Kendall-b rather than Spearman, as more tractable when ties are present Gilpin (1993) Educational and Psych. Measurement, 53(1):87-92
cor(d$risk.e, d$risk.v, method = "kendall") # 0.3788, corrected for ties, i.e computed tau_b

## same as:
# d$risk.rank.e <- rank(d$risk.e, ties.method = "average")
# d$risk.rank.v <- rank(d$risk.v, ties.method = "average")
# cor(d$risk.rank.e, d$risk.rank.v, method = "kendall")


# Distinguish M and NM trials based on risk score
# - How many shift and by how much? Risk score from one type to another?
# - Any difference on M or NM trials?


# ---- Export data for data analysis ---- 
save(d, d.tt, file = "wrangled-data-2019-12-01.Rdata")

