#########################################################################################################
# Selecting 24 representative items for the short version of the questionnaire in research: 
# Siska Fitrianie, Merijn Bruijnes, Fengxiang Li, Amal Abdulrahman, Willem-Paul Brinkman. 2022. 
# The Artificial-Social-Agent Questionnaire: Establishing the long and short questionnaire versions. 
# In ACM International Conference on Intelligent Virtual Agents (IVA’22), September, 2022, Faro, Portugal. 
# ACM, New York, NY, USA. https://doi.org/10.1145/3514197.3549612
#
# Run an EFA on the 24 representative items and calculate the correlation scores between them
# Input:  result_all_pItem_std.csv (standardized observed data)
# Output: final_latentPredicted_convergentAnalysis.csv (predicted latent scores of constructs/dimensions)
#########################################################################################################

# Library
library(dplyr) 
library(psych)
library(crayon)
library(CTT)
library(factoextra)
library(lattice)
library(boot)
library(nFactors)
library(Rcsdp)
library(GPArotation)
library(corrplot)

## Load the standardized observed dataset
# Fields are split on commas (`sep` overrides the read.csv2() default), and
# every column is then coerced to numeric via its character representation.
d_results_std <- read.csv2(
  "data/result_all_pItem_std.csv",
  header = TRUE,
  sep = ","
) %>%
  mutate_all(function(column) as.numeric(as.character(column)))

## Select only 24 representative items
# Every column named in `drop` is removed from the standardized data set;
# all remaining columns are kept in their original order.
drop <- c(
  "C01D01Q3", "C01D01Q13", "C01D01Q15", "C01D01Q16",
  "C01D02Q7", "C01D02Q10", "C01D02Q0", "C01D02Q1",
  "C01D03Q7", "C01D03Q9", "C01D03Q10", "C01D03Q12",
  "C01D04Q0", "C01D04Q13", "C01D04Q8", "C01D04Q9",
  "C01D05Q9", "C01D05Q0", "C01D05Q11", "C01D05Q12",
  "C02D00Q2", "C02D00Q8", "R_C02D00Q11", "R_C02D00Q15",
  "C03D01Q1", "C03D01Q5", "C03D01Q6", "C03D01Q12",
  "C03D02Q0", "C03D02Q5", "C03D02Q10", "R_C03D02Q13", "R_C03D02Q1",
  "C04D00Q1", "R_C04D00Q11", "C04D00Q4", "C04D00Q12",
  "R_C05D00Q3", "C05D00Q7", "C05D00Q1", "C05D00Q18",
  "R_C06D01Q6", "C06D01Q11", "R_C06D01Q13", "C06D01Q8",
  "C07D00Q11", "R_C07D00Q15", "C07D00Q13", "C07D00Q14",
  "C08D00Q4", "C08D00Q0", "R_C08D00Q9", "R_C08D00Q10",
  "C09D00Q4", "C09D00Q1", "C09D00Q5", "C09D00Q3", "R_C09D00Q9",
  "C10D00Q1", "C10D00Q6", "C10D00Q16", "C10D00Q10",
  "C11D01Q6", "C11D01Q10", "C11D01Q11", "C11D01Q14",
  "C11D02Q4", "C11D02Q7", "C11D02Q1", "C11D02Q3", "C11D02Q8",
  "C12D00Q0", "C12D00Q1", "C12D00Q8", "R_C12D00Q6",
  "C13D00Q7", "R_C13D00Q11", "R_C13D00Q6", "R_C13D00Q15",
  "C14D00Q0", "C14D00Q10", "C14D00Q16", "C14D00Q15",
  "C15D00Q11", "R_C15D00Q15", "C15D00Q8", "C15D00Q12",
  "C16D00Q11", "C16D00Q12", "C16D00Q17", "C16D00Q16",
  "C17D00Q2", "C17D00Q9", "C17D00Q10", "C17D00Q8",
  "C18D01Q2", "C18D01Q3", "C18D01Q8", "R_C18D01Q14",
  "C18D03Q0", "C18D03Q1", "C18D03Q12", "C18D03Q9",
  "C19D00Q3", "C19D00Q7", "C19D00Q14", "C19D00Q26"
)
# `drop = FALSE` keeps the result a data frame even if only one column remains.
d_24items <- d_results_std[, !(names(d_results_std) %in% drop), drop = FALSE]

# NOTE(review): `constructs` is not defined anywhere in this script; it is
# presumably created by an earlier step of the analysis -- confirm. Fail early
# with a clear message instead of an "object not found" error, and refuse a
# label vector of the wrong length, which would otherwise silently leave some
# columns named NA.
if (!exists("constructs")) {
  stop(
    "`constructs` (the labels for the retained items) is not defined.",
    call. = FALSE
  )
}
if (length(constructs) != ncol(d_24items)) {
  stop(
    "`constructs` has ", length(constructs), " labels but ",
    ncol(d_24items), " item columns were retained.",
    call. = FALSE
  )
}
colnames(d_24items) <- constructs

# Determine the number of factors to extract
# Parallel analysis on the selected items, using maximum-likelihood
# factoring (`fm`) and the factor-analysis solution only (`fa`).
parallel <- fa.parallel(
  d_24items,
  fm = "ml",
  fa = "fa"
)

# Maximum Likelihood Factor Analysis

#' Fit a maximum-likelihood factor analysis and print the solution
#'
#' Computes the correlation matrix of `data`, fits `nfactors` factors with
#' `fa()` (maximum likelihood, promax rotation, no SMC starting communalities,
#' at most 100 iterations) and prints the result with two digits, sorted.
#'
#' The number of rows of `data` is passed as `n.obs`. Because `fa()` is given
#' a correlation matrix rather than raw data, it cannot infer the sample size
#' on its own, and the fit statistics that depend on it would otherwise be
#' unavailable.
#'
#' @param data Data frame or matrix of numeric item scores, one row per
#'   observation and one column per item.
#' @param nfactors Number of factors to extract.
#' @return Whatever `print()` returns for the fitted object.
printFA <- function(data, nfactors) {
  item_cor <- cor(data)
  fit <- fa(
    item_cor,
    nfactors = nfactors,
    n.obs = nrow(data),
    rotate = "promax",
    SMC = FALSE,
    fm = "ml",
    max.iter = 100
  )
  print(fit, digits = 2, sort = TRUE)
}
printFA(d_24items, 4)

# Calculate and plot the correlation scores between the 24 representative items
data_cor <- cor(d_24items)
# Upper-triangle colour map with the coefficients printed in each cell and the
# items reordered by hierarchical clustering.
corrplot(
  data_cor,
  method = "color",
  type = "upper",
  order = "hclust",
  tl.col = "black",
  addCoef.col = "black",
  # The call used to contain a dangling, empty `tl.srt=` argument. R treats an
  # empty argument as missing, so it has been removed and corrplot's default
  # label rotation still applies.
  number.cex = 0.75
)

