import numpy as np
import pandas as pd
import os
import sys
from util import UserPerformance
from util import load_answers, load_user_info, find_valid_users, read_decisions, get_condition_users
from util import calc_ATI_scale, calc_familiarity, calc_propensity_to_trust
from util import find_valid_users, load_answers, read_decisions, get_assessment, calc_user_reliance_measures
from scipy.stats import wilcoxon, kruskal, mannwhitneyu
from collections import Counter
import math

def valid_user_age_gender(valid_users, data_dir="../anonymous_data"):
	"""Print the age statistics and gender distribution of the valid participants.

	Reads ``prolific_demographics.csv`` from ``data_dir`` and keeps the rows
	whose "Participant id" is in ``valid_users``. Rows whose age is one of the
	export placeholders ("DATA_EXPIRED", "CONSENT_REVOKED") or is missing are
	left out of both the age statistics and the gender count.

	Args:
		valid_users: sized collection of participant ids to report on.
		data_dir: directory containing ``prolific_demographics.csv``; the
			default is the location the script has always read from.

	Returns:
		None. The results are written to stdout: the number of usable ages,
		the number of valid users, mean/SD of age (``np.std`` default, i.e.
		population SD) and a ``Counter`` of the "Sex" column.

	Raises:
		ValueError: if an age is a non-numeric string other than the known
			placeholders (surfaced by ``float``), or a required column is
			absent from the file (surfaced by ``pd.read_csv``).
	"""
	filename = os.path.join(data_dir, "prolific_demographics.csv")
	usecols = ["Participant id", "Age", "Sex"]
	df = pd.read_csv(filename, usecols=usecols)
	# Build the lookup once so each row costs O(1) whatever container was passed.
	valid_ids = set(valid_users)
	unavailable = ("DATA_EXPIRED", "CONSENT_REVOKED")
	age_list = []
	gender_list = []
	# read_csv keeps the file's column order regardless of the order of
	# `usecols`, so select by name before unpacking rows positionally.
	for pid, age, gender in df[usecols].values.tolist():
		if pid not in valid_ids:
			continue
		if age in unavailable:
			continue
		age = float(age)
		# An empty cell is read as NaN; keeping it would turn mean/SD into nan.
		if math.isnan(age):
			continue
		age_list.append(age)
		gender_list.append(gender)
	# Number of participants with a usable age (may be below len(valid_users)).
	print(len(age_list))
	print("{} participants are kept for further analysis".format(len(valid_users)))
	print("Average age:", np.mean(age_list), "SD:", np.std(age_list))
	res = Counter(gender_list)
	print("Gender distribution:", res)

def show_time(valid_users, user_condition_dict, data_dir="../anonymous_data"):
	"""Print completion-time statistics overall and per condition, with group tests.

	Reads the "Time taken" column of ``prolific_demographics.csv`` in
	``data_dir`` for the participants in ``valid_users``, groups the values by
	the condition found in ``user_condition_dict`` and prints:

	* the overall mean/SD (``np.std`` default, i.e. population SD),
	* a Kruskal-Wallis test across the four conditions,
	* for every pair of conditions, a two-sided Mann-Whitney U test; when it
	  is below the threshold, the one-sided "greater" test is run as well,
	* the mean/SD of each condition.

	Rows whose time is a placeholder ("DATA_EXPIRED", "CONSENT_REVOKED") or is
	missing are skipped.

	Args:
		valid_users: sized collection of participant ids to report on.
		user_condition_dict: maps participant id to a condition in 0..3.
		data_dir: directory containing ``prolific_demographics.csv``; the
			default is the location the script has always read from.

	Returns:
		None. All results are written to stdout.

	Raises:
		KeyError: if a participant with a usable time has no entry in
			``user_condition_dict`` or maps to a condition outside 0..3.
		ValueError: if a time is a non-numeric string other than the known
			placeholders (surfaced by ``float``).
	"""
	filename = os.path.join(data_dir, "prolific_demographics.csv")
	usecols = ["Participant id", "Time taken"]
	df = pd.read_csv(filename, usecols=usecols)
	# Build the lookup once so each row costs O(1) whatever container was passed.
	valid_ids = set(valid_users)
	unavailable = ("DATA_EXPIRED", "CONSENT_REVOKED")
	time_list = []
	time_dict = {condition: [] for condition in range(4)}
	# read_csv keeps the file's column order regardless of the order of
	# `usecols`, so select by name before unpacking rows positionally.
	for pid, raw_time in df[usecols].values.tolist():
		if pid not in valid_ids:
			continue
		if raw_time in unavailable:
			continue
		time_taken = float(raw_time)
		# An empty cell is read as NaN and carries no timing information.
		if math.isnan(time_taken):
			continue
		time_list.append(time_taken)
		time_dict[user_condition_dict[pid]].append(time_taken)
	print("{} participants are kept for further analysis".format(len(valid_users)))
	print("Overall")
	print("Average time:", np.mean(time_list), "SD:", np.std(time_list))
	res = kruskal(time_dict[0], time_dict[1], time_dict[2], time_dict[3])
	print(res)
	# NOTE(review): the threshold divides by 3, while the loops below run
	# 6 pairwise comparisons -- confirm the intended multiple-comparison
	# correction. Left unchanged so the reported results stay the same.
	alpha = 0.05 / 3
	for c1 in range(0, 4):
		for c2 in range(c1 + 1, 4):
			statistic, pvalue = mannwhitneyu(time_dict[c1], time_dict[c2], alternative='two-sided')
			if pvalue < alpha:
				print("Alternative {} <> {},".format(c1, c2), "pvalue %.4f"%pvalue, "statistic %.4f"%statistic)
				# Only the "greater" direction is probed; a significant
				# "less" difference shows up as the two-sided line alone.
				statistic, pvalue = mannwhitneyu(time_dict[c1], time_dict[c2], alternative='greater')
				if pvalue < alpha:
					print("Alternative {} > {},".format(c1, c2), "pvalue %.4f"%pvalue, "statistic %.4f"%statistic)
	for key_ in time_dict:
		print(key_)
		print("Average time:", np.mean(time_dict[key_]), "SD:", np.std(time_dict[key_]))

if __name__ == "__main__":
	# Script entry point: fetch the valid participants and their condition
	# assignment from the project helpers, then print both summaries.
	# The second value of each helper's return pair is not used here.
	kept_users, _approved_users = find_valid_users()
	condition_by_user, _batch_order = load_user_info(reserved_users=kept_users)
	valid_user_age_gender(kept_users)
	show_time(kept_users, condition_by_user)







