import numpy as np
import pandas as pd
import os
import sys
from util import UserPerformance
from util import load_answers, load_user_info, find_valid_users, read_decisions, get_condition_users, read_TiA_scales
from util import calc_ATI_scale, calc_familiarity, calc_propensity_to_trust, load_debugging_performance
from util import find_valid_users, load_answers, read_decisions, get_assessment, calc_user_reliance_measures
from scipy.stats import wilcoxon, kruskal


# Task identifiers of the two batches: "p1-0" ... "p1-9" and "p2-0" ... "p2-9".
p1_tasks = list(map("p1-{}".format, range(10)))
p2_tasks = list(map("p2-{}".format, range(10)))

def _record_batch_performance(performance, user, usertask_dict, user_flip_dict, answer_dict, batch, group):
	"""Compute the reliance measures of `user` on the tasks in `batch` and store them in `performance` under `group`."""
	# The fourth returned value (initial disagreement) is not used here.
	correct, agreement_fraction, switching_fraction, _initial_disagreement, relative_positive_ai_reliance, \
		relative_positive_self_reliance = calc_user_reliance_measures(user, usertask_dict, user_flip_dict, answer_dict, batch)
	performance.add_performance(
		# Accuracy is the share of correct answers among the tasks of this batch.
		accuracy=correct / float(len(batch)),
		agreement_fraction=agreement_fraction,
		switching_fraction=switching_fraction,
		relative_positive_ai_reliance=relative_positive_ai_reliance,
		relative_positive_self_reliance=relative_positive_self_reliance,
		group=group)


def compare_performance_across_conditions(users, user_condition_dict, user_batch_order, usertask_dict, user_flip_dict, answer_dict):
	"""Collect per-batch accuracy and reliance measures for every user.

	For each user the task batch that was worked on first is determined from
	`user_batch_order[user][0]` (1: the "p1-*" tasks came first, 2: the "p2-*"
	tasks came first). The measures returned by `calc_user_reliance_measures`
	are then stored in a `UserPerformance` object under the groups
	"first_batch" and "second_batch".

	Args:
		users: iterable of user ids to process.
		user_condition_dict: accepted for interface compatibility; not read here.
		user_batch_order: maps a user id to a sequence whose first element is 1 or 2.
		usertask_dict, user_flip_dict, answer_dict: passed through unchanged to
			`calc_user_reliance_measures`.

	Returns:
		dict mapping each user id to its `UserPerformance` object.

	Raises:
		ValueError: if the first element of a user's batch order is neither 1 nor 2.
	"""
	user2performance = {}
	for user in users:
		first_batch_id = user_batch_order[user][0]
		if first_batch_id == 1:
			first_batch, second_batch = p1_tasks, p2_tasks
		elif first_batch_id == 2:
			first_batch, second_batch = p2_tasks, p1_tasks
		else:
			# An explicit exception instead of `assert`, so the check survives `python -O`.
			raise ValueError("unexpected first batch {!r} for user {!r}; expected 1 or 2".format(first_batch_id, user))

		tp_performance = UserPerformance(username=user)
		_record_batch_performance(tp_performance, user, usertask_dict, user_flip_dict, answer_dict, first_batch, "first_batch")
		_record_batch_performance(tp_performance, user, usertask_dict, user_flip_dict, answer_dict, second_batch, "second_batch")
		user2performance[user] = tp_performance
	return user2performance


# Subscale names; used below as keys into the per-user TiA scale dicts and,
# prefixed with "TiA-", as column names of the collected user data.
TiA_subscales = [
	"Reliability/Competence",
	"Understanding/Predictability",
	"Intention of Developers",
	"Trust in Automation",
]

if __name__ == "__main__":
	import math

	# Users whose second-batch accuracy is at or below this value are not
	# considered in the high-accuracy scan further down.
	accuracy_cutoff = 0.7
	reliance_metrics = ["accuracy", "agreement_fraction", "switching_fraction", "relative_positive_ai_reliance", "relative_positive_self_reliance"]

	# Merge the answers of both task files into one dict keyed by the prefixed
	# task ids ("p1-<id>" / "p2-<id>").
	answer_dict_p1 = load_answers("reviews_p1.json")
	answer_dict_p2 = load_answers("reviews_p2.json")
	answer_dict = {}
	for task_id in answer_dict_p1:
		answer_dict["p1-{}".format(task_id)] = answer_dict_p1[task_id]
	for task_id in answer_dict_p2:
		answer_dict["p2-{}".format(task_id)] = answer_dict_p2[task_id]

	# Load participants, questionnaire scales and decisions, restricted to valid users.
	valid_users, approved_users = find_valid_users()
	user_condition_dict, user_batch_order = load_user_info(reserved_users=valid_users)

	user_TiA_scale_first, user_TiA_scale_second = read_TiA_scales(reserved_users=valid_users)
	user_ATI_scale = calc_ATI_scale(valid_users)
	user_familiarity_dict = calc_familiarity(valid_users)
	user_ptt_scale = calc_propensity_to_trust(valid_users)
	user_task_dict, user_confidence_dict, user_flip_dict = read_decisions(user_batch_order, reserved_users=valid_users)
	user2performance = compare_performance_across_conditions(valid_users, user_condition_dict, user_batch_order, user_task_dict, user_flip_dict, answer_dict)

	# Valid users per experimental condition (the `&` assumes both operands are sets).
	condition_users = get_condition_users(user_condition_dict)
	users_dict = {
		"Debugging-R": valid_users & condition_users[0],
		"Debugging-D": valid_users & condition_users[1],
		"Debugging-I": valid_users & condition_users[2],
		"Control"    : valid_users & condition_users[3]
	}
	# Disabled analysis (NOTE: compare_trust is not among the imports at the top of this file):
	# for subscale in TiA_subscales:
	# 	compare_trust(valid_users - users_dict["Control"], user_TiA_scale_first, user_TiA_scale_second, subscale)
	# sys.exit(-1)

	# Column-oriented table: one list per variable, one entry per user.
	user_data = {
		"user_id": [],
		"condition": [],
		"ATI": [],
		"TiA-Propensity": [],
		"TiA-Familiarity": [],
		"accuracy": [],
		"agreement_fraction": [],
		"switching_fraction": [],
		"relative_positive_ai_reliance": [],
		"relative_positive_self_reliance": []
	}
	for subscale in TiA_subscales:
		user_data["TiA-{}".format(subscale)] = []
	# consider all users for analysis of covariates on trust and reliance
	user_acc_list = []
	user2agreement = {}
	user2agreement_1 = {}
	for user in user_TiA_scale_first:
		performance = user2performance[user].performance
		user_data["user_id"].append(user)
		user_data["condition"].append(user_condition_dict[user])
		user_data["ATI"].append(user_ATI_scale[user])
		user_data["TiA-Propensity"].append(user_ptt_scale[user])
		user_data["TiA-Familiarity"].append(user_familiarity_dict[user])
		for subscale in TiA_subscales:
			# Trust is recorded as the mean of the first and second measurement.
			# Alternative (disabled): change between the two measurements.
			# trust_change = user_TiA_scale_second[user][subscale] - user_TiA_scale_first[user][subscale]
			# user_data["TiA-{}".format(subscale)].append(trust_change)
			trust_avg = (user_TiA_scale_second[user][subscale] + user_TiA_scale_first[user][subscale]) / 2
			user_data["TiA-{}".format(subscale)].append(trust_avg)

		for metric in reliance_metrics:
			# Only the second-batch value of each metric is recorded.
			# Alternatives (disabled): change or mean across the two batches.
			# performance_change = user2performance[user].performance["second_batch"][metric] - user2performance[user].performance["first_batch"][metric]
			# user_data[metric].append(performance_change)
			# performance_avg = (user2performance[user].performance["second_batch"][metric] + user2performance[user].performance["first_batch"][metric]) / 2
			user_data[metric].append(performance["second_batch"][metric])
		user2agreement_1[user] = performance["first_batch"]["agreement_fraction"]
		user2agreement[user] = performance["second_batch"]["agreement_fraction"]
		user_acc_list.append((user, performance["second_batch"]["accuracy"]))
	# df = pd.DataFrame(user_data)
	# print(df.shape)

	# Rank users by second-batch accuracy, best first.
	user_acc_list.sort(key=lambda x: x[1], reverse=True)
	print(len(user_acc_list))
	top_quat = math.ceil(len(user_acc_list) / 4.0)
	# High-accuracy users per condition; keys are the condition values 0-3.
	excelence_dict = {condition: [] for condition in range(4)}
	print(user_acc_list[:top_quat])
	print(top_quat)
	for tp_user, tp_accuracy in user_acc_list:
		# The list is sorted in descending order, so the first user at or below
		# the cutoff ends the scan. `<=` (rather than `==`) also stops the scan
		# when no user scores exactly the cutoff value.
		if tp_accuracy <= accuracy_cutoff:
			break
		# Users who agreed on every second-batch task are printed and left out.
		if user2agreement[tp_user] == 1.0:
			print(tp_user)
			continue
		excelence_dict[user_condition_dict[tp_user]].append(tp_user)
	for condition in excelence_dict:
		print(condition, len(excelence_dict[condition]))
	# for var_name in ["ATI", "TiA-Propensity", "TiA-Familiarity"]:
	# 	print("{} Mean: {:.2f}, SD: {:.2f}".format(var_name, np.mean(user_data[var_name]), np.std(user_data[var_name])))

	# NOTE: np.std uses ddof=0 by default, i.e. the population standard deviation.
	for metric in reliance_metrics:
		print("{} Mean: {:.2f}, SD: {:.2f}".format(metric, np.mean(user_data[metric]), np.std(user_data[metric])))

	# Disabled analysis (NOTE: read_nasa_tlx and draw_box_plot are not among the
	# imports at the top of this file):
	# users_with_debugging = valid_users - users_dict["Control"]
	# user_nasa_tlx_dict = read_nasa_tlx(valid_users - users_dict["Control"])
	# scale_dict = {}
	# mean_dict, sd_dict, median_dict = {}, {}, {}
	# for scale in ["Mental\nDemand", "Physical\nDemand", "Temporal\nDemand", "Performance", "Effort", "Frustration"]:
	# 	temp_list = []
	# 	for user in users_with_debugging:
	# 		temp_list.append(user_nasa_tlx_dict[user][scale.replace("\n", " ")])
	# 	scale_dict[scale] = temp_list
	# 	mean_dict[scale] = np.mean(scale_dict[scale])
	# 	sd_dict[scale] = np.std(scale_dict[scale], ddof=1)
	# 	median_dict[scale] = np.median(scale_dict[scale])
	# draw_box_plot(scale_dict, mean_dict, sd_dict, median_dict)


	# Disabled analysis: summary of AI and self assessments.
	# ai_assessment_first, ai_assessment_second, self_assessment_first, self_assessment_second = get_assessment(reserved_users=valid_users)
	# ai_assessment_first_list = [ai_assessment_first[user] for user in ai_assessment_first]
	# ai_assessment_second_list = [ai_assessment_second[user] for user in ai_assessment_second]
	# self_assessment_first_list = [self_assessment_first[user] for user in self_assessment_first]
	# self_assessment_second_list = [self_assessment_second[user] for user in self_assessment_second]
	# print("{} Mean: {:.2f}, SD: {:.2f}".format("AI assessment - first", np.mean(ai_assessment_first_list), np.std(ai_assessment_first_list)))
	# print("{} Mean: {:.2f}, SD: {:.2f}".format("AI assessment - second", np.mean(ai_assessment_second_list), np.std(ai_assessment_second_list)))
	# print("{} Mean: {:.2f}, SD: {:.2f}".format("self assessment - first", np.mean(self_assessment_first_list), np.std(self_assessment_first_list)))
	# print("{} Mean: {:.2f}, SD: {:.2f}".format("self assessment - second", np.mean(self_assessment_second_list), np.std(self_assessment_second_list)))
