import numpy as np
#import matplotlib.pyplot as plt
import math
from random import sample
from scipy.spatial import distance
from scipy.stats import spearmanr
from scipy.stats import rankdata
from sklearn import svm
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
from sklearn import metrics
from joblib import dump, load


def _select_valid_samples(featureColumns, nodeScoreList):
    """Build the feature rows and targets from samples that have a usable score.

    A sample is kept only when its node score is not NaN and strictly
    positive. Targets are rounded to four decimals.

    Returns:
        (X, y): list of feature rows (one value per column, in column order)
        and the list of rounded scores.
    """
    X = []
    y = []
    for i in range(len(featureColumns[0])):
        nodeScore = nodeScoreList[i]
        if math.isnan(nodeScore) or nodeScore <= 0:
            continue
        X.append([column[i] for column in featureColumns])
        y.append(round(nodeScore, 4))
    return X, y


def _rank_positions(ranks):
    """Map each rank to its position in `ranks`.

    Ordinal ranks are unique, so this is equivalent to repeated
    `list.index` calls while scanning the ranking only once.
    """
    return {int(rank): position for position, rank in enumerate(ranks)}


def _rank_of_place(positions, other_ranking, place):
    """Return the rank in `other_ranking` of the sample holding `place`, or NaN if absent."""
    position = positions.get(place)
    return math.nan if position is None else other_ranking[position]


def supportVectorRegression(domSizeList, degreeList, varOrderList, varList, valList, valPosList, minValDomList, maxValDomList, regretMinList, regretMaxList, nodeScoreList, reverse_ranking):
    """Train a regressor on node features and report ranking-quality metrics.

    Despite the name, the estimator currently fitted is a
    `RandomForestRegressor` with 100 trees (SVR, SGD, linear regression and
    extra-trees were alternatives tried earlier).

    Args:
        domSizeList, degreeList, varOrderList, varList, valList, valPosList,
        minValDomList, maxValDomList, regretMinList, regretMaxList:
            Parallel, index-aligned sequences; position ``i`` of each one
            supplies one feature of sample ``i`` (in this column order).
        nodeScoreList: Target score per sample. Samples whose score is NaN
            or not strictly positive are discarded.
        reverse_ranking: When truthy, rank 1 is given to the largest value;
            otherwise rank 1 is given to the smallest value.

    Returns:
        A 15-element list:
        ``[model, scaler, cosine_distance_of_rankings, spearman,
        spearman_top10, max_error, r2, r2_top10, first_pred_place,
        first_test_place, second_pred_place, second_test_place,
        third_pred_place, third_test_place, number_of_kept_samples]``.
        ``*_pred_place`` is the true rank of the sample predicted at that
        place; ``*_test_place`` is the predicted rank of the sample truly at
        that place. A place that does not exist (fewer than three test
        samples) is reported as NaN.

    Raises:
        Whatever scikit-learn raises when the filtered data set cannot be
        split or fitted (for example when no sample survives filtering).
    """
    print("Pre pruning data set size: ", len(domSizeList))

    featureColumns = (domSizeList, degreeList, varOrderList, varList, valList,
                      valPosList, minValDomList, maxValDomList, regretMinList,
                      regretMaxList)
    X, y = _select_valid_samples(featureColumns, nodeScoreList)

    # NOTE(review): LabelEncoder replaces every rounded score by its index
    # among the sorted distinct scores, so the model is trained on (and
    # predicts) those indices rather than raw scores. Kept unchanged because
    # consumers of the model may rely on it -- confirm this is intended.
    lab_enc = preprocessing.LabelEncoder()
    y_encoded = lab_enc.fit_transform(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=0)
    print("Full data set size: ", len(X))

    # Normalizing the data; the scaler is fitted on the training split only
    # and returned so that predictions can apply the same transformation.
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    reg = RandomForestRegressor(n_estimators=100)
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Ordinal ranking gives every sample a distinct rank in 1..n.
    y_pred_ranked = rankdata(y_pred, method='ordinal')
    y_test_ranked = rankdata(y_test, method='ordinal')
    if reverse_ranking:
        y_pred_ranked = (len(y_pred) + 1) - y_pred_ranked
        y_test_ranked = (len(y_test) + 1) - y_test_ranked
        print("doing this")

    test_positions = _rank_positions(y_test_ranked)
    pred_positions = _rank_positions(y_pred_ranked)

    # Values and ranks of the ten best samples according to the true ranking.
    # Slots for places that do not exist (test set smaller than ten) stay 0.
    top_k = 10
    y_pred_ranked_10 = np.zeros(top_k)
    y_test_ranked_10 = np.zeros(top_k)
    y_test_10 = np.zeros(top_k)
    y_pred_10 = np.zeros(top_k)
    for i in range(top_k):
        ind = test_positions.get(i + 1)
        if ind is None:
            print("First position not found")
            continue
        y_pred_ranked_10[i] = y_pred_ranked[ind]
        y_test_ranked_10[i] = (i + 1)
        y_test_10[i] = y_test[ind]
        y_pred_10[i] = y_pred[ind]

    r2top10 = metrics.r2_score(y_test_10, y_pred_10)
    spearmantop10 = spearmanr(y_test_ranked_10, y_pred_ranked_10)

    # Cross-lookup of the podium places. Defaults to NaN instead of leaving
    # the names unbound, which previously crashed the return statement when
    # the test split had fewer than three samples.
    podium = (1, 2, 3)
    if any(place not in pred_positions or place not in test_positions for place in podium):
        print("First position not found")
    first_pred_place, second_pred_place, third_pred_place = (
        _rank_of_place(pred_positions, y_test_ranked, place) for place in podium)
    first_test_place, second_test_place, third_test_place = (
        _rank_of_place(test_positions, y_pred_ranked, place) for place in podium)

    cosine = distance.cosine(y_test_ranked, y_pred_ranked)
    spearman = spearmanr(y_test, y_pred)
    max_score = metrics.max_error(y_test, y_pred)
    r2 = metrics.r2_score(y_test, y_pred)
    print("MSE score: ", metrics.mean_squared_error(y_test, y_pred))
    print("MSE logged score: ", metrics.mean_squared_log_error(y_test, y_pred))
    print("EVS score: ", metrics.explained_variance_score(y_test, y_pred))
    # Note: the value printed under "MAE" is the *median* absolute error.
    print("MAE score: ", metrics.median_absolute_error(y_test, y_pred))

    print("Feature importances: ", reg.feature_importances_)

    return [reg, scaler, cosine, spearman[0], spearmantop10[0], max_score, r2, r2top10, first_pred_place, first_test_place,
            second_pred_place, second_test_place, third_pred_place, third_test_place, len(X)]

def predictML(mlModel, scaler, domSizeTuple, degreeTuple, varOrderTuple, varTuple, valTuple, valPosTuple, minValDomTuple, maxValDomTuple, regretMinTuple, regretMaxTuple):
    """Predict a score for every candidate described by the feature tuples.

    Args:
        mlModel: Fitted estimator exposing ``predict(X)`` whose result has a
            ``tolist()`` method.
        scaler: Fitted transformer exposing ``transform(X)``; applied to the
            assembled feature rows before prediction.
        domSizeTuple, degreeTuple, varOrderTuple, varTuple, valTuple,
        valPosTuple, minValDomTuple, maxValDomTuple, regretMinTuple,
        regretMaxTuple:
            Parallel, index-aligned sequences; position ``i`` of each one
            supplies one feature of candidate ``i`` (in this column order).
            The number of candidates is taken from ``domSizeTuple``.

    Returns:
        A plain list with one prediction per candidate; an empty list when
        there are no candidates.

    Raises:
        ValueError: If ``mlModel`` is None.
    """
    if mlModel is None:
        # Keep the console message, but fail explicitly instead of falling
        # through to an incidental AttributeError on `None.predict`.
        print("Error: ML model in python prediction is null")
        raise ValueError("ML model in python prediction is null")

    featureColumns = (domSizeTuple, degreeTuple, varOrderTuple, varTuple, valTuple,
                      valPosTuple, minValDomTuple, maxValDomTuple, regretMinTuple,
                      regretMaxTuple)
    X_pred = [[column[i] for column in featureColumns] for i in range(len(domSizeTuple))]
    if not X_pred:
        # Nothing to score; avoid handing an empty matrix to the scaler.
        return []

    X = scaler.transform(X_pred)
    return mlModel.predict(X).tolist()


# x_train = sample(list(X), math.ceil(listSize * 0.8))
# y_encoded_train = sample(list(y_encoded), math.ceil(listSize * 0.8))
# x_test = sample(list(X), math.floor(listSize * 0.2))
# y_encoded_test = sample(list(y_encoded), math.floor(listSize * 0.2))

#print(*y)
#print(*y_encoded)

## Plotting domain size vs node score
# plt.plot(realDomSizeList, y_encoded, 'ro')
# #plt.axis([0, 1, 0, 100])
# plt.xlabel('Domain size')
# plt.ylabel('Node score')
# plt.savefig('Domsize_versus_node_score.png')
# print("Python: saved image domain size vs node score")

# plt.plot(realVarList, y_encoded, 'ro')
# #plt.axis([0, 1, 0, 100])
# plt.xlabel('Var position')
# plt.ylabel('Node score')
# plt.savefig('Var_versus_node_score.png')
# print("Python: saved image var vs node score")

# plt.plot(realValList, y_encoded, 'ro')
# #plt.axis([0, 1, 0, 100])
# plt.xlabel('Value assigned')
# plt.ylabel('Node score')
# plt.savefig('Val_versus_node_score.png')
# print("Python: saved image val vs node score")

#plt.plot(X, y)
#plt.savefig('test_plot.png')