"""
@author: j.h.koo@tudelft.nl
"""

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from imblearn.combine import SMOTEENN
from sklearn.linear_model import LinearRegression
from lineartree import LinearForestRegressor

# Directory prefix for input data; 'Collected_detail.xlsx' is read from here.
input_path = "D:\\"
# Directory prefix for results; not referenced by the code visible in this part of the file.
result_path = "D:\\"


class W_ML_:
    """Train and query models that map state features to the targets Z1..Z3.

    On construction the class loads ``Collected_detail.xlsx`` from
    ``input_path``, splits the rows into train/test sets by the ``E`` column,
    resamples the training data for the ``ZI`` classifier with SMOTEENN and
    keeps the ``ZI == 2`` subset as regression data.

    Attributes set by ``__init__``:
        train_W, test_W: train/test rows restricted to the feature, target
            and ``ZI`` columns.
        X_res, y_res: SMOTEENN-resampled features and ``ZI`` labels.
        train_r, test_r: rows of ``train_W`` / ``test_W`` where ``ZI == 2``.
        train_Xr, train_yr: feature and target columns of ``train_r``.
        train_Xrs: feature matrix used by the regressors and by the
            ``clust_K_op*`` methods (currently the same object as
            ``train_Xr``).
    """

    # Column groups of the input sheet, shared by all methods.
    _PI_COLUMNS = ['PI_0', 'PI_1', 'PI_2', 'PI_3', 'PI_4', 'PI_5']
    _P_OUT_COLUMNS = ['p_OUT_0', 'p_OUT_1', 'p_OUT_2', 'p_OUT_3', 'p_OUT_4', 'p_OUT_5']
    _X_COLUMNS = ['I_RWL_t'] + _PI_COLUMNS + _P_OUT_COLUMNS
    _Y_COLUMNS = ['Z1', 'Z2', 'Z3']
    _ZI_COLUMNS = ['ZI']
    # Features the ``clust_K_nc`` KMeans model is fitted on.
    _CLUSTER_FEATURES = ['I_RWL_t', 'PI_mean', 'p_OUT_mean']

    def __init__(self):
        """Load the data set and prepare the train/test frames."""
        OW = pd.read_excel(input_path + 'Collected_detail.xlsx', index_col=0)

        # Values of 'E' held out for testing / validation; every other value
        # in 1..28 is used for training.
        test_E = [1, 2, 25]
        val_E = [22, 23, 24]
        held_out = set(test_E) | set(val_E)
        train_E = [e for e in range(1, 29) if e not in held_out]

        W_columns = self._X_COLUMNS + self._Y_COLUMNS + self._ZI_COLUMNS
        self.train_W = OW.loc[OW['E'].isin(train_E), W_columns]
        self.test_W = OW.loc[OW['E'].isin(test_E), W_columns]

        # Resample (features, ZI label) with SMOTEENN for the classifier.
        sme = SMOTEENN()
        self.X_res, self.y_res = sme.fit_resample(
            self.train_W[self._X_COLUMNS], self.train_W[self._ZI_COLUMNS])

        # Regression data: only the rows labelled ZI == 2.
        self.train_r = self.train_W[self.train_W['ZI'] == 2]
        self.test_r = self.test_W[self.test_W['ZI'] == 2]
        self.train_Xr = self.train_r[self._X_COLUMNS]
        self.train_yr = self.train_r[self._Y_COLUMNS]

        # The regressors and clust_K_op* read ``train_Xrs``, which was never
        # assigned and made those methods fail with AttributeError.
        # NOTE(review): the trailing "s" may have meant a scaled copy of
        # train_Xr; no scaler is visible here, so the unscaled features are
        # used - confirm.
        self.train_Xrs = self.train_Xr

    def rfc(self):
        """Fit a random-forest classifier on the resampled data and return it."""
        # y_res originates from a single-column frame; flatten it so that
        # scikit-learn receives the 1-D target it expects instead of a
        # column vector.
        labels = pd.DataFrame(self.y_res).to_numpy().ravel()
        self.rfC = RandomForestClassifier()
        self.rfC.fit(self.X_res, labels)
        return self.rfC

    def rfr(self, max_depth=30, n_estimators=150):
        """Fit a random-forest regressor for Z1..Z3 on the ZI == 2 rows.

        Args:
            max_depth: forwarded to ``RandomForestRegressor``.
            n_estimators: forwarded to ``RandomForestRegressor``.

        Returns:
            The fitted regressor (also stored as ``self.rfR``).
        """
        # regression for the weights only when the optimal weights are not 1
        self.rfR = RandomForestRegressor(max_depth=max_depth, n_estimators=n_estimators)
        self.rfR.fit(self.train_Xrs, self.train_yr)
        return self.rfR

    def lfr(self):
        """Fit a linear-forest regressor for Z1..Z3 on the ZI == 2 rows.

        Returns:
            The fitted regressor (also stored as ``self.lfR``).
        """
        self.lfR = LinearForestRegressor(
            base_estimator=LinearRegression(), max_features=None,
            n_estimators=300, max_depth=150)
        self.lfR.fit(self.train_Xrs.to_numpy(), self.train_yr.to_numpy())
        return self.lfR

    def _cluster_features(self, frame):
        """Return a copy of the feature columns of *frame* plus row-wise PI / p_OUT means."""
        feats = frame[self._X_COLUMNS].copy()
        feats['PI_mean'] = feats[self._PI_COLUMNS].mean(axis=1)
        feats['p_OUT_mean'] = feats[self._P_OUT_COLUMNS].mean(axis=1)
        return feats

    def _target_stats_by_cluster(self, frame, labels, n_cluster):
        """Return mean/std of Z1..Z3 per cluster label, one row per cluster 0..n_cluster-1."""
        rows = []
        for c in range(n_cluster):
            # A positional boolean mask is used instead of a label lookup so
            # that repeated index labels cannot duplicate rows or pull in
            # rows from other clusters.
            members = frame.loc[labels == c, self._Y_COLUMNS]
            stats = []
            for target in self._Y_COLUMNS:
                stats += [members[target].mean(), members[target].std()]
            rows.append(stats)
        columns = [f'{target} {stat}' for target in self._Y_COLUMNS for stat in ('mean', 'std')]
        return pd.DataFrame(rows, columns=columns)

    def clust_K_nc(self, n_cluster, plot_ = False):
        """Cluster the ZI == 2 rows with KMeans and summarise the targets per cluster.

        KMeans is fitted on ``I_RWL_t`` and the row-wise means of the PI and
        p_OUT columns of the training rows; the test rows are then assigned
        to the fitted clusters.

        Args:
            n_cluster: number of KMeans clusters.
            plot_: currently unused.

        Returns:
            Tuple ``(kmeans, d_info_train, d_info_test)``: the fitted model
            (also stored as ``self.cluster_K_nc``) and two frames holding the
            mean and standard deviation of Z1..Z3 for every cluster. Clusters
            without members produce NaN entries.
        """
        train_cluster = self._cluster_features(self.train_r)
        test_cluster = self._cluster_features(self.test_r)

        self.cluster_K_nc = KMeans(n_clusters=n_cluster, n_init='auto', random_state=100).fit(
            train_cluster[self._CLUSTER_FEATURES])
        train_labels = self.cluster_K_nc.labels_
        test_labels = self.cluster_K_nc.predict(test_cluster[self._CLUSTER_FEATURES])

        d_info_train = self._target_stats_by_cluster(self.train_r, train_labels, n_cluster)
        d_info_test = self._target_stats_by_cluster(self.test_r, test_labels, n_cluster)
        return self.cluster_K_nc, d_info_train, d_info_test

    def clust_K_op__(self, n_cluster = 200):
        """Fit the KMeans model used by ``clust_K_op`` on the regression features.

        Stores the model as ``self.cluster_K_op`` and a copy of the features
        with an added cluster-label column ``'c'`` as ``self.train_Xrs_``.

        Args:
            n_cluster: number of KMeans clusters.

        Returns:
            The fitted KMeans model.
        """
        self.train_Xrs_ = self.train_Xrs.copy()
        self.cluster_K_op = KMeans(n_clusters=n_cluster, n_init='auto', random_state=150)
        # KMeans is unsupervised; a target argument would be ignored.
        self.cluster_K_op.fit(self.train_Xrs)
        self.train_Xrs_['c'] = self.cluster_K_op.predict(self.train_Xrs)
        return self.cluster_K_op

    def clust_K_op(self, state_X):
        """Predict Z1..Z3 for *state_X* with a linear model local to its cluster.

        ``clust_K_op__`` must have been called first. The cluster is chosen
        from the FIRST row of *state_X* only; a linear regression is fitted
        on the training rows of that cluster and applied to all rows passed
        in.

        Args:
            state_X: feature rows in the layout the KMeans model was fitted on.

        Returns:
            The regression prediction for *state_X*.
        """
        c_label_ = self.cluster_K_op.predict(state_X)
        # Positional mask: train_Xrs_, train_Xrs and train_yr hold the same
        # rows in the same order, so this stays correct even when index
        # labels repeat.
        in_cluster = (self.train_Xrs_['c'] == c_label_[0]).to_numpy()
        lr_tmp = LinearRegression()
        lr_tmp.fit(self.train_Xrs.loc[in_cluster], self.train_yr.loc[in_cluster])
        return lr_tmp.predict(state_X)
