# ---------------------------------------------------------------------------------------------------------------------
"""
Author: Raphael Andreas Elbing
Last Modified: 25/08/2022
License: This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) License.
"""
# --------------------------------------------------------------------------------------------------------------------
"""This file calculates the absolute and normalized Spearman correlation between all inputs and the total in-use stock,
the hibernating stock, the disposal flow and the export flow on the basis of the results of "monte_carlo_simulations.py"
and "monte_carlo_evaluation.py"."""

# packages
import pickle
import pandas as pd
import numpy as np
from collections import Counter
from scipy import stats

# Switch between the small proof-of-concept data set and the full model results.
proof_of_concept_run = True

# Settings
start_year = 2015
n_years = 36

# The number of evaluated runs and the folder used to load and save files both depend on the selected mode.
if not proof_of_concept_run:
    considered_runs = 10000
    folder_path = "monte_carlo_results/full_model_7_5_18"

else:
    considered_runs = 10
    folder_path = "monte_carlo_results/proof_of_concept"

# Loading data
# inputs

input_file = '{}/inputs.pkl'.format(folder_path)
with open(input_file, 'rb') as f:
    input_pd = pickle.load(f)

# results
if not proof_of_concept_run:
    # The full model results are spread over ten files whose names cover the runs
    # 0-999, 1000-1999, ..., 9000-9999.
    single_pandas_list = []
    for chunk in range(10):
        start_run = 1000 * chunk
        end_run = start_run + 999

        file_path = '{}/compact_results_run_{}_till_{}.pkl'.format(folder_path, start_run, end_run)

        with open(file_path, "rb") as f:
            single_pandas_list.append(pickle.load(f))

    # combine data frames
    compact_results_pd = pd.concat(single_pandas_list, axis='columns')

else:
    # The proof of concept keeps all runs in a single file.
    with open('{}/compact_results.pkl'.format(folder_path), "rb") as f:
        compact_results_pd = pickle.load(f)

# Column labels of the runs, in the order in which they are evaluated
run_labels = [str(run_number) for run_number in range(considered_runs)]

# Combining exports to one frame

frame_collector_list = []

for run_label in run_labels:
    run_results = compact_results_pd[run_label]

    # the total export is the sum of the three export flows of this run
    total_export = (run_results['E.2 exported eol products'] +
                    run_results['E.11 exported recycled materials'] +
                    run_results['P.8 export recycled products'])

    frame_collector_list.append(pd.concat([total_export], keys=['total_export'], names=['stock_flow'], axis=1))

exports_pd = pd.concat(frame_collector_list, keys=run_labels, names=['run'], axis=1)

# Combining disposal to one frame

frame_collector_list = []

for run_label in run_labels:
    run_results = compact_results_pd[run_label]

    # the total disposal is the sum of the four disposal flows of this run
    total_disposal = (run_results['E.4 E.5 non-selective collection'] +
                      run_results['E.7 pretreatment waste'] +
                      run_results['E.8 recycling waste'] +
                      run_results['P.4p disposed scrap'])

    frame_collector_list.append(pd.concat([total_disposal], keys=['total_disposal'], names=['stock_flow'], axis=1))

disposal_pd = pd.concat(frame_collector_list, keys=run_labels, names=['run'], axis=1)

# joining the data frame: the new totals are appended as additional columns
compact_results_pd = pd.concat([compact_results_pd, exports_pd, disposal_pd], axis=1)

# ## Collecting the results of total use stock, hoarding stock, and disposal flow to one data frame

result_items = ['total_use_stock', 'total_hoarding_stock', 'to_disposal_flow', 'U.B hoarding stock', 'U.A use stock',
                'total_export', 'total_disposal']

# One column per (result item, year) pair; the result item varies slowest, the year (stored as string) fastest.
monte_carlo_results_index = pd.MultiIndex.from_product([result_items, [str(year) for year in range(n_years)]],
                                                       names=['result', 'year'])

monte_carlo_results_pd = pd.DataFrame({}, columns=monte_carlo_results_index)

# To populate the data frame, one row per run is created whose values follow the column order defined above,
# i.e. the 'sum' values of every result item are placed one after another.
# NOTE: this relies on every 'sum' column holding exactly n_years values, otherwise the row length does not match.

for run in range(considered_runs):
    run_results = compact_results_pd[str(run)]

    collector_list = []
    for result_item in result_items:
        collector_list.extend(run_results[result_item]['sum'].tolist())

    # add to data frame
    monte_carlo_results_pd.loc[run] = collector_list

# Create dataframe for spearman results

# One empty copy of the input columns per result item. The number of copies follows result_items so that the
# keys always match the number of concatenated frames.
temp_pd = pd.DataFrame({}, columns=input_pd.columns)
spearman_results_abs_pd = pd.concat([temp_pd] * len(result_items), keys=result_items, axis=1,
                                    names=['result_item'])

print('Calculate spearman correlation between inputs and:')
print(result_items)

# The sampled inputs neither depend on the year nor on the result item. They are therefore extracted only once
# per input column. Inputs with a single distinct value are stored as None, because no rank correlation can be
# calculated for a constant input.
considered_inputs_pd = input_pd.loc[:considered_runs - 1]  # label based slice, the end label is included
input_samples = {}

for identifier_tub in spearman_results_abs_pd.columns:
    input_key = (identifier_tub[1], identifier_tub[2], identifier_tub[3])

    if input_key in input_samples:
        continue

    table, input_name, product = input_key
    samples = considered_inputs_pd[table][input_name][product]

    if len(Counter(samples.to_list())) != 1:
        input_samples[input_key] = samples
    else:
        input_samples[input_key] = None

for year in range(n_years):

    print('Year {} of {}'.format(year + 1, n_years))

    year_label = str(year)
    collector_list = []

    for identifier_tub in spearman_results_abs_pd.columns:
        result_item = identifier_tub[0]
        samples = input_samples[(identifier_tub[1], identifier_tub[2], identifier_tub[3])]

        if samples is None:
            collector_list.append(np.nan)
        else:
            # the first element of the returned pair is the correlation coefficient
            collector_list.append(stats.spearmanr(samples, monte_carlo_results_pd[result_item][year_label])[0])

    # one row per year, the values follow the column order of the data frame
    spearman_results_abs_pd.loc[year] = collector_list

# getting the squared normalized correlation.
spearman_results_squared_pd = spearman_results_abs_pd ** 2

# Split the squared correlations by result item and divide every row (year) by its row total, so that each
# value becomes the share of one input in the summed squared correlation of that year.
squared_blocks = [spearman_results_squared_pd[result_item] for result_item in result_items]
temp_pd_list = [block.div(block.sum(axis=1), axis='rows') for block in squared_blocks]

# put the blocks back together and convert the shares to percent
spearman_results_normalized_pd = pd.concat(temp_pd_list, keys=result_items, axis=1, names=['result_item']) * 100

# Every entry consists of the progress message, the data frame to store, and the template of the target path.
export_jobs = [
    ('Export Monte Carlo results', monte_carlo_results_pd, '{}/monte_carlo_results.pkl'),
    ('Export spearman absolute results', spearman_results_abs_pd, '{}/spearman_results_abs.pkl'),
    ('Export spearman normalized results', spearman_results_normalized_pd, '{}/spearman_results_normalized.pkl'),
]

# The results are pickled into the folder the inputs were loaded from.
for message, data_frame, path_template in export_jobs:
    print(message)
    data_frame.to_pickle(path_template.format(folder_path))
