# ---------------------------------------------------------------------------------------------------------------------
"""
Author: Raphael Andreas Elbing
Last Modified: 25/08/2022
License: This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) License.
"""
# --------------------------------------------------------------------------------------------------------------------
"""This file is used to execute Monte Carlo simulations. Data is imported, random inputs are generated and normalized.
Afterwards, experiments are run and the results are exported."""

from functions import generate_random_numbers
from combined_reuse_matrace_model import evaluate_cohort_combined_model
from datetime import datetime
import os
import pickle
import pandas as pd
import numpy as np

# Switch between a short proof-of-concept run (10 runs, fixed result folder) and the full simulation.
proof_of_concept_run = True

# If set to True, the mass balance of the first runs is checked and printed. The run loop further below switches the
# check off again after run index 100 has been processed.
check_mass_balance = True

# settings:
n_runs = 10000
n_years = 36
start_year = 2015
considered_use_cycles = 3

# loading required data
defined_distributions_pd = pd.read_excel(
    "data_cobalt_case_study/defined_distributions.xlsx"
)
path_model_inputs = "data_cobalt_case_study/data_input_cobalt_extended_data_set.xlsx"
path_uncertainty_rating = (
    "data_cobalt_case_study/data_uncertainty_rating_cobalt_case_study.xlsx"
)

# Seed NumPy's global random state.
# NOTE(review): presumably generate_random_numbers draws from this global state - confirm.
np.random.seed(seed=233423)

start_time = datetime.now()

if proof_of_concept_run:
    # The proof of concept always uses the same folder and only a handful of runs.
    run_folder = "proof_of_concept"
    n_runs = 10
else:
    # Full runs get a folder named after the month, day and hour of the start time.
    run_folder = "full_model_{}_{}_{}".format(
        start_time.month, start_time.day, start_time.hour
    )

print(
    "Creating dictionary for results. Path: monte_carlo_results/{}".format(run_folder)
)
# makedirs also creates the parent folder "monte_carlo_results" when it is missing, and exist_ok=True keeps a
# repeated run (e.g. a second proof-of-concept run) from failing because the folder already exists.
# Files from an earlier run in the same folder are overwritten by the exports further below.
os.makedirs("monte_carlo_results/{}".format(run_folder), exist_ok=True)

# Import Data:
# One dictionary with actual data, and one dictionary with the uncertainty scoring for each value.


def _read_indexed_sheet(path, sheet_name):
    """Read one sheet of an Excel workbook and index it by its product column.

    Args:
        path: Path of the Excel workbook.
        sheet_name: Name of the sheet to read.

    Returns:
        The sheet as a data frame indexed by the column "Product categories" or, if the sheet has no such column,
        by the column "Products".

    Raises:
        KeyError: If the sheet contains neither of the two index columns.
    """
    sheet_pd = pd.read_excel(path, sheet_name=sheet_name)
    try:
        return sheet_pd.set_index("Product categories")
    except KeyError:
        # set_index raises KeyError for a missing column; the sheet is then expected to use "Products" instead.
        return sheet_pd.set_index("Products")


print("Importing data")

# The sheet names of the uncertainty rating workbook define which sheets are read from both workbooks.
# The context manager closes the workbook handle again after the names have been read.
with pd.ExcelFile(path_uncertainty_rating) as uncertainty_workbook:
    data_sheets = uncertainty_workbook.sheet_names

# Model data
model_data_dic = {
    data_sheet: _read_indexed_sheet(path_model_inputs, data_sheet)
    for data_sheet in data_sheets
}

# Uncertainty score
uncertainty_data_dic = {
    data_sheet: _read_indexed_sheet(path_uncertainty_rating, data_sheet)
    for data_sheet in data_sheets
}

# Setting up a dataframe for the generated inputs.
# Every value of every model data sheet gets its own column, identified by [sheet, column, item].
column_index_list = []

for sheet in data_sheets:
    sheet_pd = model_data_dic[sheet]
    for column_name in sheet_pd.columns:
        for row_name in sheet_pd.index:
            column_index_list.append([sheet, column_name, row_name])

# temporary dataframe
temp_data_frame = pd.DataFrame(column_index_list, columns=["sheet", "column", "item"])

# Create actual index from data frame
index_input_pd = pd.MultiIndex.from_frame(temp_data_frame)

# Creation of actual data frame: one row per run, one column per (sheet, column, item)
input_pd = pd.DataFrame({}, columns=index_input_pd, index=range(n_runs))

# Populating the dataframe.
print("Creating inputs for Monte Carlo simulations based on uncertainty score")

# Columns that are flagged as survival curve parameters when the random numbers are generated.
survival_curve_columns = ("shape", "scale", "hoarding time")

for sheet, column, item in column_index_list:
    # getting data value and uncertainty score
    value = model_data_dic[sheet][column].loc[item]
    try:
        uncertainty_score = uncertainty_data_dic[sheet][column].loc[item]
    except KeyError:
        # No uncertainty rating exists for this column or item: fall back to a score of 0.
        uncertainty_score = 0

    # identify weibull columns and set survival curve parameter
    survival_curve = column in survival_curve_columns

    # populate input dataframe with n_runs generated values for this input
    input_pd.loc[:, (sheet, column, item)] = generate_random_numbers(
        value, uncertainty_score, survival_curve, n_runs
    )

# # Normalize relevant inputs
# Inputs need to be normalized. For each item, the transfer coefficients have to sum to 1. When material is split
# over multiple products/items, the shares have to sum to 1 across all items.
# NOTE(review): the normalization was meant to be tested at the end of each cell (expected output: 'True');
# presumably this refers to a notebook version of this script - confirm whether such a check still exists.
#
## Normalizing the MaTrace part of the model
### Normalizing initial inputs


print("Normalize inputs")

# Initial input: divide every share by the row-wise sum of all shares, so that the shares of one run sum to 1.
inflow_sheet, inflow_column = "MaTrace_initial_inflow", "share"
sum_input = input_pd[inflow_sheet][inflow_column].sum(axis=1)

for column in input_pd.loc[:, (inflow_sheet, inflow_column)].columns:
    share = input_pd[inflow_sheet][inflow_column][column]
    input_pd.loc[:, (inflow_sheet, inflow_column, column)] = share / sum_input

# Hoarding time: the MaTrace model needs an integer here, so the generated values are rounded and cast to int32.
hoarding_sheet, hoarding_column = "MaTrace_hibernating_stock", "hoarding time"
items = input_pd[hoarding_sheet][hoarding_column].columns

for item in items:
    rounded = input_pd[hoarding_sheet][hoarding_column][item].round()
    input_pd.loc[:, (hoarding_sheet, hoarding_column, item)] = rounded.astype("int32")

def _normalize_within_items(frame, sheet, columns, items):
    """Rescale the listed columns of one sheet so that they sum to 1 for every item.

    For each item, the values of all listed columns are divided by their row-wise sum. All values of an item are
    read before any of them is overwritten.

    Args:
        frame: Input data frame with (sheet, column, item) as column MultiIndex; it is modified in place.
        sheet: Name of the sheet (first index level).
        columns: Non-empty sequence of column names (second index level) that are normalized against each other.
        items: Iterable of item names (third index level) to process.
    """
    for item in items:
        parts = [frame[sheet][column][item] for column in columns]

        # Get sum (added up from left to right in the order of `columns`)
        total = parts[0]
        for part in parts[1:]:
            total = total + part

        # Normalize
        for column, part in zip(columns, parts):
            frame.loc[:, (sheet, column, item)] = part / total


def _normalize_across_items(frame, sheet, columns, items):
    """Rescale each listed column of one sheet so that its values sum to 1 across the items.

    Args:
        frame: Input data frame with (sheet, column, item) as column MultiIndex; it is modified in place.
        sheet: Name of the sheet (first index level).
        columns: Iterable of column names (second index level); every column is normalized on its own.
        items: Item names (third index level) over which the material is distributed.
    """
    for column in columns:
        total = frame[sheet][column][items].sum(axis=1)
        for item in items:
            frame.loc[:, (sheet, column, item)] = frame[sheet][column][item] / total


### Normalization of MaTrace_end_of_life
# Two independent pairs of transfer coefficients; each pair has to sum to 1 for every item.
sheet = "MaTrace_end_of_life"
items = input_pd[sheet]["fraction export eol products"].columns

_normalize_within_items(
    input_pd,
    sheet,
    ["fraction export eol products", "fraction collected eol products"],
    items,
)
_normalize_within_items(
    input_pd,
    sheet,
    ["collection to recycling rate", "postconsumer disposal rate"],
    items,
)

### Normalization of MaTrace_B_recycling transfer coefficients: (Chemical, Zn, Downcycling)
# The list of items from the previous normalization is reused. Only the three listed columns are normalized; the
# original comment notes that the sheet also holds two efficiencies.
_normalize_within_items(
    input_pd, "MaTrace_B_recycling", ["Chemical", "Zn", "Downcycling"], items
)

### Normalization of MaTrace_D_secondary_material: (Co metal or compound, W-Co powder)
# The columns do not need to be normalized against each other, but within each column the material is distributed
# over the products, so every column has to sum to 1 across the items.
_normalize_across_items(
    input_pd,
    "MaTrace_D_secondary_material",
    ["Co metal or compound", "W-Co powder"],
    items,
)

### Normalizing (export of products, domestic product inflow) of MaTrace_production
sheet = "MaTrace_production"
items = input_pd[sheet]["export of products"].columns

_normalize_within_items(
    input_pd, sheet, ["export of products", "domestic product inflow"], items
)

## Normalizing the inputs to the reuse model
### Normalize Reuse split
sheet = "Reuse_inflow_split"

# Define items of reuse part; they are used for all reuse sheets below.
items = input_pd[sheet]["split"].columns

_normalize_across_items(input_pd, sheet, ["split"], items)

### Normalizing the transfer coefficients of the reuse service and storage times
# Service times 1 and 2 split into use, storage and disposal; storage times 1 and 2 into use and disposal.
# Reuse_service_time_3 differs from the others: it only has the columns for storage and disposal.
reuse_transfer_columns = [
    ("Reuse_service_time_1", ["to_use", "to_storage", "to_disposal"]),
    ("Reuse_storage_time_1", ["to_use", "to_disposal"]),
    ("Reuse_service_time_2", ["to_use", "to_storage", "to_disposal"]),
    ("Reuse_storage_time_2", ["to_use", "to_disposal"]),
    ("Reuse_service_time_3", ["to_storage", "to_disposal"]),
]

for sheet, column_list in reuse_transfer_columns:
    _normalize_within_items(input_pd, sheet, column_list, items)

# Look up the sheet names and column lists once, so they do not have to be extracted at every iteration.
model_input_keys = model_data_dic.keys()

# Column names as they appear in the uncertainty rating ...
columns = {
    sheet_key: uncertainty_data_dic[sheet_key].columns.to_list()
    for sheet_key in model_input_keys
}

# ... and column names as they appear in the model input data.
columns_inputs = {
    sheet_key: model_data_dic[sheet_key].columns.to_list()
    for sheet_key in model_input_keys
}

# For every run, rebuild one data frame per sheet from the generated inputs, i.e. the format the model needs.
input_runs_dic_list = []

for run in range(n_runs):
    run_inputs = {}

    for sheet_key in model_input_keys:
        # Values of this run, one list per column, in the order of the items of the sheet
        sheet_values = {
            column: input_pd[sheet_key][column].loc[run].to_list()
            for column in columns_inputs[sheet_key]
        }

        # Write data frame into dictionary, indexed like the original model data sheet
        run_inputs[sheet_key] = pd.DataFrame(
            sheet_values, index=model_data_dic[sheet_key].index
        )

    # Add complete dictionary to list
    input_runs_dic_list.append(run_inputs)

# Exporting the input data frame
inputs_path = "monte_carlo_results/{}/inputs.pkl".format(run_folder)
with open(inputs_path, "wb") as f:
    pickle.dump(input_pd, f)

# Restart the clock: the reported durations cover the model runs only, not the input generation.
start_time = datetime.now()

# NOTE(review): input_dic is not read again in this script - confirm whether it is still needed.
input_dic = model_data_dic.copy()

# Empty dictionary to collect data.
monte_carlo_results_dic = {}

print("Starting runs")

for run in range(n_runs):

    # Report every one of the first 100 runs, afterwards only every 100th run.
    if (run + 1) % 100 == 0 or run < 100:
        print("Run {} of {}".format(run + 1, n_runs))

    # Execute experiment
    (
        matrace_data_dic,
        reuse_data_dic,
        extended_graph_data_pd,
    ) = evaluate_cohort_combined_model(
        input_runs_dic_list[run],
        n_years,
        start_year,
        defined_distributions_pd,
        print_state=False,
        separate_reuse_graph=False,
    )

    monte_carlo_results_dic[str(run)] = {
        "matrace_data_dic": matrace_data_dic,
        "reuse_data_dic": reuse_data_dic,
    }

    if check_mass_balance:
        # The mass balance is fulfilled when every row of the extended graph data sums to 1.
        print(
            "Mass balance run {}: {}".format(
                run, np.allclose(extended_graph_data_pd.sum(axis=1), 1)
            )
        )

        # Stop checking after run index 100.
        if run == 100:
            check_mass_balance = False

    # Dump data: write the results in chunks of 1000 runs and free the collected results afterwards.
    if (run + 1) % 1000 == 0:
        with open(
            "monte_carlo_results/{}/run_from_{}_till_{}.pkl".format(
                run_folder, run - 999, run
            ),
            "wb",
        ) as f:
            pickle.dump(monte_carlo_results_dic, f)

        monte_carlo_results_dic = {}

if proof_of_concept_run:
    with open("monte_carlo_results/proof_of_concept/results.pkl", "wb") as f:
        pickle.dump(monte_carlo_results_dic, f)
elif monte_carlo_results_dic:
    # n_runs is not a multiple of 1000: also store the runs of the last, incomplete chunk instead of dropping them.
    with open(
        "monte_carlo_results/{}/run_from_{}_till_{}.pkl".format(
            run_folder, n_runs - len(monte_carlo_results_dic), n_runs - 1
        ),
        "wb",
    ) as f:
        pickle.dump(monte_carlo_results_dic, f)

stop_time = datetime.now()

duration = stop_time - start_time

# timedelta.seconds only holds the seconds part below one day; total_seconds() covers the whole duration.
duration_seconds = duration.total_seconds()

print(
    "Total time - seconds: {},  hours: {}".format(
        duration_seconds, duration_seconds / 3600
    )
)
print("Time per run: {}".format(duration_seconds / n_runs))

print("Inputs and results are stored.")
