# ---------------------------------------------------------------------------------------------------------------------
"""
Author: Raphael Andreas Elbing
Last Modified: 25/08/2022
License: This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) License.
"""
# --------------------------------------------------------------------------------------------------------------------
"""This file contains multiple function. One function to create an empty excel file to support model users to create
an input file according to their needs. Furthermore, all functions needed to create the random values as used in 
the Monte Carlo simulations are in this file."""

import scipy.stats
import numpy as np
import pandas as pd


# Excel file creator ------------------------------------------------------------------------------------------
def input_file_creator(product_categories, category_for_reuse, products, considered_use_cycles, file_name):
    """
    The function creates and saves a blank excel file entailing the sheets and columns needed to run the
    Reuse-MaTrace model. The layout is taken from 'data_model/basic_file_structure.xlsx' and the result is written
    to 'data_model/<file_name>.xlsx'.

    Sheets are derived from the template sheet names as follows:
    - sheets whose name starts with 'MaTrace' get a 'Product categories' column,
    - sheets whose second name part (split at '_') is 'inflow' get a 'Products' column,
    - every other sheet is written once per use cycle (cycle number appended to the sheet name); the sheet of the
      last cycle is reduced to the distribution columns (plus 'to_storage' and 'to_disposal' for 'service' sheets).

    :param product_categories: list of product categories
    :param category_for_reuse: string product category to be considered in reuse model (must be in product_categories)
    :param products: list of products present in reuse part of matrace model
    :param considered_use_cycles: integer number of considered use cycles
    :param file_name: string name of created file WITHOUT file ending
    :return: None
    :raises ValueError: if category_for_reuse is not contained in product_categories
    """
    template_path = 'data_model/basic_file_structure.xlsx'
    distribution_columns = ['Products', 'distribution', 'location', 'scale', 'shape']

    # Bring product categories list in right order: the reuse category first, the others in their original order.
    index_category_for_reuse = product_categories.index(category_for_reuse)
    product_categories = (
        [category_for_reuse]
        + product_categories[:index_category_for_reuse]
        + product_categories[index_category_for_reuse + 1:]
    )

    list_pandas = []
    new_sheet_names = []

    # Open the template once; the context manager makes sure the file handle is released again.
    with pd.ExcelFile(template_path) as template:
        for sheet_name in template.sheet_names:
            name_parts = sheet_name.split('_')
            sheet_pd = template.parse(sheet_name=sheet_name)

            # Treat MaTrace sheets
            if name_parts[0] == 'MaTrace':
                sheet_pd['Product categories'] = product_categories
                list_pandas.append(sheet_pd)
                new_sheet_names.append(sheet_name)
                continue

            sheet_kind = name_parts[1]

            # The inflow sheet exists only once, independent of the number of use cycles.
            if sheet_kind == 'inflow':
                sheet_pd['Products'] = products
                list_pandas.append(sheet_pd)
                new_sheet_names.append(sheet_name)
                continue

            # All remaining sheets are repeated for every considered use cycle.
            for current_cycle in range(1, considered_use_cycles + 1):
                cycle_pd = sheet_pd.copy()
                cycle_pd['Products'] = products

                # The last cycle only keeps the distribution columns (and the storage/disposal shares for service).
                if current_cycle == considered_use_cycles:
                    if sheet_kind == 'service':
                        kept_columns = distribution_columns + ['to_storage', 'to_disposal']
                    else:
                        kept_columns = distribution_columns
                    cycle_pd = cycle_pd.loc[:, kept_columns]

                list_pandas.append(cycle_pd)
                new_sheet_names.append(sheet_name + str(current_cycle))

    # export excel
    with pd.ExcelWriter('data_model/{}.xlsx'.format(file_name)) as writer:
        for data_frame, sheet_name in zip(list_pandas, new_sheet_names):
            # Product sheets are indexed by 'Products', MaTrace sheets by 'Product categories'.
            if 'Products' in data_frame.columns:
                index_column = 'Products'
            else:
                index_column = 'Product categories'

            data_frame.set_index(index_column).to_excel(writer, sheet_name=sheet_name)


# Monte Carlo ---------------------------------------------------------------------------------------------------------
# The following functions create the distributions used in the Monte Carlo simulations.
# Some of the values used were found empirically.


def skewness_by_mean_and_sd(mean, sd):
    """
    The function returns the skew factor of a skewed normal distribution depending on the mean and the standard
    deviation. The skew factor is a linear function of the mean that is zero for a mean of 0.5, positive for smaller
    means and negative for larger means.
    :param mean: float mean
    :param sd: float sd, either 0.1 or 0.05 (compared exactly)
    :return: float skew factor
    :raises ValueError: if sd is neither 0.1 nor 0.05
    """
    # The slopes are kept as separate expressions per standard deviation; mathematically both ratios equal 20.
    if sd == 0.1:
        slope = - (8 / 0.4)

    elif sd == 0.05:
        slope = - (9 / 0.45)

    else:
        # Without this branch an unsupported sd would fail later with an unrelated UnboundLocalError.
        raise ValueError('sd must be either 0.1 or 0.05, got {!r}'.format(sd))

    return slope * (mean - 0.5)


def scale_form_skew_factor_and_sd(skew_factor, sd):
    """
    Compute the scale of a skewed normal distribution from its skew factor and the desired standard deviation.
    With delta = skew_factor / sqrt(1 + skew_factor^2) the result is sqrt(sd^2 / (1 - 2 * delta^2 / pi)).
    :param skew_factor: float skew factor
    :param sd: float standard deviation
    :return: float scale
    """
    delta = skew_factor / np.sqrt(1 + skew_factor ** 2)
    variance_share = 1 - (2 * delta ** 2 / np.pi)
    return np.sqrt(sd ** 2 / variance_share)


def loc_from_skew_factor_mean_and_scale(skew_factor, mean, scale):
    """
    Compute the location of a skewed normal distribution from its skew factor, mean and scale.
    With delta = skew_factor / sqrt(1 + skew_factor^2) the result is mean - scale * delta * sqrt(2 / pi).
    :param skew_factor: float skew factor
    :param mean: float mean
    :param scale: float scale
    :return: float loc
    """
    delta = skew_factor / np.sqrt(1 + skew_factor ** 2)
    mean_shift = scale * delta * np.sqrt(2 / np.pi)
    return mean - mean_shift


def skew_factor_loc_and_scale_by_mean_sd(mean, sd):
    """
    Derive the three parameters of a skewed normal distribution from the mean and the standard deviation.
    The skew factor is determined first, the scale follows from the skew factor and sd, and the location
    follows from the skew factor, the mean and that scale.
    :param mean: float mean
    :param sd: float standard deviation
    :return: float skew factor, float location, float scale (in this order)
    """
    alpha = skewness_by_mean_and_sd(mean, sd)
    width = scale_form_skew_factor_and_sd(alpha, sd)
    position = loc_from_skew_factor_mean_and_scale(alpha, mean, width)

    return alpha, position, width


# Functions for triangular distributions
def triang_dist_inputs_case_3(center):
    """
    Provide the triangular distribution parameters used for the uncertainty score 3.
    The distribution always has a width of 0.8. Its start is 0 for centers below 0.4, 0.2 for centers above 0.6
    and center - 0.4 in between.
    :param center: float center
    :return: float location, float scale, float c
    """
    width = 0.8

    # start of the distribution, depending on where the center lies
    if center < 0.4:
        start = 0
    elif center > 0.6:
        start = 0.2
    elif 0.4 <= center <= 0.6:
        start = center - 0.4

    # relative position of the peak within [start, start + width]
    peak = (center - start) / width

    return start, width, peak


def triang_dist_inputs_case_4(center):
    """
    Provide the triangular distribution parameters used for the uncertainty score 4.
    The distribution always starts at 0 and has a width of 1, only the peak position depends on the center.
    :param center: float center
    :return: float location, float scale, float c
    """
    start, width = 0, 1

    # relative position of the peak within [start, start + width]
    return start, width, (center - start) / width


# triangular distributions for survival curve
def triang_dist_inputs_survival_case_3(center, lower_bound=0.6):
    """
    Provide the triangular distribution parameters used for the uncertainty score 3 for survival curves.
    The distribution starts at the lower bound, which is raised to 0.6 if a smaller value is passed. The width
    shrinks by the amount the lower bound exceeds 0.6, starting from 0.8.
    :param center: float center
    :param lower_bound: float start of the distribution, values below 0.6 are replaced by 0.6
    :return: float location, float scale, float c
    """
    default_lower_bound = 0.6
    default_width = 0.8

    # never start below 0.6
    start = max(lower_bound, 0.6)

    # a higher start narrows the distribution by the same amount
    width = default_width + default_lower_bound - start

    # relative position of the peak within [start, start + width]
    peak = (center - start) / width

    return start, width, peak


def triang_dist_inputs_survival_case_4(center, lower_bound=0.5):
    """
    Provide the triangular distribution parameters used for the uncertainty score 4 for survival curves.
    The distribution starts at the lower bound. The width is 1 for a lower bound of 0.5 and changes by the
    amount the lower bound deviates from 0.5.
    :param center: float center
    :param lower_bound: float start of the distribution
    :return: float location, float scale, float c
    """
    default_lower_bound = 0.5
    default_width = 1

    # a higher start narrows the distribution by the same amount
    width = default_width + default_lower_bound - lower_bound

    # relative position of the peak within [lower_bound, lower_bound + width]
    peak = (center - lower_bound) / width

    return lower_bound, width, peak


# shape of folded normal distribution


def shape_folded_case_2(mean):
    """
    Provide the shape of the folded normal distribution for the uncertainty score 2.
    The shape is 13 * mean - 0.6 and never smaller than 0. The parameters in this function were found
    empirically.
    :param mean: float mean
    :return: float shape
    """
    # negative shapes are replaced by 0
    return max(13 * mean - 0.6, 0)


def shape_folded_case_1(mean):
    """
    Provide the shape of the folded normal distribution for the uncertainty score 1.
    The shape is 26 * mean - 0.6 and never smaller than 0. The parameters in this function were found
    empirically.
    :param mean: float mean
    :return: float shape
    """
    raw_shape = 26 * mean - 0.6

    # negative shapes are replaced by 0
    return 0 if raw_shape < 0 else raw_shape


# Random number generator
def _resample_outside_bounds(draw, lower, upper):
    """
    Draw a vector of random numbers and redraw every entry outside [lower, upper] until all entries are inside.
    :param draw: callable without arguments returning a newly drawn numpy array (same length on every call)
    :param lower: float smallest accepted value
    :param upper: float largest accepted value
    :return: array of accepted values
    """
    samples = draw()
    rejected = (samples < lower) | (samples > upper)

    while rejected.any():
        # Only rejected positions take the value of the new draw, accepted values are kept.
        samples = np.where(rejected, draw(), samples)
        rejected = (samples < lower) | (samples > upper)

    return samples


def _survival_curve_multipliers(value, uncertainty_score, n_runs):
    """
    Generate the multipliers (distributed around 1) a survival curve input is scaled with.
    :param value: float input value, only used to determine the smallest allowed multiplier
    :param uncertainty_score: integer uncertainty score between 1 and 5
    :param n_runs: integer number of runs
    :return: array of multipliers
    """
    center = 1

    # Ensuring that scaled values do not end up lower than 1 if the initial value is larger than 1.
    # The lower bound of the multiplier is never smaller than 0.5.
    if value > 1:
        lower_multiplier_bound = max(1 / value, 0.5)
    else:
        lower_multiplier_bound = 0.5

    if uncertainty_score in (1, 2):
        sd = 0.05 if uncertainty_score == 1 else 0.1

        # Normal distribution (skew factor 0) truncated to [lower bound, 1.5] by redrawing.
        return _resample_outside_bounds(
            lambda: scipy.stats.skewnorm.rvs(a=0, loc=center, scale=sd, size=n_runs),
            lower_multiplier_bound, 1.5)

    if uncertainty_score == 3:
        loc, scale, c = triang_dist_inputs_survival_case_3(center=center, lower_bound=lower_multiplier_bound)
        return scipy.stats.triang.rvs(c=c, loc=loc, scale=scale, size=n_runs)

    if uncertainty_score == 4:
        loc, scale, c = triang_dist_inputs_survival_case_4(center=center, lower_bound=lower_multiplier_bound)
        return scipy.stats.triang.rvs(c=c, loc=loc, scale=scale, size=n_runs)

    # Score 5: uniform on [lower bound, 1.5]. The lower bound has to be used here as well, otherwise inputs
    # between 1 and 2 could be scaled to values below 1.
    loc = lower_multiplier_bound
    scale = 1.5 - lower_multiplier_bound
    return scipy.stats.uniform.rvs(loc=loc, scale=scale, size=n_runs)


def _transfer_coefficient_samples(value, sd, loc_limits, shape_function, n_runs):
    """
    Generate values within [0, 1] for the uncertainty scores 1 and 2 of inputs which are not survival curves.
    A skewed normal distribution is used as long as its location lies strictly within loc_limits. Otherwise a
    folded normal distribution starting at 0 (location below 0.5) or its mirror image ending at 1 is used.
    :param value: float input value
    :param sd: float standard deviation (0.05 or 0.1), also used as scale of the folded normal distribution
    :param loc_limits: tuple (lower, upper) of locations between which the skewed normal distribution is used
    :param shape_function: function returning the shape of the folded normal distribution for a mean
    :param n_runs: integer number of runs
    :return: array of randomly generated numbers
    """
    a, loc, scale = skew_factor_loc_and_scale_by_mean_sd(value, sd)
    lowest_loc, highest_loc = loc_limits

    if lowest_loc < loc < highest_loc:
        return _resample_outside_bounds(
            lambda: scipy.stats.skewnorm.rvs(a=a, loc=loc, scale=scale, size=n_runs), 0, 1)

    # Location is not within the range of the skewed normal distribution: folded normal close to 0 ...
    if loc < 0.5:
        c = shape_function(value)
        return _resample_outside_bounds(
            lambda: scipy.stats.foldnorm.rvs(c=c, loc=0.0, scale=sd, size=n_runs), 0, 1)

    # ... or mirrored folded normal close to 1.
    c = shape_function(1 - value)
    return _resample_outside_bounds(
        lambda: 1 - scipy.stats.foldnorm.rvs(c=c, loc=0.0, scale=sd, size=n_runs), 0, 1)


def generate_random_numbers(value, uncertainty_score, survival_curve, n_runs):
    """
    This function generates a vector (length number of runs) with random values following distributions which depend
    on the input value and the uncertainty score. Survival curves are treated differently than transfer coefficients:
    for survival curves a multiplier around 1 is generated and multiplied with the value, for all other inputs
    the values are generated directly within [0, 1].

    Distributions by uncertainty score:
    0: the value is repeated
    1 and 2: (skewed or folded) normal distribution with standard deviation 0.05 and 0.1 respectively
    3 and 4: triangular distribution
    5: uniform distribution

    :param value: float input value
    :param uncertainty_score: integer uncertainty score between 0 and 5
    :param survival_curve: boolean indicating whether the input belongs to a survival curve or not
    :param n_runs: integer number of runs
    :return: array of randomly generated numbers (a list repeating the value if the uncertainty score is 0)
    :raises ValueError: if the uncertainty score is not one of 0, 1, 2, 3, 4, 5
    """

    # If uncertainty score is 0, the number is absolutely certain, hence the value is repeated.
    if uncertainty_score == 0:
        return [value for _ in range(n_runs)]

    if uncertainty_score not in (1, 2, 3, 4, 5):
        raise ValueError('uncertainty_score must be an integer between 0 and 5, got {!r}'.format(uncertainty_score))

    if survival_curve:
        # Multiply the value with the generated vector to scale it.
        return _survival_curve_multipliers(value, uncertainty_score, n_runs) * value

    if uncertainty_score == 1:
        return _transfer_coefficient_samples(value, 0.05, (0.05, 0.95), shape_folded_case_1, n_runs)

    if uncertainty_score == 2:
        return _transfer_coefficient_samples(value, 0.1, (0.1, 0.9), shape_folded_case_2, n_runs)

    if uncertainty_score == 3:
        loc, scale, c = triang_dist_inputs_case_3(value)
        return scipy.stats.triang.rvs(c=c, loc=loc, scale=scale, size=n_runs)

    if uncertainty_score == 4:
        loc, scale, c = triang_dist_inputs_case_4(value)
        return scipy.stats.triang.rvs(c=c, loc=loc, scale=scale, size=n_runs)

    # Score 5: nothing is known about the value, every value between 0 and 1 is equally likely.
    return scipy.stats.uniform.rvs(loc=0, scale=1, size=n_runs)
