% This script determines the inter-class separability using the Jeffries-Matusita distance and the intra-class variability using the average Euclidean distance and its STD
% The file 'grouped_classes.txt' is required by Corine_Class_Data.m

% This file is suited for parallel computing
function Class_Separability_Variability_P(Data_choices, Data_source, LC_data, number_classes, class_values, class_names, NDVI_TS_cell, NDVI_Trend_cell, NDVI_Season_cell, LAI_TS_cell, FAPAR_TS_cell, Fcover_TS_cell, Spectral_Fourier_Coefficients_cell, Fourier_series_length, Landsat_last_time, number_years, rows_data, columns_data, MODIS_OoB, Resolution_downscaling, coarsening_factor)
    % CLASS_SEPARABILITY_VARIABILITY_P  Inter-class separability and intra-class variability of land cover classes.
    %
    %   For every year, a set of per-pixel features is derived from the selected time series. The data are split
    %   into rectangular sections, and for each section and year the following is computed (in a parfor loop):
    %       - the mean pair-wise Euclidean distance within each class, and its standard deviation;
    %       - the maximum Jeffries-Matusita (JM) distance between each pair of classes over all features,
    %         together with the feature for which that maximum occurs.
    %   The results are averaged over the years, weighted over the sections with the number of class samples,
    %   displayed as tables and written to .xls/.mat files in the current folder.
    %
    %   Inputs (as used by this function):
    %       Data_choices            - cell {NDVI, NDVI components, LAI, FAPAR, Fcover}; each entry is compared to 'Yes'.
    %                                 If several entries are 'Yes', the features of the last one in this order are used.
    %       Data_source             - 'Copernicus' (36 time stamps/year), 'MODIS' (23) or 'Landsat' (365).
    %                                 For 'Landsat', the features come from Spectral_metrics_P.
    %       LC_data                 - class value of every pixel, indexed with linear indices of a
    %                                 [rows_data, columns_data] grid.
    %       number_classes          - number of classes.
    %       class_values            - value in LC_data that identifies each class.
    %       class_names             - cell (row) with the class names, used as table variable names.
    %       *_TS_cell, NDVI_Trend_cell, NDVI_Season_cell
    %                               - rows_data x columns_data cells holding the time series of each pixel.
    %       Spectral_Fourier_Coefficients_cell, Fourier_series_length, Landsat_last_time
    %                               - forwarded unchanged to Spectral_metrics_P.
    %       number_years            - number of years that are analysed.
    %       rows_data, columns_data - size of the data grid.
    %       MODIS_OoB               - 'Yes' to ignore pixels outside the latitude/longitude bounds
    %                                 (requires MODIS_Coordinates.mat), 'No' to use all pixels.
    %       Resolution_downscaling  - 'On' to subsample the MODIS coordinates with coarsening_factor.
    %       coarsening_factor       - subsampling step of the MODIS coordinates.
    %
    %   Outputs: none. Writes Euclidean_Distances, Jeffries_Matusita_Distances and Features (.xls and .mat).

    %%% Inputs %%%
        % Size of each section, can be similar to those used by the classifier
        Section_height      = 60;      % Rows of each section that will be classified
        Section_width       = 60;      % Columns of each section that will be classified

        ED_coarsening = 5;     % Reduces the number of samples when the average Euclidean distance is calculated, as it is computationally demanding

        % Whether or not textural features are computed as well
        Textural_features   = 'Yes';    % [Yes/No]

    %%% The data that is used to train the classifier %%%
        use_NDVI            = strcmp(Data_choices{1}, 'Yes');
        use_NDVI_Component  = strcmp(Data_choices{2}, 'Yes');
        use_LAI             = strcmp(Data_choices{3}, 'Yes');
        use_FAPAR           = strcmp(Data_choices{4}, 'Yes');
        use_Fcover          = strcmp(Data_choices{5}, 'Yes');
        use_Landsat         = strcmp(Data_source, 'Landsat');
        use_Textural        = strcmp(Textural_features, 'Yes');

    %%% Determine the annual feature data %%%
        % The number of annual time stamps
        if strcmp(Data_source, 'Copernicus')
            n_p = 36;
        elseif strcmp(Data_source, 'MODIS')
            n_p = 23;
        elseif use_Landsat
            n_p = 365;
        else
            % Fail early with a clear message instead of an 'undefined variable' error further down
            error('Class_Separability_Variability_P:unknownDataSource', 'Data_source must be ''Copernicus'', ''MODIS'' or ''Landsat''.');
        end

        % Yearly time series
        feature_data_years_cell = cell(1, number_years);

        for y = 1:number_years
            % NOTE(review): no clamping is applied here, so every time series must contain at least
            % number_years * n_p samples; an incomplete last year results in an indexing error
            t_start = (y - 1) * n_p + 1;
            t_end = y * n_p;

            if use_NDVI
                NDVI_TS_years_cell = cell(rows_data, columns_data);
            end
            if use_NDVI_Component
                NDVI_Trend_years_cell = cell(rows_data, columns_data);
                NDVI_Season_years_cell = cell(rows_data, columns_data);
            end
            if use_LAI
                LAI_TS_years_cell = cell(rows_data, columns_data);
            end
            if use_FAPAR
                FAPAR_TS_years_cell = cell(rows_data, columns_data);
            end
            if use_Fcover
                Fcover_TS_years_cell = cell(rows_data, columns_data);
            end

            % Cut the time stamps of this year out of every pixel's time series
            for r = 1:rows_data
                for c = 1:columns_data
                    if use_NDVI
                        NDVI_TS_years_cell{r, c} = NDVI_TS_cell{r, c}(t_start : t_end);
                    end
                    if use_NDVI_Component
                        NDVI_Trend_years_cell{r, c} = NDVI_Trend_cell{r, c}(t_start : t_end);
                        NDVI_Season_years_cell{r, c} = NDVI_Season_cell{r, c}(t_start : t_end);
                    end
                    if use_LAI
                        LAI_TS_years_cell{r, c} = LAI_TS_cell{r, c}(t_start : t_end);
                    end
                    if use_FAPAR
                        FAPAR_TS_years_cell{r, c} = FAPAR_TS_cell{r, c}(t_start : t_end);
                    end
                    if use_Fcover
                        Fcover_TS_years_cell{r, c} = Fcover_TS_cell{r, c}(t_start : t_end);
                    end
                end
            end

            % Feature data; if more than one data set is selected, the last one below overrides the others
            if use_NDVI
                feature_names = {'mean', 'max', 'min', 't max', 'amp', 'SoS', 'EoS', 'LoS', 'RoG', 'RoS', 'GPP'};
                [feature_data, number_features] = NDVI_metrics_P(NDVI_TS_years_cell, rows_data, columns_data, n_p);
            end
            if use_NDVI_Component
                feature_names = {'mean', 'max', 'min', 't max', 'amp', 'RoC', 'SoS', 'EoS', 'LoS', 'RoG', 'RoS', 'GPP'};
                [feature_data, number_features] = NDVI_component_metrics_P(NDVI_Trend_years_cell, NDVI_Season_years_cell, rows_data, columns_data, n_p);
            end
            if use_LAI
                feature_names = {'mean', 'max', 'min', 't max', 'amp', 'LE', 'LS', 'RoG', 'CUP', 'RoS', 'CU'};
                [feature_data, number_features] = LAI_metrics_P(LAI_TS_years_cell, rows_data, columns_data, n_p);
            end
            if use_FAPAR
                feature_names = {'mean', 'max', 'min', 't max', 'amp', 'SoG', 'EoG', 'RoG', 'LoG', 'RoS', 'CP'};
                [feature_data, number_features] = FAPAR_metrics_P(FAPAR_TS_years_cell, rows_data, columns_data, n_p);
            end
            if use_Fcover
                feature_names = {'mean', 'max', 'min', 't max', 'amp', 'SoG', 'EoG', 'RoG', 'LoG', 'RoS', 'CP'};
                [feature_data, number_features] = Fcover_metrics_P(Fcover_TS_years_cell, rows_data, columns_data, n_p);
            end
            if use_Landsat
                [feature_data, number_features, feature_names] = Spectral_metrics_P(Spectral_Fourier_Coefficients_cell, Fourier_series_length, Landsat_last_time, y);
            end

            if use_Textural
                % The first feature matrix is used
                data_matrix = feature_data{1};
                [textural_feature_data, number_textural_features] = Textural_Features(data_matrix, rows_data, columns_data);
                textural_feature_names = {'Contrast', 'Correlation', 'ASM', 'Homogeneity'};

                feature_data = [feature_data, textural_feature_data];
                feature_names = [feature_names, textural_feature_names];
                number_features = number_features + number_textural_features;
            end

            feature_data_years_cell{y} = feature_data;
        end

        % Free memory (command syntax takes space-separated names; '&' would be passed as a name)
        clear NDVI_TS_cell NDVI_Trend_cell NDVI_Season_cell LAI_TS_cell FAPAR_TS_cell Fcover_TS_cell Spectral_Fourier_Coefficients_cell

    %%% MODIS Coordinates %%%
        % These coordinates can be used to determine which indices are out of bounds

        if strcmp(MODIS_OoB, 'Yes')
            % Additionally, indices outside the bounds are also ignored
            MODIS_Coordinates_file = load('MODIS_Coordinates.mat');
            latitude_list = MODIS_Coordinates_file.latitude_list;
            longitude_matrix = MODIS_Coordinates_file.longitude_matrix;

            if strcmp(Resolution_downscaling, 'On')
                latitude_list = latitude_list(1 : coarsening_factor : end);
                longitude_matrix = longitude_matrix(1 : coarsening_factor : end, 1 : coarsening_factor : end);
            end

            lat_N = 71.5;
            lat_S = 34.5;
            long_W = -11;
            long_E = 32;

            matrix_OoB = false(rows_data, columns_data);

            % Latitude
            Latitude_OoB = latitude_list > lat_N | latitude_list < lat_S;

            if isvector(Latitude_OoB) && numel(Latitude_OoB) == rows_data
                % One latitude per row: the complete row is out of bounds.
                % Linear indexing with this vector would only flag the first column.
                matrix_OoB(Latitude_OoB, :) = true;
            else
                matrix_OoB(Latitude_OoB) = true;
            end

            % Longitude
            Longitude_OoB = longitude_matrix > long_E | longitude_matrix < long_W;
            matrix_OoB(Longitude_OoB) = true;

            ind_OoB = find(matrix_OoB);

            % Free memory
            clear matrix_OoB longitude_matrix latitude_list MODIS_Coordinates_file

        elseif strcmp(MODIS_OoB, 'No')
            ind_OoB = [];
        else
            % Fail early with a clear message instead of an 'undefined variable' error further down
            error('Class_Separability_Variability_P:unknownOoBOption', 'MODIS_OoB must be ''Yes'' or ''No''.');
        end

    %%% Define section data %%%
        number_sections_x = ceil(columns_data / Section_width);
        number_sections_y = ceil(rows_data / Section_height);
        number_sections = number_sections_x * number_sections_y;

        Class_Data_sections = cell(1, number_sections);
        Feature_Data_sections = cell(number_sections, number_years);

        for s = 1:number_sections
            % Define the section; its geometry does not depend on the year and is therefore determined once
            [s_y, s_x] = ind2sub([number_sections_y, number_sections_x], s);

            col_West = (s_x - 1) * Section_width + 1;
            row_North = (s_y - 1) * Section_height + 1;

            % When the data is not fully divisible by the section size, the final sections hold the remaining pixels
            col_East = min(s_x * Section_width, columns_data);
            row_South = min(s_y * Section_height, rows_data);

            % Convert to indices
            [columns, rows] = meshgrid(col_West : col_East, row_North : row_South);
            indices_section = sub2ind([rows_data, columns_data], rows, columns);

            % Remove indices that are out of bounds (this also yields a sorted column vector)
            indices_section = setdiff(indices_section, ind_OoB);

            % Class data
            Class_Data_sections{s} = LC_data(indices_section);

            % Feature data
            for y = 1:number_years
                feature_data_year = feature_data_years_cell{y};
                section_feature_data = cell(1, number_features);

                for f = 1:number_features
                    section_feature_data{f} = feature_data_year{f}(indices_section);
                end

                Feature_Data_sections{s, y} = section_feature_data;
            end
        end

        % Free memory
        clear feature_data_years_cell

    %%% Class separability and variability %%%
        Euclidean_distance_sections_years = cell(number_sections, number_years);
        ED_STD_sections_years = cell(number_sections, number_years);

        JM_matrix_sections_years = cell(number_sections, number_years);
        feature_matrix_sections_years = cell(number_sections, number_years);

        % Embedded parallel computing loop
        DQ = parallel.pool.DataQueue;   % Progress bar
        tick = 0;                       % Shared with the nested function ProgressUpdate
        N = number_sections * number_years;
        afterEach(DQ, @ProgressUpdate);

        for y = 1:number_years
            Feature_Data_cell = Feature_Data_sections(:, y);

            parfor s = 1:number_sections
                % Section data
                class_data = Class_Data_sections{s};
                feature_data = Feature_Data_cell{s};

                % Membership mask of every class, determined once per section
                class_masks = cell(1, number_classes);

                for c = 1:number_classes
                    class_masks{c} = class_data == class_values(c);
                end

                %%% Determine the average Euclidean distance and its STD within each class %%%
                Euclidean_distance_classes = zeros(1, number_classes);
                ED_STD_classes = zeros(1, number_classes);

                for c = 1:number_classes
                    % Find the class indices and reduce them to save on computational cost
                    class_indices = find(class_masks{c});
                    number_samples_reduced = floor(numel(class_indices) / ED_coarsening);

                    % With too few class samples the distance and its STD remain 0
                    if number_samples_reduced <= 2
                        continue
                    end

                    class_indices = datasample(class_indices, number_samples_reduced, 'replace', false);

                    % Sum of the squared pair-wise differences over all features (implicit expansion)
                    squared_distance_matrix = zeros(number_samples_reduced);

                    for f = 1:number_features
                        class_feature_values = double(feature_data{f}(class_indices));
                        class_feature_values = class_feature_values(:);

                        squared_distance_matrix = squared_distance_matrix + (class_feature_values - class_feature_values.').^2;
                    end

                    % The distance of a sample to itself is 0 by definition (avoids Inf - Inf = NaN on the diagonal)
                    squared_distance_matrix(1 : number_samples_reduced + 1 : end) = 0;

                    distance_matrix = sqrt(squared_distance_matrix);

                    % Append mean Euclidean distance and its STD, normalised with the number of features
                    Euclidean_distance_classes(c) = mean(mean(distance_matrix)) / number_features;
                    ED_STD_classes(c) = mean(std(distance_matrix)) / number_features;

                    if isnan(Euclidean_distance_classes(c))
                        Euclidean_distance_classes(c) = 0;
                    end

                    if isnan(ED_STD_classes(c))
                        ED_STD_classes(c) = 0;
                    end
                end

                Euclidean_distance_sections_years{s, y} = Euclidean_distance_classes;
                ED_STD_sections_years{s, y} = ED_STD_classes;

                %%% Determine the max. JM distances of the classes and the feature for which they occur %%%
                JM_matrix = zeros(number_classes);
                Feature_matrix = zeros(number_classes);

                for i = 1:number_classes
                    for j = i + 1:number_classes
                        % Indices belonging to classes i and j
                        class_indices_i = class_masks{i};
                        class_indices_j = class_masks{j};

                        distance_list = zeros(1, number_features);

                        for f = 1:number_features
                            % Find the metrics values for class i and j
                            class_i_metrics = feature_data{f}(class_indices_i);
                            class_j_metrics = feature_data{f}(class_indices_j);

                            % With fewer than 2 samples the variance is undefined or 0, the distance remains 0
                            if numel(class_i_metrics) < 2 || numel(class_j_metrics) < 2
                                continue
                            end

                            mean_i = mean(class_i_metrics);
                            mean_j = mean(class_j_metrics);

                            i_cov = cov(class_i_metrics);
                            j_cov = cov(class_j_metrics);

                            % If there is only 1 unique sample value or the data contains NaN, determining the distance fails
                            if i_cov == 0 || j_cov == 0 || isnan(i_cov) || isnan(j_cov)
                                continue
                            end

                            % B is the Bhattacharyya distance, JM is Jeffries-Matusita which bounds it between 0 and 2
                            mean_difference = mean_i - mean_j;
                            average_cov = 0.5 * (i_cov + j_cov);

                            B = 1/8 * mean_difference^2 / average_cov + 0.5 * log(average_cov / sqrt(i_cov * j_cov));
                            distance_list(f) = 2 * (1 - exp(-B));
                        end

                        % The maximum distance is appended, as well as the (first) feature for which it occurs
                        [JM_max, feature_index] = max(distance_list);

                        JM_matrix(i, j) = JM_max;
                        JM_matrix(j, i) = JM_max;

                        Feature_matrix(i, j) = feature_index;
                        Feature_matrix(j, i) = feature_index;
                    end
                end

                JM_matrix_sections_years{s, y} = JM_matrix;
                feature_matrix_sections_years{s, y} = Feature_matrix;

                % Update the progress bar
                send(DQ, s);
            end
        end

    %%% Average the results %%%
        Euclidean_distance_sections = cell(1, number_sections);
        ED_STD_sections = cell(1, number_sections);

        JM_matrix_sections = cell(1, number_sections);
        feature_matrix_sections = cell(1, number_sections);

        for s = 1:number_sections
            ED = zeros(number_years, number_classes);
            ED_STD = zeros(number_years, number_classes);

            JM = zeros(number_classes, number_classes, number_years);
            feature = zeros(number_classes, number_classes, number_years);

            for y = 1:number_years
                ED(y, :) = Euclidean_distance_sections_years{s, y};
                ED_STD(y, :) = ED_STD_sections_years{s, y};

                JM(:, :, y) = JM_matrix_sections_years{s, y};
                feature(:, :, y) = feature_matrix_sections_years{s, y};
            end

            % Distances are averaged over the years, for the feature the most frequent one is taken
            Euclidean_distance_sections{s} = mean(ED, 1);
            ED_STD_sections{s} = mean(ED_STD, 1);

            JM_matrix_sections{s} = mean(JM, 3);
            feature_matrix_sections{s} = mode(feature, 3);
        end

    %%% Weigh the results %%%
        JM_matrix = zeros(number_classes);
        Feature_matrix = cell(number_classes);

        Euclidean_distance_classes = zeros(1, number_classes);
        ED_STD_classes = zeros(1, number_classes);

        for c = 1:number_classes
            class_value = class_values(c);

            ED_list = zeros(1, number_sections);
            ED_STD_list = zeros(1, number_sections);

            JM_list = zeros(number_classes, number_sections);
            feature_list = zeros(number_classes, number_sections);
            pair_weight_list = zeros(number_classes, number_sections);

            % Counted over the sections, such that out-of-bounds pixels do not enter the normalisation
            number_class_samples = 0;

            for s = 1:number_sections
                % The Euclidean distance and its STD are weighted using the number of samples in the section
                class_data = Class_Data_sections{s};
                number_samples_c = nnz(class_data == class_value);
                number_class_samples = number_class_samples + number_samples_c;

                ED_list(s) = Euclidean_distance_sections{s}(c) * number_samples_c;
                ED_STD_list(s) = ED_STD_sections{s}(c) * number_samples_c;

                % The Jeffries-Matusita distance is weighted using the number of samples of both classes in the section
                for c2 = 1:number_classes
                    if c2 == c
                        continue
                    end

                    number_samples_c2 = nnz(class_data == class_values(c2));
                    pair_weight = number_samples_c * number_samples_c2;

                    % Append the weighted JM distance and the feature data
                    JM_list(c2, s) = JM_matrix_sections{s}(c, c2) * pair_weight;
                    feature_list(c2, s) = feature_matrix_sections{s}(c, c2);
                    pair_weight_list(c2, s) = pair_weight;
                end
            end

            number_samples_total_list = sum(pair_weight_list, 2)';

            % The results are appended if the class has samples in it
            if number_class_samples ~= 0
                Euclidean_distance_classes(c) = sum(ED_list) / number_class_samples;
                ED_STD_classes(c) = sum(ED_STD_list) / number_class_samples;
            else
                Euclidean_distance_classes(c) = 0;
                ED_STD_classes(c) = 0;
            end

            for c2 = 1:number_classes
                if c2 == c || number_samples_total_list(c2) == 0     % In case the classes are the same, or the pair does not occur together
                    JM = 0;
                    Feature = '-';
                else
                    JM = sum(JM_list(c2, :)) / number_samples_total_list(c2);

                    % Only sections in which both classes occur are considered; the other sections
                    % hold the default feature index 1 and would bias the result towards the first feature
                    feature_index = mode(feature_list(c2, pair_weight_list(c2, :) > 0));
                    Feature = feature_names{feature_index};
                end

                JM_matrix(c, c2) = JM;
                JM_matrix(c2, c) = JM;
                Feature_matrix{c, c2} = Feature;
                Feature_matrix{c2, c} = Feature;
            end
        end

    %%% Create the tables %%%
        disp('------------------------------------');
        disp('Maximum Jeffries-Matusita distances');

        JM_table = table(class_names', 'VariableNames', {'Classes'});

        for c = 1:number_classes
            table_column = table(JM_matrix(:, c), 'VariableNames', class_names(c));

            JM_table = [JM_table, table_column];
        end

        disp(JM_table);

        disp('------------------------------------');
        disp('Feature for which the JM distance is maximum');

        Feature_table = table(class_names', 'VariableNames', {'Classes'});

        for c = 1:number_classes
            table_column = table(Feature_matrix(:, c), 'VariableNames', class_names(c));

            Feature_table = [Feature_table, table_column];
        end

        disp(Feature_table);

        disp('------------------------------------');
        disp('Euclidean distance and its standard deviation within each class');

        ED_table = table({'Mean E.D.', 'STD E.D.'}', 'VariableNames', {'Classes'});

        for c = 1:number_classes
            table_column = table([Euclidean_distance_classes(c); ED_STD_classes(c)], 'VariableNames', class_names(c));

            ED_table = [ED_table, table_column];
        end

        disp(ED_table);
        disp('------------------------------------');

        % The tables are saved
        writetable(ED_table, 'Euclidean_Distances.xls');
        save('Euclidean_Distances.mat', 'ED_table')

        writetable(JM_table, 'Jeffries_Matusita_Distances.xls');
        save('Jeffries_Matusita_Distances.mat', 'JM_table')

        writetable(Feature_table, 'Features.xls');
        save('Features.mat', 'Feature_table')

        disp('The tables have been saved successfully')

    % Progress function, called on the client for every message sent through the data queue
    function ProgressUpdate(~)
        tick = tick + 1;

        % Ensures that at most every percent is printed
        progress_last = (tick - 1) / N * 100;
        progress = tick / N * 100;

        if floor(progress) - floor(progress_last) >= 1
            fprintf('   Progress in determining class separability and variability: %g%% \n', round(progress));
        end
    end
end