% This script classifies data using bagged or random forest classification trees
% The classifiers are taught using one data set, the others are classified using the resulting trees

%%% Inputs %%%
% feature_data_cell         : Cell array with the feature data of the pixels to be classified; it is indexed as feature_data_cell{feature, year}, i.e. one row per feature and one column per year
% train_feature_data_cell   : The feature data of the pixels used to train the classifier
% ground_truth_data         : The land cover types of the pixels used to train the classifier
% SaveTestData              : ['Yes'/'No'], signifies whether a test partition should be held out, classified and returned (valid sections) or not (invalid sections), to avoid unnecessary overhead

% Other inputs are explained in Annual_Classifier_P.m

%%% Outputs %%%
% Classified_Test_Data         : Contains the resulting land cover types, generated by the test data for accuracy assessment
% Ground_truth_data_test       : The ground truth test data
% Classified_Land_Cover_cell   : The resulting land cover classification, in the shape of a cell with the results of each year in each entry

function [Classified_Test_Data, Ground_truth_data_test, Classified_Land_Cover_cell] = Decision_Tree_Classifier(Sampling_Technique, Ensemble_Technique, SaveTestData, number_years, feature_data_cell, train_feature_data_cell, ground_truth_data, number_class_samples, number_features, Number_Trees, Number_Splits, k, Alpha, Beta, number_data_partitions)
    % Trains an ensemble of classification trees on the training pixels and
    % classifies the pixels of every year by majority vote over the trees.
    %
    % Inputs used directly by this function:
    %   Sampling_Technique      : 'Oversampling', 'B-Oversampling', 'SMOTE' or 'B-SMOTE';
    %                             selects the helper that rebalances each tree's training data
    %   Ensemble_Technique      : 'Bagging' (every tree sees all features) or 'RF'
    %                             (every tree sees a random subset of round(sqrt(number_features)) features)
    %   SaveTestData            : 'Yes' to hold out the last of number_data_partitions folds as
    %                             test data and return its classification, 'No' to skip this
    %   number_years            : number of years (columns of feature_data_cell) to classify
    %   feature_data_cell       : cell indexed as {feature, year}; each entry holds
    %                             number_class_samples values
    %   train_feature_data_cell : cell with one entry per feature holding the training pixels
    %   ground_truth_data       : class labels of the training pixels; the value 999 marks a
    %                             missing pixel, which is removed before training
    %   number_class_samples    : number of pixels to classify per year
    %   number_features         : number of features
    %   Number_Trees            : number of trees in the ensemble
    %   Number_Splits           : passed to fitctree as 'MaxNumSplits'
    %   k, Alpha, Beta          : forwarded unchanged to the sampling helpers
    %   number_data_partitions  : number of folds; one fold is the test data, and each tree
    %                             is trained on one of (number_data_partitions - 1) random folds
    %
    % Outputs:
    %   Classified_Test_Data       : 1 x number_test_samples row of voted labels ([] if SaveTestData is 'No')
    %   Ground_truth_data_test     : column of true labels of the test pixels ([] if SaveTestData is 'No')
    %   Classified_Land_Cover_cell : 1 x number_years cell; each entry is a
    %                                1 x number_class_samples row of voted labels
    %
    % Raises an identified error when one of the three option strings is not recognised.

    %%% Validate the option strings %%%
    % Without these checks an unknown option falls through every branch below
    % and only fails later with an "undefined variable" error.
    if ~any(strcmp(Sampling_Technique, {'Oversampling', 'B-Oversampling', 'SMOTE', 'B-SMOTE'}))
        error('Decision_Tree_Classifier:invalidSamplingTechnique', ...
              'Sampling_Technique must be one of: Oversampling, B-Oversampling, SMOTE, B-SMOTE.');
    end

    if ~any(strcmp(Ensemble_Technique, {'Bagging', 'RF'}))
        error('Decision_Tree_Classifier:invalidEnsembleTechnique', ...
              'Ensemble_Technique must be one of: Bagging, RF.');
    end

    if ~any(strcmp(SaveTestData, {'Yes', 'No'}))
        error('Decision_Tree_Classifier:invalidSaveTestData', ...
              'SaveTestData must be one of: Yes, No.');
    end

    save_test_data = strcmp(SaveTestData, 'Yes');
    use_random_forest = strcmp(Ensemble_Technique, 'RF');

    %%% Define feature and ground truth data used in training the classifier and testing its accuracy %%%
    % Matrix containing the feature data used to train and test the classifier
    % (one row per pixel, one column per feature)
    number_training_samples = numel(train_feature_data_cell{1});

    Feature_data_matrix = zeros(number_training_samples, number_features);

    for f = 1:number_features
        Feature_data_matrix(:, f) = train_feature_data_cell{f}(:);
    end

    % Column vector containing the ground truth data used to train and test the classifier
    Ground_truth_data_list = ground_truth_data(:);

    % Free memory (command syntax takes space-separated names only)
    clear train_feature_data_cell ground_truth_data

    % Remove missing pixels (label 999) from the training data
    missing_mask = (Ground_truth_data_list == 999);

    Ground_truth_data_list(missing_mask) = [];
    Feature_data_matrix(missing_mask, :) = [];

    number_training_samples = length(Ground_truth_data_list);

    % The last partition forms the test data, other partitions are used for training.
    % The partition is created even when no test data is saved, so that the
    % random number stream is consumed identically in both modes.
    data_partitions = cvpartition(number_training_samples, 'KFold', number_data_partitions);

    if save_test_data
        index_test = test(data_partitions, number_data_partitions);
        Feature_data_matrix_test = Feature_data_matrix(index_test, :);
        Ground_truth_data_test = Ground_truth_data_list(index_test);

        number_test_samples = length(Ground_truth_data_test);
    else
        index_test = [];
        Ground_truth_data_test = [];
    end

    % The training data: everything that is not in the test partition
    Feature_data_matrix_training = Feature_data_matrix;
    Feature_data_matrix_training(index_test, :) = [];
    Ground_truth_data_training = Ground_truth_data_list;
    Ground_truth_data_training(index_test, :) = [];

    number_training_samples = length(Ground_truth_data_training);

    %%% Define feature data of the pixels that are to be classified %%%
    Feature_data_classification_cell = cell(1, number_years);

    for y = 1:number_years
        % Feature data of this year (one cell entry per feature)
        Feature_data_year = feature_data_cell(:, y);

        % Matrix format: one row per pixel, one column per feature
        Feature_data_year_matrix = zeros(number_class_samples, number_features);

        for f = 1:number_features
            Feature_data_year_matrix(:, f) = Feature_data_year{f}(:);
        end

        Feature_data_classification_cell{y} = Feature_data_year_matrix;
    end

    %%% Decision tree classification %%%
    % Per-tree predictions for the test data, if desired
    if save_test_data
        Classified_Test_Data_Trees_matrix = zeros(number_test_samples, Number_Trees);
    end

    % Per-tree predictions for each year
    Classified_Years_Trees_cell = cell(1, number_years);

    for y = 1:number_years
        Classified_Years_Trees_cell{y} = zeros(number_class_samples, Number_Trees);
    end

    %%% Forest (ensemble) of trees %%%
    for t = 1:Number_Trees
        % Random data is selected to grow (train) this tree: one fold out of
        % (number_data_partitions - 1), since one partition was already reserved
        data_partitions_tree = cvpartition(number_training_samples, 'KFold', number_data_partitions - 1);
        tree_training_indices = test(data_partitions_tree, 1);

        Feature_data_train_tree = Feature_data_matrix_training(tree_training_indices, :);
        Ground_truth_data_train_tree = Ground_truth_data_training(tree_training_indices, :);

        % If a random forest is created, a feature subset is drawn once for the whole tree
        if use_random_forest
            % The very common square root of the number of features is used
            number_features_RF = round(sqrt(number_features));

            % Select the features randomly, without replacement
            features_RF = datasample(1:number_features, number_features_RF, 'Replace', false);

            % The training data of these randomly selected features
            Feature_data_train_tree = Feature_data_train_tree(:, features_RF);
        end

     %--% Rebalance the classes through oversampling %--%
        % Determine the members of each class present in this tree's training data
        class_values = unique(Ground_truth_data_train_tree);
        number_classes = length(class_values);

        number_samples_list = zeros(1, number_classes);
        class_samples_cell = cell(1, number_classes);

        for c = 1:number_classes
            class_samples = find(Ground_truth_data_train_tree == class_values(c));

            number_samples_list(c) = length(class_samples);
            class_samples_cell{c} = class_samples;
        end

        % Sampling_Technique was validated above, so exactly one case matches
        switch Sampling_Technique
            case 'Oversampling'     % Standard oversampling
                [Ground_truth_data_train_tree_total, Feature_data_train_tree_total, delta_number_samples_list] = Oversampler(Ground_truth_data_train_tree, class_values, Feature_data_train_tree, Beta, number_classes, number_samples_list, class_samples_cell);

            case 'B-Oversampling'   % Borderline oversampling
                [Ground_truth_data_train_tree_total, Feature_data_train_tree_total, delta_number_samples_list] = Borderline_Oversampler(Ground_truth_data_train_tree, class_values, Feature_data_train_tree, k, Beta, number_classes, number_samples_list, class_samples_cell, number_features);

            case 'SMOTE'            % SMOTE sampling
                [Ground_truth_data_train_tree_total, Feature_data_train_tree_total, delta_number_samples_list] = SMOTE(Ground_truth_data_train_tree, class_values, Feature_data_train_tree, k, Alpha, Beta, 'No', number_classes, number_samples_list, class_samples_cell, number_features);

            case 'B-SMOTE'          % Borderline-SMOTE sampling
                [Ground_truth_data_train_tree_total, Feature_data_train_tree_total, delta_number_samples_list] = SMOTE(Ground_truth_data_train_tree, class_values, Feature_data_train_tree, k, Alpha, Beta, 'Yes', number_classes, number_samples_list, class_samples_cell, number_features);
        end

     %--% Correct the probability of each class %--%
        prior_probability_list = ones(1, number_classes);

        % Creating a prior probability list is only sensible if more than one class is present
        if number_classes > 1
            % Loop-invariant totals before resampling and added by resampling
            total_original_samples = sum(number_samples_list);
            total_delta_samples = sum(delta_number_samples_list);

            for c = 1:number_classes
                % The number of samples for this class (seen as minority)
                number_samples_class = number_samples_list(c);
                delta_number_samples_class = delta_number_samples_list(c);
                number_samples_class_total = number_samples_class + delta_number_samples_class;

                % The number of pixels in the other classes (seen as majority)
                number_pixels_rest = total_original_samples + total_delta_samples - number_samples_class_total;

                % The degree of oversampling/undersampling: class-vs-rest ratio
                % after resampling divided by the same ratio before resampling
                degree_sampling = (number_samples_class_total / number_pixels_rest) / (number_samples_class / (total_original_samples - number_samples_class));

                % Frequency based probability estimate.
                % Note: algebraically this expression reduces to
                % number_samples_class / total_original_samples, i.e. the class
                % frequency before resampling.
                prior_probability_list(c) = number_samples_class_total / (number_samples_class_total + degree_sampling * number_pixels_rest);
            end
        end

     %--% Create the classification tree %--%
        Classification_Tree = fitctree(Feature_data_train_tree_total, Ground_truth_data_train_tree_total, 'MinParentSize', 1, 'MaxNumSplits', Number_Splits, 'Prior', prior_probability_list, 'SplitCriterion', 'deviance', 'prune', 'off');

     %--% Gather classification data %--%
        % Test data
        if save_test_data
            if use_random_forest
                % Only the features used to grow the tree can be used
                Feature_data_test_tree = Feature_data_matrix_test(:, features_RF);
            else
                % Bagging: all the feature data can be used
                Feature_data_test_tree = Feature_data_matrix_test;
            end

            Classified_Test_Data_Trees_matrix(:, t) = predict(Classification_Tree, Feature_data_test_tree);
        end

        % Classify the land cover of every year with this tree
        for y = 1:number_years
            Feature_data_classification_year = Feature_data_classification_cell{y};

            if use_random_forest
                % With a random forest, only part of the feature data is used
                Feature_data_classification_year = Feature_data_classification_year(:, features_RF);
            end

            Classified_Years_Trees_cell{y}(:, t) = predict(Classification_Tree, Feature_data_classification_year);
        end
    end

 %--% Majority voting over the trees %--%
    % mode(..., 2) takes the most frequent label of each row (pixel) in one call;
    % the transpose keeps the row-vector output shape.

    % Test data
    if save_test_data
        Classified_Test_Data = mode(Classified_Test_Data_Trees_matrix, 2).';
    else
        Classified_Test_Data = [];
    end

    % Pixels to be classified
    Classified_Land_Cover_cell = cell(1, number_years);

    for y = 1:number_years
        Classified_Land_Cover_cell{y} = mode(Classified_Years_Trees_cell{y}, 2).';
    end
end





