% This function determines which sections cannot be used for their own classification (invalid sections)
% The usable land indices of such sections may be moved to other sections, to make those sections suitable
% Additionally, the invalid pixels are grouped into clusters, and for each cluster the indices used as training data are returned

function [valid_sections, invalid_sections, sea_sections, Indices_sections_updated, Number_invalid_clusters, Indices_invalid_clusters, Indices_invalid_clusters_training] = Invalid_Regions(land_cover_threshold, max_cluster_size, rows_data, columns_data, Indices_sections, Water_indices, Missing_indices, Number_sections, Section_size)
    % INVALID_REGIONS  Split sections into valid / invalid / sea and cluster the invalid pixels.
    %
    % Inputs:
    %   land_cover_threshold - Minimum percentage of land pixels a section needs to keep its land pixels
    %   max_cluster_size     - Maximum cluster size, expressed in sections (also the maximum number of training sections)
    %   rows_data            - Number of rows of the data matrix (used with ind2sub)
    %   columns_data         - Number of columns of the data matrix (used with ind2sub)
    %   Indices_sections     - Cell array with the linear indices of each section
    %   Water_indices        - Linear indices of the water pixels
    %   Missing_indices      - Linear indices of the pixels that are treated as missing
    %   Number_sections      - Number of sections (entries of Indices_sections that are processed)
    %   Section_size         - Side length of a section in pixels (the cluster size limit is max_cluster_size * Section_size^2)
    %
    % Outputs:
    %   valid_sections                    - Row vector with the numbers of the sections that are not marked invalid
    %   invalid_sections                  - Row vector with the numbers of the sections that are marked invalid
    %   sea_sections                      - 1 x Number_sections flag vector, 1 if the section is assumed to be sea
    %   Indices_sections_updated          - Cell array with the indices of each section after the land pixels were moved
    %   Number_invalid_clusters           - Number of clusters created from the invalid pixels
    %   Indices_invalid_clusters          - Cell array with the indices of each invalid cluster
    %   Indices_invalid_clusters_training - Cell array with the training indices belonging to each invalid cluster
    %
    % Requires bwconncomp, bwdist, regionprops, kmeans and datasample.
    
    %%% Ensure horizontal shape of the input data %%%
        for s = 1:Number_sections
            Indices_sections{s} = Indices_sections{s}(:)';
        end

        % Row vectors s.t. the set operations below return row vectors that can be concatenated horizontally
        Water_indices = Water_indices(:)';
        Missing_indices = Missing_indices(:)';
            
        Indices_sections_updated = Indices_sections;    % Will contain the updated indices

    %%% Determine whether a section contains too many missing or sea indices %%%
        invalid_sections_list = zeros(1, Number_sections);                          % 1 if the section cannot classify itself
        sea_sections = zeros(1, Number_sections);                                   % 1 if the section contains only sea pixels

        Valid_indices = cell(1, Number_sections);                                   % The valid land indices that have to be moved
        
        for s = 1:Number_sections
            % Indices within this section
            ind = Indices_sections{s};
            number_indices = length(ind);

            % Water indices within this section
            water_ind = intersect(Water_indices, ind);

            % Missing indices within this section
            missing_ind = intersect(Missing_indices, ind);

            % The remaining land indices that can be used for classification
            land_ind = setdiff(ind, [missing_ind, water_ind]);
            percent_land = length(land_ind) / number_indices * 100;     % NaN for an empty section, which is handled below
            
            if percent_land < land_cover_threshold
                % Save valid land indices to be moved to another section
                Valid_indices{s} = land_ind;
                                
                if isempty(missing_ind)                     % The only invalid indices are the water indices, and the section can be classified
                    Indices_sections_updated{s} = water_ind;
                    sea_sections(s) = 1;                    % The section is assumed to be sea
                    
                else                                        % There are indices for which land cover data is not available
                    Indices_sections_updated{s} = [water_ind, missing_ind];
                    invalid_sections_list(s) = 1;           % The section contains too few usable land pixels and is invalid for classification
                end
            
            elseif number_indices == 0  % If the section is empty, it is also considered invalid
                invalid_sections_list(s) = 1;
            end
        end

        valid_sections = find(invalid_sections_list == 0);
        number_valid_sections = length(valid_sections);
        invalid_sections = find(invalid_sections_list == 1);
        number_invalid_sections = length(invalid_sections);
        
        valid_sections = reshape(valid_sections, [1, number_valid_sections]);          % Ensures horizontal vector shape
        invalid_sections = reshape(invalid_sections, [1, number_invalid_sections]);

        % Sections that are valid and contain land pixels
        valid_land_sections = setdiff(valid_sections, find(sea_sections == 1));
        number_valid_land_sections = length(valid_land_sections);
        
    %%% Create a valid section, if none exist %%%
        valid_indices_total = horzcat(Valid_indices{:});
        
        if number_valid_land_sections == 0 && number_invalid_sections > 1 && ~isempty(valid_indices_total)
            section = invalid_sections(1);    % The first invalid section is appropriated
            section_2 = invalid_sections(2);
                        
            % Invalid indices are placed in the second section
            invalid_ind = [Indices_sections_updated{section}, Indices_sections_updated{section_2}];
            Indices_sections_updated{section_2} = invalid_ind;
            
            % Valid indices are placed in the first section
            Indices_sections_updated{section} = valid_indices_total;
            
            % Change the markers
            number_valid_land_sections = 1;
            valid_land_sections = section;
            valid_sections = [valid_sections, section];
            
            invalid_sections(1) = [];
        end
              
    %%% Determine the 'centre of gravity' of each of the valid land sections %%%
        % These vectors are indexed by the position within valid_land_sections, not by the section number
        cg_valid_sections_y = NaN(1, number_valid_land_sections);
        cg_valid_sections_x = NaN(1, number_valid_land_sections);
    
        for s = 1:number_valid_land_sections
            % The updated indices are used, s.t. an appropriated section is represented by its actual content
            ind = Indices_sections_updated{valid_land_sections(s)};

            [cg_valid_sections_y(s), cg_valid_sections_x(s)] = section_centre_of_gravity(rows_data, columns_data, ind);
        end
        
    %%% Determine which valid section should contain the valid indices %%%
        for s = 1 : Number_sections        
            valid_ind = Valid_indices{s};

            if isempty(valid_ind)
                continue
            end

            % Without any valid land section there is nowhere to move the land indices to,
            % they are returned to their own section s.t. they are not lost
            if number_valid_land_sections == 0
                Indices_sections_updated{s} = [Indices_sections_updated{s}, valid_ind];
                continue
            end

            % 'Centre of gravity' of these valid indices
            [cg_y, cg_x] = section_centre_of_gravity(rows_data, columns_data, valid_ind);

            % Valid land sections, ordered from closest to furthest
            diff_sections = sqrt((cg_valid_sections_y - cg_y).^2 + (cg_valid_sections_x - cg_x).^2);
            [~, diff_order] = sort(diff_sections);

            % The closest section without any intersect is selected, as otherwise a level of iteration is lost
            target = 0;

            for i = 1 : number_valid_land_sections
                candidate = diff_order(i);

                if isempty(intersect(valid_ind, Indices_sections_updated{valid_land_sections(candidate)}))
                    target = candidate;
                    break
                end
            end

            % If no section could be found without intersection, the non-overlapping indices are appended to the closest section
            if target == 0
                target = diff_order(1);
                valid_ind = setdiff(valid_ind, Indices_sections_updated{valid_land_sections(target)});
            end

            % The new indices are appended to the selected section
            section = valid_land_sections(target);
            indices_section_updated = [Indices_sections_updated{section}, valid_ind];
            Indices_sections_updated{section} = indices_section_updated;

            % Compute the new centre of gravity
            [cg_valid_sections_y(target), cg_valid_sections_x(target)] = section_centre_of_gravity(rows_data, columns_data, indices_section_updated);
        end
        
    %%% Create clusters of the invalid pixels %%%     
        % The maximum cluster size in the number of pixels
        maximum_region_size = max_cluster_size * Section_size^2;
    
        % Label matrix
        invalid_pixels = Indices_sections_updated(invalid_sections);
        invalid_pixels = horzcat(invalid_pixels{:});
        
        invalid_pixels_matrix = zeros(rows_data, columns_data);
        invalid_pixels_matrix(invalid_pixels) = 1;
    
        % Detect connected invalid regions
        Connected_Invalid_Regions = bwconncomp(invalid_pixels_matrix, 4);     % Only pixels directly connected via the edges are taken into account
        invalid_regions_pixels = Connected_Invalid_Regions.PixelIdxList;

        number_invalid_regions = length(invalid_regions_pixels);
        
        % Clusters are created based on the distance to valid data, as well as the distance to the centroid of each invalid region
        valid_data_distance_matrix = bwdist(~invalid_pixels_matrix);                        % The distance to valid data
        Invalid_Regions_Centroids = regionprops(Connected_Invalid_Regions, 'centroid');     % The centroid of each invalid region

        % Valid indices are not included
        indices_valid_sections = Indices_sections_updated(valid_sections);
        indices_valid_sections = horzcat(indices_valid_sections{:});

        % The first sea section is the fallback training data (sea_sections holds flags, not section numbers)
        first_sea_section = find(sea_sections == 1, 1);
        
        % Divide the regions into clusters of the desired size
        Indices_invalid_clusters = cell(1, number_invalid_regions);
        Indices_invalid_clusters_training = cell(1, number_invalid_regions);
        
        for r = 1 : number_invalid_regions
            % This region's indices, as a column vector
            region_indices = setdiff(invalid_regions_pixels{r}, indices_valid_sections);
            region_indices = region_indices(:);
            
            number_region_pixels = length(region_indices);
            
            % This region's centroid, given as [x, y]
            centroid = Invalid_Regions_Centroids(r).Centroid;
            centroid_x = centroid(1);
            centroid_y = centroid(2);
            
            % The number of clusters to be generated
            number_clusters = ceil(number_region_pixels / maximum_region_size);

            if number_clusters > 1
                % Distance to the region's centroid
                [rows, columns] = ind2sub([rows_data, columns_data], region_indices);

                distance_x = centroid_x - columns;
                distance_y = centroid_y - rows;

                % Distance to valid data
                valid_data_distance = valid_data_distance_matrix(region_indices);

                % Clusters are generated using the distance to valid data and the distance to the region's centroid
                region_data = [distance_x, distance_y, valid_data_distance];

                cluster_indices_list = kmeans(region_data, number_clusters);        % Cluster label of each pixel, these do not correspond to [rows_data, columns_data]
            else
                % A single cluster, every pixel receives label 1
                cluster_indices_list = ones(number_region_pixels, 1);
            end
            
            % The indices of the clusters
            cluster_indices_cell = cell(1, number_clusters);
            
            for c = 1 : number_clusters
                cluster_indices_cell{c} = region_indices(cluster_indices_list == c);
            end
            
            % Determine valid training data for each cluster
            cluster_training_indices_cell = cell(1, number_clusters);
            
            for c = 1 : number_clusters
                if number_valid_land_sections >= 1
                    % This cluster's indices
                    cluster_indices = cluster_indices_cell{c};

                    % The centroid of this cluster
                    [cg_y, cg_x] = section_centre_of_gravity(rows_data, columns_data, cluster_indices);

                    % The distance to valid sections
                    distance_total = sqrt((cg_x - cg_valid_sections_x).^2 + (cg_y - cg_valid_sections_y).^2);

                    % The nearest valid sections are used as training data
                    [~, order] = sort(distance_total, 'ascend');
                    sections = valid_land_sections(order);

                    max_number_sections = min(number_valid_land_sections, max_cluster_size);
                    training_sections = sections(1 : max_number_sections);

                    % The pixels of these sections (row vectors, thus concatenated horizontally)
                    cluster_training_pixels = Indices_sections_updated(training_sections);
                    cluster_training_pixels = unique(horzcat(cluster_training_pixels{:}));
                    number_training_pixels = length(cluster_training_pixels);

                    % Determine the percentage of water pixels in the cluster
                    water_indices_cluster = intersect(Water_indices, cluster_indices);
                    water_percentage_cluster = length(water_indices_cluster) / length(cluster_indices) * 100;
                
                    % Determine the percentage of water pixels for the training data
                    training_water_pixels_cluster = intersect(Water_indices, cluster_training_pixels);
                    training_water_percentage_cluster = length(training_water_pixels_cluster) / number_training_pixels * 100;

                    % Pixels are added or removed, to make the percentage of water pixels in the training data equal to that of the cluster
                    water_percentage_difference = water_percentage_cluster - training_water_percentage_cluster;

                    if water_percentage_difference > 0          % There are more water pixels in the invalid cluster
                        number_additional_sea_pixels = round(water_percentage_difference / 100 * number_training_pixels);
                        number_additional_sea_pixels = min(number_additional_sea_pixels, numel(Water_indices));     % Sampling without replacement cannot exceed the pool

                        % Random valid sea pixels are added, as they are invariant
                        additional_water_pixels = datasample(Water_indices, number_additional_sea_pixels, 'Replace', false);
                        cluster_training_pixels = [cluster_training_pixels, additional_water_pixels];

                    elseif water_percentage_difference < 0      % There are fewer water pixels in the invalid cluster
                        number_removed_sea_pixels = round(-water_percentage_difference / 100 * number_training_pixels);
                        number_removed_sea_pixels = min(number_removed_sea_pixels, numel(training_water_pixels_cluster));

                        % Random water pixels are removed from the reference cluster
                        removed_water_pixels = datasample(training_water_pixels_cluster, number_removed_sea_pixels, 'Replace', false);
                        cluster_training_pixels = setdiff(cluster_training_pixels, removed_water_pixels);
                    end

                % If no valid land sections exist, instead the first sea section is used
                elseif ~isempty(first_sea_section)
                    cluster_training_pixels = Indices_sections_updated{first_sea_section};

                % Neither valid land sections nor sea sections exist, and no training data is available
                else
                    cluster_training_pixels = zeros(1, 0);
                end
                
                cluster_training_indices_cell{c} = cluster_training_pixels;
            end    
            
            % Append the data for the clusters within this region
            Indices_invalid_clusters{r} = cluster_indices_cell;
            Indices_invalid_clusters_training{r} = cluster_training_indices_cell;
        end
        
        % The aggregated results
        Indices_invalid_clusters = horzcat(Indices_invalid_clusters{:});
        Indices_invalid_clusters_training = horzcat(Indices_invalid_clusters_training{:});
        
        Number_invalid_clusters = length(Indices_invalid_clusters);
end

% Rounded mean row (cg_y) and mean column (cg_x) of the linear indices ind within a [rows_data, columns_data] matrix
function [cg_y, cg_x] = section_centre_of_gravity(rows_data, columns_data, ind)
    [rows, columns] = ind2sub([rows_data, columns_data], ind);

    cg_y = round(mean(rows));
    cg_x = round(mean(columns));
end














