% This script merges the pre-processed data, whether it be of mat or geotiff format

% Data_type         = The type of data that is merged, for instance 'NDVI' or 'MOD13Q1'
% Data_source       = [Copernicus, MODIS, Landsat], the source of the data - this is needed to load the correct projection information
% file_format       = [mat / geotiff], denotes the type of the files to be merged (note that .tif files are also accepted)
%                     Note that if mat files are merged, file_date_list and number_files may be empty
% chunk_deletion    = [On / Off], determines whether the files of the individual chunks are removed after merging

% Plot          = [On / Off], whether or not plots are created of the merged data files   
% RGB_codes     = If plots are created of land cover, the colours of each class can be specified here, can be empty otherwise
% class_values  = The identifying numbers of the classes, can be empty if other data is merged

% This file is suited for parallel computing
function Data_Merger_P(Data_type, Data_source, R, Resolution_downscaling, coarsening_factor, number_chunks_x, number_chunks_y, rows_chunks_o, columns_chunks_o, zero_string, file_format, number_files, file_date_list, Plot, RGB_codes, class_values, chunk_deletion)
    % DATA_MERGER_P merges the per-chunk files found in the current folder into files covering the full domain
    %
    % Data_type                 = Prefix of the chunk files and of the merged files, for instance 'NDVI' or 'MOD13Q1'
    % Data_source               = [Copernicus, MODIS, Landsat], selects the GeoKey file used when writing geotiff files,
    %                             and selects the Fourier coefficients branch when mat files are merged
    % R                         = Raster reference that is passed to geotiffwrite, its RasterSize is set to the merged size if it differs
    % Resolution_downscaling    = [On / Off], if On the chunk sizes are divided by coarsening_factor (rounded up)
    % coarsening_factor         = The factor by which the chunk sizes are divided, only used if Resolution_downscaling is On
    % number_chunks_x/y         = The number of chunks in the column (x) and row (y) direction
    % rows_chunks_o             = Cumulative row bounds: chunk c spans the rows rows_chunks_o(c) + 1 to rows_chunks_o(c + 1)
    % columns_chunks_o          = Cumulative column bounds, analogous to rows_chunks_o
    % zero_string               = Format specification given to num2str to create the chunk numbers in the file names
    %                             (presumably a zero-padded integer format - to be confirmed with the caller)
    % file_format               = [mat / geotiff], the type of the files that are merged
    % number_files              = The number of dates that are merged, only used for geotiff files
    % file_date_list            = Cell array with the date strings, dates of the format DD.MM.YYYY are converted to YYYYMMDD
    % Plot                      = [On / Off], whether or not a png image is created of each merged file
    % RGB_codes, class_values   = Colours and identifiers of the land cover classes, only used when 'SLC', 'LC' or 'CLC' data is plotted
    % chunk_deletion            = [On / Off], whether or not the files matching '<Data_type>_*x*y*' are deleted afterwards
    %
    % Nothing is returned, the merged data is written to the current folder

    % Cumulative bounds of the chunks within the merged data
    if strcmp(Resolution_downscaling, 'On')
        columns_chunks = Coarsen_Chunk_Bounds(columns_chunks_o, number_chunks_x, coarsening_factor);
        rows_chunks = Coarsen_Chunk_Bounds(rows_chunks_o, number_chunks_y, coarsening_factor);
    else
        columns_chunks = columns_chunks_o;
        rows_chunks = rows_chunks_o;
    end

    % The size of the merged data
    rows_data = rows_chunks(end);
    columns_data = columns_chunks(end);

    %%% If geotiff files are merged, the file of each date is merged individually %%%
    if strcmp(file_format, 'geotiff')
        % A sample chunk file is used to retrieve the suffix, the extension and the data class
        file_list = dir(sprintf('%s*x*y*.tif*', Data_type));

        if ~isempty(file_list)
            file_list = file_list(~contains({file_list.name}, 'pixel'));    % Pixel or doy can throw off this logic
        end

        if isempty(file_list)
            error('Data_Merger_P:noChunkFiles', 'No %s geotiff chunk files were found in the current folder', Data_type);
        end

        sample_file_name = file_list(1).name;
        [file_suffix, geotiff_type] = Chunk_File_Name_Parts(sample_file_name);

        % uint8 and uint16 data keeps its class, all other data is merged as doubles
        sample_class = class(geotiffread(sample_file_name));

        if any(strcmp(sample_class, {'uint8', 'uint16'}))
            merged_class = sample_class;
        else
            merged_class = 'double';
        end

        % The resolution of R is changed, if need be
        if R.RasterSize(1) ~= rows_data || R.RasterSize(2) ~= columns_data
            R.RasterSize = [rows_data, columns_data];
        end

        % The projection information does not depend on the date, so it is loaded once rather than within the loop
        geokey_tag = [];
        uses_geokey = false;
        source_known = true;

        if strcmp(Data_source, 'MODIS')
            Key_file = load('MODIS_NDVI_Geokey.mat');
            geokey_tag = Key_file.Tag;
            uses_geokey = true;
        elseif strcmp(Data_source, 'Landsat')
            Key_file = load('Landsat_Geokey.mat');
            geokey_tag = Key_file.Tag;
            uses_geokey = true;
        elseif ~strcmp(Data_source, 'Copernicus')
            source_known = false;
            warning('Data_Merger_P:unknownDataSource', 'Data_source is not Copernicus, MODIS or Landsat, so no merged geotiff files are written');
        end

        % Whether the merged data is shown with the land cover colours
        land_cover_colours = any(strcmp(Data_type, {'SLC', 'LC', 'CLC'})) && ~isempty(RGB_codes);

        % The progress is reported through a data queue, as the dates are merged in parallel
        DQ = parallel.pool.DataQueue;
        tick = 0;
        N = number_files;
        afterEach(DQ, @ProgressUpdate);

        parfor k = 1:number_files
            date_string = file_date_list{k};
            date_split = strsplit(date_string, '.');

            if length(date_split) == 3    % The date format is changed from DD.MM.YYYY to YYYYMMDD
                date_string = [date_split{3}, date_split{2}, date_split{1}];
            end

            % Aggregated data for this date
            data_matrix = zeros(rows_data, columns_data, merged_class);

            for c_x = 1:number_chunks_x
                % Bounds of this chunk
                column_start = columns_chunks(c_x) + 1;
                column_end = columns_chunks(c_x + 1);

                for c_y = 1:number_chunks_y
                    row_start = rows_chunks(c_y) + 1;
                    row_end = rows_chunks(c_y + 1);

                    % Data of this chunk
                    chunk_string = Chunk_String(c_x, c_y, zero_string);
                    data_file_name = sprintf('%s_%s_%s%s%s', Data_type, date_string, chunk_string, file_suffix, geotiff_type);

                    data_chunk = geotiffread(data_file_name);

                    % With MODIS classification, a few indices showed 0
                    % NOTE(review): for uint8 chunks the value 511 saturates to 255 - confirm that CLC chunks are stored in a wider class
                    if strcmp(Data_type, 'CLC')
                        data_chunk(data_chunk == 0) = 511;
                    end

                    % Append the data
                    data_matrix(row_start : row_end, column_start : column_end) = data_chunk;
                end
            end

            % Name of the merged data file, remapped or dekad strings are removed
            merged_name = sprintf('%s_%s%s', Data_type, date_string, file_suffix);
            merged_name = erase(merged_name, 'DEKAD_');
            merged_name = erase(merged_name, 'REMAPPED_');

            % Write the merged data file
            if source_known
                if uses_geokey
                    geotiffwrite(merged_name, data_matrix, R, 'GeoKeyDirectoryTag', geokey_tag);
                else
                    geotiffwrite(merged_name, data_matrix, R);
                end

                % The extra f is added, if need be
                if strcmp(geotiff_type, '.tiff')
                    movefile([merged_name, '.tif'], [merged_name, '.tiff']);
                end
            end

            % Plot the merged data
            if strcmp(Plot, 'On')
                figure_name = [merged_name, '.png'];

                if land_cover_colours
                    % Land cover is shown using the given colour of each class
                    land_cover_image = Land_Cover_Image(data_matrix, class_values, RGB_codes);
                    Export_Image_Figure(land_cover_image, false, '', figure_name);
                else
                    % Otherwise, a plot is created with a simple gradient colormap
                    Export_Image_Figure(data_matrix, true, Data_type, figure_name);
                end
            end

            % Progress update
            send(DQ, k);
        end

    %%% If mat files are merged, they have to be merged at once %%%
    elseif strcmp(file_format, 'mat')
        % The suffix (Forecast) is retrieved
        file_list = dir(sprintf('%s*x*y*.mat', Data_type));

        if isempty(file_list)
            error('Data_Merger_P:noChunkFiles', 'No %s mat chunk files were found in the current folder', Data_type);
        end

        file_suffix = Chunk_File_Name_Parts(file_list(1).name);

        % The NDVI, LST, GPM data is assumed to be trend and seasonal components
        if any(strcmp(Data_type, {'NDVI', 'MOD13Q1', 'LST', 'GPM'}))
            % To reduce the memory cost, the components are merged one after another,
            % the memory of a component is released when its helper call returns
            Component_types_cell = {'Season', 'Trend'};

            for c = 1 : length(Component_types_cell)
                Merge_Component_Files(Data_type, Component_types_cell{c}, file_suffix, number_chunks_x, number_chunks_y, rows_chunks, columns_chunks, zero_string, Plot);
            end

        % Otherwise, if the data source is Landsat, it is assumed that they are Fourier coefficients
        elseif strcmp(Data_source, 'Landsat')
            Merge_Landsat_Fourier_Files(Data_type, number_chunks_x, number_chunks_y, rows_chunks, columns_chunks, zero_string);
        end
    end

    %%% Remove the subset files %%%
    if strcmp(chunk_deletion, 'On')
        subset_files = sprintf('%s_*x*y*', Data_type);
        delete(subset_files);
    end

    % Progress function, it is called on the client each time a date has been merged
    % tick, N and Data_type are shared with the enclosing function
    function ProgressUpdate(~)
        tick = tick + 1;

        % Ensures that at most every percent is printed
        progress_last = (tick - 1) / N * 100;
        progress = tick / N * 100;

        if floor(progress) - floor(progress_last) >= 1
            fprintf('   Progress in merging the pre-processed %s files: %g%% \n', Data_type, round(progress));
        end
    end
end

% Converts the cumulative chunk bounds to the bounds that apply after the resolution has been coarsened
function bounds = Coarsen_Chunk_Bounds(bounds_o, number_chunks, coarsening_factor)
    % The first bound is always zero
    bounds = zeros(1, number_chunks + 1);

    for c = 2 : number_chunks + 1
        % The size of this chunk is rounded up, such that partially covered pixels are retained
        size_o = bounds_o(c) - bounds_o(c - 1);
        bounds(c) = bounds(c - 1) + ceil(size_o / coarsening_factor);
    end
end

% Creates the string that identifies a chunk within the file names
function chunk_string = Chunk_String(c_x, c_y, zero_string)
    s_x = num2str(c_x, zero_string);
    s_y = num2str(c_y, zero_string);

    chunk_string = sprintf('x%sy%s', s_x, s_y);
end

% Retrieves the suffix (SG, INT, Forecast, etc.) and the extension from the name of a chunk file
function [file_suffix, file_extension] = Chunk_File_Name_Parts(file_name)
    % Only the part after the last underscore is considered
    file_name_parts = strsplit(file_name, '_');
    file_name_end_parts = strsplit(file_name_parts{end}, '.');

    file_suffix = ['_', file_name_end_parts{1}];

    if contains(file_suffix, 'x')       % If it is simply the chunk string, the suffix is empty
        file_suffix = '';
    end

    % The extension shows for instance whether it is a .tif or .tiff file
    if length(file_name_end_parts) > 1
        file_extension = ['.', file_name_end_parts{2}];
    else
        file_extension = '';
    end
end

% Creates the image of land cover data, in which each class is shown in its given colour
function land_cover_image = Land_Cover_Image(data_matrix, class_values, RGB_codes)
    class_map = double(data_matrix);

    class_list = unique(class_map);
    class_list = class_list(~isnan(class_list));

    % The pixels of each class are always located in the unmodified class map,
    % as colour values that were already assigned may coincide with class values
    red = class_map;
    green = class_map;
    blue = class_map;

    for i = 1:length(class_list)
        class_value = class_list(i);
        ind = find(class_values == class_value, 1);

        if isempty(ind)
            warning('Data_Merger_P:unknownClass', 'Class %g is not part of class_values, its pixels are not recoloured', class_value);
            continue
        end

        class_mask = class_map == class_value;

        red(class_mask) = RGB_codes(ind, 1);
        green(class_mask) = RGB_codes(ind, 2);
        blue(class_mask) = RGB_codes(ind, 3);
    end

    land_cover_image = cat(3, red, green, blue);
end

% Shows the image data in figure 1, saves the figure as figure_name and closes it again
function Export_Image_Figure(image_data, add_colorbar, colorbar_title, figure_name)
    figure(1)
    imshow(image_data)

    if add_colorbar
        cb = colorbar;
        caxis manual
        shading interp

        % The limits are only set if they are increasing, as caxis fails for constant or NaN data
        colour_limits = double([min(image_data(:)), max(image_data(:))]);

        if numel(colour_limits) == 2 && colour_limits(2) > colour_limits(1)
            caxis(colour_limits)
        end

        colortitlehandle = get(cb, 'Title');
        set(colortitlehandle, 'String', colorbar_title);
        cb.FontSize = 25;
    end

    % Set the size and white background color
    set(gcf, 'Units', 'Normalized', 'Position', [0 0 1 1])
    set(gcf, 'color', [1, 1, 1])

    % If export_fig fails (for instance if it is not available), the frame of the figure is written instead
    try
        export_fig(figure_name)
    catch
        frame = getframe(1);
        im = frame2im(frame);
        [imind, cm] = rgb2ind(im, 256);
        imwrite(imind, cm, figure_name);
    end

    close(1)
end

% Merges the chunk files of one component (Season or Trend) together with its upper and lower bounds
function Merge_Component_Files(Data_type, Component_type, file_suffix, number_chunks_x, number_chunks_y, rows_chunks, columns_chunks, zero_string, Plot)
    % The bounds are identified by the first letter of the component (S or T)
    component_letter = Component_type(1);

    % The identifiers of the parameters within the mat files
    TS_data_name = sprintf('%s_cell', Component_type);
    TS_UB_data_name = sprintf('%s_UB_matrix', component_letter);
    TS_LB_data_name = sprintf('%s_LB_matrix', component_letter);

    % The merged data
    rows_data = rows_chunks(end);
    columns_data = columns_chunks(end);

    component_cell = cell(rows_data, columns_data);
    UB_matrix = zeros(rows_data, columns_data);
    LB_matrix = zeros(rows_data, columns_data);

    for c_x = 1:number_chunks_x
        % Bounds of this chunk
        column_range = columns_chunks(c_x) + 1 : columns_chunks(c_x + 1);

        for c_y = 1:number_chunks_y
            row_range = rows_chunks(c_y) + 1 : rows_chunks(c_y + 1);

            chunk_string = Chunk_String(c_x, c_y, zero_string);

            % Names of the data files
            TS_data_file_name = sprintf('%s_%s_%s%s.mat', Data_type, Component_type, chunk_string, file_suffix);
            TS_UB_data_file_name = sprintf('%s_%s_UB_%s%s.mat', Data_type, component_letter, chunk_string, file_suffix);
            TS_LB_data_file_name = sprintf('%s_%s_LB_%s%s.mat', Data_type, component_letter, chunk_string, file_suffix);

            % Data of this chunk
            TS_chunk_data_file = load(TS_data_file_name, TS_data_name);
            TS_UB_chunk_data_file = load(TS_UB_data_file_name, TS_UB_data_name);
            TS_LB_chunk_data_file = load(TS_LB_data_file_name, TS_LB_data_name);

            % Append the data
            component_cell(row_range, column_range) = TS_chunk_data_file.(TS_data_name);
            UB_matrix(row_range, column_range) = TS_UB_chunk_data_file.(TS_UB_data_name);
            LB_matrix(row_range, column_range) = TS_LB_chunk_data_file.(TS_LB_data_name);
        end

        % Progress update
        fprintf('%g%% of the %s component files have been merged \n', c_x/number_chunks_x * 100, Component_type);
    end

    % Create the merged component data files
    TS_file_name = sprintf('%s_%s%s.mat', Data_type, Component_type, file_suffix);
    UB_file_name = sprintf('%s_%s_UB%s.mat', Data_type, component_letter, file_suffix);
    LB_file_name = sprintf('%s_%s_LB%s.mat', Data_type, component_letter, file_suffix);

    % The data is saved through structures, such that the parameters within the mat files
    % carry the component specific names (Season_cell, S_UB_matrix, etc.)
    merged_TS = struct();
    merged_TS.(TS_data_name) = component_cell;
    save(TS_file_name, '-struct', 'merged_TS', '-v7.3');

    merged_UB = struct();
    merged_UB.(TS_UB_data_name) = UB_matrix;
    save(UB_file_name, '-struct', 'merged_UB');

    merged_LB = struct();
    merged_LB.(TS_LB_data_name) = LB_matrix;
    save(LB_file_name, '-struct', 'merged_LB');

    % Plot
    if strcmp(Plot, 'On')
        % The average component data is plotted
        mean_component_data = cellfun(@mean, component_cell);

        titlestring = sprintf('Mean %s %s', Data_type, Component_type);
        figure_name = sprintf('%s_%s_Component%s.png', Data_type, Component_type, file_suffix);

        Export_Image_Figure(mean_component_data, true, titlestring, figure_name);
    end
end

% Merges the chunk files that contain the Landsat Fourier series coefficients
function Merge_Landsat_Fourier_Files(Data_type, number_chunks_x, number_chunks_y, rows_chunks, columns_chunks, zero_string)
    % The Fourier series coefficients
    Landsat_Fourier_Coeff_cell = cell(rows_chunks(end), columns_chunks(end));

    for c_x = 1:number_chunks_x
        % Bounds of this chunk
        column_range = columns_chunks(c_x) + 1 : columns_chunks(c_x + 1);

        for c_y = 1:number_chunks_y
            row_range = rows_chunks(c_y) + 1 : rows_chunks(c_y + 1);

            chunk_string = Chunk_String(c_x, c_y, zero_string);

            % Retrieve this chunk's coefficients
            Landsat_Fourier_Coeff_file_name = sprintf('%s_Fourier_Coefficients_%s.mat', Data_type, chunk_string);
            Landsat_Fourier_file = load(Landsat_Fourier_Coeff_file_name);

            % Append it to the full cell
            Landsat_Fourier_Coeff_cell(row_range, column_range) = Landsat_Fourier_file.Landsat_Fourier_Coeff_cell;
        end
    end

    % The dates and time stamps are taken from the chunk file that was loaded last
    date_initial = Landsat_Fourier_file.date_initial;
    date_last = Landsat_Fourier_file.date_last;

    initial_time = Landsat_Fourier_file.initial_time;
    last_time = Landsat_Fourier_file.last_time;

    % Save the Fourier coefficients of the entire domain
    Landsat_Fourier_Coeff_file_name = sprintf('%s_Fourier_Coefficients.mat', Data_type);
    save(Landsat_Fourier_Coeff_file_name, 'Landsat_Fourier_Coeff_cell', 'date_initial', 'date_last', 'initial_time', 'last_time', '-v7.3');
end