%% Load data
% Start from a clean session: no open figures, no workspace variables and
% an empty command window.
close all
clearvars
clc

% Names of the GUI output .mat files to analyse, one per measurement run,
% stored as an N-by-1 cell column.
List_GUI_output_files = { ...
    'TECAN 2017_12_22.mat'; ...
    'TECAN 2017_12_25.mat'; ...
    'TECAN 2018_01_26.mat'; ...
    'TECAN 2018_01_28.mat'};
% The 2018_01_28 run has a contaminated last column; that column is removed
% from further analysis in the next section.


%% Loading data into one variable
% Read the Wells struct array of every listed file into its own cell,
% dropping wells that are excluded for specific runs.
num_files                       = numel(List_GUI_output_files);
Outputs_cell                    = cell(num_files, 1);
for file_no = 1 : num_files
    file_name                   = List_GUI_output_files{file_no, 1};
    Output_temp                 = load( file_name );
    is_run_2018_01_28           = ~isempty(regexp(file_name, '2018_01_28', 'once'));
    is_run_2017_12_15           = ~isempty(regexp(file_name, '2017_12_15', 'once'));
    if is_run_2018_01_28
        % Contaminated last column in this measurement: keep wells 1-88 only
        wells_this_file         = Output_temp.Wells(1:88);
    elseif is_run_2017_12_15
        % Every 8th well (8, 16, ..., 96) is dropped for this measurement.
        % NOTE(review): the earlier comment here also said "last column",
        % but the index pattern differs from the 1:88 case above -- confirm
        % which wells were actually contaminated.
        wells_this_file         = Output_temp.Wells(setdiff(1:96, 8:8:96));
    else
        wells_this_file         = Output_temp.Wells(1:end);
    end
    Outputs_cell{file_no, 1}    = wells_this_file;

    % Running intersection of the field names present in every file
    fields_this_file            = fieldnames(wells_this_file);
    if file_no == 1
        All_fields              = fields_this_file;
    else
        All_fields              = intersect(All_fields, fields_this_file);
    end
end
% Drop the fields that are not present in every file, so that the struct
% arrays can be concatenated
Outputs_cell = cellfun(@(wells) rmfield(wells, setdiff(fieldnames(wells), All_fields)), ...
                        Outputs_cell, 'UniformOutput', false);
% Concatenate all runs and transpose the result into a single struct vector
Outputs_vec = horzcat(Outputs_cell{:})';
clearvars -except Outputs_vec

%% Quick statistics on how much fits changed from default:
% A fit counts as "default" when every setting below is at (or, for the
% fit window boundaries, within a narrow band around) its default value.
start_time_fit          = [Outputs_vec.Start_Time_Fit];
end_time_fit            = [Outputs_vec.End_Time_Fit];
default_start           = start_time_fit > 158 & start_time_fit < 165;
default_end             = end_time_fit > 2720 & end_time_fit < 2730;
default_window          = [Outputs_vec.Size_Fit_Window] == 51;
default_snr             = [Outputs_vec.Minimal_Signal_to_Noise] == 2;
default_wls             = [Outputs_vec.WLS] == 1;
default_r_squared       = [Outputs_vec.Min_R_squared] == 0.9;
Default_vec             = default_start & default_end & default_window & ...
                            default_snr & default_wls & default_r_squared;
% Report the share of wells fitted with default settings
fprintf('Default settings for %.0f %% of the fits.\n', 100*mean(Default_vec))

%% Get convenient data format
% Keep only the critical fields and convert to an N-by-7 cell array with
% one row per well. The rest of the script addresses columns by number, so
% the field order is enforced explicitly here instead of relying on the
% order the fields happen to have in the loaded structs:
%   1: t_doubl  2: t_doubl_err  3: Size_Fit_Window  4: base  5: end_OD
%   6: Genotype 7: Medium
% orderfields errors if one of these fields is missing, which is preferable
% to silently shifted columns.
fields_to_keep  = {'t_doubl', 't_doubl_err', 'Size_Fit_Window', 'base', 'end_OD', 'Genotype', 'Medium'};
Outputs_trimmed = rmfield(Outputs_vec, setdiff(fieldnames(Outputs_vec), fields_to_keep));
Outputs_trimmed = orderfields(Outputs_trimmed, fields_to_keep);
Outputs_vec2    = struct2cell( Outputs_trimmed )';
gen_col = 6;
med_col = 7;
% Convert doubling times back to growth rates
for i = 1 : size(Outputs_vec2, 1)
    % Column 1: rate = 1 / doubling time
    Outputs_vec2{i, 1} = 1/Outputs_vec2{i, 1};
    % Column 2: the doubling-time bounds are inverted into a rate interval
    % whose width is divided by twice the 97.5% t-quantile (dof = fit
    % window size - 2), i.e. the bounds are treated as a two-sided 95%
    % t-interval and turned into a standard error.
    Outputs_vec2{i, 2} = -diff(1./Outputs_vec2{i, 2})/(2*tinv(0.975,Outputs_vec2{i, 3}-2));
end
% Delete empty wells
Outputs_vec2            = Outputs_vec2( ~strcmp(Outputs_vec2(:,gen_col), 'None'), :);
% Trim medium text: keep the token following "Raf + " and append "% Gal"
for i = 1 : size(Outputs_vec2, 1)
    Outputs_vec2(i, med_col) = strcat(regexp(Outputs_vec2{i, med_col}, '(?<=Raf \+ )[\w.]{1,5}', 'match'), '% Gal');
end
% Replace inconsistent naming of genotypes: {name as recorded, canonical name}
genotype_renames = { ...
    'WT',                                   'WT CDC42'; ...
    'WT + CDC42',                           'WT CDC42'; ...
    'WT + Gal1-sfGFP-CDC42',                'WT Gal1-sfGFP-CDC42'; ...
    'dbem1 + CDC42',                        'dbem1 CDC42'; ...
    'dbem1 + Gal1-CDC42',                   'dbem1 Gal1-CDC42'; ...
    'dbem1 + Gal1-sfGFP-CDC42',             'dbem1 Gal1-sfGFP-CDC42'; ...
    'dbem1 + dbem3 + CDC42',                'dbem1 dbem3 CDC42'; ...
    'dbem1 + dbem3 + Gal1-CDC42',           'dbem1 dbem3 Gal1-CDC42'; ...
    'dbem1 + dbem3 + Gal1-sfGFP-CDC42',     'dbem1 dbem3 Gal1-sfGFP-CDC42'};
for rename_no = 1 : size(genotype_renames, 1)
    rows_to_rename = strcmp(Outputs_vec2(:,gen_col), genotype_renames{rename_no, 1});
    % Paren assignment of a 1-by-1 cell is a no-op when no row matches
    Outputs_vec2(rows_to_rename, gen_col) = genotype_renames(rename_no, 2);
end

%% Further rejection of growths (if OD rise < 0.01)
% Sometimes growth is stochastically possible, but the colony ultimately
% dies out before a significant OD rise.
% Analysis of the end ODs of wells labelled as growth/non-growth suggests
% this threshold is around 0.01-0.02 OD rise (clear cut-off there)
OD_rise_threshold       = 0.01;
% Column 1: OD rise (column 5 minus column 4 of Outputs_vec2)
% Column 2: 1 if the well currently has a non-NaN growth rate, else 0
%           (not used further in this section)
od_rise                 = cellfun(@(od_end, od_base) od_end - od_base, Outputs_vec2(:,5), Outputs_vec2(:,4));
has_rate                = cellfun(@(rate) ~isnan(rate), Outputs_vec2(:,1));
OD_rise_growth          = [od_rise, double(has_rate)];
% Blank the growth rate and its error for wells below the threshold
rejected_rows           = OD_rise_growth(:,1) < OD_rise_threshold;
Outputs_vec2(rejected_rows, 1:2) = {NaN};

%% Combine replicates
% Sorted unique genotype and medium labels, as N-by-1 cell columns
genotype_table                  = unique(cell2table(Outputs_vec2(:,gen_col)));
medium_table                    = unique(cell2table(Outputs_vec2(:,med_col)));
Genotypes                       = table2cell(genotype_table);
Media                           = table2cell(medium_table);

% Summary_cell: first column holds the genotype labels, first row the
% medium labels; every other cell holds
% [num_growths num_replicates rates_comb rates_err_comb chisq_red]
num_genotypes                   = numel(Genotypes);
num_media                       = numel(Media);
Summary_cell                    = cell(num_genotypes+1, num_media+1 );
Summary_cell(2:end,1)           = Genotypes;
Summary_cell(1,2:end)           = Media';

for genotype_no = 1 : num_genotypes
    ind_this_genotype           = strcmp(Outputs_vec2(:, gen_col), Genotypes(genotype_no, 1));

    for media_no = 1 : num_media
        ind_this_medium         = strcmp(Outputs_vec2(:, med_col), Media(media_no, 1));
        ind_this_condition      = ind_this_genotype & ind_this_medium;
        if ~any(ind_this_condition)
            % No wells for this genotype/medium combination
            Summary_cell{genotype_no + 1, media_no + 1} = NaN(1,6);
            continue
        end
        % Column 1: growth rate, column 2: its error (NaN = no growth)
        rates                   = cell2mat(Outputs_vec2(ind_this_condition, 1:2));
        num_replicates          = size(rates, 1);
        num_growths             = nnz(~isnan(rates(:,1)));
        % Inverse-variance weighted mean and its standard error
        weights                 = 1./rates(:,2).^2;
        rates_comb              = nansum(weights.*rates(:,1)) / nansum(weights);
        rates_err_comb          = sqrt(1/ nansum(weights));
        if rates_err_comb == Inf
            % All weights were NaN/zero (no growth at all): report rate 0
            rates_comb          = 0;
        end
        % Reduced chi-square of the replicates around the combined rate.
        % NOTE(review): for num_growths == 1 the prefactor is 1/0.
        chisq_red               = 1/(num_growths-1)*nansum((rates(:,1)-rates_comb).^2./rates(:,2).^2);
        Summary_cell{genotype_no + 1, media_no + 1} = [num_growths num_replicates rates_comb rates_err_comb chisq_red];
    end
end

% Over-dispersion correction derived from the first genotype row of
% Summary_cell (the variable names suggest this is expected to be WT --
% it is simply the first label in sorted order).
chisq_red_mat           = cellfun(@(z) z(5), Summary_cell(2:end, 2:end));
num_obs_WT_mat          = cellfun(@(z) z(1), Summary_cell(2, 2:end));
% NOTE(review): "/" between two row vectors is matrix right division
% (least-squares), not elementwise "./". It returns the scalar that the
% scaling below requires, and equals (n-1)/(n-3) when every medium has the
% same n -- confirm this is the intended way to pool over media.
small_sample_factor     = sqrt((num_obs_WT_mat-1)/(num_obs_WT_mat-3));
over_dispersion_corr    = small_sample_factor.*sqrt(mean(chisq_red_mat(1,:)));

% Inflate the combined errors (element 4 of every summary entry) ...
for genotype_no = 1 : num_genotypes
    for media_no = 1 : num_media
        summary_entry       = Summary_cell{genotype_no + 1, media_no + 1};
        summary_entry(4)    = over_dispersion_corr*summary_entry(4);
        Summary_cell{genotype_no + 1, media_no + 1} = summary_entry;
    end
end
% ... and the per-well errors (column 2) by the same factor
Outputs_vec2(:, 2) = cellfun(@(err) over_dispersion_corr*err, Outputs_vec2(:, 2), 'UniformOutput', false);

clearvars -except Outputs_vec2 Summary_cell Genotypes Media gen_col med_col

%% Bayesian distributions for dispersion corrected growth rates
% For every genotype/medium combination, sample the posterior of the growth
% rate with Metropolis-Hastings (Student-t likelihood per replicate, flat
% prior on [r_min, r_max]) and summarise it:
%   Samples        raw chain (nsamples-by-1), or NaN placeholder
%   Summary_cell2  [q2.5 mean q97.5 q16 mean q84] of the chain
%   Summary_cell3  same statistics for the rate relative to the first
%                  genotype row in the same medium
rng default  % For reproducibility
r_min           = 0;
r_max           = 1/70; % Not faster than 70 min. doubling time seems likely prior to measurement
delta           = Summary_cell{2,2}(4)/2; % Half-width of the random-walk proposal
nsamples        = 5e4;
Samples         = cell(numel(Genotypes)+1, numel(Media)+1 );
Summary_cell2   = cell(numel(Genotypes)+1, numel(Media)+1 );
Summary_cell3   = cell(numel(Genotypes)+1, numel(Media)+1 );
n_MC            = 1e4;  % Number of Monte Carlo draws for the relative rates

for genotype_no = 1 : numel(Genotypes)
    ind_this_genotype           = strcmp(Outputs_vec2(:, gen_col), Genotypes(genotype_no, 1));
    for media_no = 1 : numel(Media)
        fprintf('Genotype %0.f of %0.f, Media %0.f of %0.f \n', genotype_no , numel(Genotypes), media_no , numel(Media))
        ind_this_medium         = strcmp(Outputs_vec2(:, med_col), Media(media_no, 1));
        ind_this_condition      = ind_this_genotype & ind_this_medium;
        % Placeholders for combinations without usable data. Summary rows
        % have 6 entries, so the summary placeholder is 6 wide as well.
        Samples{genotype_no + 1, media_no + 1}          = NaN(1,5);
        Summary_cell2{genotype_no + 1, media_no + 1}    = NaN(1,6);
        if ~any(ind_this_condition), continue, end

        % Keep only the replicates that have a growth rate
        rates           = cell2mat(Outputs_vec2(ind_this_condition, 1:2));
        dof             = cell2mat(Outputs_vec2(ind_this_condition, 3)) - 2;
        has_rate        = ~isnan(rates(:,1));
        dof             = dof(has_rate, :);
        rates           = rates(has_rate, :);
        if isempty(rates), continue, end

        likelihood      = @(mu) prod(tpdf((mu-rates(:,1))./rates(:,2), dof));
        prior           = @(mu) unifpdf(mu, r_min, r_max);
        target_pdf      = @(mu) likelihood(mu)*prior(mu);
        % Uniform random-walk proposal on [x-delta, x+delta]. It is
        % symmetric, so it is declared as such to mhsample. Passing an
        % unrelated normal density as 'proppdf' (as was done previously)
        % enters the acceptance ratio and biases the sampled distribution.
        proprnd         = @(x) x + rand*2*delta - delta;
        x               = mhsample(Summary_cell{genotype_no + 1, media_no + 1}(3), nsamples, ...
                            'pdf', target_pdf, 'proprnd', proprnd, 'symmetric', 1, 'burnin', 1e3);
        Samples{genotype_no + 1, media_no + 1} = x;
        Summary_cell2{genotype_no + 1, media_no + 1} = [quantile(x, 0.025) mean(x) quantile(x, 0.975) ...
                                                        quantile(x, 0.16) mean(x) quantile(x, 0.84)];

        if genotype_no>1
            % Reference chain: first genotype row, same medium (already
            % filled because genotype 1 is processed first).
            reference_samples = Samples{2, media_no + 1};
            if numel(reference_samples) == nsamples
                relative_x = x(randi(nsamples, n_MC, 1)) ./ reference_samples(randi(nsamples, n_MC, 1));
                Summary_cell3{genotype_no + 1, media_no + 1} = [quantile(relative_x, 0.025) mean(relative_x) quantile(relative_x, 0.975) ...
                                                                quantile(relative_x, 0.16) mean(relative_x) quantile(relative_x, 0.84) ];
            else
                % The reference has no chain in this medium (only the NaN
                % placeholder), so indexing it with randi(nsamples) would
                % be out of bounds: the relative rate is undefined.
                Summary_cell3{genotype_no + 1, media_no + 1} = NaN(1,6);
            end
        else
            % Reference genotype: normalise its own summary by its mean
            Summary_cell3{genotype_no + 1, media_no + 1} = ...
                Summary_cell2{genotype_no + 1, media_no + 1} / Summary_cell2{genotype_no + 1, media_no + 1}(2);
        end
    end
end

%% Plot
% Relative fitness per medium (x) and genotype (series) with 16%-84%
% error bars, taken from Summary_cell3.
fz                              = 24;       % Font size
fn                              = 'Candara';% Font name
lw                              = 0.5; % Line width
ms                              = 5; % Marker size
alphaVal                        = 0.2;
t_val                           = uint8(alphaVal * 255);
colors                          = [lines(7); colorcube(96-7)];
colors                          = colors([4 5 1 6], :);
%colors                          ={'red';'green';'blue';'purple'};

% Legend entries: formatted names for the known genotypes, raw genotype
% label otherwise
le_names                        = Genotypes;
for i = 1 : numel(Genotypes)
    switch Genotypes{i}
        case 'WT CDC42'
            le_names{i,1}        = '{\it{CDC42}} {\it{BEM1}} {\it{BEM3}}';
        case 'WT Gal1-sfGFP-CDC42'
            le_names{i,1}        = '{\it{pGAL1-CDC42-sfGFP^{SW}}} {\it{BEM1}} {\it{BEM3}}';
        case 'dbem1 Gal1-sfGFP-CDC42'
            le_names{i,1}        = '{\it{pGAL1-CDC42-sfGFP^{SW}}} \Delta{\it{bem1}} {\it{BEM3}}';
        case 'dbem1 dbem3 Gal1-sfGFP-CDC42'
            % NOTE(review): unlike the two labels above, this one has no
            % "-sfGFP^{SW}" although the genotype name contains sfGFP --
            % confirm the label is intended.
            le_names{i,1}        = '{\it{pGAL1-CDC42}} \Delta{\it{bem1}} \Delta{\it{bem3}}';
    end
end


set(0,'defaultAxesFontName', fn)
set(0,'defaultTextFontName', fn)
set(0,'defaultAxesFontSize', fz)
set(0,'defaultTextFontSize', fz)

[fig_rel_fit, ax_fig_rel_fit, ax_fig_rel_fit2] = New_figure_with_axes('fig_rel_fit', true);
update_axes_style({ax_fig_rel_fit, ax_fig_rel_fit2}, lw, fz, fn, 'add', {'% galactose', ''}, {'Relative fitness compared to WT'});

% Numeric part of every medium label, as text (tick labels) and as number
x_Gal_cell  = cellfun(@(z) regexp(z, '[\d.]{1,5}', 'match'), Summary_cell(1, 2:end))';
x_Gal_num   = cell2mat(cellfun(@(z) str2double(char(regexp(z, '[\d.]{1,5}', 'match'))), Summary_cell(1, 2:end), 'UniformOutput', false))';
x_plot      = (1:numel(x_Gal_num))';
% Empty Summary_cell3 entries make z(k) fail; map those to NaN
errcellfun  = @(x, varargin) NaN;
% Rows: media, columns: genotypes. Element 2 = mean, 4 = 16%, 6 = 84%.
rel_fit     = cellfun(@(z) z(2), Summary_cell3(2:end, 2:end), 'ErrorHandler', errcellfun)';
rel_fit_lb  = rel_fit - cellfun(@(z) z(4), Summary_cell3(2:end, 2:end), 'ErrorHandler', errcellfun)';
rel_fit_ub  = cellfun(@(z) z(6), Summary_cell3(2:end, 2:end), 'ErrorHandler', errcellfun)' - rel_fit;

Markers             = {'o', 'v', 's', 'd'};

% Error bar lines: the series of one medium are spread over a band of
% width 0.6 around the tick. With a single series the spacing 0.6/(n-1)
% would be Inf (and 0*Inf = NaN positions), so that series is centred.
num_series          = size(rel_fit, 2);
if num_series > 1
    x_shift         = 0.6/(num_series-1);
    x_first         = 0.3;
else
    x_shift         = 0;
    x_first         = 0;
end
rel_fit_lines = cell(num_series, 1);
for i = 1 : num_series
    % Cycle through the available colors/markers so that more series than
    % styles does not cause an index error
    color_no    = 1 + mod(i-1, size(colors, 1));
    marker_no   = 1 + mod(i-1, numel(Markers));
    rel_fit_lines {i,1} = errorbar(x_plot-x_first+(i-1)*x_shift, rel_fit(:,i), rel_fit_lb(:,i), rel_fit_ub(:,i), 'Parent', ax_fig_rel_fit, ...
        'DisplayName', le_names{i}, 'LineWidth', lw, 'LineStyle', '--', 'Color', colors(color_no,:), 'Marker', Markers{marker_no}, 'MarkerSize', ms);
end
% Patches to split mediums: faint shaded band behind every other medium,
% hidden from the legend
medium_patches = cell(numel(x_plot), 1);
for i = 1 : 2 : numel(x_plot)
    medium_patches{i,1} = patch([x_plot(i)-0.5 x_plot(i)+0.5 x_plot(i)+0.5 x_plot(i)-0.5], ...
                                    [0 0 2 2], [0 0 0], 'Parent', ax_fig_rel_fit, 'EdgeColor', 'none');
    medium_patches{i,1}.Annotation.LegendInformation.IconDisplayStyle = 'off';
    medium_patches{i,1}.FaceAlpha = alphaVal/4;
end
% WT reference line at relative fitness 1, in the color of the first
% series, hidden from the legend
WT_line = line([x_plot(1)-0.5 x_plot(end)+0.5], [1 1], 'Parent', ax_fig_rel_fit, ...
        'Color', rel_fit_lines{1,1}.Color, 'LineWidth', lw);
WT_line.Annotation.LegendInformation.IconDisplayStyle = 'off';

% Axis limits and ticks (a previous YLim assignment that was immediately
% overwritten by the one below has been removed)
ax_fig_rel_fit.YLim         = [0 2];
ax_fig_rel_fit.XLim         = [min(x_plot)-0.5 max(x_plot+0.5)];
ax_fig_rel_fit.XTick        = x_plot;
ax_fig_rel_fit.XTickLabel   = x_Gal_cell;
ax_fig_rel_fit.TickLength   = [0 0];
axes(ax_fig_rel_fit)
legend('show')

%now secondary axes for ticks
axes(ax_fig_rel_fit2)
ax_fig_rel_fit2.XTick = [];
ax_fig_rel_fit2.Position = ax_fig_rel_fit.Position;
ax_fig_rel_fit2.Color = 'none';
linkaxes([ax_fig_rel_fit, ax_fig_rel_fit2])