%%
% De Winter, J. C. F., Dodou, D., Moorlag, F., Broekens, J. (2024). Social robots in education: A meta-analysis of learning outcomes. Manuscript submitted for publication.
% https://www.researchgate.net/publication/383908660_Social_Robots_A_Meta-Analysis_of_Learning_Outcomes
% https://doi.org/10.13140/RG.2.2.10148.49289

% 1 Faculty of Mechanical Engineering, Delft University of Technology, The Netherlands
% 2 Leiden Institute of Advanced Computer Science, Leiden University, The Netherlands
% Corresponding author: j.c.f.dewinter@tudelft.nl

% Start from a clean workspace, close all figures, and clear the command window
clear all; %#ok<CLALL>
close all;
clc

% Read the Excel file with study data (D contains numeric data, T contains text data)
[D,T] = xlsread('Data.xlsx','A2:AF560'); %#ok<XLSRD>

% Convert the 'Agent' text (column 5 of T) to a numeric code from 1 to 7.
% Robot (7) is the experimental condition; the other agents are control conditions.
% Rows whose text matches none of the labels keep the code 0.
agentLabels = {'Audio','Human','Nothing','Paper','Sham','Virtual interface','Robot'};
Agent = zeros(size(T,1),1);
for k = 1:numel(agentLabels)
    Agent = Agent + k*strcmp(T(:,5),agentLabels{k});
end

% Pre-allocate one value per Excel row for the effect sizes and their sample sizes
nRows = size(D,1);
CD_prepost      = NaN(nRows,1);
CD_prepost_n    = NaN(nRows,1);
CD_post         = NaN(nRows,1);
CD_post_low     = NaN(nRows,1);
CD_post_high    = NaN(nRows,1);
CD_post_n       = NaN(nRows,1);

% Pre-post Cohen's d for every row (a within-subject comparison, by definition).
% Helper signature: [d,n] = cohend(m1,s1,n1,m2,s2,n2,sr,b)
%   sr indicates whether the sign should be reversed
%   b  indicates whether the study is of a within-subject design
% Columns 21-22 are compared against columns 19-20 (presumably post-test vs. pre-test mean and SD - confirm against Data.xlsx);
% column 23 is used as the sample size for both and column 24 as the sign-reversal flag.
for r = 1:nRows
    nParticipants = D(r,23);
    [CD_prepost(r),CD_prepost_n(r)] = cohend(D(r,21),D(r,22),nParticipants, ...
        D(r,19),D(r,20),nParticipants, ...
        D(r,24),1);
end

% Build, for every Excel row, two text keys that identify rows belonging together:
%   ParticipantString     - study number, agent, condition, age, and gender (identifies a participant group)
%   ExperimentalCondition - study number, condition, time interval, and similarity (identifies comparable conditions)
nRows = size(D,1);
ParticipantString = cell(1,nRows);
ExperimentalCondition = cell(1,nRows);

for r = 1:nRows % loop over all rows of the Excel file
    studyTag = ['Study number: ' num2str(D(r,2))];
    conditionTag = [', Condition: ' T{r,7}];
    ParticipantString{r} = [studyTag ', Agent: ' T{r,5} conditionTag ', Age: ' num2str(D(r,17)) ', Gender: ' num2str(D(r,18))];
    ExperimentalCondition{r} = [studyTag conditionTag ', Time interval: ' num2str(D(r,14)) ', Similarity: ' num2str(D(r,15))];
end

% Number the participant groups in order of first appearance; rows with an identical key share a number
[~, ~, ParticipantGroupNumber] = unique(ParticipantString','stable');

% Per study: determine the study sample size (Study_N) and, for every control-condition row,
% the post-test Cohen's d of robot vs. control (CD_post) with its sample size (CD_post_n).
Study_N = NaN(max(D(:,2)),1); % pre-allocate study sample size

for s = 1:max(D(:,2)) % loop over studies
    idx = find(D(:,2) == s); % indices of rows of study s

    [~,idx_pgroup] = unique(ParticipantGroupNumber(idx)); % one representative row per participant group of this study
    if D(idx(1),25)==1 % if the study is of a within-subject design
        Study_N(s) = mean(D(idx(idx_pgroup),23));
        ParticipantGroupNumber(idx) = ParticipantGroupNumber(idx(1)); % in a within-subject design, all participants belong to the same group by definition
    else % the study is of a between-subjects design
        Study_N(s) = sum(D(idx(idx_pgroup),23));
    end

    measures = unique(T(idx,14)); % extract unique outcome measures of the study
    for m = 1:length(measures) % loop over the outcome measures
        idxm = find(D(:,2)==s & strcmp(T(:,14),measures(m))); % indices of measure m of study s
        robotconditions = find(Agent(idxm)==7); % positions within idxm that are 'Robot' (does not depend on i, so computed once per measure)

        for i = 1:length(idxm) % loop over all rows of measure m of study s
            if ismember(Agent(idxm(i)),1:6) % if not a robot, so a control condition

                % Here, we assess whether the comparison was done under identical conditions in terms of time interval and condition (e.g., iconic gestures, no iconic gestures)
                samecondition = find(strcmp(ExperimentalCondition(idxm),ExperimentalCondition(idxm(i)))); % same experimental condition
                samecondition(samecondition==i)=[]; % exclude the current condition (i.e., do not compare with oneself)
                robotcontrolcondition = intersect(samecondition,robotconditions); % robot condition(s) that correspond to the current control condition

                data_control = D(idxm(i),21:25); % post-test data of the control condition

                m2 = data_control(1); % mean (control condition)
                s2 = data_control(2); % standard deviation (control condition)
                n2 = data_control(3); % sample size (control condition)
                pg2 = ParticipantGroupNumber(idxm(i)); % participant group (control condition)

                if length(robotcontrolcondition)~=1 % zero or several matching robot conditions: average over multiple robot conditions
                    if isempty(robotcontrolcondition) % if there is no match, then use all robot conditions
                        robot_sel = robotconditions;
                    else % otherwise use the multiple matched robot conditions
                        robot_sel = robotcontrolcondition;
                    end
                    data_robot = D(idxm(robot_sel),21:25);

                    [d, n] = deal(NaN(size(data_robot,1),1));
                    [pg, nn] = deal(NaN(size(data_robot,1),2));

                    for es = 1:size(data_robot,1) % loop over the selected robot conditions of measure m of study s
                        m1 = data_robot(es,1); % mean (robot condition)
                        s1 = data_robot(es,2); % standard deviation (robot condition)
                        n1 = data_robot(es,3); % sample size (robot condition)
                        sr1 = data_robot(es,4); % sign reversal of effect (lower mean is better)
                        sp1 = data_robot(es,5); % special case (within-subject experiment, or cohen d directly)
                        % The participant group must come from the same row as data_robot(es,:).
                        % Indexing through robotconditions(es) picks the wrong row whenever the matched subset is in use.
                        pg1 = ParticipantGroupNumber(idxm(robot_sel(es)));
                        pg(es,:) = [pg1 pg2];
                        nn(es,:) = [n1 n2];
                        [d(es),n(es)] = cohend(m1,s1,n1,m2,s2,n2,sr1,sp1);
                    end
                    d = (n.'*d)/sum(n); % calculate d, weighted by sample size (NaN if there are no robot conditions)

                    pg = pg(:); % participant groups
                    nn = nn(:); % sample sizes
                    [~,idxb] = unique(pg); % unique participant groups
                    n = sum(sum(nn(idxb))); % total sample size of effect (counting each unique participant group once)
                else  % if there is exactly 1 robot condition to compare with
                    data_robot = D(idxm(robotcontrolcondition),21:25);
                    m1 = data_robot(1); % mean (robot condition, post-test)
                    s1 = data_robot(2); % standard deviation (robot condition)
                    n1 = data_robot(3); % sample size (robot condition)
                    sr1 = data_robot(4); % sign reversal of effect (lower mean is better)
                    sp1 = data_robot(5); % special case (within-subject experiment, or cohen d directly)
                    [d,n] = cohend(m1,s1,n1,m2,s2,n2,sr1,sp1);
                end

                if ~isnan(d) % store the effect only if it could be computed
                    CD_post(idxm(i)) = d;
                    CD_post_n(idxm(i)) = n;
                end
            end
        end % end of loop over all rows of measure m
    end % end of loop over measures
end % end of loop over studies

% Aggregate the row-level data to one record per study
nStudies = max(D(:,2));
DM = NaN(nStudies,size(D,2)); % sample-size weighted mean of every numeric column, per study
HP = NaN(nStudies,1); % 1 if any 'Robot' row of the study has column 11 equal to 1 (human presence), otherwise 0
Country = cell(nStudies,1);
Citation = cell(nStudies,1);
PDF = cell(nStudies,1);

for s = 1:nStudies % loop over studies (study number is column 2 of D)
    rows = find(D(:,2) == s); % indices for study s
    w = D(rows,23); % sample sizes, used as weights
    DM(s,:) = sum(D(rows,:).*w,1)./sum(w); % sample-size weighted mean per study (a NaN in any row propagates to the study value)

    firstRow = rows(1); % study-level text fields are taken from the first row of the study
    Country(s) = T(firstRow,2); % country of the study
    Citation(s) = T(firstRow,1); % full APA-formatted citation of the study
    PDF(s) = T(firstRow,30); % PDF file name of the study

    HP(s) = any(D(rows,11)==1 & strcmp(T(rows,5),'Robot')); % look up whether there is human presence in the robot condition
end

%% Calculate effect sizes per study and control group type
% Result matrices are (number of studies) x (number of agent types):
%   CD_prepost_matrix / CD_prepost_n_matrix        - pre-post Cohen's d and sample size
%   CD_post_matrix / CD_post_n_matrix              - robot-vs-control post-test Cohen's d and sample size
%   CD_post_matrix_low / CD_post_matrix_high       - 95% confidence bounds of the post-test Cohen's d
nStudies = max(D(:,2));
nAgents = max(Agent);
[CD_prepost_matrix, CD_prepost_n_matrix,CD_post_matrix,CD_post_matrix_low,CD_post_matrix_high,CD_post_n_matrix] = deal(NaN(nStudies,nAgents));
for s = 1:nStudies % loop over studies
    for a = 1:nAgents % loop over agent types

        % Pre-post effects
        idx = find(D(:,2) == s & Agent == a & ~isnan(CD_prepost)); % rows of study s & agent a with a pre-post effect
        if ~isempty(idx)
            cd_prepost = CD_prepost(idx); % pre-post Cohen's d
            cd_prepost_n = CD_prepost_n(idx); % corresponding sample size
            cd_prepost_pg = ParticipantGroupNumber(idx); % participant group
            [~,idxb]=unique(cd_prepost_pg); % one row per unique participant group
            CD_prepost_matrix(s,a) = sum(cd_prepost.*cd_prepost_n,'omitnan')./sum(cd_prepost_n,'omitnan'); % sample-size weighted mean Cohen's d
            CD_prepost_n_matrix(s,a) = sum(cd_prepost_n(idxb)); % sample size, counting each participant group once
        end

        % Post effects
        idx2 = find(D(:,2) == s & Agent == a & ~isnan(CD_post)); % rows of study s & agent a with a post-test effect
        if ~isempty(idx2)
            cd_post = CD_post(idx2); % post-test Cohen's d
            cd_post_n = CD_post_n(idx2); % corresponding sample size
            cd_post_pg = ParticipantGroupNumber(idx2); % participant group
            [~,idx2b]=unique(cd_post_pg); % one row per unique participant group
            CD_post_matrix(s,a) = sum(cd_post.*cd_post_n,'omitnan')./sum(cd_post_n,'omitnan'); % sample-size weighted mean Cohen's d
            CD_post_n_matrix(s,a) = sum(cd_post_n(idx2b)); % sample size, counting each participant group once

            if DM(s,25) ==1 % within-subject experiment
                nh = CD_post_n_matrix(s,a); % If the experiment is of a within-subject design, then treat it as a between-subject design for calculating confidence intervals. For example, if the experiment contains 15 participants, then n1 = n2 = 15
            else
                nh = CD_post_n_matrix(s,a)/2; % If experiment is of a between-subjects design, then split the sample size. For example, if the total sample size is 30, then assume n1 = n2 = 15
            end
            lambda = sqrt((nh*nh*2) / (2*nh + 2*nh)); % scaling between d and the t statistic for two equal groups of size nh (equals sqrt(nh/2))
            df = nh + nh - 2; % degrees of freedom
            ci = getNCTConfidenceBounds(CD_post_matrix(s,a)*lambda, df, [0.025 0.975]) / lambda; % calculate 95% confidence interval
            CD_post_matrix_low(s,a) = ci(1); % lower bound of 95% confidence interval
            CD_post_matrix_high(s,a) = ci(2); % upper bound of 95% confidence interval
        end
    end
end
%% Overview of number of studies and effects
clc
hasPost = sum(~isnan(CD_post_matrix),2)>0; % studies with at least one robot-vs-control effect
hasPrePost = sum(~isnan(CD_prepost_matrix),2)>0; % studies with at least one pre-post effect
nPostStudies = sum(hasPost);
nNoHuman = sum(HP(hasPost)==0); % studies without human presence in the robot condition

disp(['We retrieved a total of ' num2str(max(D(:,2))) ' studies, comprising ' num2str(sum(~isnan(CD_post))) ' effect sizes between robot group and control group and '  num2str(sum(~isnan(CD_prepost))) ' pre-post effects.'])

disp(['Effect sizes between the robot group and control group were available for ' num2str(nPostStudies) ' studies (' num2str(sum(Study_N(hasPost)))  ...
    ' participants), while pre-post effect sizes were available for ' num2str(sum(hasPrePost))  ' studies (' num2str(sum(Study_N(hasPrePost)))  ' participants).'])

disp(['The mean Cohen''s d of the ' num2str(sum(~isnan(CD_prepost))) ' pre-post effects was ' sprintf('%0.2f',mean(CD_prepost,'omitnan')) ' (SD = ' sprintf('%0.2f',std(CD_prepost,'omitnan')) ')'])

% Wording corrected: the message previously read "a human not was present"
disp(['Out of ' num2str(nPostStudies) ' studies with a robot and control group, a human was not present in the robot condition in ' num2str(nNoHuman) ' studies (' num2str(round(100*nNoHuman/nPostStudies)) '% of studies).'])

%%
% Plot styling shared by the figures; row/element k belongs to agent code k
ControlText = {'Audio','Human','Nothing','Paper','Sham','Virtual','Robot'}; % display names
shapes = {'o','d','o','s','pentagram','hexagram','o'}; % marker symbols
colors = [ ...
    0          0.7        0.7; ...      % Audio
    1          0          0.5; ...      % Human
    0          0          0; ...        % Nothing
    [204 153 0]/255; ...                % Paper
    0          0          1; ...        % Sham
    0          0.8        0; ...        % Virtual
    1          0          1];           % Robot
%% Figure 1. Scatter plot showing the pre-post effect sizes for the robot condition compared to the respective control group
% Each marker is one study that has a pre-post effect for both the robot condition and the given control condition.
% Fig1Stats columns: [k, n control, n robot, n total, mean d control, mean d robot, difference of means]
figure('WindowState', 'maximized');
Fig1Stats = NaN(max(Agent)-1,7);

% Line of unity. The legend is built from explicit handles so that a control type
% without any paired study (no markers drawn) cannot shift the labels onto the wrong markers.
legendHandles = plot([-2 9],[-2 9],'--','LineWidth',2,'Color',[.5 .5 .5]);
hold on
legendLabels = {'Line of unity'};

for a = [1 2 3 4 5 6] % loop over control conditions
    idx = find(~isnan(CD_prepost_matrix(:,a)) & ~isnan(CD_prepost_matrix(:,end))); % only if both are available, so these are studies with pre-post design & control group

    EScondition  = CD_prepost_matrix(idx,a);     % pre-post effect sizes, control condition
    Ncondition   = CD_prepost_n_matrix(idx,a);   % sample sizes, control condition
    ESconditionR = CD_prepost_matrix(idx,end);   % pre-post effect sizes, robot condition
    NconditionR  = CD_prepost_n_matrix(idx,end); % sample sizes, robot condition

    Fig1Stats(a,:) = [length(ESconditionR) sum(Ncondition) sum(NconditionR) sum(Ncondition)+sum(NconditionR) round(mean(EScondition,'omitnan'),2) round(mean(ESconditionR,'omitnan'),2) round(mean(ESconditionR,'omitnan')-mean(EScondition,'omitnan'),2)];

    if ~isempty(idx)
        sc1=scatter(EScondition, ESconditionR,(10*Ncondition+10*NconditionR)/2,colors(a,:),shapes{a},'filled','markeredgecolor','k'); % plot markers for pre-post (reference group and robot pairs)
        sc1.MarkerFaceAlpha=0.5;
        legendHandles = [legendHandles sc1]; %#ok<AGROW>
        legendLabels{end+1} = ['Robot vs. ' ControlText{a} ' (\it{k}\rm = ' num2str(Fig1Stats(a,1)) ', \it{n}\rm = ' num2str(Fig1Stats(a,4)) ')']; %#ok<AGROW>
    end
end
grid on
ax=gca;ax.GridLineWidth = 1;
axis equal
xlabel('Cohen''s {\itd} (pre-post) - Control condition')
ylabel('Cohen''s {\itd} (pre-post) - Robot condition')
set(gca,'xlim',[-1.5 8])
set(gca,'ylim',[-1.5 8])
[legendh,obj]=legend(legendHandles,legendLabels,'location','northwest');
obj2 = findobj(obj, 'type', 'patch');
set(obj2, 'Markersize', 15); % enlarge the legend markers
obj3 = findobj(obj, 'type', 'text');
set(obj3,'Fontsize',24)
h=findobj('FontName','Helvetica'); set(h,'FontSize',24,'Fontname','Arial');
legendh.Position = [0.255 0.732 0.329 0.239];
set(gca,'TickDir','out','LooseInset',[0.01 0.01 0.01 0.01])
set(gcf,'WindowState', 'maximized'); %print(gcf,'Figure1.eps','-depsc');print(gcf,'Figure1.png','-dpng','-r600')
%% Figure 2. Scatter plots of pre-post effect sizes versus the total duration of the training sessions (logarithmic scale).
figure('WindowState', 'maximized');
A = [2 5 6 7]; % 4 selected agent types

% Axes position per subfigure (rows follow the subplot order)
panelPositions = [0.045 0.61 0.43 0.38;
    0.56  0.61 0.43 0.38;
    0.045 0.13 0.43 0.38;
    0.56  0.13 0.43 0.38];
% Tick positions in log10 units and the labels shown for them
tickPositions = log10([1:9 10:10:90 100:100:900 1000:1000:9000]);
tickLabels = {'1','2','3','4','','','','','','10','20','30','40','','','','','','100','200','300','400','','','','','','1000','2000','3000','4000',''};

for i=1:length(A)
    agentCode = A(i);
    isAgent = Agent==agentCode; % rows of this agent type

    subplot(2,2,i)
    scatter1=scatter(log10(D(isAgent,12)),CD_prepost(isAgent),5*CD_prepost_n(isAgent),colors(agentCode,:),'o','filled','markeredgecolor','k');
    hold on
    scatter1.MarkerFaceAlpha = 0.5;
    grid on; box on
    if i==3 || i==4 % only for the lower two subfigures
        xlabel('Duration of training (min) (log scale)')
    end
    ylabel('Cohen''s {\itd} (pre-post)')
    ax=gca;ax.GridLineWidth = 1;
    set(gca,'xtick',tickPositions,'xticklabelrotation',45,'xlim',[log10(3) log10(2000)],'ylim',[-2.5 9],'TickDir','out')
    set(gca,'xticklabel',tickLabels)
    plot([-2 3000],[0 0],'--','LineWidth',2,'Color',[.5 .5 .5]);hold on % horizontal line at d = 0

    set(gca,'position',panelPositions(i,:))

    nEffects = sum(~isnan(CD_prepost(isAgent & D(:,12)>0))); % effects of this agent type with a positive training duration
    [legendh,obj]=legend([ControlText{agentCode} ' (\it{m}\rm = '   num2str(nEffects) ')'],'location','northwest','FontSize',24);
    objs = findobj(obj, 'type', 'patch');
    set(objs, 'Markersize', 15); % set marker size
    legendh.Position(3) = 0.15; % adjust the width of the legend box
    legendh.Position(4) = 0.043; % adjust the height of the legend box
end

h=findobj('FontName','Helvetica');
set(h,'FontSize',24,'Fontname','Arial');
set(gca,'LooseInset',[0.01 0.01 0.01 0.01])

% Correlation between log training duration and pre-post effect, over all rows where both are available
logDuration = log(D(:,12));
disp(newline)
disp(['The correlation coefficient between training duration and pre-post effects is: ' sprintf('%0.2f',corr(logDuration,CD_prepost,'rows','pairwise')) ', based on m = ' num2str(sum(~isnan(logDuration+CD_prepost))) ' effect sizes.'])

%% Figure 3. Cohen’s d effect sizes for the robot group relative to the control group, for six different types of control groups.
figure('WindowState', 'maximized');
plot([-100 100],[0 0],'--','LineWidth',2,'Color',[.5 .5 .5]); hold on % horizontal line

for i=1:max(Agent)-1 % loop over control group conditions
    groupColor = colors(i,:);
    [sortedEffects,effect_idx]=sort(CD_post_matrix(:,i)); % sort the effects of control group type i in ascending order
    effect_idx(isnan(sortedEffects))=[]; % keep only studies that have an effect for this control group type
    nEffects = length(effect_idx);
    xv = (-(nEffects-1)/2:(nEffects-1)/2)/40; % horizontal offsets, centred on the group position

    for e=1:nEffects
        study = effect_idx(e);
        xpos = i+xv(e);
        plot([xpos xpos],[CD_post_matrix_low(study,i) CD_post_matrix_high(study,i)],'color',groupColor,'LineWidth',2); % plot 95% confidence interval (vertical line)
        if HP(study) % human teacher present: open (white) marker
            faceColor = 'white';
        else % human teacher not present: solid marker
            faceColor = groupColor;
        end
        scatter1=scatter(xpos,CD_post_matrix(study,i),4*CD_post_n_matrix(study,i),faceColor,'filled','MarkerEdgeColor',groupColor,'LineWidth',2); % plot marker
        scatter1.MarkerFaceAlpha = .7;
    end

    % Summary statistics printed above each group
    text(i,3.53,['\it{k}\rm = '   num2str(sum(~isnan(CD_post_matrix(:,i))))],'horizontalalignment','center','color',groupColor) % number of studies
    text(i,3.38,['\it{n}\rm = '   num2str(sum(CD_post_n_matrix(:,i),'omitnan'))],'horizontalalignment','center','color',groupColor) % total sample size
    text(i,3.13,['\it{M}\rm = '   sprintf('%0.2f',mean(CD_post_matrix(:,i),'omitnan'))],'horizontalalignment','center','color',groupColor) % mean of Cohen's d
    text(i,2.98,['\it{SD}\rm = '   sprintf('%0.2f',std(CD_post_matrix(:,i),'omitnan'))],'horizontalalignment','center','color',groupColor) % SD of Cohen's d
end

xticks(1:6);
xticklabels({'Robot vs. Audio','Robot vs. Human','Robot vs. Nothing','Robot vs. Paper','Robot vs. Sham','Robot vs. Virtual'})
ylabel('Cohen''s {\itd} - Post-test')
set(gca,'TickDir','out','LooseInset',[0.01 0.01 0.01 0.01],'xlim',[.6 6.5],'ylim',[-1.8 2.8])
grid on;box on
ax=gca;ax.GridLineWidth = 1;
h=findobj('FontName','Helvetica'); set(h,'FontSize',24,'Fontname','Arial');
set(gca,'pos',[0.066 0.059 0.931 0.79])

% Group comparisons (columns of CD_post_matrix: 2 = Human, 3 = Nothing, 6 = Virtual interface)
d_human = CD_post_matrix(:,2);
d_nothing = CD_post_matrix(:,3);
d_virtual = CD_post_matrix(:,6);

disp(newline)
disp('According to an independent-samples t-test, Cohen’s d was larger for ‘Nothing’ control groups compared to ‘Virtual interface’ control groups:')
ttest2_output(d_nothing,d_virtual,2);

disp('The difference in Cohen’s d between Human control groups and Nothing control groups was not significant:')
ttest2_output(d_nothing,d_human,2);

disp('The Cohen’s d for robot vs. human comparisons was larger for studies where a human and robot were co-teaching (i.e., white markers in Figure 3) compared to robot-only (solid markers in Figure 3):')
ttest2_output(d_human(HP==1),d_human(HP==0),2);

%%
% Derive a per-study 'Positivism score' from the GPT-4o-based item scores of the study PDFs
load('PDF_GPT_scores.mat') % matrix of PDF files x items, containing GPT-4o-based scores
load('filenames.mat') % all documents and file names corresponding to the PDF files

nStudies = max(D(:,2));
GPTscores = NaN(nStudies,size(PDF_GPT_scores,2)); % number of studies x number of items

for s = 1:nStudies % loop over all studies
    pdf_name = unique(T(D(:,2)==s,30)); % PDF name of study s
    idx = find(strcmp(filenames,char(pdf_name))); % position of that PDF among 'filenames'
    GPTscores(s,:) = PDF_GPT_scores(idx,:); % item scores of study s
end

GPTscores = GPTscores(:,1:6); % only keep the first 6 items
loadings = -pcao(corr(GPTscores),1); % sign-flipped output of pcao for one component
standardizedItems = zscore(GPTscores);
Positivism_score = zscore(standardizedItems/loadings'); % standardized component score per study

% Correlation between the Positivism score and the mean post-test effect size per study
studyEffect = mean(CD_post_matrix,2,'omitnan');
[c,p]=corr(Positivism_score,studyEffect,'rows','pairwise');
disp(newline)
disp(['It was found the Positivism score was substantially and significantly correlated with effect size (r = ' sprintf('%0.2f',c) ', p  = ' sprintf('%0.3f',p) ', k = '  num2str(sum(~isnan(Positivism_score+studyEffect))) ')'])

%% Figure 4. GPT-4o-based Positivism scores of the research paper for different types of control groups.

% GPT_matrix(study, control type) holds the Positivism score only for studies that compare the robot
% with exactly one type of control condition; all other entries stay NaN.
GPT_matrix = NaN(size(CD_post_matrix));
for i=1:size(GPT_matrix,1) % loop over all studies
    idx=find(~isnan(CD_post_matrix(i,:)));
    if length(idx)==1 %#ok<ISCL> % if there is only one comparison between robot and control condition, then the LLM score is extracted; otherwise not
        GPT_matrix(i,idx)= Positivism_score(i);
    end
end

figure('WindowState', 'maximized');
plot([-100 100],[0 0],'--','LineWidth',2,'Color',[.5 .5 .5])
hold on % must be set before the first scatter call; otherwise that call clears the axes and removes the zero line
for i=1:6
    [a,effect_idx]=sort(GPT_matrix(:,i)); % sort the Positivism scores of control group type i in ascending order
    effect_idx(isnan(a))=[];
    xv = -(length(effect_idx)-1)/2:(length(effect_idx)-1)/2; % vector of x-coordinates for plotting
    xv=xv/40;
    for e=1:length(effect_idx)
        scatter1=scatter(i+xv(e),GPT_matrix(effect_idx(e),i),4*CD_post_n_matrix(effect_idx(e),i),colors(i,:),'filled','MarkerEdgeColor','k');
        scatter1.MarkerFaceAlpha = .7;
    end
    included = ~isnan(GPT_matrix(:,i)); % studies shown in this column
    text(i,-0.8+3.55,['\it{k}\rm = '   num2str(sum(included))],'horizontalalignment','center','color',colors(i,:)) % number of studies shown
    text(i,-0.8+3.38,['\it{n}\rm = '   num2str(sum(CD_post_n_matrix(included,i),'omitnan'))],'horizontalalignment','center','color',colors(i,:)) % total sample size of the studies shown (same selection as k)
    text(i,-0.8+3.15,['\it{M}\rm = '   sprintf('%0.2f',mean(GPT_matrix(:,i),'omitnan'))],'horizontalalignment','center','color',colors(i,:)) % mean Positivism score
    text(i,-0.8+2.98,['\it{SD}\rm = '   sprintf('%0.2f',std(GPT_matrix(:,i),'omitnan'))],'horizontalalignment','center','color',colors(i,:)) % SD of the Positivism score
end

xticks(1:6);
xticklabels({'Robot vs. Audio','Robot vs. Human','Robot vs. Nothing','Robot vs. Paper','Robot vs. Sham','Robot vs. Virtual'})
ylabel('Positivism score')
set(gca,'TickDir','out','LooseInset',[0.01 0.01 0.01 0.01],'xlim',[.6 6.5])
grid on;box on
ax=gca;ax.GridLineWidth = 1;
h=findobj('FontName','Helvetica'); set(h,'FontSize',24,'Fontname','Arial');
set(gca,'pos',[0.066 0.059 0.931 0.79])

disp(newline)
disp('According to an independent-samples t-test, the Positivism score was not significantly different between ‘Nothing’ control groups and ‘Virtual interface’ control groups:')
ttest2_output(GPT_matrix(:,3),GPT_matrix(:,6),2);

%% Figure 5. Comparison of studies from European countries and non-European countries regarding effect size between social robot vs. control group (left) and Positivism score (right).
effects_average = mean(CD_post_matrix,2,'omitnan'); % mean post-test effect size per study, across control group types
inEurope = DM(:,8)==1; % studies flagged 1 in column 8
outEurope = DM(:,8)==0; % studies flagged 0 in column 8
hasEffect = ~isnan(effects_average);
eff_europe = effects_average(inEurope & hasEffect);
eff_neurope = effects_average(outEurope & hasEffect);

disp(newline)
disp('Effect sizes - Europe vs No Europe')
ttest2_output(eff_europe,eff_neurope,2);
disp(newline)
disp('Positivism score - Europe vs No Europe')
ttest2_output(Positivism_score(inEurope),Positivism_score(outEurope),2);

fh = figure('WindowState', 'maximized');

% Per-subfigure settings (1 = effect size, 2 = Positivism score)
panelTitles = {'Effect size - Post-test','Positivism score'};
panelYLabels = {'Cohen''s {\itd} - Post-test','Positivism score'};
panelPositions = [0.06 0.055 0.3 0.905; 0.45 0.055 0.3 0.905];
columnColors = {'b','g'}; % left column (within Europe), right column (outside Europe)

for b=1:2 % create two subfigures
    subplot(1,2,b)
    hold on
    switch b
        case 1 % left subfigure
            v1=eff_europe;
            v2=eff_neurope;
            v1n=Study_N(inEurope & hasEffect);
            v2n=Study_N(outEurope & hasEffect);
        case 2 % right subfigure
            v1=Positivism_score(inEurope);
            v2=Positivism_score(outEurope);
            v1n=Study_N(inEurope);
            v2n=Study_N(outEurope);
    end
    title(panelTitles{b})
    ylabel(panelYLabels{b})
    set(gca,'pos',panelPositions(b,:))

    % Two-column matrices (Europe, non-Europe); the shorter column is padded with NaN
    nRowsPlot = max(length(v1),length(v2));
    datamatrix = NaN(nRowsPlot,2); % values
    datamatrix(1:length(v1),1) = v1;
    datamatrix(1:length(v2),2) = v2;
    datamatrix_n = NaN(nRowsPlot,2); % sample sizes
    datamatrix_n(1:length(v1n),1) = v1n;
    datamatrix_n(1:length(v2n),2) = v2n;

    for i=1:2 % loop over 2 columns
        scatter1=scatter(i*ones(nRowsPlot,1),datamatrix(:,i),10*datamatrix_n(:,i),columnColors{i},'filled','MarkerEdgeColor','k');
        scatter1.MarkerFaceAlpha = .2;
    end
    xticks(1:2);
    xticklabels({'Within Europe','Outside Europe'})
    set(gca,'TickDir','out','LooseInset',[0.01 0.01 0.01 0.01],'xlim',[.6 2.5])
    grid on;box on
    ax=gca;ax.GridLineWidth = 1;

end

h=findobj('FontName','Helvetica');
set(h,'FontSize',24,'Fontname','Arial');
%% Top 4 - highest Cohens'd
% Ranks are used here and in the sections below; tiedrank assigns rank 1 to the lowest value
effects_average_rank=tiedrank(effects_average); % rank of the mean effect size
Positivism_score_rank=tiedrank(Positivism_score); % rank of the positivism score

[sortedEffects,b]=sort(effects_average,'descend');
b(isnan(sortedEffects))=[]; % remove NaNs (they are sorted first in descending order)
b=b(1:4); % retain top 4

disp(newline)
disp(['Studies with the highest effects overall (total number of studies with effect = ' num2str(sum(~isnan(effects_average))) '):'])
disp(char(Citation(b)))
disp('Corresponding effect sizes')
disp(round(effects_average(b),2)')
disp(['Corresponding ranking scores of effects scores (1 = lowest, out of ' num2str(max(effects_average_rank)) ')'])
disp(effects_average_rank(b)')
disp('Corresponding positivism scores')
disp(round(Positivism_score(b)',2))
disp(['Corresponding ranking scores of positivism scores (1 = lowest, out of ' num2str(max(Positivism_score_rank)) ')'])
disp(Positivism_score_rank(b)')

%% Top 4 - lowest Cohens'd
[~,b]=sort(effects_average,'ascend'); % NaNs are sorted last in ascending order
b=b(1:4); % retain the 4 lowest
disp(newline)
disp(['Studies with the lowest effects overall (total number of studies with effect = ' num2str(sum(~isnan(effects_average))) '):'])
disp(char(Citation(b)))
disp('Corresponding effect sizes')
disp(round(effects_average(b),2)')
% The ranks come from tiedrank, which assigns rank 1 to the lowest value, so the label must say "1 = lowest"
disp(['Corresponding ranking scores of effects scores (1 = lowest, out of ' num2str(max(effects_average_rank)) ')'])
disp(effects_average_rank(b)')
disp('Corresponding positivism scores')
disp(round(Positivism_score(b)',2))
disp(['Corresponding ranking scores of positivism scores (1 = lowest, out of ' num2str(max(Positivism_score_rank)) ')'])
disp(Positivism_score_rank(b)')

%% Top 4 lowest positivism scores
disp(newline)
[~,order]=sort(Positivism_score,'ascend'); % NaNs are sorted last in ascending order
b=order(1:4); % retain the 4 lowest
disp(newline)
nScored = sum(~isnan(Positivism_score)); % number of studies with a Positivism score
disp(['Studies with the lowest Positivism scores overall (total number of studies with effect = ' num2str(nScored) '):'])
disp(char(Citation(b)))

% Effect sizes of these studies and their ranks
disp('Corresponding effect sizes')
disp(round(effects_average(b),2)')
disp(['Corresponding ranking scores of effects scores (1 = lowest, out of ' num2str(max(effects_average_rank)) ')'])
disp(effects_average_rank(b)')

% Positivism scores of these studies and their ranks
disp('Corresponding positivism scores')
disp(round(Positivism_score(b)',2))
disp(['Corresponding ranking scores of positivism scores (1 = lowest, out of ' num2str(max(Positivism_score_rank)) ')'])
disp(Positivism_score_rank(b)')
%% High positivism score compared to effect size
% Diff_score = normalized rank of the Positivism score minus normalized rank of the effect size.
% A large positive value means a high Positivism score relative to the effect size, so this section sorts in DESCENDING order.
Diff_score = Positivism_score_rank./max(Positivism_score_rank)-effects_average_rank./max(effects_average_rank);
[a,b]=sort(Diff_score,'descend');
b(isnan(a))=[]; % remove NaNs (they are sorted first in descending order)
b=b(1:min(4,numel(b))); % retain top 4
disp(newline)
disp(['Studies with the highest positivism score compared to effect size (total number of studies = ' num2str(sum(~isnan(Diff_score))) '):'])
disp(char(Citation(b)))
disp('Corresponding effect sizes')
disp(round(effects_average(b),2)')
% Ranks come from tiedrank, which assigns rank 1 to the lowest value
disp(['Corresponding ranking scores of effects scores (1 = lowest, out of ' num2str(max(effects_average_rank)) ')'])
disp(effects_average_rank(b)')
disp('Corresponding positivism scores')
disp(round(Positivism_score(b)',2))
disp(['Corresponding ranking scores of positivism scores (1 = lowest, out of ' num2str(max(Positivism_score_rank)) ')'])
disp(Positivism_score_rank(b)')
%% Low Positivism score compared to effect size
% The most negative Diff_score means a low Positivism score relative to the effect size, so this section sorts in ASCENDING order.
Diff_score = Positivism_score_rank./max(Positivism_score_rank)-effects_average_rank./max(effects_average_rank);
[a,b]=sort(Diff_score,'ascend');
b(isnan(a))=[]; % remove NaNs (they are sorted last in ascending order)
b=b(1:min(4,numel(b))); % retain top 4
disp(newline)
disp(['Studies with the lowest positivism score compared to effect size (total number of studies = ' num2str(sum(~isnan(Diff_score))) '):'])
disp(char(Citation(b)))
disp('Corresponding effect sizes')
disp(round(effects_average(b),2)')
disp(['Corresponding ranking scores of effects scores (1 = lowest, out of ' num2str(max(effects_average_rank)) ')'])
disp(effects_average_rank(b)')
disp('Corresponding positivism scores')
disp(round(Positivism_score(b)',2))
disp(['Corresponding ranking scores of positivism scores (1 = lowest, out of ' num2str(max(Positivism_score_rank)) ')'])
disp(Positivism_score_rank(b)')

%% Highest robot vs virtual interface effects overall
largeVirtualEffect = CD_post_matrix(:,6)>0.75; % column 6 = robot vs. virtual interface
disp(newline)
disp('Studies that showed that the robot was much more effective than a virtual interface (d > 0.75)')
disp(char(Citation(largeVirtualEffect)))
%% Figure 6. Scatter plot depicting the effect size between social robot vs. control group versus the mean participant age.
studyAge = DM(:,17); % sample-size weighted mean of column 17 per study (used as participant age)
nPairs = sum(~isnan(studyAge+effects_average)); % studies with both age and effect size available

disp('Pearson product-moment correlation coefficient between effect size and age (r, p, k)')
[c,p]=corr(studyAge,effects_average,'rows','pairwise');
disp([round(c,2) round(p,3) nPairs])
disp('Spearman rank-order correlation coefficient between effect size and age (rho, p, k)')
[c,p]=corr(studyAge,effects_average,'rows','pairwise','type','spearman');
disp([round(c,2) round(p,3) nPairs])

figure;
sc1=scatter(studyAge, effects_average,10*Study_N,'k',shapes{1},'filled','markeredgecolor','k'); % one marker per study, area proportional to study sample size
sc1.MarkerFaceAlpha=0.2;
grid on
ax=gca;ax.GridLineWidth = 1;
xlabel('Mean age of participant (years)')
ylabel('Cohen''s {\itd}')
h=findobj('FontName','Helvetica'); set(h,'FontSize',24,'Fontname','Arial');
set(gcf,'pos',[608 106 1060 893])
set(gca,'TickDir','out','LooseInset',[0.01 0.01 0.01 0.01])

%%
