Analysis Q2, Q3¶

Author: Andrei Stefan
Date: 09-11-2023
Required files: data/data_adjusted.csv
Output files: no output files

This file contains the code to reproduce the results of the analysis for questions 2 and 3, as well as the thematic analysis. The corresponding figures are Figure 4.2 and Figure 4.3.

Define helper functions and global variables needed for the questions¶

In [1]:
# import all required packages
import csv
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy.stats

from collections import Counter
from sklearn.metrics import cohen_kappa_score

Define global variables that hold the number of times each action was done in each reduced state and the mean reward overall.

In [2]:
state_action_times_reduced = {}
mean_reward_overall = 0

Define a helper function to process the raw reward, which can be either 0 or a list of four values that make up the reward.

In [3]:
def reward_function(reward):
    """
    Function that helps parse the reward.
    
    Args: reward - the reward as a string.

    Returns: the parsed reward.
    """
    
    # if the reward is a 0
    if reward == 0:
        # then return 0
        return 0
    
    # if there is a newline in the string
    if "\n" in reward:
        # then remove it
        reward = reward[:-1]
    
    # if the reward is a 0, but given as a string
    if reward == "0":
        # then return 0
        return 0
    
    # otherwise, the reward is not 0 and we need to process it
    # it might be enclosed in quotes and will always have four numbers enclosed in square brackets
    elif '\"' in reward or "\'" in reward or "[" in reward:
        # so, while there are either quotes or square brackets, remove the first and last character
        # e.g. it could start off as "[0, 1, 2, 3]"
        # removing the first and last character once leaves [0, 1, 2, 3]
        # removing them again leaves 0, 1, 2, 3
        while '\"' in reward or "\'" in reward or "[" in reward:
            reward = reward[1:-1]
        
        # split the reward
        split = reward.split(", ")
        
        # extract reward components
        split[0] = int(split[0])
        split[1] = int(split[1])
        split[2] = int(split[2])
        split[3] = int(split[3])
        
        # return the reward after applying the function
        return (split[0] + 0.8 * split[1] + 0.2 * split[2] + split[3]) / 3
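
As a quick sanity check, the snippet below applies the helper to a hypothetical terminal reward string and to a non-terminal reward; the list values are made up, but the weighting follows directly from the return statement above.

# the values below are made up; terminal rewards in the data are quoted lists of four integers
print(reward_function("\"[1, 1, 0, 1]\""))   # (1 + 0.8*1 + 0.2*0 + 1) / 3 = 0.9333...
print(reward_function("0"))                  # non-terminal samples simply yield 0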

Define a helper function for converting strings to booleans.

In [4]:
def string_to_bool(s):
    """
    Function that helps turn a "True" or "False" string into a 0/1 flag.
    
    Args: s - the string.

    Returns: 1 if the string is "True", 0 if the string is "False".
    """
    
    if s == "False":
        return 0
    elif s == "True":
        return 1
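
For example (note that the return values are the integers 0 and 1, which is what the time-step arithmetic below relies on):

print(string_to_bool("True"), string_to_bool("False"))   # 1 0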

Calculate how the data is distributed across the reduced states and the states without a person's situation, which will be needed for the imputation.

In [5]:
def data_distribution(filename):
    """
    Function that calculates how many times each action was done in each reduced state
    
    Args: filename - the name of the file with the data.

    Returns: a dict containing the number of times each action was done in each reduced state,
             the mean reward of all end states,
             a dict which contains the end state rewards at each time step from 2 to 6.
    """
    
    # initialise an empty list for holding the rewards
    rewards = []
    
    # initialise an empty dict for holding the number of times an action was done in a state
    state_action_times = {}
    
    # initialise an empty dict for holding the end state rewards in each time step
    rewards_per_timestep = {}
    
    # initialise all time steps 2 - 6 to an empty list in the dict
    for i in range(2,7):
        rewards_per_timestep[i] = []
    
    # open the file
    with open(filename) as f:
        
        # read all lines in the file
        lines = f.readlines()
        
        # loop over the lines, skipping the header
        for line in lines[1:]:
            
            # split the line
            split = line.split("\",")
            
            # get the state before
            state_before = split[0][1:]
            
            # get the action
            action = split[1].split(",\"")[0]
            
            # get the state after
            state_after = split[1].split(",\"")[1]
            
            # get the reward
            reward = split[2]
            
            # process the reward by applying the reward function to it
            reward_processed = reward_function(reward)
            
            # split the state_after
            split2 = state_after[1:-1].split(", ")
            
            # extract the data from the state after
            plans = int(split2[0])

            a1 = string_to_bool(split2[3])
            a2 = string_to_bool(split2[4])
            a3 = string_to_bool(split2[5])
            a4 = string_to_bool(split2[6])
            
            # calculate the time step which this state corresponds to by looking at how many actions have been done
            time_step = plans + a1 + a2 + a3 + a4
            
            # if the original reward was a list (so if it was a reward at the end of the dialogue)
            if "[" in reward:
                
                # then add it to the list of rewards
                rewards.append(reward_processed)
                
                # and add it to the list of rewards for this corresponding time step
                rewards_per_timestep[time_step].append(reward_processed)
            
            # if there is no entry for this action being done in this state_before
            if (state_before, action) not in state_action_times:
                # then make an entry and record that the action was done once
                state_action_times[(state_before, action)] = 1
            # otherwise, increment the count by 1
            else:
                state_action_times[(state_before, action)] += 1
    
    # loop over the number of times each action was done in each state to reduce the state
    for (state, action), times in state_action_times.items():
        
        # split the state
        split = state[1:-1].split(", ")
        
        # keep only the confidence and perceived usefulness from the original state
        reduced_state = f"[{split[1]}, {split[2]}]"
        
        # if there is no entry for the reduced state and action in the state_action_times_reduced dict
        if (reduced_state, action) not in state_action_times_reduced:
            # then make an entry and record how many times the action was done so far
            state_action_times_reduced[(reduced_state, action)] = times
        # otherwise, increment the count by the number of times this current item in the dict was done
        else:
            state_action_times_reduced[(reduced_state, action)] += times
    
    # make sure that the global variable is changed
    global mean_reward_overall
    # set the mean reward overall to the mean of the rewards at the end of the dialogue
    mean_reward_overall = np.mean(rewards)
    
    # return the number of times each action was done in each reduced state, the mean reward overall, and the rewards per time step
    return state_action_times_reduced, mean_reward_overall, rewards_per_timestep

# call the function to make sure that the global variable is updated and that we have what we need for calling q2 later
state_action_times_reduced, mean_reward_overall, rewards_per_timestep = data_distribution("../../data/data_adjusted.csv")

# print the number of times each action was done in each reduced state
print(state_action_times_reduced)
{("['0', '0']", 'show_testimonials'): 27, ("['0', '0']", 'changes_to_plan'): 51, ("['0', '0']", 'explain_planning'): 25, ("['0', '0']", 'identify_barriers'): 22, ("['0', '0']", 'deal_with_barriers'): 23, ("['1', '0']", 'show_testimonials'): 11, ("['1', '0']", 'changes_to_plan'): 23, ("['1', '0']", 'identify_barriers'): 17, ("['1', '0']", 'deal_with_barriers'): 10, ("['1', '0']", 'explain_planning'): 14, ("['1', '1']", 'show_testimonials'): 41, ("['1', '1']", 'changes_to_plan'): 81, ("['1', '1']", 'identify_barriers'): 33, ("['1', '1']", 'deal_with_barriers'): 41, ("['1', '1']", 'explain_planning'): 39, ("['0', '1']", 'show_testimonials'): 11, ("['0', '1']", 'changes_to_plan'): 18, ("['0', '1']", 'explain_planning'): 8, ("['0', '1']", 'identify_barriers'): 13, ("['0', '1']", 'deal_with_barriers'): 11}

Define helper functions for adjusting states to only contain the specified features and nothing else.

In [6]:
def adjust_state(state, features):
    """
    Function to adjust a state to only include the features given.
    
    Args: state - the state to adjust as a string,
          features - the list of features to include in the state.

    Returns: the adjusted state.
    """
    # split the state
    split = state.split(", ")
    
    # extract the confidence and perceived usefulness from the state
    c = split[1]
    pu = split[2]

    # start building the new state
    state = "["
    
    # check what features are provided and add them to the new state
    if "confidence" in features:
        state += f"{c}, "
    if "perceived_usefulness" in features:
        state += f"{pu}, "
    
    # close the state
    state += "]"
    
    # correction for the situations where confidence or perceived usefulness are the last feature to be added, 
    # in which case the state would end with a ", ]"
    if ", ]" in state:
        state = state[:-3] + "]"
    
    # return the new state
    return state
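
For example, applied to a hypothetical full state (the values are made up), the helper keeps only the requested features:

# the state below is made up; the full state has the form [plans, confidence, perceived usefulness, a1, a2, a3, a4]
print(adjust_state("[0, '1', '0', False, False, False, False]", ["confidence"]))                           # ['1']
print(adjust_state("[0, '1', '0', False, False, False, False]", ["confidence", "perceived_usefulness"]))   # ['1', '0']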
In [7]:
def adjust_states(original, new, features):
    """
    Function to adjust all states in a given file to only include the features given and save them to a new file.
    
    Args: original - the name of the file with states to adjust,
          new - the name of the file to save the adjusted states to,
          features - the list of features to include in the state.

    Returns: none.
    """
    # open the original file in read mode
    with open(original, 'r', newline='') as input_file:
        # open the new file in write mode
        with open(new, 'w', newline='') as file:
            # initialise a csv writer
            writer = csv.writer(file)
            
            # write the header of the file
            writer.writerow(["state_before", "action", "state_after", "reward"])
            
            # loop over all lines in the original file, except the first one which is the header
            for line in input_file.readlines()[1:]:
                # remove the newline at the end of the line
                if "\n" in line:
                    line = line[:-1]
                
                # split the line
                split = line.split("\",")
                
                # extract the state before, action, state after, and reward from the line
                state_before = split[0][1:]

                action = split[1].split(",")[0]

                state_after = split[1].split(",\"")[1]

                split_2 = line.split("]")

                reward = split_2[2].split("\",")[1]
                
                # make corrections to the reward extracted previously,
                # based on what kind of reward we have - it can be just a 0 or a list of the form [..., ..., ..., ...]
                if "[" in reward:
                    # if the reward is a list, then we removed the ] previously, so add it back
                    reward = f"{reward[1:]}]"
                    # if the reward has a \r (carriage return) at the end, remove it
                elif "\r" in reward:
                    reward = reward[:-1]
                
                # create new states before and after which only include the specified features
                new_state_before = adjust_state(state_before, features)
                new_state_after = adjust_state(state_after, features)
                
                # write the new row in the new file
                writer.writerow([f"{new_state_before}", f"{action}", f"{new_state_after}", f"{reward}"])

Define a helper function which removes the file adjusted.csv that is created for the reduced states.

In [8]:
def remove_new_file():
    """
    Function to remove the newly created file called "adjusted.csv".
    
    Args: none.

    Returns: none.
    """
    
    # set the name of the file
    name = "adjusted.csv"
    # if it exists
    if os.path.exists(name):
        # remove it
        os.remove(name)

Define a helper function for calculating the transition dict.

In [9]:
def calculate_transitions(data):
    """
    Function to calculate the transitions.
    
    Args: data - the data as a list of lines.

    Returns: the transition dict.
    """
    
    # initialise an empty transition dict
    transition_dict = {}

    # initialise a dict that will hold the number of times an action was done in a state
    state_action = {}
    
    # loop over the lines in the data
    for line in data:
        
        # split the line
        split = line[:-1].split("\",")
        # get the state before
        state_before = f"{split[0]}\""
        # get the action
        action = split[1].split(",\"")[0]
        # get the state after
        state_after = "\"" + split[1].split(",\"")[1].split("\",")[0] + "\""
        
        # split the state before
        split_1 = state_before[1:-1].split(", ")
        # adjust the state before to only include confidence and perceived usefulness
        adjusted_state_before = f"[{split_1[1]}, {split_1[2]}]"
        
        # split the state after
        split_2 = state_after[1:-1].split(", ")
        # adjust the state after to only include confidence and perceived usefulness
        adjusted_state_after = f"[{split_2[1]}, {split_2[2]}]"
        
        # if there is no entry for the adjusted state before - action - adjusted state after triple in the transition dict
        if (adjusted_state_before, action, adjusted_state_after) not in transition_dict:
            # then create an entry and start the count at 1
            transition_dict[(adjusted_state_before, action, adjusted_state_after)] = 1
        # otherwise, increment the count by 1
        else:
            transition_dict[(adjusted_state_before, action, adjusted_state_after)] += 1
        
        # if there is no entry for the adjusted state before - action pair in the state action dict
        if (adjusted_state_before, action) not in state_action:
            # then create an entry and start the count at 1
            state_action[(adjusted_state_before, action)] = 1
        # otherwise, increment the count by 1
        else:
            state_action[(adjusted_state_before, action)] += 1
    
    # create an empty dict for the final transition probabilities
    transition_dict_final = {}
    
    # loop over the initial transition dict
    for (state_before, action, state_after), count in transition_dict.items():
        # create an entry in the final dict by dividing the number of times the action led to the state after
        # by the number of times the action was taken in the state before
        transition_dict_final[(state_before, action, state_after)] = count / state_action[(state_before, action)]
    
    # return the final transition dict
    return transition_dict_final
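
A minimal example with two made-up rows (same shape as the rows in data_adjusted.csv) illustrates how the counts are normalised into probabilities:

# two made-up rows; the values are illustrative, not real data
toy = [
    "\"[0, '0', '0', False, False, False, False]\",explain_planning,\"[1, '0', '1', False, False, False, False]\",0\n",
    "\"[0, '0', '0', False, False, False, False]\",explain_planning,\"[1, '0', '0', False, False, False, False]\",0\n",
]
# the action was taken twice in ['0', '0'] and led to two different next states, so each transition gets probability 0.5
print(calculate_transitions(toy))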

Define a helper function for selecting the states at the beginning and end of the dialogue.

In [10]:
def get_start_and_end_states():
    """
    Function to help easily get the start and end states.
    
    Args: none - implicitly uses the data_adjusted.csv as the file for the data.

    Returns: the starting states, the rewards in end states, the end states without the person's situation, and all the end states.
    """
    
    # initialise 2 lists for holding the start and end states
    start_states = []
    all_end_states = []
    
    # initialise 2 dicts for holding the rewards of all end states and 
    # the rewards of all end states without the person's situation (same rewards, just states formatted differently)
    end_states_rewards = {}
    end_states_no_features_rewards = {}
    
    # open the file
    with open("../../data/data_adjusted.csv") as f:
        
        # read all lines
        lines = f.readlines()

        # initialise 2 variables which indicate if we started processing a person and if we finished processing a person
        got_start = False
        got_end = False
        
        # loop over all lines except the header
        for line in lines[1:]:
            # if we didn't start processing a person
            if not got_start:
                # then check if its reward is just a 0
                if line[-2] == "0":
                    # if it is, then split the line
                    split = line.split("\",")
                    # get the state before
                    start_state = split[0][1:]
                    # add it to the list of start states
                    start_states.append(start_state)
                    
                    # set got_start to true so we don't look at any other samples from this person until the last one
                    got_start = True
                    # also set got_end to false to indicate that we haven't yet found this person's last sample
                    got_end = False
                    
            # if we didn't finish processing a person
            if not got_end:
                # check if the line has a " before the newline (this means the reward is a list enclosed in quotes, so it is the last sample)
                if line[-2] == "\"":
                    # split the line
                    split = line.split(",\"[")
                    # get the state after
                    end_state = f"[{split[1][:-1]}"
                    # get the reward
                    reward = f"[{split[2][:-2]}"
                    
                    # if there is no entry for this end state in end_states_rewards
                    if end_state not in end_states_rewards:
                        # then create a new list with the reward of this state as the first item
                        end_states_rewards[end_state] = [reward_function(reward)]
                    # otherwise, append the reward to the list
                    else:
                        end_states_rewards[end_state].append(reward_function(reward))
                    
                    # add the end state to the list of end states
                    all_end_states.append(end_state)
                    
                    # split the end state
                    split = end_state[1:-1].split(", ")
                    
                    # extract the data from it
                    plans = int(split[0])
                    a1 = split[3]
                    a2 = split[4]
                    a3 = split[5]
                    a4 = split[6]
                    
                    # adjust the end state to only include the actions done
                    end_state = f"[{plans}, {a1}, {a2}, {a3}, {a4}]"
                    
                    # if there is no entry for this adjusted end state in end_states_no_features_rewards
                    if end_state not in end_states_no_features_rewards:
                        # then create a new list with the reward of this state as the first item
                        end_states_no_features_rewards[end_state] = [reward_function(reward)]
                    # otherwise, append the reward to the list
                    else:
                        end_states_no_features_rewards[end_state].append(reward_function(reward))
                    # set got_end to True and got_start to False, so we know that the next sample is a start sample that we should save
                    got_end = True
                    got_start = False
        
        # calculate the means of all rewards for the end states and the end states without the person's situation
        end_states_rewards = {k: np.mean(v) for k, v in end_states_rewards.items()}
        end_states_no_features_rewards = {k: np.mean(v) for k, v in end_states_no_features_rewards.items()}
        
        # return the start states, the mean rewards of the end states, the mean rewards of the 
        # end states without the person's situation, and all the end states
        return start_states, end_states_rewards, end_states_no_features_rewards, all_end_states
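
For orientation, a call unpacks into the four objects listed in the docstring; since the flag-based scan records exactly one start state and one end state per person (assuming each person's samples are stored consecutively), the two lists have one entry per person:

start_states, end_rewards, end_rewards_no_situation, all_end_states = get_start_and_end_states()
# one start state and one end state recorded per person
print(len(start_states), len(all_end_states))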

Analysis Q2, Figure 4.2¶

In Q2 we investigate how well the states at the end of the dialogue predict the rewards at the end of the dialogue. Using leave-one-out cross-validation, leaving out individual samples, we compare two ways of predicting these rewards: first, predicting that they are equal to the overall mean reward, and second, predicting that they are equal to the mean reward of all states identical to the left-out sample. For each approach we compute the L1 error (absolute difference) between the predicted value and the reward of the left-out sample.
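
The predictions of the second approach are additionally imputed when a state-action pair was seen fewer than 25 times, by blending with the overall mean reward. A small numeric illustration (the numbers are made up):

# made-up numbers: a state-action pair seen 20 times, a predicted reward of 2.0, an overall mean of 1.0
n, predicted, overall_mean = 20, 2.0, 1.0
adjusted = (n / 25) * predicted + ((25 - n) / 25) * overall_mean
print(adjusted)   # 0.8 * 2.0 + 0.2 * 1.0 = 1.8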

In [11]:
def q2(filename, state_action_times_reduced, mean_reward_overall):
    """
    Function to make the computations necessary to answer Q2.
    
    Args: filename - the name of the file containing the data,
          state_action_times_reduced - a dict containing the number of times each action was done in each reduced state,
          mean_reward_overall - a float representing the mean reward of all end states.

    Returns: none, but displays a plot.
    """
    
    # define the name of the new file as adjusted.csv
    new_file = "adjusted.csv"
    
    # create a new file with the states adjusted to only include the given features and nothing else
    adjust_states(filename, new_file, features=["confidence", "perceived_usefulness"])
    
    # read the new file into a dataframe
    df = pd.read_csv(new_file)
    
    # keep only end states
    df = df[df['reward'] != "0"]
        
    # apply the reward function to all rewards in the dataframe
    df['reward'] = df['reward'].apply(lambda x: reward_function(x))

    # initialise a dict that will hold the rewards in each state
    reward_per_state = {}
    
    # initialise dicts which will hold the L1 errors for each state and for each of the two approaches
    L1_errors_1 = {}

    L1_errors_2 = {}
    
    # initialise the list of possible states
    X = []
    
    # loop over 2 values for confidence
    for c in [0, 1]:
        # loop over 2 values for perceived usefulness
        for pu in [0, 1]:
            # generate the possible state
            state = f"['{c}', '{pu}']"
            # add it to the list of possible states
            X.append(state)
            # initialise empty lists in the other dicts under the key of this state
            reward_per_state[state] = []
            L1_errors_1[state] = []
            L1_errors_2[state] = []

    # loop over the dataframe
    for index, row in df.iterrows():
        
        # remove the current index to perform leave-one-out cross-validation
        df = df.drop(index)
        
        # get the data of the left-out state
        left_out_state_before = row['state_before']
        left_out_action = row['action']
        left_out_state = row['state_after']
        left_out_reward = row['reward']
        
        # compute the L1 error 
        L1_error_1 = abs(mean_reward_overall - left_out_reward)
        
        # get the states which are the same as the left out state
        df_current_same_states = df[df['state_after'] == left_out_state]
        
        # if there are states which are the same as this one
        if len(df_current_same_states) != 0:
            # then set the predicted reward to be the mean of the rewards of the states which are the same
            predicted_reward_2 = df_current_same_states['reward'].mean()
        else:
            # otherwise, predict that the reward is 0
            predicted_reward_2 = 0

        # check how many times the left out action was done in the left out state
        state_action_time = state_action_times_reduced[(left_out_state_before, left_out_action)]
        
        # if that is less than 25, impute the remainder up to 25 with the mean reward
        # e.g. if the action was done 20 times, then the final reward is 20/25 * the current reward + 5/25 * the mean reward overall
        # do this for both the predicted reward and the left out reward
        if state_action_time < 25:
            adjusted_reward = (state_action_time / 25) * predicted_reward_2 + \
                              (((25 - state_action_time) / 25) * mean_reward_overall)
            adjusted_left_out_reward = (state_action_time / 25) * left_out_reward + \
                                       (((25 - state_action_time) / 25) * mean_reward_overall)
        # otherwise, if there are at least 25 samples
        else:
            # then keep the predicted reward and the left out reward as they are
            adjusted_reward = predicted_reward_2
            adjusted_left_out_reward = left_out_reward
        
        # calculate the L1 error for the second approach, using the non-adjusted left out reward
        L1_error_2 = abs(adjusted_reward - left_out_reward)
        
        # if there is no entry for the left out state in reward_per_state
        if left_out_state not in reward_per_state:
            # then create a new list containing the adjusted left out reward
            reward_per_state[left_out_state] = [adjusted_left_out_reward]
            # and create new lists for the L1 errors in the corresponding dicts
            L1_errors_1[left_out_state] = [L1_error_1]
            L1_errors_2[left_out_state] = [L1_error_2]
        # otherwise, there is already an entry
        else:
            # so append the adjusted left out reward
            reward_per_state[left_out_state].append(adjusted_left_out_reward)
            # and also append the L1 errors
            L1_errors_1[left_out_state].append(L1_error_1)
            L1_errors_2[left_out_state].append(L1_error_2)
    
    # initialise an empty list that will hold the mean rewards per state
    rewards_per_state = []
    
    # add the mean reward of each state in the list
    for k, v in reward_per_state.items():
        rewards_per_state.append(np.mean(v))
    
    # initialise empty dicts for the mean L1 errors of each approach
    mean_errors_per_state_1 = {}

    mean_errors_per_state_2 = {}
    
    # add the mean of all L1 errors of each state to the dicts
    for k, v in L1_errors_1.items():
        mean_errors_per_state_1[k] = np.mean(v)

    for k, v in L1_errors_2.items():
        mean_errors_per_state_2[k] = np.mean(v)
    
    # initialise two empty lists which will hold the error bar intervals which display the 95% credibility intervals
    yerr_1 = []
    yerr_2 = []
    
    # loop over the states and their lists of L1 errors
    for k, v in L1_errors_1.items():
        
        # get the L1 errors as a list
        data = list(v)
        
        # compute the 95% credibility interval of the L1 errors
        mean, variance, std = scipy.stats.bayes_mvs(data, 0.95)
        
        # get the endpoints of the interval
        interval = mean[1]
        lowest = interval[0]
        highest = interval[1]
        
        # get the length of the interval
        diff = abs(highest - lowest)

        # add half of the length of the error bar to the list, since it will display this length upwards and
        # downwards from the mean, thereby displaying the entire length of the interval
        yerr_1.append(diff / 2)
    
    # same for the L1 errors of the second approach
    for k, v in L1_errors_2.items():
        data = list(v)

        mean, variance, std = scipy.stats.bayes_mvs(data, 0.95)

        interval = mean[1]
        lowest = interval[0]
        highest = interval[1]

        diff = abs(highest - lowest)

        yerr_2.append(diff / 2)
    
    # plot the results
    plt.rcParams['hatch.linewidth'] = 1
    plt.rcParams['hatch.color'] = "black"
    plt.rcParams['font.size'] = 15
    plt.rcParams["figure.figsize"] = (15,7)
    
    X_axis = np.arange(len(X))

    fig, ax1 = plt.subplots()

    color = 'tab:gray'
    ax1.set_xlabel('States', fontsize=15)
    ax1.set_ylabel('Mean L1 error', color="#0086b3", fontsize=15)
    ax1.bar(X_axis - 0.1, mean_errors_per_state_1.values(), 0.1,
            label='mean L1 error per state - predicting reward based on all states',
            color="#caf0f8", yerr=yerr_1, hatch="/")
    ax1.bar(X_axis + 0.1, mean_errors_per_state_2.values(), 0.1,
            label='mean L1 error per state - predicting reward based on same states',
            color="#0086b3", yerr=yerr_2, hatch="\\")
    ax1.tick_params(axis='y', labelcolor=color)

    ax2 = ax1.twinx()

    color = 'tab:purple'
    ax2.set_ylabel('Mean Reward', color=color)
    ax2.plot(np.array(rewards_per_state), label="mean reward per state", color="purple", linestyle="dashed")
    ax2.plot(np.repeat(mean_reward_overall, len(X)), label="mean reward overall", color="green")
    ax2.tick_params(axis='y')

    # set the reward axis to have the maximum and minimum possible rewards as endpoints
    ax2.set_ylim([-6.67, 10])

    for label in ax1.xaxis.get_majorticklabels():
        label.set_fontsize(15)

    for label in ax1.yaxis.get_majorticklabels():
        label.set_fontsize(15)


    plt.xticks(X_axis, X)

    ax1.legend(loc='center right', bbox_to_anchor=(1, 1.06))
    ax2.legend(loc='center left', bbox_to_anchor=(0, 1.06))
    
    plt.show()
    
    plt.close()
    
    remove_new_file()
    
q2("../../data/data_adjusted.csv", state_action_times_reduced, mean_reward_overall)
print("States are formatted as [confidence, perceived usefulness]")
States are formatted as [confidence, perceived usefulness]

Analysis Q3, Figure 4.3¶

In Q3, we investigate how well states predict the next state. Using leave-one-out cross-validation, this time leaving out whole people (since samples from the same person are not independent), we compare three ways of predicting the next state: using the transition function learned from the data, assigning an equal probability to each possible next state (25% for the 4 possible next states), and predicting that the state stays the same.
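
Concretely, each left-out transition (s, a, s_next) is scored as follows under the three approaches; the helper names below are hypothetical, and q3 implements this logic inline.

# a sketch of how q3 below scores one left-out transition; helper names are hypothetical
def score_transition_function(transition_dict, s, a, s_next):
    # probability the learned transition function assigns to the observed next state (0 if unseen)
    return transition_dict.get((s, a, s_next), 0)

def score_equal_probability(n_states=4):
    # every possible next state is predicted with equal probability
    return 1 / n_states

def score_same_state(s, s_next):
    # 1 if the state stayed the same, 0 otherwise
    return 1 if s == s_next else 0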

In [12]:
def q3(filename):
    """
    Function to make the computations necessary to answer Q3.
    
    Args: filename - the name of the file containing the data.

    Returns: none, but displays a plot.
    """
    
    # need to know what the end states are since we want to exclude people instead of samples 
    # when doing leave-one-out cross-validation
    start_states, end_states, end_states_no_features, all_end_states = get_start_and_end_states()
    
    # initialise an empty list which will hold all the possible states
    X = []
    
    # open the file
    with open(filename) as f:
        # read all lines
        lines = f.readlines()
        
        # initialise an empty list which will hold the start and end indices of each person
        indices = []
        
        # initialise the start index to 0
        start_index = 0
        
        # loop over all the lines except the header
        for i, line in enumerate(lines[1:]):
            
            # split the line
            split = line.split(",\"[")
            # get the end state
            end_state = f"[{split[1][:-1]}"
            
            # check if the end state is in the remainder of the list of all end states (starting from the length of the indices list)
            # so that an earlier person's identical end state is not matched by mistake
            if end_state in all_end_states[len(indices):]:
                # if it is, then the end index is the current one (i)
                end_index = i
                # add the previous start index and the newly found end index to the list of indices
                indices.append((start_index, end_index))
                # and the next start index is at i+1
                start_index = i + 1
        
        # initialise two dicts which will hold the probabilities of each approach
        approach_1 = {}

        approach_3 = {}
        
        # loop over 2 values for confidence
        for c in [0, 1]:
            # loop over 2 values of perceived usefulness
            for pu in [0, 1]:
                # create the possible state
                state = f"['{c}', '{pu}']"
                # add it to the list of all possible states
                X.append(state)
                # initialise empty lists for this possible state for each approach
                approach_1[state] = []
                approach_3[state] = []
        
        # loop over the start and end indices
        for (start_index, end_index) in indices:
            
            # loop over the lines of the current person, leaving out one line at a time
            for left_out_line in lines[start_index + 1:end_index + 2]:
                
                # split the left out line
                split = left_out_line[:-1].split("\",")
                # get the state before
                left_out_state_before = f"{split[0]}\""
                # get the action
                left_out_action = split[1].split(",\"")[0]
                # get the state after
                left_out_state_after = "\"" + split[1].split(",\"")[1].split("\",")[0] + "\""
                
                # split the state before
                split_1 = left_out_state_before[1:-1].split(", ")
                # adjust it to include only confidence and perceived usefulness
                adjusted_left_out_state_before = f"[{split_1[1]}, {split_1[2]}]"
                
                # do the same for the state after
                split_2 = left_out_state_after[1:-1].split(", ")
                adjusted_left_out_state_after = f"[{split_2[1]}, {split_2[2]}]"
                
                # make a copy of all the lines
                lines_copy = lines.copy()
                
                # delete this person's samples from the copied list of lines
                del lines_copy[start_index + 1:end_index + 2]
                
                # also delete the header
                del lines_copy[0]
                
                # calculate the transitions from the remaining samples
                transition_dict = calculate_transitions(lines_copy)

                # approach 1: using the transition dictionary
                
                # if there is an entry for the adjusted state before - action - adjusted state after in the transition dict
                if (adjusted_left_out_state_before, left_out_action, adjusted_left_out_state_after) in transition_dict:
                    # and if there is no entry for the adjusted state before in approach_1
                    if adjusted_left_out_state_before not in approach_1:
                        # then create a new list with the probability of transitioning to the adjusted state after 
                        # by taking the action in the adjusted state before
                        approach_1[adjusted_left_out_state_before] = [transition_dict[(
                            adjusted_left_out_state_before, left_out_action, adjusted_left_out_state_after)]]
                    # if there is already an entry, it means the list exists
                    else:
                        # so append the same probability to the list
                        approach_1[adjusted_left_out_state_before].append(transition_dict[(
                            adjusted_left_out_state_before, left_out_action, adjusted_left_out_state_after)])
                # if there is no entry in the transition dict, then the probability is 0
                else:
                    # if there is no entry for the adjusted state before in approach_1 
                    if adjusted_left_out_state_before not in approach_1:
                        # then create a new list with the probability of 0 in it
                        approach_1[adjusted_left_out_state_before] = [0]
                    # otherwise, the list exists, so append the probability of 0 to it
                    else:
                        approach_1[adjusted_left_out_state_before].append(0)

                # approach 3: same state
                
                # if the adjusted state before is the same as the adjusted state after, 
                # then we need to count that the state remained the same (adding a 1 to the dict for this state)
                if adjusted_left_out_state_before == adjusted_left_out_state_after:
                    # if there is no entry for the adjusted state before in approach_3
                    if adjusted_left_out_state_before not in approach_3:
                        # then create a new list with a 1 in it
                        approach_3[adjusted_left_out_state_before] = [1]
                    # otherwise, the list exists, so append a 1 to it
                    else:
                        approach_3[adjusted_left_out_state_before].append(1)
                # if the adjusted state before is not the same,
                # then we need to count that the state changed (adding a 0 to the dict for this state)
                else:
                    # if there is no entry for the adjusted state before in approach_3
                    if adjusted_left_out_state_before not in approach_3:
                        # then create a new list with a 0 in it
                        approach_3[adjusted_left_out_state_before] = [0]
                    # otherwise, the list exists, so append a 0 to it
                    else:
                        approach_3[adjusted_left_out_state_before].append(0)
        
        # initialise an empty dict which will hold the mean probabilities of being correct for each of the reduced states in approach 1
        mean_probabilities_per_state_1 = {}
        
        # add the mean probabilities in the dict
        for k, v in approach_1.items():
            mean_probabilities_per_state_1[k] = np.mean(v)
        
        # do the same for approach 3
        mean_per_state_3 = {}

        for k, v in approach_3.items():
            mean_per_state_3[k] = np.mean(v)
        
        # initialise two empty lists which will hold the error bar intervals which display the 95% credibility intervals
        yerr_1 = []
        yerr_3 = []
        
        # loop over the probabilities of approach 1
        for k, v in approach_1.items():
            # get the probabilities as a list
            data = list(v)
            
            # bayes_mvs needs at least two data points, so only compute the interval if there is more than one probability
            if len(data) > 1:
                
                # then compute the 95% credibility interval
                mean, variance, std = scipy.stats.bayes_mvs(data, 0.95)
                
                # get the endpoints of the interval
                interval = mean[1]
                lowest = interval[0]
                highest = interval[1]
                
                # get the length of the interval
                diff = abs(highest - lowest)
                
                # add half of the length of the error bar to the list, since it will display this length upwards and
                # downwards from the mean, thereby displaying the entire length of the interval
                yerr_1.append(diff / 2)
            # otherwise, there are not enough data points to compute an interval, so use an error bar of 0
            else:
                yerr_1.append(0)
        
        # do the same for the third approach
        for k, v in approach_3.items():
            data = list(v)

            if len(data) > 1:

                mean, variance, std = scipy.stats.bayes_mvs(data, 0.95)

                interval = mean[1]
                lowest = interval[0]
                highest = interval[1]

                diff = abs(highest - lowest)

                yerr_3.append(diff / 2)

            else:
                yerr_3.append(0)
        
        # the mean of the second approach is always 1/4 since it predicts each of the 4 possible next states with equal probability
        mean_approach_2 = 1 / 4
        
        # plot the results
                
        X_axis = np.arange(len(X))

        plt.rcParams['hatch.linewidth'] = 1
        plt.rcParams['hatch.color'] = "black"
        plt.rcParams['font.size'] = 15
        plt.rcParams["figure.figsize"] = (15,7)

        plt.bar(X_axis - 0.1, mean_probabilities_per_state_1.values(), 0.1, label='Transition function',
                color="#caf0f8", yerr=yerr_1, hatch="/")
        plt.bar(X_axis, mean_per_state_3.values(), 0.1, label='Same state', yerr=yerr_3, color="#48cae4",
                hatch="\\")
        plt.bar(X_axis + 0.1, np.repeat(mean_approach_2, len(X)), 0.1, label='Equally likely next state',
                color="#0086b3", hatch=".")

        plt.xticks(X_axis, X)
        plt.xlabel("States")
        plt.ylabel("Mean probability of next state")
        plt.legend(loc='center right', bbox_to_anchor=(1, 1.11))
        plt.tight_layout()
        plt.show()
        
        plt.close()
        
q3("../../data/data_adjusted.csv")
print("States are formatted as [confidence, perceived usefulness]")
States are formatted as [confidence, perceived usefulness]