Author: Martin Dierikx, Nele Albers
Date: January 2024
Required files: data/preprocessed_pre_screening_data.csv, data/preprocessed_demographic_data.csv, data/preprocessed_post_questionnaire_data.csv, and data/preprocessed_database_data.csv
Output files: none
This file contains the code to create the results and values reported in the paper. It also creates Figures 2a, 2b, 3, 4, 5a, 5b, and 6.
import math
from matplotlib import pyplot as plt
import numpy as np
import scipy.stats as sc
from sklearn import metrics
We gathered some demographic data through a pre-screening questionnaire.
# Initialize variables to store the demographic data
never_exercise_count = 0
steps_per_day = []
samsung_health_app = 0
iphone_health_app = 0
smartwatch = 0
other_apps = []
contemplating = 0
# Read the pre-processed pre-screening data
for d in open("../data/preprocessed_pre_screening_data.csv"):
data = d.split(",")
# Skip the header cells
if data[0] != 'ID':
# Count the people that are inactive
if 'Never' in data[1]:
never_exercise_count += 1
# Keep track of the average steps per day of people
steps_per_day.append(int(data[2]))
# Count the different ways of tracking steps
if 'Samsung' in data[3]:
samsung_health_app += 1
elif 'iPhone' in data[3]:
iphone_health_app += 1
elif 'Smartwatch' in data[3]:
smartwatch += 1
# List the other ways of tracking steps people inputted
if data[4] != '':
other_apps.append(data[4])
# Count the people that are contemplating becoming more physically active
if '6 months' in data[5]:
contemplating += 1
# Print all the data for the table
print(f"A total of {len(steps_per_day)} people participated in the study")
print(f"The average number of steps per day of people was {int(round(np.mean(steps_per_day), 0))}, with standard deviation of {int(round(np.std(steps_per_day), 0))}, within the range of {min(steps_per_day)} and {max(steps_per_day)}")
print(f"Samsung health app is used by {samsung_health_app} people, which is {int(round(samsung_health_app / 117 * 100, 0))}%")
print(f"iPhone health app is used by {iphone_health_app} people, which is {int(round(iphone_health_app / 117 * 100, 0))}%")
print(f"Smartwatch is used by {smartwatch} people, which is {int(round(smartwatch / 117 * 100, 0))}%")
print(f"Other ways are used by {117 - samsung_health_app - iphone_health_app - smartwatch} people, which is {int(round((117 - samsung_health_app - iphone_health_app - smartwatch) / 117 * 100, 0))}%")
print(f"The other ways mentioned by people are {other_apps}")
A total of 117 people participated in the study
The average number of steps per day of people was 4402, with standard deviation of 2383, within the range of 30 and 9000
Samsung health app is used by 36 people, which is 31%
iPhone health app is used by 38 people, which is 32%
Smartwatch is used by 28 people, which is 24%
Other ways are used by 15 people, which is 13%
The other ways mentioned by people are ['Huawei Health App', 'Huawei Health', 'Huawei Health ', 'Via pokemon go', 'Sweatcoin', 'map runner', 'Google Fit', 'sweetcoin', 'huawei health app', 'Stepcounter app on my phone', 'Google Fit', 'Ito Pedometer', ' Huawei Health app', 'Huawei health app', 'MStep app']
We also gathered some demographic data from Prolific.
# Initialize the variables to store the demographic data
men = 0
women = 0
age = []
# Read the data from the pre-processed Prolific data file
for d in open("../data/preprocessed_demographic_data.csv"):
data = d.split(",")
# Skip the header cells
if data[0] != 'ID':
# Count the number of males and females
if 'Man' in data[2]:
men += 1
elif 'Woman' in data[2]:
women += 1
# Keep track of the age of people
age.append(int(data[5].strip('\n')))
print(f"{women} people were females, which is {int(round(women / 117 * 100, 0))}%")
print(f"{men} people were males, which is {int(round(men / 117 * 100, 0))}%")
print(f"{117 - men - women} people were other, which is {int(round((117 - men - women) / 117 * 100, 0))}%")
print(f"The average age of people was {int(round(np.mean(age), 0))}, with standard deviation of {int(round(np.std(age), 0))}, within the range of {min(age)} and {max(age)}")
60 people were females, which is 51%
55 people were males, which is 47%
2 people were other, which is 2%
The average age of people was 28, with standard deviation of 8, within the range of 18 and 56
We calculate the ASA-score of our virtual coach and the average number of steps on the last day of the intervention using the data from the post-questionnaire.
# Store the scores from the participants
p_ids = []
average_steps_final_day = []
ASAQ = [[] for i in range(24)]
goal_difficulty_list = []
for d in open("../data/preprocessed_post_questionnaire_data.csv"):
data = d.split(",")
if data[0] != 'ID':
# Extract data on the average number of steps taken by people on the final day of the intervention
p_ids.append(data[0].strip("\""))
average_steps_final_day.append(int(data[1]))
# Extract the scores for each of the 24 items of the short version of the ASAQ
for item in range(24):
    ASAQ[item].append(int(data[item + 4].split(" (")[0]))
# Keep track of the goal difficulty data
if data[30] != '' and data[30] != 'Goal_difficulty':
goal_difficulty = data[30].split(" (")[0]
goal_difficulty_list.append(int(goal_difficulty))
# Calculate the mean values for all the ASAQ items
ASAQ_means = [np.mean(ASAQ[i]) for i in range(24)]
# Account for reversed scoring questions
ASAQ_means[11] = ASAQ_means[11] * -1
ASAQ_means[16] = ASAQ_means[16] * -1
ASAQ_means[17] = ASAQ_means[17] * -1
ASAQ_means[21] = ASAQ_means[21] * -1
print(f"A total of {len(ASAQ[0])} people completed the post questionnaire")
print(f"The average number of steps taken on the final day by people who completed the full study was {round(np.mean(average_steps_final_day))} with standard deviation of {round(np.std(average_steps_final_day))}")
print(f"The ASA score = {sum(ASAQ_means)}")
print(f"The mean goal difficulty is {np.mean(goal_difficulty_list)} with standard deviation of {np.std(goal_difficulty_list)}.")
A total of 75 people completed the post questionnaire
The average number of steps taken on the final day by people who completed the full study was 5367 with standard deviation of 3353
The ASA score = 19.32
The mean goal difficulty is 2.0434782608695654 with standard deviation of 2.0602151471207955.
# The number of values for each feature with format [motivation, self-efficacy, context state]
state_sizes = [2, 2, 2]
# Convert mood into a number between 0-10 to be used in the context state
possible_moods = ['glad', 'happy', 'pleased', 'delighted', 'serene', 'content', 'satisfied', 'relaxed', 'calm',
'excited', 'astonished', 'aroused', 'sleepy', 'neutral', 'tired', 'tense', 'alarmed', 'afraid',
'droopy', 'bored', 'angry', 'annoyed', 'frustrated', 'distressed', 'depressed', 'sad', 'gloomy',
'miserable']
mood_lookup = [10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 7, 7, 5, 5, 5, 5, 5, 4, 3, 3, 3, 3, 2, 1, 1, 1, 1, 0]
# The discount factor and epsilon used in the value iteration
discount = 0.85
epsilon = 0.01
# List all the possible states and actions
possible_actions = ['dec', 'sdec', 'nothing', 'sinc', 'inc']
possible_states = []
for sm in range(state_sizes[0]):
for se in range(state_sizes[1]):
for cs in range(state_sizes[2]):
possible_states.append(f"[{sm}, {se}, {cs}]")
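# Quick check (not part of the original analysis): the nested loops above enumerate the eight
# binary [motivation, self-efficacy, context state] combinations in a fixed order.
print(possible_states)
# ['[0, 0, 0]', '[0, 0, 1]', '[0, 1, 0]', '[0, 1, 1]', '[1, 0, 0]', '[1, 0, 1]', '[1, 1, 0]', '[1, 1, 1]']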
def calculateReward(steps_taken, recommended_goal, action, reward_function_to_use):
"""
Function to calculate the reward for a sample.
Args:
steps_taken: number of steps taken in the sample.
recommended_goal: recommended goal to achieve in the sample.
action: reinforcement learning action taken in the sample.
reward_function_to_use: whether to penalize overachieving (0) or not (1)
Returns: calculated reward belonging to the sample.
"""
diff = abs(recommended_goal - steps_taken)
# If the goal was achieved, it should be less penalized for the difference in steps
if recommended_goal < steps_taken:
diff = 0.5 * diff
# The standard reward function
if reward_function_to_use == 0:
reward = 1 - diff / recommended_goal
return reward
# Remove the penalty for overachieving
else:
if recommended_goal < steps_taken:
reward = 1
else:
reward = 1 - diff / recommended_goal
return reward
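# A small sanity check of the reward definition above, using made-up numbers rather than study data:
# overachieving a 5000-step goal by 1000 steps is penalized at half weight under the default reward
# function (reward_function_to_use = 0) and not at all under the alternative (1).
print(calculateReward(6000, 5000, possible_actions.index('nothing'), 0))  # 1 - (0.5 * 1000) / 5000 = 0.9
print(calculateReward(6000, 5000, possible_actions.index('nothing'), 1))  # overachieving is not penalized: 1
print(calculateReward(4000, 5000, possible_actions.index('nothing'), 0))  # 1 - 1000 / 5000 = 0.8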
def read_data(index_to_leave_out, reward_function_to_use):
"""
Method to read the sample data from the csv file and put it into variables.
If index_to_leave_out is specified, the samples with that pid will be retrieved as left-out samples for analysis.
Args:
index_to_leave_out: if specified, the samples with that pid will be retrieved as left-out samples for analysis.
reward_function_to_use: number indicating which reward function to use for the calculation of the rewards:
- 0: default reward,
- 1: do not penalize overachieving
Returns:
- a list of states for every sample,
- a list of actions for every sample,
- a list of next states for every sample,
- a list of rewards for every sample,
- a table with the mean rewards for each state-action pair,
- a list with states for every sample of the (possible) left-out person,
- a list with actions for every sample of the (possible) left-out person,
- a list with rewards for every sample of the (possible) left-out person,
- a list with next states for every sample of the (possible) left-out person,
- a lookup list with the transformed values (0-1) for every plain motivation value (0-10),
- a lookup list with the transformed values (0-1) for every plain self-efficacy value (0-10),
- a lookup list with the transformed values (0-1) for every plain context state value (0-30),
- a list with session_numbers for every sample,
- a list with the number of rejected goal proposals per sample,
- a list with the actual goal that was set for each sample,
- a list with the average steps taken before the intervention by people who completed the whole study
"""
# Lists for all motivation, self-efficacy, and context state values
sm_list = []
se_list = []
cs_list = []
# Lists for the other information on the data samples
full_states = []
transformed_states = []
actions = []
full_next_states = []
transformed_next_states = []
rewards = []
steps_taken = []
recommended_goals = []
session_numbers = []
number_of_rejected_proposals = []
actual_goals = []
average_steps_before_intervention = []
# Lists for the information on the left-out samples
loocv_datapoints = []
loocv_actions = []
loocv_rewards = []
loocv_next_states = []
# Read the data from the file
for d in open("../data/preprocessed_database_data.csv"):
data = d.split(",")
# To ignore the header cells
if data[0] != "ID":
# Check if this data point needs to be left out for analysis
if data[0] == "p" + str(index_to_leave_out):
# State data
loocv_datapoints.append([int(data[1]), int(data[2]), int(data[3]), int(data[4]), mood_lookup[possible_moods.index(data[5])]])
sm_list.append(int(data[1]))
se_list.append(int(data[2]))
cs_list.append(int(data[3]) + int(data[4]) + mood_lookup[possible_moods.index(data[5])])
# Action data
loocv_actions.append(possible_actions.index(data[6]))
# Reward data
prev_activity = data[13].split(";")
prev_steps = []
for steps in prev_activity:
prev_steps.append(int(steps))
prev_steps.sort()
recommended_goal = max(2000, min(10000, int(math.ceil(np.percentile(prev_steps, 60) / 100.0)) * 100))
loocv_rewards.append(calculateReward(int(data[14]), recommended_goal, possible_actions.index(data[6]), reward_function_to_use))
# Next state data
loocv_next_states.append([int(data[7]), int(data[8]), int(data[9]), int(data[10]), mood_lookup[possible_moods.index(data[11])]])
else:
# State data
self_motivation = int(data[1])
sm_list.append(self_motivation)
self_efficacy = int(data[2])
se_list.append(self_efficacy)
rest = int(data[3])
available_time = int(data[4])
mood = mood_lookup[possible_moods.index(data[5])]
cs_list.append(rest + available_time + mood)
full_states.append([self_motivation, self_efficacy, rest, available_time, mood])
# Action data
action = possible_actions.index(data[6])
actions.append(action)
# Reward data
prev_activity = data[13].split(";")
prev_steps = []
for steps in prev_activity:
prev_steps.append(int(steps))
prev_steps.sort()
recommended_goal = max(2000, min(10000, int(math.ceil(np.percentile(prev_steps, 60) / 100.0)) * 100))
initial_reward = calculateReward(int(data[14]), recommended_goal, action, reward_function_to_use)
rewards.append(initial_reward)
steps_taken.append(int(data[14]))
recommended_goals.append(recommended_goal)
if int(data[20].strip('\n')) == 1 and data[0].strip("\"") in p_ids:
average_steps_before_intervention.append(np.mean(prev_steps))
# Next state data
self_motivation_next = int(data[7])
self_efficacy_next = int(data[8])
rest_next = int(data[9])
available_time_next = int(data[10])
mood_next = mood_lookup[possible_moods.index(data[11])]
full_next_states.append([self_motivation_next, self_efficacy_next, rest_next, available_time_next, mood_next])
session_numbers.append(int(data[20].strip('\n')))
number_of_rejected_proposals.append(int(data[18]))
actual_goals.append(int(data[17]))
# Get the transformations of the motivation
sm_lookup = []
sm_percentiles = []
for i in range(11):
percentile = sc.percentileofscore(sm_list, i, 'weak')
sm_percentiles.append(percentile)
# Transform into a 1 if the value is in the upper 50% of the data
if percentile > 50:
sm_lookup.append(1)
else:
sm_lookup.append(0)
# Get the transformations of the self-efficacy
se_lookup = []
se_percentiles = []
for i in range(11):
percentile = sc.percentileofscore(se_list, i, 'weak')
se_percentiles.append(percentile)
# Transform into a 1 if the value is in the upper 50% of the data
if percentile > 50:
se_lookup.append(1)
else:
se_lookup.append(0)
# Get the transformations of the context state
cs_lookup = []
cs_percentiles = []
for i in range(31):
percentile = sc.percentileofscore(cs_list, i, 'weak')
cs_percentiles.append(percentile)
# Transform into a 1 if the value is in the upper 50% of the data
if percentile > 50:
cs_lookup.append(1)
else:
cs_lookup.append(0)
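# Illustration of the binarization above (hypothetical numbers, not computed from the data): if,
# say, 60% of the observed context-state totals are at most 17, sc.percentileofscore(cs_list, 17, 'weak')
# returns 60 (> 50) and a raw total of 17 maps to the high bin (1); a raw value whose weak percentile
# is at most 50 maps to the low bin (0).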
# Get the mean rewards for each state-action pair
mean_rewards = [[0 for i in range(len(possible_actions))] for j in range(state_sizes[2] * state_sizes[1] * state_sizes[0])]
state_action_pair_counter = [[0 for i in range(len(possible_actions))] for j in range(state_sizes[2] * state_sizes[1] * state_sizes[0])]
for i in range(len(rewards)):
# Transform the state
transformed_sm = sm_lookup[full_states[i][0]]
transformed_se = se_lookup[full_states[i][1]]
transformed_cs = cs_lookup[full_states[i][2] + full_states[i][3] + full_states[i][4]]
transformed_states.append([transformed_sm, transformed_se, transformed_cs])
# Make a unique number from the state features to use it as index
state_as_number = transformed_sm * state_sizes[1] * state_sizes[2] + transformed_se * state_sizes[2] + transformed_cs
# Transform the next state
transformed_next_sm = sm_lookup[full_next_states[i][0]]
transformed_next_se = se_lookup[full_next_states[i][1]]
transformed_next_cs = cs_lookup[full_next_states[i][2] + full_next_states[i][3] + full_next_states[i][4]]
transformed_next_states.append([transformed_next_sm, transformed_next_se, transformed_next_cs])
# Keep track of the mean reward R(s,a)
reward = mean_rewards[state_as_number][actions[i]]
state_action_pair_encounters = state_action_pair_counter[state_as_number][actions[i]]
if state_action_pair_encounters == 0:
# The first encounter for this state-action pair, so just use the sample reward as mean reward
mean_rewards[state_as_number][actions[i]] = rewards[i]
state_action_pair_counter[state_as_number][actions[i]] = 1
else:
# Update the previously saved mean reward
total_reward = reward * state_action_pair_encounters + rewards[i]
state_action_pair_counter[state_as_number][actions[i]] += 1
mean_rewards[state_as_number][actions[i]] = total_reward / (state_action_pair_encounters + 1)
# Return the data
return transformed_states, actions, transformed_next_states, rewards, mean_rewards, loocv_datapoints, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken
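# Hedged illustration of the goal-recommendation rule used inside read_data, with made-up step counts
# rather than data from the study: the 60th percentile of the previous days' steps is rounded up to
# the nearest 100 and clipped to the 2000-10000 range.
example_prev_steps = sorted([2800, 3100, 3600, 4100, 4200, 4700, 5000])
example_goal = max(2000, min(10000, int(math.ceil(np.percentile(example_prev_steps, 60) / 100.0)) * 100))
print(example_goal)  # the 60th percentile is 4160, which rounds up to 4200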
We examined the distribution of the number of rejected goal proposals per conversation session.
# Extract the sample data
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(-1, 0)
# Print the number of rejections per session
print(f"The total number of samples is {len(states)}.")
print(f"In total {sum(number_of_rejected_proposals)} proposed goals were rejected.")
print(f"On average {np.mean(number_of_rejected_proposals)} proposed goals were rejected per session with standard deviation of {np.std(number_of_rejected_proposals)}.")
The total number of samples is 381.
In total 100 proposed goals were rejected.
On average 0.26246719160104987 proposed goals were rejected per session with standard deviation of 0.6714134439398249.
For each state-action pair, we counted the number of available samples. If this count is lower than the target sample size divided by the number of state-action pairs (380 / 40 = 10), we balanced the lack of samples by weighting in the overall mean reward or transition probabilities.
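As a hedged illustration of this weighting rule (the numbers below are made up and not taken from the data), a state-action pair with only 4 samples keeps 4/10 of its empirical mean reward and takes the remaining 6/10 from the overall mean reward.
# Hypothetical example of the balancing rule applied below
pair_sample_count = 4         # observed samples for some state-action pair (made up)
pair_mean_reward = 0.60       # mean reward over those samples (made up)
overall_mean_reward = 0.75    # mean reward over all samples (made up)
balanced_reward = (pair_sample_count / 10) * pair_mean_reward + ((10 - pair_sample_count) / 10) * overall_mean_reward
print(balanced_reward)        # 0.4 * 0.60 + 0.6 * 0.75 ≈ 0.69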
# Extract the sample data
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(-1, 0)
# Count how many times each state-action pair occurs
sample_count = [[0 for i in range(len(possible_actions))] for j in range(len(possible_states))]
state_to_number_lookup = [state_sizes[1] * state_sizes[2], state_sizes[2], 1]
for i in range(len(states)):
state_as_number = state_to_number_lookup[0] * states[i][0] + state_to_number_lookup[1] * states[i][1] + states[i][2]
sample_count[state_as_number][actions[i]] += 1
def calculateTransitions(states, actions, next_states):
"""
Method to calculate the transition function from the data.
Args:
states: list with the states from all the samples in the data.
actions: list with the actions from all the samples in the data.
next_states: list with the next states from all the samples in the data.
Returns: three-dimensional list containing a list of probabilities per next state for each state-action pair.
"""
# Every state-action pair has a list with probabilities for each next state
transitions = [[[] for i in range(len(possible_actions))] for j in range(len(possible_states))]
# Get the next state for each datapoint and sort them by state-action pair
for i in range(len(states)):
state_as_number = state_to_number_lookup[0] * states[i][0] + state_to_number_lookup[1] * states[i][1] + state_to_number_lookup[2] * states[i][2]
action_as_number = actions[i]
transitions[state_as_number][action_as_number].append(next_states[i])
# Calculate the probability of each next state happening for each state-action pair
for i in range(len(possible_states)):
for j in range(len(possible_actions)):
next_states_probs = [0 for l in range(len(possible_states))]
number_of_next_states = len(transitions[i][j])
for k in range(number_of_next_states):
next_state = transitions[i][j][k]
next_state_as_number = state_to_number_lookup[0] * next_state[0] + state_to_number_lookup[1] * next_state[1] + state_to_number_lookup[2] * next_state[2]
if next_states_probs[next_state_as_number] == 0:
# Count how many of the transitions of a state-action pair are towards a specific next state
number_of_occurrences_of_next_state = transitions[i][j].count(next_state)
next_states_probs[next_state_as_number] = number_of_occurrences_of_next_state / number_of_next_states
transitions[i][j] = next_states_probs
# Balance the transition probabilities for the state-action pairs with too little data
for i in range(len(possible_states)):
for j in range(len(possible_actions)):
if sample_count[i][j] < 10:
for k in range(len(possible_states)):
transitions[i][j][k] = (sample_count[i][j] / 10) * transitions[i][j][k] + ((10 - sample_count[i][j]) / 10) * (1 / len(possible_states))
return transitions
def calculateQValues(mean_rewards, transitions):
"""
Method to calculate the q-values using value iteration.
Args:
mean_rewards: list with mean rewards for each state-action pair.
transitions: list containing the probabilities for each transition for each state-action pair.
Returns: list containing the q-values for each state-action pair.
"""
delta = 10
q_values = [[0 for i in range(len(possible_actions))] for j in range(len(possible_states))]
# Continue for as long as there is a significant change in the q-table after an iteration
while delta > epsilon:
delta = 0
next_qvalues = [[0 for i in range(len(possible_actions))] for j in range(len(possible_states))]
for i in range(len(possible_states)):
for j in range(len(possible_actions)):
future_reward = 0
# Calculate the future rewards for each next state
for k in range(len(possible_states)):
if transitions[i][j][k] != 0:
best_future_reward = 0
# Use the future action that gives the highest q-value
for l in range(len(possible_actions)):
best_future_reward = max(best_future_reward, q_values[k][l])
future_reward += transitions[i][j][k] * best_future_reward
# Calculate the new q-values and calculate the change in values
next_qvalues[i][j] = mean_rewards[i][j] + discount * future_reward
delta = max(delta, abs(next_qvalues[i][j] - q_values[i][j]))
# Update the q-values
q_values = next_qvalues.copy()
return q_values
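# For reference, one Bellman backup as performed inside calculateQValues, written out for a single
# state-action pair with made-up numbers (not study data):
#     new Q(s, a) = R(s, a) + discount * sum over s' of T(s' | s, a) * max over a' of Q(s', a')
example_mean_reward = 0.7                     # made-up R(s, a)
example_transitions = [0.6, 0.4]              # made-up transition probabilities to two next states
example_next_q = [[0.9, 1.1], [0.5, 0.8]]     # made-up current Q-values of those next states
example_backup = example_mean_reward + discount * sum(p * max(q_row) for p, q_row in zip(example_transitions, example_next_q))
print(example_backup)                         # 0.7 + 0.85 * (0.6 * 1.1 + 0.4 * 0.8) ≈ 1.533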
We predicted people's rewards using two approaches: one based only on the samples with the same action, and one based on the samples with both the same action and the same state.
After that, we repeated the analysis with the penalty for overachieving removed from the reward function.
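A minimal sketch of the leave-one-out prediction that follows (all values below are hypothetical): the predicted reward for a held-out sample is the mean reward of the matching training samples, and the reported error is the absolute (L1) difference to the held-out sample's actual reward.
# Hypothetical held-out sample and matching training rewards (made-up values)
held_out_reward = 0.85
matching_training_rewards = [0.7, 0.9, 0.8]     # e.g. training samples with the same action
predicted_reward = np.mean(matching_training_rewards)
print(abs(held_out_reward - predicted_reward))  # |0.85 - 0.8| ≈ 0.05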
print("Analysis Question 1:")
# Keep track of the prediction error per state when predicting based on only actions or both actions and states
total_prediction_error_actions_per_state = [[] for i in range(len(possible_states))]
total_prediction_error_actions_states_per_state = [[] for i in range(len(possible_states))]
# Leave one person out and predict for the samples of that person
for iteration in range(118):
# Extract the sample data with one left-out person
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(iteration, 0)
# Calculate prediction errors for every sample of the left-out person
for j in range(len(loocv_states)):
total_reward_action = 0
total_reward_action_state = 0
action_counter = 0
action_state_counter = 0
loocv_state_as_number = state_to_number_lookup[0] * sm_lookup[loocv_states[j][0]] + state_to_number_lookup[1] * se_lookup[loocv_states[j][1]] + cs_lookup[loocv_states[j][2] + loocv_states[j][3] + loocv_states[j][4]]
for i in range(len(actions)):
if actions[i] == loocv_actions[j]:
# Sum the rewards to base the prediction on if the action is the same
total_reward_action += rewards[i]
action_counter += 1
if state_to_number_lookup[0] * states[i][0] + state_to_number_lookup[1] * states[i][1] + states[i][2] == loocv_state_as_number:
# Sum the rewards to base the prediction on if the action and the state are the same
total_reward_action_state += rewards[i]
action_state_counter += 1
# Balance the total reward if the number of samples for a certain state-action pair is too small
number_of_samples_for_action_state = sample_count[loocv_state_as_number][loocv_actions[j]]
if number_of_samples_for_action_state < 10:
total_reward_action += (10 - number_of_samples_for_action_state) * np.mean(rewards)
total_reward_action_state += (10 - number_of_samples_for_action_state) * np.mean(rewards)
action_counter += 10 - number_of_samples_for_action_state
action_state_counter += 10 - number_of_samples_for_action_state
# Predict the reward for the sample and calculate the prediction error
average_reward_action_state = total_reward_action_state / action_state_counter
average_reward_action = total_reward_action / action_counter
l1_error_action = abs(loocv_rewards[j] - average_reward_action)
l1_error_action_and_state = abs(loocv_rewards[j] - average_reward_action_state)
total_prediction_error_actions_per_state[loocv_state_as_number].append(l1_error_action)
total_prediction_error_actions_states_per_state[loocv_state_as_number].append(l1_error_action_and_state)
average_prediction_error_actions_per_state_graph = []
average_prediction_error_actions_states_per_state_graph = []
# Lists for calculating the credibility intervals for the prediction error
means1 = []
means2 = []
for i in range(len(possible_states)):
# Calculate the average prediction error per state
average_prediction_error_actions_per_state_graph.append(sum(total_prediction_error_actions_per_state[i]) / len(total_prediction_error_actions_per_state[i]))
average_prediction_error_actions_states_per_state_graph.append(sum(total_prediction_error_actions_states_per_state[i]) / len(total_prediction_error_actions_states_per_state[i]))
# Calculate the 95% credibility intervals
mean, variance, std = sc.bayes_mvs(total_prediction_error_actions_per_state[i], 0.95)
means1.append(abs(mean[1][0] - mean[1][1]) * 0.5)
mean, variance, std = sc.bayes_mvs(total_prediction_error_actions_states_per_state[i], 0.95)
means2.append(abs(mean[1][0] - mean[1][1]) * 0.5)
# Extract the sample data without leaving a person out
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(-1, 0)
# Calculate the mean reward per state after balancing the state-action pairs with too little data
# Also, count the number of imputed samples
mean_rewards_per_state = []
number_of_imputed_samples = 0
for i in range(len(possible_states)):
total_reward_per_state = 0
for j in range(len(possible_actions)):
if sample_count[i][j] < 10:
total_reward_per_state += (sample_count[i][j] / 10) * mean_rewards[i][j] + ((10 - sample_count[i][j]) / 10) * np.mean(rewards)
number_of_imputed_samples += 10 - sample_count[i][j]
else:
total_reward_per_state += mean_rewards[i][j]
total_reward_per_state = total_reward_per_state / len(possible_actions)
mean_rewards_per_state.append(total_reward_per_state)
# Print the mean reward per state
print(f"The mean reward for state {possible_states[i]} is {total_reward_per_state}")
# Print the number of imputed data samples
print(f"In total, {number_of_imputed_samples} samples were imputed, which is {round(number_of_imputed_samples / (381 + number_of_imputed_samples) * 100, 2)}% of the entire dataset.")
# Calculate the overall mean reward
average_reward_total = [np.mean(rewards) for k in range(len(possible_states))]
# Plot the prediction errors
plt.subplots(figsize=(10, 6))
x1 = [i for i in range(len(possible_states))]
x2 = [i + 0.4 for i in x1]
x3 = [i + 0.2 for i in x1]
plt.bar(x1, average_prediction_error_actions_per_state_graph, width=0.4, label='action', color='lightblue', yerr=means1)
plt.bar(x2, average_prediction_error_actions_states_per_state_graph, width=0.4, label='action\n& state', color='orange', hatch='/', yerr=means2, edgecolor='white')
plt.xticks(x3, possible_states)
plt.subplots_adjust(bottom=0.2)
plt.ylim(0.00, 0.35)
plt.legend(bbox_to_anchor=(1.215, 0.66))
plt.xlabel("State [motivation, self-efficacy, context state]", fontsize = 18)
plt.ylabel("Mean prediction error", fontsize = 18)
# Plot the mean rewards
axes2 = plt.twinx()
axes2.plot(x3, mean_rewards_per_state, label='per state', color='grey')
axes2.plot(x3, average_reward_total, label='overall', color='black', linestyle='dashed')
axes2.set_ylabel("Mean reward", fontsize = 18)
axes2.set_ylim(0.0, 1.0)
plt.legend(bbox_to_anchor=(1.23, 0.5))
plt.show()
print("Analysis Question 1 without penalty for overachieving:")
# Keep track of the prediction error per state when predicting based on only actions or both actions and states
total_prediction_error_actions_per_state = [[] for i in range(len(possible_states))]
total_prediction_error_actions_states_per_state = [[] for i in range(len(possible_states))]
# Leave one person out and predict for the samples of that person
for iteration in range(118):
# Extract the sample data with one left-out person
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(iteration, 1)
# Calculate prediction errors for every sample of the left-out person
for j in range(len(loocv_states)):
total_reward_action = 0
total_reward_action_state = 0
action_counter = 0
action_state_counter = 0
loocv_state_as_number = state_to_number_lookup[0] * sm_lookup[loocv_states[j][0]] + state_to_number_lookup[1] * se_lookup[loocv_states[j][1]] + cs_lookup[loocv_states[j][2] + loocv_states[j][3] + loocv_states[j][4]]
for i in range(len(actions)):
if actions[i] == loocv_actions[j]:
# Sum the rewards to base the prediction on if the action is the same
total_reward_action += rewards[i]
action_counter += 1
if state_to_number_lookup[0] * states[i][0] + state_to_number_lookup[1] * states[i][1] + states[i][2] == loocv_state_as_number:
# Sum the rewards to base the prediction on if the action and the state are the same
total_reward_action_state += rewards[i]
action_state_counter += 1
# Balance the total reward if the number of samples for a certain state-action pair is too small
number_of_samples_for_action_state = sample_count[loocv_state_as_number][loocv_actions[j]]
if number_of_samples_for_action_state < 10:
total_reward_action += (10 - number_of_samples_for_action_state) * np.mean(rewards)
total_reward_action_state += (10 - number_of_samples_for_action_state) * np.mean(rewards)
action_counter += 10 - number_of_samples_for_action_state
action_state_counter += 10 - number_of_samples_for_action_state
# Predict the reward for the sample and calculate the prediction error
average_reward_action_state = total_reward_action_state / action_state_counter
average_reward_action = total_reward_action / action_counter
l1_error_action = abs(loocv_rewards[j] - average_reward_action)
l1_error_action_and_state = abs(loocv_rewards[j] - average_reward_action_state)
total_prediction_error_actions_per_state[loocv_state_as_number].append(l1_error_action)
total_prediction_error_actions_states_per_state[loocv_state_as_number].append(l1_error_action_and_state)
average_prediction_error_actions_per_state_graph = []
average_prediction_error_actions_states_per_state_graph = []
# Lists for calculating the credibility intervals for the prediction error
means1 = []
means2 = []
for i in range(len(possible_states)):
# Calculate the average prediction error per state
average_prediction_error_actions_per_state_graph.append(sum(total_prediction_error_actions_per_state[i]) / len(total_prediction_error_actions_per_state[i]))
average_prediction_error_actions_states_per_state_graph.append(sum(total_prediction_error_actions_states_per_state[i]) / len(total_prediction_error_actions_states_per_state[i]))
# Calculate the 95% credibility intervals
mean, variance, std = sc.bayes_mvs(total_prediction_error_actions_per_state[i], 0.95)
means1.append(abs(mean[1][0] - mean[1][1]) * 0.5)
mean, variance, std = sc.bayes_mvs(total_prediction_error_actions_states_per_state[i], 0.95)
means2.append(abs(mean[1][0] - mean[1][1]) * 0.5)
# Extract the sample data without leaving a person out
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(-1, 1)
# Calculate the mean reward per state after balancing the state-action pairs with too little data
mean_rewards_per_state = []
for i in range(len(possible_states)):
total_reward_per_state = 0
for j in range(len(possible_actions)):
if sample_count[i][j] < 10:
total_reward_per_state += (sample_count[i][j] / 10) * mean_rewards[i][j] + ((10 - sample_count[i][j]) / 10) * np.mean(rewards)
else:
total_reward_per_state += mean_rewards[i][j]
total_reward_per_state = total_reward_per_state / len(possible_actions)
mean_rewards_per_state.append(total_reward_per_state)
# Calculate the overall mean reward
average_reward_total = [np.mean(rewards) for k in range(len(possible_states))]
# Plot the prediction errors
plt.subplots(figsize=(10, 6))
x1 = [i for i in range(len(possible_states))]
x2 = [i + 0.4 for i in x1]
x3 = [i + 0.2 for i in x1]
plt.bar(x1, average_prediction_error_actions_per_state_graph, width=0.4, label='action', color='lightblue', yerr=means1)
plt.bar(x2, average_prediction_error_actions_states_per_state_graph, width=0.4, label='action and state', color='orange', hatch='/', yerr=means2, edgecolor='white')
plt.xticks(x3, possible_states)
plt.subplots_adjust(bottom=0.2)
plt.ylim(0.00, 0.35)
plt.xlabel("State [motivation, self-efficacy, context state]", fontsize = 18)
plt.ylabel("Mean prediction error", fontsize = 18)
# Plot the mean rewards
axes2 = plt.twinx()
axes2.plot(x3, mean_rewards_per_state, label='per state', color='grey')
axes2.plot(x3, average_reward_total, label='overall', color='black', linestyle='dashed')
axes2.set_ylabel("Mean reward", fontsize = 18)
axes2.set_ylim(0.0, 1.0)
plt.show()
Analysis Question 1:
The mean reward for state [0, 0, 0] is 0.7649474262833345
The mean reward for state [0, 0, 1] is 0.7333409723983719
The mean reward for state [0, 1, 0] is 0.7569768989638951
The mean reward for state [0, 1, 1] is 0.7981019682777097
The mean reward for state [1, 0, 0] is 0.7744406481841228
The mean reward for state [1, 0, 1] is 0.7572524593028683
The mean reward for state [1, 1, 0] is 0.7951508293425871
The mean reward for state [1, 1, 1] is 0.7340725553004372
In total, 125 samples were imputed, which is 24.7% of the entire dataset.
Analysis Question 1 without penalty for overachieving:
We visualized how many people performed more or fewer steps than their recommended goal, as well as by how much they went over or under it.
# Keep track of number of people over and underachieving in each state
over_achieving = [0, 0, 0, 0, 0, 0, 0, 0]
exact_achieving = [0, 0, 0, 0, 0, 0, 0, 0]
under_achieving = [0, 0, 0, 0, 0, 0, 0, 0]
goals_achieved = 0
# Keep track of how far people went over or under the recommended goal
steps_over = [[] for i in range(8)]
steps_under = [[] for i in range(8)]
# Extract the sample data
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(-1, 0)
for i in range(len(states)):
if recommended_goals[i] - steps_taken[i] < 0:
# Count the overachieving per state
over_achieving[states[i][0] * state_sizes[1] * state_sizes[2] + states[i][1] * state_sizes[2] + states[i][2]] += 1
steps_over[states[i][0] * state_sizes[1] * state_sizes[2] + states[i][1] * state_sizes[2] + states[i][2]].append((steps_taken[i] - recommended_goals[i]) / recommended_goals[i])
elif recommended_goals[i] - steps_taken[i] > 0:
# Count the underachieving per state
under_achieving[states[i][0] * state_sizes[1] * state_sizes[2] + states[i][1] * state_sizes[2] + states[i][2]] += 1
steps_under[states[i][0] * state_sizes[1] * state_sizes[2] + states[i][1] * state_sizes[2] + states[i][2]].append((recommended_goals[i] - steps_taken[i]) / recommended_goals[i])
else:
# Count the exact achieving per state
exact_achieving[states[i][0] * state_sizes[1] * state_sizes[2] + states[i][1] * state_sizes[2] + states[i][2]] += 1
if steps_taken[i] >= actual_goals[i]:
goals_achieved += 1
print(f"The total number of samples with an achieved goal was {goals_achieved} which is {round(goals_achieved / 381 * 100)}%")
print(f"The average steps per day before the intervention of people who completed the full study was {round(np.mean(average_steps_before_intervention))} with standard deviation of {round(np.std(average_steps_before_intervention))}")
# Plot the over- and underachieving
print("The amount of over- and underachieving was as follows:")
fig, ax = plt.subplots(figsize=(10, 6))
x = [i for i in range(8)]
plt.plot(x, [100 * over_achieving[i] / (over_achieving[i] + under_achieving[i] + exact_achieving[i]) for i in range(8)], label='overachieving', color='lightblue')
plt.plot(x, [100 * under_achieving[i] / (over_achieving[i] + under_achieving[i] + exact_achieving[i]) for i in range(8)], label='underachieving', color='orange')
plt.xticks(x, ['[0,0,0]', '[0,0,1]', '[0,1,0]', '[0,1,1]', '[1,0,0]', '[1,0,1]', '[1,1,0]', '[1,1,1]'])
plt.subplots_adjust(bottom=0.2)
plt.legend(loc='upper left', title="Percentage of total samples")
plt.xlabel("State [motivation, self-efficacy, context state]", fontsize = 18)
plt.ylabel("Percentage of total samples", fontsize = 18)
ax.set_ylim(0, 100)
axes2 = plt.twinx()
axes2.plot(x, [100 * np.mean(steps_over[i]) for i in range(8)], label='overachieving', color='lightblue', linestyle='dashed')
axes2.plot(x, [100 * np.mean(steps_under[i]) for i in range(8)], label='underachieving', color='orange', linestyle='dashed')
axes2.set_ylabel("Percentage of recommended goal", fontsize = 18)
axes2.set_ylim(0, 100)
plt.legend(loc = "upper right", title="Percentage of recommended goal")
plt.show()
The total number of samples with an achieved goal was 251 which is 66%
The average steps per day before the intervention of people who completed the full study was 4549 with standard deviation of 4350
The amount of over- and underachieving was as follows:
We calculated the likelihood of the next state using three approaches: assigning equal probability to each next state, predicting that people stay in the same state, and predicting the next state with the calculated transition function.
Running the code gives a warning that comes from calculating the credibility interval of an empty list, namely when predicting that people stay in the same state for state [1,0,0]. This is not a problem; the correct results are still generated.
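If desired, this warning can be suppressed before running the cell below; this is optional and not part of the original analysis.
import warnings
# Optional: silence the RuntimeWarning raised by scipy's bayes_mvs for the empty list mentioned above
warnings.filterwarnings("ignore", category=RuntimeWarning)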
print("Analysis Question 2:")
# Keep track of the likelihood of the different approaches for predicting the next state
total_likelihood_transition_function_per_state = [[] for i in range(len(possible_states))]
total_likelihood_same_state_per_state = [[] for i in range(len(possible_states))]
likelihood_counter_per_state = [0 for i in range(len(possible_states))]
# Leave one person out and predict for the samples of that person
for iteration in range(118):
# Extract the data
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(iteration, 0)
# Calculate the transition likelihood for each of the samples of the left-out person
for j in range(len(loocv_states)):
# Convert the left-out sample's state and next state into a number to use as index
loocv_state_as_number = state_to_number_lookup[0] * sm_lookup[loocv_states[j][0]] + state_to_number_lookup[1] * se_lookup[loocv_states[j][1]] + cs_lookup[loocv_states[j][2] + loocv_states[j][3] + loocv_states[j][4]]
loocv_next_state_as_number = state_to_number_lookup[0] * sm_lookup[loocv_next_states[j][0]] + state_to_number_lookup[1] * se_lookup[loocv_next_states[j][1]] + cs_lookup[loocv_next_states[j][2] + loocv_next_states[j][3] + loocv_next_states[j][4]]
# Calculate the transition function
transitions = calculateTransitions(states, actions, next_states)
# Get the likelihood of the next state according to the transition function
likelihood_for_loocv_next_state = transitions[loocv_state_as_number][loocv_actions[j]][loocv_next_state_as_number]
total_likelihood_transition_function_per_state[loocv_state_as_number].append(likelihood_for_loocv_next_state)
# Get the likelihood of the next state when assuming to stay in the same state
if loocv_state_as_number == loocv_next_state_as_number:
total_likelihood_same_state_per_state[loocv_state_as_number].append(1)
else:
total_likelihood_same_state_per_state[loocv_state_as_number].append(0)
# Keep track of the amount of predictions per state
likelihood_counter_per_state[loocv_state_as_number] += 1
average_likelihood_equal_prob_per_state_graph = []
average_likelihood_same_state_per_state_graph = []
average_likelihood_transition_function_per_state_graph = []
# Lists for calculating the credibility intervals for the prediction error
means1 = []
means2 = []
# Calculate the mean likelihood per state using the three approaches
for i in range(len(possible_states)):
average_likelihood_equal_prob_per_state_graph.append(1 / len(possible_states))
average_likelihood_same_state_per_state_graph.append(sum(total_likelihood_same_state_per_state[i]) / likelihood_counter_per_state[i])
average_likelihood_transition_function_per_state_graph.append(sum(total_likelihood_transition_function_per_state[i]) / likelihood_counter_per_state[i])
# Calculate the 95% credibility intervals
mean, variance, std = sc.bayes_mvs(total_likelihood_same_state_per_state[i], 0.95)
means1.append(abs(mean[1][0] - mean[1][1]) * 0.5)
mean, variance, std = sc.bayes_mvs(total_likelihood_transition_function_per_state[i], 0.95)
means2.append(abs(mean[1][0] - mean[1][1]) * 0.5)
# Plot the results
plt.subplots(figsize=(10, 6))
x1 = [i for i in range(len(possible_states))]
x2 = [i + 0.3 for i in x1]
x3 = [i + 0.3 for i in x2]
plt.bar(x1, average_likelihood_equal_prob_per_state_graph, width=0.3, label='equal probability', color='lightblue')
plt.bar(x2, average_likelihood_same_state_per_state_graph, width=0.3, label='same next state', color='orange', hatch='/', yerr=means1, edgecolor='white')
plt.bar(x3, average_likelihood_transition_function_per_state_graph, width=0.3, label='transition function', color='grey', hatch='-', yerr=means2, edgecolor='white')
plt.xticks(x2, possible_states)
plt.xlabel("State [motivation, self-efficacy, context state]", fontsize = 18)
plt.ylabel("Mean likelihood of next state", fontsize = 18)
plt.subplots_adjust(bottom=0.2)
plt.legend(loc='upper center')
plt.show()
Analysis Question 2:
/opt/conda/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:2241: RuntimeWarning: invalid value encountered in multiply
  lower_bound = _a * scale + loc
/opt/conda/lib/python3.10/site-packages/scipy/stats/_distn_infrastructure.py:2242: RuntimeWarning: invalid value encountered in multiply
  upper_bound = _b * scale + loc
We simulated people over time to see how they transition between states when following the optimal policy.
Then we simulated people again, but this time following the worst policy.
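As a hedged illustration of one redistribution step in the simulation below (the transition row is made up, not the estimated transition function): the people currently in a state are spread over the next states in proportion to the transition probabilities of the chosen action.
# Hypothetical single-state redistribution step
people_in_state = 1000
example_transition_row = [0.1, 0.0, 0.3, 0.0, 0.2, 0.0, 0.0, 0.4]  # made-up T(. | state, chosen action)
contribution_to_next_states = [people_in_state * p for p in example_transition_row]
print(contribution_to_next_states)  # 10% of the 1000 people move to the first state, 30% to the third, and so on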
print("Analysis Question 3:")
# Extract the data
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(-1, 0)
# Balance the mean rewards if there was too little data in a state-action pair
for i in range(len(possible_states)):
for j in range(len(possible_actions)):
if sample_count[i][j] < 10:
mean_rewards[i][j] = (sample_count[i][j] / 10) * mean_rewards[i][j] + ((10 - sample_count[i][j]) / 10) * np.mean(rewards)
# Calculate transitions
transitions = calculateTransitions(states, actions, next_states)
# Calculate q-values
q_values = calculateQValues(mean_rewards, transitions)
# Initialize an even distribution amongst all the states
distribution = [1000 for i in range(len(possible_states))]
# Track the percentage of people in each state for certain moments in time
distribution_0 = [1/len(possible_states)*100 for i in range(len(possible_states))]
distribution_1 = [0 for i in range(len(possible_states))]
distribution_2 = [0 for i in range(len(possible_states))]
distribution_5 = [0 for i in range(len(possible_states))]
distribution_10 = [0 for i in range(len(possible_states))]
distribution_20 = [0 for i in range(len(possible_states))]
# Simulate 20 time steps of the optimal policy
for i in range(1, 21):
new_distribution = [0 for j in range(len(possible_states))]
for j in range(len(possible_states)):
# Follow the optimal policy
best_action = q_values[j].index(max(q_values[j]))
transitions_for_best_action = transitions[j][best_action]
# Redistribute people according to the transition function
for k in range(len(possible_states)):
if transitions_for_best_action[k] > 0:
new_distribution[k] += distribution[j] * transitions_for_best_action[k]
distribution = new_distribution.copy()
# Get the percentage of people in each state
graph_distribution = []
for j in range(len(new_distribution)):
graph_distribution.append(round(new_distribution[j] / (len(possible_states) * 1000), 3) * 100)
# Save the distribution for the specified timesteps
if i == 1:
distribution_1 = graph_distribution.copy()
elif i == 2:
distribution_2 = graph_distribution.copy()
elif i == 5:
distribution_5 = graph_distribution.copy()
elif i == 10:
distribution_10 = graph_distribution.copy()
elif i == 20:
distribution_20 = graph_distribution.copy()
print(f"The percentage of people in state 000 after 20 timesteps is {distribution_20[0]}%")
print(f"The percentage of people in state 111 after 20 timesteps is {distribution_20[7]}%")
# Plot the distribution of people over the states over time
plt.subplots(figsize=(10, 6))
x1 = [i for i in range(len(possible_states))]
x2 = [i + 0.15 for i in x1]
x3 = [i + 0.15 for i in x2]
x4 = [i + 0.15 for i in x3]
x5 = [i + 0.15 for i in x4]
x6 = [i + 0.15 for i in x5]
plt.bar(x1, distribution_0, width=0.15, label='0', color='lightgreen')
plt.bar(x2, distribution_1, width=0.15, label='1', color='mediumaquamarine')
plt.bar(x3, distribution_2, width=0.15, label='2', color='lightseagreen')
plt.bar(x4, distribution_5, width=0.15, label='5', color='cadetblue')
plt.bar(x5, distribution_10, width=0.15, label='10', color='teal')
plt.bar(x6, distribution_20, width=0.15, label='20', color='darkslategrey')
plt.xticks(x3, possible_states)
plt.subplots_adjust(bottom=0.2)
plt.ylim(0, 40)
plt.xlabel("State [motivation, self-efficacy, context state]", fontsize = 18)
plt.ylabel("Percentage of people", fontsize = 18)
plt.legend(bbox_to_anchor=(1.01, 0.65), title = "Timesteps")
plt.show()
print("Analysis Question 3 with worst policy:")
# Initialize an even distribution amongst all the states
distribution = [1000 for i in range(len(possible_states))]
# Track the percentage of people in each state for certain moments in time
distribution_0 = [1/len(possible_states)*100 for i in range(len(possible_states))]
distribution_1 = [0 for i in range(len(possible_states))]
distribution_2 = [0 for i in range(len(possible_states))]
distribution_5 = [0 for i in range(len(possible_states))]
distribution_10 = [0 for i in range(len(possible_states))]
distribution_20 = [0 for i in range(len(possible_states))]
# Simulate 20 time steps of the worst policy
for i in range(1, 21):
new_distribution = [0 for j in range(len(possible_states))]
for j in range(len(possible_states)):
# Follow the worst policy
worst_action = q_values[j].index(min(q_values[j]))
transitions_for_worst_action = transitions[j][worst_action]
# Redistribute people according to the transition function
for k in range(len(possible_states)):
if transitions_for_worst_action[k] > 0:
new_distribution[k] += distribution[j] * transitions_for_worst_action[k]
distribution = new_distribution.copy()
# Get the percentage of people in each state
graph_distribution = []
for j in range(len(new_distribution)):
graph_distribution.append(round(new_distribution[j] / (len(possible_states) * 1000), 3) * 100)
# Save the distribution for the specified timesteps
if i == 1:
distribution_1 = graph_distribution.copy()
elif i == 2:
distribution_2 = graph_distribution.copy()
elif i == 5:
distribution_5 = graph_distribution.copy()
elif i == 10:
distribution_10 = graph_distribution.copy()
elif i == 20:
distribution_20 = graph_distribution.copy()
# Plot the distribution of people over the states over time
plt.subplots(figsize=(10, 6))
x1 = [i for i in range(len(possible_states))]
x2 = [i + 0.15 for i in x1]
x3 = [i + 0.15 for i in x2]
x4 = [i + 0.15 for i in x3]
x5 = [i + 0.15 for i in x4]
x6 = [i + 0.15 for i in x5]
plt.bar(x1, distribution_0, width=0.15, label='0', color='lightgreen')
plt.bar(x2, distribution_1, width=0.15, label='1', color='mediumaquamarine')
plt.bar(x3, distribution_2, width=0.15, label='2', color='lightseagreen')
plt.bar(x4, distribution_5, width=0.15, label='5', color='cadetblue')
plt.bar(x5, distribution_10, width=0.15, label='10', color='teal')
plt.bar(x6, distribution_20, width=0.15, label='20', color='darkslategrey')
plt.xticks(x3, possible_states)
plt.subplots_adjust(bottom=0.2)
plt.ylim(0, 40)
plt.xlabel("State [motivation, self-efficacy, context state]", fontsize = 18)
plt.ylabel("Percentage of people", fontsize = 18)
plt.show()
Analysis Question 3:
The percentage of people in state 000 after 20 timesteps is 15.7%
The percentage of people in state 111 after 20 timesteps is 36.5%
Analysis Question 3 with worst policy:
We simulated people over time again, now following three different policies (optimal, average, and worst), and kept track of the mean reward for each. The optimal policy uses the action with the highest q-value, the worst policy uses the action with the lowest q-value, and the average policy assumes that all actions are used equally often in every timestep.
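As a small hedged check of the average-policy bookkeeping below (made-up numbers): with the five actions taken equally often, the expected one-step reward in a state is the unweighted mean of that state's five mean rewards, scaled by the number of simulated people in that state.
# Hypothetical mean rewards of the five actions in one state, and a made-up population size
example_state_rewards = [0.6, 0.7, 0.8, 0.75, 0.65]
example_people_in_state = 1000
print(example_people_in_state * np.mean(example_state_rewards))  # 1000 * 0.7 ≈ 700.0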
print("Analysis Question 4:")
# Extract the data
states, actions, next_states, rewards, mean_rewards, loocv_states, loocv_actions, loocv_rewards, loocv_next_states, sm_lookup, se_lookup, cs_lookup, session_numbers, number_of_rejected_proposals, actual_goals, average_steps_before_intervention, recommended_goals, steps_taken = read_data(-1, 0)
# Balance the mean rewards if there was too little data in a state-action pair
for i in range(len(possible_states)):
for j in range(len(possible_actions)):
if sample_count[i][j] < 10:
mean_rewards[i][j] = (sample_count[i][j] / 10) * mean_rewards[i][j] + ((10 - sample_count[i][j]) / 10) * np.mean(rewards)
# Calculate transitions
transitions = calculateTransitions(states, actions, next_states)
# Calculate Q-values
q_values = calculateQValues(mean_rewards, transitions)
# Initialize the distribution by using the actual states from the first session
distribution_opt = [0 for i in range(len(possible_states))]
number_of_samples = 0
for i in range(len(states)):
if session_numbers[i] == 1:
# Only use the samples of the first session to create the distribution
number_of_samples += 1
state_as_number = state_to_number_lookup[0] * states[i][0] + state_to_number_lookup[1] * states[i][1] + state_to_number_lookup[2] * states[i][2]
distribution_opt[state_as_number] += 1
distribution_avg = distribution_opt.copy()
distribution_bad = distribution_opt.copy()
mean_reward_per_transition_opt_graph = []
mean_reward_per_transition_avg_graph = []
mean_reward_per_transition_bad_graph = []
# For 100 time steps simulate 3 different policies and check the mean reward
for i in range(100):
new_distribution_opt = [0 for j in range(len(possible_states))]
new_distribution_avg = [0 for j in range(len(possible_states))]
new_distribution_bad = [0 for j in range(len(possible_states))]
total_reward_opt = 0
total_reward_avg = 0
total_reward_bad = 0
for j in range(len(possible_states)):
# Get the action following a policy
best_action = q_values[j].index(max(q_values[j]))
worst_action = q_values[j].index(min(q_values[j]))
# Get the total reward per transition following a policy
total_reward_opt += distribution_opt[j] * mean_rewards[j][best_action]
total_reward_avg += 0.2 * distribution_avg[j] * mean_rewards[j][0] + 0.2 * distribution_avg[j] * mean_rewards[j][1] + 0.2 * distribution_avg[j] * mean_rewards[j][2] + 0.2 * distribution_avg[j] * mean_rewards[j][3] + 0.2 * distribution_avg[j] * mean_rewards[j][4]
total_reward_bad += distribution_bad[j] * mean_rewards[j][worst_action]
# Simulate the transition following a policy
transitions_for_best_action = transitions[j][best_action]
transitions_for_avg_action = [0 for j in range(len(possible_states))]
for k in range(len(possible_states)):
# Calculate the transitions for the average policy
transitions_for_avg_action[k] = 0.2 * transitions[j][0][k] + 0.2 * transitions[j][1][k] + 0.2 * transitions[j][2][k] + 0.2 * transitions[j][3][k] + 0.2 * transitions[j][4][k]
transitions_for_avg_action[j] += 1 - sum(transitions_for_avg_action)
transitions_for_worst_action = transitions[j][worst_action]
# Simulate the transitions per policy
for k in range(len(possible_states)):
if transitions_for_best_action[k] > 0:
new_distribution_opt[k] += distribution_opt[j] * transitions_for_best_action[k]
for k in range(len(possible_states)):
if transitions_for_avg_action[k] > 0:
new_distribution_avg[k] += distribution_avg[j] * transitions_for_avg_action[k]
for k in range(len(possible_states)):
if transitions_for_worst_action[k] > 0:
new_distribution_bad[k] += distribution_bad[j] * transitions_for_worst_action[k]
# Update the distribution of people over the states
distribution_opt = new_distribution_opt.copy()
distribution_avg = new_distribution_avg.copy()
distribution_bad = new_distribution_bad.copy()
# Get the mean reward per transition following a policy for certain timesteps
if i in [0, 1, 2, 3, 4, 9, 19, 29, 49, 99]:
mean_reward_per_transition_opt_graph.append(total_reward_opt / sum(distribution_opt))
mean_reward_per_transition_avg_graph.append(total_reward_avg / sum(distribution_avg))
mean_reward_per_transition_bad_graph.append(total_reward_bad / sum(distribution_bad))
# Plot the results
plt.subplots(figsize=(10, 6))
x1 = [i for i in range(10)]
plt.plot(x1, mean_reward_per_transition_opt_graph, label='optimal policy', color='lightblue')
plt.plot(x1, mean_reward_per_transition_avg_graph, label='average policy', color='orange')
plt.plot(x1, mean_reward_per_transition_bad_graph, label='worst policy', color='grey')
plt.xticks(x1, [1, 2, 3, 4, 5, 10, 20, 30, 50, 100])
plt.subplots_adjust(bottom=0.2)
plt.ylim(0.0, 1.0)
plt.ylabel("Mean reward", fontsize = 18)
plt.xlabel("Timestep", fontsize = 18)
plt.legend(loc='center right')
plt.show()
# Calculate the absolute and relative differences between the rewards from the policies after 100 time steps
difference_optimal_worst = mean_reward_per_transition_opt_graph[9] - mean_reward_per_transition_bad_graph[9]
difference_optimal_average = mean_reward_per_transition_opt_graph[9] - mean_reward_per_transition_avg_graph[9]
print(f"The optimal policy is {round(difference_optimal_worst, 2)} higher than the worst policy, which is {round((difference_optimal_worst / mean_reward_per_transition_bad_graph[9]) * 100, 2)}% better after 100 timesteps.")
print(f"The optimal policy is {round(difference_optimal_average, 2)} higher than the average policy, which is {round((difference_optimal_average / mean_reward_per_transition_avg_graph[9]) * 100, 2)}% better after 100 timesteps.")
Analysis Question 4:
The optimal policy is 0.11 higher than the worst policy, which is 15.76% better after 100 timesteps.
The optimal policy is 0.06 higher than the average policy, which is 8.01% better after 100 timesteps.