Author: Martin Dierikx
Date: 25-09-2023
Required files: data/preprocessed_pre_screening_data.csv and data/preprocessed_demographic_data.csv
Output files: none
This file contains the code that computes the participant demographics reported in Table 4.1.
import numpy as np
We gathered some demographic data through a pre-screening questionnaire.
# Summarise the pre-screening questionnaire: activity level, daily steps,
# step-tracking method and stage of change, printed as counts and percentages.


def _percentage(count, total):
    """Return `count` as a whole-number percentage of `total`."""
    return int(round(count / total * 100, 0))


# Initialize variables to store the demographic data
never_exercise_count = 0
steps_per_day = []
samsung_health_app = 0
iphone_health_app = 0
smartwatch = 0
other_apps = []
contemplating = 0

# Read the pre-processed pre-screening data; the context manager makes sure
# the file handle is closed once every row has been consumed.
# NOTE(review): rows are split on plain commas, which assumes no field contains
# a quoted comma - confirm against the data file.
with open("../data/preprocessed_pre_screening_data.csv") as pre_screening_file:
    for d in pre_screening_file:
        data = d.split(",")
        # Skip the header row
        if data[0] == 'ID':
            continue
        # Count the people that are inactive
        if 'Never' in data[1]:
            never_exercise_count += 1
        # Keep track of the average steps per day of people
        steps_per_day.append(int(data[2]))
        # Count the different ways of tracking steps
        if 'Samsung' in data[3]:
            samsung_health_app += 1
        elif 'iPhone' in data[3]:
            iphone_health_app += 1
        elif 'Smartwatch' in data[3]:
            smartwatch += 1
        # List the other ways of tracking steps people inputted
        if data[4] != '':
            other_apps.append(data[4])
        # Count the people that are contemplating to become more physically active
        if '6 months' in data[5]:
            contemplating += 1

# Every data row contributes exactly one steps entry, so the list length is the
# number of participants; derive all percentages from it instead of a
# hard-coded head count so the figures stay correct if the data file changes.
total_pre_screening = len(steps_per_day)
active_count = total_pre_screening - never_exercise_count
other_tracking_count = total_pre_screening - samsung_health_app - iphone_health_app - smartwatch
preparing_count = total_pre_screening - contemplating

# Print all the data for the table
print(f"A total of {total_pre_screening} people participated in the study")
print(f"Less than 60 minutes of exercise is done by {never_exercise_count} people, which is {_percentage(never_exercise_count, total_pre_screening)}%")
print(f"Between 60 and 150 minutes of exercise is done by {active_count} people, which is {_percentage(active_count, total_pre_screening)}%")
print(f"The average number of steps per day of people was {int(round(np.mean(steps_per_day), 0))}, with standard deviation of {int(round(np.std(steps_per_day), 0))}, within the range of {min(steps_per_day)} and {max(steps_per_day)}")
print(f"Samsung health app is used by {samsung_health_app} people, which is {_percentage(samsung_health_app, total_pre_screening)}%")
print(f"iPhone health app is used by {iphone_health_app} people, which is {_percentage(iphone_health_app, total_pre_screening)}%")
print(f"Smartwatch is used by {smartwatch} people, which is {_percentage(smartwatch, total_pre_screening)}%")
print(f"Other ways are used by {other_tracking_count} people, which is {_percentage(other_tracking_count, total_pre_screening)}%")
print(f"The other ways mentioned by people are {other_apps}")
print(f"Contemplating to become more physically active was done by {contemplating} people, which is {_percentage(contemplating, total_pre_screening)}%")
print(f"Preparing to become more physically active was done by {preparing_count} people, which is {_percentage(preparing_count, total_pre_screening)}%")
We also gathered some demographic data from Prolific.
# Summarise the Prolific demographic export: gender counts and age statistics,
# printed as counts and percentages.

# Initialize the variables to store the demographic data
men = 0
women = 0
age = []

# Read the data from the pre-processed Prolific data file; the context manager
# makes sure the file handle is closed once every row has been consumed.
# NOTE(review): rows are split on plain commas, which assumes no field contains
# a quoted comma - confirm against the data file.
with open("../data/preprocessed_demographic_data.csv") as demographic_file:
    for d in demographic_file:
        data = d.split(",")
        # Skip the header row
        if data[0] == 'ID':
            continue
        # Count the number of males and females
        if 'Man' in data[2]:
            men += 1
        elif 'Woman' in data[2]:
            women += 1
        # Keep track of the age of people (drop the trailing newline first)
        age.append(int(data[5].strip('\n')))

# Every data row contributes exactly one age entry, so the list length is the
# number of participants; derive all percentages from it instead of a
# hard-coded head count so the figures stay correct if the data file changes.
total_demographic = len(age)
other_gender = total_demographic - men - women

print(f"{women} people were females, which is {int(round(women / total_demographic * 100, 0))}%")
print(f"{men} people were males, which is {int(round(men / total_demographic * 100, 0))}%")
print(f"{other_gender} people were other, which is {int(round(other_gender / total_demographic * 100, 0))}%")
print(f"The average age of people was {int(round(np.mean(age), 0))}, with standard deviation of {int(round(np.std(age), 0))}, within the range of {min(age)} and {max(age)}")