-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
184 lines (154 loc) · 7.43 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# %%
'''
Created by Richard Felius.

Main script for the GRIP HAP project.
- Run this script to process the raw data.
- Gets activity scores for every 10 seconds / minute.
- Creates an Excel file with outcomes per person per measurement (day).
# TODO
'''
import logging
import pandas as pd
from src.utils import *
from src.characteristics import characteristics, characteristics_pain
from src.prepare_data import prepare_data, split_data, clean_data
from src.predict_data import predict_data
from src.barcode_plot import barcodeplot
import os
import matplotlib.pyplot as plt
# Project-wide switches and directories read by main().
# Finer-grained options are kept in the config file loaded via load_config().
settings = {
    # Load each subject's pain-score CSV and add pain characteristics.
    "PAIN_SCORES": True,
    # Also produce results (and correlations) per 2-hour window.
    "RESULTS_2HOURS": True,
    # Print which subject / day is being analysed.
    "VERBOSE": True,
    # Draw a barcode plot of the predicted activities for every day.
    "VISUALISE": False,
    # Directory whose entries are treated as subjects.
    "RAW_DATA_DIR": "data/raw_data",
    # Handed to load_config() as part of this dict; presumably the folder
    # that holds the config file (not verifiable from this file).
    "CONFIG_DIR": "config",
}
# %%
def _results_to_frame(results, first_numeric_column):
    """Turn one results dict into a single-row DataFrame.

    Columns from position ``first_numeric_column`` onwards are converted with
    ``pd.to_numeric`` and rounded to 3 decimals; the columns before that
    position are left untouched.
    """
    results_df = pd.DataFrame.from_dict(results, orient='index').transpose()
    for column in results_df.columns[first_numeric_column:]:
        results_df[column] = pd.to_numeric(results_df.loc[:, column]).round(3)
    return results_df


def main():
    """Process all raw data and export the outcomes to Excel.

    Every entry of ``settings['RAW_DATA_DIR']`` is treated as a subject and
    every entry inside a subject folder (except ``pain_score`` and ``.csv``
    files) as a measurement day. For each day the data is prepared, split,
    cleaned and classified, after which the characteristics are collected in
    one row. With ``settings['RESULTS_2HOURS']`` enabled the same is done for
    every value of the ``time_2hours`` column.

    Failures are logged per subject or per day and processing continues with
    the next one.

    Output files:
        Results/results_per_day.xlsx
        Results/correlations_per_day.xlsx
        Results/results_per_2hours.xlsx       (only with RESULTS_2HOURS)
        Results/correlations_per_2hours.xlsx  (only with RESULTS_2HOURS)
    """
    # Create folders and logging
    initialise_project()

    # Load config file and settings
    config = load_config(settings, config_name="config_file.yaml")
    raw_data_dir = settings['RAW_DATA_DIR']
    subjects = os.listdir(raw_data_dir)

    # One-row frames are collected here and concatenated once at the end,
    # instead of growing a DataFrame inside the loops.
    daily_frames = []
    two_hour_frames = []

    # Loop over subjects
    for subject in subjects:
        if subject.endswith('.DS_Store'):
            continue
        if settings['VERBOSE']:
            print(f'Analysing subject {subject}')

        try:
            if settings['PAIN_SCORES']:
                # Load painscores
                painscores = pd.read_csv(
                    f'{raw_data_dir}/{subject}/pain_score/'
                    f'{subject}_pain_score.csv',
                    index_col=0)
            # The pain_score folder is not a measurement day; filtering it
            # out (rather than list.remove) also works when it is absent.
            days = [entry for entry in os.listdir(f'{raw_data_dir}/{subject}')
                    if entry != 'pain_score']
        except Exception as e:
            # No day is known yet at this point, so only the subject is logged.
            logging.error(f'subject: {subject} {e}')
            continue

        # Loop over days
        for day in days:
            if day.endswith('.csv'):
                continue
            try:
                if settings['VERBOSE']:
                    print(f'Analysing day {day}')

                # The first entry of the day folder is used as the data file.
                file_path = os.listdir(f'{raw_data_dir}/{subject}/{day}')[0]
                file = f'{subject}/{day}/{file_path}'
                file_name = f'{subject}_{day}'

                # Load data, downsample and remove not worn
                data_df, endtime, begintime = prepare_data(
                    file, config, settings)
                data_df = split_data(data_df, begintime, endtime, config)

                # Drop first and last 30 seconds and drop not worn
                data_df, not_worn_samples = clean_data(
                    data_df, config, all=True)

                # Predict data based on a previously trained ML algorithm
                data = predict_data(data_df, config, settings,
                                    file_name, file, config['chunk_size'])

                # Visualise activities per chunk size
                if settings['VISUALISE']:
                    barcodeplot(data, file, file_name)

                results = {
                    'subject': subject,
                    'day': day,
                    'Samples': len(data_df) + len(not_worn_samples),
                    'not_worn_samples': len(not_worn_samples),
                    'Name': file,
                    'begintime': begintime,
                    'endtime': endtime,
                }
                results = characteristics(
                    results, data['activities [1min]'].values)

                # Pain score data
                if settings['PAIN_SCORES']:
                    try:
                        results = characteristics_pain(
                            painscores, results, day)
                    except Exception as e:
                        logging.error(f'pain: {subject} {day} {e}')

                # The 7 identifier columns set above stay as they are; all
                # following columns are made numeric and rounded.
                daily_frames.append(_results_to_frame(results, 7))

                # Repeat the analysis for every 2-hour window of this day
                if settings['RESULTS_2HOURS']:
                    for key in data['time_2hours'].unique():
                        if key == '':
                            continue
                        tmp_data = data.loc[data['time_2hours'] == key]
                        results = {
                            'subject': subject,
                            'day': day,
                            'Samples': len(tmp_data),
                            'Name': file,
                            'begintime': begintime,
                            'endtime': endtime,
                            'key': key,
                        }
                        if settings['PAIN_SCORES']:
                            try:
                                # The pain score is looked up at the second
                                # part of the key (presumably the end of the
                                # window). Use index 0 instead of 1 to get
                                # the pain score at the start.
                                window_time = key.split('_')[1]
                                results = characteristics_pain(
                                    painscores, results, day,
                                    time=f'{window_time}:00')
                            except Exception as e:
                                logging.error(f'pain: {subject} {day} {e}')
                        results = characteristics(
                            results, tmp_data['activities [1min]'].values)

                        # NOTE(review): the first 10 columns are left
                        # untouched - presumably the 7 identifier columns
                        # plus 3 columns added by characteristics_pain;
                        # confirm this offset when PAIN_SCORES is False.
                        two_hour_frames.append(
                            _results_to_frame(results, 10))
            except Exception as e:
                logging.error(f'day: {subject} {day} {e}')

    final_results = (pd.concat(daily_frames) if daily_frames
                     else pd.DataFrame())
    # Replace empty values with 0
    final_results = final_results.fillna(0)
    final_results.to_excel('Results/results_per_day.xlsx', index=False)
    final_results.loc[:, 'average_activity_level':].corr().to_excel(
        'Results/correlations_per_day.xlsx')

    if settings['RESULTS_2HOURS']:
        final_results_2hours = (pd.concat(two_hour_frames) if two_hour_frames
                                else pd.DataFrame())
        # Replace empty values with 0
        final_results_2hours = final_results_2hours.fillna(0)
        final_results_2hours.to_excel(
            'Results/results_per_2hours.xlsx', index=False)
        # These two columns are excluded from the correlation table.
        final_results_2hours = final_results_2hours.drop(
            columns=['tijd', 'Epochs_of_1minute'])
        final_results_2hours.loc[:, 'pijn_score':].corr().to_excel(
            'Results/correlations_per_2hours.xlsx')
# Run the full pipeline only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
# %%