# stats_test.py
import pandas as pd
from scipy.stats import wilcoxon
import os
import pdb
# Directory the script is launched from: the survey CSV is read from here and
# the result CSVs are written back to it.
current_path = os.getcwd()

# Reference value subtracted from every answer before the signed-rank test.
# NOTE(review): presumably the neutral midpoint of the rating scale - confirm.
NEUTRAL_SCORE = 3

# One-sample Wilcoxon signed-rank test for Q1-Q9.
df = pd.read_csv(os.path.join(current_path, "survey_result.csv"))
results = []
for i in range(9):
    # Q1 is the 3rd column (position 2); blank answers are dropped per question.
    df_q = df.iloc[:, 2 + i].dropna().to_numpy()
    # One-sided test: are the answers shifted above the reference value?
    t_stat, p_value = wilcoxon(df_q - NEUTRAL_SCORE, alternative='greater')
    # Fixed-point formatting keeps tiny p-values out of scientific notation.
    print(f"Q{i+1}", f"{t_stat:.10f}", f"{p_value:.10f}", len(df_q))
    results.append([t_stat, p_value, len(df_q)])
# Exact binomial test for Q10, then save the Q1-Q10 one-sample results.
from scipy.stats import binomtest

# Q10 sits at column position 11, directly after Q1-Q9 (positions 2-10).
binary_responses = df.iloc[:, 11].dropna().to_list()
# Shift every answer down by one.
# NOTE(review): presumably Q10 is coded 1/2 so this yields 0/1 - confirm
# against the survey export.
binary_responses = [x - 1 for x in binary_responses]

# A column that contained blanks is read as float, so the sum is a float too;
# binomtest only accepts an integer success count, hence the explicit
# conversion (guarded so a fractional total is never silently truncated).
success_total = sum(binary_responses)
success_count = int(success_total)
if success_count != success_total:
    raise ValueError("Q10 responses must be whole numbers")

# One-sided test of the success proportion against 0.5.
binomtest_results = binomtest(success_count, len(binary_responses), 0.5, alternative='greater')
t_stat, p_value, n = binomtest_results.statistic, binomtest_results.pvalue, binomtest_results.n
print("Q10", f"{t_stat:.10f}", f"{p_value:.10f}", n)
results.append([t_stat, p_value, n])

# Turn the accumulated rows into a table labelled Q1..Qn and save it.
question_labels = [f'Q{i+1}' for i in range(len(results))]
results = pd.DataFrame(results, columns=['Test_Stats', 'p_value', 'n'], index=question_labels)
results.to_csv(os.path.join(current_path, "result.csv"))
# Compare weak and strong students on Q1-Q10 with the Mann-Whitney U test.
from scipy.stats import mannwhitneyu

# Split the respondents once; group membership is the same for every question.
strong_rows = df[df["group"] == "strong"]
weak_rows = df[df["group"] == "weak"]

results_group = []
for i in range(10):
    # Q1 is the 3rd column (position 2); blank answers are dropped per group.
    df_q_s = strong_rows.iloc[:, 2 + i].dropna().to_numpy()
    df_q_w = weak_rows.iloc[:, 2 + i].dropna().to_numpy()
    # One-sided test: 'greater' refers to the first sample, i.e. it asks
    # whether the weak group's answers tend to be higher than the strong group's.
    t_stat, p_value = mannwhitneyu(df_q_w, df_q_s, alternative='greater')
    # Fixed-point formatting keeps tiny p-values out of scientific notation.
    print(f"Q{i+1}", f"{t_stat:.10f}", f"{p_value:.10f}", len(df_q_w), len(df_q_s))
    results_group.append([t_stat, p_value, len(df_q_w), len(df_q_s)])

# Label the rows from this table's own length rather than from the size of
# the unrelated one-sample results table.
group_labels = [f'Q{i+1}' for i in range(len(results_group))]
results_group = pd.DataFrame(
    results_group,
    columns=['Test_Stats', 'p_value', 'n_weak', 'n_strong'],
    index=group_labels,
)
results_group.to_csv(os.path.join(current_path, "result_group.csv"))