-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfm_complexity.py
218 lines (173 loc) · 7.04 KB
/
fm_complexity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import os
import argparse
from typing import Any
from alive_progress import alive_bar
import matplotlib.pyplot as plt
import seaborn as sns
from flamapy.metamodels.fm_metamodel.transformations import UVLReader
from flamapy.metamodels.bdd_metamodel.transformations import FmToBDD
from flamapy.metamodels.bdd_metamodel.operations import (
BDDProductDistribution,
BDDFeatureInclusionProbability,
BDDConfigurationsNumber
)
def descriptive_statistics(prod_dist: list[int]) -> dict[str, Any]:
    """Compute descriptive statistics of a distribution given as a frequency table.

    Index ``i`` of `prod_dist` is the observed value and ``prod_dist[i]`` is
    how many times that value occurs. Entries that are not positive are
    skipped.

    Args:
        prod_dist: Frequency of each value, indexed by the value itself.

    Returns:
        A dict with the keys 'Mean', 'Standard deviation' (population form,
        divided by the total frequency), 'Median', 'Median absolute
        deviation', 'Mode' (smallest value among those with the highest
        frequency), 'Min', 'Max' and 'Range'. When the frequencies sum to
        zero every entry is 0 except 'Min' and 'Max', which are None.

    Raises:
        ValueError: If the positive frequencies never reach the median rank,
            which can only happen when `prod_dist` contains negative entries.
    """
    total_elements = sum(prod_dist)
    if total_elements == 0:
        return {
            'Mean': 0,
            'Standard deviation': 0,
            'Median': 0,
            'Median absolute deviation': 0,
            'Mode': 0,
            'Min': None,
            'Max': None,
            'Range': 0
        }

    # 1-based ranks of the middle element(s); both coincide for an odd total.
    lower_rank = (total_elements + 1) // 2
    upper_rank = (total_elements + 2) // 2

    # (value, frequency) pairs in ascending value order.
    observed = [(value, count) for value, count in enumerate(prod_dist) if count > 0]

    min_val = observed[0][0]
    max_val = observed[-1][0]
    # max() keeps the first maximal item, i.e. the smallest value among ties.
    mode = max(observed, key=lambda pair: pair[1])[0]

    mean = sum(value * count for value, count in observed) / total_elements
    median = _middle_of_weighted(observed, lower_rank, upper_rank)

    # The squared differences must cover every value, not only those up to
    # the median, otherwise the standard deviation is underestimated.
    sum_squared_diff = sum(count * (value - mean) ** 2 for value, count in observed)
    std_dev = (sum_squared_diff / total_elements) ** 0.5

    # The MAD is the median of the absolute deviations, so the deviations
    # have to be ranked by their own size rather than by the original value.
    deviations = sorted((abs(value - median), count) for value, count in observed)
    mad = _middle_of_weighted(deviations, lower_rank, upper_rank)

    return {
        'Mean': mean,
        'Standard deviation': std_dev,
        'Median': median,
        'Median absolute deviation': mad,
        'Mode': mode,
        'Min': min_val,
        'Max': max_val,
        'Range': max_val - min_val
    }


def _middle_of_weighted(pairs: list[tuple[float, int]], lower_rank: int, upper_rank: int) -> float:
    """Return the mean of the items at the two given ranks of sorted (item, frequency) pairs."""
    running_total = 0
    lower = None
    for item, count in pairs:
        running_total += count
        if lower is None and running_total >= lower_rank:
            lower = item
        if running_total >= upper_rank:
            # lower_rank <= upper_rank, so `lower` is always set by now.
            return (lower + item) / 2
    raise ValueError('frequencies do not reach the requested rank')
def plot_product_distribution(data: list[int]):
    """Display a filled kernel-density plot of a product distribution.

    Every position of `data` is used as an x value and the entry stored at
    that position as its weight in the density estimate.
    """
    plt.figure(figsize=(10, 6))

    # x values are the list positions 0..len(data)-1, weighted by the entries.
    sns.kdeplot(x=range(len(data)), weights=data, bw_adjust=0.5, fill=True)

    plt.xlabel('#Features')
    plt.ylabel("Products' density")
    plt.title('Product distribution')

    # Start the x axis at zero.
    plt.xlim(left=0)

    plt.show()
def plot_feature_inclusion_probabilities(probabilities):
    """Display a histogram of how many features share each inclusion probability.

    The bar heights are percentages of `len(probabilities)`. Bands are shaded
    around the probabilities 0.0, 0.5 and 1.0 when they occur, and the legend
    shows how many entries have each of those probabilities.
    """
    plt.figure(figsize=(10, 6))

    # Tally how many times each distinct probability occurs.
    occurrences = {}
    for probability in probabilities:
        if probability in occurrences:
            occurrences[probability] += 1
        else:
            occurrences[probability] = 1

    # Express every tally as a percentage of all given probabilities.
    n_probabilities = len(probabilities)
    percentages = [tally / n_probabilities * 100 for tally in occurrences.values()]
    plt.hist(
        x=occurrences.keys(),
        weights=percentages,
        bins=n_probabilities,
        edgecolor='black',
        alpha=0.7,
    )

    # Band colour and legend caption for the three highlighted probabilities.
    highlights = {
        0.5: ('yellow', 'Pure optional features (p=0.5)'),
        1.00: ('green', 'Core features (p=1.0)'),
        0.00: ('red', 'Dead features (p=0.0)'),
    }
    for probability, tally in occurrences.items():
        highlight = highlights.get(probability)
        if highlight is None:
            continue
        band_color, caption = highlight
        plt.axvspan(
            probability - 0.025,
            probability + 0.025,
            color=band_color,
            alpha=0.2,
            label=f'{caption}: {tally}',
        )

    plt.legend()

    plt.xlabel('Feature probability of being included in a valid configuration')
    plt.ylabel('%Features')
    plt.title('Feature probability distribution')

    # Probabilities live in [0, 1].
    plt.xlim(0, 1)

    # Raise the y limit to the next multiple of 10 above the tallest bar,
    # but never beyond 100 %.
    tallest = max(percentages)
    y_ceiling = tallest + (10 - tallest % 10)
    plt.ylim(0, min(100, y_ceiling))

    plt.show()
def main(fm_path: str) -> None:
    """Analyse the feature model at `fm_path` and print and plot the results.

    The model is read with `UVLReader` and transformed to a BDD with `FmToBDD`.
    The function then reports, in this order: the number of features, the
    number of configurations, the product distribution (printed and plotted),
    the feature inclusion probabilities (printed and plotted) and the
    descriptive statistics of the product distribution.

    Args:
        fm_path: Path of the feature model file to analyse.
    """
    with alive_bar(title=f'Reading FM {fm_path}...') as bar:
        fm = UVLReader(fm_path).transform()
        bar()

    with alive_bar(title='Transforming FM to BDD...') as bar:
        bdd_model = FmToBDD(fm).transform()
        bar()

    with alive_bar(title='Calculating number of configurations...') as bar:
        n_configs = BDDConfigurationsNumber().execute(bdd_model).get_result()
        bar()

    print(f'Number of features the SPL manages: {len(fm.get_features())}')
    print(f'Number of valid configurations that can be derived: {n_configs}')
    # Homogeneity is not computed; the line is printed as a placeholder.
    print('Homogeneity of configurations: ??')

    with alive_bar(title='Calculating Product distribution...') as bar:
        prod_dist_op = BDDProductDistribution().execute(bdd_model)
        dist = prod_dist_op.product_distribution()
        bar()
    print(f'Product distribution: {dist}')
    plot_product_distribution(dist)

    with alive_bar(title='Calculating Feature inclusion probabilities...') as bar:
        fip = BDDFeatureInclusionProbability().execute(bdd_model).get_result()
        bar()
    print('Feature Inclusion Probabilities:')
    for feat, prob in fip.items():
        print(f'{feat}: {prob}')
    plot_feature_inclusion_probabilities(list(fip.values()))

    with alive_bar(title='Calculating descriptive analysis...') as bar:
        dist_stats = descriptive_statistics(dist)
        bar()
    print('Descriptive analysis (number of features for a product):')
    for ds, dv in dist_stats.items():
        print(f' |-{ds}: {dv}')
if __name__ == '__main__':
    # Command-line entry point: one positional argument, the feature model path.
    cli = argparse.ArgumentParser(description='Characterize feature model complexity.')
    cli.add_argument(
        metavar='fm',
        dest='fm_filepath',
        type=str,
        help='Input feature model (.uvl).',
    )
    main(cli.parse_args().fm_filepath)