#!/usr/bin/env python3
import sys
import numpy as np
from dfatool.dfatool import (
PTAModel,
RawData,
regression_measures,
pta_trace_to_aggregate,
)
from gplearn.genetic import SymbolicRegressor
from multiprocessing import Pool
def splitidx_srs(length):
    """
    Randomly split the indices 0 .. length-1 into training and validation sets.

    The indices are shuffled using NumPy's global random state. The first two
    thirds (rounded down) form the training set, the rest the validation set.

    :param length: number of samples to partition
    :returns: tuple (training, validation) of index arrays; together they
        contain every index in range(length) exactly once
    """
    # For an integer argument, permutation() shuffles np.arange(length).
    shuffled = np.random.permutation(length)
    # Integer arithmetic avoids the float round trip of length * 2.0 / 3.
    border = (2 * length) // 3
    return (shuffled[:border], shuffled[border:])
def _gp_fit(arg):
    """
    Fit one SymbolicRegressor for a single hyper-parameter combination.

    Takes a single tuple argument so that it can be used directly as the
    worker function of multiprocessing.Pool.map.

    :param arg: tuple (param, X, Y).
        param is (population_size, parsimony_coefficient, function_set,
        const_limit); function_set is a space-separated string of function
        names and const_limit yields const_range=(-const_limit, const_limit).
        X and Y are indexed with the integer index arrays returned by
        splitidx_srs(len(Y)), so both must support that kind of indexing and
        X must have one row per entry of Y.
    :returns: tuple (param, program, raw_fitness, measures).
        On success, program is the string form of the fitted program,
        raw_fitness its raw_fitness_ attribute and measures the result of
        regression_measures() on the validation split.
        If fitting or evaluation raises, program is "Exception: <message>",
        raw_fitness is the sentinel 999999999 (so failures sort last) and
        measures is {"mae": nan}. Both paths therefore have the same shape
        and callers may index element 3 unconditionally.
    """
    param, X, Y = arg
    population_size, parsimony_coefficient, function_set, const_limit = param[:4]

    est_gp = SymbolicRegressor(
        population_size=population_size,
        generations=450,
        parsimony_coefficient=parsimony_coefficient,
        function_set=function_set.split(" "),
        const_range=(-const_limit, const_limit),
    )

    # Random 2/3 training, 1/3 validation split (fresh for every call).
    training, validation = splitidx_srs(len(Y))
    X_train = X[training]
    Y_train = Y[training]
    X_validation = X[validation]
    Y_validation = Y[validation]

    try:
        est_gp.fit(X_train, Y_train)
        return (
            param,
            str(est_gp._program),
            est_gp._program.raw_fitness_,
            regression_measures(est_gp.predict(X_validation), Y_validation),
        )
    except Exception as e:
        # Deliberately broad: one failing configuration must not abort the
        # whole parameter sweep. Return the same 4-tuple shape as the success
        # path; the previous 3-tuple made the caller's res[3]["mae"] raise
        # IndexError.
        return (
            param,
            "Exception: {}".format(str(e)),
            999999999,
            {"mae": float("nan")},
        )
if __name__ == "__main__":
    # Grid search over SymbolicRegressor hyper-parameters.
    #
    # Command line:
    #   argv[1]   name of the state or transition to model
    #   argv[2]   model attribute whose measurements are the regression target
    #   argv[3]   number of parameters (dimension) a by_param key must have
    #   argv[4:]  measurement files passed to RawData
    if len(sys.argv) < 5:
        sys.exit(
            "Usage: {} STATE_OR_TRANSITION ATTRIBUTE DIMENSION FILE...".format(
                sys.argv[0]
            )
        )

    # Parse the cheap arguments first so that a malformed dimension fails
    # before the measurement files are loaded.
    state_or_tran = sys.argv[1]
    model_attribute = sys.argv[2]
    dimension = int(sys.argv[3])
    filenames = sys.argv[4:]

    population_size = [100, 500, 1000, 2000, 5000, 10000]
    # NOTE(review): 0.1 is listed twice, so that coefficient is evaluated twice
    # as often as the others. Possibly a typo for another value -- confirm.
    parsimony_coefficient = [0.1, 0.5, 0.1, 1]
    function_set = ["add mul", "add mul sub div", "add mul sub div sqrt log inv"]
    const_lim = [100000, 50000, 10000, 1000, 500, 10, 1]
    # Each configuration is fitted this many times; every fit draws its own
    # random training/validation split in _gp_fit.
    repetitions = 10

    raw_data = RawData(filenames)
    preprocessed_data = raw_data.get_preprocessed_data()
    by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data)
    model = PTAModel(by_name, parameters, arg_count, traces=preprocessed_data)
    by_param = model.by_param

    # Collect one feature column per parameter and the matching target values.
    # Keys are used as (name, parameter tuple); only entries for the requested
    # name with exactly `dimension` parameters contribute.
    X = [[] for _ in range(dimension)]
    Y = []
    for key, val in by_param.items():
        if key[0] == state_or_tran and len(key[1]) == dimension:
            samples = val[model_attribute]
            Y.extend(samples)
            # Every sample of this entry shares the same parameter values.
            for column, param_value in zip(X, key[1]):
                column.extend([float(param_value)] * len(samples))

    if not Y:
        # Without data every single fit would fail; stop before spawning
        # thousands of pointless worker tasks.
        sys.exit(
            "No {} samples found for {} with {} parameter(s)".format(
                model_attribute, state_or_tran, dimension
            )
        )

    # _gp_fit expects one row per sample, hence the transpose.
    features = np.array(X).T
    targets = np.array(Y)

    # 6 * 4 * 3 * 7 = 504 configurations, each repeated `repetitions` times.
    paramqueue = []
    for popsize in population_size:
        for coef in parsimony_coefficient:
            for fs in function_set:
                for cl in const_lim:
                    for _ in range(repetitions):
                        paramqueue.append(((popsize, coef, fs, cl), features, targets))

    with Pool() as pool:
        results = pool.map(_gp_fit, paramqueue)

    # Report results ordered by raw fitness (failed fits carry a large sentinel).
    for res in sorted(results, key=lambda r: r[2]):
        # Failed fits may come without validation measures; print nan for them
        # instead of crashing the whole report.
        mae = res[3]["mae"] if len(res) > 3 else float("nan")
        print("{} {:.0f} ({:.0f})\n{}".format(res[0], mae, res[2], res[1]))