diff options
author | Daniel Friesel <daniel.friesel@uos.de> | 2019-10-08 08:22:50 +0200 |
---|---|---|
committer | Daniel Friesel <daniel.friesel@uos.de> | 2019-10-08 08:22:50 +0200 |
commit | 30926c1b1625978b66179b01978bebd3d3243623 (patch) | |
tree | 1a3f4c88be9bba7c2e6ac4f95ceede8e6fd82960 | |
parent | 3a02d19162975b619ea11259ce763c1902837b81 (diff) |
add legacy genetic programming (symbolic regression) benchmark
-rwxr-xr-x | bin/gptest.py | 88 |
1 file changed, 88 insertions, 0 deletions
#!/usr/bin/env python3
"""Legacy genetic programming (symbolic regression) benchmark (bin/gptest.py).

Usage: gptest.py <state_or_tran> <model_attribute> <dimension> <file>...

The measurement files are loaded through dfatool. Every ``by_param`` entry
whose name equals ``state_or_tran`` and whose parameter tuple has exactly
``dimension`` elements contributes its ``model_attribute`` samples. A gplearn
``SymbolicRegressor`` is then fitted for every combination of the
hyper-parameter grid defined in ``main`` (several repetitions each, every
repetition on a fresh random training/validation split), and the results are
printed sorted by the raw training fitness.
"""

import itertools
import sys
from multiprocessing import Pool

import numpy as np

# NOTE: dfatool and gplearn are imported inside the functions that use them,
# so the helpers in this file can be imported without those packages.

# Number of generations each SymbolicRegressor evolves.
GENERATIONS = 450
# Number of independent fits (each with its own data split) per grid point.
REPETITIONS = 10
# Fitness reported for a fit that raised; sorts failed runs to the end.
FAILED_FITNESS = 999999999


def splitidx_srs(length):
    """Split the indices ``0 .. length-1`` by simple random sampling.

    Returns a ``(training, validation)`` tuple of index arrays. The first
    ``int(length * 2 / 3)`` shuffled indices form the training set, the
    remaining ones the validation set.
    """
    shuffled = np.random.permutation(np.arange(length))
    border = int(length * 2 / 3)
    return (shuffled[:border], shuffled[border:])


def _reseed_worker():
    """Reseed the global NumPy random state of a freshly started pool worker.

    With the fork start method every worker starts with a copy of the
    parent's global random state, so np.random.permutation in splitidx_srs
    could hand out the same "random" splits in different workers.
    """
    np.random.seed()


def _gp_fit(arg):
    """Fit one SymbolicRegressor and evaluate it on a held-out split.

    ``arg`` is a ``(param, X, Y)`` tuple, where ``param`` is
    ``(population_size, parsimony_coefficient, function_set, const_limit)``
    with ``function_set`` given as a space-separated string, and ``X`` / ``Y``
    are arrays indexable by the index arrays from ``splitidx_srs``.

    Returns ``(param, program, raw_fitness, measures)``. If fitting or
    evaluation raises, ``program`` holds the exception text, ``raw_fitness``
    is ``FAILED_FITNESS`` and ``measures`` is ``None``, so every result has
    the same length and callers can index it uniformly.
    """
    # Imported outside the try block: a missing package must abort the run
    # instead of being recorded as a failed fit.
    from dfatool import regression_measures
    from gplearn.genetic import SymbolicRegressor

    param, X, Y = arg
    population_size, parsimony_coefficient, function_names, const_limit = param

    est_gp = SymbolicRegressor(
        population_size=population_size,
        generations=GENERATIONS,
        parsimony_coefficient=parsimony_coefficient,
        function_set=function_names.split(' '),
        const_range=(-const_limit, const_limit),
    )

    training, validation = splitidx_srs(len(Y))
    X_train, Y_train = X[training], Y[training]
    X_validation, Y_validation = X[validation], Y[validation]

    # Deliberately broad: one failing configuration must not abort the
    # whole benchmark; the failure is reported alongside the other results.
    try:
        est_gp.fit(X_train, Y_train)
        measures = regression_measures(est_gp.predict(X_validation), Y_validation)
        return (param, str(est_gp._program), est_gp._program.raw_fitness_, measures)
    except Exception as e:
        return (param, 'Exception: {}'.format(str(e)), FAILED_FITNESS, None)


def _format_result(res):
    """Render one ``_gp_fit`` result as the text block printed per run."""
    param, program, fitness, measures = res
    if measures is None:
        # Failed fit: there is no validation error to report.
        return '{} failed ({:.0f})\n{}'.format(param, fitness, program)
    return '{} {:.0f} ({:.0f})\n{}'.format(param, measures['mae'], fitness, program)


def main(argv):
    """Run the benchmark for the command line ``argv``; return an exit code."""
    if len(argv) < 5:
        print(
            'Usage: {} <state_or_tran> <model_attribute> <dimension> <file>...'.format(
                argv[0] if argv else 'gptest.py'
            ),
            file=sys.stderr,
        )
        return 1

    state_or_tran = argv[1]
    model_attribute = argv[2]
    # Validate before the (potentially slow) data loading below.
    try:
        dimension = int(argv[3])
    except ValueError:
        print('dimension must be an integer, got {!r}'.format(argv[3]), file=sys.stderr)
        return 1
    filenames = argv[4:]

    from dfatool import PTAModel, RawData, pta_trace_to_aggregate

    population_size = [100, 500, 1000, 2000, 5000, 10000]
    # NOTE(review): 0.1 appears twice, so that value is benchmarked twice as
    # often as the others. Possibly a typo for another coefficient; confirm.
    parsimony_coefficient = [0.1, 0.5, 0.1, 1]
    function_set = ['add mul', 'add mul sub div', 'add mul sub div sqrt log inv']
    const_lim = [100000, 50000, 10000, 1000, 500, 10, 1]

    raw_data = RawData(filenames)
    preprocessed_data = raw_data.get_preprocessed_data()
    by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data)
    model = PTAModel(by_name, parameters, arg_count, traces=preprocessed_data)
    by_param = model.by_param

    # X holds one list per parameter dimension; Y holds the measurements.
    X = [[] for _ in range(dimension)]
    Y = []
    for key, val in by_param.items():
        if key[0] == state_or_tran and len(key[1]) == dimension:
            samples = val[model_attribute]
            Y.extend(samples)
            # Repeat the parameter value once per sample so X and Y line up.
            for column, value in zip(X, key[1]):
                column.extend([float(value)] * len(samples))

    if not Y:
        print(
            'No data found for {!r} with {} parameter(s)'.format(state_or_tran, dimension),
            file=sys.stderr,
        )
        return 1

    # Transpose so that each row is one sample and each column one parameter.
    features = np.array(X).T
    targets = np.array(Y)

    paramqueue = [
        ((popsize, coef, fs, cl), features, targets)
        for popsize, coef, fs, cl, _ in itertools.product(
            population_size,
            parsimony_coefficient,
            function_set,
            const_lim,
            range(REPETITIONS),
        )
    ]

    with Pool(initializer=_reseed_worker) as pool:
        results = pool.map(_gp_fit, paramqueue)

    for res in sorted(results, key=lambda r: r[2]):
        print(_format_result(res))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))