diff options
Diffstat (limited to 'bin/gptest.py')
-rwxr-xr-x | bin/gptest.py | 38 |
1 files changed, 24 insertions, 14 deletions
diff --git a/bin/gptest.py b/bin/gptest.py index 869328e..82b4575 100755 --- a/bin/gptest.py +++ b/bin/gptest.py @@ -2,10 +2,16 @@ import sys import numpy as np -from dfatool.dfatool import PTAModel, RawData, regression_measures, pta_trace_to_aggregate +from dfatool.dfatool import ( + PTAModel, + RawData, + regression_measures, + pta_trace_to_aggregate, +) from gplearn.genetic import SymbolicRegressor from multiprocessing import Pool + def splitidx_srs(length): shuffled = np.random.permutation(np.arange(length)) border = int(length * float(2) / 3) @@ -13,16 +19,17 @@ def splitidx_srs(length): validation = shuffled[border:] return (training, validation) + def _gp_fit(arg): param = arg[0] X = arg[1] Y = arg[2] est_gp = SymbolicRegressor( - population_size = param[0], - generations = 450, - parsimony_coefficient = param[1], - function_set = param[2].split(' '), - const_range = (-param[3], param[3]) + population_size=param[0], + generations=450, + parsimony_coefficient=param[1], + function_set=param[2].split(" "), + const_range=(-param[3], param[3]), ) training, validation = splitidx_srs(len(Y)) @@ -33,22 +40,27 @@ def _gp_fit(arg): try: est_gp.fit(X_train, Y_train) - return (param, str(est_gp._program), est_gp._program.raw_fitness_, regression_measures(est_gp.predict(X_validation), Y_validation)) + return ( + param, + str(est_gp._program), + est_gp._program.raw_fitness_, + regression_measures(est_gp.predict(X_validation), Y_validation), + ) except Exception as e: - return (param, 'Exception: {}'.format(str(e)), 999999999) + return (param, "Exception: {}".format(str(e)), 999999999) -if __name__ == '__main__': +if __name__ == "__main__": population_size = [100, 500, 1000, 2000, 5000, 10000] parsimony_coefficient = [0.1, 0.5, 0.1, 1] - function_set = ['add mul', 'add mul sub div', 'add mul sub div sqrt log inv'] + function_set = ["add mul", "add mul sub div", "add mul sub div sqrt log inv"] const_lim = [100000, 50000, 10000, 1000, 500, 10, 1] filenames = sys.argv[4:] raw_data = RawData(filenames) preprocessed_data = raw_data.get_preprocessed_data() by_name, parameters, arg_count = pta_trace_to_aggregate(preprocessed_data) - model = PTAModel(by_name, parameters, arg_count, traces = preprocessed_data) + model = PTAModel(by_name, parameters, arg_count, traces=preprocessed_data) by_param = model.by_param @@ -61,14 +73,12 @@ if __name__ == '__main__': X = [[] for i in range(dimension)] Y = [] - for key, val in by_param.items(): if key[0] == state_or_tran and len(key[1]) == dimension: Y.extend(val[model_attribute]) for i in range(dimension): X[i].extend([float(key[1][i])] * len(val[model_attribute])) - X = np.array(X) Y = np.array(Y) @@ -85,4 +95,4 @@ if __name__ == '__main__': results = pool.map(_gp_fit, paramqueue) for res in sorted(results, key=lambda r: r[2]): - print('{} {:.0f} ({:.0f})\n{}'.format(res[0], res[3]['mae'], res[2], res[1])) + print("{} {:.0f} ({:.0f})\n{}".format(res[0], res[3]["mae"], res[2], res[1])) |