-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_results_sanity.py
111 lines (94 loc) · 4.88 KB
/
check_results_sanity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
Script to check whether all results exist.
"""
import warnings
import mlflow
import shutil
from os.path import sep
import numpy as np
from choleskies.openblas.default_cholesky import DefaultCholeskyBLAS
from choleskies.openblas.pivoted_cholesky import PivotedCholeskyBLAS
from choleskies.openblas.stopped_cholesky import StoppedCholeskyBLAS
from registry import KERNEL_DICT, CLIP
from result_management import ENV_CPUS, find_experiments_with_tags, delete_runs_if_crashed, filter_runs_with_tags, \
get_run_list_from_dataframe
from util.execution.cluster import execute_job_array_on_slurm_cluster
from util.result_processing.diagonal_tolererance_to_precision import _get_precisions_from_pivoted_cholesky_runs_df
datasets = ['metro', 'tamilnadu_electricity', 'pm25', 'protein', 'bank']
sn2 = 1e-3
theta = 0.
seeds = [str(i) for i in range(0, 10)]
ds = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]
ls = [-1., 0., 1., 2.]
ks = list(KERNEL_DICT.keys())
cpus = 40
environment = {ENV_CPUS: cpus}
block_size = 256 * cpus
command_template = "python run_single_configuration.py -a %s %s -d %s -k %s -k-ls %s -s %i"
command_list = []
def delete_older_runs(run_df) -> []:
run_df.sort_values(by=['start_time'])
run_ls = get_run_list_from_dataframe(run_df)
start_time = run_ls[0].info.start_time
for i in range(1, len(run_ls)):
run = run_ls[i]
# make sure we keep the most recent run
assert(run.info.start_time < start_time)
remove = mlflow.get_tracking_uri() + sep + run.info.experiment_id + sep + run.info.run_id
#print(remove)
shutil.rmtree(remove)
def check_consistency(dataset, kernel, algorithm, algo_parameters, seed, runs):
runs = filter_runs_with_tags(runs, algorithm, algo_parameters, seed)
remaining_runs = delete_runs_if_crashed(get_run_list_from_dataframe(runs))
if len(runs) != len(remaining_runs):
runs = mlflow.search_runs(experiment_ids=[remaining_runs[0].info.experiment_id])
runs = filter_runs_with_tags(runs, algorithm, algo_parameters, seed)
if len(remaining_runs) > 1:
warnings.warn("More than one result for configuration: ")
delete_older_runs(runs)
runs = mlflow.search_runs(experiment_ids=[remaining_runs[0].info.experiment_id])
runs = filter_runs_with_tags(runs, algorithm, algo_parameters, seed)
elif len(remaining_runs) == 0:
parameters = ""
sig = "--" + algorithm.get_signature()
for k in algo_parameters.keys():
parameters += sig + "-" + k.replace('_', '-') + " " + str(algo_parameters[k]) + " "
command = command_template % (algorithm.get_signature(), parameters, dataset, kernel.name,
str(kernel.parameters["ls"]), int(seed))
print(command)
command_list.append(command)
return runs
for dataset in datasets:
for l in ls:
for k in ks:
print(dataset + " " + k + " " + str(l))
kernel = KERNEL_DICT[k]
kernel.initialize(D=None, var=theta, ls=l)
exps = find_experiments_with_tags(dataset, kernel, environment, sn2, CLIP)
assert(len(exps) == 1)
runs = mlflow.search_runs(experiment_ids=[e.experiment_id for e in exps])
for s in seeds:
check_consistency(dataset=dataset, kernel=kernel, algorithm=DefaultCholeskyBLAS(), algo_parameters={},
seed=s, runs=runs)
for dt in ds:
piv_runs = check_consistency(dataset=dataset, kernel=kernel, algorithm=PivotedCholeskyBLAS(),
algo_parameters={"diagonal_tolerance": str(dt)}, seed=s, runs=runs)
rs = _get_precisions_from_pivoted_cholesky_runs_df(piv_runs)
assert(len(rs) <= 1)
if len(rs) == 0:
# a pivoted run is still missing
# we have to run that first before we can run the stopped experiment
continue
#r = str(0.0) if rs[0] == 0 else ("%.6f" % rs[0]).rstrip('0')
# convert precision row to numerical value---hell, mlflow can be treacherous...
stopped_runs = filter_runs_with_tags(runs, StoppedCholeskyBLAS(),
algo_parameters={"delta": str(0.1)}, seed=s)
stopped_runs = stopped_runs.astype({"tags.algorithm.r": float})
stopped_runs = stopped_runs.round({"tags.algorithm.r": 6})
r = np.around(rs[0], decimals=6)
check_consistency(dataset=dataset, kernel=kernel, algorithm=StoppedCholeskyBLAS(),
algo_parameters={"r": r, "delta": str(0.1)}, seed=s, runs=stopped_runs)
if len(command_list) > 0:
print(execute_job_array_on_slurm_cluster(commands=command_list, cpus=cpus))
else:
print("All results exist.")