-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_optim_loop.py
More file actions
117 lines (99 loc) · 3.88 KB
/
run_optim_loop.py
File metadata and controls
117 lines (99 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import argparse
from main import run_conv_wrapper
from tools.validate_tool.sf_list_gen import gen_sf
from utils.cli_config import add_common_args, build_run_config
from utils.gen_common import parse_query_ids
from utils.wandb_api_helper import wandb_retrieve_metrics_for_run
### RUN CMD
# python run_optim_loop.py --conv runoptim1-22v1 --bespoke_storage --benchmark tpch --notify --replay_cache --auto_u --auto_finish
def main(args):
    """Configure and start an optimization conversation from parsed CLI args.

    The conversation short name (``args.conv``) is validated, the query ids
    are parsed out of it, the starting commit hash is looked up from a W&B
    run, and the resulting run config is handed to ``run_conv_wrapper``.

    Args:
        args: Parsed CLI namespace as produced by ``build_parser()``. Reads
            ``conv``, ``bespoke_storage``, ``benchmark`` and the common flags
            forwarded to ``build_run_config`` below.

    Raises:
        ValueError: If the short name does not start with ``runoptim``,
            already contains ``wstorage``, contains no ``v``, yields no query
            ids, or if the benchmark has no known W&B run id.
    """
    # extract parameters
    bespoke_storage = args.bespoke_storage
    short_name = args.conv
    benchmark = args.benchmark

    # Validate the short name with explicit raises: ``assert`` statements are
    # removed under ``python -O`` and would let bad CLI input through.
    prefix = "runoptim"
    if not short_name.startswith(prefix):
        raise ValueError(
            f"Conversation name {short_name} must start with '{prefix}'"
        )
    if "wstorage" in short_name:
        raise ValueError(
            f"Use --bespoke_storage flag instead of encoding it in the conversation name {short_name}. This is automatically added to the versioning string"
        )

    # Query ids are only parsed from names containing "v" (e.g.
    # runoptim1-22v1). Fail fast here instead of hitting an unbound
    # ``query_ids`` after the W&B lookup has already been performed.
    if "v" not in short_name:
        raise ValueError(
            f"Conversation name {short_name} must contain 'v' (e.g. {prefix}1-22v1) so that query ids can be parsed from it"
        )
    query_ids = parse_query_ids(short_name, prefix, benchmark=benchmark)
    if query_ids is None:
        raise ValueError(f"Could not parse query ids from short name {short_name}")

    if bespoke_storage:
        short_name += "_wstorage"

    # assemble default sf values for the selected benchmark
    # (only the maximum scale factor is needed here)
    _verify_sf_list, max_scale_factor = gen_sf(benchmark)

    # W&B run ids per benchmark, keyed by whether bespoke storage is enabled
    wandb_ids = {
        "tpch": {True: "a2tlnfrk", False: "ijvzlkif"},
        "ceb": {True: "blqeh6i0", False: "fx7rshq2"},
    }
    if benchmark not in wandb_ids:
        raise ValueError(f"Unknown benchmark {benchmark}")
    wandb_id = wandb_ids[benchmark][bool(bespoke_storage)]

    # lookup git snapshot from wandb
    statistics, _ = wandb_retrieve_metrics_for_run(
        benchmark, wandb_id, fetch_latest_runtimes=False
    )
    commit_hash = statistics["last_commit_hash"]

    config = build_run_config(
        benchmark=benchmark,
        conv_name=short_name,
        conv_mode="optimization",  # delegate the optimization loop logic to the conversation instead of hardcoding it in the main function
        query_list=",".join(map(str, query_ids)),
        notify=args.notify,
        disable_repo_sync=args.disable_repo_sync,
        max_scale_factor=max_scale_factor,
        replay_cache=args.replay_cache,
        start_snapshot=commit_hash,
        storage_plan_snapshot=None,
        keep_csv=True,  # keep .csv files around instead of git-ignoring them (maybe to backtrack correctness issues)
        disable_tracing=args.disable_tracing,
        disable_wandb=args.disable_wandb,
        auto_u=args.auto_u,
        auto_finish=args.auto_finish,
        is_bespoke_storage=bespoke_storage,
        run_tool_offer_trace_option=True,  # for optimization conversations, we want to offer the option to run with tracing compile flag enabled to collect more fine-grained performance data for the optimized plans
        only_from_llm_cache=args.only_from_llm_cache,
        only_from_cache=args.only_from_cache,
    )

    # run conversation
    run_conv_wrapper(config)
def build_parser(*, add_help: bool = True) -> argparse.ArgumentParser:
    """Create the command-line parser for the optimization-loop script.

    Args:
        add_help: Forwarded to ``argparse.ArgumentParser``; controls whether
            the automatic ``-h/--help`` option is registered.

    Returns:
        A parser with the script-specific ``--conv`` and ``--bespoke_storage``
        options plus the shared options registered by ``add_common_args``.
    """
    cli = argparse.ArgumentParser(add_help=add_help)

    # Options specific to this script.
    cli.add_argument(
        "--conv", type=str, required=True, help="Short name for the conversation"
    )
    cli.add_argument(
        "--bespoke_storage",
        action="store_true",
        default=False,
        help="Whether to read the storage plan from a previous run",
    )

    # Shared options; each name is enabled via its ``include_<name>`` switch.
    shared_options = (
        "notify",
        "disable_repo_sync",
        "replay_cache",
        "benchmark",
        "disable_wandb",
        "disable_tracing",
        "auto_u",
        "auto_finish",
        "only_from_llm_cache",
        "only_from_cache",
    )
    include_switches = {f"include_{option}": True for option in shared_options}
    add_common_args(cli, **include_switches)

    return cli
if __name__ == "__main__":
    # Script entry point: parse the command line and start the run.
    cli_args = build_parser().parse_args()
    main(cli_args)