From da7c5b0f5687ac964b2de2da3e86554c17dfe6cb Mon Sep 17 00:00:00 2001
From: William Zhang <17zhangw@gmail.com>
Date: Thu, 4 Apr 2024 16:00:27 -0400
Subject: [PATCH] Patch #21 and #23 (#26)

Small fixes for #21 and #23.

- Fix how we select the `normalize_state` HPO parameter. We always want to
  normalize the representations to `[-1, 1]` (since the metrics can be
  arbitrarily large) for neural network stability (#21).
- Added an error check to gradient clipping: it now raises if the gradients
  are already non-finite, which surfaces the failure earlier and more clearly
  (#21). See the sketch after this list.
- For backwards compatibility, added back the `structure_normalize` option,
  which uses the latent projection directly instead of relying on the
  observation-based normalization.
- When a query times out, there might be additional DDL afterwards (TPC-H Q15
  is `[create view, select, drop view]`). We need to actually execute the
  `drop view` cleanup if executing the select exceeds the workload timeout
  (#23). A standalone sketch of this cleanup pattern follows the diff.
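
As a rough illustration of the new clipping behavior (a minimal sketch, not
code from this patch; the tiny throwaway model below is made up):

```python
import torch as th

# Tiny model whose gradients we deliberately corrupt to show the new behavior.
model = th.nn.Linear(4, 1)
loss = model(th.randn(8, 4)).sum() * float("nan")  # produces NaN gradients
loss.backward()

try:
    # With error_if_nonfinite=True, clip_grad_norm_ raises instead of silently
    # feeding NaN/Inf gradients into the optimizer step.
    th.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, error_if_nonfinite=True)
except RuntimeError as exc:
    print(f"caught non-finite gradients early: {exc}")
```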
"normalize_reward": tune.choice([False, True]), @@ -567,4 +565,4 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: for i in range(len(results)): if results[i].error: print(f"Trial {results[i]} FAILED") - assert False, print("Encountered exceptions!") \ No newline at end of file + assert False, print("Encountered exceptions!") diff --git a/tune/protox/agent/wolp/policies.py b/tune/protox/agent/wolp/policies.py index 5242fe9..c4294f0 100644 --- a/tune/protox/agent/wolp/policies.py +++ b/tune/protox/agent/wolp/policies.py @@ -244,7 +244,7 @@ def train_critic( self.critic_optimizer.zero_grad() assert not th.isnan(critic_loss).any() critic_loss.backward() # type: ignore - th.nn.utils.clip_grad_norm_(list(self.critic.parameters()), self.grad_clip) # type: ignore + th.nn.utils.clip_grad_norm_(list(self.critic.parameters()), self.grad_clip, error_if_nonfinite=True) # type: ignore self.critic.check_grad() self.critic_optimizer.step() return critic_loss @@ -282,7 +282,7 @@ def train_actor(self, replay_data: ReplayBufferSamples) -> Any: self.actor_optimizer.zero_grad() assert not th.isnan(actor_loss).any() actor_loss.backward() # type: ignore - th.nn.utils.clip_grad_norm_(list(self.actor.parameters()), self.grad_clip) # type: ignore + th.nn.utils.clip_grad_norm_(list(self.actor.parameters()), self.grad_clip, error_if_nonfinite=True) # type: ignore self.actor.check_grad() self.actor_optimizer.step() return actor_loss diff --git a/tune/protox/env/space/latent_space/latent_index_space.py b/tune/protox/env/space/latent_space/latent_index_space.py index d7a36bf..33d5946 100644 --- a/tune/protox/env/space/latent_space/latent_index_space.py +++ b/tune/protox/env/space/latent_space/latent_index_space.py @@ -77,6 +77,9 @@ def critic_dim(self) -> int: return index_dim + def uses_embed(self) -> bool: + return True + def transform_noise( self, subproto: ProtoAction, noise: Optional[torch.Tensor] = None ) -> ProtoAction: diff --git a/tune/protox/env/space/latent_space/latent_knob_space.py b/tune/protox/env/space/latent_space/latent_knob_space.py index 1a27842..6d1a97e 100644 --- a/tune/protox/env/space/latent_space/latent_knob_space.py +++ b/tune/protox/env/space/latent_space/latent_knob_space.py @@ -43,6 +43,9 @@ def latent_dim(self) -> int: def critic_dim(self) -> int: return self.latent_dim() + def uses_embed(self) -> bool: + return False + def transform_noise( self, subproto: ProtoAction, noise: Optional[torch.Tensor] = None ) -> ProtoAction: diff --git a/tune/protox/env/space/latent_space/latent_query_space.py b/tune/protox/env/space/latent_space/latent_query_space.py index 443e82d..0bfa40b 100644 --- a/tune/protox/env/space/latent_space/latent_query_space.py +++ b/tune/protox/env/space/latent_space/latent_query_space.py @@ -22,6 +22,9 @@ def __init__( self.logger = logger self.name = "query" + def uses_embed(self) -> bool: + return False + def generate_state_container( self, prev_state: Optional[QuerySpaceContainer], diff --git a/tune/protox/env/space/latent_space/lsc_index_space.py b/tune/protox/env/space/latent_space/lsc_index_space.py index e6e5af0..e142508 100644 --- a/tune/protox/env/space/latent_space/lsc_index_space.py +++ b/tune/protox/env/space/latent_space/lsc_index_space.py @@ -62,6 +62,9 @@ def __init__( assert lsc is not None self.lsc = lsc + def uses_embed(self) -> bool: + return True + def pad_center_latent( self, subproto: ProtoAction, lscs: torch.Tensor ) -> ProtoAction: diff --git a/tune/protox/env/space/state/lsc_space.py b/tune/protox/env/space/state/lsc_space.py 
index 58df375..6cc247a 100644
--- a/tune/protox/env/space/state/lsc_space.py
+++ b/tune/protox/env/space/state/lsc_space.py
@@ -16,10 +16,11 @@ def __init__(
         self,
         lsc: LSC,
         action_space: HolonSpace,
+        normalize: bool,
         seed: int,
     ) -> None:
         spaces = {"lsc": Box(low=-1, high=1.0)}
-        super().__init__(action_space, spaces, seed)
+        super().__init__(action_space, spaces, normalize, seed)
         self.lsc = lsc
 
     def construct_offline(
diff --git a/tune/protox/env/space/state/structure.py b/tune/protox/env/space/state/structure.py
index 6a9a469..d1a0998 100644
--- a/tune/protox/env/space/state/structure.py
+++ b/tune/protox/env/space/state/structure.py
@@ -25,14 +25,25 @@ def __init__(
         self,
         action_space: HolonSpace,
         spaces: Mapping[str, spaces.Space[Any]],
+        normalize: bool,
         seed: int,
     ) -> None:
         self.action_space = action_space
+        self.normalize = normalize
+
+        if self.normalize:
+            self.internal_spaces: Dict[str, gym.spaces.Space[Any]] = {
+                k: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(s.critic_dim(),))
+                for k, s in action_space.get_spaces()
+            }
+        else:
+            self.internal_spaces = {
+                k: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(s.critic_dim(),))
+                if s.uses_embed() else s
+                for k, s in action_space.get_spaces()
+
+            }
 
-        self.internal_spaces: Dict[str, gym.spaces.Space[Any]] = {
-            k: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(s.critic_dim(),))
-            for k, s in action_space.get_spaces()
-        }
         self.internal_spaces.update(spaces)
         super().__init__(self.internal_spaces, seed)
 
@@ -63,21 +74,25 @@ def construct_offline(
             assert isinstance(knobs, LatentKnobSpace)
             assert check_subspace(knobs, knob_state)
 
-            knob_state = np.array(
-                knobs.to_latent([knob_state]),
-                dtype=np.float32,
-            )[0]
-
+            if self.normalize:
+                knob_state = np.array(
+                    knobs.to_latent([knob_state]),
+                    dtype=np.float32,
+                )[0]
             assert self.internal_spaces["knobs"].contains(knob_state)
 
         if ql_state is not None:
            query = self.action_space.get_query_space()
            assert isinstance(query, LatentQuerySpace)
            assert check_subspace(query, ql_state)
-            query_state = np.array(
-                query.to_latent([ql_state]),
-                dtype=np.float32,
-            )[0]
+
+            if self.normalize:
+                query_state = np.array(
+                    query.to_latent([ql_state]),
+                    dtype=np.float32,
+                )[0]
+            else:
+                query_state = ql_state
 
         # Handle indexes.
         indexes_ = [v[1] for v in splits if isinstance(v[0], LatentIndexSpace)]
diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py
index a5dc82c..b1bf939 100644
--- a/tune/protox/env/workload.py
+++ b/tune/protox/env/workload.py
@@ -417,9 +417,10 @@ def _execute_workload(
                 # Skip any query in blocklist.
                 continue
 
-            for sql_type, query in queries:
+            for qidx, (sql_type, query) in enumerate(queries):
                 assert sql_type != QueryType.UNKNOWN
                 if sql_type != QueryType.SELECT:
+                    # This is a sanity check because any OLTP workload should be run through benchbase, and any OLAP workload should not have INS_UPD_DEL queries.
                     assert sql_type != QueryType.INS_UPD_DEL
                     pgconn.conn().execute(query)
                     continue
@@ -533,6 +534,13 @@ def _execute_workload(
                     time_left -= qid_runtime / 1e6
                     workload_time += qid_runtime / 1e6
                    if time_left < 0:
+                        # We need to undo any potential statements after the timed out query.
+                        for st, rq in queries[qidx+1:]:
+                            if st != QueryType.SELECT:
+                                # This is a sanity check because any OLTP workload should be run through benchbase, and any OLAP workload should not have INS_UPD_DEL queries. If we do have INS_UPD_DEL queries, our "undo" logic will likely have to change.
+                                assert st != QueryType.INS_UPD_DEL
+                                pgconn.conn().execute(rq)
+
                        stop_running = True
                        break
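
For reference, a minimal standalone sketch of the timeout-cleanup pattern that the
workload.py change implements. This is not the repository's API: the function name
`run_query_group`, the string query types, and the use of `statement_timeout` via
psycopg are illustrative assumptions only.

```python
import psycopg

# Hypothetical illustration: TPC-H Q15 arrives as [create view, select, drop view].
# If the SELECT exceeds the timeout, the trailing DDL must still run, otherwise the
# leftover view breaks the next invocation of the query group.
def run_query_group(conn: psycopg.Connection, queries: list[tuple[str, str]], timeout_s: float) -> None:
    for qidx, (sql_type, sql) in enumerate(queries):
        if sql_type != "SELECT":
            conn.execute(sql)  # DDL such as CREATE VIEW / DROP VIEW
            continue

        timed_out = False
        try:
            conn.execute(f"SET statement_timeout = {int(timeout_s * 1000)}")
            conn.execute(sql)
        except psycopg.errors.QueryCanceled:
            timed_out = True

        if timed_out:
            # Replay the remaining non-SELECT statements (e.g., DROP VIEW) so the
            # database is left in a clean state before aborting the group.
            for st, rq in queries[qidx + 1:]:
                if st != "SELECT":
                    conn.execute(rq)
            break
```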