From a06d4529dad41a879300141cdc41663447b768d9 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 3 Sep 2024 20:16:06 +0000 Subject: [PATCH 01/17] Combine two tf_agents policies with timestep spec given by combine_tfa_policies_lib.get_input_signature() and action spec given by combine_tfa_policies_lib.get_action_spec() The combiner policy uses a new timestep spec feature "model_selector" to select the requested policy at the current state. The feature is computed as a md5 hash from the respective policies names. --- compiler_opt/tools/combine_tfa_policies.py | 30 +++ .../tools/combine_tfa_policies_lib.py | 176 ++++++++++++++++++ .../tools/combine_tfa_policies_lib_test.py | 115 ++++++++++++ 3 files changed, 321 insertions(+) create mode 100755 compiler_opt/tools/combine_tfa_policies.py create mode 100644 compiler_opt/tools/combine_tfa_policies_lib.py create mode 100644 compiler_opt/tools/combine_tfa_policies_lib_test.py diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py new file mode 100755 index 00000000..d3932a60 --- /dev/null +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -0,0 +1,30 @@ +from absl import app + +import tensorflow as tf + +from compiler_opt.rl import policy_saver +from compiler_opt.tools import combine_tfa_policies_lib as cfa_lib + + +def main(_): + expected_signature = cfa_lib.get_input_signature() + action_spec = cfa_lib.get_action_spec() + policy1_name = input("First policy name: ") + policy1_path = input(policy1_name + " path: ") + policy2_name = input("Second policy name: ") + policy2_path = input(policy2_name + " path: ") + policy1 = tf.saved_model.load(policy1_path, tags=None, options=None) + policy2 = tf.saved_model.load(policy2_path, tags=None, options=None) + combined_policy = cfa_lib.CombinedTFPolicy( + tf_policies={policy1_name:policy1, policy2_name:policy2}, + time_step_spec=expected_signature, + action_spec=action_spec + ) + combined_policy_path = input("Save combined policy path: ") + policy_dict = {'combined_policy': combined_policy} + saver = policy_saver.PolicySaver(policy_dict=policy_dict) + saver.save(combined_policy_path) + +if __name__ == "__main__": + app.run(main) + diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py new file mode 100644 index 00000000..db808112 --- /dev/null +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -0,0 +1,176 @@ +from typing import Dict, List, Optional, Tuple + +import gin +import tensorflow as tf +import hashlib + +import tf_agents +from tf_agents.trajectories import time_step +from tf_agents.typing import types +from tf_agents.trajectories import policy_step +import tensorflow_probability as tfp +from tf_agents.specs import tensor_spec + + +class CombinedTFPolicy(tf_agents.policies.TFPolicy): + + def __init__(self, *args, + tf_policies: Dict[str, tf_agents.policies.TFPolicy], + **kwargs): + super(CombinedTFPolicy, self).__init__(*args, **kwargs) + + self.tf_policies = [] + self.tf_policy_names = [] + for name, policy in tf_policies.items(): + self.tf_policies.append(policy) + self.tf_policy_names.append(name) + + self.expected_signature = self.time_step_spec + self.sorted_keys = sorted(self.expected_signature.observation.keys()) + + high_low_tensors = [] + for name in self.tf_policy_names: + m = hashlib.md5() + m.update(name.encode('utf-8')) + high_low_tensors.append(tf.stack([ + tf.constant(int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), + tf.constant(int.from_bytes(m.digest()[:8], 
'little'), dtype=tf.uint64) + ]) + ) + self.high_low_tensors = tf.stack(high_low_tensors) + + m = hashlib.md5() + m.update(self.tf_policy_names[0].encode('utf-8')) + self.high = int.from_bytes(m.digest()[8:], 'little') + self.low = int.from_bytes(m.digest()[:8], 'little') + self.high_low_tensor = tf.constant([self.high, self.low], dtype=tf.uint64) + + def _process_observation(self, observation): + for name in self.sorted_keys: + if name in ['model_selector']: + switch_tensor = observation.pop(name)[0] + high_low_tensor = switch_tensor + + tf.debugging.Assert( + tf.equal( + tf.reduce_any( + tf.reduce_all( + tf.equal(high_low_tensor, self.high_low_tensors), axis=1 + ) + ),True + ), + [high_low_tensor, self.high_low_tensors]) + return observation, switch_tensor + + def _create_distribution(self, inlining_prediction): + probs = [inlining_prediction, 1.0 - inlining_prediction] + logits = [[0.0, tf.math.log(probs[1]/(1.0 - probs[1]))]] + return tfp.distributions.Categorical(logits=logits) + + def _action(self, time_step: time_step.TimeStep, + policy_state: types.NestedTensorSpec, + seed: Optional[types.Seed] = None) -> policy_step.PolicyStep: + new_observation = time_step.observation + new_observation, switch_tensor = self._process_observation(new_observation) + updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): + return tf.cast( + self.tf_policies[0].action(updated_step).action[0], dtype=tf.int64) + def f1(): + return tf.cast( + self.tf_policies[1].action(updated_step).action[0], dtype=tf.int64) + action = tf.cond( + tf.math.reduce_all( + tf.equal(switch_tensor, self.high_low_tensor)), + f0, + f1 + ) + return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) + + def _distribution( + self, time_step: time_step.TimeStep, + policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: + new_observation = time_step.observation + new_observation, switch_tensor = self._process_observation(new_observation) + updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): + return tf.cast( + self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], + dtype=tf.float32) + def f1(): + return tf.cast( + self.tf_policies[1].distribution(updated_step).action.cdf(0)[0], + dtype=tf.float32) + distribution = tf.cond( + tf.math.reduce_all( + tf.equal(switch_tensor, self.high_low_tensor)), + f0, + f1 + ) + return tf_agents.trajectories.PolicyStep( + action=self._create_distribution(distribution), + state=policy_state) + + + +@gin.configurable() +def get_input_signature(): + """Returns the list of features for LLVM inlining to be used in combining models.""" + # int64 features + inputs = dict( + (key,tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) + for key in [ + "caller_basic_block_count", + "caller_conditionally_executed_blocks", + "caller_users", + "callee_basic_block_count", + "callee_conditionally_executed_blocks", + "callee_users", + "nr_ctant_params", + "node_count", + "edge_count", + "callsite_height", + "cost_estimate", + "inlining_default", + "sroa_savings", + "sroa_losses", + "load_elimination", + "call_penalty", + "call_argument_setup", + "load_relative_intrinsic", + "lowered_call_arg_setup", + "indirect_call_penalty", + "jump_table_penalty", + "case_cluster_penalty", + "switch_penalty", + "unsimplified_common_instructions", + 
"num_loops", + "dead_blocks", + "simplified_instructions", + "constant_args", + "constant_offset_ptr_args", + "callsite_cost", + "cold_cc_penalty", + "last_call_to_static_bonus", + "is_multiple_blocks", + "nested_inlines", + "nested_inline_cost_estimate", + "threshold", + "is_callee_avail_external", + "is_caller_avail_external", + ] + ) + inputs.update({'model_selector': tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector')}) + return time_step.time_step_spec(inputs) + +@gin.configurable() +def get_action_spec(): + return tensor_spec.BoundedTensorSpec( + dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1 + ) \ No newline at end of file diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py new file mode 100644 index 00000000..0c5f71c6 --- /dev/null +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -0,0 +1,115 @@ +"""Tests for the feature_importance_utils.py module""" + +from absl.testing import absltest + +import tensorflow as tf +from compiler_opt.tools import combine_tfa_policies_lib +from tf_agents.trajectories import time_step +import tf_agents +from tf_agents.specs import tensor_spec +from tf_agents.trajectories import policy_step +import hashlib +import numpy as np + +class AddOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): + observation_spec = {'obs': tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64)} + time_step_spec = time_step.time_step_spec(observation_spec) + + action_spec = tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64) + + super(AddOnePolicy, self).__init__(time_step_spec=time_step_spec, + action_spec=action_spec) + def _distribution(self, time_step): + pass + + def _variables(self): + return () + + def _action(self, time_step, policy_state, seed): + observation = time_step.observation['obs'][0] + action = tf.reshape(observation + 1, (1,)) + return policy_step.PolicyStep(action, policy_state) + +class SubtractOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): + observation_spec = {'obs': tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64)} + time_step_spec = time_step.time_step_spec(observation_spec) + + action_spec = tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64) + + super(SubtractOnePolicy, self).__init__(time_step_spec=time_step_spec, + action_spec=action_spec) + def _distribution(self, time_step): + pass + + def _variables(self): + return () + + def _action(self, time_step, policy_state, seed): + observation = time_step.observation['obs'][0] + action = tf.reshape(observation - 1, (1,)) + return policy_step.PolicyStep(action, policy_state) + +observation_spec = time_step.time_step_spec( + {'obs':tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), + 'model_selector': tf.TensorSpec(shape=(2,), + dtype=tf.uint64, name='model_selector')} +) + +action_spec = tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64) + +class FeatureImportanceTest(absltest.TestCase): + + def test_select_add_policy(self): + policy1 = AddOnePolicy() + policy2 = SubtractOnePolicy() + combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( + tf_policies={'add_one':policy1, 'subtract_one':policy2}, + time_step_spec=observation_spec, + action_spec=action_spec) + + m = hashlib.md5() + m.update('add_one'.encode('utf-8')) + high = int.from_bytes(m.digest()[8:], 'little') + low = int.from_bytes(m.digest()[:8], 'little') + model_selector = tf.constant([[high, low]], dtype=tf.uint64) + + state = tf_agents.trajectories.TimeStep( + 
discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector':model_selector}, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64) + ) + + self.assertEqual(combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) + + def test_select_subtract_policy(self): + policy1 = AddOnePolicy() + policy2 = SubtractOnePolicy() + combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( + tf_policies={'add_one':policy1, 'subtract_one':policy2}, + time_step_spec=observation_spec, + action_spec=action_spec) + + m = hashlib.md5() + m.update('subtract_one'.encode('utf-8')) + high = int.from_bytes(m.digest()[8:], 'little') + low = int.from_bytes(m.digest()[:8], 'little') + model_selector = tf.constant([[high, low]], dtype=tf.uint64) + + state = tf_agents.trajectories.TimeStep( + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector':model_selector}, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64) + ) + + self.assertEqual(combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) \ No newline at end of file From 9bc8c0548f733b60536213f54f76fa497347d913 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 17:49:05 +0000 Subject: [PATCH 02/17] Added licence. --- compiler_opt/tools/combine_tfa_policies.py | 15 +++++++++++++++ compiler_opt/tools/combine_tfa_policies_lib.py | 15 +++++++++++++++ .../tools/combine_tfa_policies_lib_test.py | 14 ++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index d3932a60..3c0db9c1 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -1,3 +1,18 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Runs the policy combiner.""" from absl import app import tensorflow as tf diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index db808112..87faf829 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -1,3 +1,18 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Combines two tf-agent policies with the given state and action spec.""" from typing import Dict, List, Optional, Tuple import gin diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 0c5f71c6..92ab24d7 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -1,3 +1,17 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for the feature_importance_utils.py module""" from absl.testing import absltest From 86e4d12460fec210afb34340c9cb9d05f7f8652c Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 17:53:32 +0000 Subject: [PATCH 03/17] yapf . -ir --- compiler_opt/tools/combine_tfa_policies.py | 14 +- .../tools/combine_tfa_policies_lib.py | 171 +++++++++--------- .../tools/combine_tfa_policies_lib_test.py | 102 ++++++----- 3 files changed, 154 insertions(+), 133 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 3c0db9c1..7309b60f 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -31,15 +31,17 @@ def main(_): policy1 = tf.saved_model.load(policy1_path, tags=None, options=None) policy2 = tf.saved_model.load(policy2_path, tags=None, options=None) combined_policy = cfa_lib.CombinedTFPolicy( - tf_policies={policy1_name:policy1, policy2_name:policy2}, - time_step_spec=expected_signature, - action_spec=action_spec - ) + tf_policies={ + policy1_name: policy1, + policy2_name: policy2 + }, + time_step_spec=expected_signature, + action_spec=action_spec) combined_policy_path = input("Save combined policy path: ") policy_dict = {'combined_policy': combined_policy} saver = policy_saver.PolicySaver(policy_dict=policy_dict) saver.save(combined_policy_path) -if __name__ == "__main__": - app.run(main) +if __name__ == "__main__": + app.run(main) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 87faf829..8aff3e3f 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -29,8 +29,7 @@ class CombinedTFPolicy(tf_agents.policies.TFPolicy): - def __init__(self, *args, - tf_policies: Dict[str, tf_agents.policies.TFPolicy], + def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], **kwargs): super(CombinedTFPolicy, self).__init__(*args, **kwargs) @@ -47,11 +46,13 @@ def __init__(self, *args, for name in self.tf_policy_names: m = hashlib.md5() m.update(name.encode('utf-8')) - high_low_tensors.append(tf.stack([ - tf.constant(int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), - tf.constant(int.from_bytes(m.digest()[:8], 'little'), dtype=tf.uint64) - ]) - ) + high_low_tensors.append( + tf.stack([ + tf.constant( + int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), + tf.constant( + int.from_bytes(m.digest()[:8], 'little'), 
dtype=tf.uint64) + ])) self.high_low_tensors = tf.stack(high_low_tensors) m = hashlib.md5() @@ -65,44 +66,44 @@ def _process_observation(self, observation): if name in ['model_selector']: switch_tensor = observation.pop(name)[0] high_low_tensor = switch_tensor - + tf.debugging.Assert( tf.equal( tf.reduce_any( tf.reduce_all( - tf.equal(high_low_tensor, self.high_low_tensors), axis=1 - ) - ),True - ), - [high_low_tensor, self.high_low_tensors]) + tf.equal(high_low_tensor, self.high_low_tensors), + axis=1)), True), + [high_low_tensor, self.high_low_tensors]) return observation, switch_tensor def _create_distribution(self, inlining_prediction): probs = [inlining_prediction, 1.0 - inlining_prediction] - logits = [[0.0, tf.math.log(probs[1]/(1.0 - probs[1]))]] + logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]] return tfp.distributions.Categorical(logits=logits) - def _action(self, time_step: time_step.TimeStep, + def _action(self, + time_step: time_step.TimeStep, policy_state: types.NestedTensorSpec, seed: Optional[types.Seed] = None) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) + updated_step = tf_agents.trajectories.TimeStep( + step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): return tf.cast( self.tf_policies[0].action(updated_step).action[0], dtype=tf.int64) + def f1(): return tf.cast( self.tf_policies[1].action(updated_step).action[0], dtype=tf.int64) + action = tf.cond( - tf.math.reduce_all( - tf.equal(switch_tensor, self.high_low_tensor)), - f0, - f1 - ) + tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, + f1) return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) def _distribution( @@ -110,82 +111,82 @@ def _distribution( policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) + updated_step = tf_agents.trajectories.TimeStep( + step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): return tf.cast( self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], dtype=tf.float32) + def f1(): return tf.cast( self.tf_policies[1].distribution(updated_step).action.cdf(0)[0], dtype=tf.float32) + distribution = tf.cond( - tf.math.reduce_all( - tf.equal(switch_tensor, self.high_low_tensor)), - f0, - f1 - ) + tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, + f1) return tf_agents.trajectories.PolicyStep( - action=self._create_distribution(distribution), - state=policy_state) - + action=self._create_distribution(distribution), state=policy_state) @gin.configurable() def get_input_signature(): - """Returns the list of features for LLVM inlining to be used in combining models.""" - # int64 features - inputs = dict( - (key,tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) - for key in [ - "caller_basic_block_count", - "caller_conditionally_executed_blocks", - "caller_users", - "callee_basic_block_count", - 
"callee_conditionally_executed_blocks", - "callee_users", - "nr_ctant_params", - "node_count", - "edge_count", - "callsite_height", - "cost_estimate", - "inlining_default", - "sroa_savings", - "sroa_losses", - "load_elimination", - "call_penalty", - "call_argument_setup", - "load_relative_intrinsic", - "lowered_call_arg_setup", - "indirect_call_penalty", - "jump_table_penalty", - "case_cluster_penalty", - "switch_penalty", - "unsimplified_common_instructions", - "num_loops", - "dead_blocks", - "simplified_instructions", - "constant_args", - "constant_offset_ptr_args", - "callsite_cost", - "cold_cc_penalty", - "last_call_to_static_bonus", - "is_multiple_blocks", - "nested_inlines", - "nested_inline_cost_estimate", - "threshold", - "is_callee_avail_external", - "is_caller_avail_external", - ] - ) - inputs.update({'model_selector': tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector')}) - return time_step.time_step_spec(inputs) + """Returns the list of features for LLVM inlining to be used in combining models.""" + # int64 features + inputs = dict((key, tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) + for key in [ + "caller_basic_block_count", + "caller_conditionally_executed_blocks", + "caller_users", + "callee_basic_block_count", + "callee_conditionally_executed_blocks", + "callee_users", + "nr_ctant_params", + "node_count", + "edge_count", + "callsite_height", + "cost_estimate", + "inlining_default", + "sroa_savings", + "sroa_losses", + "load_elimination", + "call_penalty", + "call_argument_setup", + "load_relative_intrinsic", + "lowered_call_arg_setup", + "indirect_call_penalty", + "jump_table_penalty", + "case_cluster_penalty", + "switch_penalty", + "unsimplified_common_instructions", + "num_loops", + "dead_blocks", + "simplified_instructions", + "constant_args", + "constant_offset_ptr_args", + "callsite_cost", + "cold_cc_penalty", + "last_call_to_static_bonus", + "is_multiple_blocks", + "nested_inlines", + "nested_inline_cost_estimate", + "threshold", + "is_callee_avail_external", + "is_caller_avail_external", + ]) + inputs.update({ + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') + }) + return time_step.time_step_spec(inputs) + @gin.configurable() def get_action_spec(): return tensor_spec.BoundedTensorSpec( - dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1 - ) \ No newline at end of file + dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 92ab24d7..c79b592c 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -25,17 +25,20 @@ import hashlib import numpy as np + class AddOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): - observation_spec = {'obs': tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64)} + observation_spec = { + 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + } time_step_spec = time_step.time_step_spec(observation_spec) - action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + + super(AddOnePolicy, self).__init__( + time_step_spec=time_step_spec, action_spec=action_spec) - super(AddOnePolicy, self).__init__(time_step_spec=time_step_spec, - action_spec=action_spec) def _distribution(self, time_step): pass @@ -47,17 +50,20 @@ def _action(self, time_step, policy_state, seed): 
action = tf.reshape(observation + 1, (1,)) return policy_step.PolicyStep(action, policy_state) + class SubtractOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): - observation_spec = {'obs': tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64)} + observation_spec = { + 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + } time_step_spec = time_step.time_step_spec(observation_spec) - action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + + super(SubtractOnePolicy, self).__init__( + time_step_spec=time_step_spec, action_spec=action_spec) - super(SubtractOnePolicy, self).__init__(time_step_spec=time_step_spec, - action_spec=action_spec) def _distribution(self, time_step): pass @@ -68,25 +74,30 @@ def _action(self, time_step, policy_state, seed): observation = time_step.observation['obs'][0] action = tf.reshape(observation - 1, (1,)) return policy_step.PolicyStep(action, policy_state) - -observation_spec = time_step.time_step_spec( - {'obs':tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), - 'model_selector': tf.TensorSpec(shape=(2,), - dtype=tf.uint64, name='model_selector')} -) -action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + +observation_spec = time_step.time_step_spec({ + 'obs': + tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') +}) + +action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + class FeatureImportanceTest(absltest.TestCase): - + def test_select_add_policy(self): policy1 = AddOnePolicy() policy2 = SubtractOnePolicy() combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( - tf_policies={'add_one':policy1, 'subtract_one':policy2}, - time_step_spec=observation_spec, - action_spec=action_spec) + tf_policies={ + 'add_one': policy1, + 'subtract_one': policy2 + }, + time_step_spec=observation_spec, + action_spec=action_spec) m = hashlib.md5() m.update('add_one'.encode('utf-8')) @@ -95,22 +106,27 @@ def test_select_add_policy(self): model_selector = tf.constant([[high, low]], dtype=tf.uint64) state = tf_agents.trajectories.TimeStep( - discount=tf.constant(np.array([0.]), dtype=tf.float32), - observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), - 'model_selector':model_selector}, - reward=tf.constant(np.array([0]), dtype=tf.float64), - step_type=tf.constant(np.array([0]), dtype=tf.int64) - ) + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector': model_selector + }, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64)) - self.assertEqual(combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) + self.assertEqual( + combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) def test_select_subtract_policy(self): policy1 = AddOnePolicy() policy2 = SubtractOnePolicy() combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( - tf_policies={'add_one':policy1, 'subtract_one':policy2}, - time_step_spec=observation_spec, - action_spec=action_spec) + tf_policies={ + 'add_one': policy1, + 'subtract_one': policy2 + }, + time_step_spec=observation_spec, + action_spec=action_spec) m = hashlib.md5() m.update('subtract_one'.encode('utf-8')) @@ -119,11 +135,13 @@ def test_select_subtract_policy(self): model_selector = tf.constant([[high, low]], dtype=tf.uint64) state = 
tf_agents.trajectories.TimeStep( - discount=tf.constant(np.array([0.]), dtype=tf.float32), - observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), - 'model_selector':model_selector}, - reward=tf.constant(np.array([0]), dtype=tf.float64), - step_type=tf.constant(np.array([0]), dtype=tf.int64) - ) - - self.assertEqual(combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) \ No newline at end of file + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector': model_selector + }, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64)) + + self.assertEqual( + combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) From 27dee696f888a550267e5b839118a6f10c0380a0 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 17:53:32 +0000 Subject: [PATCH 04/17] yapf . -ir --- compiler_opt/tools/combine_tfa_policies.py | 14 +- .../tools/combine_tfa_policies_lib.py | 171 +++++++++--------- .../tools/combine_tfa_policies_lib_test.py | 102 ++++++----- 3 files changed, 154 insertions(+), 133 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 3c0db9c1..7309b60f 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -31,15 +31,17 @@ def main(_): policy1 = tf.saved_model.load(policy1_path, tags=None, options=None) policy2 = tf.saved_model.load(policy2_path, tags=None, options=None) combined_policy = cfa_lib.CombinedTFPolicy( - tf_policies={policy1_name:policy1, policy2_name:policy2}, - time_step_spec=expected_signature, - action_spec=action_spec - ) + tf_policies={ + policy1_name: policy1, + policy2_name: policy2 + }, + time_step_spec=expected_signature, + action_spec=action_spec) combined_policy_path = input("Save combined policy path: ") policy_dict = {'combined_policy': combined_policy} saver = policy_saver.PolicySaver(policy_dict=policy_dict) saver.save(combined_policy_path) -if __name__ == "__main__": - app.run(main) +if __name__ == "__main__": + app.run(main) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 87faf829..8aff3e3f 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -29,8 +29,7 @@ class CombinedTFPolicy(tf_agents.policies.TFPolicy): - def __init__(self, *args, - tf_policies: Dict[str, tf_agents.policies.TFPolicy], + def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], **kwargs): super(CombinedTFPolicy, self).__init__(*args, **kwargs) @@ -47,11 +46,13 @@ def __init__(self, *args, for name in self.tf_policy_names: m = hashlib.md5() m.update(name.encode('utf-8')) - high_low_tensors.append(tf.stack([ - tf.constant(int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), - tf.constant(int.from_bytes(m.digest()[:8], 'little'), dtype=tf.uint64) - ]) - ) + high_low_tensors.append( + tf.stack([ + tf.constant( + int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), + tf.constant( + int.from_bytes(m.digest()[:8], 'little'), dtype=tf.uint64) + ])) self.high_low_tensors = tf.stack(high_low_tensors) m = hashlib.md5() @@ -65,44 +66,44 @@ def _process_observation(self, observation): if name in ['model_selector']: switch_tensor = observation.pop(name)[0] high_low_tensor = switch_tensor - + tf.debugging.Assert( tf.equal( tf.reduce_any( 
tf.reduce_all( - tf.equal(high_low_tensor, self.high_low_tensors), axis=1 - ) - ),True - ), - [high_low_tensor, self.high_low_tensors]) + tf.equal(high_low_tensor, self.high_low_tensors), + axis=1)), True), + [high_low_tensor, self.high_low_tensors]) return observation, switch_tensor def _create_distribution(self, inlining_prediction): probs = [inlining_prediction, 1.0 - inlining_prediction] - logits = [[0.0, tf.math.log(probs[1]/(1.0 - probs[1]))]] + logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]] return tfp.distributions.Categorical(logits=logits) - def _action(self, time_step: time_step.TimeStep, + def _action(self, + time_step: time_step.TimeStep, policy_state: types.NestedTensorSpec, seed: Optional[types.Seed] = None) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) + updated_step = tf_agents.trajectories.TimeStep( + step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): return tf.cast( self.tf_policies[0].action(updated_step).action[0], dtype=tf.int64) + def f1(): return tf.cast( self.tf_policies[1].action(updated_step).action[0], dtype=tf.int64) + action = tf.cond( - tf.math.reduce_all( - tf.equal(switch_tensor, self.high_low_tensor)), - f0, - f1 - ) + tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, + f1) return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) def _distribution( @@ -110,82 +111,82 @@ def _distribution( policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) + updated_step = tf_agents.trajectories.TimeStep( + step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): return tf.cast( self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], dtype=tf.float32) + def f1(): return tf.cast( self.tf_policies[1].distribution(updated_step).action.cdf(0)[0], dtype=tf.float32) + distribution = tf.cond( - tf.math.reduce_all( - tf.equal(switch_tensor, self.high_low_tensor)), - f0, - f1 - ) + tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, + f1) return tf_agents.trajectories.PolicyStep( - action=self._create_distribution(distribution), - state=policy_state) - + action=self._create_distribution(distribution), state=policy_state) @gin.configurable() def get_input_signature(): - """Returns the list of features for LLVM inlining to be used in combining models.""" - # int64 features - inputs = dict( - (key,tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) - for key in [ - "caller_basic_block_count", - "caller_conditionally_executed_blocks", - "caller_users", - "callee_basic_block_count", - "callee_conditionally_executed_blocks", - "callee_users", - "nr_ctant_params", - "node_count", - "edge_count", - "callsite_height", - "cost_estimate", - "inlining_default", - "sroa_savings", - "sroa_losses", - "load_elimination", - "call_penalty", - "call_argument_setup", - "load_relative_intrinsic", - 
"lowered_call_arg_setup", - "indirect_call_penalty", - "jump_table_penalty", - "case_cluster_penalty", - "switch_penalty", - "unsimplified_common_instructions", - "num_loops", - "dead_blocks", - "simplified_instructions", - "constant_args", - "constant_offset_ptr_args", - "callsite_cost", - "cold_cc_penalty", - "last_call_to_static_bonus", - "is_multiple_blocks", - "nested_inlines", - "nested_inline_cost_estimate", - "threshold", - "is_callee_avail_external", - "is_caller_avail_external", - ] - ) - inputs.update({'model_selector': tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector')}) - return time_step.time_step_spec(inputs) + """Returns the list of features for LLVM inlining to be used in combining models.""" + # int64 features + inputs = dict((key, tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) + for key in [ + "caller_basic_block_count", + "caller_conditionally_executed_blocks", + "caller_users", + "callee_basic_block_count", + "callee_conditionally_executed_blocks", + "callee_users", + "nr_ctant_params", + "node_count", + "edge_count", + "callsite_height", + "cost_estimate", + "inlining_default", + "sroa_savings", + "sroa_losses", + "load_elimination", + "call_penalty", + "call_argument_setup", + "load_relative_intrinsic", + "lowered_call_arg_setup", + "indirect_call_penalty", + "jump_table_penalty", + "case_cluster_penalty", + "switch_penalty", + "unsimplified_common_instructions", + "num_loops", + "dead_blocks", + "simplified_instructions", + "constant_args", + "constant_offset_ptr_args", + "callsite_cost", + "cold_cc_penalty", + "last_call_to_static_bonus", + "is_multiple_blocks", + "nested_inlines", + "nested_inline_cost_estimate", + "threshold", + "is_callee_avail_external", + "is_caller_avail_external", + ]) + inputs.update({ + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') + }) + return time_step.time_step_spec(inputs) + @gin.configurable() def get_action_spec(): return tensor_spec.BoundedTensorSpec( - dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1 - ) \ No newline at end of file + dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 92ab24d7..c79b592c 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -25,17 +25,20 @@ import hashlib import numpy as np + class AddOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): - observation_spec = {'obs': tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64)} + observation_spec = { + 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + } time_step_spec = time_step.time_step_spec(observation_spec) - action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + + super(AddOnePolicy, self).__init__( + time_step_spec=time_step_spec, action_spec=action_spec) - super(AddOnePolicy, self).__init__(time_step_spec=time_step_spec, - action_spec=action_spec) def _distribution(self, time_step): pass @@ -47,17 +50,20 @@ def _action(self, time_step, policy_state, seed): action = tf.reshape(observation + 1, (1,)) return policy_step.PolicyStep(action, policy_state) + class SubtractOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): - observation_spec = {'obs': tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64)} + observation_spec = { + 'obs': 
tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + } time_step_spec = time_step.time_step_spec(observation_spec) - action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + + super(SubtractOnePolicy, self).__init__( + time_step_spec=time_step_spec, action_spec=action_spec) - super(SubtractOnePolicy, self).__init__(time_step_spec=time_step_spec, - action_spec=action_spec) def _distribution(self, time_step): pass @@ -68,25 +74,30 @@ def _action(self, time_step, policy_state, seed): observation = time_step.observation['obs'][0] action = tf.reshape(observation - 1, (1,)) return policy_step.PolicyStep(action, policy_state) - -observation_spec = time_step.time_step_spec( - {'obs':tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), - 'model_selector': tf.TensorSpec(shape=(2,), - dtype=tf.uint64, name='model_selector')} -) -action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + +observation_spec = time_step.time_step_spec({ + 'obs': + tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') +}) + +action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + class FeatureImportanceTest(absltest.TestCase): - + def test_select_add_policy(self): policy1 = AddOnePolicy() policy2 = SubtractOnePolicy() combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( - tf_policies={'add_one':policy1, 'subtract_one':policy2}, - time_step_spec=observation_spec, - action_spec=action_spec) + tf_policies={ + 'add_one': policy1, + 'subtract_one': policy2 + }, + time_step_spec=observation_spec, + action_spec=action_spec) m = hashlib.md5() m.update('add_one'.encode('utf-8')) @@ -95,22 +106,27 @@ def test_select_add_policy(self): model_selector = tf.constant([[high, low]], dtype=tf.uint64) state = tf_agents.trajectories.TimeStep( - discount=tf.constant(np.array([0.]), dtype=tf.float32), - observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), - 'model_selector':model_selector}, - reward=tf.constant(np.array([0]), dtype=tf.float64), - step_type=tf.constant(np.array([0]), dtype=tf.int64) - ) + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector': model_selector + }, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64)) - self.assertEqual(combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) + self.assertEqual( + combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) def test_select_subtract_policy(self): policy1 = AddOnePolicy() policy2 = SubtractOnePolicy() combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( - tf_policies={'add_one':policy1, 'subtract_one':policy2}, - time_step_spec=observation_spec, - action_spec=action_spec) + tf_policies={ + 'add_one': policy1, + 'subtract_one': policy2 + }, + time_step_spec=observation_spec, + action_spec=action_spec) m = hashlib.md5() m.update('subtract_one'.encode('utf-8')) @@ -119,11 +135,13 @@ def test_select_subtract_policy(self): model_selector = tf.constant([[high, low]], dtype=tf.uint64) state = tf_agents.trajectories.TimeStep( - discount=tf.constant(np.array([0.]), dtype=tf.float32), - observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), - 'model_selector':model_selector}, - reward=tf.constant(np.array([0]), dtype=tf.float64), - step_type=tf.constant(np.array([0]), dtype=tf.int64) - ) - 
- self.assertEqual(combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) \ No newline at end of file + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector': model_selector + }, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64)) + + self.assertEqual( + combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) From 47f5efc9a4e92c9663495863c66161b183b48330 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 18:51:42 +0000 Subject: [PATCH 05/17] Fixed pylint errors. --- compiler_opt/tools/combine_tfa_policies.py | 2 +- .../tools/combine_tfa_policies_lib.py | 39 ++++++++++--------- .../tools/combine_tfa_policies_lib_test.py | 28 ++++++------- 3 files changed, 36 insertions(+), 33 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 7309b60f..05a956ee 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -38,7 +38,7 @@ def main(_): time_step_spec=expected_signature, action_spec=action_spec) combined_policy_path = input("Save combined policy path: ") - policy_dict = {'combined_policy': combined_policy} + policy_dict = {"combined_policy": combined_policy} saver = policy_saver.PolicySaver(policy_dict=policy_dict) saver.save(combined_policy_path) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 8aff3e3f..33a38708 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -13,21 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Combines two tf-agent policies with the given state and action spec.""" -from typing import Dict, List, Optional, Tuple +from typing import Dict, Optional import gin import tensorflow as tf import hashlib import tf_agents -from tf_agents.trajectories import time_step +from tf_agents.trajectories import time_step as ts from tf_agents.typing import types from tf_agents.trajectories import policy_step -import tensorflow_probability as tfp from tf_agents.specs import tensor_spec +import tensorflow_probability as tfp class CombinedTFPolicy(tf_agents.policies.TFPolicy): + """Policy which combines two target policies.""" def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], **kwargs): @@ -45,25 +46,25 @@ def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], high_low_tensors = [] for name in self.tf_policy_names: m = hashlib.md5() - m.update(name.encode('utf-8')) + m.update(name.encode("utf-8")) high_low_tensors.append( tf.stack([ tf.constant( - int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), + int.from_bytes(m.digest()[8:], "little"), dtype=tf.uint64), tf.constant( - int.from_bytes(m.digest()[:8], 'little'), dtype=tf.uint64) + int.from_bytes(m.digest()[:8], "little"), dtype=tf.uint64) ])) self.high_low_tensors = tf.stack(high_low_tensors) m = hashlib.md5() - m.update(self.tf_policy_names[0].encode('utf-8')) - self.high = int.from_bytes(m.digest()[8:], 'little') - self.low = int.from_bytes(m.digest()[:8], 'little') + m.update(self.tf_policy_names[0].encode("utf-8")) + self.high = int.from_bytes(m.digest()[8:], "little") + self.low = int.from_bytes(m.digest()[:8], "little") self.high_low_tensor = tf.constant([self.high, self.low], dtype=tf.uint64) def _process_observation(self, observation): for name in self.sorted_keys: - if name in ['model_selector']: + if name in ["model_selector"]: switch_tensor = observation.pop(name)[0] high_low_tensor = switch_tensor @@ -82,12 +83,12 @@ def _create_distribution(self, inlining_prediction): return tfp.distributions.Categorical(logits=logits) def _action(self, - time_step: time_step.TimeStep, + time_step: ts.TimeStep, policy_state: types.NestedTensorSpec, seed: Optional[types.Seed] = None) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep( + updated_step = ts.TimeStep( step_type=time_step.step_type, reward=time_step.reward, discount=time_step.discount, @@ -107,11 +108,11 @@ def f1(): return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) def _distribution( - self, time_step: time_step.TimeStep, + self, time_step: ts.TimeStep, policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep( + updated_step = ts.TimeStep( step_type=time_step.step_type, reward=time_step.reward, discount=time_step.discount, @@ -136,7 +137,7 @@ def f1(): @gin.configurable() def get_input_signature(): - """Returns the list of features for LLVM inlining to be used in combining models.""" + """Returns a list of inlining features to be used with the combined models.""" # int64 features inputs = dict((key, tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) for key in [ @@ -180,13 +181,13 @@ def get_input_signature(): "is_caller_avail_external", ]) inputs.update({ - 'model_selector': - 
tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') + "model_selector": + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name="model_selector") }) - return time_step.time_step_spec(inputs) + return ts.time_step_spec(inputs) @gin.configurable() def get_action_spec(): return tensor_spec.BoundedTensorSpec( - dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1) + dtype=tf.int64, shape=(), name="inlining_decision", minimum=0, maximum=1) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index c79b592c..03b3ae96 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -12,13 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for the feature_importance_utils.py module""" +"""Tests for the combine_tfa_policies_lib.py module""" from absl.testing import absltest import tensorflow as tf from compiler_opt.tools import combine_tfa_policies_lib -from tf_agents.trajectories import time_step +from tf_agents.trajectories import time_step as ts import tf_agents from tf_agents.specs import tensor_spec from tf_agents.trajectories import policy_step @@ -27,19 +27,20 @@ class AddOnePolicy(tf_agents.policies.TFPolicy): + """Test policy which adds one to obs feature.""" def __init__(self): - observation_spec = { + obs_spec = { 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) } - time_step_spec = time_step.time_step_spec(observation_spec) + time_step_spec = ts.time_step_spec(obs_spec) - action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) super(AddOnePolicy, self).__init__( - time_step_spec=time_step_spec, action_spec=action_spec) + time_step_spec=time_step_spec, action_spec=act_spec) - def _distribution(self, time_step): + def _distribution(self, t_step): pass def _variables(self): @@ -52,19 +53,20 @@ def _action(self, time_step, policy_state, seed): class SubtractOnePolicy(tf_agents.policies.TFPolicy): + """Test policy which subtracts one to obs feature.""" def __init__(self): - observation_spec = { + obs_spec = { 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) } - time_step_spec = time_step.time_step_spec(observation_spec) + time_step_spec = ts.time_step_spec(obs_spec) - action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) super(SubtractOnePolicy, self).__init__( - time_step_spec=time_step_spec, action_spec=action_spec) + time_step_spec=time_step_spec, action_spec=act_spec) - def _distribution(self, time_step): + def _distribution(self, t_step): pass def _variables(self): @@ -76,7 +78,7 @@ def _action(self, time_step, policy_state, seed): return policy_step.PolicyStep(action, policy_state) -observation_spec = time_step.time_step_spec({ +observation_spec = ts.time_step_spec({ 'obs': tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), 'model_selector': From f5b6b6f4b0a7081290fc336f2f71267013ffe253 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 19:02:35 +0000 Subject: [PATCH 06/17] yapf . 
-ir --- compiler_opt/tools/combine_tfa_policies_lib_test.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index e0f46f2e..89a74eef 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -30,9 +30,7 @@ class AddOnePolicy(tf_agents.policies.TFPolicy): """Test policy which adds one to obs feature.""" def __init__(self): - obs_spec = { - 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - } + obs_spec = {'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64)} time_step_spec = ts.time_step_spec(obs_spec) act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) @@ -56,9 +54,7 @@ class SubtractOnePolicy(tf_agents.policies.TFPolicy): """Test policy which subtracts one to obs feature.""" def __init__(self): - obs_spec = { - 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - } + obs_spec = {'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64)} time_step_spec = ts.time_step_spec(obs_spec) act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) From 35d9e8c5fa6206fbeb2f0c3f81c7db3e63263d6c Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 19:05:51 +0000 Subject: [PATCH 07/17] Fixed super without arguments pylint error. --- compiler_opt/tools/combine_tfa_policies_lib.py | 2 +- compiler_opt/tools/combine_tfa_policies_lib_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 33a38708..ebb1b1f8 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -32,7 +32,7 @@ class CombinedTFPolicy(tf_agents.policies.TFPolicy): def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], **kwargs): - super(CombinedTFPolicy, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.tf_policies = [] self.tf_policy_names = [] diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 89a74eef..030d213f 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -35,7 +35,7 @@ def __init__(self): act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - super(AddOnePolicy, self).__init__( + super().__init__( time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): @@ -59,7 +59,7 @@ def __init__(self): act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - super(SubtractOnePolicy, self).__init__( + super().__init__( time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): From 5d6783d7659abec63d666d33eb75434746be5ac1 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 19:09:06 +0000 Subject: [PATCH 08/17] yapf . 
-ir --- compiler_opt/tools/combine_tfa_policies_lib_test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 030d213f..9fb8bb4b 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -35,8 +35,7 @@ def __init__(self): act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - super().__init__( - time_step_spec=time_step_spec, action_spec=act_spec) + super().__init__(time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): pass @@ -59,8 +58,7 @@ def __init__(self): act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - super().__init__( - time_step_spec=time_step_spec, action_spec=act_spec) + super().__init__(time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): pass From 7997f143587eadb2200682e9bf66bd1462e85cbf Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 19:58:38 +0000 Subject: [PATCH 09/17] Fixing pytype annotations. --- compiler_opt/tools/combine_tfa_policies_lib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index ebb1b1f8..439f1b9b 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -62,7 +62,7 @@ def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], self.low = int.from_bytes(m.digest()[:8], "little") self.high_low_tensor = tf.constant([self.high, self.low], dtype=tf.uint64) - def _process_observation(self, observation): + def _process_observation(self, observation: types.NestedSpecTensorOrArray): for name in self.sorted_keys: if name in ["model_selector"]: switch_tensor = observation.pop(name)[0] @@ -105,7 +105,7 @@ def f1(): action = tf.cond( tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, f1) - return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) + return policy_step.PolicyStep(action=action, state=policy_state) def _distribution( self, time_step: ts.TimeStep, @@ -131,7 +131,7 @@ def f1(): distribution = tf.cond( tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, f1) - return tf_agents.trajectories.PolicyStep( + return policy_step.PolicyStep( action=self._create_distribution(distribution), state=policy_state) From 59d36774660273416de75b03437a8c7f2a5ab477 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 22:31:55 +0000 Subject: [PATCH 10/17] Fixed pytype errors. Addressed comments. --- compiler_opt/tools/combine_tfa_policies.py | 53 +++++++++++--- .../tools/combine_tfa_policies_lib.py | 73 +++---------------- .../tools/combine_tfa_policies_lib_test.py | 36 +++++---- 3 files changed, 75 insertions(+), 87 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 05a956ee..fb1a8f25 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -14,20 +14,55 @@ # limitations under the License. 
"""Runs the policy combiner.""" from absl import app +from absl import flags + +import gin import tensorflow as tf from compiler_opt.rl import policy_saver +from compiler_opt.rl import registry from compiler_opt.tools import combine_tfa_policies_lib as cfa_lib +_COMBINE_POLICIES_NAMES = flags.DEFINE_multi_string( + 'policies_names', [], + 'List in order of policy names for combined policies.') +_COMBINE_POLICIES_PATHS = flags.DEFINE_multi_string( + 'policies_paths', [], + 'List in order of policy paths for combined policies.') +_COMBINED_POLICY_PATH = flags.DEFINE_string( + 'combined_policy_path', '', 'Path to save the combined policy.') +_GIN_FILES = flags.DEFINE_multi_string( + 'gin_files', [], 'List of paths to gin configuration files.') +_GIN_BINDINGS = flags.DEFINE_multi_string( + 'gin_bindings', [], + 'Gin bindings to override the values set in the config files.') + def main(_): - expected_signature = cfa_lib.get_input_signature() - action_spec = cfa_lib.get_action_spec() - policy1_name = input("First policy name: ") - policy1_path = input(policy1_name + " path: ") - policy2_name = input("Second policy name: ") - policy2_path = input(policy2_name + " path: ") + flags.mark_flag_as_required('policies_names') + flags.mark_flag_as_required('policies_paths') + flags.mark_flag_as_required('combined_policy_path') + assert len(_COMBINE_POLICIES_NAMES.value) == len( + _COMBINE_POLICIES_PATHS.value) + gin.add_config_file_search_path( + 'compiler_opt/rl/inlining/gin_configs/common.gin') + gin.parse_config_files_and_bindings( + _GIN_FILES.value, bindings=_GIN_BINDINGS.value, skip_unknown=False) + + problem_config = registry.get_configuration() + expected_signature, action_spec = problem_config.get_signature_spec() + expected_signature.observation.update({ + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') + }) + assert len(_COMBINE_POLICIES_NAMES.value + ) == 2, 'Combiner supports only two policies.' + + policy1_name = _COMBINE_POLICIES_NAMES.value[0] + policy1_path = _COMBINE_POLICIES_PATHS.value[0] + policy2_name = _COMBINE_POLICIES_NAMES.value[1] + policy2_path = _COMBINE_POLICIES_PATHS.value[1] policy1 = tf.saved_model.load(policy1_path, tags=None, options=None) policy2 = tf.saved_model.load(policy2_path, tags=None, options=None) combined_policy = cfa_lib.CombinedTFPolicy( @@ -37,11 +72,11 @@ def main(_): }, time_step_spec=expected_signature, action_spec=action_spec) - combined_policy_path = input("Save combined policy path: ") - policy_dict = {"combined_policy": combined_policy} + combined_policy_path = _COMBINED_POLICY_PATH.value + policy_dict = {'combined_policy': combined_policy} saver = policy_saver.PolicySaver(policy_dict=policy_dict) saver.save(combined_policy_path) -if __name__ == "__main__": +if __name__ == '__main__': app.run(main) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 439f1b9b..a2303bf0 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -13,9 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Combines two tf-agent policies with the given state and action spec.""" -from typing import Dict, Optional +from typing import Dict, Optional, Tuple -import gin import tensorflow as tf import hashlib @@ -23,7 +22,6 @@ from tf_agents.trajectories import time_step as ts from tf_agents.typing import types from tf_agents.trajectories import policy_step -from tf_agents.specs import tensor_spec import tensorflow_probability as tfp @@ -55,14 +53,18 @@ def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], int.from_bytes(m.digest()[:8], "little"), dtype=tf.uint64) ])) self.high_low_tensors = tf.stack(high_low_tensors) - + # Related LLVM commit: https://github.com/llvm/llvm-project/pull/96276 m = hashlib.md5() m.update(self.tf_policy_names[0].encode("utf-8")) self.high = int.from_bytes(m.digest()[8:], "little") self.low = int.from_bytes(m.digest()[:8], "little") self.high_low_tensor = tf.constant([self.high, self.low], dtype=tf.uint64) - def _process_observation(self, observation: types.NestedSpecTensorOrArray): + def _process_observation( + self, observation: types.NestedSpecTensorOrArray + ) -> Tuple[types.NestedSpecTensorOrArray, types.TensorOrArray]: + assert "model_selector" in self.sorted_keys + high_low_tensor = self.high_low_tensor for name in self.sorted_keys: if name in ["model_selector"]: switch_tensor = observation.pop(name)[0] @@ -75,7 +77,8 @@ def _process_observation(self, observation: types.NestedSpecTensorOrArray): tf.equal(high_low_tensor, self.high_low_tensors), axis=1)), True), [high_low_tensor, self.high_low_tensors]) - return observation, switch_tensor + + return observation, high_low_tensor def _create_distribution(self, inlining_prediction): probs = [inlining_prediction, 1.0 - inlining_prediction] @@ -133,61 +136,3 @@ def f1(): f1) return policy_step.PolicyStep( action=self._create_distribution(distribution), state=policy_state) - - -@gin.configurable() -def get_input_signature(): - """Returns a list of inlining features to be used with the combined models.""" - # int64 features - inputs = dict((key, tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) - for key in [ - "caller_basic_block_count", - "caller_conditionally_executed_blocks", - "caller_users", - "callee_basic_block_count", - "callee_conditionally_executed_blocks", - "callee_users", - "nr_ctant_params", - "node_count", - "edge_count", - "callsite_height", - "cost_estimate", - "inlining_default", - "sroa_savings", - "sroa_losses", - "load_elimination", - "call_penalty", - "call_argument_setup", - "load_relative_intrinsic", - "lowered_call_arg_setup", - "indirect_call_penalty", - "jump_table_penalty", - "case_cluster_penalty", - "switch_penalty", - "unsimplified_common_instructions", - "num_loops", - "dead_blocks", - "simplified_instructions", - "constant_args", - "constant_offset_ptr_args", - "callsite_cost", - "cold_cc_penalty", - "last_call_to_static_bonus", - "is_multiple_blocks", - "nested_inlines", - "nested_inline_cost_estimate", - "threshold", - "is_callee_avail_external", - "is_caller_avail_external", - ]) - inputs.update({ - "model_selector": - tf.TensorSpec(shape=(2,), dtype=tf.uint64, name="model_selector") - }) - return ts.time_step_spec(inputs) - - -@gin.configurable() -def get_action_spec(): - return tensor_spec.BoundedTensorSpec( - dtype=tf.int64, shape=(), name="inlining_decision", minimum=0, maximum=1) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 9fb8bb4b..2404bff1 100644 --- 
a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -22,12 +22,22 @@ import tf_agents from tf_agents.specs import tensor_spec from tf_agents.trajectories import policy_step +from tf_agents.typing import types import hashlib import numpy as np +def client_side_model_selector_calculation(policy_name: str) -> types.Tensor: + m = hashlib.md5() + m.update(policy_name.encode('utf-8')) + high = int.from_bytes(m.digest()[8:], 'little') + low = int.from_bytes(m.digest()[:8], 'little') + model_selector = tf.constant([[high, low]], dtype=tf.uint64) + return model_selector + + class AddOnePolicy(tf_agents.policies.TFPolicy): - """Test policy which adds one to obs feature.""" + """Test policy which increments the obs feature.""" def __init__(self): obs_spec = {'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64)} @@ -38,19 +48,22 @@ def __init__(self): super().__init__(time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): + """Boilerplate function for TFPolicy.""" pass def _variables(self): + """Boilerplate function for TFPolicy.""" return () def _action(self, time_step, policy_state, seed): + """Boilerplate function for TFPolicy.""" observation = time_step.observation['obs'][0] action = tf.reshape(observation + 1, (1,)) return policy_step.PolicyStep(action, policy_state) class SubtractOnePolicy(tf_agents.policies.TFPolicy): - """Test policy which subtracts one to obs feature.""" + """Test policy which decrements the obs feature.""" def __init__(self): obs_spec = {'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64)} @@ -61,12 +74,15 @@ def __init__(self): super().__init__(time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): + """Boilerplate function for TFPolicy.""" pass def _variables(self): + """Boilerplate function for TFPolicy.""" return () def _action(self, time_step, policy_state, seed): + """Boilerplate function for TFPolicy.""" observation = time_step.observation['obs'][0] action = tf.reshape(observation - 1, (1,)) return policy_step.PolicyStep(action, policy_state) @@ -95,13 +111,9 @@ def test_select_add_policy(self): time_step_spec=observation_spec, action_spec=action_spec) - m = hashlib.md5() - m.update('add_one'.encode('utf-8')) - high = int.from_bytes(m.digest()[8:], 'little') - low = int.from_bytes(m.digest()[:8], 'little') - model_selector = tf.constant([[high, low]], dtype=tf.uint64) + model_selector = client_side_model_selector_calculation('add_one') - state = tf_agents.trajectories.TimeStep( + state = ts.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ 'obs': tf.constant(np.array([0]), dtype=tf.int64), @@ -124,13 +136,9 @@ def test_select_subtract_policy(self): time_step_spec=observation_spec, action_spec=action_spec) - m = hashlib.md5() - m.update('subtract_one'.encode('utf-8')) - high = int.from_bytes(m.digest()[8:], 'little') - low = int.from_bytes(m.digest()[:8], 'little') - model_selector = tf.constant([[high, low]], dtype=tf.uint64) + model_selector = client_side_model_selector_calculation('subtract_one') - state = tf_agents.trajectories.TimeStep( + state = ts.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ 'obs': tf.constant(np.array([0]), dtype=tf.int64), From 6d8c0c77a2185dc3bbc41df2ee85c5494a3c35b7 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Mon, 9 Sep 2024 13:46:27 +0000 Subject: [PATCH 11/17] Addressed comments. 
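For reference, the combiner dispatches on the 'model_selector' observation, which is derived
from the md5 digest of the policy name (high and low 64-bit halves). A client-side sketch of
that computation, mirroring the client_side_model_selector_calculation helper in the unit
test (the helper name below is illustrative and not part of this change):

  import hashlib
  import tensorflow as tf

  def model_selector_for(policy_name: str) -> tf.Tensor:
    # Split the 16-byte md5 digest into (high, low) little-endian uint64 halves.
    digest = hashlib.md5(policy_name.encode('utf-8')).digest()
    high = int.from_bytes(digest[8:], 'little')
    low = int.from_bytes(digest[:8], 'little')
    # Batched shape (1, 2), as fed to the combined policy in the tests.
    return tf.constant([[high, low]], dtype=tf.uint64)

The names passed via --policies_names are expected to match the names used to produce this
feature at compile time.
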
--- compiler_opt/tools/combine_tfa_policies.py | 31 +++++++++++++------ .../tools/combine_tfa_policies_lib.py | 11 +++++++ .../tools/combine_tfa_policies_lib_test.py | 11 ++++--- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index fb1a8f25..0e758a8f 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -15,6 +15,9 @@ """Runs the policy combiner.""" from absl import app from absl import flags +from absl import logging + +import sys import gin @@ -25,11 +28,15 @@ from compiler_opt.tools import combine_tfa_policies_lib as cfa_lib _COMBINE_POLICIES_NAMES = flags.DEFINE_multi_string( - 'policies_names', [], - 'List in order of policy names for combined policies.') + 'policies_names', + [], + 'List in order of policy names for combined policies. Order must match that of policies_paths.' # pylint: disable=line-too-long +) _COMBINE_POLICIES_PATHS = flags.DEFINE_multi_string( - 'policies_paths', [], - 'List in order of policy paths for combined policies.') + 'policies_paths', + [], + 'List in order of policy paths for combined policies. Order must match that of policies_names.' # pylint: disable=line-too-long +) _COMBINED_POLICY_PATH = flags.DEFINE_string( 'combined_policy_path', '', 'Path to save the combined policy.') _GIN_FILES = flags.DEFINE_multi_string( @@ -43,8 +50,11 @@ def main(_): flags.mark_flag_as_required('policies_names') flags.mark_flag_as_required('policies_paths') flags.mark_flag_as_required('combined_policy_path') - assert len(_COMBINE_POLICIES_NAMES.value) == len( - _COMBINE_POLICIES_PATHS.value) + if len(_COMBINE_POLICIES_NAMES.value) != len(_COMBINE_POLICIES_PATHS.value): + logging.error( + 'Length of policies_names: %d must equal length of policies_paths: %d.', + len(_COMBINE_POLICIES_NAMES.value), len(_COMBINE_POLICIES_PATHS.value)) + sys.exit(1) gin.add_config_file_search_path( 'compiler_opt/rl/inlining/gin_configs/common.gin') gin.parse_config_files_and_bindings( @@ -56,9 +66,12 @@ def main(_): 'model_selector': tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') }) - assert len(_COMBINE_POLICIES_NAMES.value - ) == 2, 'Combiner supports only two policies.' - + # TODO(359): We only support combining two policies.Generalize this to handle + # multiple policies. + if len(_COMBINE_POLICIES_NAMES.value) != 2: + logging.error('Policy combiner only supports two policies, %d given.', + len(_COMBINE_POLICIES_NAMES.value)) + sys.exit(1) policy1_name = _COMBINE_POLICIES_NAMES.value[0] policy1_path = _COMBINE_POLICIES_PATHS.value[0] policy2_name = _COMBINE_POLICIES_NAMES.value[1] diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index a2303bf0..c8d09b6a 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -67,6 +67,7 @@ def _process_observation( high_low_tensor = self.high_low_tensor for name in self.sorted_keys: if name in ["model_selector"]: + # model_selector is a Tensor of shape (1,) which requires indexing [0] switch_tensor = observation.pop(name)[0] high_low_tensor = switch_tensor @@ -81,6 +82,12 @@ def _process_observation( return observation, high_low_tensor def _create_distribution(self, inlining_prediction): + """Ensures that even deterministic policies return a distribution. + + This will not change the behavior of the action function which is + what is used at inference time. 
The change for the distribution + function is so that we can always support sampling even for + deterministic policies.""" probs = [inlining_prediction, 1.0 - inlining_prediction] logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]] return tfp.distributions.Categorical(logits=logits) @@ -97,6 +104,8 @@ def _action(self, discount=time_step.discount, observation=new_observation) + # TODO(359): We only support combining two policies.Generalize this to + # handle multiple policies. def f0(): return tf.cast( self.tf_policies[0].action(updated_step).action[0], dtype=tf.int64) @@ -121,6 +130,8 @@ def _distribution( discount=time_step.discount, observation=new_observation) + # TODO(359): We only support combining two policies.Generalize this to + # handle multiple policies. def f0(): return tf.cast( self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 2404bff1..7cd873c5 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -98,7 +98,8 @@ def _action(self, time_step, policy_state, seed): action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) -class FeatureImportanceTest(absltest.TestCase): +class CombinedTFPolicyTest(absltest.TestCase): + """Test for CombinedTFPolicy.""" def test_select_add_policy(self): policy1 = AddOnePolicy() @@ -116,14 +117,14 @@ def test_select_add_policy(self): state = ts.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ - 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'obs': tf.constant(np.array([42]), dtype=tf.int64), 'model_selector': model_selector }, reward=tf.constant(np.array([0]), dtype=tf.float64), step_type=tf.constant(np.array([0]), dtype=tf.int64)) self.assertEqual( - combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) + combined_policy.action(state).action, tf.constant(43, dtype=tf.int64)) def test_select_subtract_policy(self): policy1 = AddOnePolicy() @@ -141,11 +142,11 @@ def test_select_subtract_policy(self): state = ts.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ - 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'obs': tf.constant(np.array([42]), dtype=tf.int64), 'model_selector': model_selector }, reward=tf.constant(np.array([0]), dtype=tf.float64), step_type=tf.constant(np.array([0]), dtype=tf.int64)) self.assertEqual( - combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) + combined_policy.action(state).action, tf.constant(41, dtype=tf.int64)) From 3b0cefd0c245f1a4b6a890ce6b0e0bfefdc4b78d Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Mon, 9 Sep 2024 16:55:01 +0000 Subject: [PATCH 12/17] Resolved _distribution and common.gin comments. 
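Note that with _distribution reduced to a placeholder, sampling from the combined policy is
not supported; action() is the code path exercised at inference time and in the tests. A
minimal sketch of the intended call, assuming combined_policy and model_selector were built
as in the unit test (toy 'obs' feature, values illustrative):

  import tensorflow as tf
  from tf_agents.trajectories import time_step as ts

  state = ts.TimeStep(
      discount=tf.constant([0.], dtype=tf.float32),
      observation={
          'obs': tf.constant([42], dtype=tf.int64),
          'model_selector': model_selector,  # selects which wrapped policy runs
      },
      reward=tf.constant([0.], dtype=tf.float64),
      step_type=tf.constant([0], dtype=tf.int64))
  action = combined_policy.action(state).action
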
--- compiler_opt/tools/combine_tfa_policies.py | 2 - .../tools/combine_tfa_policies_lib.py | 37 +------------------ 2 files changed, 2 insertions(+), 37 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 0e758a8f..0aad21b9 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -55,8 +55,6 @@ def main(_): 'Length of policies_names: %d must equal length of policies_paths: %d.', len(_COMBINE_POLICIES_NAMES.value), len(_COMBINE_POLICIES_PATHS.value)) sys.exit(1) - gin.add_config_file_search_path( - 'compiler_opt/rl/inlining/gin_configs/common.gin') gin.parse_config_files_and_bindings( _GIN_FILES.value, bindings=_GIN_BINDINGS.value, skip_unknown=False) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index c8d09b6a..4a53ba19 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -81,17 +81,6 @@ def _process_observation( return observation, high_low_tensor - def _create_distribution(self, inlining_prediction): - """Ensures that even deterministic policies return a distribution. - - This will not change the behavior of the action function which is - what is used at inference time. The change for the distribution - function is so that we can always support sampling even for - deterministic policies.""" - probs = [inlining_prediction, 1.0 - inlining_prediction] - logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]] - return tfp.distributions.Categorical(logits=logits) - def _action(self, time_step: ts.TimeStep, policy_state: types.NestedTensorSpec, @@ -122,28 +111,6 @@ def f1(): def _distribution( self, time_step: ts.TimeStep, policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: - new_observation = time_step.observation - new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = ts.TimeStep( - step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) - - # TODO(359): We only support combining two policies.Generalize this to - # handle multiple policies. - def f0(): - return tf.cast( - self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], - dtype=tf.float32) - - def f1(): - return tf.cast( - self.tf_policies[1].distribution(updated_step).action.cdf(0)[0], - dtype=tf.float32) - - distribution = tf.cond( - tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, - f1) + """Placeholder for distribution as every TFPolicy requires it.""" return policy_step.PolicyStep( - action=self._create_distribution(distribution), state=policy_state) + action=tfp.distributions.Deterministic(2.), state=policy_state) From 78460ce93ca26b495cc243949c3de1c980e346af Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Mon, 9 Sep 2024 18:58:17 +0000 Subject: [PATCH 13/17] Fixed Aiden's nits. 
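For reference, the two multi_string flags pair up by position, so an invocation of the
combiner looks roughly as follows (policy names, paths and the gin config below are
placeholders):

  python3 compiler_opt/tools/combine_tfa_policies.py \
    --policies_names=policy_a --policies_paths=/path/to/policy_a \
    --policies_names=policy_b --policies_paths=/path/to/policy_b \
    --combined_policy_path=/path/to/combined_policy \
    --gin_files=compiler_opt/rl/inlining/gin_configs/common.gin
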
--- compiler_opt/tools/combine_tfa_policies.py | 12 ++++-------- compiler_opt/tools/combine_tfa_policies_lib.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 0aad21b9..c3146711 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -28,15 +28,11 @@ from compiler_opt.tools import combine_tfa_policies_lib as cfa_lib _COMBINE_POLICIES_NAMES = flags.DEFINE_multi_string( - 'policies_names', - [], - 'List in order of policy names for combined policies. Order must match that of policies_paths.' # pylint: disable=line-too-long -) + 'policies_names', [], 'List in order of policy names for combined policies.' + 'Order must match that of policies_paths.') _COMBINE_POLICIES_PATHS = flags.DEFINE_multi_string( - 'policies_paths', - [], - 'List in order of policy paths for combined policies. Order must match that of policies_names.' # pylint: disable=line-too-long -) + 'policies_paths', [], 'List in order of policy paths for combined policies.' + 'Order must match that of policies_names.') _COMBINED_POLICY_PATH = flags.DEFINE_string( 'combined_policy_path', '', 'Path to save the combined policy.') _GIN_FILES = flags.DEFINE_multi_string( diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 4a53ba19..4d1de2b3 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -93,7 +93,7 @@ def _action(self, discount=time_step.discount, observation=new_observation) - # TODO(359): We only support combining two policies.Generalize this to + # TODO(359): We only support combining two policies. Generalize this to # handle multiple policies. def f0(): return tf.cast( From 6be71863921662e6bcc7486cc386487bb6906a68 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 1 Oct 2024 20:35:53 +0000 Subject: [PATCH 14/17] Patch to env.py and compilation_runner.py which adds working_dir to TimeStep. The patch also gives the option to keep the temporary working_dir by setting keep_temps in compilation_runner.py to a directory where all temporary working_dirs will be saved. --- compiler_opt/rl/compilation_runner.py | 17 +++++++++++---- compiler_opt/rl/env.py | 17 ++++++++++++--- compiler_opt/rl/env_test.py | 30 +++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index a3f5c0b4..50021929 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -80,6 +80,18 @@ def __exit__(self, exc, value, tb): pass +def get_directory_context(): + """Return a context which manages how the temperory directories are handled. + + When the flag keep_temps is specified temporary directories are stored in + keep_temps.""" + if _KEEP_TEMPS.value is not None: + tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) + else: + tempdir_context = tempfile.TemporaryDirectory() + return tempdir_context + + def _overwrite_trajectory_reward(sequence_example: tf.train.SequenceExample, reward: float) -> tf.train.SequenceExample: """Overwrite the reward in the trace (sequence_example) with the given one. @@ -401,10 +413,7 @@ def collect_data(self, compilation_runner.ProcessKilledException is passed through. ValueError if example under default policy and ml policy does not match. 
""" - if _KEEP_TEMPS.present: - tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) - else: - tempdir_context = tempfile.TemporaryDirectory() + tempdir_context = get_directory_context() with tempdir_context as tempdir: final_cmd_line = loaded_module_spec.build_command_line(tempdir) diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index 904fd388..de4fea0d 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -31,6 +31,7 @@ from compiler_opt.rl import corpus from compiler_opt.rl import log_reader +from compiler_opt.rl import compilation_runner class StepType(Enum): @@ -47,6 +48,7 @@ class TimeStep: score_default: Optional[dict[str, float]] context: Optional[str] module_name: str + working_dir: str obs_id: Optional[int] step_type: StepType @@ -115,10 +117,13 @@ class ClangProcess: """ def __init__(self, proc: subprocess.Popen, - get_scores_fn: Callable[[], dict[str, float]], module_name): + get_scores_fn: Callable[[], dict[str, float]], + module_name: str, + working_dir: str): self._proc = proc self._get_scores_fn = get_scores_fn self._module_name = module_name + self._working_dir = working_dir def get_scores(self, timeout: Optional[int] = None): self._proc.wait(timeout=timeout) @@ -133,10 +138,11 @@ def __init__( proc: subprocess.Popen, get_scores_fn: Callable[[], dict[str, float]], module_name: str, + working_dir: str, reader_pipe: io.BufferedReader, writer_pipe: io.BufferedWriter, ): - super().__init__(proc, get_scores_fn, module_name) + super().__init__(proc, get_scores_fn, module_name, working_dir) self._reader_pipe = reader_pipe self._writer_pipe = writer_pipe self._obs_gen = log_reader.read_log_from_file(self._reader_pipe) @@ -150,6 +156,7 @@ def __init__( score_default=None, context=None, module_name=module_name, + working_dir=working_dir, obs_id=None, step_type=StepType.LAST, ) @@ -180,6 +187,7 @@ def _get_step_type() -> StepType: score_default=None, context=obs.context, module_name=self._module_name, + working_dir=self._working_dir, obs_id=obs.observation_id, step_type=_get_step_type(), ) @@ -235,7 +243,8 @@ def clang_session( Yields: Either the constructed InteractiveClang or DefaultClang object. 
""" - with tempfile.TemporaryDirectory() as td: + tempdir_context = compilation_runner.get_directory_context() + with tempdir_context as td: task_working_dir = os.path.join(td, '__task_working_dir__') os.mkdir(task_working_dir) task = task_type() @@ -264,6 +273,7 @@ def _get_scores() -> dict[str, float]: proc, _get_scores, module.name, + task_working_dir, reader_pipe, writer_pipe, ) @@ -272,6 +282,7 @@ def _get_scores() -> dict[str, float]: proc, _get_scores, module.name, + task_working_dir, ) finally: diff --git a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index 87577b3e..4d690cb2 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -19,6 +19,9 @@ import ctypes from unittest import mock import subprocess +import os +import shutil +from absl.testing import flagsaver from typing import Dict, List, Optional @@ -161,6 +164,33 @@ def test_interactive_clang_session(self, mock_popen): self.assertEqual(obs.context, f'context_{idx}') mock_popen.assert_called_once() + @mock.patch('subprocess.Popen') + def test_interactive_clang_temp_dir(self, mock_popen): + mock_popen.side_effect = mock_interactive_clang + working_dir = None + + with env.clang_session( + _CLANG_PATH, _MOCK_MODULE, MockTask, interactive=True) as clang_session: + for _ in range(_NUM_STEPS): + obs = clang_session.get_observation() + working_dir = obs.working_dir + self.assertEqual(os.path.exists(working_dir), True) + self.assertEqual(os.path.exists(working_dir), False) + + with flagsaver.flagsaver( + (env.compilation_runner._KEEP_TEMPS, '/tmp')): # pylint: disable=protected-access + with env.clang_session( + _CLANG_PATH, _MOCK_MODULE, MockTask, + interactive=True) as clang_session: + for _ in range(_NUM_STEPS): + obs = clang_session.get_observation() + working_dir = obs.working_dir + self.assertEqual(os.path.exists(working_dir), True) + self.assertEqual(os.path.exists(working_dir), True) + temp_dir_name = str.split(working_dir, '/')[2] + temp_dir_name = os.path.join('/tmp', temp_dir_name) + shutil.rmtree(temp_dir_name) + class MLGOEnvironmentTest(tf.test.TestCase): From 6342ddaa9a44c4f3cb8411883b5d3a0d20591a55 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 1 Oct 2024 21:55:25 +0000 Subject: [PATCH 15/17] Fixed comments. --- compiler_opt/rl/compilation_runner.py | 6 +++--- compiler_opt/rl/env.py | 6 ++---- compiler_opt/rl/env_test.py | 24 +++++++++++------------- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index 50021929..452bab55 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -80,9 +80,9 @@ def __exit__(self, exc, value, tb): pass -def get_directory_context(): +def get_workdir_context(): """Return a context which manages how the temperory directories are handled. - + When the flag keep_temps is specified temporary directories are stored in keep_temps.""" if _KEEP_TEMPS.value is not None: @@ -413,7 +413,7 @@ def collect_data(self, compilation_runner.ProcessKilledException is passed through. ValueError if example under default policy and ml policy does not match. 
""" - tempdir_context = get_directory_context() + tempdir_context = get_workdir_context() with tempdir_context as tempdir: final_cmd_line = loaded_module_spec.build_command_line(tempdir) diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index de4fea0d..0b40f1b9 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -24,7 +24,6 @@ import contextlib import io import os -import tempfile from typing import Callable, Generator, List, Optional, Tuple, Type import numpy as np @@ -117,8 +116,7 @@ class ClangProcess: """ def __init__(self, proc: subprocess.Popen, - get_scores_fn: Callable[[], dict[str, float]], - module_name: str, + get_scores_fn: Callable[[], dict[str, float]], module_name: str, working_dir: str): self._proc = proc self._get_scores_fn = get_scores_fn @@ -243,7 +241,7 @@ def clang_session( Yields: Either the constructed InteractiveClang or DefaultClang object. """ - tempdir_context = compilation_runner.get_directory_context() + tempdir_context = compilation_runner.get_workdir_context() with tempdir_context as td: task_working_dir = os.path.join(td, '__task_working_dir__') os.mkdir(task_working_dir) diff --git a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index 4d690cb2..79b36598 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -21,6 +21,7 @@ import subprocess import os import shutil +import tempfile from absl.testing import flagsaver from typing import Dict, List, Optional @@ -177,19 +178,16 @@ def test_interactive_clang_temp_dir(self, mock_popen): self.assertEqual(os.path.exists(working_dir), True) self.assertEqual(os.path.exists(working_dir), False) - with flagsaver.flagsaver( - (env.compilation_runner._KEEP_TEMPS, '/tmp')): # pylint: disable=protected-access - with env.clang_session( - _CLANG_PATH, _MOCK_MODULE, MockTask, - interactive=True) as clang_session: - for _ in range(_NUM_STEPS): - obs = clang_session.get_observation() - working_dir = obs.working_dir - self.assertEqual(os.path.exists(working_dir), True) - self.assertEqual(os.path.exists(working_dir), True) - temp_dir_name = str.split(working_dir, '/')[2] - temp_dir_name = os.path.join('/tmp', temp_dir_name) - shutil.rmtree(temp_dir_name) + with tempfile.TemporaryDirectory() as td: + with flagsaver.flagsaver((env.compilation_runner._KEEP_TEMPS, td)): # pylint: disable=protected-access + with env.clang_session( + _CLANG_PATH, _MOCK_MODULE, MockTask, + interactive=True) as clang_session: + for _ in range(_NUM_STEPS): + obs = clang_session.get_observation() + working_dir = obs.working_dir + self.assertEqual(os.path.exists(working_dir), True) + self.assertEqual(os.path.exists(working_dir), True) class MLGOEnvironmentTest(tf.test.TestCase): From 3082ae712cd47c97475b6433a57854d53da35a8f Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 1 Oct 2024 22:00:34 +0000 Subject: [PATCH 16/17] Fixed pylint. 
--- compiler_opt/rl/compilation_runner.py | 2 +- compiler_opt/rl/env_test.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index 452bab55..e60f6112 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -88,7 +88,7 @@ def get_workdir_context(): if _KEEP_TEMPS.value is not None: tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) else: - tempdir_context = tempfile.TemporaryDirectory() + tempdir_context = tempfile.TemporaryDirectory() # pylint: disable=consider-using-with return tempdir_context diff --git a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index 79b36598..f6d3c63b 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -20,7 +20,6 @@ from unittest import mock import subprocess import os -import shutil import tempfile from absl.testing import flagsaver From 2e262438bb2393660aa87d28c3c6a525904b248b Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Wed, 2 Oct 2024 00:09:25 +0000 Subject: [PATCH 17/17] Fixed a nit --- compiler_opt/rl/compilation_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index e60f6112..26231244 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -84,7 +84,8 @@ def get_workdir_context(): """Return a context which manages how the temperory directories are handled. When the flag keep_temps is specified temporary directories are stored in - keep_temps.""" + keep_temps. + """ if _KEEP_TEMPS.value is not None: tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) else: