From f58569351caa2f10dde6bf96516346aef1964a7c Mon Sep 17 00:00:00 2001
From: Dirk Groeneveld <dirkg@allenai.org>
Date: Fri, 5 Apr 2024 13:34:19 -0700
Subject: [PATCH 1/4] Fixing the dependency situation (#613)

---
 .github/workflows/main.yml                  |  3 ++-
 CHANGELOG.md                                |  4 ++++
 pyproject.toml                              |  8 ++++----
 tango/__main__.py                           |  2 +-
 tango/integrations/beaker/executor.py       | 10 +++++++++-
 tango/integrations/flax/data.py             |  2 +-
 tango/integrations/flax/optim.py            |  3 ++-
 tango/integrations/flax/util.py             |  4 ++--
 tango/integrations/transformers/__init__.py |  1 +
 tests/integrations/beaker/executor_test.py  |  3 +++
 tests/integrations/flax/train_test.py       |  7 ++++++-
 11 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3a589ca60..b00d8238a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -211,7 +211,7 @@ jobs:
         if: steps.virtualenv-cache.outputs.cache-hit != 'true' && (contains(matrix.task.extras, 'flax') || contains(matrix.task.extras, 'all'))
         run: |
           . .venv/bin/activate
-          pip install flax==0.6.1 jax==0.4.1 jaxlib==0.4.1 tensorflow-cpu==2.9.1 optax==0.1.3
+          pip install flax jax jaxlib "tensorflow-cpu>=2.9.1" optax
 
       - name: Install editable (no cache hit)
         if: steps.virtualenv-cache.outputs.cache-hit != 'true'
@@ -282,6 +282,7 @@ jobs:
           spec: |
             version: v2
             description: GPU Tests
+            budget: ai2/oe-training
             tasks:
               - name: tests
                 image:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 86fa40edb..5b842499e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Fixed
+
+- Fixed a bunch of dependencies
+
 ## [v1.3.2](https://github.com/allenai/tango/releases/tag/v1.3.2) - 2023-10-27
 
 ### Fixed
diff --git a/pyproject.toml b/pyproject.toml
index 31a1807ab..6db17ec82 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -89,10 +89,10 @@ fairscale = [
 ]
 flax = [
   "datasets>=1.12,<3.0",
-  "jax>=0.4.1,<=0.4.13",
-  "jaxlib>=0.4.1,<=0.4.13",
-  "flax>=0.6.1,<=0.7.0",
-  "optax>=0.1.2",
+  "jax",
+  "jaxlib",
+  "flax",
+  "optax",
   "tensorflow-cpu>=2.9.1"
 ]
 wandb = [
diff --git a/tango/__main__.py b/tango/__main__.py
index c13d6e45e..49ec9cac5 100644
--- a/tango/__main__.py
+++ b/tango/__main__.py
@@ -115,7 +115,7 @@ class SettingsObject(NamedTuple):
     called_by_executor: bool
 
 
-@click.group(**_CLICK_GROUP_DEFAULTS)
+@click.group(name=None, **_CLICK_GROUP_DEFAULTS)
 @click.version_option(version=VERSION)
 @click.option(
     "--settings",
diff --git a/tango/integrations/beaker/executor.py b/tango/integrations/beaker/executor.py
index c7254433c..b0d95f5dc 100644
--- a/tango/integrations/beaker/executor.py
+++ b/tango/integrations/beaker/executor.py
@@ -355,6 +355,7 @@ def __init__(
         priority: Optional[Union[str, Priority]] = None,
         allow_dirty: bool = False,
         scheduler: Optional[BeakerScheduler] = None,
+        budget: Optional[str] = None,
         **kwargs,
     ):
         # Pre-validate arguments.
@@ -365,6 +366,11 @@ def __init__(
                 "Either 'beaker_image' or 'docker_image' must be specified for BeakerExecutor, but not both."
             )
 
+        if budget is None:
+            raise ConfigurationError("You must specify a budget to use the beaker executor.")
+        else:
+            self._budget = budget
+
         from tango.workspaces import LocalWorkspace, MemoryWorkspace
 
         if isinstance(workspace, MemoryWorkspace):
@@ -1029,7 +1035,9 @@ def _build_experiment_spec(
         return (
             experiment_name,
             ExperimentSpec(
-                tasks=[task_spec], description=f'Tango step "{step_name}" ({step.unique_id})'
+                tasks=[task_spec],
+                description=f'Tango step "{step_name}" ({step.unique_id})',
+                budget=self._budget,
             ),
             [step_graph_dataset],
         )
diff --git a/tango/integrations/flax/data.py b/tango/integrations/flax/data.py
index b434d720a..483789670 100644
--- a/tango/integrations/flax/data.py
+++ b/tango/integrations/flax/data.py
@@ -40,7 +40,7 @@ def __init__(
 
         self.logger = logging.getLogger(FlaxDataLoader.__name__)
 
-    def __call__(self, rng: jax.random.PRNGKeyArray, do_distributed: bool):
+    def __call__(self, rng: jax._src.random.KeyArrayLike, do_distributed: bool):
         steps_per_epoch = self.dataset_size // self.batch_size
 
         if self.shuffle:
diff --git a/tango/integrations/flax/optim.py b/tango/integrations/flax/optim.py
index 920991560..ea773790a 100644
--- a/tango/integrations/flax/optim.py
+++ b/tango/integrations/flax/optim.py
@@ -28,6 +28,7 @@ class Optimizer(Registrable):
             :options: +ELLIPSIS
 
             optax::adabelief
+            optax::adadelta
             optax::adafactor
             optax::adagrad
             optax::adam
@@ -100,7 +101,7 @@ def factory_func():
         Optimizer.register("optax::" + name)(factory_func)
 
 # Register all learning rate schedulers.
-for name, cls in optax._src.schedule.__dict__.items():
+for name, cls in optax.schedules.__dict__.items():
     if isfunction(cls) and not name.startswith("_") and cls.__annotations__:
         factory_func = scheduler_factory(cls)
         LRScheduler.register("optax::" + name)(factory_func)
diff --git a/tango/integrations/flax/util.py b/tango/integrations/flax/util.py
index 387974e93..311440ce2 100644
--- a/tango/integrations/flax/util.py
+++ b/tango/integrations/flax/util.py
@@ -3,7 +3,7 @@
 import jax
 
 
-def get_PRNGkey(seed: int = 42) -> Union[Any, jax.random.PRNGKeyArray]:
+def get_PRNGkey(seed: int = 42) -> Union[Any, jax._src.random.KeyArray]:
     """
     Utility function to create a pseudo-random number generator key
     given a seed.
@@ -11,7 +11,7 @@ def get_PRNGkey(seed: int = 42) -> Union[Any, jax.random.PRNGKeyArray]:
     return jax.random.PRNGKey(seed)
 
 
-def get_multiple_keys(key, multiple: int = 1) -> Union[Any, jax.random.PRNGKeyArray]:
+def get_multiple_keys(key, multiple: int = 1) -> Union[Any, jax._src.random.KeyArray]:
     """
     Utility function to split a PRNG key into multiple new keys.
     Used in distributed training.
diff --git a/tango/integrations/transformers/__init__.py b/tango/integrations/transformers/__init__.py
index 386e48ecb..950de3680 100644
--- a/tango/integrations/transformers/__init__.py
+++ b/tango/integrations/transformers/__init__.py
@@ -70,6 +70,7 @@
 
             transformers::Adafactor
             transformers::AdamW
+            transformers::LayerWiseDummyOptimizer
 
 - :class:`~tango.integrations.torch.LRScheduler`: All learning rate scheduler function from transformers
   are registered according to their type name (e.g. "transformers::linear").
diff --git a/tests/integrations/beaker/executor_test.py b/tests/integrations/beaker/executor_test.py
index 6fe71e033..b8f20a908 100644
--- a/tests/integrations/beaker/executor_test.py
+++ b/tests/integrations/beaker/executor_test.py
@@ -19,6 +19,7 @@ def test_from_params(beaker_workspace_name: str):
             beaker_image="ai2/conda",
             github_token="FAKE_TOKEN",
             datasets=[{"source": {"beaker": "some-dataset"}, "mount_path": "/input"}],
+            budget="ai2/allennlp",
         ),
         workspace=BeakerWorkspace(workspace=beaker_workspace_name),
         clusters=["fake-cluster"],
@@ -38,6 +39,7 @@ def test_init_with_mem_workspace(beaker_workspace_name: str):
             beaker_image="ai2/conda",
             github_token="FAKE_TOKEN",
             clusters=["fake-cluster"],
+            budget="ai2/allennlp",
         )
 
 
@@ -50,6 +52,7 @@ def settings(beaker_workspace_name: str) -> TangoGlobalSettings:
             "beaker_workspace": beaker_workspace_name,
             "install_cmd": "pip install .[beaker]",
             "clusters": ["ai2/allennlp-cirrascale", "ai2/general-cirrascale"],
+            "budget": "ai2/allennlp",
         },
     )
 
diff --git a/tests/integrations/flax/train_test.py b/tests/integrations/flax/train_test.py
index 0eb6bcebf..16b1b830f 100644
--- a/tests/integrations/flax/train_test.py
+++ b/tests/integrations/flax/train_test.py
@@ -20,5 +20,10 @@ def test_trainer(self):
             ],
         )
         assert (
-            result_dir / "train" / "work" / "checkpoint_state_latest" / "checkpoint_0"
+            result_dir
+            / "train"
+            / "work"
+            / "checkpoint_state_latest"
+            / "checkpoint_0"
+            / "checkpoint"
         ).is_file()

From 94b4df6ebaacae7ed1bc2454d481e51d04fee5ca Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 5 Apr 2024 13:49:32 -0700
Subject: [PATCH 2/4] Update more-itertools requirement from <10.0,>=8.0 to
 >=8.0,<11.0 (#594)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Dirk Groeneveld <dirkg@allenai.org>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6db17ec82..7e45ec466 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
   "click-help-colors>=0.9.1,<0.10",
   "rich>=12.3,<14.0",
   "tqdm>=4.62,<5.0",
-  "more-itertools>=8.0,<10.0",
+  "more-itertools>=8.0,<11.0",
   "sqlitedict",
   "glob2>=0.7",
   "petname>=2.6,<3.0",

From 437aa160aff6ac47c85142038ca517c780ea6efb Mon Sep 17 00:00:00 2001
From: Jun Harashima <j.harashima@gmail.com>
Date: Wed, 29 May 2024 09:49:14 +0900
Subject: [PATCH 3/4] fix first_steps.md (#566)

---
 docs/source/first_steps.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/first_steps.md b/docs/source/first_steps.md
index 16989205c..cb7d34200 100644
--- a/docs/source/first_steps.md
+++ b/docs/source/first_steps.md
@@ -247,7 +247,7 @@ Computing...: 100%|##########| 100/100 [00:05<00:00, 18.99it/s]
 ✓ The output for "add_numbers" is in workspace/runs/live-tarpon/add_numbers
 ```
 
-The last line in the output tells us where we can find the result of our "add_numbers" step. `live-parpon` is
+The last line in the output tells us where we can find the result of our "add_numbers" step. `live-tarpon` is
 the name of the run. Run names are randomly generated and may be different on your machine. `add_numbers` is the
 name of the step in your config. The whole path is a symlink to a directory, which contains (among other things)
 a file `data.json`:

From 6aaa8ff0f20387c51dcdf4ab0718787eb55ea794 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 30 May 2024 10:33:29 -0700
Subject: [PATCH 4/4] Update wandb requirement from <0.14.3,>=0.12 to
 >=0.16 (#598)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Dirk Groeneveld <dirkg@allenai.org>
---
 .github/workflows/main.yml                  |  3 +--
 CHANGELOG.md                                |  1 +
 pyproject.toml                              |  2 +-
 tango/integrations/transformers/__init__.py |  1 +
 tango/integrations/wandb/step_cache.py      |  8 +++-----
 tango/integrations/wandb/util.py            | 13 ++++++++++++-
 6 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b00d8238a..c2131a5a1 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -20,7 +20,6 @@ env:
   WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
   BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
   BEAKER_WORKSPACE: ai2/tango-testing
-  BEAKER_DEFAULT_CLUSTER: ai2/allennlp-cirrascale
   BEAKER_IMAGE: petew/tango-testing
   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
@@ -288,7 +287,7 @@ jobs:
                 image:
                   beaker: ${{ env.BEAKER_IMAGE }}
                 context:
-                  cluster: ${{ env.BEAKER_DEFAULT_CLUSTER }}
+                  preemptible: true 
                 resources:
                   gpuCount: 2
                 envVars:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b842499e..aa674066c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 - Fixed a bunch of dependencies
+- Upgraded to a newer version of wandb (now requires `wandb>=0.16`)
 
 ## [v1.3.2](https://github.com/allenai/tango/releases/tag/v1.3.2) - 2023-10-27
 
diff --git a/pyproject.toml b/pyproject.toml
index 7e45ec466..3b29a2ad2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -96,7 +96,7 @@ flax = [
   "tensorflow-cpu>=2.9.1"
 ]
 wandb = [
-  "wandb>=0.12,<0.14.3",
+  "wandb>=0.16",
   "retry"
 ]
 beaker = [
diff --git a/tango/integrations/transformers/__init__.py b/tango/integrations/transformers/__init__.py
index 950de3680..817c97f7a 100644
--- a/tango/integrations/transformers/__init__.py
+++ b/tango/integrations/transformers/__init__.py
@@ -93,6 +93,7 @@
             transformers::constant
             transformers::constant_with_warmup
             transformers::cosine
+            transformers::cosine_with_min_lr
             transformers::cosine_with_restarts
             transformers::inverse_sqrt
             transformers::linear
diff --git a/tango/integrations/wandb/step_cache.py b/tango/integrations/wandb/step_cache.py
index 35851b2c1..56ea8b84c 100644
--- a/tango/integrations/wandb/step_cache.py
+++ b/tango/integrations/wandb/step_cache.py
@@ -72,7 +72,7 @@ def _step_artifact_name(self, step: Union[Step, StepInfo]) -> str:
 
     def _step_result_remote(  # type: ignore
         self, step: Union[Step, StepInfo]
-    ) -> Optional[wandb.apis.public.Artifact]:
+    ) -> Optional[wandb.Artifact]:
         artifact_kind = (step.metadata or {}).get("artifact_kind", ArtifactKind.STEP_RESULT.value)
         try:
             return self.wandb_client.artifact(
@@ -88,9 +88,7 @@ def _step_result_remote(  # type: ignore
     def create_step_result_artifact(self, step: Step, objects_dir: Optional[PathOrStr] = None):
         self._upload_step_remote(step, objects_dir)
 
-    def get_step_result_artifact(
-        self, step: Union[Step, StepInfo]
-    ) -> Optional[wandb.apis.public.Artifact]:
+    def get_step_result_artifact(self, step: Union[Step, StepInfo]) -> Optional[wandb.Artifact]:
         artifact_kind = (step.metadata or {}).get("artifact_kind", ArtifactKind.STEP_RESULT.value)
         try:
             return self.wandb_client.artifact(
@@ -144,7 +142,7 @@ def use_step_result_artifact(self, step: Union[Step, StepInfo]) -> None:
 
     def _download_step_remote(self, step_result, target_dir: PathOrStr):
         try:
-            step_result.download(root=target_dir, recursive=True)
+            step_result.download(root=target_dir)
         except (WandbError, ValueError):
             raise RemoteNotFoundError()
 
diff --git a/tango/integrations/wandb/util.py b/tango/integrations/wandb/util.py
index 29d5ae644..7f5b3d211 100644
--- a/tango/integrations/wandb/util.py
+++ b/tango/integrations/wandb/util.py
@@ -1,4 +1,5 @@
 import os
+import re
 import warnings
 from enum import Enum
 
@@ -13,7 +14,17 @@ def is_missing_artifact_error(err: WandbError):
     Check if a specific W&B error is caused by a 404 on the artifact we're looking for.
     """
     # This is brittle, but at least we have a test for it.
-    return "does not contain artifact" in err.message
+
+    # This is a workaround for a bug in the wandb API
+    if err.message == "'NoneType' object has no attribute 'get'":
+        return True
+
+    if re.search(r"^artifact '.*' not found in '.*'$", err.message):
+        return True
+
+    return ("does not contain artifact" in err.message) or (
+        "Unable to fetch artifact with name" in err.message
+    )
 
 
 def check_environment():