Adding R3 changes for v5

RWKV · Sep 6, 2023 · beb46d5 · beb46d5
1 parent 8582908
commit beb46d5
Show file tree

Hide file tree

Showing 19 changed files with 170,392 additions and 21 deletions.
diff --git a/RWKV-v4neo/config-example.yaml b/RWKV-v4neo/config-example.yaml
@@ -345,7 +345,7 @@ data:
 
   # Use source_data_dir, if you are using source=text/json/etc
   # If using relative path, this should be relative to the trainer script path
-  # source_data_dir: ../dataset-text/
+  source_data_dir: ../dataset-text/
 
   # After loading the dataset, split out test data used for validation, 
   # This process is skipped if the dataset includes a test split

diff --git a/RWKV-v4neo/config-minimum-example.yaml b/RWKV-v4neo/config-minimum-example.yaml
@@ -162,7 +162,7 @@ data:
 
   # Use source_data_dir, if you are using source=text/json/etc
   # If using relative path, this should be relative to the trainer script path
-  # source_data_dir: ../dataset-json-dir/
+  source_data_dir: ../dataset-json-dir/
 
   # Tokenizer to use, use either the inbuilt 'neox', or 'world' tokenizer
   # If using a custom tokenizer, provide the HF tokenizer name/path

diff --git a/RWKV-v4neo/src/model.py b/RWKV-v4neo/src/model.py
@@ -1021,7 +1021,11 @@ def checkpointed_step(idx, targets, mask, prev_loss, last_shift_states,
             if self.trainer.num_devices > 1:
                 if self.bptt_learning_range <= 0:
                     # We perform forward/backward on the shared max segment count across all GPUs
-                    forward_segment_count  = self.trainer.strategy.reduce(segment_count, reduce_op="max")
+                    # ---
+                    # We wrap the segment count in a tensor instead of passing the int directly, as this is more reliable across certain versions of torch/lightning
+                    # https://discord.com/channels/992359628979568762/1148755392638234697/1148821863749931008
+                    forward_segment_count  = self.trainer.strategy.reduce(torch.Tensor([segment_count]).to(torch.int), reduce_op="max")
+
                     # Convert to int, if it's a torch tensor
                     if isinstance(forward_segment_count, torch.Tensor):
                         forward_segment_count = forward_segment_count.item()